1 #!/bin/bash
2 #Shell statistics.sh
3 #Author lipc
4 #Date 20200818
5 #Ps tomcat接入日志分析,需要tomcat日志开启访问时间记录
6
7 time1=$(date "+%Y-%m-%d %H:%M:%S")
8 time2=$(date "+%Y-%m-%d-%H")
9 echo "请输入日志路径,例如:/tmp/localhost_access_log.2020-12-19.txt"
10 read name
11 echo "请输入日志存放路径,例如:/tmp/test/"
12 read dir
13 echo "请输入模块名称,例如:pbs"
14 read module
15 log="$dir""$module"$time2.log
16 statistics="$dir""$module"statistics$time2.log
17 IFS=$'\n'
18
function check() {
  # Verify the input log exists, then collect the distinct URL list ($meta)
  # and its size ($amount) used by all later analysis passes.
  if [ -f "$name" ]; then
    echo "$name"'文件目录检测成功'
    echo ""
    echo "程序开始时间:$time1" >>"$statistics"
    echo "正在收集URL接口信息,预计一分钟,请等待......"
    # Field 7 is the request URL; strip everything from 'sig' onward,
    # then drop the bare "/" root and any URL containing "400".
    meta=$(awk '{print $7}' "$name" | awk -F 'sig' '{print $1}' | sort -u | grep -v -w '/' | grep -v 400)
    amount=$(echo "$meta" | wc -l)   # NB: reports 1 even when $meta is empty
    echo "URL接口信息收集完成,即将开始下一步操作......"
    echo ""
  else
    echo "$name"'文件目录检测失败,请重新检查文件路径'
    exit 1
  fi
}
34
function metadata() {
  # For every URL and every hour bucket, count matching requests and write
  # one "<count> <url> 2020:<hour>" line to $log.
  # NOTE(review): the year 2020 and the 07-16 hour window are hard-coded —
  # confirm they match the log being analyzed.
  echo "正在收集日志元数据,用于后续统计分析操作,日志存放于$log......"
  i=0
  for url in $meta; do
    w=$(( i * 100 / amount ))   # percentage of URLs already processed
    echo "当前进度$w%"
    for time in $(seq -w 07 16); do
      # -F: URLs contain regex metacharacters ('.', '?'); match literally.
      num=$(grep -- "2020:$time" "$name" | grep -cF -- "$url")
      echo "$num $url 2020:$time" >>"$log"
    done
    i=$(( i + 1 ))
  done
  echo "日志元数据收集完毕,存放于$log"
  echo ""
}
50
function concurrent() {
  # Append to $statistics: each URL's 5 busiest hours (from $log) and the
  # 5 most-requested URLs overall (counted from the raw access log).
  echo "正在分析日志元数据,用于获取接口并发信息,日志存放于$statistics......"
  echo "每个接口的5个最高并发时段" >>"$statistics"
  for url in $meta; do
    grep -F -- "$url" "$log" | sort -nr | head -n 5 >>"$statistics"
    echo "" >>"$statistics"
  done

  echo "全天最高并发的5个接口" >>"$statistics"
  : >"${dir}a.log"   # truncate: a re-run must not accumulate stale counts
  for url in $meta; do
    a=$(grep -cF -- "$url" "$name")
    echo "$a $url" >>"${dir}a.log"
  done
  sort -nr "${dir}a.log" | head -n 5 >>"$statistics"
}
66
function speed() {
  # Response-time analysis.  Sorts on field 11 of the access log, which is
  # assumed to be the request duration in ms — TODO confirm log pattern.
  echo "正在分析接口耗时信息......"
  echo "" >>"$statistics"
  echo "全天10个最高耗时请求:" >>"$statistics"
  echo "" >>"$statistics"
  : >"${dir}b.log"   # truncate: a re-run must not accumulate stale entries
  # Slowest single request of every URL, then the 10 slowest among those.
  for url in $meta; do
    grep -F -- "$url" "$name" | sort -nr -k 11 | head -n 1 >>"${dir}b.log"
  done

  sort -nr -k 11 "${dir}b.log" | head -n 10 >>"$statistics"
  echo " " >>"$statistics"
  # URLs of those 10 slowest requests, filtered the same way check() does.
  meta1=$(sort -nr -k 11 "${dir}b.log" | head -n 10 | awk '{print $7}' | awk -F 'sig' '{print $1}' | grep -v -w '/' | grep -v 400)

  echo "分析每个接口3个最高耗请求和3个最低耗请求,及其时间:" >>"$statistics"
  echo "" >>"$statistics"
  for url in $meta1; do
    echo "$url接口3个最高耗请求,及其时间:" >>"$statistics"
    grep -F -- "$url" "$name" | sort -nr -k 11 | head -n 3 >>"$statistics"
    echo "$url接口3个最低耗请求,及其时间:" >>"$statistics"
    grep -F -- "$url" "$name" | sort -nr -k 11 | tail -n 3 >>"$statistics"
    num2=$(grep -F -- "$url" "$name" | awk '{print $11}' | awk '{sum+=$1} END {print "Average = ", sum/NR}')
    echo "$url接口平均耗时:$num2 ms" >>"$statistics"
    echo " " >>"$statistics"
  done
  echo "接口耗时信息分析已结束......"
}
93
function peak() {
  # Tally the hour tokens (field 3, "2020:HH") of the count lines already in
  # $statistics and append the 6 busiest hours.  The counts are captured into
  # a variable first: the original piped `cat $statistics ... >>$statistics`,
  # reading from and appending to the same file in one pipeline.
  # NOTE(review): this function is defined but never invoked by the main
  # sequence at the bottom of the script — confirm whether that is intended.
  echo "正在分析接口高峰时段信息......"
  local top
  top=$(awk '{print $3}' "$statistics" | grep 2020 | sort | uniq -c | sort -nr | head -n 6)
  echo "本次日志分析,接口高峰时段时间" >>"$statistics"
  echo "$top" >>"$statistics"
}
99
# Main sequence: validate input, build hourly counts, then the two reports.
# NOTE(review): peak() is defined above but never called here — presumably it
# should run after speed(); confirm with the author before enabling it.
check
metadata
concurrent
speed

time3=$(date "+%Y-%m-%d %H:%M:%S")
echo "程序结束时间:$time3" >>"$statistics"
echo "本次分析结束"