一、分析访问日志(nginx为例)
日志格式
'$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"'
统计访问ip次数
# Count requests per client IP. Field 1 is $remote_addr in the log format above.
# Prints one "<count> <ip>" line per address; for-in order is unspecified,
# so pipe the output through sort when ordering matters.
awk '{ hits[$1]++ } END { for (ip in hits) print hits[ip], ip }' http_acces.log
统计访问次数大于100的IP
# List client IPs (field 1, $remote_addr) that made more than 100 requests,
# one "<count> <ip>" line each. The log file is passed explicitly; without it
# awk would wait on stdin.
awk '{ hits[$1]++ }
     END { for (ip in hits) if (hits[ip] > 100) print hits[ip], ip }' http_acces.log
统计访问IP次数并排序取前10
//按照访问量逆序排序
# Per-IP request counts, highest first.
# The END action must be wrapped in braces; "END(...)" is an awk syntax error.
# sort -k1,1nr restricts the key to the count column and sorts it numerically, descending.
awk '{ hits[$1]++ } END { for (ip in hits) print hits[ip], ip }' http_acces.log \
  | sort -k1,1nr
//取top 10
# Top 10 client IPs by request count.
# The END action must be wrapped in braces; "END(...)" is an awk syntax error.
awk '{ hits[$1]++ } END { for (ip in hits) print hits[ip], ip }' http_acces.log \
  | sort -k1,1nr \
  | head -n 10
统计指定时间段内访问最多的IP
# Top 10 client IPs inside a time window.
# With the log format above, whitespace splitting puts the client IP in field 1
# and the start of [$time_local] in field 4, e.g. "[08/Mar/2022:14:15:00"
# (assumes $remote_user contains no spaces).
# The bounds are compared as strings, so this is only chronological while both
# bounds and the matching log lines fall on the same date.
awk -v from='[08/Mar/2022:14:15:00' -v to='[08/Mar/2022:16:15:10' \
    '$4 > from && $4 < to { hits[$1]++ }
     END { for (ip in hits) print hits[ip], ip }' http_acces.log \
  | sort -k1,1nr \
  | head -n 10
字符串拆分
# Split a string into characters, one per line.
# An empty field separator makes every character its own field (gawk/mawk
# behaviour; POSIX leaves an empty FS undefined).
# The loop index restarts at 1 for each record and runs through NF inclusive,
# so the last character is printed and multi-line input works.
echo "hello" | awk -F '' '{ for (i = 1; i <= NF; i++) print $i }'
# Split a string into characters with split() and an empty separator
# (per-character splitting on "" is a gawk/mawk extension).
# Iterate by index 1..n: "for (k in arr)" visits elements in unspecified order
# and may print the characters scrambled.
echo "hello" | awk '{ n = split($0, chars, ""); for (i = 1; i <= n; i++) print chars[i] }'

统计出现的次数
# Count how often each token occurs; tokens are separated by "." or ",".
# The loop index restarts at 1 for every record so multi-line input is counted too.
# Output is "<token> <count>" per line in unspecified order.
echo "a.b.c,c.d.e" | awk -F '[.,]' '
  { for (i = 1; i <= NF; i++) seen[$i]++ }
  END { for (tok in seen) print tok, seen[tok] }'

得出每个员工出差总费用及次数
cat a
zhangsan 8000 1
zhangsan 5000 1
lisi 1000 1
lisi 2000 1
wangwu 1500 1
zhaoliu 6000 1
zhaoliu 2000 1
zhaoliu 3000 1
# Aggregate file "a" by the name in column 1: sum column 2 (cost) and
# column 3 (trip count), then print "<name> <total cost> <total trips>"
# for every distinct name. Output order is whatever for-in yields.
awk '
{
  seen[$1]++
  total[$1] += $2
  trips[$1] += $3
}
END {
  for (who in seen)
    print who, total[who], trips[who]
}
' a

获取某列数字最大数
cat a
a b 1
c d 2
e f 3
g h 3
i j 2
# Print the largest value in column 3 of file "a".
# max is seeded from the first record rather than 0, so a column holding only
# negative numbers still yields the right answer; nothing is printed for empty input.
awk 'NR == 1 || $3 > max { max = $3 } END { if (NR) print max }' a
