Talk is cheap, show me your code

文本处理三剑客之awk

1、awk 基本功：用 awk 求一列数字的最小值、最大值和平均值

# Print the minimum, maximum and mean of the first column (here: 1 10 5.5).
# Nothing is printed for empty input, which avoids dividing by NR == 0 in END.
seq 10 | awk '
    NR == 1  { min = max = $1 }   # seed both extremes from the first record
             { total += $1 }
    $1 < min { min = $1 }
    $1 > max { max = $1 }
    END      { if (NR > 0) print min, max, total / NR }
'

2、awk 单字符/字符串/特殊字符分隔符

# Regex separator: fields are delimited by " | ", so the three numbers are summed.
printf '%s\n' "1 | 2 | 3" | awk -F ' [|] ' '{ sum = $1 + $2 + $3; print sum }'
# Alternation separator: split on " str: " or on either square bracket.
# " str: " and "[" are adjacent, so $2 is empty and the bracketed payload is $3.
printf '%s\n' "E0322 22:17:49 str: [msgid:171,uid:JackMa,Content:I love 996]" | awk -F ' str: |[][]' '{ printf "%s %s\n", $1, $3 }'

 3、内置变量/函数使用

# FILENAME is the current input file, NR the record number across all files,
# FNR the record number within the current file.
awk '{ printf "%s %s %s %s\n", FILENAME, NR, FNR, $0 }' /tmp/file1 /tmp/file2
# Upper-case the input and append the current local time (strftime/systime are gawk time functions).
echo "timestamp" | awk '{ label = toupper($0); stamp = strftime("%Y-%m-%d %T", systime()); print label ": " stamp }'

 4、基本控制逻辑

#!/bin/awk -f

# Print the 9x9 multiplication table as a lower triangle: row i holds the
# products i * 1 .. i * i, each followed by a tab, one row per line.
BEGIN {
    for (row = 1; row <= 9; row++) {
        for (col = 1; col <= row; col++)
            printf("%1d * %1d = %2d\t", row, col, row * col)

        # The inner loop always finishes at col == row, so end the row here.
        printf("\n")
    }
}

 5、关联数组

# Count how often each word occurs in ChinaDaily0321, most frequent first.
# Fields are split on a space, a period, or ", ". Adjacent separators (". ")
# yield empty fields, which are skipped instead of being counted as a word.
awk -F '[ ]|[.]|, ' '
    {
        # i <= NF: the last word of a line with no trailing separator counts too
        for (i = 1; i <= NF; i++)
            if ($i != "")
                arr[$i]++
    }
    END { for (x in arr) print x, arr[x] }
' ChinaDaily0321 | sort -k2 -nr

6、awk 中调用 shell 命令

# Run a shell pipeline from awk and read the first line of its output with getline.
awk 'BEGIN { cmd = "du -hs * | sort -h | tail -1"; cmd | getline cmdout; print cmdout }'
# Build the command line from the input record, then read its output the same way.
printf '%s\n' "Jan 02 2006 15:04:05" | awk '{ cmd = sprintf("date -d \"%s\" \"+%%F %%T\"", $0); cmd | getline cmdout; print cmdout }'
# Print the header row directly; pipe every other row through sort (numeric, descending on column 2).
awk -F ',' '{ if (NR == 1) { print $1, $2 } else { print $1, $2 | "sort -k2 -nr" } }' score.csv

7、awk 中引用 shell 变量

# Splice the value into the program text. The expansion is double-quoted so a
# path containing whitespace is not split into separate awk arguments.
echo | awk '{print "'"$PWD"'"}'
# ENVIRON only holds exported variables, so HOSTNAME is passed in awk's
# environment; this works even when the shell has not exported it.
echo | HOSTNAME="$HOSTNAME" awk '{print ENVIRON["HOSTNAME"]}'
# Assignments given as operands are applied before standard input is read.
echo | awk '{print v1, v2}' v1="$SHELL" v2="$LANG"
# -v assigns the variable before the program starts running.
money="1000W" && echo | awk -v var="$money" '{print "I got", var}'

 

posted on 2020-03-22 00:11  LoveFlying-Nine  阅读(129)  评论(0)  编辑  收藏  举报

导航