[shell基础]——awk命令

关于awk

awk是一个强大的文本分析工具,相对于grep的查找、sed的编辑,awk在其对数据分析并生成报告时,显得尤为强大。

简单来说awk就是把文件逐行的读入,以空格为默认分隔符将每行切片,切开的部分再进行各种分析处理。

awk有3个不同版本: awk、nawk和gawk,未作特别说明,一般指gawk,gawk 是 AWK 的 GNU 版本。

 

本文用的测试文本

# cat -n passwd 
     1	root:x:0:0:root:/root:/bin/bash
     2	bin:x:1:1:bin:/bin:/sbin/nologin
     3	daemon:x:2:2:daemon:/sbin:/sbin/nologin
     4	adm:x:3:4:adm:/var/adm:/sbin/nologin
     5	lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
     6	sync:x:5:0:sync:/sbin:/bin/sync
     7	shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
     8	halt:x:7:0:halt:/sbin:/sbin/halt
     9	mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
    10	uucp:x:10:14:uucp:/var/spool/uucp:/sbin/nologin
    11	operator:x:11:100:operator:/root:/sbin/nologin
# cat -n netstat.txt 
     1  Proto Recv-Q Send-Q Local-Address          Foreign-Address             State
     2  tcp        0      0 0.0.0.0:3306           0.0.0.0:*                   LISTEN
     3  tcp        0      0 0.0.0.0:80             0.0.0.0:*                   LISTEN
     4  tcp        0      0 127.0.0.1:9000         0.0.0.0:*                   LISTEN
     5  tcp        0      0 coolshell.cn:80        124.205.5.146:18245         TIME_WAIT
     6  tcp        0      0 coolshell.cn:80        61.140.101.185:37538        FIN_WAIT2
     7  tcp        0      0 coolshell.cn:80        110.194.134.189:1032        ESTABLISHED
     8  tcp        0      0 coolshell.cn:80        123.169.124.111:49809       ESTABLISHED
     9  tcp        0      0 coolshell.cn:80        116.234.127.77:11502        FIN_WAIT2
    10  tcp        0      0 coolshell.cn:80        123.169.124.111:49829       ESTABLISHED
    11  tcp        0      0 coolshell.cn:80        183.60.215.36:36970         TIME_WAIT
    12  tcp        0   4166 coolshell.cn:80        61.148.242.38:30901         ESTABLISHED
    13  tcp        0      1 coolshell.cn:80        124.152.181.209:26825       FIN_WAIT1
    14  tcp        0      0 coolshell.cn:80        110.194.134.189:4796        ESTABLISHED
    15  tcp        0      0 coolshell.cn:80        183.60.212.163:51082        TIME_WAIT
    16  tcp        0      1 coolshell.cn:80        208.115.113.92:50601        LAST_ACK
    17  tcp        0      0 coolshell.cn:80        123.169.124.111:49840       ESTABLISHED
    18  tcp        0      0 coolshell.cn:80        117.136.20.85:50025         FIN_WAIT2
    19  tcp        0      0 :::22                  :::*                        LISTEN

 

域和记录的理解

(1) awk执行时浏览域标记为$1,$2...$n,这种方法称为域的标识

(2) $1代表分割后的第一个域、$2代表分割后的第二个域,依此类推……

(3) 域标识符$后面可以跟变量、或者变量表达式

(4) 一些特殊的域和记录:

$0 当前记录(这个变量中存放着当前整行的内容,而不是整个文本)
$1~$n 当前记录的第n个字段,字段间由FS分隔
FS 输入字段分隔符 默认是空格或Tab
NF 当前记录中的“域”个数,就是有多少列
$NF 表示最后一个“域”的内容
NR 已经读入的记录数,默认情况下就是当前行的行号(处理多个文件时会跨文件累计)。NR==1可以输出表头
FNR 当前记录数,与NR不同的是,这个值会是各个文件自己的行号
RS 输入的记录分隔符, 默认为换行符
OFS 输出字段分隔符, 默认也是空格
ORS 输出的记录分隔符,默认为换行符
FILENAME 当前输入文件的名字

(5) awk的默认域分隔符是空格(连续的空格或Tab)

(6)-F 选项指定分隔域

(7) BEGIN { } END 的理解

BEGIN{ 这里面放的是执行前的语句 }

END {这里面放的是处理完所有的行后要执行的语句 }

{这里面放的是处理每一行时要执行的语句}

 

基本匹配($N=="string")、输出表头、格式化输出

# awk '$3==0 && $6=="LISTEN"' netstat.txt 
tcp        0      0 0.0.0.0:3306           0.0.0.0:*                   LISTEN
tcp        0      0 0.0.0.0:80             0.0.0.0:*                   LISTEN
tcp        0      0 127.0.0.1:9000         0.0.0.0:*                   LISTEN
tcp        0      0 :::22                  :::*                        LISTEN

# awk '$3==0 && $6=="LISTEN" || NR==1' netstat.txt 
Proto Recv-Q Send-Q Local-Address          Foreign-Address             State
tcp        0      0 0.0.0.0:3306           0.0.0.0:*                   LISTEN
tcp        0      0 0.0.0.0:80             0.0.0.0:*                   LISTEN
tcp        0      0 127.0.0.1:9000         0.0.0.0:*                   LISTEN
tcp        0      0 :::22                  :::*                        LISTEN

# awk '$3==0 && $6=="LISTEN" || NR==1 {printf "%-20s %-20s %s\n",$4,$5,$6}' netstat.txt  
Local-Address        Foreign-Address      State
0.0.0.0:3306         0.0.0.0:*            LISTEN
0.0.0.0:80           0.0.0.0:*            LISTEN
127.0.0.1:9000       0.0.0.0:*            LISTEN
:::22                :::*                 LISTEN

 

字符匹配

  • $N~/ /
  • $N!~/ /
  •  !/ /

可以模糊匹配
可以使用正则表达式

# awk '$6~/FIN/ || NR==1 {print NR,$4,$5,$6}' netstat.txt   
1 Local-Address Foreign-Address State
6 coolshell.cn:80 61.140.101.185:37538 FIN_WAIT2
9 coolshell.cn:80 116.234.127.77:11502 FIN_WAIT2
13 coolshell.cn:80 124.152.181.209:26825 FIN_WAIT1
18 coolshell.cn:80 117.136.20.85:50025 FIN_WAIT2

# awk '$6~/FIN/ || NR==1 {print $4,$5,$6}' OFS=";\t" netstat.txt              
Local-Address;  Foreign-Address;        State
coolshell.cn:80;        61.140.101.185:37538;   FIN_WAIT2
coolshell.cn:80;        116.234.127.77:11502;   FIN_WAIT2
coolshell.cn:80;        124.152.181.209:26825;  FIN_WAIT1
coolshell.cn:80;        117.136.20.85:50025;    FIN_WAIT2

# awk '$6~/FIN|WAIT/ || NR==1 {print $4,$5,$6}' netstat.txt 
Local-Address Foreign-Address State
coolshell.cn:80 124.205.5.146:18245 TIME_WAIT
coolshell.cn:80 61.140.101.185:37538 FIN_WAIT2
coolshell.cn:80 116.234.127.77:11502 FIN_WAIT2
coolshell.cn:80 183.60.215.36:36970 TIME_WAIT
coolshell.cn:80 124.152.181.209:26825 FIN_WAIT1
coolshell.cn:80 183.60.212.163:51082 TIME_WAIT
coolshell.cn:80 117.136.20.85:50025 FIN_WAIT2

# awk '$6!~/FIN|WAIT/ || NR==1 {print $4,$5,$6}' netstat.txt 
Local-Address Foreign-Address State
0.0.0.0:3306 0.0.0.0:* LISTEN
0.0.0.0:80 0.0.0.0:* LISTEN
127.0.0.1:9000 0.0.0.0:* LISTEN
coolshell.cn:80 110.194.134.189:1032 ESTABLISHED
coolshell.cn:80 123.169.124.111:49809 ESTABLISHED
coolshell.cn:80 123.169.124.111:49829 ESTABLISHED
coolshell.cn:80 61.148.242.38:30901 ESTABLISHED
coolshell.cn:80 110.194.134.189:4796 ESTABLISHED
coolshell.cn:80 208.115.113.92:50601 LAST_ACK
coolshell.cn:80 123.169.124.111:49840 ESTABLISHED
:::22 :::* LISTEN

# awk '!/ESTABLISHED/ || NR==1 {print $4,$5,$6}' netstat.txt   
Local-Address Foreign-Address State
0.0.0.0:3306 0.0.0.0:* LISTEN
0.0.0.0:80 0.0.0.0:* LISTEN
127.0.0.1:9000 0.0.0.0:* LISTEN
coolshell.cn:80 124.205.5.146:18245 TIME_WAIT
coolshell.cn:80 61.140.101.185:37538 FIN_WAIT2
coolshell.cn:80 116.234.127.77:11502 FIN_WAIT2
coolshell.cn:80 183.60.215.36:36970 TIME_WAIT
coolshell.cn:80 124.152.181.209:26825 FIN_WAIT1
coolshell.cn:80 183.60.212.163:51082 TIME_WAIT
coolshell.cn:80 208.115.113.92:50601 LAST_ACK
coolshell.cn:80 117.136.20.85:50025 FIN_WAIT2
:::22 :::* LISTEN
# cat passwd |awk -F: '{if($1~/root/) print $0}'   --->#匹配$1域中包含root的并打印匹配行的完整信息
root:x:0:0:root:/root:/bin/bash

# cat passwd |awk '$0~/root/'               --->#匹配整个文本中包含root的
root:x:0:0:root:/root:/bin/bash
operator:x:11:100:operator:/root:/sbin/nologin

# cat passwd |awk -F: '$1~/^root/'      --->#匹配$1域中以root开头的
root:x:0:0:root:/root:/bin/bash

# cat /etc/passwd |awk -F: '$7~/bash$/'  --->#匹配$7域中以bash结尾的
root:x:0:0:root:/root:/bin/bash
huanhuan:x:500:500::/home/huanhuan:/bin/bash
hsy:x:501:501::/home/hsy:/bin/bash
user01:x:502:502::/home/user01:/bin/bash
hh:x:503:503::/home/hh:/bin/bash

 

打印报告头/信息尾

BEGIN{print "......"}    END{print "......"}

# cat passwd |awk -F: 'BEGIN{print "======用户列表====="} {print "用户名:"$1} END{print "======ending======"}'
======用户列表=====
用户名:root
用户名:bin
用户名:daemon
用户名:adm
用户名:lp
用户名:sync
用户名:shutdown
用户名:halt
用户名:mail
用户名:uucp
用户名:operator
======ending======

 

条件匹配

'{ if(......)  print $n }'  (注:if语句中可以使用 && || 实现多条件匹配)

# cat passwd |awk -F: '{if($3<10) print $0}'
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
sync:x:5:0:sync:/sbin:/bin/sync
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin

# cat passwd |awk -F: '{if($3>=10) print $0}'
uucp:x:10:14:uucp:/var/spool/uucp:/sbin/nologin
operator:x:11:100:operator:/root:/sbin/nologin

# cat passwd |awk -F: '{if($1=="root" && $5=="root") print $0}'
root:x:0:0:root:/root:/bin/bash

# cat passwd |awk -F: '{if($1=="root" || $1=="mail") print $0}'
root:x:0:0:root:/root:/bin/bash
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin

# cat passwd |awk -F: '{if($4>=10 && $4<=30) print $0}'
mail:x:8:12:mail:/var/spool/mail:/sbin/nologin
uucp:x:10:14:uucp:/var/spool/uucp:/sbin/nologin

 

拆分文件

# ls
netstat.txt

# awk 'NR!=1 {print > $6}' netstat.txt 

# ls
ESTABLISHED  FIN_WAIT2  LISTEN       TIME_WAIT
FIN_WAIT1    LAST_ACK   netstat.txt

# cat TIME_WAIT
tcp        0      0 coolshell.cn:80        124.205.5.146:18245         TIME_WAIT
tcp        0      0 coolshell.cn:80        183.60.215.36:36970         TIME_WAIT
tcp        0      0 coolshell.cn:80        183.60.212.163:51082        TIME_WAIT
# awk 'NR!=1 {print $4,$5 > $6}' netstat.txt 

# ls
ESTABLISHED  FIN_WAIT2  LISTEN       TIME_WAIT
FIN_WAIT1    LAST_ACK   netstat.txt

# cat TIME_WAIT 
coolshell.cn:80 124.205.5.146:18245
coolshell.cn:80 183.60.215.36:36970
coolshell.cn:80 183.60.212.163:51082
# awk 'NR!=1 {if($6~/TIME|ESTABLISHED/) print $5 > "est.ip" ; else if($6~/LISTEN/) print $5 > "listen.ip" }'  netstat.txt 

# cat listen.ip 
0.0.0.0:*
0.0.0.0:*
0.0.0.0:*
:::*
# cat est.ip 
124.205.5.146:18245
110.194.134.189:1032
123.169.124.111:49809
123.169.124.111:49829
183.60.215.36:36970
61.148.242.38:30901
110.194.134.189:4796
183.60.212.163:51082
123.169.124.111:49840

 

统计

# awk 'NR!=1{a[$NF]++} END {for(i in a) print i "," a[i];}' netstat.txt 
TIME_WAIT,3
FIN_WAIT1,1
ESTABLISHED,6
FIN_WAIT2,3
LAST_ACK,1
LISTEN,4

# awk ' /^tcp/ {a[$NF]++} END{ for(i in a) print i,a[i] } ' netstat.txt  
TIME_WAIT 3
FIN_WAIT1 1
ESTABLISHED 6
FIN_WAIT2 3
LAST_ACK 1
LISTEN 4

# ps aux|awk ' NR!=1 {a[$1]+=$6} END{ for(i in a) print i,a[i]"KB" } '  ——> 统计每个用户的进程占用了多少内存
gdm 40388KB
rpc 912KB
dbus 1620KB
68 6140KB
nobody 604KB
postfix 6948KB
rpcuser 1340KB
root 86040KB

 

awk使用if/for/while语句

#编写一个脚本,统计/etc/passwd 有多少系统用户,每个系统用户是否拥有登录系统的权限?
#!/bin/bash
# List every system account (UID 1-499) found in a passwd-format file together
# with whether it is allowed to log in, then print how many were found.
#
# Usage: script [passwd_file]        (passwd_file defaults to /etc/passwd)

#######################################
# Print one "name,login-permission" line per system account, then a total.
# Arguments: $1 - passwd-format file to read (optional, default /etc/passwd)
# Outputs:   report lines on stdout
# Returns:   exit status of awk (non-zero if the file cannot be read)
#######################################
report_system_users() {
  local passwd_file=${1:-/etc/passwd}

  awk -F: '
    $3 >= 1 && $3 <= 499 {
      # An account whose login shell contains "nologin" cannot log in.
      login = ($NF ~ /nologin/) ? "NO" : "YES"
      # Emit name and permission on a single line; no paste step is needed.
      printf "系统用户:%s,登录权限为:%s\n", $1, login
      total++
    }
    END {
      # %d turns a never-incremented total into 0.
      printf "系统用户总数:%d\n", total
    }
  ' "$passwd_file"
}

report_system_users "$@"

 

运行awk的另一种方法,以及BEGIN、END的运用

# vim cal.awk 

#!/bin/awk -f
# Print a score report: one row per student, followed by the column totals
# and per-subject averages.
# Each input record is expected to be: NAME NO. MATH ENGLISH COMPUTER

# Runs once before any input is read: zero the accumulators, print the header.
BEGIN {
    math = 0
    english = 0
    computer = 0

    printf "NAME    NO.   MATH  ENGLISH  COMPUTER   TOTAL\n"
    printf "---------------------------------------------\n"
}

# Runs for every input record: accumulate each subject and print the row.
{
    math += $3
    english += $4
    computer += $5
    printf "%-6s %-6s %4d %8d %8d %8d\n", $1, $2, $3, $4, $5, $3 + $4 + $5
}

# Runs once after the last record: print the totals and the averages.
END {
    # NR is 0 when the input is empty; divide by 1 in that case so awk does
    # not abort with a division-by-zero error.
    count = (NR > 0) ? NR : 1

    printf "---------------------------------------------\n"
    printf "  TOTAL:%10d %8d %8d \n", math, english, computer
    printf "AVERAGE:%10.2f %8.2f %8.2f\n", math / count, english / count, computer / count
}
# cat score.txt 
Marry   2143 78 84 77
Jack    2321 66 78 45
Tom     2122 48 77 71
Mike    2537 87 97 95
Bob     2415 40 57 62
# awk -f cal.awk score.txt 
NAME    NO.   MATH  ENGLISH  COMPUTER   TOTAL
---------------------------------------------
Marry  2143     78       84       77      239
Jack   2321     66       78       45      189
Tom    2122     48       77       71      196
Mike   2537     87       97       95      279
Bob    2415     40       57       62      159
---------------------------------------------
  TOTAL:       319      393      350 
AVERAGE:     63.80    78.60    70.00

 

awk内置的字符串函数

(1) gsub替换

gsub(/r/,s):把r替换成s
# cat passwd |awk 'gsub(/root/,"Jellylyj") {print $1}'
Jellylyj:x:0:0:Jellylyj:/Jellylyj:/bin/bash
operator:x:11:100:operator:/Jellylyj:/sbin/nologin

gsub(/r/,s,t):在t中把r替换成s,返回值是替换的次数;如果t是某个域(如$1),替换后$0会用OFS(默认空格)重新拼接
# cat passwd |awk  -F: 'gsub(/root/,"Jelly",$1) {print$0}'

(2) length(s)返回s的长度

# cat passwd |awk -F: '{print$1}'|awk '{if(length($1)==4) print $1}'
root
sync
halt
mail
uucp

 

《AWK简明教程》

awk练习题:点此打开

posted @ 2017-10-09 17:34  Jelly_lyj  阅读(200)  评论(0编辑  收藏  举报