linux 中统计指定列特定字符连续出现的最多次数

 

001、shell实现

root@PC1:/home/test2# ls
test.txt
root@PC1:/home/test2# cat test.txt
3 a 8
a y a
a a f
y a a
a a a
e a a
a r k
root@PC1:/home/test2# awk -v col=1 -v ch=a '$col == ch { if (++run > best) best = run; next } { run = 0 } END { print best + 0 }' test.txt  ## longest run of consecutive "a" in column 1 (single awk pass; prints 0 when there is no match)
2
root@PC1:/home/test2# awk -v col=2 -v ch=a '$col == ch { if (++run > best) best = run; next } { run = 0 } END { print best + 0 }' test.txt  ## longest run of consecutive "a" in column 2 (single awk pass; prints 0 when there is no match)
4
root@PC1:/home/test2# awk -v col=3 -v ch=a '$col == ch { if (++run > best) best = run; next } { run = 0 } END { print best + 0 }' test.txt  ## longest run of consecutive "a" in column 3 (single awk pass; prints 0 when there is no match)
3

 

002、python实现

root@PC1:/home/test2# ls
test.py  test.txt
root@PC1:/home/test2# cat test.txt
3 a 8
a y a
a a f
y a a
a a a
e a a
a r k
root@PC1:/home/test2# cat test.py
#!/usr/bin/python
"""Print the longest run of consecutive lines whose chosen column equals "a".

Reads whitespace-separated columns from test.txt in the current directory.

Usage: python test.py [--aa COLUMN] [--bb TEXT]
    --aa  1-based column number to inspect (default 2)
    --bb  accepted for command-line compatibility; not used by this version
"""
import argparse


def longest_run(lines, column, target="a"):
    """Return the longest run of consecutive lines whose column equals target.

    lines  -- iterable of text lines; each is split on whitespace
    column -- 1-based column number
    target -- exact string the field must equal

    Returns 0 when target never occurs in that column.
    Raises ValueError when column is smaller than 1.
    """
    if column < 1:
        # Index 0 or below would silently select a column counted from the right.
        raise ValueError("column must be >= 1")
    best = 0
    current = 0
    for line in lines:
        fields = line.split()
        # A line with too few fields (e.g. a blank line) cannot match, so it
        # ends the current run instead of raising IndexError.
        if len(fields) >= column and fields[column - 1] == target:
            current += 1
            if current > best:
                best = current
        else:
            current = 0
    return best


def main():
    """Parse the command line and print the result for test.txt."""
    parser = argparse.ArgumentParser(description='manual to this script')
    parser.add_argument('--aa', type=int, default=2)
    parser.add_argument('--bb', type=str, default=None)
    args = parser.parse_args()
    if args.aa < 1:
        parser.error("--aa must be a 1-based column number (>= 1)")

    # The with-block closes the file even if processing fails; iterating the
    # handle avoids loading the whole file into memory.
    with open("test.txt", "r") as in_file:
        print(longest_run(in_file, args.aa))


if __name__ == "__main__":
    main()
root@PC1:/home/test2# python test.py --aa 1   
2
root@PC1:/home/test2# python test.py --aa 2
4
root@PC1:/home/test2# python test.py --aa 3
3

 

root@PC1:/home/test2# ls
test.py  test.txt
root@PC1:/home/test2# cat test.txt
3 a 8
a a a
a a b
b b a
a b a
b b a
b b k
root@PC1:/home/test2# cat test.py     ## 改进脚本, 同时指定列和匹配字符串
#!/usr/bin/python
"""Print the longest run of consecutive lines whose chosen column equals a string.

Reads whitespace-separated columns from test.txt in the current directory.

Usage: python test.py [--aa COLUMN] [--bb TEXT]
    --aa  1-based column number to inspect (default 2)
    --bb  string the column must equal (default "a")
"""
import argparse


def longest_run(lines, column, target="a"):
    """Return the longest run of consecutive lines whose column equals target.

    lines  -- iterable of text lines; each is split on whitespace
    column -- 1-based column number
    target -- exact string the field must equal

    Returns 0 when target never occurs in that column.
    Raises ValueError when column is smaller than 1.
    """
    if column < 1:
        # Index 0 or below would silently select a column counted from the right.
        raise ValueError("column must be >= 1")
    best = 0
    current = 0
    for line in lines:
        fields = line.split()
        # A line with too few fields (e.g. a blank line) cannot match, so it
        # ends the current run instead of raising IndexError.
        if len(fields) >= column and fields[column - 1] == target:
            current += 1
            if current > best:
                best = current
        else:
            current = 0
    return best


def main():
    """Parse the command line and print the result for test.txt."""
    parser = argparse.ArgumentParser(description='manual to this script')
    parser.add_argument('--aa', type=int, default=2)
    parser.add_argument('--bb', type=str, default="a")
    args = parser.parse_args()
    if args.aa < 1:
        parser.error("--aa must be a 1-based column number (>= 1)")

    # The with-block closes the file even if processing fails; iterating the
    # handle avoids loading the whole file into memory.
    with open("test.txt", "r") as in_file:
        print(longest_run(in_file, args.aa, args.bb))


if __name__ == "__main__":
    main()
root@PC1:/home/test2# python test.py --aa 1
2
root@PC1:/home/test2# python test.py --aa 2
3
root@PC1:/home/test2# python test.py --aa 3
3
root@PC1:/home/test2# python test.py --aa 1 --bb b
2
root@PC1:/home/test2# python test.py --aa 2 --bb b
4
root@PC1:/home/test2# python test.py --aa 3 --bb b
1

 

posted @ 2022-08-03 15:33  小鲨鱼2018  阅读(166)  评论(0)  编辑  收藏  举报