# jieba 分词
# 尾号为 1、2、3 的同学做:对《西游记》进行分词,统计出现次数最高的 20 个词。
from collections import Counter

import jieba
#
# wb = openpyxl.load_workbook('学生.xlsx')
# ws = wb.active
# for col in ws['a1':'f2']:
# for c in col:
# print(c.value,end='\t')
# print()
# Tokens that are punctuation or line breaks rather than words.  This is the
# union of the two punctuation lists the script originally kept, so that every
# occurrence of any of these marks is excluded from the counts (previously only
# the first occurrence of some marks was dropped).
SKIP_TOKENS = frozenset(',。:;!?“’”【】()|《》、{}\n') | frozenset(
    ['。', ',', ':', '“', '”', '?', '、', '《', '》', '!', '!', '\n']
)

# Maximum number of (token, count) rows printed by main().
TOP_N = 1000


def read_tokens(path='4447.txt', encoding='GB18030'):
    """Read the text file at *path* and return its content as jieba tokens.

    Args:
        path: File to read; defaults to the script's input file.
        encoding: Text encoding used to decode the file.

    Returns:
        The list produced by ``jieba.lcut`` for the whole file content.
    """
    # The context manager closes the file even if reading or decoding fails.
    with open(path, mode='r', encoding=encoding) as f:
        return jieba.lcut(f.read())


def count_words(tokens, skip=SKIP_TOKENS):
    """Count how often each token occurs, ignoring every token in *skip*.

    Args:
        tokens: Iterable of string tokens.
        skip: Container of tokens that must not be counted.

    Returns:
        A ``collections.Counter`` mapping token -> number of occurrences.
    """
    return Counter(tok for tok in tokens if tok not in skip)


def top_words(tokens, limit=TOP_N, skip=SKIP_TOKENS):
    """Return up to *limit* ``(token, count)`` pairs, most frequent first.

    Tokens with equal counts keep the order in which they first appeared.
    Fewer than *limit* pairs are returned when there are fewer distinct
    tokens, instead of failing with an IndexError.
    """
    return count_words(tokens, skip).most_common(limit)


def main():
    """Print the most frequent tokens of the input file, one per line."""
    for word, freq in top_words(read_tokens()):
        print("{0:5} {1:5}".format(word, freq))


if __name__ == '__main__':
    main()

浙公网安备 33010602011771号