# Report the 10 most frequent words in a text file.
import re
import string
from collections import Counter
# A "word" is a maximal run of ASCII letters; digits, punctuation and
# non-ASCII characters act as separators. Matching is case-sensitive, so
# "Cat" and "cat" are counted as different words.
_WORD_RE = re.compile(r"[a-zA-Z]+")


def count_words(lines):
    """Count how often each word occurs in an iterable of text lines.

    Args:
        lines: Any iterable of strings (an open text file works directly,
            so the input is streamed rather than loaded all at once).

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences. Blank lines contribute nothing.
    """
    counts = Counter()
    for line in lines:
        # findall() returns [] for lines without letters, so no explicit
        # blank-line check is needed.
        counts.update(_WORD_RE.findall(line))
    return counts


def top_words(path, limit=10, encoding="utf-8"):
    """Return the ``limit`` most frequent words in the file at ``path``.

    Args:
        path: Path of the text file to analyse.
        limit: Maximum number of (word, count) pairs to return.
        encoding: Text encoding used to read the file.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count.
        Words with equal counts keep the order in which they first
        appeared in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Only ASCII letters are counted, so bytes that fail to decode are
    # replaced instead of aborting the whole run with UnicodeDecodeError.
    with open(path, "r", encoding=encoding, errors="replace") as fp:
        return count_words(fp).most_common(limit)


if __name__ == "__main__":
    sorted_word_dict = top_words("d:\\2.py")
    print(sorted_word_dict)