实例9 英文词频统计
实例
# Example 9: English word-frequency count.
#
# Reads a text file, normalizes away English "noise" (letter case and
# punctuation), counts how often each word occurs, and prints the ten most
# frequent words with their counts.
from collections import Counter

# Characters treated as noise; each one is replaced by a single space.
# NOTE(review): the set contains U+2018 (‘) and neither the ASCII apostrophe
# nor the backtick. That looks like a typesetting artifact, but it is kept
# byte-for-byte so the counts do not change -- confirm the intended set.
NOISE_CHARS = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~'

# One translation table lets str.translate blank every noise character in a
# single pass instead of one str.replace call per character.
_NOISE_TABLE = str.maketrans(NOISE_CHARS, " " * len(NOISE_CHARS))


def get_txt(path="D:\\hamlet.txt", encoding=None):
    """Read a text file and return its normalized content.

    Normalization lower-cases the whole text and replaces every character
    in ``NOISE_CHARS`` with a space, so that ``str.split`` afterwards yields
    bare lower-case words.

    Args:
        path: File to read. Defaults to the location used by the example.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default.

    Returns:
        The normalized text as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the file handle is closed.
    with open(path, "r", encoding=encoding) as f:
        txt = f.read()
    return txt.lower().translate(_NOISE_TABLE)


def count_words(txt, top_n=None):
    """Count whitespace-separated words in ``txt``.

    Args:
        txt: Text to analyse, typically the output of ``get_txt``.
        top_n: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent. Words with equal counts keep first-seen order. The list
        is simply shorter when the text has fewer than ``top_n`` distinct
        words.
    """
    return Counter(txt.split()).most_common(top_n)


def main():
    """Print the ten most frequent words of the default input file."""
    # str.split never yields empty or blank tokens, so no filtering of
    # "empty" words is needed before printing.
    for word, nums in count_words(get_txt(), 10):
        print("{0:<10}{1:>5}".format(word, nums))


if __name__ == "__main__":
    main()
浙公网安备 33010602011771号