统计一篇文章中出现频率最高的字时,需要先去除标点符号。由于是按单个字统计,直接用字典计数即可,不需要使用 jieba 分词。
from collections import Counter

# Punctuation stripped before counting (same character set as the original
# script). Whitespace and any other symbols are still counted as characters.
PUNCTUATION = ",。?;【】"


def most_frequent_char(text, punctuation=PUNCTUATION):
    """Return ``(char, count)`` for the most frequent character in *text*.

    Every character listed in *punctuation* is removed first. Counting is
    per character, so no word segmentation is needed. When several
    characters share the highest count, the one that appears first in
    *text* is returned.

    Returns ``None`` when nothing is left to count (empty text, or text
    made up entirely of *punctuation*).
    """
    # One pass that deletes every punctuation character.
    cleaned = text.translate(str.maketrans("", "", punctuation))
    if not cleaned:
        return None
    return Counter(cleaned).most_common(1)[0]


def main(path="名著.txt"):
    """Print ``char:count`` for the most frequent character in the file at *path*."""
    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()

    result = most_frequent_char(text)
    if result is None:
        raise SystemExit("{}: no characters left to count".format(path))

    char, count = result
    print("{}:{}".format(char, count))


if __name__ == "__main__":
    main()
浙公网安备 33010602011771号