Hamlet 词频统计

 1 def getText():
 2     txt = open('I:\Hamlet_-Prince-of-Denmark_哈姆雷特_.txt','r').read()
 3     txt = txt.lower()
 4     for ch in '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~':
 5         txt = txt.replace(ch, " ")
 6     return txt
 7 
 8 hamletTxt = getText()
 9 words = hamletTxt.split()
10 counts = {}
11 for word in words:
12     counts[word] = counts.get(word,0) + 1
13 items = list(counts.items())
14 items.sort(key=lambda x:x[1], reverse=True)
15 for i in range(10):
16     word, count = items[i]
17     print("{0:<10} {1:>5}".format(word,count))

 

posted @ 2023-02-20 15:18  摆烂小T  阅读(57)  评论(0)    收藏  举报