from collections import Counter

# Sample lyrics carried over from the original script. The analysis below
# reads its input from LYRICS_PATH and never consults this constant; it was
# previously bound to the name ``str``, which shadowed the builtin.
SAMPLE_LYRICS = '''If I should stay I would only be in your way
So I'll go But I know
I'll think of your every step of the way
And I will always love you Will always love you
You my darling you Bitter-sweet memories
That is all I'm taking with me
So goodbye Please don't cry We both know I'm not
What you need And I will always love you
I will always love you
I hope life treats you kind
And I hope you have all you dreamed of
And I wish you joy and happiness
But above all this Ii wish your love
And I will always love you
I will always love you I will always love you
I will always love you I will always love you
I will always love you Darling I love you
I'll always love you'''

# Text file whose words are counted.
LYRICS_PATH = '英文歌词.txt'
# Punctuation characters that are turned into spaces before splitting.
LYRICS_PUNCTUATION = ",.;:'`"
# Function words (articles, conjunctions, ...) excluded from the statistics.
LYRICS_STOP_WORDS = frozenset({'a', 'the', 'and', 'if', 'do', 'of'})
# Number of most frequent words printed at the end of the report.
LYRICS_TOP_N = 20


def clean_lyrics(text):
    """Return *text* lower-cased with punctuation replaced by spaces.

    String methods return new strings instead of modifying in place, so the
    result has to be kept; the earlier version called ``lower()`` and
    ``replace()`` and threw the results away, leaving the text untouched.
    """
    table = str.maketrans(LYRICS_PUNCTUATION, ' ' * len(LYRICS_PUNCTUATION))
    return text.lower().translate(table)


def word_frequencies(text, top_n=None):
    """Rank the words of *text* by how often they occur.

    Args:
        text: Raw text; it is normalised with ``clean_lyrics`` and split on
            whitespace.
        top_n: Maximum number of entries to return, or ``None`` for all.

    Returns:
        A list of ``(word, count)`` tuples, most frequent first, with the
        stop words left out. Words with equal counts keep the order in
        which they first appear in the text.
    """
    words = clean_lyrics(text).split()
    # One pass over the words instead of list.count() per distinct word.
    counts = Counter(word for word in words if word not in LYRICS_STOP_WORDS)
    ranked = counts.most_common()
    return ranked if top_n is None else ranked[:top_n]


def analyze_lyrics(path=LYRICS_PATH):
    """Read the lyrics file at *path* and print its word-frequency report."""
    # Read the text file.
    with open(path, 'r', encoding='utf-8') as lyrics_file:
        raw = lyrics_file.read()
    print(raw)

    # Pre-processing: lower-case, then replace punctuation with spaces.
    print(raw.lower())
    cleaned = clean_lyrics(raw)
    print(cleaned)

    # Split on whitespace to get the individual words.
    words = cleaned.split()
    print(words)

    # Count every distinct word (stop words excluded), sorted by frequency.
    ranked = word_frequencies(raw)
    print(len(ranked), dict(ranked))
    print(ranked)

    # Print the top entries; slicing copes with fewer than LYRICS_TOP_N words.
    for entry in ranked[:LYRICS_TOP_N]:
        print(entry)


if __name__ == "__main__":
    analyze_lyrics()
# (empty image placeholder left over from the original write-up)
# Text file whose characters are counted.
NOVEL_PATH = '百万英镑.txt'
# Number of most frequent characters printed at the end of the report.
NOVEL_TOP_N = 20


def char_frequencies(text, top_n=None):
    """Rank the characters of *text* by how often they occur.

    Every character is counted individually, including whitespace and
    punctuation; the text is not split into words.

    Args:
        text: The text to analyse.
        top_n: Maximum number of entries to return, or ``None`` for all.

    Returns:
        A list of ``(character, count)`` tuples ordered by descending count;
        characters with equal counts are ordered by ascending code point.
    """
    # Local import keeps this section usable on its own.
    from collections import Counter

    counts = Counter(text)
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked if top_n is None else ranked[:top_n]


def analyze_novel(path=NOVEL_PATH):
    """Read the text file at *path* and print its character-frequency report."""
    with open(path, 'r', encoding='utf-8') as novel_file:
        text = novel_file.read()
    print(text)

    # Distinct characters found in the text.
    distinct = set(text)
    print(len(distinct), distinct)

    # Occurrences of each character.
    ranked = char_frequencies(text)
    counts = dict(ranked)
    print(len(counts), counts)

    # Character-ordered view; the earlier version sorted a list here but then
    # printed the unsorted dict items instead.
    print(sorted(counts.items()))

    # Frequency-ordered view.
    print(ranked)

    # Print the top entries; slicing copes with fewer than NOVEL_TOP_N items.
    for entry in ranked[:NOVEL_TOP_N]:
        print(entry)


if __name__ == "__main__":
    analyze_novel()
# (empty image placeholder left over from the original write-up)