英文词频统计

# English word-frequency count: strip punctuation from the lyrics, lowercase
# them, count every whitespace-separated word, drop the stop words and print
# the ten most frequent (word, count) pairs.
from collections import Counter

song = '''
Can't believe its over
That you're leaving
Weren't we meant to be?
Should've sensed the danger
Read the warnings
Right there in front of me
Just stop
Lets start it over
Couldn't I get one more try?
All:
Maybe tomorrow you'll say that you're mine
You'll realize, I could change
I'm gonna show you I'm in it for life
I'll get you back someday
Maybe tomorrow
Shane:
I forgot to be there
I was selfish
I can see that now
Mark:
I should've got to known you
Should've held you
When your tears fell down
Just stop
Don't make me beg you
Tell me that you'll stay the night
All:
Maybe tomorrow you'll say that you're mine
You'll realize, I could change
I'm gonna show you I'm in it for life
I'll get you back someday
I will find a way
Nicky:
Wait a minute
Just hear me out
This time I promise, I'll put you first
Shane:
Turn around now
Your heart can't let you walk away
I'll do what it takes
All:
Maybe tomorrow you'll say that you're mine
You'll realize (realize), I could change (I can change)
I'm gonna show you I'm in it for life
I'll get you back someday
Maybe tomorrow
Kian:
There's so much I wanna say now
I just wanna make a life with you (don't walk away)
There's so much I wanna do now
I just wanna make love to you
Shane:
Maybe tomorrow
'''

# Stop words that must not appear in the ranking.
UnusefulWords = ['on', 'was', 'I', 'i', 'at']
# Punctuation that is replaced by a space before the text is split.
# Every symbol needs its own comma: adjacent string literals ("." "'") are
# silently concatenated into the single two-character string ".'".
# NOTE: ",", "?" and ":" are not listed, so tokens such as "realize," or
# "all:" are still counted separately from the bare word.
UnusefulSymbol = [".", "'", "(", ")"]

# Replace each punctuation symbol with a space, then normalise the case so
# that "You" and "you" are counted as the same word.
NewWords = song
for symbol in UnusefulSymbol:
    NewWords = NewWords.replace(symbol, ' ')
NewWords = NewWords.lower()

# Split the cleaned text into individual words.
WordsList = NewWords.split()

# The text was lowercased above, so the stop words are compared in lowercase.
stop_words = {word.lower() for word in UnusefulWords}

# Map each remaining word to its number of occurrences.  Whole tokens are
# counted here; str.count() on the full text would also match substrings
# (e.g. "me" inside "meant" or "someday").
Count = dict(Counter(word for word in WordsList if word not in stop_words))

# (word, count) pairs, most frequent first; ties keep first-seen order.
CountWords = sorted(Count.items(), key=lambda x: x[1], reverse=True)

# Print the ten most frequent words; slicing copes with fewer than ten entries.
for entry in CountWords[:10]:
    print(entry)

 

posted on 2018-03-26 20:41  155林俊彪  阅读(120)  评论(0)  编辑  收藏  举报

导航