# Sample English lyrics used as the input text for the word-frequency count.
# This is runtime data that gets split into words later in the script, so its
# spelling and punctuation are kept exactly as given.
strYoung ='''young for you
Gala
sunday's coming i wanna drive my car
to your apartment with present like a star
forecaster said the weather may be rainy hard
but i know the sun will shine for us
oh lazy seagull fly me from the dark
i dress my jeans and feed my monkey banana
then i think my age how old, skyline how far
or we need each other in california
you show me your body before night comes down
i touch your face and promise to stay ever young
on this ivory beach we kissed so long
it seems that the passion's never gone
you sing me your melody and i feel so please
i want you to want me to keep your dream
together we'll run wild by a summer symphony
this is what we enjoyed not a fantasy
the tin-man's surfing i wanna try my luck
to the top of tide rip like just have some drugs
i know you have no blame for my proud moonish heart
welcome to the golden beatnik park
oh diamond seashore drag me from the yard
incredible sunward i watch as you're in photograph
for camera your smile's so sweet, palm trees' so lush
would you believe my honey it's califonia
you show me your body before night comes down
i touch your face and promise to stay ever young
on this ivory beach we kissed so long
it seems that the passion's never gone
you sing me your melody and i feel so please
i want you to want me to keep your dream
together we'll run wild by a summer symphony
-
'''




# Read the UTF-8 encoded lyrics file into a string. The `with` block closes
# the file even if reading raises, which the manual open()/close() pair did not.
with open('young for you.txt', 'r', encoding='utf-8') as fo:
    young = fo.read()
print(young)

# Split the inline lyrics on whitespace to get the raw word list.
# NOTE: the word list is built from strYoung, not from the file contents
# held in `young`.
strList = strYoung.split()
print(len(strList), strList)


# Word count.
from collections import Counter

# Replace punctuation with spaces BEFORE splitting, so that tokens such as
# "old," and "old" are counted as the same word. Doing this after the counts
# were built (as before) had no effect on the result.
sep = '.,:;?|_'
strYoung = strYoung.translate(str.maketrans(sep, ' ' * len(sep)))
strList = strYoung.split()

# Distinct words.
strSet = set(strList)
print(len(strSet), strSet)

# Map each distinct word to its number of occurrences. Counter tallies the
# list in a single pass instead of calling list.count() once per word.
counts = Counter(strList)
strDict = {word: counts[word] for word in strSet}
print(len(strDict), strDict)

# (word, count) pairs, sorted alphabetically by word for now.
wcList = list(strDict.items())
wcList.sort()
print(strDict.items())


# Sort by frequency, most frequent first.
wcList.sort(key=lambda x: x[1], reverse=True)
print(wcList)
# Print the top 20 entries. Slicing (instead of indexing 0..19) avoids an
# IndexError when there are fewer than 20 distinct words.
for entry in wcList[:20]:
    print(entry)

结果

2.中文小说词频统计

import jieba

# Read the novel text (UTF-8) into a string. The `with` block closes the file
# even if reading raises.
with open('百年孤独1.txt', 'r', encoding='utf-8') as bn:
    strbn = bn.read()
print(strbn)

# Word count.
from collections import Counter

# Segment the text into words with jieba. Building a set directly from the
# string (as before) yields single characters, not words. Whitespace-only
# tokens (spaces, newlines) are dropped so they do not show up as "words".
wordList = [w for w in jieba.lcut(strbn) if w.strip()]
strSet = set(wordList)
print(len(strSet), strSet)

# Map each distinct word to its number of occurrences, tallied in one pass.
counts = Counter(wordList)
strDict = {word: counts[word] for word in strSet}

print(len(strDict), strDict)

# (word, count) pairs, sorted alphabetically by word for now.
wcList = list(strDict.items())
wcList.sort()
print(strDict.items())

# Sort by frequency, most frequent first.
wcList.sort(key=lambda x: x[1], reverse=True)
print(wcList)

# Print the top 20 entries. Slicing avoids an IndexError when there are
# fewer than 20 distinct words.
for entry in wcList[:20]:
    print(entry)

运行结果

 

posted on 2018-09-27 11:42  duola-ling  阅读(234)  评论(0)  编辑  收藏  举报