第四次作业——完整的中英文词频统计

from collections import Counter

# Input files analysed by this script.
ENGLISH_NOVEL_PATH = 'C:\\Users\\Administrator\\Desktop\\Novel.txt'
CHINESE_NOVEL_PATH = 'C:\\Users\\Administrator\\Desktop\\星辰变.txt'

# Characters that are turned into spaces before the text is split.
ENGLISH_SEPARATORS = ';,-'
# NOTE(review): ':' below is the ASCII colon; Chinese text usually uses the
# full-width '：' -- confirm which one was intended before changing it.
CHINESE_SEPARATORS = ':，-！。'

# Grammatical words / pronouns excluded from the statistics.
ENGLISH_STOPWORDS = frozenset({'a', 'the', 'and', 'it', 'i', 'you', 'we', 'of', 'to'})
CHINESE_STOPWORDS = frozenset({'我', '你', '他', '她', '是'})

# How many of the most frequent words are printed.
TOP_N = 20


def read_text(path, encoding=None):
    """Return the whole content of the text file at *path*.

    Args:
        path: Location of the file to read.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default, which is what the script relied on before.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if read() raises.
    with open(path, 'r', encoding=encoding) as handle:
        return handle.read()


def count_words(text, separators, stopwords, lowercase=False):
    """Count whitespace-separated tokens of *text*, skipping *stopwords*.

    Args:
        text: The raw text to analyse.
        separators: String of single characters that are each replaced by a
            space before splitting.
        stopwords: Collection of tokens to leave out of the result.
        lowercase: When true, the text is lower-cased before counting.

    Returns:
        A ``collections.Counter`` mapping each remaining token to the number
        of times it occurs.
    """
    if lowercase:
        text = text.lower()
    # One translation pass instead of one .replace() call per separator.
    table = str.maketrans(separators, ' ' * len(separators))
    tokens = text.translate(table).split()
    # A single pass over the tokens replaces list.count() per distinct word.
    return Counter(token for token in tokens if token not in stopwords)


def top_words(counts, limit=TOP_N):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Fewer pairs are returned when *counts* holds fewer than *limit* words,
    so short inputs no longer raise ``IndexError``.
    """
    return counts.most_common(limit)


def report(path, separators, stopwords, lowercase=False, limit=TOP_N, encoding=None):
    """Print the distinct words of the file at *path* and its top words.

    The set of distinct (non-stopword) tokens is printed first, followed by
    one ``(word, count)`` tuple per line for the most frequent tokens.
    """
    counts = count_words(read_text(path, encoding), separators, stopwords, lowercase)
    print(set(counts))
    for entry in top_words(counts, limit):
        print(entry)


def main():
    """Run the English statistics, then the Chinese statistics."""
    # English text is lower-cased so that 'The' and 'the' count as one word.
    report(ENGLISH_NOVEL_PATH, ENGLISH_SEPARATORS, ENGLISH_STOPWORDS, lowercase=True)
    # The Chinese text is split on whitespace only; no word segmentation is done.
    report(CHINESE_NOVEL_PATH, CHINESE_SEPARATORS, CHINESE_STOPWORDS)


if __name__ == '__main__':
    main()

posted @ 2018-10-15 11:44 庄裕翔 阅读(165) 评论(0) 收藏举报

刷新页面返回顶部

庄裕翔

第四次作业——完整的中英文词频统计

公告