import operator
import string
from collections import Counter
# Word-frequency report for "a.txt": strip punctuation and digits, count the
# remaining words, show the counts with and without common stop words, then
# list the words from most to least frequent along with the top 20.

# Read the whole input once; the context manager closes the file afterwards.
with open("a.txt", "r") as source_file:
    text = source_file.read()

# Replace every punctuation mark and digit with a space so only words remain.
# A single translate() pass is equivalent to one replace() call per character.
separators = string.punctuation + string.digits
text = text.translate(str.maketrans(separators, " " * len(separators)))
print(separators)
print(text)
print()

# Fold to lower case and keep the result, so that "The" and "the" are counted
# as the same word and can be matched against the lower-case stop-word set.
text = text.lower()
print(text)
words = text.split()
print(words)

# Count each word in one pass; entries keep first-occurrence order.
word_counts = Counter(words)
for word, count in word_counts.items():
    print(word, ':', count)
print()
print()
print()

# Drop prepositions, articles and similar stop words.
stop_words = {'to', 'for', 'and', 'of', 'is', 'a', 'an', 'the'}
content_words = set(word_counts) - stop_words
print(content_words)
for word in content_words:
    print(word, ":", word_counts[word])

# Rank all words by descending frequency (the sort is stable, so ties stay in
# first-occurrence order).
# NOTE(review): the ranking below still includes the stop words removed above;
# confirm whether the top-20 list is meant to exclude them.
ranked = sorted(word_counts.items(), key=operator.itemgetter(1), reverse=True)
print(ranked)
ranked_counts = dict(ranked)  # ordered mapping: word -> count
for word, count in ranked_counts.items():
    print(word, ':', count)

# Print the 20 most frequent words (fewer if the text has fewer distinct words).
print("词频最大TOP20:")
for word, count in ranked[:20]:
    print(word, ':', count)