06.齐普夫定律验证

import re
from operator import itemgetter
import matplotlib.pyplot as plt
from collections import Counter


# English text: verify Zipf's law on a plain-text English book.

# A "word" is 3 to 10 letters: one letter of either case followed by 2-9
# lowercase letters, delimited by word boundaries.  Matching is
# case-sensitive, so "The" and "the" are counted as different words.
_WORD_RE = re.compile(r"(\b[A-Za-z][a-z]{2,9}\b)")


def count_words(text):
    """Return a ``Counter`` mapping each word matched in *text* to its count."""
    return Counter(_WORD_RE.findall(text))


def top_frequencies(frequency, limit=100):
    """Return ``(ranks, freqs)`` for the *limit* most frequent words.

    ``freqs`` holds the counts in descending order and ``ranks`` the matching
    1-based ranks.  Both lists always have the same length, which is smaller
    than *limit* when *frequency* has fewer distinct words.
    """
    freqs = sorted(frequency.values(), reverse=True)[:limit]
    # Ranks start at 1: rank 0 has no position on a logarithmic axis, so the
    # most frequent word would be lost from the log-log plot.
    ranks = list(range(1, len(freqs) + 1))
    return ranks, freqs


def main(path="Alice's adventures in wonderland.txt", limit=100):
    """Print the top *limit* words of the file at *path* and plot their counts.

    Shows two figures in turn: a log-log rank/frequency line plot and a bar
    chart of the same data.
    """
    # Decode explicitly instead of relying on the platform default encoding;
    # undecodable bytes are replaced rather than aborting the run, since only
    # ASCII letters are matched as words.
    with open(path, encoding="utf-8", errors="replace") as f:
        file_to_string = f.read()

    frequency = count_words(file_to_string)

    # Print the top `limit` words with their counts.
    for key, value in frequency.most_common(limit):
        print(key, value)

    ranks, freqs = top_frequencies(frequency, limit)

    # Check Zipf's law with matplotlib: on log-log axes the rank/frequency
    # curve should be roughly a straight line.
    plt.title("Zipf-Law")
    plt.xlabel("rank")
    plt.ylabel("freq")
    plt.loglog(ranks, freqs)
    plt.show()

    # Bar chart of the same data.
    plt.bar(ranks, freqs)
    plt.show()


if __name__ == "__main__":
    main()

 

posted @ 2020-11-09 12:50  止一  阅读(353)  评论(0)  编辑  收藏  举报