# Python: character-frequency counter (字频统计)
import re
import time
import os.path
# Directory that receives the result file, kept with a trailing path separator
# because writeTxtFile builds the target path by plain string concatenation.
# os.path.join(..., "") appends the platform's own separator instead of a
# hard-coded backslash, so the file lands inside the directory on every OS.
work_dir = os.path.join(os.getcwd(), "")
# Timestamp (yymmddHHMMSS) taken once at start-up; part of the result file name.
file_time = time.strftime("%y%m%d%H%M%S")
# Path of the file being analysed; "" means the text came from the terminal.
file_name = ""
# Raw text typed at the terminal; writeTxtFile echoes it into the result file.
terminal_words = ""
# Minimum frequency entered by the user; overwritten by getFrequency().
frequency_number = 0
def nameMark():
    """Print the program banner: the title framed by two runs of 20 asterisks."""
    stars = "*" * 20
    banner = " " + stars + " 字频统计 " + stars
    print(banner)
def separatorLine():
    """Print a horizontal rule made of 50 dashes."""
    rule = "".ljust(50, "-")
    print(rule)
def processTxt(words):
    """Return *words* with every character that must not be counted removed.

    Removed are all whitespace characters (spaces, tabs, line breaks,
    full-width spaces, ...) and every character listed in ``punctuation``.

    Args:
        words: The raw text to clean.

    Returns:
        The cleaned text as a new string.
    """
    # Characters that are not counted.
    # NOTE(review): the comma and the question mark each appear twice; the
    # duplicates were possibly meant to be their full-width variants - confirm.
    punctuation = ",.。,??1234567890ABCD"
    # re.escape keeps the character class valid even if regex metacharacters
    # such as "]" or "-" are added to the list later; \s covers all whitespace,
    # not just the ASCII space.
    pattern = r"[\s%s]+" % re.escape(punctuation)
    return re.sub(pattern, "", words)
def isRulePath(file_path):
    """Check that *file_path* is a well-formed backslash-separated file path.

    The accepted shape is: an optional drive letter (``C:``), a leading
    backslash, one or more directory components each followed by a backslash,
    and finally a file name with an extension. Only the syntax is checked;
    the file itself is never touched.

    Args:
        file_path: The path string to validate.

    Returns:
        The matched path string when it is well formed; otherwise ``False``
        (after printing "Invalid path").
    """
    pattern = (
        r'^(?P<path>(?:[a-zA-Z]:)?\\(?:[^\\\?\/\*\|<>:"]+\\)+)'
        r'(?P<filename>(?P<name>[^\\\?\/\*\|<>:"]+?)\.'
        r'(?P<ext>[^.\\\?\/\*\|<>:"]+))$'
    )
    matched = re.search(pattern, file_path)
    if matched is None:
        print("Invalid path")
        return False
    return matched.group()
def getFilesInput(rule_path):
    """Read the whole text file at *rule_path* and return its countable characters.

    Args:
        rule_path: Path of the UTF-8 encoded text file to read.

    Returns:
        The file content after whitespace removal and ``processTxt`` filtering.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "utf-8-sig" reads plain UTF-8 as well, but drops a leading byte-order
    # mark so that it is not counted as a character.
    with open(rule_path, 'r', encoding='utf-8-sig') as file:
        # read() instead of readline(): the whole file is analysed, not just
        # its first line.
        raw_text = file.read()
    # Drop line breaks and other whitespace so they are not counted.
    words = processTxt("".join(raw_text.split()))
    print()
    print(f"{rule_path} 内容已被读取!")
    separatorLine()
    return words
def getFrequency():
    """Prompt until the user enters a non-negative whole number.

    Every answer is stored in the module-level ``frequency_number``; the loop
    ends as soon as the answer consists only of decimal digits.

    Returns:
        The accepted answer as a string of decimal digits (the caller converts
        it with ``int``).
    """
    global frequency_number
    prompt = "输入最低统计字频次数 : \n 注: 小于输入数字频次的结果将不被保留!\n :-->"
    while True:
        frequency_number = input(prompt).strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() rejects with ValueError later on.
        if frequency_number.isdecimal():
            separatorLine()
            return frequency_number
def analyseText(words, number):
    """Count how often each character occurs and keep the frequent ones.

    Args:
        words: The text whose characters are counted.
        number: Minimum number of occurrences a character needs to be kept;
            anything ``int()`` accepts (e.g. ``3`` or ``"3"``).

    Returns:
        A list of ``(character, count)`` tuples with ``count >= number``,
        sorted by count in descending order. Characters with equal counts
        keep the order of their first appearance in *words*.

    Raises:
        ValueError: If *number* cannot be converted to an integer.
    """
    # Local import keeps this function self-contained.
    from collections import Counter

    # Convert once instead of once per distinct character.
    threshold = int(number)
    # Single pass over the text; Counter preserves first-appearance order.
    counts = Counter(words)
    frequent = [(char, count) for char, count in counts.items() if count >= threshold]
    # The sort is stable, so ties stay in first-appearance order.
    frequent.sort(key=lambda item: item[1], reverse=True)
    return frequent
def writeTxtFile(result_file):
    """Write the analysis report to ``result_<file_time>.txt`` under ``work_dir``.

    The report starts with a header that depends on the module-level
    ``file_name``: when it is the empty string, the terminal input
    (``terminal_words``) is echoed; otherwise the analysed file's name and the
    frequency threshold (``frequency_number``) are recorded. The header is
    followed by the whole result list on one line and then one entry per line.
    The path of the written file is printed afterwards.

    Args:
        result_file: Iterable of result entries, as returned by analyseText.
    """
    rule = "-" * 50
    gap = "\n" * 2
    target = work_dir + 'result_' + file_time + '.txt'
    with open(target, 'w', encoding="UTF-8") as report:
        if file_name == "":
            # Terminal mode: echo the text the user typed.
            report.write("终端输入如下:" + gap)
            report.write(rule + gap)
            report.write(terminal_words)
            report.write(gap + rule + gap)
        else:
            # File mode: record which file was analysed and the threshold.
            report.write("将要分析的文件:" + file_name)
            report.write(gap)
            report.write(f"注: 本文件记录了字频大于等于 {frequency_number} 的字符!" + gap)
            report.write(rule + gap)
        # Whole result list on one line, then one entry per line.
        report.write(str(result_file) + gap)
        report.write(rule + gap)
        for entry in result_file:
            report.write(str(entry) + "\n")
        report.write(rule)
    print(f"分析完成, 已保存! \n :--> {work_dir}result_{file_time}.txt")
def _run_analysis(words):
    """Ask for the frequency threshold, analyse *words* and save the report."""
    threshold = getFrequency()
    writeTxtFile(analyseText(words, threshold))
    separatorLine()


def _wants_exit():
    """Ask whether to quit; print the farewell and return True on 'y'/'Y'."""
    if input('退出? y/n \n').upper() == 'Y':
        print('已退出!')
        return True
    return False


def _file_session():
    """Analyse files until the user goes back to the menu or quits.

    Returns:
        True when the user confirmed quitting the program, False when 'q' was
        entered at the path prompt (back to the main menu).
    """
    global file_name
    while True:
        # Keep the raw answer in a local so that 'q' or a rejected path never
        # ends up in the global file_name that writeTxtFile reads.
        raw_path = input("输入要分析的文件路径 [q = 退出]:-->> ")
        if raw_path.lower() == 'q':
            return False
        checked_path = isRulePath(raw_path)
        if not checked_path:
            continue
        try:
            f_words = getFilesInput(checked_path)
        except (OSError, UnicodeDecodeError) as err:
            # A missing or unreadable file should not end the program.
            print(f"无法读取文件: {err}")
            continue
        file_name = checked_path
        _run_analysis(f_words)
        if _wants_exit():
            return True


def main():
    """Run the interactive menu: analyse a file (F), typed text (T) or quit (q)."""
    global file_name, terminal_words
    while True:
        nameMark()
        flag = input("分析文本文件,输入 F ; 分析终端文本,输入 T [q = 退出]: \n :-->").lower()
        if flag == 'q':
            break
        if flag == "t":
            # Reset so writeTxtFile emits the terminal header even after an
            # earlier file analysis in the same session.
            file_name = ""
            terminal_words = input("输入要分析的文本: \n")
            _run_analysis(processTxt(terminal_words))
            if _wants_exit():
                break
        elif flag == "f":
            # Propagate a confirmed quit so the whole program really exits
            # instead of showing the menu again.
            if _file_session():
                break
        else:
            print("输入错误!")
            separatorLine()
            print()


if __name__ == "__main__":
    main()

# Source: cnblogs (博客园), author: Annzi-Py. When reposting, please cite the original link: https://www.cnblogs.com/annzi/p/15427694.html

# Site footer: Zhejiang public network security filing No. 33010602011771 (浙公网安备 33010602011771号)