python: 字频统计


import collections
import os.path
import re
import time

# Directory prefix for result files; always ends with a path separator so a
# file name can be appended directly. os.path.join(..., "") appends the
# platform's own separator instead of a hard-coded backslash.
work_dir = os.path.join(os.getcwd(), "")
# Timestamp captured once when the script starts, formatted yymmddHHMMSS.
file_time = time.strftime("%y%m%d%H%M%S")
# Path of the file being analysed; stays "" when the text came from the terminal.
file_name = ""
# Raw text typed at the terminal for the current analysis.
terminal_words = ""
# Minimum frequency chosen by the user (set by getFrequency).
frequency_number = 0


def nameMark():
    """Print the program banner: the title framed by 20 asterisks on each side."""
    stars = "*" * 20
    print(f" {stars} 字频统计 {stars}")


def separatorLine():
    """Print a horizontal rule made of 50 hyphens."""
    rule_width = 50
    rule = "-" * rule_width
    print(rule)


def processTxt(words):
    """Return *words* with spaces and all excluded characters removed.

    The excluded set is the punctuation, digits and the capital letters
    A-D listed below; every other character is kept in its original order.
    """
    punctuation = ",.。,??1234567890ABCD"  # characters that are not counted
    # One deletion table covers the space plus every excluded character.
    drop_table = str.maketrans("", "", " " + punctuation)
    return words.translate(drop_table)


def isRulePath(file_path):
    """Check *file_path* against a Windows-style absolute file path pattern.

    The pattern accepts an optional drive letter, a leading backslash, at
    least one directory component, and a file name that has an extension.

    Returns:
        The matched path text when *file_path* fits the pattern; otherwise
        prints "Invalid path" and returns False.
    """
    pattern = "".join((
        r'^(?P<path>(?:[a-zA-Z]:)?\\(?:[^\\\?\/\*\|<>:"]+\\)+)',
        r'(?P<filename>(?P<name>[^\\\?\/\*\|<>:"]+?)\.',
        r'(?P<ext>[^.\\\?\/\*\|<>:"]+))$',
    ))
    matched = re.search(pattern, file_path)
    if matched is None:
        print("Invalid path")
        return False
    return matched.group(0)


def getFilesInput(rule_path):
    """Read the whole text file at *rule_path* and return its cleaned content.

    Every line of the file is read (not just the first one). Line
    terminators are dropped so they are not tallied as characters, and the
    remaining text is passed through processTxt.

    Args:
        rule_path: Path of the UTF-8 encoded text file to read.

    Returns:
        The file's text after processTxt has removed excluded characters.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 and also skips a leading byte-order
    # mark, which would otherwise be counted as a character.
    with open(rule_path, 'r', encoding='utf-8-sig') as file:
        raw_text = "".join(line.rstrip("\n") for line in file)
    words = processTxt(raw_text)
    print()
    print(f"{rule_path} 内容已被读取!")
    separatorLine()
    return words


def getFrequency():
    """Prompt until the user enters the minimum frequency as a whole number.

    The accepted text is stored in the module-level ``frequency_number``
    and printed with a separator line afterwards.

    Returns:
        The accepted input as a string of decimal digits (callers convert
        it with ``int()``).
    """
    global frequency_number
    prompt = "输入最低统计字频次数 : \n 注: 小于输入数字频次的结果将不被保留!\n :-->"
    while True:
        answer = input(prompt).strip()
        # str.isdigit() also accepts characters such as "²" that int()
        # rejects; str.isdecimal() only admits text int() can convert.
        if answer.isdecimal():
            frequency_number = answer
            separatorLine()
            return frequency_number


def analyseText(words, number):
    """Count how often each character occurs and keep the frequent ones.

    Args:
        words: Text whose characters are tallied.
        number: Minimum count a character needs to be kept; any value
            accepted by ``int()`` (for example ``3`` or ``"3"``).

    Returns:
        A list of ``(character, count)`` tuples whose count is at least
        *number*, sorted by count in descending order. Characters with
        equal counts keep the order of their first appearance in *words*.

    Raises:
        ValueError: If *number* cannot be converted to an integer.
    """
    threshold = int(number)
    # Counter tallies in a single pass and remembers first-appearance order.
    tally = collections.Counter(words)
    frequent = [(char, count) for char, count in tally.items() if count >= threshold]
    # list.sort is stable, so ties stay in first-appearance order.
    frequent.sort(key=lambda pair: pair[1], reverse=True)
    return frequent


def writeTxtFile(result_file):
    """Write an analysis report to ``result_<timestamp>.txt`` under work_dir.

    The report starts with a header that depends on the module-level
    ``file_name``: when it is not empty the header names the analysed file
    and the minimum frequency (``frequency_number``); otherwise it echoes
    the terminal input (``terminal_words``). The header is followed by the
    whole result list on one line and then one result entry per line.

    Args:
        result_file: Iterable of result entries, as returned by analyseText.

    Raises:
        OSError: If the report file cannot be created or written.
    """
    rule = "-" * 50
    gap = "\n" * 2
    # Stamp the report when it is written, so successive analyses in one
    # session get their own file instead of overwriting the previous one.
    stamp = time.strftime("%y%m%d%H%M%S")
    report_path = f"{work_dir}result_{stamp}.txt"

    if file_name != "":
        header = [
            "将要分析的文件:" + file_name,
            f"注: 本文件记录了字频大于等于  {frequency_number}  的字符!",
            rule,
        ]
    else:
        header = ["终端输入如下:", rule, terminal_words, rule]
    sections = header + [str(result_file), rule]

    with open(report_path, 'w', encoding="UTF-8") as file:
        # Each section is followed by a blank line.
        for section in sections:
            file.write(section + gap)
        for entry in result_file:
            file.write(str(entry))
            file.write("\n")
        file.write(rule)
    # Announce success only after the file has been closed.
    print(f"分析完成, 已保存! \n :--> {report_path}")


# Interactive menu: the user picks file mode (F), terminal mode (T) or quits
# (q). Each analysis cleans the text, asks for the minimum frequency, counts
# the characters and writes a report file.
while True:
    nameMark()
    flag = input("分析文本文件,输入 F ; 分析终端文本,输入 T [q = 退出]: \n :-->").lower()
    if flag == 'q':
        break

    if flag == "t":
        # Clear any path left over from file mode so writeTxtFile emits the
        # terminal-input header rather than a stale file header.
        file_name = ""
        terminal_words = input("输入要分析的文本: \n")
        t_words = processTxt(terminal_words)
        show_time = getFrequency()
        analysis_result = analyseText(t_words, show_time)
        writeTxtFile(analysis_result)
        separatorLine()
        answer = input('退出? y/n \n')
        if answer.upper() == 'Y':
            print('已退出!')
            break
    elif flag == "f":
        exit_requested = False
        while True:
            path_input = input("输入要分析的文件路径 [q = 退出]:-->> ")
            if path_input.lower() == 'q':
                # Back to the main menu.
                break
            rule_path = isRulePath(path_input)
            if not rule_path:
                # isRulePath already reported the problem; ask again.
                continue
            try:
                f_words = getFilesInput(rule_path)
            except (OSError, UnicodeDecodeError) as err:
                # A well-formed path can still be missing or unreadable;
                # report it and ask again instead of crashing.
                print(f"无法读取文件: {err}")
                continue
            # Only a successfully read file becomes the report's file name.
            file_name = rule_path
            show_time = getFrequency()
            analysis_result = analyseText(f_words, show_time)
            writeTxtFile(analysis_result)
            separatorLine()
            answer = input('退出? y/n \n')
            if answer.upper() == 'Y':
                print('已退出!')
                exit_requested = True
                break
        if exit_requested:
            # Leave the program, matching what terminal mode does on "Y".
            break
    else:
        print("输入错误!")
        separatorLine()
        print()


posted @ 2021-10-20 20:30  Annzi-Py  阅读(343)  评论(0)    收藏  举报