【习题1】利用Python分离文件中的英文和中文(3)

本文优化了《【习题1】利用Python分离文件中的英文和中文(2)》一帖中的代码,并附上了思路的图片。

# Module-level accumulators populated by read_data().
c_list = []        # second field of every accepted line (the Chinese side, per the naming)
e_list = []        # first field of every accepted line (the English side, per the naming)
c_index_dict = {}  # character seen in c_list -> 1-based index, in first-seen order
e_sum_dict = {}    # character seen in e_list -> number of occurrences


def read_data(file_path):
    """Read a two-column text file and update the module-level tables.

    Each line is split on whitespace. Lines that do not yield exactly two
    fields are ignored. For accepted lines the first field is appended to
    ``e_list`` and the second to ``c_list``. Every previously unseen
    character of the second fields receives the next 1-based index in
    ``c_index_dict``, and every character of the first fields is tallied in
    ``e_sum_dict``.

    The function may be called repeatedly: only the pairs read by the
    current call are indexed and counted, so earlier data is never counted
    a second time.

    Args:
        file_path: Path of a UTF-8 encoded text file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8. In that case the
            module-level tables are left untouched.
    """
    new_e = []
    new_c = []
    # Stream the file line by line; nothing is committed to the global
    # tables until the whole file has been read successfully.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if len(fields) != 2:
                continue
            new_e.append(fields[0])
            new_c.append(fields[1])

    e_list.extend(new_e)
    c_list.extend(new_c)

    # Characters already indexed keep their number; new ones get the next one.
    for word in new_c:
        for ch in word:
            if ch not in c_index_dict:
                c_index_dict[ch] = len(c_index_dict) + 1

    # Count only this call's words so that repeated calls do not re-count
    # the words accumulated by earlier calls.
    for word in new_e:
        for ch in word:
            e_sum_dict[ch] = e_sum_dict.get(ch, 0) + 1



# Parse the sample file, then dump each result table in turn:
# c_list, e_list, c_index_dict, e_sum_dict.
read_data("NLP basic\\data\\test.txt")
for table in (c_list, e_list, c_index_dict, e_sum_dict):
    print(table)

all_data
de8cd322828c53971f735b654a04c2c
i
b873ff951aa6637368d1f19efd5679b
data_s
df5d12e2ac0860f5a6d87fd81f38262
len(data_s)
4b14e5865939d7cdfa6fee0c2fbad31
c_list
e_list
c0c62f0ee034c2fec8ca7ed17dbba9c
j
193d915b8154f739d83d7831fe359cf
c_index_dict
997d6fd3262830c4946d3c95d8c323c
k
eff943d9922b52810ffb9a5f436a7d7
e_sum_dict
image

posted @ 2025-07-04 13:40  李大嘟嘟  阅读(9)  评论(0)    收藏  举报