【习题1】利用Python分离文件中的英文和中文(3)
对【习题1】利用Python分离文件中的英文和中文(2)一帖中的代码做了一些优化,并附上了思路示意图。
# Module-level accumulators filled in by read_data(); the script prints them
# once the file has been loaded.
c_list = []  # second field (the Chinese text) of every accepted line
e_list = []  # first field (the English text) of every accepted line
c_index_dict = {}  # Chinese character -> 1-based index, in order of first appearance
e_sum_dict = {}  # English character -> number of occurrences seen so far


def read_data(file_path):
    """Load a two-column text file and update the module-level accumulators.

    Each line is split on whitespace.  Only lines that yield exactly two
    fields are accepted: the first field is appended to ``e_list`` and the
    second to ``c_list``.  Every other line (blank lines, lines with one
    field, lines with three or more fields) is skipped silently.

    After reading, every character of the newly accepted second fields that
    is not yet a key of ``c_index_dict`` receives the next 1-based index, and
    every character of the newly accepted first fields is tallied in
    ``e_sum_dict``.

    The accumulators are mutated in place (never rebound), so references held
    by callers stay valid.  Only the entries read by *this* call are tallied,
    so calling the function several times adds up counts correctly instead of
    re-counting what earlier calls already processed.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Returns:
        None.  Results are left in ``c_list``, ``e_list``, ``c_index_dict``
        and ``e_sum_dict``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    new_english = []
    new_chinese = []
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file lazily instead of materialising every line first.
        for line in f:
            fields = line.split()
            if len(fields) != 2:
                continue
            new_english.append(fields[0])
            new_chinese.append(fields[1])

    e_list.extend(new_english)
    c_list.extend(new_chinese)

    # Characters indexed by an earlier call keep their index, so scanning
    # only the new entries gives the same mapping as scanning everything.
    for char in "".join(new_chinese):
        if char not in c_index_dict:
            c_index_dict[char] = len(c_index_dict) + 1

    # Tally only the new entries; re-scanning all of e_list here would count
    # the output of previous calls a second time.
    for char in "".join(new_english):
        e_sum_dict[char] = e_sum_dict.get(char, 0) + 1
# Load the sample file, then dump the four accumulators, one per line.
read_data("NLP basic\\data\\test.txt")
print(c_list, e_list, c_index_dict, e_sum_dict, sep="\n")
all_data

i

data_s

len(data_s)

c_list
e_list

j

c_index_dict

k

e_sum_dict


浙公网安备 33010602011771号