# coding=utf-8
import re
import os
# Absolute path of the current working directory, resolved once at import time.
path = os.path.abspath(os.curdir)
def all_path(dirname: str) -> list:
    """Return the paths of all files found under *dirname*, recursively.

    Args:
        dirname: Directory to traverse with ``os.walk``.

    Returns:
        A list with one entry per file, each built by joining the directory
        being visited with the file name. Directories themselves are not
        listed. The list is empty when nothing is found; ``os.walk`` ignores
        listing errors by default, so a missing directory also yields ``[]``.
    """
    result = []
    # The sub-directory names yielded by os.walk are not needed here: the
    # walk already descends into them on its own.
    for maindir, _subdirs, file_name_list in os.walk(dirname):
        result.extend(
            os.path.join(maindir, filename) for filename in file_name_list
        )
    return result
def print_zh(document_list):
    """Print every run of Chinese characters found in the given ``.txt`` files.

    Two lines are written to stdout: first the list of all matched runs in
    the order they were encountered (duplicates kept), then the set of
    distinct runs.

    Args:
        document_list: Iterable of file path strings. Only paths ending in
            ``.txt`` are opened (decoded as UTF-8); all others are skipped.

    Raises:
        OSError: If a selected file cannot be opened.
        UnicodeDecodeError: If a selected file is not valid UTF-8.
    """
    # One or more consecutive characters in U+4E00..U+9FA5. Compiled once,
    # outside the loop. No surrounding wildcards are needed: findall already
    # scans the whole text, and lazy ".*?" wrappers only add rescanning.
    zh_run = re.compile(r"[\u4E00-\u9FA5]+")
    all_text = []
    for d in document_list:
        # Check the suffix rather than a substring, so names such as
        # "notes.txt.bak" or files under a "foo.txt/" directory are not read.
        if not d.endswith('.txt'):
            continue
        with open(d, 'r', encoding="utf-8") as f:
            all_text.extend(zh_run.findall(f.read()))
    print(all_text)
    print(set(all_text))
if __name__ == '__main__':
    # Script entry point: collect every file under `path` and print the
    # Chinese text found in the .txt files among them.
    print_zh(all_path(path))