"""Segment Chinese text files with jieba and strip stop words.

Reads every file under ``path``, tokenizes each with jieba (printing the
tokens as it goes), then filters the last file's token list against a
stop-word list loaded from ``stopsCN.txt`` and prints the cleaned result.

NOTE(review): the hard-coded ``F:\\duym`` paths assume a specific Windows
machine — parameterize before reuse.
"""
import os

import jieba
import numpy as np

# Directory of UTF-8 text files to segment.
path = r'F:\duym\123'

# Teach jieba a domain phrase so it is kept as a single token.
jieba.add_word('持稳在')

word = ''
for fname in os.listdir(path):
    fpath = os.path.join(path, fname)
    # 'with' closes the handle; the original leaked an open file per iteration.
    with open(fpath, 'r', encoding='utf-8') as fh:
        word = fh.read()
    # Stream the tokens of this file one per line.
    for w in jieba.cut(word):
        print(w)

# Token list for the last file read (matches original behavior, which
# only kept the final file's contents in ``word``).
tokens = jieba.lcut(word)
print(tokens)

# Stop-word list: one entry per line.  The original passed delimiter=r't'
# (a literal letter 't'), which wrongly splits on that character; a tab
# is the intended separator.
file_path = r'F:\duym\stopsCN.txt'
stops = np.loadtxt(file_path, dtype=str, delimiter='\t', encoding='utf-8')
for s in stops:
    print(s)

# Set membership is O(1) per token vs O(n) against the numpy array.
stop_set = set(stops)
tokens = [token for token in tokens if token not in stop_set]

# Keep only tokens made entirely of letters and join them into one string.
# (The original named the loop variable ``char`` but iterated whole tokens.)
tokens = "".join(token for token in tokens if token.isalpha())
print(tokens)