import os
import jieba
# Tokenize every text file in a folder with jieba, then drop stopwords.
#
# NOTE(review): the original script had lost its loop indentation (it would
# raise IndentationError as pasted) and only post-processed the *last* file's
# text after the loop.  The rewrite below assumes the intent was to tokenize
# and filter each file — confirm against the author.

DATA_DIR = r'F:\duym\123'
STOPWORDS_PATH = r'F:\duym\stopsCN.txt'


def _load_stopwords(path):
    """Return the stopword file's contents as a set (one word per line).

    The original used ``np.loadtxt(..., delimiter=r't')``, which splits each
    line on the literal letter 't' — a bug for Chinese stopwords.  A plain
    line-by-line read is correct and needs no numpy.  A set gives O(1)
    membership tests (the original tested membership against an array, O(n)
    per token).
    """
    with open(path, encoding='utf-8') as fh:
        return {line.strip() for line in fh if line.strip()}


def _tokenize_file(file_path):
    """Read *file_path* (UTF-8) and return its jieba token list."""
    # `with` closes the handle; the original leaked an open file per iteration.
    with open(file_path, encoding='utf-8') as fh:
        text = fh.read()
    return jieba.lcut(text)


def main():
    # Register the phrase as a single token before any cut.  The original
    # called add_word/lcut twice with identical arguments; once suffices.
    jieba.add_word('持稳在')
    # jieba.load_userdict(r'')  # kept from original: optional user dictionary

    stopwords = _load_stopwords(STOPWORDS_PATH)

    for name in os.listdir(DATA_DIR):
        tokens = _tokenize_file(os.path.join(DATA_DIR, name))
        # Keep tokens that are not stopwords and are purely alphabetic
        # (drops punctuation/digit tokens).  The original joined survivors
        # into one undelimited string — `"".join(char for char in tokens ...)`
        # iterates whole tokens, not characters — which destroyed word
        # boundaries; a list preserves them.
        kept = [t for t in tokens if t not in stopwords and t.isalpha()]
        print(kept)


if __name__ == '__main__':
    main()