111

import os
import jieba

path = r'F:\duym\123'
fs = os.listdir(path)

# Read each file under the directory. Note that `word` is overwritten on every
# pass, so after the loop it holds only the last file's content.
for f in fs:
    fp = os.path.join(path, f)
    word = open(fp, 'r', encoding='utf-8').read()

# Segment that text with jieba's generator interface and print each token.
for w in jieba.cut(word):
    print(w)
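If the goal is to segment every file rather than only the last one read, a minimal sketch reusing the same `path` and `fs` variables could look like this:

# Sketch: segment each file in the directory individually.
for f in fs:
    fp = os.path.join(path, f)
    with open(fp, 'r', encoding='utf-8') as fh:
        text = fh.read()
    print(f, jieba.lcut(text))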

# Optionally load a user dictionary before cutting:
# jieba.load_userdict(r'')
# tokens = jieba.lcut(word)
# print(tokens)
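For reference, jieba.load_userdict expects a plain-text file with one entry per line in the form `word [freq] [pos]`, where the frequency and part-of-speech tag are optional. A sketch with a made-up dictionary path and made-up entries:

# userdict.txt (hypothetical file), one entry per line, e.g.:
#   持稳在 3
#   云计算 5 n
jieba.load_userdict(r'F:\duym\userdict.txt')  # hypothetical path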

# Register '持稳在' ("holds steady at") as one word so jieba keeps it intact,
# then re-segment into a list.
jieba.add_word('持稳在')
tokens = jieba.lcut(word)
print(tokens)

# Chinese stopword list.
file_path = r'F:\duym\stopsCN.txt'


import numpy as np

# Load the stopwords; the delimiter should be an actual tab character, not the letter 't'.
stops = np.loadtxt(file_path, dtype=str, delimiter='\t', encoding='utf-8')
stops.shape
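An alternative that sidesteps the delimiter question is to read the file directly into a Python set, assuming it lists one stopword per line; membership tests against a set are also faster than scanning a NumPy array:

# Sketch: load the stopwords into a set (assumes one word per line).
with open(file_path, 'r', encoding='utf-8') as fh:
    stop_set = {line.strip() for line in fh if line.strip()}
print(len(stop_set))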

for s in stops:
    print(s)

# Drop stopwords, then keep only tokens consisting entirely of letters
# (str.isalpha() is also True for Chinese characters) and join them into one
# string, which strips numbers and punctuation.
tokens = [token for token in tokens if token not in stops]
tokens = "".join([token for token in tokens if token.isalpha()])
print(tokens)
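With stopwords, numbers and punctuation removed, a common follow-up is a frequency count over the remaining tokens. A minimal sketch with collections.Counter (the filtered tokens are rebuilt as a list here, since `tokens` above was joined into a string):

# Sketch: ten most frequent tokens after filtering.
from collections import Counter
filtered = [t for t in jieba.lcut(word) if t not in stops and t.isalpha()]
print(Counter(filtered).most_common(10))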


  
