#from nltk.tree import *
from nltk.tree import Tree
# Read the whole parser-output file and strip newlines so that every bracketed
# tree ends up on one continuous string. The `with` block guarantees the file
# handle is closed, even if reading fails.
# NOTE(review): no encoding is passed, so the platform default is used, as
# before -- confirm the file's real encoding and pass it explicitly.
with open('C:/Users/gao/Desktop/文档/12.txt') as f:
    se = f.read().replace("\n", "")

# Every tree starts with the '(ROOT' marker. Splitting on the marker consumes
# it, so it is prepended again to each piece. The piece in front of the first
# marker (index 0) is not a tree and is skipped.
le = se.split('(ROOT')
item = ['(ROOT' + piece for piece in le[1:]]

# Parse each tree with NLTK and collect its leaves (the tokens).
words = []  # kept for compatibility; nothing is stored in it at the moment
lenss = []  # one entry per tree: number of leaves minus one
for j in item:
    test = Tree.fromstring(j)
    leaves = test.leaves()
    print(leaves)
    # One is subtracted from the leaf count -- presumably to leave out the
    # sentence-final punctuation token; TODO confirm this holds for all input.
    lenss.append(len(leaves) - 1)

# Printed once, after all trees have been processed.
print(lenss)