pandas：当每行文本是一个 list 时，使用 NLTK 统计语料词频（nltk）
def frequent_words(tdf, k):
    """Return the ``k`` most common words in the ``OriginalTweet`` column.

    Every row of ``tdf['OriginalTweet']`` is iterated as a sequence of
    string tokens (assumed to be a list of words -- confirm against the
    caller).  Each token is split on the space character and the
    resulting words are counted with ``nltk.FreqDist``.

    Empty strings are not counted.  Joining everything into one string
    and splitting it again would report a bogus ``''`` word for an empty
    DataFrame, for rows holding an empty list, and for tokens with
    leading, trailing or repeated spaces.

    Args:
        tdf: DataFrame-like object with an ``OriginalTweet`` column.
        k: Number of entries to return; forwarded to
            ``FreqDist.most_common``.

    Returns:
        The value of ``FreqDist.most_common(k)``: a list of
        ``(word, count)`` pairs ordered from most to least frequent.
    """
    words = []
    for tokens in tdf['OriginalTweet']:
        for token in tokens:
            # Split only on ' ' so multi-word tokens are still broken
            # up, but skip the empty pieces that the split can produce.
            words.extend(part for part in token.split(' ') if part)

    freq = nltk.FreqDist(words)
    return freq.most_common(k)
def frequent_words(tdf, k):
    """Return the ``k`` most common words in the ``OriginalTweet`` column.

    Every row of ``tdf['OriginalTweet']`` is iterated as a sequence of
    string tokens (assumed to be a list of words -- confirm against the
    caller).  Each token is split on the space character and the
    resulting words are counted with ``nltk.FreqDist``.

    Empty strings are not counted.  Joining everything into one string
    and splitting it again would report a bogus ``''`` word for an empty
    DataFrame, for rows holding an empty list, and for tokens with
    leading, trailing or repeated spaces.

    Args:
        tdf: DataFrame-like object with an ``OriginalTweet`` column.
        k: Number of entries to return; forwarded to
            ``FreqDist.most_common``.

    Returns:
        The value of ``FreqDist.most_common(k)``: a list of
        ``(word, count)`` pairs ordered from most to least frequent.
    """
    words = []
    for tokens in tdf['OriginalTweet']:
        for token in tokens:
            # Split only on ' ' so multi-word tokens are still broken
            # up, but skip the empty pieces that the split can produce.
            words.extend(part for part in token.split(' ') if part)

    freq = nltk.FreqDist(words)
    return freq.most_common(k)