# 基于 gensim 的 LDA 主题模型实现：一键式函数打包
def genlda(textlist, n, *, passes=100, workers=3, num_words=20):
    """Train an LDA topic model on ``textlist`` and write its results to disk.

    The function builds a gensim dictionary and bag-of-words corpus from
    ``textlist``, fits an ``LdaMulticore`` model with ``n`` topics, and then
    produces the following artifacts in the current working directory (the
    file names embed ``n`` and a short time-derived suffix):

    * ``词汇矩阵主题个数<n>时间<ticks>.txt`` -- one line per topic, as returned
      by ``lda.print_topics``.
    * ``模型主题个数<n>时间<ticks>`` -- the model saved via ``lda.save``.
    * ``每篇分类主题个数<n>时间<ticks>.txt`` -- one line per document with:
      document index, the topic id with the highest probability, that
      probability, the list of all reported probabilities, and the position
      of the maximum inside that list.

    It also prints the topic count, the suffix and ``lda.log_perplexity`` of
    the training corpus to stdout.

    Args:
        textlist: Re-iterable collection of documents (it is traversed twice);
            presumably each document is a list of string tokens, as expected
            by ``corpora.Dictionary`` -- confirm against the caller.
        n: Number of topics to fit.
        passes: Number of training passes handed to ``LdaMulticore``.
        workers: Number of worker processes handed to ``LdaMulticore``.
        num_words: Number of words listed per topic in the topics file.

    Returns:
        None. All results are written to files / stdout.

    Note:
        Output files are written as UTF-8 so that non-ASCII topic words do not
        depend on the platform's default encoding.
    """
    # Five characters taken from the digits of the current timestamp; used
    # only to keep the output names of separate runs apart.
    ticks = str(time.time()).replace('.', '')[-6:-1]
    nn = str(n)

    # Raw bag-of-words counts are fed to the model (no TF-IDF weighting).
    dictionary = corpora.Dictionary(textlist)
    corpus = [dictionary.doc2bow(text) for text in textlist]

    lda = LdaMulticore(
        corpus=corpus,
        id2word=dictionary,
        num_topics=n,
        passes=passes,
        workers=workers,
    )

    # Topic/word listing, one topic per line.
    topics_r = lda.print_topics(num_topics=n, num_words=num_words)
    topics_path = '词汇矩阵主题个数' + nn + '时间' + ticks + '.txt'
    with open(topics_path, 'w', encoding='utf-8') as topic_file:
        topic_file.writelines(str(v) + '\n' for v in topics_r)

    lda.save('模型主题个数' + nn + '时间' + ticks)
    print('主题数', nn, ticks, lda.log_perplexity(corpus))

    # Per-document dominant topic. The context manager guarantees the rows
    # are flushed and the handle is released even if a row fails.
    docs_path = '每篇分类主题个数' + nn + '时间' + ticks + '.txt'
    with open(docs_path, 'a+', encoding='utf-8') as doc_file:
        for doc_index, doc_topics in enumerate(lda.get_document_topics(corpus)):
            probs = [prob for _, prob in doc_topics]
            # NOTE(review): max() raises ValueError if a document comes back
            # with no topics at all -- confirm whether that can happen for
            # the topic counts used by callers.
            best = probs.index(max(probs))
            topic_id, topic_prob = doc_topics[best]
            print(doc_index, topic_id, topic_prob, probs, best, file=doc_file)