朴素贝叶斯算法对新闻进行分类

 1 #!/usr/bin/python
 2 # coding=utf-8
 3 from sklearn.datasets import fetch_20newsgroups
 4 from sklearn.model_selection import train_test_split
 5 from sklearn.feature_extraction.text import TfidfVectorizer
 6 from sklearn.naive_bayes import MultinomialNB
 7 
 8 
 9 def nb_news():
10     #朴素贝叶斯对新闻进行分类
11 
12     #获取数据
13     news = fetch_20newsgroups(subset="all")
14 
15     #划分数据
16     x_train, x_test, y_train, y_test = train_test_split(news.data, news.target)
17 
18     #特征工程:文本特征提取
19     transfer = TfidfVectorizer()
20     x_train = transfer.fit_transform(x_train)
21     x_test = transfer.transform(x_test)
22 
23     #朴素贝叶斯算法估计
24     estimator = MultinomialNB()
25     estimator.fit(x_train, y_train)
26 
27     #模型评估
28     y_predict = estimator.predict(x_test)
29     print "y_predict:\n", y_predict
30     print "对比真实值和预测值:\n", y_test == y_predict
31 
32     # 方法二:计算正确率
33     score = estimator.score(x_test, y_test)
34     print "准确率:\n", score
35 
36     return None
37 
# Run the demo only when the file is executed as a script, so importing the
# module does not start a dataset fetch and a training run.
if __name__ == "__main__":
    nb_news()

 

posted on 2021-03-10 16:56  一仟零一夜丶  阅读(202)  评论(0)    收藏  举报