2021/10/5
利用 Python 对新闻文本进行分词,统计词频,并将出现次数最多的词语存入数据库
from collections import Counter

import jieba
import pymysql

# Script: segment the text of all2.csv with jieba, count how often each
# multi-character token occurs, and insert the most frequent tokens (at most
# 5000) into the `all1` table of the `cloud` MySQL database, echoing each
# word/count pair to stdout.

# Read the whole corpus; the context manager guarantees the file is closed.
with open("all2.csv", "r", encoding='utf-8') as corpus:
    txt = corpus.read()

words = jieba.lcut(txt)  # segment the text (jieba's default, precise mode)

# Map each token to its number of occurrences; single-character tokens are
# left out of the tally.
counts = Counter(word for word in words if len(word) != 1)

# (word, count) pairs ordered from most to least frequent.
items = counts.most_common()

# NOTE(review): the credentials are hard-coded here; consider moving them to
# configuration or environment variables.
db = pymysql.connect(host="localhost", user="root", password="08301016", database="cloud", charset="utf8mb4")
try:
    cursor = db.cursor()
    # Placeholders let the driver escape the values, so a token containing a
    # quote character can no longer break (or inject into) the statement.
    sql = "insert into all1(word,count) values (%s,%s)"
    # Slicing instead of indexing range(5000) avoids an IndexError when the
    # corpus yields fewer than 5000 distinct tokens.
    for word, count in items[:5000]:
        try:
            cursor.execute(sql, (word, count))
            db.commit()
        except pymysql.Error as e:
            # Report the failed row, undo it, and carry on with the next one.
            print("增加数据失败: " + str(e))
            db.rollback()
        print("{0:<5}{1:>5}".format(word, count))
finally:
    # One connection is shared by all inserts and released exactly once.
    db.close()
通过读取数据库数据,生成词云图
<!DOCTYPE html>
<html>
<head>
<title>词云图统计</title>
<meta charset="utf-8">
<!-- <script src="echarts/echarts.min.js"></script>-->
<script src="echarts/echart3.js"></script>
<script src="echarts/echarts-wordcloud.js"></script>
<script src="js/jquery.min.js"></script>
</head>
<body>
<div id="main" style="width: 50%;height: 600px;float: left"></div>
<script>
// Word-cloud chart rendered into the #main container. It is created once
// here and reused by the AJAX success handler below.
var myChart = echarts3.init(document.getElementById('main'));
var dt;                  // raw JSON response from the servlet
var hzb = new Array(0);  // words, in response order
var zzb = new Array(0);  // counts, parallel to hzb

// Request the word/count rows from cjCloudServlet and draw them as a word cloud.
$.ajax({
    url : "cjCloudServlet",
    async : true,
    type : "post",
    data : {
    },
    dataType : "json",
    success : function(data) {
        dt = data;
        // Convert each {word, count} row into the {name, value} shape that
        // the wordCloud series is given as its data.
        var mydata = new Array(0);
        for (var i = 0; i < dt.length; i++) {
            var d = {};
            d["name"] = dt[i].word;
            d["value"] = dt[i].count;
            mydata.push(d);
            hzb.push(dt[i].word);
            zzb.push(dt[i].count);
        }
        myChart.setOption({
            title: {
                text: ''
            },
            tooltip: {
                // Use the hovered item passed in by ECharts. The previous
                // code read `data.name` / `data.value` from the AJAX response
                // array, which are undefined.
                formatter: function (params) {
                    return params.name + " : " + params.value + "次";
                }
            },
            series: [{
                type : 'wordCloud',   // word-cloud series
                shape : 'smooth',
                gridSize : 8,         // grid size
                size : ['50%','50%'],
                //sizeRange : [ 50, 100 ],
                rotationRange : [-45, 0, 45, 90, 60, 16], // rotation range
                textStyle : {
                    normal : {
                        fontFamily : '微软雅黑',
                        // Pick a random RGB colour for every word.
                        color : function() {
                            return 'rgb(' +
                                Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) +
                                ', ' + Math.round(Math.random() * 255) + ')'
                        }
                    },
                    emphasis : {
                        shadowBlur : 5,        // shadow blur on hover
                        shadowColor : '#333'   // shadow colour on hover
                    }
                },
                left : 'center',
                top : 'center',
                right : null,
                bottom : null,
                width : '100%',
                height : '100%',
                data : mydata
            }]
        });
    },
    error : function() {
        alert("请求失败");
    },
});
</script>
</body>
</html>


浙公网安备 33010602011771号