2020年新型冠状病毒疫情分析实战
一、 数据爬取:爬取门户网站上的疫情统计数据,要求具体到各个省的地级市的每日的确诊病例的统计人数,并将上述爬取信息导入到本地数据库中。
二、 疫情分布图可视化展示:要求展示当前的疫情分布情况。单击各个省可以下钻到各个地市的数据显示。
爬取网站:https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1
Python(省级总确诊病例爬取):
import requests
import re
import xlwt
# NOTE(review): this module-level ``url`` is never read by the script (the page
# actually scraped is the Baidu URL passed to get_page below). Kept unchanged
# in case something else relies on it.
url = 'http://m.look.360.cn/subject/400?sign=360_6aa05217'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
}


def get_page(url):
    """Download ``url`` and return the response body as text.

    Returns None when the request raises or the HTTP status is not 200;
    in both cases a diagnostic is printed instead of raising.
    """
    try:
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            print('获取网页成功')
            return response.text
        print('获取网页失败')
    except Exception as e:
        print(e)
    return None


f = xlwt.Workbook(encoding='utf-8')
sheet01 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)
# Header row: province, confirmed, deaths, recovered.
for col, title in enumerate(('省份', '确诊病例', '死亡数量', '痊愈数量')):
    sheet01.write(0, col, title)

page = get_page("https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1")
if page is None:
    # Without this guard re.findall() would fail with a TypeError on None.
    raise SystemExit('未获取到网页内容,无法解析数据')

# The "crued" spelling is kept verbatim - presumably it is the key used by the
# page's embedded JSON; verify against the live page before "fixing" it.
items = re.findall('{"confirmed":"(.*?)","died":"(.*?)","crued":"(.*?)".*?"area":"(.*?)"',page,re.S)
print(len(items))
for row, (confirmed, died, cured, area) in enumerate(items, start=1):
    # The area name is captured as \uXXXX escapes; decode it to real characters.
    sheet01.write(row, 0, area.encode('utf-8').decode('unicode_escape'))
    # Empty counts are stored as the string "0".
    sheet01.write(row, 1, confirmed or "0")
    sheet01.write(row, 2, died or "0")
    sheet01.write(row, 3, cured or "0")
print("打印完!!!")
f.save('Outbreak.xls')
截图:

但是爬取结果中存在外国病例总数:

这些数据不是很多,直接删掉就行了。然后开始进行数据可视化。
jsp代码:
<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%--
  Nationwide map page: requests the per-province records from
  "servlet?method=find" and renders the confirmed-case count of each
  province on an ECharts map of China.
--%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Insert title here</title>
<script src="https://cdn.staticfile.org/echarts/4.3.0/echarts.min.js"></script>
<script src="js/jquery-1.5.1.js"></script>
<script src="js/china.js"></script>
</head>
<body>
<div id="main" style="width: 1500px;height:600px;"></div>
<script type="text/javascript" charset="utf-8">
    // {name, value} pairs in the shape the map series expects.
    var statisticsData = [];
    var myChart = echarts.init(document.getElementById('main'));

    myChart.showLoading();
    $.ajax({
        type: "post",
        async: true, // asynchronous, so the browser is not blocked while waiting
        url: "servlet?method=find", // handled by the servlet
        data: {},
        dataType: "json", // the response is parsed as JSON
        success: function (result) {
            // Always remove the spinner, even when the payload is empty.
            myChart.hideLoading();
            if (!result) {
                return;
            }

            // Map each record to the {name, value} shape used by the series.
            for (var i = 0; i < result.length; i++) {
                statisticsData.push({
                    name: result[i].shengfen,
                    value: result[i].quezhen
                });
            }

            // Declared locally so the option object does not leak into the global scope.
            var optionChinaMap = {
                tooltip: {
                    trigger: 'item'
                },
                legend: {
                    orient: 'horizontal', // legend layout direction
                    textStyle: {color: '#fff'},
                    x: 'left', // legend position
                    y: '20',
                    data: ['全国数据']
                },
                visualMap: { // colour buckets
                    textStyle: {color: '#fff'},
                    x: 'left',
                    y: 'bottom',
                    // Buckets are contiguous and non-overlapping; the original
                    // ranges overlapped at 900-1000 and left a gap at 300-310.
                    splitList: [
                        {start: 1500},
                        {start: 1000, end: 1500},
                        {start: 300, end: 1000},
                        {start: 200, end: 300},
                        {start: 50, end: 200},
                        {start: 0, end: 50}
                    ],
                    color: ['#5475f5', '#9feaa5', '#3FA7FF', '#66E0E3', '#FFDC5E', '#9fb5ea']
                },
                series: [
                    {
                        name: '确诊病例',
                        type: 'map',
                        mapType: 'china',
                        zoom: 1.1,
                        roam: false, // no mouse zoom / pan
                        itemStyle: { // style of the region polygons
                            normal: { // default state
                                label: {
                                    show: true,
                                    textStyle: {color: "rgb(249, 249, 249)"}
                                }
                            },
                            emphasis: { // highlighted state (e.g. mouse hover)
                                label: {show: true}
                            }
                        },
                        top: "100", // distance from the container's top edge
                        data: statisticsData
                    }
                ]
            };
            myChart.setOption(optionChinaMap, true);
        },
        error: function () {
            // Without this handler the loading spinner stayed forever on failure.
            myChart.hideLoading();
            if (window.console && console.error) {
                console.error('Failed to load data from servlet?method=find');
            }
        }
    });
</script>
</body>
</html>
dao层代码:
package com.epidemic.dao;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import com.epidemic.entity.entity;
/**
 * Reads the scraped epidemic statistics from the local MySQL databases.
 *
 * <p>Both public methods run {@code select * from sheet1}; they differ only in
 * the database they connect to and in the column used as the region name.
 *
 * <p>NOTE(review): the database credentials are hard-coded below; consider
 * moving them to external configuration.
 */
public class dao {
    private static final String DRIVER = "com.mysql.jdbc.Driver";
    private static final String PROVINCE_URL =
            "jdbc:mysql://localhost:3306/outbreak?useUnicode=true&characterEncoding=utf-8";
    private static final String CITY_URL =
            "jdbc:mysql://localhost:3306/xinwen?useUnicode=true&characterEncoding=utf-8";
    private static final String USERNAME = "root";
    private static final String PASSWORD = "893225523";
    private static final String QUERY = "select * from sheet1";

    /**
     * Loads the province-level rows from the {@code outbreak} database.
     *
     * @return one entity per row with the province name set; empty (or
     *         partial) if a database error occurs, never {@code null}
     */
    public List<entity> list1() {
        return query(PROVINCE_URL, false);
    }

    /**
     * Loads the city-level rows from the {@code xinwen} database.
     *
     * @return one entity per row with the city name set; empty (or partial)
     *         if a database error occurs, never {@code null}
     */
    public List<entity> list2() {
        return query(CITY_URL, true);
    }

    /**
     * Runs the shared query against {@code url} and maps every row to an entity.
     *
     * @param url       JDBC connection string of the database to read
     * @param cityLevel {@code true} to read the city column into {@code shi},
     *                  {@code false} to read the province column into
     *                  {@code shengfen}
     * @return the rows read before any error occurred
     */
    private List<entity> query(String url, boolean cityLevel) {
        List<entity> list = new ArrayList<entity>();
        try {
            // Load the driver so it registers itself with DriverManager.
            Class.forName(DRIVER);
            // try-with-resources closes all three JDBC objects even when
            // reading a row throws; the original leaked them in that case.
            try (Connection conn = DriverManager.getConnection(url, USERNAME, PASSWORD);
                    Statement statement = conn.createStatement();
                    ResultSet resultSet = statement.executeQuery(QUERY)) {
                while (resultSet.next()) {
                    entity row = new entity();
                    if (cityLevel) {
                        row.setShi(resultSet.getString("省份市"));
                    } else {
                        row.setShengfen(resultSet.getString("省份"));
                    }
                    row.setQuezhen(resultSet.getString("确诊病例"));
                    row.setDead(resultSet.getString("死亡数量"));
                    row.setCure(resultSet.getString("痊愈数量"));
                    list.add(row);
                }
            }
        } catch (Exception e) {
            // Best effort, as before: report the failure and return what was read.
            e.printStackTrace();
        }
        return list;
    }
}
servlet层:
package com.epidemic.servlet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import com.epidemic.dao.dao;
import com.google.gson.Gson;
import com.epidemic.entity.*;
/**
* Servlet implementation class servlet
*/
@WebServlet("/servlet")
public class servlet extends HttpServlet {
private static final long serialVersionUID = 1L;
dao dao1=new dao();
/**
* @see HttpServlet#HttpServlet()
*/
public servlet() {
super();
// TODO Auto-generated constructor stub
}
protected void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
request.setCharacterEncoding("utf-8");
String method=request.getParameter("method");
if("find".equals(method))
{
find(request, response);
}else if("find2".equals(method))
{
find2(request, response);
}
}
private void find(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
request.setCharacterEncoding("utf-8");
List<entity> list =new ArrayList<entity>();
HttpSession session=request.getSession();
String buy_nbr=(String) session.getAttribute("userInfo");
entity book = new entity();
List<entity> list2=dao1.list1();
System.out.println(list2.size());
// String buy_nbr=(String) session.getAttribute("userInfo");
// System.out.println(buy_nbr);
Gson gson2 = new Gson();
String json = gson2.toJson(list2);
System.out.println(json);
// System.out.println(json);
// System.out.println(json.parse);
response.setContentType("text/html;charset=UTF-8");
response.getWriter().write(json);
}
private void find2(HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
request.setCharacterEncoding("utf-8");
List<entity> list =new ArrayList<entity>();
HttpSession session=request.getSession();
String buy_nbr=(String) session.getAttribute("userInfo");
entity book = new entity();
List<entity> list2=dao1.list2();
System.out.println(list2.size());
// String buy_nbr=(String) session.getAttribute("userInfo");
// System.out.println(buy_nbr);
Gson gson2 = new Gson();
String json = gson2.toJson(list2);
System.out.println(json);
// System.out.println(json);
// System.out.println(json.parse);
response.setContentType("text/html;charset=UTF-8");
response.getWriter().write(json);
}
}
项目结构:

运行结果:

然后我们开始制作地图的下钻,显示各个省的市的确诊病例:
python源代码:
import requests
import re
import xlwt
# NOTE(review): this module-level ``url`` is never read by the script (the page
# actually scraped is the Baidu URL passed to get_page below). Kept unchanged
# in case something else relies on it.
url = 'http://m.look.360.cn/subject/400?sign=360_6aa05217'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
}


def get_page(url):
    """Download ``url`` and return the response body as text.

    Returns None when the request raises or the HTTP status is not 200;
    in both cases a diagnostic is printed instead of raising.
    """
    try:
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            print('获取网页成功')
            return response.text
        print('获取网页失败')
    except Exception as e:
        print(e)
    return None


f = xlwt.Workbook(encoding='utf-8')
sheet01 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)
# Header row. NOTE(review): the first column is titled "省份" although the
# rows written below are city names - confirm against the database import.
for col, title in enumerate(('省份', '确诊病例', '死亡数量', '痊愈数量')):
    sheet01.write(0, col, title)

page = get_page("https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1")
if page is None:
    # Without this guard re.findall() would fail with a TypeError on None.
    raise SystemExit('未获取到网页内容,无法解析数据')

# The "crued" spelling is kept verbatim - presumably it is the key used by the
# page's embedded JSON; verify against the live page before "fixing" it.
items = re.findall('{"city":"(.*?)","confirmed":"(.*?)","died":"(.*?)","crued":"(.*?)",',page,re.S)
print(len(items))
print(items)
for row, (city, confirmed, died, cured) in enumerate(items, start=1):
    # The city name is captured as \uXXXX escapes; decode it to real characters.
    sheet01.write(row, 0, city.encode('utf-8').decode('unicode_escape'))
    # Empty counts are stored as the string "0".
    sheet01.write(row, 1, confirmed or "0")
    sheet01.write(row, 2, died or "0")
    sheet01.write(row, 3, cured or "0")
print("打印完!!!")
f.save('Outbreak2.xls')
爬取结果:

在这里,我在市级地区的名称后面统一加了个“市”字,以便运用echarts制作地图;但是仍有很多市级地区的名称需要手动修改,比如数据库里的“邢台”运行时不会显示结果,必须要改成“邢台市”。我这里有一份sql文件,有需要的联系Q:893225523。
之后开始实现可视化:
jsp代码:
<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%--
  Drill-down map page: shows a selectable map of China on the left; when a
  province is selected, a second map of that province is drawn using the
  city-level records requested from "servlet?method=find2".
--%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Insert title here</title>
<script src="https://cdn.staticfile.org/echarts/4.3.0/echarts.min.js"></script>
<script src="js/jquery-1.5.1.js"></script>
<script src="js/china.js"></script>
<script src="js/echarts-all.js"></script>
</head>
<body>
<div id="main" style="width: 1500px;height:600px;"></div>
<script type="text/javascript">
    // {name, value} pairs in the shape the map series expects.
    var statisticsData = [];
    var myChart = echarts.init(document.getElementById('main'));

    myChart.showLoading();
    $.ajax({
        type: "post",
        async: true, // asynchronous, so the browser is not blocked while waiting
        url: "servlet?method=find2", // handled by the servlet
        data: {},
        dataType: "json", // the response is parsed as JSON
        success: function (result) {
            // Always remove the spinner, even when the payload is empty.
            myChart.hideLoading();
            if (!result) {
                return;
            }

            // Map each record to the {name, value} shape used by the series.
            for (var i = 0; i < result.length; i++) {
                statisticsData.push({
                    name: result[i].shi,
                    value: result[i].quezhen
                });
            }

            // Declared locally (captured by the handler below) so it does not
            // leak into the global scope.
            var option = {
                tooltip: {
                    trigger: 'item'
                },
                toolbox: {
                    show: true,
                    orient: 'vertical',
                    x: 'right',
                    y: 'center',
                    feature: {
                        mark: {show: true},
                        dataView: {show: true, readOnly: false}
                    }
                },
                series: [
                    {
                        // Series 0: the country map used as a province selector.
                        tooltip: {
                            trigger: 'item',
                            formatter: '{b}'
                        },
                        name: '选择器',
                        type: 'map',
                        mapType: 'china',
                        mapLocation: {
                            x: 'left',
                            y: 'top',
                            width: '30%'
                        },
                        roam: true,
                        selectedMode: 'single',
                        itemStyle: {
                            emphasis: {label: {show: true}}
                        },
                        data: [
                            {name: '北京', selected: false},
                            {name: '天津', selected: false},
                            {name: '上海', selected: false},
                            {name: '重庆', selected: false},
                            {name: '河北', selected: false},
                            {name: '河南', selected: false},
                            {name: '云南', selected: false},
                            {name: '辽宁', selected: false},
                            {name: '黑龙江', selected: false},
                            {name: '湖南', selected: false},
                            {name: '安徽', selected: false},
                            {name: '山东', selected: false},
                            {name: '新疆', selected: false},
                            {name: '江苏', selected: false},
                            {name: '浙江', selected: false},
                            {name: '江西', selected: false},
                            {name: '湖北', selected: false},
                            {name: '广西', selected: false},
                            {name: '甘肃', selected: false},
                            {name: '山西', selected: false},
                            {name: '内蒙古', selected: false},
                            {name: '陕西', selected: false},
                            {name: '吉林', selected: false},
                            {name: '福建', selected: false},
                            {name: '贵州', selected: false},
                            {name: '广东', selected: false},
                            {name: '青海', selected: false},
                            {name: '西藏', selected: false},
                            {name: '四川', selected: false},
                            {name: '宁夏', selected: false},
                            {name: '海南', selected: false},
                            {name: '台湾', selected: false},
                            {name: '香港', selected: false},
                            {name: '澳门', selected: false}
                        ]
                    }
                ],
                animation: false
            };

            // NOTE(review): echarts.config / EVENT.MAP_SELECTED are presumably
            // provided by js/echarts-all.js, which is loaded after the 4.3.0
            // build - confirm before changing the script includes.
            var ecConfig = echarts.config;
            myChart.on(ecConfig.EVENT.MAP_SELECTED, function (param) {
                var selected = param.selected;
                var selectedProvince;
                var name;
                // Sync the selector's state and remember the selected province.
                for (var i = 0, l = option.series[0].data.length; i < l; i++) {
                    name = option.series[0].data[i].name;
                    option.series[0].data[i].selected = selected[name];
                    if (selected[name]) {
                        selectedProvince = name;
                    }
                }

                // Nothing selected: drop the province map and its legend/range.
                if (typeof selectedProvince == 'undefined') {
                    option.series.splice(1);
                    option.legend = null;
                    option.dataRange = null;
                    myChart.setOption(option, true);
                    return;
                }

                // Series 1: the selected province with the city-level data.
                option.series[1] = {
                    name: '确诊病例',
                    type: 'map',
                    mapType: selectedProvince,
                    itemStyle: {
                        normal: {label: {show: true}},
                        emphasis: {label: {show: true}}
                    },
                    mapLocation: {
                        x: '35%'
                    },
                    roam: true,
                    data: statisticsData
                };
                option.legend = {
                    x: 'right',
                    data: ['随机数据']
                };
                option.dataRange = {
                    orient: 'horizontal',
                    x: 'right',
                    min: 0,
                    max: 1000,
                    color: ['orange', 'yellow'],
                    text: ['高', '低'], // labels shown instead of numeric text
                    splitNumber: 0
                };
                myChart.setOption(option, true);
            });
            myChart.setOption(option, true);
        },
        error: function () {
            // Without this handler the loading spinner stayed forever on failure.
            myChart.hideLoading();
            if (window.console && console.error) {
                console.error('Failed to load data from servlet?method=find2');
            }
        }
    });
</script>
</body>
</html>
dao层、servlet层的代码在上面已经给出。
运行结果:


浙公网安备 33010602011771号