返回顶部

一缕半夏微光

温柔半两,从容一生

导航

Python爬取疫情历史数据

每日日报24 3月24日
所花时间(包括上课) 7小时
代码量(行) 139
博客量(篇) 2
了解到的知识点 Python爬取疫情历史数据

 

 

 

 

 

 

效果图如下:

其中“各省疫情数据.xls”文件中为各个省的疫情历史数据:

其余文件为各省所辖城市的疫情历史数据,以“安徽”为例:

代码如下:

  1 import sys
  2 import datetime
  3 import requests
  4 import xlwt
  5 
  6 def getURLContent(url):
  7     headers = {
  8         'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Mobile Safari/537.36'
  9     }
 10     response = requests.post(url, headers=headers)
 11     return response.status_code, response
 12 
 13 def getCountry(workbook,province, city):
 14         worksheet=workbook.add_sheet(city)
 15         print("开始爬取 %s 的 %s的疫情数据... ... " % (province,city))
 16         url = 'https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?province=%s&city=%s' % (province,city)
 17         status_code, data = getURLContent(url)
 18         if status_code != 200:
 19             print("%s数据爬取失败,状态码%d" % (province, status_code))
 20             sys.exit()
 21         data = data.json()["data"]
 22         if data == None:
 23             print("%s数据爬取数据为空" % province)
 24 
 25         worksheet.col(0).width = 128 * 20  # 设置excel中第A列的宽度(方便日期数据展示)
 26 
 27         current_row_index = 0  # 记录当前所写入数据的行号
 28 
 29         # 将列标题写入excel
 30         for i, str_col in enumerate(['日期', '地点', '确诊人数', '死亡人数', '治愈人数', '确诊新增']):
 31             worksheet.write(current_row_index, i, str_col)  # 参数对应 行, 列, 值
 32         current_row_index += 1
 33 
 34         # 往excel中写入日期格式
 35         style = xlwt.XFStyle()
 36         style.num_format_str = 'YYYY-MM-DD'
 37 
 38         # 将抓取到的疫情数据写入excel
 39         for data_i in data:
 40             worksheet.write(current_row_index, 0, datetime.datetime.strptime('2020.' + data_i['date'], "%Y.%m.%d"),style)
 41             worksheet.write(current_row_index, 1, data_i['city'])
 42             worksheet.write(current_row_index, 2, data_i['confirm'])
 43             worksheet.write(current_row_index, 3, data_i['dead'])
 44             worksheet.write(current_row_index, 4, data_i['heal'])
 45             worksheet.write(current_row_index, 5, data_i['confirm_add'])
 46             current_row_index += 1
 47         workbook.save('E:\人数采集3.0\%s的疫情数据.xls' % province)
 48 
 49 def getProvince(workbook,province):
 50     worksheet = workbook.add_sheet(province)
 51     print("开始爬取 %s 疫情数据... ... " % province)
 52     url = 'https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?province=%s' % province
 53     status_code, data = getURLContent(url)
 54 
 55     if status_code != 200:
 56         print("%s数据爬取失败,状态码%d" % (province, status_code))
 57         sys.exit()
 58     data = data.json()["data"]
 59     if data == None:
 60         print("%s数据爬取数据为空" % province)
 61 
 62     worksheet.col(0).width = 128 * 20  # 设置excel中第A列的宽度(方便日期数据展示)
 63 
 64     current_row_index = 0  # 记录当前所写入数据的行号
 65 
 66     # 将列标题写入excel
 67     for i, str_col in enumerate(['日期', '地点', '确诊人数', '死亡人数', '治愈人数', '确诊新增']):
 68         worksheet.write(current_row_index, i, str_col)  # 参数对应 行, 列, 值
 69     current_row_index += 1
 70 
 71     # 往excel中写入日期格式
 72     style = xlwt.XFStyle()
 73     style.num_format_str = 'YYYY-MM-DD'
 74 
 75     # 将抓取到的疫情数据写入excel
 76     for data_i in data:
 77         worksheet.write(current_row_index, 0, datetime.datetime.strptime('2020.' + data_i['date'], "%Y.%m.%d"),
 78                             style)
 79         worksheet.write(current_row_index, 1, data_i['province'])
 80         worksheet.write(current_row_index, 2, data_i['confirm'])
 81         worksheet.write(current_row_index, 3, data_i['dead'])
 82         worksheet.write(current_row_index, 4, data_i['heal'])
 83         worksheet.write(current_row_index, 5, data_i['confirm_add'])
 84         current_row_index += 1
 85         workbook.save('E:\人数采集3.0\各省疫情数据.xls')
 86 
 87 if __name__ == "__main__":
 88 
 89     # 创建一个workbook 设置编码
 90     workbook = xlwt.Workbook(encoding='utf-8')
 91     country = {"北京", "天津", "上海", "重庆", "河北", "山西", "辽宁", "吉林", "黑龙江", "江苏", "浙江", "安徽", "福建", "江西", "山东",  "河南", "湖北", "湖南", "广东", "海南", "四川", "贵州", "云南", "陕西", "甘肃", "青海", "台湾", "新疆", "宁夏", "西藏", "广西", "内蒙古", "香港", "澳门"}
 92     for count in country:
 93         getProvince(workbook,count)
 94         print(count)
 95 
 96     # 添加要爬取疫情数据的国家
 97     d = {
 98         '河北': ["石家庄", "唐山", "秦皇岛", "邯郸", "邢台", "保定", "张家口", "承德", "沧州", "廊坊", "衡水"],
 99         '山西': ["太原", "大同", "阳泉", "长治", "晋城", "朔州", "忻州", "吕梁", "晋中", "临汾", "运城"],
100         '辽宁': ["沈阳", "大连", "鞍山", "抚顺", "本溪", "丹东", "锦州", "营口", "阜新", "辽阳", "盘锦", "铁岭", "葫芦岛"],
101         '吉林': ["长春", "吉林", "四平", "辽源", "通化", "白城", "松原"],
102         '黑龙江': ["哈尔滨", "齐齐哈尔", "牡丹江", "佳木斯", "大庆", "伊春", "鸡西", "鹤岗", "双鸭山", "七台河", "绥化", "黑河"],
103         '江苏': ["南京", "无锡", "徐州", "常州", "苏州", "南通", "连云港", "淮安", "盐城", "扬州", "镇江", "泰州", "宿迁"],
104         '浙江': ["杭州", "宁波", "温州", "绍兴", "湖州", "嘉兴", "金华", "衢州", "台州", "丽水", "舟山"],
105         '安徽': ["合肥", "芜湖", "蚌埠", "淮南", "马鞍山", "淮北", "铜陵", "安庆", "黄山", "阜阳", "宿州", "滁州", "六安", "宣城", "池州", "亳州"],
106         '福建': ["福州", "莆田", "泉州", "厦门", "漳州", "龙岩", "三明", "南平", "宁德"],
107         '江西': ["南昌", "赣州", "宜春", "吉安", "上饶", "抚州", "九江", "景德镇", "萍乡", "新余", "鹰潭"],
108         '山东': ["济南", "青岛", "淄博", "枣庄", "烟台", "潍坊", "济宁", "泰安", "威海", "日照", "滨州", "德州", "聊城", "临沂", "菏泽"],
109         '河南': ["郑州", "开封", "洛阳", "平顶山", "安阳", "鹤壁", "新乡", "焦作", "濮阳", "许昌", "漯河", "三门峡", "商丘", "周口", "驻马店", "南阳", "信阳"],
110         '湖北': ["武汉", "黄石", "十堰", "荆州", "宜昌", "襄阳", "鄂州", "荆门", "黄冈", "孝感", "咸宁", "随州"],
111         '湖南': ["长沙", "株洲", "湘潭", "衡阳", "邵阳", "岳阳", "张家界", "益阳", "常德", "娄底", "郴州", "永州", "怀化"],
112         '广东': ["广州", "深圳", "珠海", "汕头", "佛山", "韶关", "湛江", "肇庆", "江门", "茂名", "惠州", "梅州", "汕尾", "河源", "阳江", "清远", "东莞","中山", "潮州", "揭阳"],
113         '海南': ["海口", "三亚", "儋州"],
114         '四川': ["成都", "绵阳", "自贡", "攀枝花", "泸州", "德阳", "广元", "遂宁", "内江", "乐山", "资阳", "宜宾", "南充", "达州", "雅安", "广安", "巴中","眉山"],
115         '贵州': ["贵阳", "六盘水", "遵义", "铜仁", "毕节", "安顺"],
116         '云南': ["昆明", "曲靖", "玉溪", "普洱", "临沧"],
117         '陕西': ["西安", "铜川", "宝鸡", "咸阳", "渭南", "汉中", "安康", "商洛", "延安", "榆林"],
118         '甘肃': ["兰州", "金昌", "白银", "天水", "张掖", "定西", "陇南", "平凉", "庆阳"],
119         '青海': ["西宁"],
120         '新疆': ["乌鲁木齐", "吐鲁番"],
121         '宁夏': ["银川", "石嘴山", "吴忠", "固原", "中卫"],
122         '西藏': ["拉萨"],
123         '广西': ["南宁", "柳州", "桂林", "梧州", "北海", "来宾", "贺州", "玉林", "百色", "河池", "钦州", "防城港", "贵港"],
124         '内蒙古': ["呼和浩特", "包头", "乌海", "赤峰", "呼伦贝尔", "通辽", "乌兰察布", "鄂尔多斯", "巴彦淖尔"],
125     }
126 
127     def itertransfer(d):
128         for k, values in d.items():
129             for v in values:
130                 yield (k, v)
131 
132     for name in d.keys():
133         print(name.title())
134         # 创建一个workbook 设置编码
135         workbookk = xlwt.Workbook(encoding='utf-8')
136         for k,v in itertransfer(d):
137             if k==name.title():
138                 getCountry(workbookk, k, v)
139                 print(k,v)

posted on 2021-03-24 23:00  一缕半夏微光  阅读(237)  评论(0)    收藏  举报