以下是数据爬取代码:
# @Author Hero_poi
import requests
import json
import pymysql
# Fetch the per-country ranking list from the ranklist endpoint.
session = requests.session()
# A timeout keeps the script from hanging forever on a stalled connection.
result = session.get(
    'https://api.inews.qq.com/newsqa/v1/automation/foreign/country/ranklist',
    timeout=30,
)
# Fail loudly on an HTTP error instead of trying to parse an error body.
result.raise_for_status()
res = json.loads(result.text)
# "data" is indexed below as a list of per-country dicts.
data_all = res["data"]
if not data_all:
    raise ValueError("ranklist API returned no country records")
# Debug output: show the last record without assuming a fixed list length.
print(data_all[-1])
# Snapshot date taken from the first record, formatted as "<y>.<date>".
today = data_all[0]["y"] + '.' + data_all[0]["date"]
# Build one row per country record, in the column order used by the insert
# statement. Iterating over the list itself (rather than a fixed 0..184 range)
# keeps this working when the API returns more or fewer countries.
data_p = []
for record in data_all:
    name = record["name"]
    # Snapshot date formatted as "<y>.<date>".
    date = record["y"] + '.' + record["date"]
    data_p.append([
        name + date,  # primary key: country name followed by the date
        name,
        record["continent"],
        date,
        record["confirmAdd"],
        record["confirm"],
        record["suspect"],
        record["dead"],
        record["heal"],
        record["nowConfirm"],
    ])
# Connect to the database.
# NOTE(review): the credentials are hard-coded; consider loading them from the
# environment or from a config file kept out of version control.
conn = pymysql.connect(host="127.0.0.1", port=3306, user="root", password="260702266", database="hope", charset="utf8")
cursor = conn.cursor()
# Remove the stored rows for this one country before inserting. Per the notes
# accompanying this script, its upstream record is stuck on a 2020 date, so its
# primary key (name + date) would otherwise collide and roll back the insert.
pro = "delete from datas where country = '马提尼克岛'"
try:
    cursor.execute(pro)
    conn.commit()
except pymysql.MySQLError as exc:
    # Best effort: undo the failed statement and report it, then carry on.
    conn.rollback()
    print("delete before insert failed:", exc)
# NOTE(review): `count` is bumped once before and once after the insert and is
# not read anywhere in the visible script; kept so its final value is unchanged.
count = 0
sql = "insert into datas(id,country,continent,date,confirmAdd,confirm,suspect,dead,heal,nowConfirm) values(%s,%s," \
      "%s,%s,%s,%s,%s,%s,%s,%s) "
try:
    count = count + 1
    # Insert every collected row in one batch; the values are passed as
    # parameters so the driver handles quoting.
    cursor.executemany(sql, data_p)
    conn.commit()
except pymysql.MySQLError as exc:
    # Best effort: a failure (e.g. a duplicate primary key) rolls back the
    # whole batch; report it instead of hiding it.
    conn.rollback()
    print("bulk insert failed, batch rolled back:", exc)
count = count + 1
# Record the date of this crawl in the marker row of `redodata`.
# `today` comes from the remote API, so it is bound as a parameter instead of
# being concatenated into the SQL text.
query = "update redodata set date = %s where name = 'today'"
try:
    cursor.execute(query, (today,))
    conn.commit()
except pymysql.MySQLError as exc:
    # Best effort: undo the failed statement and report it, then carry on.
    conn.rollback()
    print("updating the last-crawl date failed:", exc)
# Run the same delete statement again after the insert, then release the
# database resources.
# NOTE(review): presumably this removes the stale row that the insert just
# re-added so the next run does not hit a duplicate key - confirm.
try:
    cursor.execute(pro)
    conn.commit()
except pymysql.MySQLError as exc:
    # Best effort: undo the failed statement and report it.
    conn.rollback()
    print("delete after insert failed:", exc)
finally:
    # Always close the cursor and connection, even on an unexpected error.
    cursor.close()
    conn.close()
这一段中删除马提尼克岛的原因是:该地区的疫情数据一直停留在2020年的某天,而我们的主键由地名和日期拼接而成,重复插入这条记录会造成主键冲突,使整批插入失败并触发回滚……
另外我们还建了一个标记最后爬取日期的表,以便咱们搭建网站时使用



浙公网安备 33010602011771号