数据爬取

import time

import requests

import pymysql

 

def build_connection_from_sourcedb():

    return pymysql.connect(host="localhost",database="test",user="root",password="123456",port=3306,charset='utf8')

def get_province_list():

    conn = build_connection_from_sourcedb()

    cur = conn.cursor()

    sql = "select province_short_name from dict_zone_province"  # 请注意数据表中应事先存储此数据. 

    cur.execute(sql)

    result = cur.fetchall()

    cur.close()

    conn.close()

    province_list = []

    for i in result:

        province_list.append(str(i[0]))

    return province_list

###############################################################

# 目标数据库操作

###############################################################

def target_database_operation(sql):

    conn = build_connection_from_sourcedb()

    cur = conn.cursor()

    cur.execute(sql)

    conn.commit()

    cur.close()

    conn.close()

def record_time_slot():

    return time.strftime("%m.%d")

 

def delta_load():

    today = record_time_slot()

    url_prefix = "https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?province="

    province_list = get_province_list()

    province_list.append("香港")

    province_list.append("澳门")

    province_list.append("台湾")

    i = 0

    len_province_list = len(province_list)

    conn = build_connection_from_sourcedb()

    cur = conn.cursor()

    sql_prefix = "insert into info_copy1(Date,Province,Confirmed_num,Yisi_num,Cured_num,Dead_num) values('"

    sql_prefix_all = "insert into ncovi2019_daily_all(date,province,confirm,dead,heal,confirm_add) values('"

    while i < len_province_list:

        url = url_prefix + province_list[i]

        response = requests.get(url)

        data_province_i = response.json()["data"]

        len_data_province_i = len(data_province_i)

        j = 0

        while j < len_data_province_i:

            Date = str(data_province_i[j]["date"])

            Province = str(data_province_i[j]["province"])

            Confirmed_num = str(data_province_i[j]["confirm"])

            Yisi_num = str(data_province_i[j]["confirm_add"])

            Dead_num = str(data_province_i[j]["dead"])

            Cured_num = str(data_province_i[j]["heal"])

            

            # 如果是今天,那么插入数据到ncovi2019_daily

            if today == Date:

                fields = Date + "','" + Province + "',"  + Confirmed_num + "," + Yisi_num + "," + Dead_num + "," + Cured_num +")"

                sql = sql_prefix + fields

                print(sql)

                cur.execute(sql)

            Date = ""

            Province = ""

            Confirmed_num = ""

            Yisi_num = ""

            Dead_num = ""

            Cured_num = ""



            j = j + 1

        conn.commit()

        print("",str(i),"个省份-",province_list[i],""+today+"的疫情数据已插入ncovi2019_daily")

        i = i + 1

    target_database_operation("delete from ncovi2019_daily_all where date = '"+today+"'")

    print("今天"+today+"的数据已经从ncovi2019_daily_all表中删除")

    # 在ncovi2019_daily 中插入全国汇总数

    print("将今天的数据汇总插入到ncovi2019_daily")

    sql = """

            insert into info_copy1(Date,Province,Confirmed_num,Yisi_num,Cured_num,Dead_num)

            SELECT

                Date,

              '全国' as Province,

                sum(Confirmed_num) AS Confirmed_num,

                sum(Yisi_num) AS Yisi_num,

                sum(Cured_num) AS Cured_num,

                sum(Dead_num) AS Dead_num,

            FROM

                info_copy1

            GROUP BY

                Date

            ORDER BY

                1

            """

    cur.execute(sql)

    conn.commit()

    print("将今天的数据插入到ncovi2019_daily_all")

    sql_today = """

            insert into ncovi2019_daily_all(date,province,confirm,dead,heal,confirm_add)

            SELECT

                date,

                province as province,

                confirm AS confirm,

                dead AS dead,

                heal AS heal,

                confirm_add AS confirm_add

            FROM

                ncovi2019_daily

                where province != '全国'

            """

    print("上步结束后进行全国汇总")

    cur.execute(sql_today)

    conn.commit()

    sql_all_summary = """

               insert into ncovi2019_daily_all(date,province,confirm,dead,heal,confirm_add)

               SELECT

                   date,

                 '全国' as province,

                   sum(confirm) AS confirm,

                   sum(dead) AS dead,

                   sum(heal) AS heal,

                   sum(confirm_add) AS confirm_add

               FROM

                   ncovi2019_daily_all

                   where province != '全国'

               GROUP BY

                   date

               ORDER BY

                   1

               """

    cur.execute(sql_all_summary)

    conn.commit()

    print("全国疫情数据已汇总插入ncovi2019_daily_all")

    target_database_operation("update info_copy1 set report_date =  Date(concat('2020',left(Date,2),right(Date,2))) ")

    target_database_operation("update ncovi2019_daily_all set report_date =  date(concat('2020',left(date,2),right(date,2))) ")

    cur.close()

    conn.close()

if __name__ == "__main__":

    print("今天日期:"+record_time_slot())

    target_database_operation("truncate table info_copy1")

    print("清空ncovi2019_daily")

    delta_load()
test.py

posted @ 2020-06-10 00:57  晨起  阅读(235)  评论(0编辑  收藏  举报