此版本中的疫情下载方式只适用于11月20日之前的数据。更新后的下载链接格式为:http://www.sy72.com/download/D_yiqing.asp?c=5&d=1&s=1,其中 c 为国家代码,d 为下载格式,s 保持不变,即可得到对应国家的历史疫情数据;最新好像还开放了接口,可以直接获取数据。代码可能有错而无法直接运行,但是主体思路是没问题的。

PS:历史疫情数据网址:http://www.sy72.com/world/world6.html (中国的数据是独立的,需要对页面数据进行爬取,再写成xls文件)。

代码如下:

import requests
import xlwt
import xlrd
import re
import sys
import threading
import os
import PySimpleGUI as sg
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import matplotlib
def url_text_get(url,code='utf-8'):
    """Download a web page and return its body as text.

    Args:
        url: Address of the page to fetch.
        code: Encoding used to decode the response body.

    Returns:
        The decoded text of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # The header has to exist before the request is made and must be passed
    # to it; the correct header name is "User-Agent".
    kv={'User-Agent':'Mozilla/5.0'}
    r=requests.get(url,headers=kv)
    r.raise_for_status()
    r.encoding=code
    return r.text
def url_infor_get():
    """Return the URLs of the population ranking pages (pages 1 to 12)."""
    prefix='https://www.phb123.com/city/renkou/rk_'
    return ['{}{}.html'.format(prefix,page) for page in range(1,13)]
def ayurl(html):
    """Collect [link text, href] pairs from the second <ul> element of a page.

    Args:
        html: HTML text of the page.

    Returns:
        A list with one two-element list per <a> tag found inside the
        second <ul>: the tag's string and its href attribute.
    """
    page=BeautifulSoup(html,'html.parser')
    second_ul=page('ul')[1]
    # The <ul> is re-parsed on its own so only its anchors are visited.
    menu=BeautifulSoup(str(second_ul),'html.parser')
    return [[anchor.string,anchor.get('href')] for anchor in menu.find_all('a')]
def download(li,p_list):
    """Download one country's history file and convert it in place.

    Args:
        li: Two-element sequence: country name and download URL.
        p_list: Population table passed on to d_xieru.

    Side effects:
        Creates the data folder if needed, writes "<name>.xls" into it and
        stores the folder in the module-level name apath.
    """
    global apath
    root = "d:/python练习文档/全球历史数据"
    # exist_ok avoids FileExistsError when several download threads reach
    # this point at the same time.
    os.makedirs(root, exist_ok=True)
    path = root +'/'+ li[0] + '.xls'
    apath=root
    url =li[1]
    r = requests.get(url)
    # The with-statement closes the file; no explicit close() is needed.
    with open(path, 'wb') as t:
        t.write(r.content)
    try:
        d_xieru(p_list, path, li[0])
    except Exception as err:
        # Conversion stays best-effort, but the failure is reported instead
        # of being silently discarded.
        print('{0:}转换失败:{1:}'.format(li[0], err))
def down(li,p_list):
    """Download all country files in parallel and wait until they finish.

    Args:
        li: Iterable of [country name, download URL] entries.
        p_list: Population table handed to lp and to every download.

    Returns:
        The value returned by lp(p_list).
    """
    day = lp(p_list)
    workers=[]
    for i in li:
        t=threading.Thread(target=download,args=(i,p_list,))
        t.start()
        workers.append(t)
    # Joining the started threads blocks without spinning the CPU and does
    # not depend on how many unrelated threads exist in the process.
    for t in workers:
        t.join()
    return day
def yqlj():
    """Build the list of [country name, xls download URL] entries.

    The names and links come from ayurl applied to the index page; all
    digits found in each link are concatenated to form the xls file number.
    """
    xls_prefix='http://www.sy72.com/xls/world'
    index_page='http://www.sy72.com/world/world417_25970.html'
    result=[]
    for name,href in ayurl(url_text_get(index_page)):
        digits=''.join(re.findall(r'[0-9]+',href))
        result.append([name,xls_prefix+digits+'.xls'])
    return result
def text_renkou_get(html,li):
    """Append entries taken from one population page to li.

    The <td> cells of all table rows are read in groups of five; from each
    complete group the <p> string of the second cell and the string of the
    third cell are appended to li as a two-element list.

    Args:
        html: HTML text of the page.
        li: List that receives the extracted pairs (modified in place).
    """
    rows=BeautifulSoup(html,'html.parser').find_all('tr')
    cells=BeautifulSoup(str(rows),'html.parser').find_all('td')
    # Step through the start index of every complete group of five cells.
    for base in range(0,5*(len(cells)//5),5):
        li.append([cells[base+1].p.string,cells[base+2].string])
def renkouhuode(ur_list):
    """Download all population pages, show progress and save the result.

    Args:
        ur_list: URLs of the population pages to fetch.

    Returns:
        The list of pairs collected by text_renkou_get over all pages.

    Side effects:
        Shows a progress window, writes status text to element '6' of the
        module-level main window and saves "全球国家人口数.xls" in the current
        working directory. Exits the process if the user presses Cancel.
    """
    global window
    layout = [[sg.Text('人口下载进度'),sg.Text('0.00%',key='2')],
              [sg.ProgressBar(len(ur_list), orientation='h', size=(20, 20), key='progressbar')],
              [sg.Cancel()]]
    window1= sg.Window('下载进度', layout)
    progress_bar = window1['progressbar']
    t_li=[]
    total=len(ur_list)
    for i,url in enumerate(ur_list):
        text_renkou_get(url_text_get(url),t_li)
        event, values = window1.read(timeout=20)
        if event == "Cancel":
            # Close the progress window before leaving, as xieru does.
            window1.close()
            sys.exit(0)
        progress_bar.UpdateBar(i+ 1)
        # Use the number of finished pages (i+1) so the text matches the bar
        # and reaches 100% on the last page.
        window1.FindElement("2").Update("{:.2f}".format(((i+1)/total) * 100) + '%')
    window1.close()
    window.Element('6').Update("***全球人口下载成功!***")
    event, values = window.read(timeout=100)
    f = xlwt.Workbook()
    sheet1 = f.add_sheet('class')
    # Rows start at index 1; row 0 of the sheet is left empty.
    for i in range(len(t_li)):
        sheet1.write(i + 1, 0, t_li[i][0])
        sheet1.write(i + 1, 1, t_li[i][1])
    # Resolve the real target once so the reported folder is the one the
    # file is actually written to (the working directory, not the script's).
    save_path=os.path.abspath('全球国家人口数.xls')
    f.save(save_path)
    window.Element('6').Update(values.get('6')+"***全球人口保存路径为:"+os.path.dirname(save_path)+"***")
    return t_li
def tjs(x,y):
    """Convert month x and day-of-month y into a day number within the year.

    The month offsets count February as 29 days. A month outside 1..12
    yields 0.
    """
    days_before_month={
        1:0,
        2:31,
        3:60,
        4:91,
        5:121,
        6:152,
        7:182,
        8:213,
        9:244,
        10:274,
        11:305,
        12:335,
    }
    offset=days_before_month.get(x)
    if offset is None:
        return 0
    return offset+y
def wenjianhuoqu(filepath='d:/python练习文档/全球历史数据/'):
    """Return the paths of all entries directly inside a folder.

    Args:
        filepath: Folder to list. Defaults to the folder the downloads are
            written to. A trailing "/" is added when missing.

    Returns:
        A list of strings, each the folder path followed by an entry name,
        always joined with "/".

    Raises:
        OSError: If the folder cannot be listed.
    """
    if not filepath.endswith('/'):
        filepath=filepath+'/'
    # Plain concatenation keeps "/" as the separator on every platform.
    return [filepath+entry for entry in os.listdir(filepath)]
def lp(p_list):
    """Scrape the China history table from the web and save it as an xls file.

    Args:
        p_list: Population table; entries are indexed as [name, population],
            where the population is a string that is split on ','.

    Returns:
        The number of data rows built (header row not counted).
    """
    url="http://www.sy72.com/covid/index.asp?s1=0&s2=0"
    soup=BeautifulSoup(url_text_get(url),"html.parser")
    # The data lives in the table row with id "cx".
    # NOTE(review): find() returns None when that row is missing, which makes
    # the loop below fail with AttributeError.
    l=soup.find('tr',id="cx")
    data=list()
    adata=list()
    path="D:/python练习文档/全球历史数据/中国.xls"
    # Collect the <span> strings of every child that contains any spans;
    # each such child becomes one list inside data.
    for i in l.children:
        soupl=BeautifulSoup(str(i),"html.parser")
        p=soupl.find_all("span")
        if(p!=[]):
            g=[]
            for k in p:
                g.append(k.string)
            data.append(g)
    # Header row of the output sheet.
    y=['国家','疫情总确诊','疫情治愈','疫情死亡','时间']
    adata.append(y)
    # data[0] holds the dates; data[2], data[3] and data[4] fill the three
    # count columns in header order. One output row is built per date.
    for j in range(len(data[0])):
        t=list()
        t.append("中国")
        t.append(data[2][j])
        t.append(data[3][j])
        t.append(data[4][j])
        # The date is split on "."; its last two parts are taken as month and
        # day and converted to a day number by tjs.
        t.append(str(tjs(int(data[0][j].split(".")[-2]),int(data[0][j].split(".")[-1]))))
        adata.append(t)
    # When the population table has an entry for China, add a population
    # column and a confirmed/population ratio column to every row.
    for j in range(len(p_list)):
        if(p_list[j][0]=="中国"):
            adata[0].append('人口')
            adata[0].append('占比')
            for o in range(1,len(adata)):
                adata[o].append(p_list[j][1])
                adata[o].append(int(adata[o][1])/cf(p_list[j][1].split(',')))
    renkoubaocun(adata, path)
    return len(adata)-1
def riqizhuanhuan(data,p_list,s):
    """Replace dates with day numbers and add population columns.

    Args:
        data: Table as a list of rows; row 0 is the header and column 4 of
            every other row holds a date whose last two "/"-separated parts
            are month and day. Modified in place.
        p_list: Population table of [name, population] entries.
        s: Country name to look up in p_list.

    Returns:
        The same data object after modification.
    """
    for idx in range(1,len(data)):
        try:
            parts=data[idx][4].split('/')
            data[idx][4]=str(tjs(int(parts[-2]),int(parts[-1])))
        except:
            # A row that cannot be converted is reported and left as it is.
            print("第{0:}行出错".format(idx))
    for entry in p_list:
        if entry[0]!=s:
            continue
        data[0].append('人口')
        data[0].append('占比')
        for row in data[1:]:
            row.append(entry[1])
            # Ratio of column 1 to the population figure.
            row.append(int(row[1])/cf(entry[1].split(',')))
    return data
def cf(li):
    """Join the string pieces in li and return the result as an int.

    Used to turn a number that was split on its thousands separators back
    into an integer.
    """
    return int(''.join(li))
def renkoubaocun(l_li,filename2):
    """Write the first seven columns of each row into an xls workbook.

    Args:
        l_li: Rows to write; each row is indexed for columns 0..6.
        filename2: Path the workbook is saved to.

    Writing stops at the first row that cannot be written completely; cells
    written before the failure are kept and all later rows are dropped.
    """
    book=xlwt.Workbook()
    sheet=book.add_sheet('class')
    for row_idx,row in enumerate(l_li):
        try:
            for col in range(7):
                sheet.write(row_idx,col,row[col])
        except:
            # NOTE(review): a row with fewer than seven columns ends the
            # whole export silently — confirm this truncation is intended.
            break
    book.save(filename2)
def d_xieru(p_list,path,s):
    """Read the file at path, convert it for country s and save it back.

    The file is parsed with read_txt, processed by riqizhuanhuan and then
    written to the same path by renkoubaocun.
    """
    rows=read_txt(path)
    converted=riqizhuanhuan(rows,p_list,s)
    renkoubaocun(converted,path)
def read_txt(filename,encoding='ANSI'):
    """Read a tag-delimited text file into a list of rows.

    Every markup tag in a line is replaced by a tab, the line is split on
    tabs and the last field (the one holding the line ending) is dropped.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding of the file. The default 'ANSI' codec is
            only available on Windows; pass another name elsewhere.

    Returns:
        A list with one list of string fields per line.
    """
    data=[]
    # The with-statement closes the file even if decoding fails midway.
    with open(filename,'r',encoding=encoding) as file:
        for row in file:
            fields=re.sub(r'<.*?>',"\t",row).split('\t')
            # The last field only carries the line ending and is discarded.
            # NOTE(review): a final line without a newline loses its last
            # real field the same way — confirm against the file format.
            fields.pop(-1)
            data.append(fields)
    return data
def pxu(e_list):
    """Sort every inner list in place, descending by element [1] of its items.

    Uses a pairwise exchange sort: each position is compared with every
    later position and the two items are swapped when the later one is
    larger.
    """
    for group in e_list:
        size=len(group)
        for left in range(size-1):
            for right in range(left+1,size):
                if group[right][1]>group[left][1]:
                    group[left],group[right]=group[right],group[left]
def xieru(r_lis,e_list,z_list,day):# load the per-day data
    """Fill e_list and z_list with one list per day, read from the workbooks.

    Args:
        r_lis: Paths of the xls workbooks to read.
        e_list: Output list; receives, per day, entries [column 0, column 6].
        z_list: Output list; receives, per day, entries
            [column 0, column 3 / column 1], or [column 0, 0] when that
            ratio cannot be computed.
        day: Upper bound for the day loop and maximum of the progress bar.

    Side effects:
        Shows a progress window; exits the process when Cancel is pressed.
    """
    global window
    # First day number that is looked up in the workbooks.
    n=26
    layout = [[sg.Text('数据加载进度'),sg.Text('0.00%',key='2')],
              [sg.ProgressBar(day, orientation='h', size=(20, 20), key='progressbar')],
              [sg.Cancel()]]
    window1= sg.Window('加载进度', layout)
    progress_bar = window1['progressbar']
    # One pass per day number i; index i-n addresses that day's lists.
    for i in range(n,day+n-1):
        e_list.append([])
        z_list.append([])
        # NOTE(review): every workbook is reopened for every day, so the
        # number of file opens is days x files.
        for j in range(len(r_lis)):
            try:
                data=xlrd.open_workbook(r_lis[j])
                table=data.sheet_by_name(data.sheet_names()[0])
                rowNum=table.nrows
                kli=[]
                cli=[]
                key=0
                for l in range(1,rowNum):
                    # Match the row whose column 4 equals the day number and
                    # whose column 0 equals the value in row 2, column 0.
                    if(int(table.cell(l,4).value)==i and table.cell(l,0).value==table.cell(2,0).value):
                        kli.append(table.cell(l,0).value)
                        kli.append(table.cell(l,6).value)
                        try:
                            cli.append(table.cell(l,0).value)
                            cli.append(int(table.cell(l,3).value)/int(table.cell(l,1).value))
                        except:
                            # The ratio could not be computed; store 0.
                            cli.append(0)
                        key=1
                        # Only the first matching row per workbook is used.
                        break
                if(key==1):
                    e_list[i-n].append(kli)
                    z_list[i-n].append(cli)
            except:
                # Any error while reading a workbook skips that workbook for
                # this day.
                continue
        event, values = window1.read(timeout=20)
        if event=="Cancel":
            window1.close()
            sys.exit(0)
        progress_bar.UpdateBar(i-n+1)
        window1.FindElement("2").Update(str("{:.2f}".format(((i-n)/(day))*100)+'%'))
    window1.close()
    #window.Element('2').Update(disabled=True)
    #window.Element('4').Update(disabled=True)
    # Remove the first empty day list from each output list, if any.
    # NOTE(review): removing an entry shifts the index of every later day,
    # while other code addresses days as index + 26 — confirm this is wanted.
    try:
        e_list.remove([])
        z_list.remove([])
    except:
        return
def plante(e_list,t):# draw the charts in a loop
    """Draw one horizontal bar chart per day, one after another.

    Args:
        e_list: Per-day lists of [name, value] entries; the first 20 entries
            of each day are plotted.
        t: Chart type; 1 uses the infection-rate title and axis label,
            2 uses the cure-rate title and axis label.

    Any exception raised while plotting ends the function silently.
    """
    plt.switch_backend('TkAgg')
    plt.ion()
    matplotlib.rc('font', family='SimHei', weight='bold')
    try:
        for j in range(len(e_list)):
            # Days numbered 153 and 133 are skipped.
            if((j+26)==153 or (j+26)==133 ):
                continue
            else:
                # N is the number of real entries shown, at most 20.
                if(len(e_list[j])>=20):
                    N=20
                else :
                    N=len(e_list[j])
                name=[]
                for i in range(N):
                    name.append(e_list[j][i][0])
                # Pad the labels up to 20 bars.
                for i in range(N,20):
                    name.append('暂无数据')
                city_name = name
                # Reversed so the labels line up with the order of data below.
                city_name.reverse()
                data = []
                # Scale factor: grow by powers of ten until the first entry's
                # value times the factor reaches 50, then divide by 5 if the
                # product exceeds 200.
                # NOTE(review): if the first value is 0 this loop never ends.
                we=10
                while(e_list[j][0][1]*we<50):
                    we=we*10
                if(e_list[j][0][1]*we>200):
                    we=we/5
                # Zero bars for the padding, then the scaled values taken
                # from entry N-1 down to entry 0.
                for i in range(N,20):
                    data.append(0)
                for i in range(N):
                    data.append(((e_list[j][N-i-1][1])*we))
                colors = ['red', 'yellow', 'blue', 'green', 'gray','pink','black']
                colors.reverse()
                plt.barh(range(len(data)),width=data, tick_label=city_name, color=colors)
                # Print each bar's value next to it.
                for a,b in zip(data,range(len(data))):
                    plt.text(a+3, b,'{0:.2f}'.format(a), ha='center', va= 'center',fontsize=7)
                if(t==1):
                    plt.title('2020年第{0:}天全球国家感染率'.format(j+26))
                    plt.xlabel('累计人数/总人口(1/{0:})'.format(we))
                if(t==2):
                    plt.title('2020年第{0:}天全球国家治愈率'.format(j+26))
                    plt.xlabel('治愈人数/总确诊(1/{0:})'.format(we))
                plt.pause(0.275)
                plt.show()
                # The last chart is left on screen instead of being cleared.
                if(j==len(e_list)-1):
                    break
                plt.clf()
    except:
        pass
def guojiachaxun(r_list,s):
    """Read the rows of the workbook whose file name matches country s.

    Args:
        r_list: Workbook paths that use "/" as separator.
        s: Country name; compared with the file name up to its first ".".

    Returns:
        A list with one entry per sheet row (header row included), each
        holding the values of columns 0, 1, 4, 5 and 6 of the first sheet.
    """
    li=[]
    for path in r_list:
        if path.split("/")[-1].split(".")[0]!=s:
            continue
        book=xlrd.open_workbook(path)
        table=book.sheet_by_name(book.sheet_names()[0])
        for l in range(table.nrows):
            li.append([table.cell(l,col).value for col in (0,1,4,5,6)])
    return li
def riqichaxun(e_list,z_list,s):
    """Return the ranking for one date as [name, value, cure ratio] rows.

    Args:
        e_list: Per-day lists of [name, value] entries; index 0 is day 26.
        z_list: Per-day lists of [name, cure ratio] entries, same indexing.
        s: Date written as "month.day".

    Returns:
        A new list of [name, value, cure ratio] rows in the order of
        e_list's entries for that day, or an empty list when the date is
        malformed or outside the loaded range. The inputs are not modified.
    """
    try:
        parts=s.split(".")
        day=tjs(int(parts[0]),int(parts[1]))
        index=day-26
        # A negative index would silently wrap around to a different day.
        if index<0:
            return []
        # Both lists are sorted independently, so the cure ratio has to be
        # matched by country name rather than by position.
        cure_by_country={row[0]:row[1] for row in z_list[index]}
        return [[row[0],row[1],cure_by_country.get(row[0],0)] for row in e_list[index]]
    except (AttributeError,IndexError,TypeError,ValueError):
        return []
if __name__=="__main__":# program entry point
    global window,apath
    apath=''
    sg.theme('Dark Brown 1')
    # Controls: inputs '0' (country) and '1' (date), buttons '5' (country
    # query), '3' (date query), '2' and '4' (charts). All buttons start
    # disabled.
    gn = [
        [sg.InputText(default_text="请输入国家名", key='0', font=("Helvetica", 10)),
         sg.Button('G查询', size=(10, 1),key=('5'), font=("Helvetica", 10),disabled=True),
         sg.Text('叙述', size=(3, 1), font=("Helvetica", 10)),
         sg.Button('确诊统计图', size=(10, 1),key=('2'), font=("Helvetica", 10),disabled=True)],
        [sg.InputText(default_text="请输入日期(x.y)", key='1', font=("Helvetica", 10)),
         sg.Button('T查询', size=(10, 1),key=('3'),font=("Helvetica", 10),disabled=True),
         sg.Text('叙述', size=(3, 1), font=("Helvetica", 10)),
         sg.Button('治愈统计图', size=(10, 1),key=('4'),font=("Helvetica", 10),disabled=True)],
    ]
    headings = ['', '', '', '', '']
    header = [[sg.Text('  ')] + [sg.Text(h, size=(12, 1)) for h in headings]]
    # Element '6' is the multiline box used for all status and result text.
    input_rows = [[sg.Multiline('', key='6', size=(75, 45), autoscroll=True)]]
    layout = gn + header + input_rows
    window = sg.Window('全球新冠历史数据', layout, icon='icon/puple128.ico', font='Courier 12', size=(750, 500))
    event, values = window.read(timeout=100)
    try:
        day=down(yqlj(),renkouhuode(url_infor_get()))# download the files and convert them
    except:
        # NOTE(review): when this branch runs, day is never assigned, so the
        # xieru call below fails with NameError inside its own try.
        window.Element('6').Update( "***全球疫情信息下载失败,将使用历史文件信息!***")
    event, values = window.read(timeout=100)
    # NOTE(review): the success messages below are shown even when the
    # download above failed.
    window.Element('6').Update(values.get('6')+"***全球疫情信息下载转换成功!***")
    window.Element('6').Update(values.get('6') + "***历史疫情保存地址为:{:}***".format(apath))
    window.Element('2').Update(disabled=False)
    window.Element('4').Update(disabled=False)
    window.Element('3').Update(disabled=False)
    window.Element('5').Update(disabled=False)
    event, values = window.read(timeout=100)
    e_list = list(list(list()))
    z_list = list(list(list()))
    try:
        xieru(wenjianhuoqu(), e_list, z_list, day)
    except:
        # Loading failed: disable the chart and date-query buttons.
        window.Element('2').Update(disabled=True)
        window.Element('4').Update(disabled=True)
        window.Element('3').Update(disabled=True)
        window.Element('6').Update(values.get('6') + "***数据载入失败!***")
    event, values = window.read(timeout=100)
    # NOTE(review): this success text is also appended after a load failure.
    window.Element('6').Update(values.get('6') + "***数据加载成功!***"+"\n"+"***请开始进行操作***"+"\n",autoscroll=False)
    # Sort every day's entries in descending order of their value.
    pxu(e_list)
    pxu(z_list)
    # Main event loop; it ends when the window is closed.
    while True:
        event, values = window.read()
        if event in (None,'关闭'):
            break
        # NOTE(review): None can never reach the checks below because the
        # loop has already ended for it above.
        # Country query: list the stored rows of the entered country.
        if event in(None,'5'):
            try:
                t = "{0:<8}{1:<12}{2:<5}{3:<15}{4:<20}"
                window.Element('6').Update(t.format("国家","疫情总确诊","时间","人口","确诊占比"))
                s = values.get('0')
                data = guojiachaxun(wenjianhuoqu(), s)
                if(s=="中国"):
                    # Reverse the China rows, stopping before index 1.
                    data=data[:1:-1]
                for l in range(1,len(data)):
                    ty=[]
                    # Re-read the window so values holds the current text.
                    event, values = window.read(timeout=100)
                    for i in range(len(data[l])):
                        ty.append(str(data[l][i]))
                    window.Element('6').Update(values.get('6') +t.format(ty[0],ty[1],ty[2],ty[3],ty[4]))
            except:
                window.Element('6').Update(values.get('6') + "***错误!***")
        # Date query: list the ranking for the entered date.
        if event in(None,'3'):
            '''try:'''
            t="{0:<4}{1:<8}{2:<20}{3:<20}{4:<6}"
            window.Element('6').Update(t.format("排名","国家","确诊占比","治愈占比","日期"))
            s = values.get('1')
            data = riqichaxun(e_list,z_list,s)
            for l in range(len(data)):
                ty=list()
                event, values = window.read(timeout=100)
                for i in range(len(data[l])):
                    ty.append(str(data[l][i]))
                window.Element('6').Update(values.get('6')+t.format(str(l),ty[0],ty[1],ty[2],s))
            '''except:
                window.Element('6').Update(values.get('6') + "***错误!***")'''
        # Chart buttons: animate the per-day charts for e_list or z_list.
        if event in (None,'2'):
            t=1
            plante(e_list, t)
        if event in (None,'4'):
            t=2
            plante(z_list, t)
    window.close()

 

posted on 2020-12-17 23:13  pcontain  阅读(149)  评论(0)    收藏  举报