from bs4 import BeautifulSoup
import requests
from pyecharts import Bar
# Request headers sent with every page fetch; the User-Agent is a desktop
# Chrome browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/65.0.3325.181 Safari/537.36"
    ),
}

# Accumulates one {"city": ..., "min_temp": ...} dict per parsed table row
# across all pages handled by parse_page().
ALL_data = []
def parse_page(url):
    """Fetch one forecast page and append its rows to ``ALL_data``.

    For every ``<table>`` inside the first ``div.conMidtab`` of the page,
    each data row (everything after the two leading header rows) contributes
    one ``{"city": str, "min_temp": int}`` dict to the module-level
    ``ALL_data`` list.

    Rows that have too few cells, no city text, or a temperature cell that
    is not an integer are skipped instead of aborting the whole page.

    Args:
        url: Address of the page to download.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If the page contains no ``div.conMidtab`` element.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    text = response.content.decode("utf-8")

    # html5lib is used instead of lxml: the Hong Kong/Macau/Taiwan page has
    # malformed tags, and the lxml parser is less tolerant of them.
    soup = BeautifulSoup(text, 'html5lib')

    # Only the first conMidtab div is used.
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        raise ValueError("no div.conMidtab found in page: %s" % url)

    for table in conMidtab.find_all('table'):
        # The first two rows of each table are headers.
        rows = table.find_all('tr')[2:]
        for index, tr in enumerate(rows):
            tds = tr.find_all('td')
            # Both tds[1] (first row) and tds[-2] need at least two cells.
            if len(tds) < 2:
                continue

            # The first data row carries an extra leading cell, so the city
            # is in the second cell there and in the first cell elsewhere.
            city_td = tds[1] if index == 0 else tds[0]
            city = next(iter(city_td.stripped_strings), None)

            # The minimum temperature is in the second-to-last cell.
            min_temp = next(iter(tds[-2].stripped_strings), None)
            if city is None or min_temp is None:
                continue

            try:
                min_temp_value = int(min_temp)
            except ValueError:
                # Non-numeric temperature text: skip this row.
                continue

            ALL_data.append({"city": city, "min_temp": min_temp_value})
def main():
    """Scrape every regional page, sort by minimum temperature, and chart it.

    Calls ``parse_page`` for each regional URL (which fills ``ALL_data``),
    sorts ``ALL_data`` in place by ascending ``min_temp``, and renders a bar
    chart of all cities to ``temperature.html``.
    """
    # An ordered tuple rather than a set: set iteration order for strings
    # varies between interpreter runs, and because the sort below is stable
    # that would reorder cities with equal temperatures from run to run.
    urls = (
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    )
    for url in urls:
        parse_page(url)

    # Sort by minimum temperature, lowest first.
    ALL_data.sort(key=lambda data: data['min_temp'])

    cities = [entry['city'] for entry in ALL_data]
    temps = [entry['min_temp'] for entry in ALL_data]

    chart = Bar("中国天气温度排行榜")
    chart.add("", cities, temps)
    chart.render("temperature.html")
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()