1 # coding=utf-8
2 import json
3 import re
4 import requests
5
def get_data(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests.get`` may wait indefinitely on
            a stalled connection.

    Returns:
        The response text when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: Propagated unchanged on connection
            errors or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    # Force UTF-8 instead of relying on the charset guessed from the headers.
    response.encoding = 'utf-8'
    return response.text
13
# One forecast <li> entry. Five capture groups, in order: day heading,
# weather description, high temperature, low temperature, wind.
# NOTE(review): assumes the high temperature sits in a <span> directly before
# the low-temperature <i> — confirm against the live page markup.
_FORECAST_RE = re.compile(
    r'<li.*?skyid.*?h1>(.*?)</h1>.*?wea">(.*?)</p>'
    r'.*?span>(.*?)</span>.*?i>(.*?)</i>.*?i>(.*?)</i>.*?</li>',
    re.S,
)


def parse_data(html):
    """Yield one dict per forecast entry found in *html*.

    Args:
        html: Page source as a string. ``None`` or an empty string yields
            nothing, so the ``None`` returned by a failed download can be
            passed straight through.

    Yields:
        Dicts with the keys ``'data'`` (day heading), ``'weather'``,
        ``'T'`` (the two temperature fields joined by ``/``) and ``'wind'``.
    """
    if not html:
        return
    for match in _FORECAST_RE.finditer(html):
        day, weather, high, low, wind = match.groups()
        yield {
            # The key is spelled 'data' (not 'date') to keep the existing
            # output format unchanged.
            'data': day,
            'weather': weather,
            'T': high + '/' + low,
            'wind': wind,
        }
24
def write_to_file(content, path='result.txt'):
    """Append *content* to a text file as one JSON document per line.

    Args:
        content: Any JSON-serialisable object.
        path: Destination file; defaults to ``result.txt`` in the current
            working directory. The file is created if it does not exist.
    """
    # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
    line = json.dumps(content, ensure_ascii=False) + '\n'
    # The with-block closes the file on exit, so no explicit close() is needed.
    with open(path, 'a', encoding='utf-8') as f:
        f.write(line)
29
def main():
    """Download the forecast page, then print and save each parsed entry."""
    url = 'http://www.weather.com.cn/weather/101280601.shtml'
    html = get_data(url)
    if html is None:
        # get_data returns None for a non-200 response; passing None on to
        # the regex search would raise TypeError.
        print('Failed to fetch ' + url)
        return
    for item in parse_data(html):
        print(item)
        write_to_file(item)
36
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()