Python 基础实战 -- 爬虫(天气查询系统)

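# Requirement: look up the weather for a given city.
# Approach: step one, scrape all of the weather information that the
# China Weather site (weather.com.cn) publishes for that city.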
  3 
  4 from html.parser import HTMLParser
  5 from urllib import request
  6 import pickle
  7 import json
  8 
  9 #解析中国天气网HTML
 10 class WeatherHtmlParser(HTMLParser):
 11     def __init__(self):
 12         self.flag = False
 13         self.weather_data = None
 14         super(WeatherHtmlParser,self).__init__()
 15     
 16     def handle_starttag(self,tag,attr):
 17         if tag == "script":
 18             self.flag = True
 19 
 20     def handle_endtag(self,tag):
 21         if tag == "script":
 22             self.flag = False
 23 
 24     def handle_data(self,data):
 25         if self.flag:
 26             if "var hour3data=" in data:
 27                 data = data.strip("\n")
 28                 data = data.strip("var hour3data=")
 29                 self.weather_data = json.loads(data)
 30                 
 31 
 32 #全国城市天气预报代码
 33 class CityCodeHtmlParser(HTMLParser):
 34 
 35     def __init__(self):
 36         self.flag = False
 37         self.city_dict = {}
 38         super(CityCodeHtmlParser,self).__init__()
 39 
 40     def handle_starttag(self,tag,attr):
 41         if tag == "p" or tag == "br":
 42             self.flag = True
 43 
 44     def handle_endtag(self,tag):
 45         if tag == "p" or tag == "br":
 46             self.flag = False
 47 
 48     def handle_data(self,data):
 49         if self.flag:
 50             if "=" in data:
 51                 data = data.split("=")
 52                 self.city_dict[data[1]] = data[0]
 53             
 54                 
 55 def printWeatherInfo(func):
 56     def call():
 57         info = func()
 58         if info == None:
 59             return None
 60 
 61         #一天之内的天气
 62         one_day = info["1d"]
 63         for item in one_day:
 64             item = item.split(",")
 65             print("%s::天气:%s; 温度:%s; 风向:%s; 风力:%s" % (item[0],item[2],item[3],item[4],item[5]))
 66 
 67         #未来7天内的天气
 68         flag = input("是否打印未来7天内的天气:")
 69         if flag == "":
 70             seven_day = info["7d"]
 71             for i in range(7):
 72                 if i >= 1:
 73                     for item in seven_day[i]:
 74                         item = item.split(",")
 75                         print("%s::天气:%s; 温度:%s; 风向:%s; 风力:%s" % (item[0],item[2],item[3],item[4],item[5]))
 76         else:
 77             return None
 78 
 79     return call
 80 
 81 
 82 
 83 #抓取天气信息
 84 @printWeatherInfo
 85 def getAllWeather():
 86     city = input("请输入你要查询的城市:")
 87     city = queryCityCode(city)
 88     if city == None:
 89         return None
 90     url_address = "http://www.weather.com.cn/weather1d/%s.shtml" % city
 91     req = request.Request(url_address)
 92     req.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")
 93     with request.urlopen(req) as html:
 94         data = html.read().decode("utf-8")
 95         html_parser = WeatherHtmlParser()
 96         html_parser.feed(data)
 97         html_parser.close()
 98         return html_parser.weather_data
 99         
100 
101         
102 #查询城市的编码
def queryCityCode(city_name):
    """Return the city code registered for ``city_name``.

    The complete name -> code table is downloaded and parsed with
    ``CityCodeHtmlParser`` on every call.

    Args:
        city_name: city name exactly as it appears in the downloaded table.

    Returns:
        The city code, or ``None`` when the name is not in the table.
    """

    # Author's note: the table is scraped from the web because it could not be
    # uploaded as a file alongside the blog post, so the code was adapted a
    # little.  The author considers this rough and keeps it as a record.
    def _download_city_table():
        table_request = request.Request(
            "http://doc.orz520.com/a/doc/2014/0322/2100581.html",
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
            },
        )
        with request.urlopen(table_request) as response:
            markup = response.read().decode("utf-8")
            parser = CityCodeHtmlParser()
            parser.feed(markup)
            parser.close()
            return parser.city_dict

    # dict.get yields None for an unknown name, same as the explicit check.
    return _download_city_table().get(city_name)
122         
123         
# Script body: run one interactive weather query as soon as the file executes.
getAllWeather()
# Wait for one more line of input before the script ends; the value read is
# not used anywhere in the visible code (presumably this keeps a console
# window open after the output -- confirm).
aa = input()

 

posted @ 2017-12-21 21:32  欧晨曦  阅读(5471)  评论(1)  编辑  收藏  举报