2016/5/9 14:17:40 python bs4 BeautifulSoup 抓取糗事百科文字信息
2016/5/9 14:17:40
# -*- coding: utf-8 -*-
"""Print the text posts from qiushibaike.com's paginated "text" section."""
import urllib
import urllib.request

from bs4 import BeautifulSoup


class QiuShi:
    """Download pages of the qiushibaike.com text section and print each post.

    Attributes are plain instance fields:
      headers -- HTTP headers sent with every request (only User-Agent is set).
      url     -- URL of the most recently requested page; set by ``query``.
    """

    # Listing URL prefix; the page number is appended to it.
    BASE_URL = 'http://www.qiushibaike.com/text/page/'

    def __init__(self):
        # A browser-like User-Agent replaces urllib's default one.
        # NOTE(review): presumably the site rejects the default agent -- confirm.
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        self.headers = {'User-Agent': user_agent}

    def query(self, page=1):
        """Fetch one listing page and print the text of every post on it.

        Prints the requested URL first, then the text of each
        ``<div class="content">`` element found in the response.

        Args:
            page: Page number appended to the listing URL (defaults to 1).

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` when the
                request fails; no error handling is done here.
        """
        self.url = self.BASE_URL + str(page)
        print(self.url)
        request = urllib.request.Request(self.url, headers=self.headers)
        # BeautifulSoup reads the whole response while it is being
        # constructed, so the connection can be closed right afterwards
        # instead of being left open for every page.
        with urllib.request.urlopen(request) as response:
            bsoup = BeautifulSoup(response, 'html.parser')
        for content in bsoup.find_all('div', {'class': 'content'}):
            print(content.get_text())


if __name__ == '__main__':
    qiushi = QiuShi()
    # Still 35 pages, but numbered from 1 to match query()'s default of
    # page=1; the previous range(35) started at page 0.
    for i in range(1, 36):
        qiushi.query(i)

浙公网安备 33010602011771号