[python]爬虫学习（三）糗事百科

import requests
import os
from bs4 import BeautifulSoup
import time


# Download one page of the "hot" listing and parse it into `soup`.
# `page` selects which listing page is requested; it is appended to the URL.
page = 2
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
# Send a desktop-Firefox User-Agent instead of the default one used by requests.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0'}
# Without a timeout requests may wait forever on an unresponsive server.
html = requests.get(url, headers=headers, timeout=10)
# Stop with requests.HTTPError on a 4xx/5xx answer rather than parsing an
# error page as if it were the listing.
html.raise_for_status()
soup = BeautifulSoup(html.text, 'lxml')

# Run one CSS query per field against the parsed page. Each name receives the
# list of every element on the page matching its selector, in the same order
# as the selectors are listed below.
names, years, contents, ups, comments = (
    soup.select(css)
    for css in (
        'div.author h2',
        'div.articleGender',
        'div.content span',
        'div.stats span.stats-vote i.number',
        'div.stats span.stats-comments  a i.number',
    )
)

# Walk the five match lists in lockstep and print one record per position.
# NOTE(review): zip() stops at the shortest list and pairs items purely by
# position; if some post lacks one of the elements the fields may come from
# different posts - confirm against the page markup.
for author, age, body, votes, replies in zip(names, years, contents, ups, comments):
    data = dict(
        name=author.get('title'),  # `title` attribute of the matched <h2>
        year=age.text,
        content=body.text,
        up=votes.text,
        comment=replies.text,
    )
    print(data)

posted @ 2017-01-03 21:39 高中国流 阅读(221) 评论(0) 收藏 举报

刷新页面 返回顶部

高中国流

afacode.top

[python]爬虫学习（三）糗事百科

公告