代码改变世界

爬取校园新闻

2018-04-03 16:17  Molemole  阅读(244)  评论(0)  编辑  收藏  举报

# Scrape the campus-news index page, then visit every linked article and
# print its title, its URL and several fixed-width pieces of the article's
# ".show-info" line.
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

LIST_URL = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'

# Seconds to wait for the server; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Fixed character windows cut out of the ".show-info" text, printed in order.
# NOTE(review): presumably publish time / author / reviewer / source / hit
# count -- the offsets depend on the exact page layout; confirm against a
# live article page.
INFO_SLICES = (
    slice(0, 25),
    slice(30, 38),
    slice(38, 45),
    slice(46, 56),
    slice(62, None),
)


def _fetch_soup(url):
    """Download *url* and return its body parsed with 'html.parser'."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Decode the body as UTF-8 when building ``response.text``.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


for item in _fetch_soup(LIST_URL).select('li'):
    titles = item.select('.news-list-title')
    links = item.select('a')
    # Only <li> elements that carry both a news title and a link are news
    # entries; everything else (menus, footers, ...) is skipped.
    if not titles or not links:
        continue

    href = links[0].attrs.get('href')
    if not href:
        continue

    print(titles[0].text)
    # Resolve relative links against the index page; absolute links are
    # returned unchanged.
    article_url = urljoin(LIST_URL, href)
    print(article_url)

    info_nodes = _fetch_soup(article_url).select('.show-info')
    if not info_nodes:
        # Article page without an info line: nothing more to print.
        continue

    info_text = info_nodes[0].text
    for window in INFO_SLICES:
        print(info_text[window])