爬取校园新闻
# Post metadata: 2018-04-03 16:17, Molemole - Views (253), Comments (0), Bookmark, Report
"""Crawl the campus-news listing page and print details of each article.

For every list item that carries a ``.news-list-title`` element the script
prints the title and the article link, downloads the article page, and prints
five fixed-offset slices of the article's ``.show-info`` text.
"""
import requests
from bs4 import BeautifulSoup

# Listing page the crawl starts from.
LIST_URL = 'http://news.gzcc.cn/html/xiaoyuanxinwen/'

# Seconds to wait for a response; without a timeout a stalled server would
# block the script forever.
REQUEST_TIMEOUT = 10


def fetch_soup(url):
    """Download *url*, decode the body as UTF-8 and return the parsed tree.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Force UTF-8 instead of relying on the encoding guessed by requests.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


list_soup = fetch_soup(LIST_URL)
for news in list_soup.select('li'):
    titles = news.select('.news-list-title')
    if not titles:
        # Not a news entry (no title element in this <li>).
        continue
    print(titles[0].text)

    anchors = news.select('a')
    if not anchors:
        # A title without a link gives nothing to follow; skip the entry
        # instead of failing with IndexError.
        continue
    href = anchors[0].attrs['href']
    print(href)

    detail_soup = fetch_soup(href)
    info_nodes = detail_soup.select('.show-info')
    if not info_nodes:
        # Article page without an info bar; skip instead of raising IndexError.
        continue
    info = info_nodes[0].text

    # Fixed character offsets into the info bar text.
    # NOTE(review): presumably publish time, author, reviewer, source and
    # click count, in that order - the offsets depend on the live page layout
    # and should be confirmed against it.
    print(info[0:25])
    print(info[30:38])
    print(info[38:45])
    print(info[46:56])
    print(info[62:])
浙公网安备 33010602011771号