import requests
from bs4 import BeautifulSoup
import string
import time
import datetime
# Scrape the campus-news list page, then fetch every linked article and print
# its metadata (publish time, author, reviewer, source, photographer) and body.

# Seconds to wait for the server before giving up on a request, so that an
# unresponsive host cannot hang the script forever.
REQUEST_TIMEOUT = 10

LIST_URL = "http://news.gzcc.cn/html/xiaoyuanxinwen/"

# Labels looked up in an article's ".show-info" line, in print order.
INFO_LABELS = ('作者:', '审核:', '来源:', '摄影:')


def _labelled_value(info, label):
    """Return the whitespace-delimited value that follows *label* in *info*.

    Returns None when *label* does not occur in *info*; returns an empty
    string when the label is immediately followed by whitespace.

    The label is removed by slicing off exactly ``len(label)`` characters.
    ``str.lstrip(label)`` must not be used for this: it treats its argument
    as a *set of characters* and would also eat leading characters of the
    value that happen to appear in the label.
    """
    start = info.find(label)
    if start == -1:  # str.find signals "absent" with -1; index 0 is a hit
        return None
    # info[start:] begins with the (whitespace-free) label, so the first
    # token always exists and always starts with the label.
    first_token = info[start:].split()[0]
    return first_token[len(label):]


def _publish_time(info):
    """Parse the 'YYYY-mm-dd HH:MM:SS' timestamp following '发布时间:'.

    If the label is absent, the timestamp is expected at the start of *info*.

    Raises:
        ValueError: if the 19 characters examined are not a valid timestamp.
    """
    _, found, tail = info.partition('发布时间:')
    candidate = tail if found else info
    # A 'YYYY-mm-dd HH:MM:SS' timestamp is exactly 19 characters long.
    stamp = candidate.strip()[:19]
    return datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')


head = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}

r = requests.get(LIST_URL, headers=head, timeout=REQUEST_TIMEOUT)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text, 'html.parser')

for item in soup.select('li'):
    titles = item.select(".news-list-title")
    if not titles:
        # Not a news entry (the page has other <li> elements); skip it.
        continue

    list_info = item.select(".news-list-info")[0]
    title = titles[0].text
    listed_time = list_info.contents[0].text
    listed_source = list_info.contents[1].text
    link = item.select("a")[0].attrs['href']
    print("标题:" + title + '\n' + "时间:" + listed_time + '\n' + "来源:" + listed_source + '\n' + "链接:" + link + '\n\n')
    print()
    print()
    print()

    # Fetch the article page itself.
    r1 = requests.get(link, headers=head, timeout=REQUEST_TIMEOUT)
    r1.encoding = 'utf-8'
    detail = BeautifulSoup(r1.text, 'html.parser')
    info_text = detail.select(".show-info")[0].text

    # Echo the info line token by token, leaving out the final token
    # (presumably the click counter -- TODO confirm against the page).
    for token in info_text.split()[:-1]:
        print(token, end=' ')
    print()
    print()

    # Convert the publish time to a datetime object.
    dt = _publish_time(info_text)
    print("datetime类型时间:", end=' ')
    print(dt)
    print()

    # Author, reviewer, source and photographer: print each one present.
    for label in INFO_LABELS:
        value = _labelled_value(info_text, label)
        if value is None:
            continue
        print(label, end=' ')
        print(value)
        print()

    # Print the article body.
    print(detail.select("#content")[0].text)
    print()
    print()
    print()
![]()
![]()
![]()