Web Crawler Basics Exercise

import requests
import re
from bs4 import BeautifulSoup


# Request headers with a browser-like User-Agent
head = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
}

# Fetch the campus news list page and parse it with BeautifulSoup
r = requests.get("http://news.gzcc.cn/html/xiaoyuanxinwen/", headers=head)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text, 'html.parser')
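# Optional hardening of the fetch above (a sketch, not part of the original exercise):
# r.raise_for_status()                # raise if the response status is not 2xx
# r.encoding = r.apparent_encoding    # let requests guess the charset instead of hard-coding utf-8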

print("a标签的链接:")
print(soup.h1)
print()
print("a标签的链接:")
print(soup.a.attrs['href'])
print()
print('All li tags and their contents:')
for each in soup.find_all('li'):
    print(each)

print()
print()
# First item in the news list: headline, link, and the two info fields
a1 = soup.select_one(".news-list").select_one("li")
print(a1.select_one(".news-list-title").text)
print(a1.a.attrs.get('href'))
print(a1.select_one(".news-list-info").contents[0].text)
print(a1.select_one(".news-list-info").contents[1].text)
