20171123 初学 demo:爬取网页资料
一、工具:VS2015 + Python 3.5
import urllib.request
import urllib.error
import re
# Captures the inner text of every <div class="content"> element.
# re.S lets "." match newlines, so multi-line bodies are captured whole.
# Compiled once here because getcontent() is called once per page.
_CONTENT_RE = re.compile(r'<div class="content">(.*?)</div>', re.S)


def getcontent(url, page):
    """Fetch *url* and print the body of each ``<div class="content">`` block.

    The request is sent with a browser-like User-Agent header. The response
    is decoded as UTF-8 and every match of the content pattern is printed on
    its own line, in document order.

    Args:
        url: Address of the page to download.
        page: Page number supplied by the caller. It is currently unused and
            kept only so existing call sites keep working.

    Returns:
        None. Output goes to stdout.

    Note:
        ``urllib.error.URLError`` (which includes ``HTTPError``) is caught and
        its ``reason`` is printed instead of propagating. Other errors, such
        as ``UnicodeDecodeError`` for non-UTF-8 pages, are not handled here.
    """
    head = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko")
    opener = urllib.request.build_opener()
    opener.addheaders = [head]
    # install_opener replaces the process-wide default opener, so every later
    # urllib.request.urlopen() call also sends this User-Agent.
    urllib.request.install_opener(opener)

    # Keep the try body to the network call only, and close the response
    # deterministically instead of leaking the underlying connection.
    try:
        with urllib.request.urlopen(url) as response:
            data = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        print(e.reason)
        return

    for cont in _CONTENT_RE.findall(data):
        print(cont)
# Script entry: visit listing pages 1 through 28 in order and print the
# content blocks found on each one. The page number is appended to a fixed
# URL prefix and also passed through to getcontent().
base = "https://www.qiushibaike.com/8hr/page/"
for i in range(1, 29):
    getcontent(base + str(i), i)
模仿浏览器访问,用正则表达式匹配内容,并打印结果
也许并不是你需要的内容,这只是我人生的一些痕迹.
-- soar.pang

浙公网安备 33010602011771号