今日学习总结

爬虫学习笔记

import requests
from lxml import etree
import os
# Download cat wallpapers from win4000.com.
#
# Flow: fetch the topic index page, collect the album links it lists, then for
# each album walk its first ten pages and save the main image of every page
# into the local "maomao" directory.

url = "http://www.win4000.com/zt/mao.html"
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}

# Directory (relative to the current working directory) that receives images.
save_dir = "maomao"
# Seconds to wait on any single HTTP request; without a timeout, requests can
# block forever on a stalled connection.
request_timeout = 10
# Number of pages fetched per album (pages 1..10, as in the original script).
pages_per_album = 10

response = requests.get(url=url, headers=header, timeout=request_timeout).text
tree = etree.HTML(response)
# Album page URLs listed on the index page.
leaf = tree.xpath('//div[@class="tab_tj"]//ul[@class="clearfix"]/li/a/@href')

# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(save_dir, exist_ok=True)

for album_url in leaf:
    # Page N of an album is requested as "<album url without .html>_N.html".
    album_base = album_url.split('.html')[0]
    try:
        for page_number in range(1, pages_per_album + 1):
            page_url = album_base + '_' + str(page_number) + '.html'
            page_response = requests.get(
                url=page_url, headers=header, timeout=request_timeout
            )
            page_response.raise_for_status()
            page_tree = etree.HTML(page_response.text)
            # [0] raises IndexError when the page has no main image (for
            # example past the album's last page); the handler below then
            # moves on to the next album.
            image_url = page_tree.xpath(
                '//div[@class="main"]//div[@class="pic-meinv"]/a/img/@src'
            )[0]
            image_response = requests.get(
                url=image_url, headers=header, timeout=request_timeout
            )
            # Do not save an HTTP error body as if it were an image.
            image_response.raise_for_status()
            image_path = os.path.join(save_dir, image_url.split('/')[-1])
            with open(image_path, 'wb') as fp:
                fp.write(image_response.content)
    except Exception as exc:
        # Any failure abandons the rest of this album and continues with the
        # next one. Catching Exception (not a bare except) keeps Ctrl+C and
        # SystemExit working.
        print("出错了", exc)

posted @ 2021-03-31 20:02  禁小呆  阅读(19)  评论(0)    收藏  举报