Python学习笔记第22天

谏言:穷则独善其身,达则兼济天下

图片爬取

# pip install 库  安装第三方库
# import 库 导包

import os

import requests
# 爬虫请求第三方库

from lxml import etree
# 数据提取第三方库

# Address of the page that is requested and scraped for image links.
url = 'http://pic.netbian.com/'

# Running index used to name the saved files (1.jpg, 2.jpg, ...).
count = 1

# Request headers sent along with every HTTP call.
headers = {
    # NOTE: this Cookie has expired and can no longer be used as-is.
    # Adjacent string literals are concatenated into one header value;
    # each literal below holds exactly one "name=value" pair.
    'Cookie': (
        '__cfduid=d752853af80c2e799aa8b8c814bb33d8e1588249955; '
        'zkhanecookieclassrecord=%2C53%2C; '
        'PHPSESSID=0tdkvgklarhle4hkqm8hfia011; '
        'Hm_lvt_526caf4e20c21f06a4e9209712d6a20e=1588252204,1588255851,1588255859,1588257133; '
        'Hm_lpvt_526caf4e20c21f06a4e9209712d6a20e=1588257147'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.122 Safari/537.36'
    ),
}

# Send the request for the listing page.
# A timeout is passed so a stalled connection raises instead of hanging
# forever, and raise_for_status() turns an HTTP error status into an
# exception rather than letting an error page be parsed as if it were
# the listing.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()

# The raw body is decoded as GBK to obtain the HTML text.
response = page.content.decode('gbk')

# Parse the HTML so the image-page link suffixes can be extracted with XPath.
html = etree.HTML(response)

# XPath primer: '//div' selects every <div> element in the page.
# The expression used here collects the href of each thumbnail link:
#   //ul[@class="clearfix"]/li/a/@href
clearfix = html.xpath('//ul[@class="clearfix"]/li/a/@href')
print(clearfix)

# open() does not create missing directories, so make sure the output
# folder exists before the first image is written.
os.makedirs('./img', exist_ok=True)

for url_i in clearfix:
    # Drop the first 8 and the last 5 characters of the href to get the id.
    # NOTE(review): presumably hrefs look like '/tupian/25761.html' --
    # confirm against the live page.
    image_id = url_i[8:-5]

    # Download endpoint, for example:
    #   http://pic.netbian.com/downpic.php?id=25761&classid=66
    #   http://pic.netbian.com/downpic.php?id=25790&classid=60
    # (The host is pic.netbian.com; 'pic/netbian.com' was a typo that sent
    # the request to a host named 'pic'.)
    download_url = 'http://pic.netbian.com/downpic.php?id=' + image_id + '&classid=66'

    img_response = requests.get(download_url, headers=headers, timeout=10)
    if not img_response.ok:
        # Do not save an HTTP error body under a .jpg name.
        print('skip {}: HTTP {}'.format(download_url, img_response.status_code))
        continue

    # 'wb' rather than 'ab': appending on a re-run would glue a second copy
    # onto an existing file. The with-block closes the file even if the
    # write fails.
    with open('./img/{}.jpg'.format(count), 'wb') as f:
        f.write(img_response.content)
    count += 1

 

posted @ 2020-04-30 22:57  过气诗人  阅读(172)  评论(0)    收藏  举报