复习ajax异步加载-爬取图片

import requests
from lxml import etree
import re
import os

# Original site: https://www.pexels.com/zh-tw/
# In the browser dev tools (F12) select the XHR tab and keep scrolling down the
# page; more request URLs show up as additional photos are loaded, e.g.:
# url='https://www.pexels.com/zh-tw/?format=js&seed=2019-04-02%2B11%3A58%3A30%2B%2B0000&dark=true&page=3&type='
# That URL can be simplified by keeping only the useful part, which gives:

url=['https://www.pexels.com/zh-tw/?page={}'.format(i) for i in range(3,6)]

# Seconds to wait for a response before giving up, so that a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Compiled once, outside the loops; raw string avoids the invalid "\d" escape.
ID_PATTERN = re.compile(r'\d+')

for i, page_url in enumerate(url):
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    if not response.ok:
        # Do not try to parse an HTTP error page as a photo listing.
        print('skip page {}: HTTP {}'.format(page_url, response.status_code))
        continue
    html = etree.HTML(response.text)

    # Create the per-page output folder, including any missing parent folders.
    dir_path = 'D:/picture/ajax/{}page'.format(i)
    os.makedirs(dir_path, exist_ok=True)

    # Read the image URL and the link from the SAME <article>, so a picture can
    # never be paired with the id of a different article.
    for article in html.xpath('//div[@class="photos__column"]/div/article'):
        image_urls = article.xpath('./a[1]/img/@data-pin-media')
        hrefs = article.xpath('./a[1]/@href')
        if not image_urls or not hrefs:
            continue

        # The link carries the photo's unique id, used as the file name.
        # NOTE(review): assumes the id is the last run of digits in the href
        # (the slug may contain other numbers) - confirm against the site.
        digit_runs = ID_PATTERN.findall(hrefs[0])
        if not digit_runs:
            continue
        photo_id = digit_runs[-1]

        # Download the image bytes and write them to disk.
        res = requests.get(image_urls[0], timeout=REQUEST_TIMEOUT)
        if not res.ok:
            # Avoid saving an HTTP error body under a .jpg name.
            print('skip image {}: HTTP {}'.format(image_urls[0], res.status_code))
            continue
        with open(os.path.join(dir_path, '{}.jpg'.format(photo_id)), 'wb') as f:
            f.write(res.content)
    print(i)

posted on 2019-04-02 21:51 佛大老妖阅读(339) 评论(0) 收藏举报

刷新页面返回顶部

佛大老妖

复习ajax异步加载-爬取图片

导航

公告