# 爬虫自动爬取图片 — crawler that automatically scrapes and downloads images
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Author: XiaoHui
# Date: 2022-10-22 10:24


class tupian():
    """Selenium-driven crawler that downloads stock images from vcg.com.

    Workflow: prompt the user for a search keyword, open the vcg.com
    creative-image search page, read the total page count, then walk the
    requested number of result pages and save every thumbnail referenced
    by an ``img/@data-min`` attribute to a local ``.jpg`` file.
    """

    def __init__(self):
        # Raw string: the original "\自动\..." depended on the backslashes
        # not forming recognized escapes, which is fragile (and a
        # DeprecationWarning on modern Python). r"" makes the intent explicit.
        self.sertd = Service(r"\自动\chromedriver.exe")  # path to your own chromedriver.exe
        self.asd = webdriver.Chrome(service=self.sertd)
        # Running counter used to number the saved image files.
        # NOTE(review): a fresh instance is created per keyword, so this
        # resets to 0 each run and earlier downloads get overwritten.
        self.p = 0
        # Browser-like User-Agent; sent with every image request so the
        # CDN does not reject the default python-requests UA.
        self.hert = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.42'
        }

    def mian1(self):
        """Prompt for a keyword, scrape N result pages, save each image.

        Side effects: drives the Chrome instance created in ``__init__``,
        writes ``图片<n>.jpg`` files into the current directory, and always
        quits the browser on exit (even on error).
        """
        name_space = input('输入要下载图片的关键字:')
        try:
            self.asd.get('https://www.vcg.com/creative-image/' + name_space + '/')
            # Scroll to the bottom so lazily loaded thumbnails are rendered.
            js = "var q=document.documentElement.scrollTop=9000"
            self.asd.execute_script(js)
            time.sleep(2)
            page_address = self.asd.current_url   # canonical URL of the search-result page
            FORMAT_HTML = self.asd.page_source    # rendered page source
            bmkp_jump_gnus = '?page='             # query fragment selecting a result page
            m_xpth = etree.HTML(FORMAT_HTML)
            # Second text node of the pagination widget holds the page total.
            list_xpath = m_xpth.xpath('//*[@class="paginationTotal"]/text()')[1]
            print('一共' + list_xpath + '页图片')
            img_form = int(input('获取几页的图片:'))
            for page in range(1, img_form + 1):
                img_html = str(page_address) + bmkp_jump_gnus + str(page)
                self.asd.get(img_html)
                js = "var q=document.documentElement.scrollTop=100000"
                self.asd.execute_script(js)
                time.sleep(1)
                self.asd.refresh()  # refresh so every thumbnail finishes loading
                time.sleep(2)
                imgsh_html = self.asd.page_source
                mnji_xpath1 = etree.HTML(imgsh_html)
                list_xpath11 = mnji_xpath1.xpath('//*[@class="imgWaper"]/img/@data-min')
                # FIX: distinct loop variable — the original reused `i` and
                # shadowed the page counter of the enclosing loop.
                for src in list_xpath11:
                    n = 'https:' + src  # data-min values are protocol-relative
                    # FIX: actually send the User-Agent headers built in
                    # __init__ (they were never used), and bound the request
                    # with a timeout so a stalled CDN cannot hang the crawl.
                    requests_conct = requests.get(n, headers=self.hert, timeout=30).content
                    with open('图片' + str(self.p) + '.jpg', 'wb') as mko:
                        mko.write(requests_conct)
                    print('下载完成')
                    self.p += 1
        finally:
            # FIX: always release the browser, even if scraping fails part-way;
            # the original leaked the Chrome process on any exception.
            self.asd.quit()


if __name__ == "__main__":
    # Repeatedly prompt for keywords; each run uses a fresh browser session.
    # (Guarded so importing this module no longer launches Chrome.)
    while True:
        tupian().mian1()