import os
import urllib.request

from lxml import etree
# First listing page: https://sc.chinaz.com/tupian/siwameinvtupian.html
# The value below is the address of the second listing page.
# NOTE(review): no code visible in this file reads this module-level `url`;
# getTenGirlPhote builds the page address itself. Possibly a leftover -- confirm
# before removing.
url = 'https://sc.chinaz.com/tupian/siwameinvtupian_2.html'
# Characters that are path separators or otherwise invalid in file names on
# common file systems; each one is replaced with an underscore.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _page_url(page):
    """Return the listing URL for page number *page* (page 1 has no suffix)."""
    if page == 1:
        return 'https://sc.chinaz.com/tupian/siwameinvtupian.html'
    return 'https://sc.chinaz.com/tupian/siwameinvtupian_' + str(page) + '.html'


def _safe_filename(name):
    """Return *name* with file-system-hostile characters replaced by ``_``."""
    return name.translate(_UNSAFE_FILENAME_CHARS).strip() or 'unnamed'


def getTenGirlPhote(page):
    """Download every image found on one listing page into ``./imgs/``.

    The listing page is fetched with a browser-like ``User-Agent`` header,
    parsed with lxml, and each ``<img>`` carrying a ``data-original``
    attribute is saved as ``./imgs/<alt text>.jpg``.

    Args:
        page: Listing page number. ``1`` selects the un-suffixed first page;
            any other value selects ``siwameinvtupian_<page>.html``.

    Raises:
        urllib.error.URLError: If the listing page or an image cannot be
            fetched.
        UnicodeDecodeError: If the listing page is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
    }
    request = urllib.request.Request(url=_page_url(page), headers=headers)
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(request) as res:
        content = res.read().decode('utf-8')

    tree = etree.HTML(content)
    # Select the <img> elements themselves so that each image's URL and its
    # alt text are read from the same node and can never get out of step.
    images = tree.xpath(
        '//div[@class="tupian-list com-img-txt-list"]/div/img[@data-original]'
    )
    print(len(images))

    out_dir = './imgs/'
    # urlretrieve does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    for img in images:
        # Presumably data-original holds a protocol-relative address
        # ("//host/path"), hence the explicit scheme prefix -- TODO confirm.
        imgUrl = 'https:' + img.get('data-original')
        print(imgUrl)
        # Fall back to the URL's base name when the image has no alt text.
        label = img.get('alt') or os.path.splitext(os.path.basename(imgUrl))[0]
        # NOTE: the custom headers above are only sent with the listing
        # request; urlretrieve is given just the URL and the target path.
        urllib.request.urlretrieve(imgUrl, out_dir + _safe_filename(label) + '.jpg')
# Scrape listing pages 1 through 10 in order, one call per page.
for i in range(1, 11):
    getTenGirlPhote(page=i)