新手 Python 爬虫第 1 爬

点击查看代码
# https://sc.chinaz.com/tupian/meinvtupian.html
# https://sc.chinaz.com/tupian/meinvtupian_2.html
import os
from urllib.parse import urljoin

import requests
from lxml import etree

# Browser-style User-Agent header sent with every request this script makes.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
}

# Ask which listing page to scrape. Reject anything that is not a positive
# integer up front, otherwise the script would either die with a raw
# ValueError traceback or build a URL such as "meinvtupian_0.html".
try:
    page = int(input('请输入页码'))
except ValueError:
    raise SystemExit('页码必须是整数') from None
if page < 1:
    raise SystemExit('页码必须大于等于 1')

# The first listing page has no numeric suffix; later pages are
# "meinvtupian_<page>.html".
if page == 1:
    url = "https://sc.chinaz.com/tupian/meinvtupian.html"
else:
    url = f"https://sc.chinaz.com/tupian/meinvtupian_{page}.html"
print(url)

# Directory the downloaded images are written to (created on demand below).
SAVE_DIR = './美女'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10
# Characters that cannot appear in a file name on common file systems are
# replaced with "_" so that a page title can be used as a file name.
UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Fetch the listing page. A non-2xx answer raises here, because without the
# listing there is nothing to download.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
response.encoding = 'utf-8'

# Keep the parsed document under its own name so the imported `etree` module
# is not overwritten.
tree = etree.HTML(response.text)
divs = tree.xpath('//div[@class="item"]')

os.makedirs(SAVE_DIR, exist_ok=True)

for index, div in enumerate(divs, start=1):
    img_srcs = div.xpath('./img/@data-original')
    titles = div.xpath('./div/a/text()')
    if not img_srcs or not titles:
        # An item without an image address or a title cannot be saved; skip it
        # instead of aborting the whole run with an IndexError.
        print(f'第{index}项缺少图片地址或标题，已跳过')
        continue

    # Removing "_s" presumably turns the thumbnail address into the full-size
    # one -- TODO confirm against the site. urljoin resolves protocol-relative
    # ("//host/path") as well as absolute addresses against the page URL.
    img = urljoin(url, img_srcs[0].replace('_s', ''))
    print(f'正在下载第{index}张图片')

    # Fall back to the item number when the title is blank after cleaning.
    title = titles[0].strip().translate(UNSAFE_FILENAME_CHARS) or str(index)

    try:
        res = requests.get(url=img, headers=headers, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.RequestException as exc:
        # One broken image should not stop the remaining downloads, and an
        # error page must not be written to disk as if it were an image.
        print(f'第{index}张图片下载失败: {exc}')
        continue

    # NOTE(review): the extension is always ".png", whatever format the
    # server actually returned.
    with open(os.path.join(SAVE_DIR, f'{title}.png'), 'wb') as f:
        f.write(res.content)

posted @ 2024-09-23 22:43  神仙不在  阅读(55)  评论(0)    收藏  举报