今日学习总结

爬虫学习笔记

import requests

# Script: download one Baidu search-results page and save it as wd.html
# in the current working directory.

# Search URL; the query keyword is carried in the "wd" parameter.
url = "https://www.baidu.com/s?wd=爬虫"

# Request headers carrying a desktop Chrome User-Agent string, sent in
# place of the library's default User-Agent.
idcard = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}

# A timeout keeps the script from hanging forever if the server never answers.
response = requests.get(url=url, headers=idcard, timeout=10)
# Fail loudly on 4xx/5xx instead of saving an error page as the result.
response.raise_for_status()

# Decoded body of the response (the page's HTML text).
file = response.text

# Write the page as UTF-8 so the Chinese text is stored consistently.
with open("wd.html", "w", encoding="utf-8") as wd:
    wd.write(file)

from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree

# Script: fetch one article page, collect the image URLs found inside the
# <div id="zoom"> element, and save every image into the current directory.

url = "https://www.qqtn.com/article/article_292075_1.html"

# Request headers carrying a desktop Chrome User-Agent string; reused for
# the page request and for every image request.
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}

# A timeout keeps the script from hanging if the server never answers;
# raise_for_status() stops early on 4xx/5xx instead of parsing an error page.
page = requests.get(url=url, headers=header, timeout=10)
page.raise_for_status()
response = page.text

# Parse the HTML and select the src attribute of every <img> that sits in a
# <p> directly under the element with id="zoom".
tree = etree.HTML(response)
leaf = tree.xpath('//div[@id="zoom"]/p/img/@src')

for i in leaf:
    # Resolve relative or protocol-relative src values against the page URL;
    # absolute URLs pass through unchanged.
    img_url = urljoin(url, i)

    # Use the last path segment as the file name, ignoring any query string.
    name = urlsplit(img_url).path.rsplit("/", 1)[-1]
    if not name:
        # No usable file name (e.g. the path ends with "/"); skip this entry.
        continue

    img = requests.get(url=img_url, headers=header, timeout=10)
    # Do not write an HTTP error body to disk as if it were an image.
    img.raise_for_status()
    a = img.content

    # Images are binary data, so write the raw bytes.
    with open(name, "wb") as f:
        f.write(a)

posted @ 2021-03-24 17:18  禁小呆  阅读(26)  评论(0)    收藏  举报