爬虫爬取网页的基本代码

import urllib.request,urllib.parse

# Raw cookie string as copied from a browser session. The triple-quoted
# literal spans several source lines, so it contains embedded newlines that
# are collapsed before the value is placed in a header (see `headers` below).
# NOTE(review): this looks like live session credentials (SESSDATA, bili_jct)
# committed in source -- consider loading it from an environment variable or
# a local file instead, and confirm it is meant to be sent to `url`.
strs = \
'''blackside_state=0; buvid4=93AB1303-E725-8C6C-9986-BD42F6123C8F98873-022012417-OiqJJgeV6VQWPbVT2qNDNg==; 
b_nut=1650026964; buvid3=91665260-5783-B121-3A1A-493CE29373B664163infoc; 
rpdid=|(k|YRJuu|lk0J'uYl||kYmYl; DedeUserID=515163213; DedeUserID__ckMd5=3dcc69f5ca845f22; 
b_ut=5; _uuid=5410279A2-CF13-7686-10958-C148E9210D8DF21391infoc; 
buvid_fp=8cc8e7ab9a2e8223fb6f7cf571569c3a; i-wanna-go-back=2; 
hit-dyn-v2=1; CURRENT_BLACKGAP=0; LIVE_BUVID=AUTO7816510559632719; 
fingerprint3=97b9714c6c2575eaac50b9664241605f; go_old_video=1; nostalgia_conf=2; 
CURRENT_FNVAL=4048; CURRENT_QUALITY=80; fingerprint=6669eec625188139cf2cd625cf310b4b; 
SESSDATA=8ef1b2e2,1678238412,9f657*91; bili_jct=5624e0df1821829446d0958af5e27508; 
sid=8kbfybjf; bp_video_offset_515163213=703911862525231153; innersign=0; b_lsid=61E498EE_18324DFE75D; PVID=1'''

# Wrap the request parameters in a Request object.
url = "https://pdai.tech/md/spring/spring-x-framework-helloworld.html"
headers = {
    "user-agent": '''Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.27''',
    # Cookies belong in a request header, not in the request body. Header
    # values must not contain line breaks, so every run of whitespace
    # (including the newlines from the literal above) becomes a single space.
    "cookie": " ".join(strs.split()),
}
# A GET request carries no body; the name is kept so the Request call below
# stays explicit about that.
data = None
request = urllib.request.Request(url=url, data=data, headers=headers, method="GET")
# The context manager closes the underlying connection even if reading or
# decoding fails.
with urllib.request.urlopen(request, timeout=5) as response:
    # Honour the charset declared in Content-Type; fall back to UTF-8 when
    # the server does not declare one.
    charset = response.headers.get_content_charset() or "utf-8"
    print(response.read().decode(charset))

posted @ 2022-09-10 09:12  yfs1024  阅读(1028)  评论(0)    收藏  举报