爬虫学习(九)——登录获取cookie爬取

import urllib.request
import urllib.parse
import http.cookiejar

# http.cookiejar is the standard-library module used here to capture the
# cookies a server sends back, so the script can hold on to them.
# The jar is the in-memory container for those cookies.
cookiejar = http.cookiejar.CookieJar()
# The processor is the urllib handler that is backed by the jar above.
handler = urllib.request.HTTPCookieProcessor(cookiejar=cookiejar)
# Every request in this script goes through this opener, which is built
# around the cookie handler.
opener = urllib.request.build_opener(handler)

# Log in by POSTing the form fields below to the renren.com AJAX login URL.
url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2019131959912"
headers = {
    "User-Agent": " Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
}
# NOTE(review): the account id and password value are hard-coded here;
# presumably they were copied from a captured browser request -- consider
# loading them from the environment instead of committing them to source.
data = {
    "email": "15904923018",
    "icode": "",
    "origURL": "http://www.renren.com/home",
    "domain": "renren.com",
    "key_id": "1",
    "captcha_type": "web_login",
    "password": "3912395285b1bc3a702d4b786c6ca78f31e3eb02581632ff953f3cde5d4a0e4c",
    "rkey": "72b87c9a06689d433fac19cc32918a24",
    "f": "https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DG3uWgGKuXm54K9E_sD5z1fU3YkI4h1UVckfBqdYMWk_%26wd%3D%26eqid%3D979d3c7c0004e681000000025c63fb79",
}
# urlencode() builds the form body as text; it is sent as UTF-8 bytes.
data = urllib.parse.urlencode(data).encode("utf8")
request = urllib.request.Request(url, headers=headers)

# Send the request through the cookie-aware opener rather than a plain
# urllib.request.urlopen() call, so the cookies set by the login response
# are kept for the requests that follow.
# The with-statement closes the HTTP response once the body has been read.
with opener.open(request, data=data) as response:
    # The body is a small JSON document containing the post-login home URL.
    print(response.read().decode("utf8"))

# Example output:
# {"code":true,"homeUrl":"http://www.renren.com/home"}

# Use the cookies obtained at login: pages that show logged-in content
# must be requested with those cookies attached.

get_url = "http://www.renren.com/969727800/profile"
request = urllib.request.Request(get_url, headers=headers)

# The opener still holds the cookies saved during login, so issuing a new
# request through the same opener keeps the session logged in.
# Both the HTTP response and the output file are closed when the block exits.
with opener.open(request) as response1, open("renren1.html", "wb") as tf:
    # Save the raw response bytes unchanged.
    tf.write(response1.read())
posted @ 2019-02-13 20:04  石桥浪子  阅读(3484)  评论(0编辑  收藏  举报