requests模拟登录知乎
1、首先分析登录页面。经分析得知,知乎登录的POST数据包含三个字段:_xsrf、password,以及账号字段(手机号登录时为phone_num,邮箱登录时为email):

2、XSRF即跨站请求伪造(Cross-site request forgery)。我在一位大神的博客 http://cuiqingcai.com/2076.html 里找到了相关资料,讲得很清楚,有兴趣可以查看。POST数据中的_xsrf参数就是为了防范XSRF攻击而设置的一个hash值,每次访问主页都会生成这样一个唯一的字符串。这里我们只关注如何取到这个xsrf值。右键查看网页源码可以发现,它位于页面中一个name="_xsrf"的表单字段的value属性里:

这样一来,我们只需要用requests请求页面,拿到响应response之后,用正则匹配出这个xsrf值就行了。解决了这个问题,我们就可以去模拟登录了。
3、直接贴上源码
# -*- coding: utf-8 -*-
"""Simulate a Zhihu login with ``requests``.

A shared ``requests`` session is backed by an on-disk LWP cookie jar
(``cookies.txt``) so cookies saved by a previous login can be reused.
"""
import re

import requests

try:
    import cookielib  # Python 2
except ImportError:
    import http.cookiejar as cookielib  # Python 3

session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
try:
    session.cookies.load(ignore_discard=True)
except IOError:
    # A missing or unreadable cookie file just means "not logged in yet".
    print("cookie未能加载")

agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
header = {
    "HOST": "www.zhihu.com",
    "Referer": "https://www.zhihu.com",
    "User-Agent": agent,
}

# Anchored at both ends so that an e-mail address which merely starts with
# eleven digits (e.g. "13800000000@163.com") is not mistaken for a phone number.
_PHONE_PATTERN = re.compile(r"^1\d{10}$")

# Used with re.search: the token sits somewhere inside a multi-line HTML
# document, so anchoring at the start of the text would miss it.
_XSRF_PATTERN = re.compile(r'name="_xsrf" value="(.*?)"')


def is_login():
    """Return True if the probe page answers with HTTP 200, else False.

    Redirects are not followed, so a redirect response counts as
    "not logged in".
    """
    # NOTE(review): the original comment says this probes the personal-center
    # page, but the URL is an answer page -- confirm it really requires login.
    inbox_url = "https://www.zhihu.com/question/56250357/answer/148534773"
    response = session.get(inbox_url, headers=header, allow_redirects=False)
    return response.status_code == 200


def get_xsrf():
    """Fetch the home page and return its ``_xsrf`` token, or "" if absent."""
    response = session.get("https://www.zhihu.com", headers=header)
    match_obj = _XSRF_PATTERN.search(response.text)
    if match_obj:
        return match_obj.group(1)
    return ""


def get_index():
    """Download the home page and write it to ``index_page.html`` as UTF-8."""
    response = session.get("https://www.zhihu.com", headers=header)
    with open("index_page.html", "wb") as f:
        f.write(response.text.encode("utf-8"))
    print("ok")


def zhihu_login(account, password):
    """Log in to Zhihu with a phone number or an e-mail address.

    Args:
        account: An 11-digit phone number starting with ``1``, or an
            e-mail address (any string containing ``@``).
        password: The account password.

    Returns:
        The response object of the login POST request.

    Raises:
        ValueError: If ``account`` is neither a phone number nor an e-mail.
    """
    if _PHONE_PATTERN.match(account):
        print("手机号码登录")
        post_url = "https://www.zhihu.com/login/phone_num"
        post_data = {
            "_xsrf": get_xsrf(),
            "phone_num": account,
            "password": password,
        }
    elif "@" in account:
        print("邮箱方式登录")
        post_url = "https://www.zhihu.com/login/email"
        post_data = {
            "_xsrf": get_xsrf(),
            "email": account,
            "password": password,
        }
    else:
        raise ValueError(
            "account must be an 11-digit phone number or an e-mail address"
        )

    response = session.post(post_url, data=post_data, headers=header)
    # Persist the cookies so that later runs can reuse them.
    session.cookies.save()
    return response


if __name__ == "__main__":
    # Demo credentials from the write-up; replace them with your own.
    zhihu_login("18782902568", "admin123")
    # Call get_index() here to dump the home page for inspection.
    is_login()

浙公网安备 33010602011771号