爬取微信web
"""Small Flask app that walks through the WeChat web QR-code login flow.

Routes:
    /login       fetch a login uuid and render the QR-code page
    /checklogin  poll the WeChat login endpoint for scan / confirm status
    /index       initialise the web session and render the contact list
"""
import os
import re
import time

import requests
from bs4 import BeautifulSoup
from flask import (
    Flask,
    jsonify,
    redirect,
    render_template,
    request,
    session,
    url_for,
)

app = Flask(__name__)
# NOTE(review): debug mode is kept from the original; disable it outside of
# local development.
app.debug = True
# NOTE(review): Flask sessions are signed with this key and the session stores
# the login ticket, so set FLASK_SECRET_KEY to a strong value in any real
# deployment. The original hard-coded value remains the fallback.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'yuio')

# Timeout for ordinary upstream requests, in seconds.
REQUEST_TIMEOUT = 10
# The status poll may be held open by the upstream server, so it gets more time.
POLL_TIMEOUT = 35

QR_UUID_URL = (
    'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb'
    '&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage'
    '&fun=new&lang=zh_CN&_={}'
)
LOGIN_POLL_URL = (
    'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login'
    '?loginicon=true&uuid={0}&tip=0&r=-1051533832&_={1}'
)
INIT_URL = (
    'https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxinit'
    '?r=-1079034806&pass_ticket={}'
)


def parser_zz(arg):
    """Flatten the ``<error>`` XML payload of the ticket response into a dict.

    :param arg: response text, e.g.
        ``<error><ret>0</ret><skey>...</skey><wxsid>...</wxsid>...</error>``
    :return: ``{tag_name: tag_text}`` for every direct child of ``<error>``;
        an empty dict when the payload has no ``<error>`` element.
    """
    sop = BeautifulSoup(arg, 'html.parser')
    ret = sop.find(name='error')
    if ret is None:
        return {}
    # recursive=False restricts find_all to the direct children of <error>
    # instead of descending through every nested level.
    return {child.name: child.text for child in ret.find_all(recursive=False)}


@app.route('/login', methods=['GET', 'POST'])
def login():
    """Request a fresh login uuid and render the QR-code page.

    The uuid is stored in the session under ``code_str`` so that
    ``/checklogin`` can poll its status. A POST is redirected to the GET
    page instead of falling through without a response.
    """
    if request.method == 'POST':
        return redirect(url_for('login'))

    ctime = str(int(time.time() * 1000))
    code_url = QR_UUID_URL.format(ctime)
    try:
        obj = requests.get(code_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return 'Unable to reach the WeChat login server.', 502

    # Expected body:
    #   window.QRLogin.code = 200; window.QRLogin.uuid = "QdsVPRcIdg==";
    uuid_match = re.search(r'uuid = "([^"]*)";', obj.text)
    if uuid_match is None:
        return 'Unexpected response from the WeChat login server.', 502

    code_str = uuid_match.group(1)
    session['code_str'] = code_str  # remembered for the /checklogin polling
    return render_template('login.html', code=code_str)


@app.route('/checklogin', methods=['GET', 'POST'])
def checklogin():
    """Poll the WeChat login endpoint and report the scan status as JSON.

    The polled URL is the one the browser requests while not logged in, e.g.
    ``https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=QbeUOBatKw==&tip=0&r=-1036255891&_=1525749595604``

    :return: JSON object with a ``code`` field:
        ``201`` QR code scanned (``img_src`` holds the avatar when present),
        ``200`` login confirmed and the ticket stored in the session,
        ``400`` no usable uuid or ticket is available,
        ``408`` nothing happened yet / upstream request failed,
        otherwise the status code reported by the upstream server.
    """
    response = {'code': 408}
    code = session.get('code_str')
    if not code:
        # Without a uuid there is nothing to poll; the client needs a new QR code.
        response['code'] = 400
        return jsonify(response)

    ctime = str(int(time.time() * 1000))
    # The uuid already carries its trailing "==", so it is inserted verbatim.
    url = LOGIN_POLL_URL.format(code, ctime)
    try:
        poll = requests.get(url, timeout=POLL_TIMEOUT)
    except requests.RequestException:
        return jsonify(response)

    body = poll.text  # e.g. "window.code=408;"
    status_match = re.search(r'code\s*=\s*(\d+)', body)
    status = int(status_match.group(1)) if status_match else 408

    if status == 201:
        # QR code scanned; the body carries the user's avatar as a data URL.
        response['code'] = 201
        avatar_match = re.search(r"userAvatar = '([^']*)';", body)
        if avatar_match is not None:
            response['img_src'] = avatar_match.group(1)
    elif status == 200:
        redirect_match = re.search(r'redirect_uri="([^"]*)";', body)
        if redirect_match is None:
            return jsonify(response)
        # Request the redirect URL to obtain the credential ticket, e.g.
        # https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxnewloginpage?ticket=...&uuid=...&lang=zh_CN&scan=...&fun=new&version=v2
        redirect_url = redirect_match.group(1) + '&fun=new&version=v2'
        try:
            ticket_obj = requests.get(redirect_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            return jsonify(response)
        # The ticket response is XML:
        #   <error><ret>0</ret><message></message><skey>...</skey>
        #   <wxsid>...</wxsid><wxuin>...</wxuin><pass_ticket>...</pass_ticket>
        #   <isgrayscale>1</isgrayscale></error>
        ticket_res = parser_zz(ticket_obj.text)
        if not ticket_res:
            response['code'] = 400
            return jsonify(response)
        session['ticket'] = ticket_res
        response['code'] = 200
    else:
        # Pass any other upstream status through so the client can tell it
        # apart from the default 408.
        response['code'] = status
    return jsonify(response)


@app.route('/index', methods=['GET', 'POST'])
def index():
    """Initialise the web session and render the user / contact page.

    Uses the ticket stored by ``/checklogin`` to call ``webwxinit``. A visitor
    without a ticket is sent back to ``/login``.
    """
    ticket_dic = session.get('ticket')
    if not ticket_dic:
        return redirect(url_for('login'))

    init_url = INIT_URL.format(ticket_dic.get('pass_ticket'))
    data_obj = {
        'BaseRequest': {
            'DeviceID': "e748673222538702",
            'Sid': ticket_dic.get('wxsid'),
            'Skey': ticket_dic.get('skey'),
            'Uin': ticket_dic.get('wxuin'),
        }
    }
    try:
        init_obj = requests.post(
            url=init_url,
            json=data_obj,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException:
        return 'Unable to reach the WeChat server.', 502

    # Force UTF-8 before decoding so non-ASCII nicknames are not garbled.
    init_obj.encoding = 'utf8'
    try:
        user_obj = init_obj.json()
    except ValueError:
        return 'Unexpected response from the WeChat server.', 502
    return render_template('index.html', user_obj=user_obj)


if __name__ == '__main__':
    app.run()
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Title</title>
</head>
<body>
<h1>helo</h1>
<p style="text-align:center;width:200px;margin:0 auto;">
    <!-- QR code for the login uuid passed in as "code" -->
    <img id="img" style="height:200px;width:200px;" src="https://login.weixin.qq.com/qrcode/{{code}}" alt="">
</p>

<script src="/static/jquery-1.12.4.js"></script>
<script>
    // Start polling as soon as the page is ready.
    $(function () {
        check_sign_in();
    });

    // Poll /checklogin until the login is confirmed.
    function check_sign_in() {
        $.ajax({
            url: '/checklogin',
            type: 'GET',
            dataType: 'JSON',
            success: function (arg) {
                if (arg.code === 201) {
                    // Scanned: swap the QR code for the user's avatar when one was sent.
                    if (arg.img_src) {
                        $("#img").attr('src', arg.img_src);
                    }
                    check_sign_in();
                } else if (arg.code === 200) {
                    // Confirmed: go to the user information page.
                    location.href = '/index';
                } else if (arg.code === 400) {
                    // NOTE(review): 400 is treated as "QR code no longer usable";
                    // confirm against what /checklogin returns. Reload for a fresh code.
                    location.reload();
                } else {
                    check_sign_in();
                }
            },
            error: function () {
                // A failed request must not stop the polling; retry after a short pause.
                setTimeout(check_sign_in, 3000);
            }
        });
    }
</script>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Title</title>
</head>
<body>
{# The init payload nests the logged-in user under "User"; fall back to an
   empty mapping so a payload without it still renders. #}
{% set current_user = user_obj.User or {} %}
<h1>welcome to check in {{ current_user.NickName }}</h1>
<h3>content people</h3>


<ul>
    {# One entry per contact; the span sits inside the li because a ul may
       only contain li children. #}
    {% for i in user_obj.ContactList %}
        <li>{{ i.NickName }} <span>{{ i.UserName }}</span></li>
    {% endfor %}
</ul>
<hr>

</body>
</html>
此功能还有待完善,期待后续......

浙公网安备 33010602011771号