爬取微信web

 

import os
import re
import time

import requests
from bs4 import BeautifulSoup
from flask import Flask, jsonify, redirect, render_template, request, session
  4 
  5 app = Flask(__name__)
  6 app.debug = True
  7 app.secret_key = 'yuio'
  8 
  9 
 10 def parser_zz(arg):
 11     dic = {}
 12     sop = BeautifulSoup(arg, 'html.parser')
 13     ret = sop.find(name='error')
 14     for i in ret.find_all(recursive=False):  # recursive是我们的find_all里面的参数,是否递归着一层一层去寻找指定元素的意思
 15         dic[i.name] = i.text
 16     # print(333, dic)
 17     return dic
 18 
 19 
 20 @app.route('/login', methods=['GET', 'POST'])
 21 def login():
 22     if request.method == 'GET':
 23         ctime = str(int(time.time() * 1000))
 24         code_url = 'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={}'.format(
 25             ctime)
 26         obj = requests.get(code_url)
 27         # print(100000,obj.text)  # window.QRLogin.code = 200; window.QRLogin.uuid = "QdsVPRcIdg==";
 28         # src="https://login.weixin.qq.com/qrcode/QcfrYvucSQ=="
 29         code_str = re.findall('uuid = "(.*)";', obj.text)[0]
 30         # print(code_str)  # oaDRFTvNuA==
 31         session['code_str'] = code_str  # 把随机字符串存入到session里面
 32         return render_template('login.html', code=code_str)
 33     else:
 34         pass
 35 
 36 
 37 @app.route('/checklogin', methods=['GET', 'POST'])
 38 def checklogin():
 39     """
 40     发送get请求检测是否已经扫码,登录
 41     这里的地址是我们的浏览器network里面未登录的请求地址
 42     https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=QbeUOBatKw==&tip=0&r=-1036255891&_=1525749595604
 43     :return:
 44     """
 45     # time.sleep(3)
 46     response = {'code': 408}
 47     code = session.get('code_str')
 48     ctime = str(int(time.time() * 1000))
 49     url = 'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip=0&r=-1051533832&_={1}'.format(
 50 
 51         code, ctime)  # 这里是我们的浏览器里面network里面未登录时的请求地址,需要注意的点是我们的url里面的UUID是带着==号的,而所以占位符需要把这个也占据上
 52     test_url = requests.get(url)
 53     # print(000,test_url.text)  # window.code=408;这里得到的就是我们的浏览器里面preview里面的数据结果
 54     if 'code=201' in test_url.text:
 55         # 扫码成功
 56         img_src = re.findall("userAvatar = '(.*)';", test_url.text)[0]
 57         response['code'] = 201
 58         response['img_src'] = img_src
 59     elif 'code=200' in test_url.text:
 60         redirect_url = re.findall('redirect_uri="(.*)";', test_url.text)[0]
 61         # print(redirect_url)
 62         """
 63         https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxnewloginpage?ticket=A_6I8qWm-a-6scDK6p_3TNed@qrticket_0&uuid=QaxaBtzBUw==&lang=zh_CN&scan=1525790469
 64         """
 65         # 向redirect_url地址发送请求,获取凭证相关信息
 66         redirect_url = redirect_url + '&fun=new&version=v2'
 67         # print("world",redirect_url)
 68         """
 69         world 这里的url点进去就能得到我们的用户凭证
 70         https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxnewloginpage?ticket=A_Bu3RPAC_fMwHRJ1ntI2Wns@qrticket_0&uuid=4ZcidwQIVg==&lang=zh_CN&scan=1525790291&fun=new&version=v2
 71         """
 72         ticket_obj = requests.get(redirect_url)
 73         # print(111, ticket_obj.text)
 74         """
 75         111 <error>
 76         <ret>0</ret>
 77         <message></message>
 78         <skey>@crypt_2a177a7d_e69e10e9caff9e5ddc25becf89e0753c</skey>
 79         <wxsid>kTujiJBABS1fgtZk</wxsid>
 80         <wxuin>888506015</wxuin>
 81         <pass_ticket>qziKwlI2LsjonL0XfHkEZF5Wr7T7ge8TKEiIhjoHjlzg2uzRtxCOhOoTcnz6zl9I</pass_ticket>
 82         <isgrayscale>1</isgrayscale>
 83         </error>
 84         """
 85         ticket_res = parser_zz(ticket_obj.text)
 86         # print(222, ticket_res)
 87         """
 88         {'ret': '0', 
 89         'message': '', 
 90         'skey': '@crypt_2a177a7d_5e04449d5ffe74559df7fe9294656590', 
 91         'wxsid': 'sGpw8F+3fTP0OU75', 
 92         'wxuin': '888506015', 
 93         'pass_ticket': 'DBLpe%2BR71czcBxpH4r7%2BSuqC7lr1W9IEZW9MV7FHQm8xOV5e465PneNiJmBjPCOK', 
 94         'isgrayscale': '1'}
 95         """
 96         session['ticket'] = ticket_res
 97         response['code'] = 200
 98     return jsonify(response)
 99 
100 
@app.route('/index', methods=['GET', 'POST'])
def index():
    """Initialise the logged-in user's data and render ``index.html``.

    Posts the credentials stored in ``session['ticket']`` to the
    ``webwxinit`` endpoint and hands the decoded JSON reply to the template
    as ``user_obj``.  Per the author's note, the related login-page URL only
    shows up in the browser's network panel after the QR code has been
    scanned and the login confirmed on the phone.

    :return: the rendered page; a redirect to ``/login`` when the session
        holds no credentials; a ``(json, 502)`` response when the upstream
        request fails or does not return JSON.
    """
    ticket_dic = session.get('ticket')
    if not ticket_dic:
        # Not logged in yet (or the session expired): start over.
        return redirect('/login')

    init_url = 'https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=-1079034806&pass_ticket={}'.format(
        ticket_dic.get('pass_ticket'))
    data_obj = {
        'BaseRequest': {
            'DeviceID': "e748673222538702",
            'Sid': ticket_dic.get('wxsid'),
            'Skey': ticket_dic.get('skey'),
            'Uin': ticket_dic.get('wxuin'),
        }
    }
    try:
        init_obj = requests.post(url=init_url, json=data_obj, timeout=10)
    except requests.RequestException:
        return jsonify({'error': 'failed to initialise user data'}), 502

    # Set the encoding before the body is decoded.
    init_obj.encoding = 'utf8'
    try:
        user_obj = init_obj.json()
    except ValueError:
        return jsonify({'error': 'upstream did not return JSON'}), 502
    return render_template('index.html', user_obj=user_obj)
133 
134 
if __name__ == '__main__':
    # Executed as a script: start the Flask server with its default settings.
    app.run()
manage.py

 

 1 <!DOCTYPE html>
 2 <html lang="en">
 3 <head>
 4     <meta charset="UTF-8">
 5     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6     <meta name="viewport" content="width=device-width, initial-scale=1">
 7     <title>Title</title>
 8 </head>
 9 <body>
10 <h1>helo</h1>
11 <p style="text-align:center;width:200px;margin:0 auto;">
12     <img id="img" style="height:200px;width:200px;" src="https://login.weixin.qq.com/qrcode/{{code}}" alt="">
13 </p>
14 
15 <script src="/static/jquery-1.12.4.js"></script>
16 <script>
17     $(function () {
18         check_sign_in();
19     })
20 
21     function check_sign_in() {
22         $.ajax({
23             url: '/checklogin',
24             type: 'GET',
25             dataType: 'JSON',
26             success: function (arg) {
27                 //扫码
28                if(arg.code===201){
29                    $("#img").attr('src',arg.img_src);
30                    check_sign_in();
31                }else if (arg.code===200){
32                    //重定向到用户信息页
33                    location.href='/index'
34                }else{
35                    check_sign_in();
36                }
37             }
38         })
39     }
40 </script>
41 </body>
42 </html>
login.html

 

 1 <!DOCTYPE html>
 2 <html lang="en">
 3 <head>
 4     <meta charset="UTF-8">
 5     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6     <meta name="viewport" content="width=device-width, initial-scale=1">
 7     <title>Title</title>
 8 </head>
 9 <body>
10 <h1>welcome to check in {{user_obj.NickName}}</h1>
11 <h3>content people</h3>
12 
13 
14 <ul>
15     {%for i in user_obj.ContactList%}
16     <li>{{i.NickName}}</li>&nbsp;&nbsp;&nbsp;<span>{{i.UserName}}</span>
17     {% endfor %}
18 </ul>
19 <hr>
20 
21 </body>
22 </html>
index.html

 

此功能还有待完善,期待后续......

posted @ 2018-05-09 12:14  dream-子皿  阅读(255)  评论(0)    收藏  举报