爬虫基础——示例:微信登陆收发消息

   原理

1. URL  https://wx.qq.com/

          1.1  获取uuid:https://login.wx.qq.com/jslogin?<(时间戳)>

      response 返回  ==> window.QRLogin.code = 200; window.QRLogin.uuid = "QaL1LOI9WQ==";

    1.2 使用uuid生成二维码 <img src="https://login.weixin.qq.com/qrcode/QaL1LOI9WQ==">  

2. 长轮询,等待用户扫码。https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?<(uuid    tip=1   时间戳)>

          2.1 如果没有人扫码,response 返回 window.code=408;  继续轮询

     2.2 有人扫码,response 返回 window.code=201;window.userAvatar = <头像>,等待用户确认  https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?<(uuid    tip=0   时间戳)>

          2.3 确认登陆,https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?<uuid    tip=0   时间戳>的response 返回   window.code=200;  window.redirect_uri="<地址>";   获取登陆cookie   c1

 

          2.4  获取凭证。  window.redirect_uri + &fun=new&version=v2   返回凭证  ,再次获取cookie c2

3. 获取用户信息。https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?<凭证>  response 返回用户信息 User  和 SyncKey

4. 获取所有联系人。https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?<凭证>   ,该url带上 cookie(c1.update(c2))    response 返回所有联系人

5. 发送消息。post 发送 https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?<凭证>

                     由于请求体是一个字典嵌套字典类型,

                     1. 如果字典中没有中文,发送的时候,直接使用 json 参数;

                     2. 如果有中文,要把它变成二进制类型 json.dumps(send_data, ensure_ascii=False).encode(encoding="utf-8"),使用 data参数,请求头加 headers={"Content-Type": "application/json"},

6. 接收消息。也是长轮询。https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck?<凭证+SyncKey> ,携带 cookie(c1.update(c2)),监听消息

                 6.1 如果返回  window.synccheck={retcode:"0",selector:"0"},则没有消息

                 6.2 如果返回  window.synccheck={retcode:"0",selector:"2"} 则有消息。 再发送 post https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?<凭证+synckey>    response 返回消息  和新的 SyncKey

 

   代码实现

# URL routes of the WeChat web-client demo; every route maps to a view in views.py.
urlpatterns = [
    path('admin/', admin.site.urls),
    # Page showing the login QR code.
    re_path(r'^login/$', views.login, name="login"),
    # Chat landing page shown after a successful login.
    re_path(r'^index/$', views.index, name="index"),
    # Long-poll endpoint used by login.html to watch the scan state.
    re_path(r'^check_login/$', views.check_login, name="check_login"),
    # Anchored with ^ like every other route: re_path() searches the path, so
    # without the anchor any path that merely ends in "contact_all/" matched.
    re_path(r'^contact_all/$', views.contact_all, name="contact_all"),
    # Sends a text message to a contact.
    re_path(r'^send_msg/$', views.send_msg, name="send_msg"),
    # Long-poll endpoint used by contact_all.html to receive messages.
    re_path(r'^check_msg/$', views.check_msg, name="check_msg"),
]
urls.py
from django.shortcuts import render, HttpResponse
import requests, time, re, json
from bs4 import BeautifulSoup
# Create your views here.


def login(req):
    """Render the login page that shows the WeChat QR code.

    Requests a fresh login uuid from the WeChat ``jslogin`` endpoint, stores
    it in the session under ``qcode`` and hands it to ``login.html``, which
    uses it to build the QR-code image URL.

    :param req: the incoming Django request.
    :return: the rendered ``login.html`` page, or a 502 response when the
        upstream request fails or its reply contains no uuid.
    """
    # Example request:
    # https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_=1532602804064
    ctime = int(time.time() * 1000)  # millisecond timestamp for the "_" parameter
    base_uri = "https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={0}"
    url = base_uri.format(ctime)

    try:
        # A timeout keeps a stalled upstream from blocking this worker forever.
        res1 = requests.get(url=url, timeout=10)
    except requests.RequestException:
        return HttpResponse("failed to reach the WeChat login service", status=502)

    # Expected reply:
    #   window.QRLogin.code = 200; window.QRLogin.uuid = "QaL1LOI9WQ==";
    # Raw string with escaped dots and a non-greedy group so that only the
    # uuid itself is captured.
    match = re.search(r'window\.QRLogin\.uuid = "(.*?)";', res1.text)
    if match is None:
        # Previously this raised IndexError and surfaced as an opaque 500.
        return HttpResponse("failed to obtain the QR code uuid", status=502)

    qcode = match.group(1)
    req.session['qcode'] = qcode

    return render(req, "login.html", {"qcode": qcode})


def check_login(req):
    """Long-poll WeChat for the QR-code scan state and report it as JSON.

    The response body is ``{"code": <int>, "data": <value>}`` where ``code`` is

    * 408 - nothing happened yet, the client should poll again;
    * 201 - the QR code was scanned, ``data`` holds the user's avatar;
    * 200 - the login was confirmed; cookies and the login ticket were
      stored in the session;
    * 500 - the login could not be completed, ``data`` holds a short reason.

    :param req: the incoming Django request; reads the ``TIP`` query
        parameter and the ``qcode`` session entry written by ``login``.
    :return: an ``HttpResponse`` carrying the JSON document described above.
    """
    res_data = {"code": 408, "data": None}

    qcode = req.session.get('qcode')
    if qcode is None:
        # The login page was never rendered for this session.
        res_data["code"] = 500
        res_data["data"] = "no QR code in session"
        return HttpResponse(json.dumps(res_data))

    ctime = int(time.time() * 1000)
    # login.html starts polling with TIP=1; fall back to that value instead of
    # sending the literal string "None" upstream when the parameter is missing.
    tip = req.GET.get('TIP', '1')
    base_uri = "https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip={1}&r=694021981&_={2}"
    url = base_uri.format(qcode, tip, ctime)

    try:
        # Long poll: allow a generous wait, but never block indefinitely.
        res1 = requests.get(url=url, timeout=60)
    except requests.exceptions.Timeout:
        # Same meaning as "nobody scanned yet": the client simply polls again.
        return HttpResponse(json.dumps(res_data))

    if "window.code=201" in res1.text:
        # Somebody scanned the code; hand the avatar to the page.
        avatar = re.search(r"window\.userAvatar = '(.*?)';", res1.text)
        res_data["code"] = 201
        res_data["data"] = avatar.group(1) if avatar else None
    elif "window.code=200" in res1.text:
        # The user confirmed the login on the phone.
        redirect = re.search(r'window\.redirect_uri="(.*?)";', res1.text)
        if redirect is None:
            res_data["code"] = 500
            res_data["data"] = "missing redirect_uri"
            return HttpResponse(json.dumps(res_data))

        # Cookies issued by the login endpoint.
        req.session['login_cookie'] = res1.cookies.get_dict()

        # Visit redirect_uri to obtain the login ticket.
        redirect_url = "{0}&fun=new&version=v2&lang=zh_CN".format(redirect.group(1))
        res2 = requests.get(url=redirect_url, timeout=10)

        # Cookies issued together with the ticket.
        req.session['ticket_cookie'] = res2.cookies.get_dict()

        soup = BeautifulSoup(res2.text, "html.parser")
        error_tag = soup.find(name="error")
        ticket_dict = {}
        if error_tag is not None:
            # Skip bare text nodes (their name is None) so that only real
            # child elements end up in the ticket.
            ticket_dict = {
                child.name: child.text
                for child in error_tag.children
                if child.name
            }

        # The other views read exactly these fields from the ticket; without
        # them the session must not be marked as logged in.
        required = ("skey", "wxsid", "wxuin", "pass_ticket")
        if not all(ticket_dict.get(key) for key in required):
            res_data["code"] = 500
            res_data["data"] = ticket_dict.get("message") or "login ticket incomplete"
            return HttpResponse(json.dumps(res_data))

        req.session['ticket_dict'] = ticket_dict
        req.session["is_login"] = True
        res_data["code"] = 200

    return HttpResponse(json.dumps(res_data))


def index(req):
    """Load the logged-in user's WeChat data and render the chat page.

    Posts the login ticket kept in the session to ``webwxinit``, stores the
    returned ``User`` and ``SyncKey`` entries in the session (as
    ``current_user_info`` and ``init_sync_key``) and renders ``index.html``
    with the complete reply as ``user_data``.

    :param req: the incoming Django request.
    :return: the rendered ``index.html`` page.
    """
    ticket = req.session['ticket_dict']

    # Example request:
    # https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=617941360&pass_ticket=VSSitrEOjrKhkJwzrepBNJZI7gz98fJcU3zLaKoRnYaaBMQF1XPJ76v%252FXUXXm5f4
    init_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=617941360&pass_ticket={0}".format(
        ticket['pass_ticket']
    )

    base_request = {
        "DeviceID": "e641097429558556",
        "Sid": ticket['wxsid'],
        "Skey": ticket['skey'],
        "Uin": ticket['wxuin'],
    }
    response = requests.post(url=init_url, json={"BaseRequest": base_request})
    response.encoding = "utf-8"

    # Full account payload returned by WeChat.
    profile = json.loads(response.text)

    req.session['current_user_info'] = profile['User']
    # Needed later to listen for new messages.
    req.session['init_sync_key'] = profile['SyncKey']

    return render(req, "index.html", {"user_data": profile})



def contact_all(req):
    """List every contact of the logged-in user.

    Calls ``webwxgetcontact`` with the ticket and the merged login/ticket
    cookies kept in the session and renders ``contact_all.html`` with the
    decoded reply as ``user_dict``.

    :param req: the incoming Django request.
    :return: the rendered ``contact_all.html`` page.
    """
    timestamp_ms = int(time.time() * 1000)
    session = req.session

    # Ticket cookies take precedence over login cookies on a name clash.
    cookies = {**session['login_cookie'], **session['ticket_cookie']}

    ticket = session['ticket_dict']
    url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket={0}&r={1}&seq=0&skey={2}".format(
        ticket['pass_ticket'], timestamp_ms, ticket['skey']
    )

    response = requests.get(url=url, cookies=cookies)
    response.encoding = "utf-8"
    contacts = json.loads(response.text)

    return render(req, "contact_all.html", {"user_dict": contacts})

def send_msg(req):
    """Send a text message to a contact through ``webwxsendmsg``.

    Reads the recipient (``recv``) and the text (``content``) from the query
    string, posts the message with the session's ticket and cookies, prints
    the upstream reply and answers with a fixed placeholder body.

    :param req: the incoming Django request.
    :return: an ``HttpResponse`` whose body is ``"...."``.
    """
    # The millisecond timestamp doubles as the client-side message id.
    msg_id = str(int(time.time() * 1000))

    recipient = req.GET.get("recv")
    text = req.GET.get("content")

    session = req.session
    cookies = dict(session['login_cookie'])
    cookies.update(session['ticket_cookie'])

    ticket = session['ticket_dict']
    base_request = {
        "DeviceID": "e024995249607937",
        "Sid": ticket['wxsid'],
        "Skey": ticket['skey'],
        "Uin": ticket['wxuin'],
    }
    message = {
        'ClientMsgId': msg_id,
        'Content': text,
        'FromUserName': session["current_user_info"]['UserName'],
        'LocalID': msg_id,
        'ToUserName': recipient,
        'Type': 1,
    }
    payload = {"BaseRequest": base_request, "Msg": message, "Scene": 0}

    # Serialise by hand so that Chinese text is sent as UTF-8 rather than as
    # \uXXXX escapes.
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?lang=zh_CN&pass_ticket={0}".format(
        ticket['pass_ticket']
    )
    reply = requests.post(
        url=url,
        data=body,  # a plain dict could use json=... when it holds no Chinese
        headers={"Content-Type": "application/json"},  # required for a raw body
        cookies=cookies,
    )

    print(reply.text)

    return HttpResponse("....")

def check_msg(req):
    """Wait for new WeChat messages (``PENDING=1``) or fetch them (``PENDING=0``).

    The response body is ``{"code": <int>, "data": <value>}`` where ``code`` is

    * 201 - nothing new (also the default), the client should poll again;
    * 200 - ``synccheck`` reported selector ``"2"``; the client should call
      again with ``PENDING=0`` to fetch the messages;
    * 202 - ``data`` holds the fetched messages joined by ``"||"``;
    * 500 - the ``synccheck`` reply was unusable or reported a non-zero
      retcode; the client should stop polling.

    :param req: the incoming Django request; reads the ``PENDING`` query
        parameter and the ticket, cookies and sync key kept in the session.
    :return: an ``HttpResponse`` carrying the JSON document described above.
    """
    res_data = {"code": 201, "data": None}

    ctime = str(int(time.time() * 1000))

    session = req.session
    all_cookie = {}
    all_cookie.update(session['login_cookie'])
    all_cookie.update(session['ticket_cookie'])
    ticket = session['ticket_dict']

    pending = req.GET.get("PENDING")

    if pending == "1":
        base_uri = "https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck"

        # Example request:
        # https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck?r={0}&skey={1}&sid={2}&uin={3}&deviceid=e971783524980667&synckey=1_684933101%7C2_684933158%7C3_684933113%7C11_684932930%7C201_1532754877%7C1000_1532754362%7C1001_1532733674&_={5}
        # The sync key travels as "Key_Val" pairs joined by "|".
        sync_key = "|".join(
            "%s_%s" % (item['Key'], item['Val'])
            for item in session['init_sync_key']['List']
        )

        param_data = {
            "r": ctime,
            "skey": ticket['skey'],
            "sid": ticket['wxsid'],
            "uin": ticket['wxuin'],
            "deviceid": "e446581143835818",
            "synckey": sync_key,
            "_": ctime
        }

        try:
            # Long poll: allow a generous wait, but never block indefinitely.
            res1 = requests.get(
                url=base_uri,
                params=param_data,
                cookies=all_cookie,
                timeout=60
            )
        except requests.exceptions.Timeout:
            # Treated as "nothing new": the client simply polls again.
            return HttpResponse(json.dumps(res_data))

        # Expected reply: window.synccheck={retcode:"0",selector:"2"}
        # Capture retcode as well; the old pattern only matched retcode "0"
        # and raised IndexError on every other reply.
        match = re.search(
            r'window\.synccheck=\{retcode:"([^"]*)",selector:"([^"]*)"\}',
            res1.text
        )
        if match is None or match.group(1) != "0":
            res_data["code"] = 500
            res_data["data"] = "synccheck failed"
        elif match.group(2) == "2":
            res_data["code"] = 200
            res_data["data"] = "get msg"

    elif pending == "0":
        base_get_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid={0}&skey={1}&lang=zh_CN&pass_ticket={2}"
        get_msg_url = base_get_msg_url.format(ticket['wxsid'], ticket['skey'], ticket['pass_ticket'])
        msg_data = {
            "BaseRequest": {
                "DeviceID": "e994644199968030",
                "Sid": ticket['wxsid'],
                "Skey": ticket['skey'],
                "Uin": ticket['wxuin'],
            },
            "SyncKey": session['init_sync_key'],
            "rr": 545089680
        }

        # Send the session cookies here too, as the other authenticated calls
        # do; they were collected above but never passed to this request.
        res1 = requests.post(
            url=get_msg_url,
            json=msg_data,
            cookies=all_cookie,
            timeout=30
        )
        res1.encoding = "utf-8"

        friend_data = json.loads(res1.text)

        # Every sync returns a new key that the next poll must use.
        req.session['init_sync_key'] = friend_data['SyncKey']

        message_list = [
            msg["FromUserName"] + "--->" + msg['ToUserName'] + ":" + msg['Content']
            for msg in friend_data['AddMsgList']
        ]

        res_data["code"] = 202
        res_data["data"] = "||".join(message_list)

    return HttpResponse(json.dumps(res_data))
views.py
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>login</title>
</head>
<body>

{# QR code for the uuid obtained by the login view. #}
<img src="https://login.weixin.qq.com/qrcode/{{ qcode }}" alt="">

<script type="text/javascript" src="/static/jquery-1.12.4.js"></script>
<script>
    // Sent as the "tip" parameter: 1 while waiting for a scan, 0 once the
    // code was scanned and the confirmation is awaited.
    var TIP = 1;

    // Long-poll the server until the login has been confirmed.
    function checkLogin(){
        $.ajax({
            // Trailing slash matches the ^check_login/$ route exactly, so the
            // request no longer depends on a slash-appending redirect.
            url: "/check_login/",
            type: "get",
            data: {"TIP": TIP},
            dataType: "JSON",
            success: function (args) {
                console.log(args.code);
                if(args.code == 408){
                    // Nobody scanned yet: keep polling.
                    checkLogin();
                }else if (args.code == 201) {
                    // Scanned: show the avatar and wait for the confirmation.
                    $("img").attr("src", args.data);
                    TIP = 0;
                    checkLogin();
                }else if(args.code == 200){
                    // Confirmed: go to the chat page.
                    window.location.href = "/index/"
                }

            }
        })
    }

    checkLogin();

</script>

</body>
</html>
login.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>

{# Nickname of the logged-in user followed by the contacts in user_data. #}
<div>
    <h3>个人信息,{{ user_data.User.NickName }}</h3>
    <ol>
    {% for info in user_data.ContactList %}
        <li>{{ info.NickName }}</li>
    {% endfor %}
    </ol>
    <a href="/contact_all/">更多联系人</a>
</div>

{# One heading per entry of MPSubscribeMsgList, each with its article links. #}
<h3>公众号信息</h3>
{% for msg in user_data.MPSubscribeMsgList %}
<h4>{{ msg.NickName }}</h4>
<ol>
    {% for item in msg.MPArticleList %}
    <li><a href="{{ item.Url }}">{{ item.Title }}</a></li>
    {% endfor %}
</ol>
{% endfor %}

</body>
</html>
index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>

{# Form for sending a message; the recipient is a UserName from the list below. #}
<div><h3>发消息</h3>
<p>
    收件人:<input id="recv" type="text">
</p>
    <p>
        内容:<input id="content" type="text">
    </p>
    <p>
        <button id="btn">发送</button>
    </p>
</div>
{# Received messages are appended here by checkMsg(). #}
<div><h3>收消息</h3>
<ol id="msg">

</ol>
</div>

<div>
<h3>联系人</h3>
<ol>
    {% for user in user_dict.MemberList %}
    <li>{{ user.NickName }}-------------->{{ user.UserName }}</li>
    {% endfor %}
</ol>
</div>
<script type="application/javascript" src="/static/jquery-1.12.4.js"></script>
<script>
    $(function () {
        // Send the typed text to the given recipient through the send_msg view.
        $("#btn").click(function () {
            var recv = $("#recv").val();
            var content = $("#content").val();
            $.ajax({
                url: "/send_msg/",
                type: "get",
                data: {"recv": recv, "content": content},
                // The send_msg view answers with plain text, not JSON.
                dataType: "text"
            })
        });

        // 1 = wait for a new-message notification, 0 = fetch the messages.
        var PENDING = 1;

        // Long-poll the check_msg view and show whatever arrives.
        function checkMsg() {
            $.ajax({
                url: "/check_msg/",
                type: "get",
                data: {"PENDING": PENDING},
                dataType: "JSON",
                success: function (args) {
                    if(args.code == 200){
                        // New messages are waiting: fetch them next.
                        PENDING = 0;

                        checkMsg();
                    }else if (args.code == 201) {
                        // Nothing new: keep waiting.
                        PENDING = 1;

                        console.log("pending...");
                        checkMsg();
                    }else if (args.code == 202){
                        // Message text comes from other users, so insert it
                        // as text instead of concatenating it into HTML.
                        if (args.data) {
                            $("<li>").text(args.data).appendTo("#msg");
                        }
                        PENDING = 1;

                        console.log("pending...");
                        checkMsg();
                    }

                }
            })

        }
        checkMsg();
    })
</script>

</body>
</html>
contact_all.html

 

posted @ 2018-07-30 15:14  北风之神Sam  阅读(521)  评论(0)    收藏  举报