用 lxml 的 etree 和 xpath 来解析 html,获取拉勾网的动态 token

# coding: utf-8
import requests
from lxml import etree
import re
import urllib3
# Silence urllib3 warnings process-wide; the login request below is sent
# with verify=False, which would otherwise emit a warning on every call.
urllib3.disable_warnings()


# Module-level session shared by the functions in this file so that cookies
# set by one request are reused by later ones.
s = requests.Session()

def get_it_execution(timeout=10):
    """Fetch the Lagou login page and extract its anti-forgery token and code.

    Sends a GET request for the login page through the module-level session
    ``s``, joins the text of the nodes matched by ``//script[2]/text()``, and
    pulls out the quoted values that follow ``Token = `` and ``Code = ``.

    Args:
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so a stalled connection cannot hang the caller indefinitely.

    Returns:
        A dict with the keys ``X_Anti_Forge_Token`` and ``X_Anti_Forge_Code``.
        Extraction stops at the first value that cannot be found, so the dict
        may be empty or hold only the token; a failure message is printed in
        that case.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out (the request is intentionally not wrapped in a handler).
    """
    loginurl = "https://passport.lagou.com/login/login.html"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
    }
    # NOTE(review): verify=False turns off TLS certificate verification for
    # this request. It is kept for backward compatibility; confirm whether it
    # is still needed before relying on the returned values.
    r = s.get(loginurl, headers=headers, verify=False, timeout=timeout)
    dom = etree.HTML(r.content)

    # Guard against the parser producing no tree: treat it as "no script
    # text" so the normal not-found path below reports the failure.
    if dom is None:
        script_text = ""
    else:
        script_text = "".join(dom.xpath("//script[2]/text()"))

    patterns = (
        ("X_Anti_Forge_Token", r"Token = '(.+?)'"),
        ("X_Anti_Forge_Code", r"Code = '(.+?)'"),
    )

    tokencode = {}
    for key, pattern in patterns:
        match = re.search(pattern, script_text)
        if match is None:
            # Explicit not-found handling replaces the former bare `except:`,
            # which also swallowed KeyboardInterrupt and unrelated bugs.
            # The message text is runtime output and is kept unchanged.
            print("lt、execution参数获取失败!")
            break
        tokencode[key] = match.group(1)

    return tokencode


if __name__ == "__main__":
    # Script entry point: fetch the values once and print the resulting dict.
    tokens = get_it_execution()
    print(tokens)

 

posted @ 2020-11-12 17:25  天天眠眠  阅读(189)  评论(0)  编辑  收藏  举报