# __VIEWSTATE和__EVENTTARGET验证获取,爬取网站通讯录

from pyquery import PyQuery as pq
import requests
import csv

# The landing page is fetched on its own with a plain GET.
def get_env(url="http://localhost:8080/index.htm", timeout=30):
    """Download *url* and return it parsed as a PyQuery document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server; handed straight to
            ``requests.get``.

    Returns:
        A PyQuery object built from the raw response body.
    """
    # requests applies no timeout unless one is given, so a stalled server
    # would otherwise block this call indefinitely.
    res = requests.get(url, timeout=timeout).content
    return pq(res)

# Fetch a follow-up page; also returns the validation strings that the
# request for the next page has to send back.
def get_content(url="http://localhost:8080/index.htm", viewstate="viewstate", enventvalidation="enventvalidation",
                tarpgnumber="2", input="1", opq="", timeout=30):
    """Extract the validation tokens and table rows from one page.

    When *opq* is the empty string the page is requested by POSTing the pager
    form fields to *url*; otherwise *opq* is used as the already-parsed page
    and no request is made.

    Args:
        url: Address the form is POSTed to.
        viewstate: Value sent as the ``__VIEWSTATE`` form field.
        enventvalidation: Value sent as the ``__EVENTVALIDATION`` form field.
        tarpgnumber: Value sent as the ``__EVENTARGUMENT`` form field.
        input: Value sent as the ``AspNetPager1_input`` form field.
        opq: A parsed PyQuery page, or ``""`` to fetch one.
        timeout: Seconds to wait for the server; handed to ``requests.post``.

    Returns:
        A tuple ``(viewstate, enventvalidation, listconter)``: the ``value``
        attributes of the ``__VIEWSTATE`` and ``__EVENTVALIDATION`` inputs
        found in the page, and the cell texts of the second ``tbody`` grouped
        into lists of at most nine items.
    """
    if opq == "":
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"
        }
        parses = {
            "__VIEWSTATE": viewstate,
            "__EVENTTARGET": "aspNetPager1",
            "__EVENTARGUMENT": tarpgnumber,
            "__EVENTVALIDATION": enventvalidation,
            "txtPhone": "",
            "AspNetPager1_input": input,
            "hidestrWhere": "",
        }
        # requests applies no timeout unless one is given.
        res = requests.post(url, data=parses, headers=header, timeout=timeout).content
        opq = pq(res)

    viewstate = opq('input[id="__VIEWSTATE"]').attr("value")
    enventvalidation = opq('input[id="__EVENTVALIDATION"]').attr("value")

    # Group the cells nine at a time so each group can be written as one row.
    listconters = [td.text for td in opq("tbody").eq(1).find("tr").children()]
    step = 9
    listconter = [listconters[i:i + step] for i in range(0, len(listconters), step)]
    return viewstate, enventvalidation, listconter


if __name__ == "__main__":
    opq = get_env()
    viewstate, enventvalidation, listconter = get_content(opq=opq)
    url = "http://localhost:8080/tx.aspx"

    # writerow writes a single row; writerows writes every item of a list as
    # its own row. newline="" stops csv from emitting an extra blank line.
    with open("./通讯录.csv", "a", newline="") as f:
        writer = csv.writer(f)
        for i in range(1, 3):
            # Rows already in hand are written first, then the next page is
            # requested with the tokens taken from the current one.
            # NOTE(review): the rows fetched in the final iteration are never
            # written -- confirm whether that is intended.
            writer.writerows(listconter)
            viewstate, enventvalidation, listconter = get_content(
                url=url,
                viewstate=viewstate,
                enventvalidation=enventvalidation,
                tarpgnumber=i,
                input=i - 1,
                opq="",
            )

  

# posted @ 2020-10-23 13:34  遥月  阅读(376)  评论(0)    收藏  举报