from pyquery import PyQuery as pq
import requests
import csv
# The landing page is fetched separately, with a plain GET.
def get_env(url="http://localhost:8080/index.htm", timeout=10):
    """Download *url* with a GET request and return it parsed by PyQuery.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up; without a timeout a stalled server blocks the script forever.

    Returns:
        A PyQuery document built from the raw response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page as data.
    response.raise_for_status()
    return pq(response.content)
# Follow-up pages: also returns the validation strings needed to request the next page.
def get_content(url="http://localhost:8080/index.htm", viewstate="viewstate", enventvalidation="enventvalidation",
                tarpgnumber="2", input="1", opq="", timeout=10):
    """Return the pager state and table rows of one listing page.

    When *opq* is the empty string the page is requested with a form POST
    built from the other arguments; otherwise *opq* is used as the already
    parsed document and no request is made.

    Args:
        url: Address the form is posted to.
        viewstate: Value sent as the ``__VIEWSTATE`` form field.
        enventvalidation: Value sent as the ``__EVENTVALIDATION`` form field.
        tarpgnumber: Value sent as the ``__EVENTARGUMENT`` form field.
        input: Value sent as the ``AspNetPager1_input`` form field. The name
            shadows the builtin but is kept so keyword callers keep working.
        opq: Parsed PyQuery document to read from, or ``""`` to fetch one.
        timeout: Seconds to wait for the server when a request is made.

    Returns:
        A ``(viewstate, enventvalidation, rows)`` tuple: the ``value``
        attributes of the ``__VIEWSTATE`` and ``__EVENTVALIDATION`` inputs
        found in the document, and the cell texts split into lists of at
        most nine items each.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (only when a request is made).
    """
    if opq == "":
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"
        }
        # NOTE(review): "aspNetPager1" and "AspNetPager1_input" differ in
        # letter case -- confirm both match the control ids on the page.
        form = {
            "__VIEWSTATE": viewstate,
            "__EVENTTARGET": "aspNetPager1",
            "__EVENTARGUMENT": tarpgnumber,
            "__EVENTVALIDATION": enventvalidation,
            "txtPhone": "",
            "AspNetPager1_input": input,
            "hidestrWhere": "",
        }
        response = requests.post(url, data=form, headers=headers, timeout=timeout)
        # Fail loudly on 4xx/5xx rather than scraping an error page.
        response.raise_for_status()
        opq = pq(response.content)

    # These hidden fields must be echoed back in the next page request.
    viewstate = opq('input[id="__VIEWSTATE"]').attr("value")
    enventvalidation = opq('input[id="__EVENTVALIDATION"]').attr("value")

    # Flatten every cell of the second <tbody>, then regroup nine cells at a
    # time so each group can be written as a single CSV row.
    cells = [td.text for td in opq("tbody").eq(1).find("tr").children()]
    step = 9
    listconter = [cells[i:i + step] for i in range(0, len(cells), step)]
    return viewstate, enventvalidation, listconter
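# writerow writes a single row; writerows writes each item of a list as its own row. Passing newline="" to open() avoids an extra blank line after every row.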
# Seed the pager state and the first batch of rows from the landing page.
opq = get_env()
viewstate, enventvalidation, listconter = get_content(opq=opq)

url = "http://localhost:8080/tx.aspx"
# The file is opened once, in append mode, for the whole run. No encoding is
# passed, so the platform default is used.
with open("./通讯录.csv", "a", newline="") as f:
    writer = csv.writer(f)
    for i in range(1, 3):
        # Write the rows obtained by the previous fetch, then request the
        # next page using the validation strings that fetch returned.
        writer.writerows(listconter)
        # NOTE(review): the rows fetched in the final iteration are never
        # written -- confirm whether a trailing write is intended.
        viewstate, enventvalidation, listconter = get_content(
            url=url, viewstate=viewstate, enventvalidation=enventvalidation,
            tarpgnumber=i, input=i - 1, opq="")