import sys
import requests
from requestium import Session, Keys
import json
import pandas as pd
import time
import pickle
import os
import datetime
import traceback
# Keys copied from every record of the response's 'list' array; the order here
# is also the column order of the exported spreadsheet.
_LISTING_FIELDS = (
    'cqmc', 'fczsh', 'cjsj', 'fwtybh', 'gpfyid', 'gpid', 'gplxrxm', 'jzmj',
    'mdmc', 'scgpshsj', 'tygpbh', 'wtcsjg', 'xqmc', 'xzqhname', 'xzqh',
)


def getdata(selected, startday):
    """Download paged listing records and save them to an Excel workbook.

    A Chrome window is opened on the start page, the fields with ids ``zjhm``
    and ``sjhm`` are pre-filled, and the function then blocks on ``input()``
    until Enter is pressed on the console (presumably so the operator can
    finish signing in by hand). The browser cookies are copied into the HTTP
    session, which then POSTs the form data page by page until a page yields
    no records or cannot be parsed.

    The workbook is written next to this file and is named
    ``info_<selected>_from<startday>_runAt<timestamp>.xlsx``.

    Args:
        selected: Listing type; must be ``'中介'`` or ``'业主'``.
        startday: Value substituted into the form data, e.g. ``'2022-06-01'``.

    Raises:
        KeyError: If ``selected`` is not a known listing type, or a record
            lacks one of the expected fields.
    """
    runtime = datetime.datetime.now().strftime('%Y-%m-%d_%H.%M.%S')
    basefolder = os.path.dirname(__file__)
    session = Session(
        webdriver_path=r'C:\py\tools\chromedriver.exe',
        browser='chrome',
        default_timeout=15,
        # webdriver_options={'arguments': ['headless']}
    )
    driver = session.driver
    try:
        # Make navigator.webdriver read as undefined on every new document.
        driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
            'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
        })
        driver.get('https://1')
        driver.find_element_by_id('zjhm').send_keys('123')
        driver.find_element_by_id('sjhm').send_keys('123')
        # Block until Enter is pressed on the console.
        input()
        session.transfer_driver_cookies_to_session()
        session.headers = {
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Host': 'zn',
            'Origin': 'hn',
            'Referer': 'hte=2',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
        }
        requesturl = 'htx'
        type_codes = {'中介': 1, '业主': 2}
        type_d = type_codes[selected]
        # NOTE(review): this template is %-formatted with three values below
        # (type code, start day, page number), so it needs three placeholders;
        # the literal here looks redacted - confirm the real template.
        formdata_tmp = 'g05003'

        records = []
        page = 1
        while True:
            formdata = formdata_tmp % (type_d, startday, page)
            response = session.post(url=requesturl, data=formdata)
            # Decode once, tolerantly, so the error report below cannot itself
            # fail on a malformed body.
            body = response.content.decode('utf8', errors='replace')
            try:
                rows = json.loads(body)['list']
            except (ValueError, KeyError, TypeError):
                # Not JSON, or JSON without a 'list' entry: report it and stop
                # paging, keeping whatever has been collected so far.
                print(traceback.format_exc(), body)
                rows = []
            if not rows:
                break
            records.extend(
                {field: row[field] for field in _LISTING_FIELDS} for row in rows
            )
            print('%s done' % page, ', 记录: %s' % len(rows))
            # Pause between pages.
            time.sleep(1)
            page += 1

        # Build the frame once instead of concatenating a one-row frame per record.
        result = pd.DataFrame(records)
        result.to_excel(
            os.path.join(
                basefolder,
                'info_%s_from%s_runAt%s.xlsx' % (selected, startday, runtime),
            ),
            index=False,
        )
    finally:
        # Always release the browser process and the HTTP connection pool.
        driver.quit()
        session.close()
if __name__ == '__main__':
    # Script entry point: export the '中介' listings using the start day below.
    selected, startday = '中介', '2022-06-01'
    getdata(selected=selected, startday=startday)