爬虫
获取 cookie(下文简称 ck)
借助 selenium 自动登录并获取 ck
import datetime,openpyxl,pandas as pd,time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
'''
# 将本地浏览器接口开启监听
cmd :
start chrome --flag-switches-begin --flag-switches-end --remote-debugging-port=9887
netstat -ano |findstr 9887 #检查端口是否开启
tasklist |findstr 98616
cmd = 'start chrome --flag-switches-begin --flag-switches-end --remote-debugging-port=9887'
subprocess.Popen(cmd, shell=True)
'''
def _visit(url, settle_seconds=5):
    """Navigate the attached browser to *url*, then pause *settle_seconds* seconds."""
    driver.get(url)
    time.sleep(settle_seconds)


time.sleep(1)  # short safety pause before attaching

# Attach to the Chrome instance that is already listening on the remote
# debugging port instead of launching a fresh, automation-flagged browser.
chrome_options = Options()
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9887")
driver = webdriver.Chrome(options=chrome_options)  # drives the existing window
time.sleep(2)

_visit('链接')  # placeholder: replace with the URL of the site to log in to

# The page shows this message when the session has expired; log in again.
session_expired = "登录会话超时,请重新登录" in driver.page_source
if session_expired:
    driver.get('链接')  # placeholder: replace with the login page URL
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "formsubmitButton"))
    )
    # For a manual login, pause here instead, e.g.:
    #   input('请完成登录,并按 Enter 键继续...')
    # The username field is left as-is; to fill it as well:
    #   driver.find_element(By.XPATH, '//*[@id="username"]').send_keys('')
    password_box = driver.find_element(By.XPATH, '//*[@id="password"]')
    password_box.send_keys('')  # placeholder: put the password here
    time.sleep(2)
    # Submit the login form.
    driver.find_element(By.XPATH, '//*[@id="formsubmitButton"]').click()
    time.sleep(2)  # login sequence ends here

_visit('链接')  # placeholder: replace with the target page URL
# Wait until the page has refreshed and the side menu is visible.
WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.ID, "aside-wrap-menu-top"))
)
time.sleep(5)
_visit('链接')  # reload once more as a safeguard
cookies = driver.get_cookies()
# The author keeps the cookies locally for later reuse; adapt as needed.
# Map cookie name -> value, skipping cookies that carry no value.
loginCookies = {
    cookie['name']: cookie['value']
    for cookie in cookies
    if cookie['value'] is not None
}
# print(loginCookies)
# Serialise as comma-separated 'name':'value' pairs. Joining (rather than
# appending a trailing comma and slicing it off) keeps the closing quote of
# the last value intact and yields "" when there are no cookies.
loginCookiesStr = ",".join(
    f"'{key}':'{value}'" for key, value in loginCookies.items()
)
# Record the cookie string in the 'dmsc' sheet of ck.xlsx: one new row holding
# the current timestamp (column 1) and the cookie string (column 2).
ck_path = 'ck.xlsx'
workbook = openpyxl.load_workbook(ck_path)
sheet = workbook['dmsc']
last_row = sheet.max_row + 1  # first row after the existing data
new_row_values = (pd.Timestamp.now(), loginCookiesStr)
for column_index, cell_value in enumerate(new_row_values, start=1):
    sheet.cell(row=last_row, column=column_index).value = cell_value
workbook.save(ck_path)
网站禁止机器人(自动化程序)登录时的处理
需要先在 cmd 里以远程调试端口启动本地浏览器并开启监听,再让 selenium 接管这个浏览器
# 将本地浏览器接口开启监听
cmd :
start chrome --flag-switches-begin --flag-switches-end --remote-debugging-port=9887
netstat -ano |findstr 9887 #检查端口是否开启
tasklist |findstr 98616
Python 代码:早期让浏览器作为常驻应用运行,后期做自动化时再封装并隐藏处理
import subprocess

# Launch the local Chrome with its remote debugging port open so that
# selenium can attach to it instead of starting its own browser.
cmd = 'start chrome --flag-switches-begin --flag-switches-end --remote-debugging-port=9887'
# shell=True is required: the command string is run through the shell.
subprocess.Popen(cmd, shell=True)
time.sleep(1)  # give the browser a moment to start before attaching

chrome_options = Options()
chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9887")
# `driver` controls the browser window that was just opened.
driver = webdriver.Chrome(options=chrome_options)
time.sleep(2)
文件请求下载
Doc下载文件
import requests

# Template for downloading a file with a POST request.
# Fill in every placeholder below before running.
cookie = {}      # cookies to send, as a name -> value mapping
url = ""         # endpoint that returns the file
header = {
}
form_data = {
}
filePath = ""    # destination path of the downloaded file

response = requests.post(url, headers=header, cookies=cookie, data=form_data)
if response.status_code == 200:
    # Write the raw bytes; the `with` block closes the file even on error.
    with open(filePath, "wb") as file:
        file.write(response.content)
    print("文件下载完成")
else:
    # Do not save an error page as if it were the requested file.
    print(f'Request failed with status code {response.status_code}')
下载2:按 GBK 解码响应,用 pandas 解析为 CSV 后保存
import io

import pandas as pd
import requests

# Template for downloading GBK-encoded CSV data with a POST request.
# Fill in every placeholder below before running.
cookie = {}      # cookies to send, as a name -> value mapping
url = ""         # endpoint that returns the CSV data
header = {
}
form_data = {
}
filePath = ""    # destination path of the saved CSV file

response = requests.post(url, headers=header, cookies=cookie, data=form_data)
response.encoding = 'GBK'  # decode response.text as GBK
if response.status_code == 200:
    # Parse the CSV text into a DataFrame.
    data = pd.read_csv(io.StringIO(response.text))
    # Save it, keeping the GBK encoding.
    data.to_csv(filePath, index=False, encoding='GBK')
else:
    print(f'Request failed with status code {response.status_code}')

浙公网安备 33010602011771号