selenium模拟点击爬数据

from time import sleep
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
import clipboard
import pandas as pd

# Scrape a Tencent Docs sheet by simulating mouse clicks and Ctrl+C, then
# load the copied cells from the system clipboard and save them to Excel.

# Ignore SSL/certificate errors (works around the company network).
options = webdriver.ChromeOptions()
options.add_argument('-ignore-certificate-errors')
# Fixed: the original argument was '-ignore -ssl-errors'; the stray space
# meant it was not a well-formed switch.
options.add_argument('-ignore-ssl-errors')
# Exclude the 'enable-logging' switch that chromedriver adds by default.
# NOTE(review): the original comment said "Bluetooth settings" - presumably
# this is here to silence Bluetooth-related console noise; confirm.
options.add_experimental_option('excludeSwitches', ['enable-logging'])

# NOTE(review): this is a raw string, so both backslashes are kept in the
# path; left unchanged here.
path = Service(executable_path=r'E:\\chromedriver.exe')
driver = webdriver.Chrome(service=path, options=options)

try:
    # Open the page.
    driver.get('http://doc.weixin.qq.com/sheet/')

    # Fix the window size so the hard-coded pixel offsets below stay valid.
    driver.set_window_size(930, 1000)

    # Wait for the page to load, then left-click at offset (460, 568).
    sleep(5)
    ActionChains(driver).move_by_offset(460, 568).click().perform()

    # Move the pointer back by the same offset and click; per the original
    # comment this click selects the whole table.
    ActionChains(driver).move_by_offset(-460, -568).click().perform()
    sleep(10)

    # Click at offset (20, 154) and then copy with Ctrl+C.
    # Fixed: the original built a "reset" ActionChains without perform()
    # (a no-op) and then moved by (20, 154) a second time, so the pointer
    # ended up at twice the intended offset. The keystroke does not need a
    # pointer move, so it is sent directly after the click.
    ActionChains(driver).move_by_offset(20, 154).click().perform()
    # Fixed: release CONTROL after the copy; the original left it held down.
    (
        ActionChains(driver)
        .key_down(Keys.CONTROL)
        .send_keys('c')
        .key_up(Keys.CONTROL)
        .perform()
    )

    # Give the page a moment to place the selection on the clipboard.
    sleep(1)

    # Parse the clipboard contents into a DataFrame while the browser is
    # still open.
    df = pd.read_clipboard()
finally:
    # Always shut down the browser and the chromedriver process, even when
    # one of the steps above raises.
    driver.quit()

# Drop rows and columns that consist entirely of NaN.
data = df.dropna(axis=0, how='all').dropna(axis=1, how='all')

# Fixed: pass sheet_name by keyword; newer pandas deprecates passing it
# positionally.
data.to_excel(r'E:\\邮箱.xlsx', sheet_name='sheet')

 

posted @ 2022-06-24 15:14  缘故为何  阅读(118)  评论(0)  编辑  收藏  举报