selenium指定谷歌浏览器地址和驱动地址
在scrapy中引入selenium
utils.py
from selenium import webdriver
# 创建谷歌浏览器对象,用selenium控制浏览器访问url
def create_chrome_driver(
    *,
    headless=False,
    chrome_location=r'D:\Code\jobSpider\Chrome-bin\chrome.exe',
    chrome_path=r'D:\Code\jobSpider\chromedriver.exe',
):
    """Create a Selenium-controlled Chrome browser instance.

    Args:
        headless: When True, Chrome is started with ``--headless`` so no
            browser window is shown while crawling.
        chrome_location: Path to the Chrome executable to launch.
        chrome_path: Path to the chromedriver executable.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it when done.

    Raises:
        Whatever Selenium raises if the browser cannot be started or the
        CDP command fails; in the latter case the browser is quit first.
    """
    # Imported locally so this function stays self-contained; the Service
    # object is the Selenium 4 way of pointing at a specific chromedriver.
    from selenium.webdriver.chrome.service import Service

    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')

    # Drop the "enable-automation" switch and the automation extension so
    # the browser looks less like an automated session.
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_experimental_option('useAutomationExtension', False)

    # Use the specified Chrome binary rather than the system default.
    options.binary_location = chrome_location

    # The positional driver path and the ``chrome_options=`` keyword were
    # removed from ``webdriver.Chrome``; pass ``service=`` and ``options=``.
    browser = webdriver.Chrome(
        service=Service(executable_path=chrome_path),
        options=options,
    )

    try:
        # Anti-bot countermeasure: make ``navigator.webdriver`` read as
        # undefined in every new document before page scripts run.
        browser.execute_cdp_cmd(
            'Page.addScriptToEvaluateOnNewDocument',
            {'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'}
        )
    except Exception:
        # Do not leak a running Chrome process if setup fails midway.
        browser.quit()
        raise
    return browser
浙公网安备 33010602011771号