一、代码(保存为 debugbrowser.py):
import requests
from requests.exceptions import HTTPError
from pyppeteer.launcher import connect
def get_debugger_url():
    """Return the DevTools WebSocket debugger URL exposed on localhost:9222.

    Queries ``http://localhost:9222/json/version`` and extracts the
    ``webSocketDebuggerUrl`` field from the JSON reply.

    Returns:
        The ``webSocketDebuggerUrl`` string reported by the endpoint.

    Raises:
        RuntimeError: If the endpoint cannot be reached, replies with an
            HTTP error status or a non-JSON body, or the reply carries no
            ``webSocketDebuggerUrl``.
    """
    url = "http://localhost:9222/json/version"
    try:
        # A timeout keeps the caller from hanging forever when nothing is
        # listening properly on the debugging port.
        response = requests.get(url, timeout=10)
        # requests only raises HTTPError when explicitly asked to.
        response.raise_for_status()
        resp = response.json()
    except (requests.RequestException, ValueError) as err:
        # RequestException covers connection errors, timeouts and HTTP
        # errors; ValueError covers an undecodable JSON body.
        raise RuntimeError(
            f"Could not query DevTools endpoint {url}: {err}"
        ) from err
    print(resp)
    if not isinstance(resp, dict) or not resp.get('webSocketDebuggerUrl'):
        detail = resp.get('msg') if isinstance(resp, dict) else None
        raise RuntimeError(
            detail or f"No webSocketDebuggerUrl in response from {url}"
        )
    return resp['webSocketDebuggerUrl']
async def get_browser():
    """Connect pyppeteer to the browser reported by ``get_debugger_url``.

    Prints the WebSocket endpoint before connecting.

    Returns:
        The object returned by pyppeteer's ``connect``.
    """
    ws_endpoint = get_debugger_url()
    print(ws_endpoint)
    connect_options = {
        'browserWSEndpoint': ws_endpoint,
        # None leaves the viewport unset instead of forcing a fixed size.
        'defaultViewport': None,
        'ignoreHTTPSErrors': True,
        # NOTE(review): looks like a launch-time option; confirm that
        # connect() actually honours it.
        'ignoreDefaultArgs': ['--enable-automation'],
        'logLevel': 3,
    }
    return await connect(**connect_options)
二、调用:
import debugbrowser
import asyncio
# Page fetched by main(). The percent-encoded query values decode to
# category=热门 ("popular") and type=全部 ("all").
URL = 'https://movie.douban.com/explore?support_type=movie&is_all=false&category=%E7%83%AD%E9%97%A8&type=%E5%85%A8%E9%83%A8'
async def main():
    """Open URL in a new tab of the attached browser and print its HTML.

    Connects through ``debugbrowser.get_browser()``, opens a page, loads
    ``URL`` with a 30 s navigation timeout, waits 7 s, then prints the
    page content. The tab is closed and the DevTools connection is
    dropped afterwards, even when an error occurs.
    """
    browser = await debugbrowser.get_browser()
    print(browser)
    try:
        page = await browser.newPage()
        try:
            # Register the override before navigating so it is already in
            # place when the page's own scripts read navigator.webdriver;
            # injecting it after goto() is too late for load-time checks.
            await page.evaluateOnNewDocument(
                '''()=>{Object.defineProperties(navigator,{webdriver:{get:()=>false}})}'''
            )
            await page.goto(URL, options={'timeout': 30000})
            # Fixed pause before reading the page content.
            await asyncio.sleep(7)
            doc1 = await page.content()
            print(doc1)
        finally:
            # Do not leave a stray tab behind on every run.
            await page.close()
    finally:
        # The browser was attached to, not launched, so only detach from it
        # rather than closing it.
        await browser.disconnect()
if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion
    # and closes the loop; calling get_event_loop() with no running loop
    # is deprecated in recent Python versions.
    asyncio.run(main())