python: Capture Full Webpage Screenshots with Selenium
# Create one WebDriver per browser (assumes `from selenium import webdriver`).
# Each name is bound exactly once: binding it a second time would start another
# browser session and leave the first one running without a quit() call.
browser = webdriver.Firefox()
edge = webdriver.Edge()
safari = webdriver.Safari()
chrome = webdriver.Chrome()
https://pypi.org/project/selenium/
案例1:
# encoding: utf-8
# 版权所有 2023 ©涂聚文有限公司
# 许可信息查看:
# 描述:使用 Selenium 对网页进行整页长截图 (full-page "long" screenshot of a web page)
# Author : geovindu,Geovin Du 涂聚文.
# IDE : PyCharm 2023.1 python 311
# Datetime : 2023/10/26 8:54
# User : geovindu
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# Product : PyCharm
# Project : EssentialAlgorithms
# File : LongScreenShot.py
# explain : 学习
import time
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import os
from datetime import datetime
class LongScreenShot(object):
    r"""
    Capture a full-page ("long") screenshot of a web page with headless Chrome.

    Chrome location noted by the original author (Windows):
    "C:\Program Files\Google\Chrome\Application\chrome.exe"
    """

    def __init__(self):
        # Placeholders only; screenShot() does not read either attribute.
        self.url = ""
        # (sic) misspelled attribute name kept so existing attribute access still works.
        self.fielname = ""

    def screenShot(self, weburl: str, filename: str):
        """
        Save a full-page screenshot of ``weburl`` as a PNG file.

        The file is named ``<YYYYmmddHHMMSS>.png`` from the local time of the
        call. The name is a relative path, so the file is created relative to
        the process's current working directory.

        :param weburl: address of the page to capture
        :param filename: accepted for interface compatibility but currently
            ignored; the output name always comes from the timestamp
        :return: None. Any exception is caught and printed, not re-raised.
        """
        try:
            image_name = datetime.now().strftime("%Y%m%d%H%M%S")
            print(image_name)
            print(os.path.abspath(__file__))

            # Headless Chrome with a fixed initial viewport.
            options = Options()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # run in the background
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            try:
                driver.maximize_window()
                driver.get(weburl)
                # Crude wait for the page to load; adjust as needed.
                time.sleep(1)

                # Ask the page for its full rendered size.
                width = driver.execute_script(
                    "return Math.max( document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth );")
                height = driver.execute_script(
                    "return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight );")

                # Grow the window to the page size so <body> is rendered in full.
                driver.set_window_size(width, height)
                full_page = driver.find_element(By.TAG_NAME, "body")
                full_page.screenshot(f"{image_name}.png")
            finally:
                # Always shut the browser down, even if a step above failed,
                # so no Chrome process is left behind.
                driver.quit()
        except Exception as e:
            # Best-effort helper: report the problem instead of raising.
            print(e)
调用:
# Usage example: build the helper from the project's BLL package and capture a page.
lscreen = BLL.LongScreenShot.LongScreenShot()
# The second argument is not used by screenShot(); the PNG is named from the current timestamp.
lscreen.screenShot("https://www.csdn.net/", r"result.png")  # ok
案例2:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import sys
import random
def print_hi(name):
    """Print the greeting ``Hi, <name>`` to standard output."""
    greeting = f'Hi, {name}'
    print(greeting)
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm,geovindu')

    # Headless Chrome with a fixed initial viewport.
    options = webdriver.ChromeOptions()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--start-maximized")
    options.add_argument("--headless")  # run in the background
    options.add_argument("--disable-gpu")

    driver = webdriver.Chrome(options=options)
    try:
        URL = 'https://www.csdn.net/'
        driver.get(URL)
        # Crude wait for the page to load.
        time.sleep(1)

        # Size the window to the document's full scroll dimensions.
        page_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        page_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(page_width, page_height)  # may need manual adjustment
        # maximize_window() was previously called here. It resizes the window
        # again and overrides the page-sized dimensions set above; the original
        # note recorded that only the first screen was captured.

        fullpage = driver.find_element(By.TAG_NAME, 'body')
        fullpage.screenshot('geovindu.png')
    finally:
        # Always close the browser, even when a step above raised.
        driver.quit()
封装类:
# encoding: utf-8
# 版权所有 2023 ©涂聚文有限公司
# 许可信息查看:网页截图
# 描述:
# Author : geovindu,Geovin Du 涂聚文.
# IDE : PyCharm 2023.1 python 311
# Datetime : 2023/10/26 14:36
# User : geovindu
# Product : PyCharm
# Project : pythonWebScreenShot
# File : CaptureWeb.py
# explain : 学习
# python.exe -m pip install --upgrade pip
# pip install Pillow
# pip install selenium
# pip install pywin32
# pip install PyPDF2
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from datetime import datetime
import sys
import random
class CaptureWebFull(object):
    """
    Full-page web screenshot helper built on headless Chrome.
    """

    def __init__(self):
        # Placeholders only; CaptureGetImage() does not read either attribute.
        # (sic) misspelled attribute name kept so existing attribute access still works.
        self.webulr = ""
        self.filename = ""

    def CaptureGetImage(self, weburl: str, filename: str):
        """
        Save a full-page screenshot of ``weburl`` as ``<filename>.png``.

        :param weburl: address of the page to capture
        :param filename: output name without extension (".png" is appended);
            when empty, a ``YYYYmmddHHMMSS`` timestamp of the call is used
        :return: None. Any exception is caught and printed, not re-raised;
            "ok" is printed only when the capture completed without error.
        """
        try:
            # Headless Chrome with a fixed initial viewport.
            options = webdriver.ChromeOptions()
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")  # run in the background
            options.add_argument("--disable-gpu")

            driver = webdriver.Chrome(options=options)
            try:
                imagename = datetime.now().strftime("%Y%m%d%H%M%S")
                driver.get(weburl)
                # Crude wait for the page to load.
                time.sleep(2)

                # Size the window to the document's full scroll dimensions.
                page_width = driver.execute_script('return document.body.parentNode.scrollWidth')
                page_height = driver.execute_script('return document.body.parentNode.scrollHeight')
                driver.set_window_size(page_width, page_height)  # may need manual adjustment
                # No maximize_window() after this point: it would resize the
                # window again and override the page-sized dimensions, leaving
                # only the first screen in the image.

                fullpage = driver.find_element(By.TAG_NAME, 'body')
                if not filename:
                    filename = imagename
                fullpage.screenshot(f'{filename}.png')
            finally:
                # Always close the browser, even when a step above raised.
                driver.quit()
        except Exception as e:
            # Best-effort helper: report the problem instead of raising.
            print(e)
        else:
            print("ok")
调用:
# Usage example: build the helper from the project's BLL package and capture a page.
bl = BLL.CaptureWeb.CaptureWebFull()
# An empty file name makes CaptureGetImage() name the PNG from the current timestamp.
bl.CaptureGetImage("http://www.dusystem.com/", "")
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)
浙公网安备 33010602011771号