Linux下使用selenium进行网页截图

import os
import time

from pyvirtualdisplay import Display
from selenium import webdriver
 5 
 6 DOMAIN = 'http://127.0.0.1:8000'  # Use flask as the local server
 7 SCREEN_SHOT_IMAGES_DIR = 'html2png/images'
 8 SCREEN_SHOT_FILES_DIR = 'html2png/files'
 9 EXECUTABLE_PATH = './chromedriver'  # 下载chromedriver驱动并放入当前py文件路径
10 # CentOS系统安装以下包
11 # sudo yum install chromedriver chromium xorg-x11-server-Xvfb
12 
13 for _path in [SCREEN_SHOT_IMAGES_DIR, SCREEN_SHOT_FILES_DIR]:
14     os.makedirs(_path, exist_ok=True)
15 
16 
def url2png(url: str, display_size: tuple = (), window_size: tuple = ()):
    """Load ``url`` in Chrome on a virtual display and save a PNG screenshot.

    Args:
        url: Address of the page to load.
        display_size: Optional ``(width, height)`` passed to the virtual
            display; an empty tuple keeps the display's default size.
        window_size: Optional ``(width, height)`` applied to the browser
            window; an empty tuple keeps the browser's default size.

    Returns:
        A dict. On success it contains ``success=True``, ``img_path``,
        ``default_window_size``, ``screenshots_window_size`` and
        ``display_size``. On failure it contains ``success=False`` and
        ``error`` (the exception text), plus any keys set before the failure.
        Exceptions are reported through the dict rather than raised.
    """
    result = {}
    try:
        # Screenshot files must carry a .png suffix. On a name collision add
        # a counter instead of spinning until the clock second changes.
        stamp = int(time.time())
        img_path = os.path.join(SCREEN_SHOT_IMAGES_DIR, f'{stamp}.png')
        attempt = 0
        while os.path.exists(img_path):
            attempt += 1
            img_path = os.path.join(
                SCREEN_SHOT_IMAGES_DIR, f'{stamp}_{attempt}.png'
            )

        display = Display(size=display_size) if display_size else Display()  # e.g. size=(1280, 1024)
        display.start()
        try:
            # NOTE(review): newer Selenium releases replace ``executable_path``
            # with a Service object - confirm against the installed version.
            browser = webdriver.Chrome(executable_path=EXECUTABLE_PATH)
            try:
                result['default_window_size'] = browser.get_window_size()
                if window_size:
                    browser.set_window_size(window_size[0], window_size[1])
                result['screenshots_window_size'] = browser.get_window_size()
                browser.get(url)

                browser.save_screenshot(img_path)
                result['display_size'] = display._size
                result['success'] = True
                result['img_path'] = img_path
            finally:
                # quit() ends the whole driver session, not just the window.
                browser.quit()
        finally:
            # Always stop the virtual display, even if the browser failed to
            # start or to shut down.
            display.stop()
    except Exception as e:
        result['error'] = str(e)
        result['success'] = False
    return result
47 
48 
def text2png(html_text: str, display_size: tuple = (), window_size: tuple = ()):
    """Write ``html_text`` to a file, then screenshot it through the local server.

    The HTML is saved under ``SCREEN_SHOT_FILES_DIR`` and requested as
    ``DOMAIN/static/<path>``, so the local server must expose that directory
    below its ``static`` route.

    Args:
        html_text: HTML markup to render.
        display_size: Forwarded unchanged to ``url2png``.
        window_size: Forwarded unchanged to ``url2png``.

    Returns:
        The dict produced by ``url2png``. If saving the HTML or building the
        URL fails, a dict with ``success=False`` and ``error`` set to the
        exception text.
    """
    result = {}
    try:
        # Mode 'x' creates the file atomically, so a concurrent call cannot
        # overwrite it. On a collision a counter is added to the name.
        stamp = int(time.time())
        attempt = 0
        while True:
            name = f'{stamp}.html' if attempt == 0 else f'{stamp}_{attempt}.html'
            relative_html_path = os.path.join(SCREEN_SHOT_FILES_DIR, name)
            try:
                # Explicit UTF-8 so non-ASCII markup does not depend on the
                # process locale.
                with open(relative_html_path, 'x', encoding='utf-8') as f:
                    f.write(html_text)
                break
            except FileExistsError:
                attempt += 1

        # Build the URL with '/' separators. Keep only the part after a
        # '/static/' segment, if the path has one.
        web_path = relative_html_path.replace('\\', '/').split('/static/')[-1]
        url = '/'.join((DOMAIN.rstrip('/'), 'static', web_path.lstrip('/')))
        result = url2png(url, display_size, window_size)
        # NOTE: the generated HTML file is left on disk; the original had its
        # cleanup disabled.
    except Exception as e:
        result['error'] = str(e)
        result['success'] = False
    return result

 

posted @ 2021-01-07 11:35  士为知己  阅读(627)  评论(0)    收藏  举报