Python:通过 JS 控制滚动条拉取全文;通过 psutil 获取进程 PID,再用 win32gui 找到对应窗口句柄并使程序窗口前置;通过 autopy 实现右键菜单和另存为操作

1.参考

利用 Python + Selenium 自动化快速截图

利用 Python + Selenium 实现对页面的指定元素截图(可截长图元素)

使用python获取系统所有进程PID以及进程名称

 python锁定焦点到指定进程窗口的参考方法

 

2.改进js代码,下拉和上拉,精确判断加载是否结束

#!/usr/bin/env python
# -*- coding: UTF-8 -*
import time

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.action_chains import ActionChains

def scroll_page(url, browser='chrome'):
    """Open *url*, scroll until the infinite list is fully loaded, and return the driver.

    An injected script keeps scrolling the window down in fixed steps and,
    once the bottom is reached, bounces one step back up and down again so
    that lazy loading keeps being triggered.  When the end-of-list marker
    element shows up, a second script appends " scroll-done" to the
    document title, which is the signal that makes the first script stop.

    The sanity checks (text of the last list item, final page title) are
    hard-coded for http://codingpy.com.

    Args:
        url: Address of the page to load.
        browser: 'chrome' starts Chrome; any other value starts Firefox.

    Returns:
        The live WebDriver instance, positioned on the fully loaded page.

    Raises:
        TimeoutException: The end-of-list marker or the updated title did
            not appear in time.
        RuntimeError: The last list item is not the expected article.

    On any failure the browser is shut down before the exception
    propagates, so no orphaned browser process is left behind.
    """
    if browser == 'chrome':
        driver = webdriver.Chrome()
    else:
        # NOTE: ActionChains(driver).context_click(e).perform() misbehaved
        # with Firefox when this was written.
        driver = webdriver.Firefox()
    # webdriver.PhantomJS() can capture full-length screenshots, but it
    # seemed to take very long to load long pages.

    try:
        driver.set_window_size(1200, 900)
        driver.get(url)  # Load page
        start = time.time()
        print(driver.title)

        # Scroll loop: runs inside the page until the title contains
        # "scroll-done".
        driver.execute_script("""
            (function () {
                var y = 0;
                var step = 1000;  //100
                window.scroll(0, 0);

                function f() {
                    if (y < document.body.scrollHeight) {
                        y += step;
                        window.scroll(0, y);
                        setTimeout(f, 100);  //100 递归循环调用
                    } else {
                        if(document.title.indexOf("scroll-done") < 0){  //-1 找不到,还没执行下文的driver.execute_script
                            y -= step;
                            window.scroll(0, y);
                            setTimeout(f, 100);  //100
                            //window.scroll(0, 0);
                            //document.title += "scroll-done";
                        }
                        //else{
                            //window.scroll(0, 0);
                        //}
                    }
                }

                setTimeout(f, 1000);  //1000
            })();
        """)

        # Wait for the "no more items" marker that is rendered once the
        # list has been pulled to its end; raises TimeoutException otherwise.
        WebDriverWait(driver, 500).until(
            lambda x: x.find_element_by_xpath(
                '//div[@style="text-align:center"]/em'))

        # Tell the scroll loop above to stop bouncing at the bottom.
        driver.execute_script("""
            (function () {
                function f() {
                    document.title += " scroll-done";
                }
                setTimeout(f, 1000);
            })();
        """)

        # Expected layout:
        #   <div class="js-infinite-layout">
        #     <div class="js-infinite-item"> ... last article header ...
        # text() cannot be written inside the XPath itself, so read .text.
        rst = driver.find_element_by_xpath(
            '//div[@class="js-infinite-item"][last()]'
            '//div[@class="header"]').text
        print(rst)
        if rst != u'Jetbrains公司正式发布Pycharm 5':
            raise RuntimeError('wrong!!!')

        print(time.time() - start)
        print(driver.title)
        # The title change is scheduled with setTimeout, so poll for it
        # instead of sleeping; raises TimeoutException otherwise.
        WebDriverWait(driver, 10).until(
            lambda x: x.title == u'编程派 | Coding Python scroll-done')
        print(driver.title)
    except BaseException:
        # Do not leak the browser process when loading or verification fails.
        driver.quit()
        raise
    return driver

 

3.通过pid进程号和窗口标题定位窗口并前置

import win32con
import win32gui
import win32process
import psutil

def get_hwnds_for_pid(pid):
    """Return the handles of all visible, enabled top-level windows owned by *pid*.

    Args:
        pid: Process id whose windows are wanted.

    Returns:
        A list of window handles (possibly empty).
    """
    def callback(hwnd, hwnds):
        if win32gui.IsWindowVisible(hwnd) and win32gui.IsWindowEnabled(hwnd):
            _, found_pid = win32process.GetWindowThreadProcessId(hwnd)
            if found_pid == pid:
                hwnds.append(hwnd)
        # A true return value keeps EnumWindows enumerating.
        return True

    hwnds = []
    win32gui.EnumWindows(callback, hwnds)
    return hwnds


def _window_text_as_unicode(raw_title):
    """Return a window title as text, decoding byte strings with the ANSI code page."""
    if isinstance(raw_title, bytes):
        # Byte titles (Python 2 pywin32) are in the system ANSI code page,
        # which is what the 'mbcs' codec stands for on Windows.
        return raw_title.decode('mbcs')
    return raw_title


def set_process_foreground(pid_part_name, pid_window_text):
    """Bring to the foreground the first matching window of a matching process.

    A process matches when *pid_part_name* occurs in its executable name
    (case-insensitive, e.g. 'chrome' matches 'chrome.exe' and 'excel'
    matches 'EXCEL.EXE').  A window matches when *pid_window_text* occurs
    in its title.

    Args:
        pid_part_name: Fragment of the process name to look for.
        pid_window_text: Fragment of the window title to look for.

    Raises:
        RuntimeError: No window of any matching process has a matching title.
    """
    wanted_name = pid_part_name.lower()

    candidates = []
    for proc in psutil.process_iter():
        try:
            proc_name = proc.name()
        except psutil.Error:
            # The process exited meanwhile or may not be inspected; skip it
            # instead of aborting the whole search.
            continue
        if wanted_name in proc_name.lower():
            candidates.append((proc.pid, proc_name))

    for pid, proc_name in candidates:
        for hwnd in get_hwnds_for_pid(pid):
            # Sample matches seen in practice:
            #   92292 chrome.exe 137328 <page title> - Google Chrome
            #   EXCEL.EXE 857830 Microsoft Excel - Book1.xlsx
            #   90644 firefox.exe 595556 <page title> scroll-done - Mozilla Firefox
            raw_title = win32gui.GetWindowText(hwnd)
            if pid_window_text in _window_text_as_unicode(raw_title):
                print('%s %s %s %s' % (pid, proc_name, hwnd, raw_title))
                win32gui.SetForegroundWindow(hwnd)
                return
    raise RuntimeError('process not found')

 

4.通过autopy实现右键操作,以及网页另存为

from autopy import key, mouse
def save_result(driver):
    """Save the current page three ways, all named after the current HHMMSS time.

    1. Write ``driver.page_source`` (UTF-8 encoded) to ``<HHMMSS>.html``.
    2. Bring the browser window to the foreground and drive its "Save as"
       dialog with simulated keystrokes (Ctrl+S, type the name, Enter).
    3. Take a screenshot into ``<HHMMSS>.png``.

    The driver is left open; closing it is up to the caller.

    Args:
        driver: WebDriver instance showing the page to save.
    """
    stamp = time.strftime('%H%M%S')
    html_path = '%s.html' % stamp
    png_path = '%s.png' % stamp

    with open(html_path, 'wb') as html_file:
        html_file.write(driver.page_source.encode('utf-8'))

    # Earlier experiments, kept here as notes only:
    #   * right-click the logo image through ActionChains, then press ESC
    #     to dismiss the context menu;
    #   * move the window to (0, 0), move the mouse 150px into it and click
    #     right then left, to avoid opening a link by accident.
    # What actually matters is that the browser is the foreground window,
    # because the keystrokes below go to whichever window has focus.
    set_process_foreground(driver.name, driver.title)

    key.tap('s', key.MOD_CONTROL)
    time.sleep(1.5)  # give the "Save as" dialog time to open
    key.type_string(stamp)
    time.sleep(0.5)
    key.tap(key.K_RETURN)

    driver.save_screenshot(png_path)

if __name__ == "__main__":
    # Set browser to 'firefox' to drive Firefox instead of Chrome.
    browser, url = 'chrome', "http://codingpy.com"
    driver = scroll_page(url, browser)
    save_result(driver)
    print('All DONE')

 

posted @ 2017-07-25 12:40  my8100  阅读(1115)  评论(0编辑  收藏  举报