Selenium Java 谷歌浏览器之保存网页为图片

前言

  谷歌浏览器自动化--安装地址:https://www.cnblogs.com/pangqiuchou/articles/10965856.html

  我上次的需求是做一个爬虫,爬取一些网站的敏感信息,并把包含敏感信息的网页作为证据保存下来。这里会保存两种证据:第一种是网页内容(HTML),第二种就是本文要讲的截图,即把整个网页保存为一张图片。

  本文通过 Selenium 操作谷歌浏览器进行截图,当然也可以操作火狐等其它浏览器(个人建议使用火狐浏览器,最下面的案例五就是通过火狐浏览器截图)。

  除了通过 Selenium 操作浏览器之外,还有一种方式是通过 PhantomJS 对网页截屏,效果也不错,请看下面的链接:

  使用PHANTOMJS对网页截屏地址:https://www.cnblogs.com/pangqiuchou/articles/10965906.html

 

案例一:保存网页可见区域为图片

import org.apache.commons.io.FileUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;

import java.io.File;

/**
 * Example 1: save the visible area of a web page as an image.
 *
 * <p>Opens Baidu in Chrome, runs a search, waits for the results and stores a
 * screenshot of the current viewport on the desktop.
 */
public class Test1 {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if the wait is interrupted or the screenshot cannot be copied
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.chrome.driver",
                "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe");

        WebDriver driver = new ChromeDriver();
        try {
            driver.manage().window().maximize();

            driver.get("http://www.baidu.com/");

            // Locate Baidu's search box and type the query text.
            driver.findElement(By.id("kw")).sendKeys("鹿晗人妖");
            // Click the search button next to the input box.
            driver.findElement(By.id("su")).click();
            // Pause two seconds so the search results have time to load.
            Thread.sleep(2000);

            // Take a screenshot of the page (Chrome captures the visible area only).
            File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);

            // Save the screenshot to the desktop.
            FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png"));
        } finally {
            // Always close the browser, even when a step above fails, so no
            // browser/driver process is left running.
            driver.quit();
        }
    }

}

案例二:保存网页可见区域中的某一块为图片

import org.apache.commons.io.FileUtils;
import org.openqa.selenium.*;
import org.openqa.selenium.chrome.ChromeDriver;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;

/**
 * Example 2: save one element inside the visible area of a page as an image.
 *
 * <p>Takes a viewport screenshot and crops it to the rectangle occupied by the
 * element whose class is {@code wrapper}.
 */
public class Test2 {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if the wait is interrupted or an image cannot be read or written
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.chrome.driver",
                "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe");

        WebDriver driver = new ChromeDriver();
        try {
            driver.manage().window().maximize();

            driver.get("http://tool.oschina.net/highlight");
            Thread.sleep(2000);

            // Find the node whose class is "wrapper" and record its rectangle.
            WebElement webElement = driver.findElement(By.className("wrapper"));
            Point point = webElement.getLocation();
            int eleWidth = webElement.getSize().getWidth();
            int eleHeight = webElement.getSize().getHeight();

            // Take a screenshot of the page (visible area).
            File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
            BufferedImage fullImg = ImageIO.read(srcFile);

            // Clamp the element rectangle to the screenshot bounds: getSubimage throws
            // RasterFormatException when the requested region leaves the image, which
            // happens when the element extends beyond the visible area.
            int x = Math.max(0, Math.min(point.getX(), fullImg.getWidth() - 1));
            int y = Math.max(0, Math.min(point.getY(), fullImg.getHeight() - 1));
            int width = Math.min(eleWidth, fullImg.getWidth() - x);
            int height = Math.min(eleHeight, fullImg.getHeight() - y);
            if (width <= 0 || height <= 0) {
                throw new IllegalStateException(
                        "Element has no visible area inside the screenshot: "
                                + eleWidth + "x" + eleHeight + " at " + point);
            }

            // Cut the element out of the page screenshot and overwrite the temp file with it.
            BufferedImage eleScreenshot = fullImg.getSubimage(x, y, width, height);
            ImageIO.write(eleScreenshot, "png", srcFile);

            // Save the element screenshot to the desktop.
            FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png"));
        } finally {
            // Always close the browser, even when a step above fails.
            driver.quit();
        }
    }

}

案例三:保存网页可见区域为图片、并且标记网页中的关键字

 

import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;

import java.io.File;

/**
 * Example 3: save the visible area of a page as an image with a keyword highlighted.
 *
 * <p>The page source is cleaned with jsoup, every occurrence of the keyword is
 * wrapped in a red-bordered {@code <span>}, the result is written back into the
 * live page body, and a screenshot is taken.
 */
public class Test3 {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if the screenshot cannot be copied
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.chrome.driver",
                "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe");

        WebDriver driver = new ChromeDriver();
        try {
            driver.manage().window().maximize();

            driver.get("http://news.baidu.com");

            // Fetch the HTML of the Baidu News page.
            String htmlContent = driver.getPageSource();

            // Parse the HTML string (requires jsoup-1.8.1.jar).
            Document document = Jsoup.parse(htmlContent);

            // Strip onclick and href attributes from every element (only a
            // screenshot is needed, so click behavior is irrelevant).
            for (Element element : document.getAllElements()) {
                element.removeAttr("onclick").removeAttr("href");
            }

            // Remove all script tags (a static page is enough for a screenshot).
            for (Element element : document.getElementsByTag("script")) {
                element.remove();
            }

            // Mark the keyword as sensitive by drawing a red border around it.
            // NOTE(review): this is a plain text replacement, so occurrences inside
            // attribute values are wrapped as well.
            String reHtmlContent = document.body().html()
                    .replace("网页", "<span style='border:2px solid red;'>网页</span>");

            // Replace the page body with the processed HTML. The markup is passed as a
            // script argument instead of being concatenated into the script text, so
            // quotes, backslashes and line breaks in the page cannot break the script
            // and the HTML no longer has to be rewritten to fit inside a JS string.
            JavascriptExecutor js = (JavascriptExecutor) driver;
            js.executeScript("document.body.innerHTML = arguments[0];", reHtmlContent);

            // Take a screenshot of the page (visible area).
            File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);

            // Save the screenshot to the desktop.
            FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png"));
        } finally {
            // Always close the browser, even when a step above fails.
            driver.quit();
        }
    }

}

案例四:保存网页为图片(上面的案例只会保存可见区域)

import java.awt.image.BufferedImage;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import javax.imageio.ImageIO;

/**
 * Example 4: save a whole page as one image (the earlier examples only capture
 * the visible area).
 *
 * <p>Scrolls through the page one screen at a time, saves a screenshot per step
 * as {@code 1.png}, {@code 2.png}, ... and finally stitches them vertically with
 * {@code ImageUtils.mergeImg}.
 */
public class Test4 {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws Exception if a screenshot cannot be read, written or copied
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.chrome.driver",
                "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe");

        // Folder that receives the per-screen screenshots.
        String imgDir = "C:\\Users\\admin\\Desktop\\allImg";

        WebDriver driver = new ChromeDriver();
        try {
            driver.manage().window().maximize();
            driver.get("https://www.csdn.net/");

            /* Read the relevant heights from the browser via JavaScript. */
            JavascriptExecutor js = (JavascriptExecutor) driver;
            String heightStrs = (String) js.executeScript("return document.body.scrollHeight.toString()+','+document.body.scrollTop.toString() + ',' + window.screen.height.toString()");
            String[] heights = heightStrs.split(",");
            int htmlHeight = Integer.parseInt(heights[0]); // height of the whole page
            int scrollTop = Integer.parseInt(heights[1]);  // current scroll position
            int screenHeight = Integer.parseInt(heights[2]); // height of the computer screen
            // NOTE(review): 140 is presumably the height taken by browser chrome and the
            // taskbar on the author's machine; confirm it matches the real viewport height.
            screenHeight = screenHeight - 140;
            if (screenHeight <= 0) {
                // A non-positive step would never advance scrollTop and loop forever.
                throw new IllegalStateException("Usable screen height must be positive: " + screenHeight);
            }

            // Scroll and capture one screenshot per step.
            int count = 0;
            while (scrollTop < htmlHeight) {
                scrollTop += screenHeight;
                js.executeScript("window.scrollTo(0, " + (screenHeight * count) + ")");

                // Take a screenshot of the current viewport.
                File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);

                // Save it as <count>.png inside the screenshot folder.
                File saveFile = new File(imgDir + "\\" + (++count) + ".png");
                FileUtils.copyFile(srcFile, saveFile);

                /*
                 * The top part of the last screenshot may repeat content already present
                 * in the previous one, so only its bottom part is kept. Skipped when:
                 *  - this is the only screenshot (nothing to overlap with), or
                 *  - the page height is an exact multiple of the step (the remainder
                 *    would be 0 and a zero-height crop is rejected by getSubimage).
                 */
                int realHeight = htmlHeight % screenHeight; // real height of the last image
                if (scrollTop >= htmlHeight && count > 1 && realHeight > 0) {
                    BufferedImage fullImg = ImageIO.read(saveFile);
                    // Never ask for more rows than the screenshot actually has.
                    int cropHeight = Math.min(realHeight, fullImg.getHeight());
                    BufferedImage eleScreenshot = fullImg.getSubimage(
                            0, fullImg.getHeight() - cropHeight, fullImg.getWidth(), cropHeight);
                    // Overwrite the full-screen image with the cropped one.
                    ImageIO.write(eleScreenshot, "png", saveFile);
                }
            }

            // Stitch the screenshots together.
            File imgsFile = new File(imgDir);
            if (!imgsFile.isDirectory()) {
                throw new RuntimeException("地址不是一个正确的目录...");
            }
            // Build the list in capture order. File.listFiles() makes no ordering
            // guarantee (and a name sort would put 10.png before 2.png); it would also
            // pick up leftover files from earlier runs.
            File[] imgsFiles = new File[count];
            for (int i = 0; i < count; i++) {
                imgsFiles[i] = new File(imgsFile, (i + 1) + ".png");
            }
            ImageUtils.mergeImg(imgsFiles, ImageUtils.MERGE_IMG_TYPE_Y, "C:\\Users\\admin\\Desktop\\1233.png");
        } finally {
            // Always close the browser, even when a step above fails.
            driver.quit();
        }
    }

}
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;

public class ImageUtils {

    public final static int MERGE_IMG_TYPE_X = 1;
    public final static int MERGE_IMG_TYPE_Y = 2;

    public static boolean mergeImg(File[] files, int type , String targetFile){
        int len = files.length;
        if(len<1){
            throw new RuntimeException("图片数量为0,不可以执行拼接");
        }
        BufferedImage[] images = new BufferedImage[len];
        int[][] ImageArrays = new int[len][];

        for (int i = 0 ; i < len ; i++){
            try {
                images[i] = ImageIO.read(files[i]);
            }catch (Exception e){
                e.printStackTrace();
            }

            int width = images[i].getWidth();
            int height = images[i].getHeight();
            ImageArrays[i] = new int[width * height];
            ImageArrays[i] = images[i].getRGB(0,0,width,height,ImageArrays[i],0,width);
        }

        int newHeight = 0;
        int newWidth = 0;
        for(int i = 0; i < images.length;i++){
            if(type == 1){
                newHeight = newHeight > images[i].getHeight() ? newWidth : images[i].getHeight();
                newWidth += images[i].getWidth();
            }else if (type == 2){
                newWidth = newHeight > images[i].getWidth() ? newWidth : images[i].getWidth();
                newHeight += images[i].getHeight();
            }
        }
        if(type == 1 && newWidth < 1){
            return false ;
        }
        if(type == 2 && newHeight < 1){
            return false;
        }
        try {
            BufferedImage ImageNew = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
            int height_i = 0;
            int width_i = 0;
            for (int i = 0; i < images.length; i++){
                if(type == 1){
                    ImageNew.setRGB(width_i, 0, images[i].getWidth(), newHeight, ImageArrays[i], 0, images[i].getWidth());
                    width_i += images[i].getWidth();
                }else if (type == 2){
                    ImageNew.setRGB(0, height_i, newWidth, images[i].getHeight(), ImageArrays[i], 0, newWidth);
                    height_i += images[i].getHeight();
                }
            }
            ImageIO.write(ImageNew,targetFile.split("\\.")[1], new File(targetFile));
            return true;
        }catch (Exception e){
            e.printStackTrace();
            return false;
        }
    }
}

案例五:保存网页为图片(通过火狐浏览器)

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.openqa.selenium.*;
import org.openqa.selenium.firefox.FirefoxDriver;

/**
 * Example 5: save a whole page as an image using Firefox.
 *
 * <p>Resizes the browser window to the full page height and then takes a single
 * screenshot.
 */
public class Test5 {

    /**
     * Runs the example.
     *
     * @param args unused
     * @throws IOException if the screenshot cannot be copied to its destination
     */
    public static void main(String[] args) throws IOException {
        // Configure the driver.
        System.setProperty("webdriver.gecko.driver", "C:\\Program Files (x86)\\Mozilla Firefox\\geckodriver.exe");
        WebDriver driver = new FirefoxDriver();
        try {
            driver.manage().window().maximize();
            driver.get("https://www.csdn.net/");

            /* Read the page height from the browser via JavaScript. */
            JavascriptExecutor js = (JavascriptExecutor) driver;
            String scrollHeight = (String) js.executeScript("return document.body.scrollHeight.toString()");
            int htmlHeight = Integer.parseInt(scrollHeight); // height of the whole page

            // Make the window as tall as the page plus 200 extra pixels.
            // NOTE(review): the extra 200 presumably leaves room for the browser's own
            // toolbars so the whole page fits in the viewport; confirm.
            driver.manage().window().setSize(new Dimension(1440, htmlHeight + 200));

            // Take a screenshot of the page.
            File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
            // Save the screenshot to the desktop.
            FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png"));
        } finally {
            // Always close the browser, even when a step above fails.
            driver.quit();
        }
    }
    
}
posted @ 2019-06-03 09:43  不出特产不出矿  阅读(3432)  评论(7)    收藏  举报