Selenium Java 谷歌浏览器之保存网页为图片
前言
谷歌浏览器自动化--安装地址:https://www.cnblogs.com/pangqiuchou/articles/10965856.html
我上次的需求是做一个爬虫,爬取一些网站的敏感信息,然后要把这个网页敏感信息的证据保存下来,我们这里会保存两种,第一种就是网页内容(HTML),第二种就是我们现在说的截图,把这个网页保存为一张图片。
这篇文章的方式是通过selenium操作谷歌浏览器进行截图,当然也可以操作火狐等其它浏览器截图(个人建议使用火狐浏览器,最下面案例五是通过火狐浏览器截图)。
除了通过selenium操作浏览器外,我这里还有一种方式,是通过PHANTOMJS对网页截屏,效果不错,请看下面链接:
使用PHANTOMJS对网页截屏地址:https://www.cnblogs.com/pangqiuchou/articles/10965906.html
案例一:保存网页可见区域为图片
import org.apache.commons.io.FileUtils; import org.openqa.selenium.By; import org.openqa.selenium.OutputType; import org.openqa.selenium.TakesScreenshot; import org.openqa.selenium.WebDriver; import org.openqa.selenium.chrome.ChromeDriver; import java.io.File; public class Test1 { public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("http://www.baidu.com/"); //找到百度上面的输入框、放入输入内容‘鹿晗人妖’ driver.findElement(By.id("kw")).sendKeys("鹿晗人妖"); //点击百度旁边的搜索按钮 driver.findElement(By.id("su")).click(); //暂停两秒,让他加载搜索出来的数据 Thread.sleep(2000); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //把截图保存到桌面 FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png")); driver.quit(); } }
案例二:保存网页可见区域中的某一块为图片
import org.apache.commons.io.FileUtils; import org.openqa.selenium.*; import org.openqa.selenium.chrome.ChromeDriver; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; public class Test2 { public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("http://tool.oschina.net/highlight"); Thread.sleep(2000); //找到class为wrapper的节点 WebElement webElement = driver.findElement(By.className("wrapper")); Point point = webElement.getLocation(); int eleWidth = webElement.getSize().getWidth(); int eleHeight = webElement.getSize().getHeight(); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //在上面的网页截图中,把根据class找到的节点截取出来、并覆盖上面的网页截图 BufferedImage fullImg = ImageIO.read(srcFile); BufferedImage eleScreenshot= fullImg.getSubimage(point.getX(), point.getY(), eleWidth, eleHeight); ImageIO.write(eleScreenshot, "png", srcFile); //把根据class找到的节点截图保存到桌面 FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png")); driver.quit(); } }
案例三:保存网页可见区域为图片、并且标记网页中的关键字
import org.apache.commons.io.FileUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.openqa.selenium.JavascriptExecutor; import org.openqa.selenium.OutputType; import org.openqa.selenium.TakesScreenshot; import org.openqa.selenium.WebDriver; import org.openqa.selenium.chrome.ChromeDriver; import java.io.File; public class Test3 { public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("http://news.baidu.com"); //获取百度新闻中html String htmlContent = driver.getPageSource(); //解析html字符串(引入了jsoup-1.8.1.jar) Document document = Jsoup.parse(htmlContent); //删除html下面标签中的onclick属性、href属性(我这里只是截图、点击事件对我没用) for (Element element : document.getAllElements()) { element.removeAttr("onclick").removeAttr("href"); } //删除html下面所有的script标签(我这里只是截图、不需要动态页面) for (Element element : document.getElementsByTag("script")) { element.remove(); } //替换html中的双引号为单引号、删除换行 String reHtmlContent = document.body().html().replace("\"", "'").replaceAll("\r|\n", "");; //标记'网页'为敏感字、用红色框给他框住 reHtmlContent = reHtmlContent.replace("网页", "<span style='border:2px solid red;'>网页</span>"); reHtmlContent = "\"" + reHtmlContent + "\""; //通过js把转换完的html替换到页面的body上面 JavascriptExecutor js = (JavascriptExecutor) driver; js.executeScript("document.body.innerHTML=" + reHtmlContent); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //把截图保存到桌面 FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png")); driver.quit(); } }
案例四:保存整个网页为图片(上面的案例只会保存可见区域)
import java.awt.image.BufferedImage; import java.io.File; import org.apache.commons.io.FileUtils; import org.openqa.selenium.JavascriptExecutor; import org.openqa.selenium.OutputType; import org.openqa.selenium.TakesScreenshot; import org.openqa.selenium.WebDriver; import org.openqa.selenium.chrome.ChromeDriver; import javax.imageio.ImageIO; public class Test4 { public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("https://www.csdn.net/"); /* 通过js获取浏览器的各种高度 */ JavascriptExecutor js = (JavascriptExecutor) driver; String heightStrs = (String) js.executeScript("return document.body.scrollHeight.toString()+','+document.body.scrollTop.toString() + ',' + window.screen.height.toString()"); String[] heights = heightStrs.split(","); int htmlHeight = Integer.parseInt(heights[0]);//整个页面的高度 int scrollTop = Integer.parseInt(heights[1]);//滚动条现在所处的高度 int screenHeight = Integer.parseInt(heights[2]);//电脑屏幕的高度 screenHeight = screenHeight - 140; //开始滚动截图 int count = 0; while(scrollTop < htmlHeight){ scrollTop += screenHeight; ((JavascriptExecutor) driver).executeScript("window.scrollTo(0, "+ (screenHeight * count) +")"); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //把截图保存到桌面 File saveFile = new File("C:\\Users\\admin\\Desktop\\allImg\\"+ (++count) +".png"); FileUtils.copyFile(srcFile, saveFile); /* 最后一张图片的前半截有可能和倒数第二张的前半截图片重叠,这里需要把最后一张图片处理一下 */ if (scrollTop >= htmlHeight) { BufferedImage fullImg = ImageIO.read(saveFile); // 最后一张图片真实高度 int realHeight = htmlHeight % screenHeight; BufferedImage eleScreenshot = fullImg.getSubimage(0, fullImg.getHeight() - realHeight, fullImg.getWidth(), realHeight); // 覆盖最后一张整屏图片为刚刚截取的图片 ImageIO.write(eleScreenshot, "png", saveFile); } } //拼接图片 File imgsFile = new File("C:\\Users\\admin\\Desktop\\allImg"); 
if(!imgsFile.isDirectory()){ throw new RuntimeException("地址不是一个正确的目录..."); } File[] imgsFiles = imgsFile.listFiles(); ImageUtils.mergeImg(imgsFiles, ImageUtils.MERGE_IMG_TYPE_Y, "C:\\Users\\admin\\Desktop\\1233.png"); driver.quit(); } }
import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; public class ImageUtils { public final static int MERGE_IMG_TYPE_X = 1; public final static int MERGE_IMG_TYPE_Y = 2; public static boolean mergeImg(File[] files, int type , String targetFile){ int len = files.length; if(len<1){ throw new RuntimeException("图片数量为0,不可以执行拼接"); } BufferedImage[] images = new BufferedImage[len]; int[][] ImageArrays = new int[len][]; for (int i = 0 ; i < len ; i++){ try { images[i] = ImageIO.read(files[i]); }catch (Exception e){ e.printStackTrace(); } int width = images[i].getWidth(); int height = images[i].getHeight(); ImageArrays[i] = new int[width * height]; ImageArrays[i] = images[i].getRGB(0,0,width,height,ImageArrays[i],0,width); } int newHeight = 0; int newWidth = 0; for(int i = 0; i < images.length;i++){ if(type == 1){ newHeight = newHeight > images[i].getHeight() ? newWidth : images[i].getHeight(); newWidth += images[i].getWidth(); }else if (type == 2){ newWidth = newHeight > images[i].getWidth() ? newWidth : images[i].getWidth(); newHeight += images[i].getHeight(); } } if(type == 1 && newWidth < 1){ return false ; } if(type == 2 && newHeight < 1){ return false; } try { BufferedImage ImageNew = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB); int height_i = 0; int width_i = 0; for (int i = 0; i < images.length; i++){ if(type == 1){ ImageNew.setRGB(width_i, 0, images[i].getWidth(), newHeight, ImageArrays[i], 0, images[i].getWidth()); width_i += images[i].getWidth(); }else if (type == 2){ ImageNew.setRGB(0, height_i, newWidth, images[i].getHeight(), ImageArrays[i], 0, newWidth); height_i += images[i].getHeight(); } } ImageIO.write(ImageNew,targetFile.split("\\.")[1], new File(targetFile)); return true; }catch (Exception e){ e.printStackTrace(); return false; } } }
案例五:保存网页为图片(通过火狐浏览器)
import java.io.File; import java.io.IOException; import org.apache.commons.io.FileUtils; import org.openqa.selenium.*; import org.openqa.selenium.firefox.FirefoxDriver; public class Test5 { public static void main(String[] args) throws IOException { // 配置driver System.setProperty("webdriver.gecko.driver", "C:\\Program Files (x86)\\Mozilla Firefox\\geckodriver.exe"); WebDriver driver = new FirefoxDriver(); driver.manage().window().maximize(); driver.get("https://www.csdn.net/"); /* 通过js获取浏览器的各种高度 */ JavascriptExecutor js = (JavascriptExecutor) driver; String scrollHeight = (String) js.executeScript("return document.body.scrollHeight.toString()"); int htmlHeight = Integer.parseInt(scrollHeight);//整个页面的高度 driver.manage().window().setSize(new Dimension(1440, htmlHeight + 200)); // 对整个网页截图 File srcFile = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE); // 把截图保存到G盘 FileUtils.copyFile(srcFile, new File("C:\\Users\\admin\\Desktop\\1233.png")); driver.quit(); } }
仅供自己学习,记录问题和参考,若有带来误解和不便请见谅,共勉!

浙公网安备 33010602011771号