爬取动态canvas

之前做爬取动态canvas 是用selenium写的

效果很不理想

由于时间关系,让前端渲染截图转为base64发送到后端

 

最近比较有空  在网上找到了 无头浏览器  phantomjs

 

能做,比selenium强,不用打开浏览器窗口渲染页面,但是也没强多少,内存和cpu占用还是比较高,速度也慢

 

下载phantomjs,配置环境变量:要加到系统变量的Path下,不能配在用户变量里,不然没效果

 

贴代码

package com.example.testmap;

import org.junit.jupiter.api.Test;

import java.io.*;
import java.util.Base64;
import java.util.UUID;


/**
 * Captures a screenshot of a dynamically rendered page by running the PhantomJS headless browser
 * as a child process, then decodes the Base64 text it prints and writes the image to disk.
 *
 * <p>The {@code phantomjs} executable is started by name, so it must be resolvable by the
 * operating system when the JVM launches it.
 */
public class phantomJSTest {

    /** PhantomJS script that renders the page and prints the screenshot as Base64 on stdout. */
    private static final String PHANTOM_SCRIPT = "D:\\workSoft\\phantomjs\\test\\code.js";

    /** File written by {@link #generateImage} when the caller supplies no target prefix. */
    private static final String DEFAULT_IMAGE_PATH = "C:\\Users\\HTHT\\Desktop\\testimg.png";

    /**
     * End-to-end run: captures the page, writes the image, and prints how long each stage took.
     *
     * @throws IOException if PhantomJS cannot be run or the image cannot be written
     */
    @Test
    void contextLoads() throws IOException {

        String url = "https://www.fs121.com/emergencyWarning/FoShan";

        long startTime = System.currentTimeMillis();
        String base = getAjaxContent(url);
        long endTime = System.currentTimeMillis();

        System.out.println("截图时间:"+(endTime-startTime)+"ms");

        long startBase = System.currentTimeMillis();
        String savedPath = generateImage(base, "");
        long endBase = System.currentTimeMillis();

        // generateImage signals a failed write with null; do not report success in that case.
        if (savedPath == null) {
            throw new IOException("Screenshot could not be written to disk");
        }
        System.out.println("处理base64并写入磁盘时间:"+(endBase-startBase)+"ms");

        System.out.println("成功了,总耗时:"+(endBase-startTime)+"ms");
    }

    /**
     * Runs the PhantomJS capture script against {@code url} and returns everything the script
     * printed on standard output, with line terminators removed.
     *
     * @param url address of the page to capture; passed to the script as its single argument
     * @return the concatenated stdout lines of the PhantomJS process
     * @throws NullPointerException if {@code url} is null
     * @throws IOException if the process cannot be started, its output cannot be read, it exits
     *     with a non-zero code, or the calling thread is interrupted while waiting for it
     */
    public static String getAjaxContent(String url) throws IOException {
        if (url == null) {
            throw new NullPointerException("url");
        }

        // An explicit argument list (instead of one concatenated command string) guarantees the
        // URL reaches the script as exactly one argument even if it contains whitespace.
        ProcessBuilder builder = new ProcessBuilder("phantomjs", PHANTOM_SCRIPT, url);
        // stderr is not consumed here; inheriting it prevents the child from blocking on a full
        // pipe and keeps its diagnostics visible.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);

        Process process = builder.start();
        try {
            StringBuilder output = new StringBuilder();
            // Decoded with the platform default charset, as before.
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line);
                }
            }

            int exitCode = process.waitFor();
            if (exitCode != 0) {
                throw new IOException("phantomjs exited with code " + exitCode + " for " + url);
            }
            return output.toString();
        } catch (InterruptedException e) {
            // Restore the interrupt flag and surface the interruption through the declared type.
            Thread.currentThread().interrupt();
            InterruptedIOException interrupted =
                    new InterruptedIOException("Interrupted while waiting for phantomjs");
            interrupted.initCause(e);
            throw interrupted;
        } finally {
            // Ensures the child does not outlive a failed read; harmless once it has exited.
            process.destroy();
        }
    }

    /**
     * Reads an input stream to its end and returns the bytes read. The stream is not closed.
     *
     * @param inputStream stream to drain
     * @return the bytes read, or an empty array if reading fails with an {@link IOException}
     */
    public static byte[] inputStreamToByteArray(InputStream inputStream) {
        try (ByteArrayOutputStream collected = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[1024];
            int read;
            while ((read = inputStream.read(chunk)) != -1) {
                collected.write(chunk, 0, read);
            }
            return collected.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
            return new byte[]{};
        }
    }

    /**
     * Decodes a Base64 string (without any {@code data:} prefix) and writes the bytes to a file.
     *
     * <p>When {@code path} is null or empty the image is written to the fixed default file.
     * Otherwise the file name is {@code path} followed by a random UUID and an image suffix, so
     * {@code path} must already end with a separator if it denotes a directory.
     *
     * @param base64 Base64-encoded image bytes
     * @param path prefix for the generated file name, or null/empty to use the default file
     * @return the path of the file that was written, or {@code null} if writing failed
     * @throws IllegalArgumentException if {@code base64} is not valid Base64
     */
    public static String generateImage(String base64, String path) {
        byte[] imageBytes = Base64.getDecoder().decode(base64);

        String target = (path == null || path.isEmpty())
                ? DEFAULT_IMAGE_PATH
                : path.concat(UUID.randomUUID().toString()).concat(imageSuffix(imageBytes));

        // try-with-resources closes the stream even when the write fails.
        try (OutputStream out = new FileOutputStream(target)) {
            out.write(imageBytes);
        } catch (IOException e) {
            System.err.println("Could not write image to " + target + ": " + e);
            return null;
        }
        return target;
    }

    /** Returns ".png" when the data starts with the PNG signature, otherwise ".jpg". */
    private static String imageSuffix(byte[] data) {
        boolean png = data.length >= 4
                && data[0] == (byte) 0x89
                && data[1] == 'P'
                && data[2] == 'N'
                && data[3] == 'G';
        return png ? ".png" : ".jpg";
    }

}

 

 

然后就是js代码  

 

狗日的前端  叫他帮我写这点代码写了一下午

// PhantomJS script: opens the URL given as the first command-line argument, waits one second
// for client-side rendering, then prints a PNG screenshot of the clipped region to stdout as
// Base64. Exits with code 1 when no URL is given or the page fails to load.
phantom.outputEncoding = "gbk";

var system = require('system');
var address = system.args[1];

// Opens the page and prints the screenshot; always terminates the PhantomJS process.
function capture(url) {
  var page = require("webpage").create();

  page.viewportSize = { width: 1024, height: 768 };
  // the clipRect is the portion of the page you are taking a screenshot of
  page.clipRect = { top: 200, left: 210, width: 1024, height: 600 };

  page.open(url, function (status) {
    if (status === "success") {
      // Give the page time to draw before capturing it.
      window.setTimeout(function () {
        console.log(page.renderBase64('PNG'));
        phantom.exit();
      }, 1000);
    } else {
      console.log('失败了....');
      // Without an explicit exit PhantomJS keeps running, so a caller reading stdout
      // would wait forever.
      phantom.exit(1);
    }
  });
}

if (!address) {
  console.log('Usage: phantomjs code.js <url>');
  phantom.exit(1);
} else {
  capture(address);
}

 

运行截图

 

posted @ 2023-02-22 10:38  霸王龙168  阅读(331)  评论(0)    收藏  举报