【工作日常】Google 以图搜图 代码

public static List<String> getExactMatchesByGoogleCrawler(String imageUrl) {  
    try {  
        ObjectMapper objectMapper = new ObjectMapper();  
  
        // 发送请求并获取响应 (模拟点击 search)        Document doc = sendRequestByImageUrl(imageUrl);  
  
        // 判断是否有 exact matches        if (!doc.toString().contains("See exact matches")) {  
            LOGGER.info("没有exact matches");  
            return Collections.emptyList();  
        }  
  
        // 解析响应并获取 图片唯一标识  
        String imageKey = parsingImageKey(doc);  
  
        // 获取图片核心token  
        String imageToken = getImageToken(objectMapper, doc);  
  
        // 发送请求并获取响应 (模拟点击 see exact matches)        String htmlResponseStr = getExactMatchesImageByHttpRequest(imageKey, imageToken);  
  
        // 获取精确匹配图片的文本  
        return getExactMatchesImageText(objectMapper, htmlResponseStr);  
  
    } catch (IOException e) {  
        ExceptionCollActionEvent event = new ExceptionCollActionEvent(GoogleLensCrawler.class, e);  
        SpringContextHolder.publishEvent(event);  
        LOGGER.error(e.getMessage(), e);  
        return Collections.emptyList();  
    }  
}  
  
/**
 * Extracts the text of every exact-match entry from the raw "see exact matches" response.
 *
 * <p>The body is trimmed by 7 leading characters and 1 trailing character before being parsed
 * as JSON. Element 2 of that JSON is itself a JSON document encoded as a string; the match list
 * sits at the fixed position {@code [1][0][1][8][20][0][0]} inside it, and the text of each
 * match is element 4 of the entry.
 * NOTE(review): these offsets and indexes mirror one observed response format - confirm against
 * a current response before relying on them.
 *
 * @param objectMapper mapper used to parse both JSON layers
 * @param htmlResponseStr raw body returned by the "see exact matches" request
 * @return the match texts in response order; an empty list when the body is too short or does
 *     not have the expected structure
 * @throws JsonProcessingException if either JSON layer is not valid JSON
 */
private static List<String> getExactMatchesImageText(ObjectMapper objectMapper, String htmlResponseStr) throws JsonProcessingException {
    // 7 leading characters plus 1 trailing character are stripped below, so anything shorter
    // cannot contain a payload.
    if (htmlResponseStr == null || htmlResponseStr.length() < 8) {
        return Collections.emptyList();
    }

    // Strip the non-JSON wrapper characters.
    JsonNode envelope = objectMapper.readTree(htmlResponseStr.substring(7, htmlResponseStr.length() - 1));
    if (envelope == null) {
        return Collections.emptyList();
    }

    // path() yields a "missing" node instead of null, so an unexpected shape cannot cause an NPE.
    String innerJson = envelope.path(2).asText();
    if (innerJson.isEmpty()) {
        return Collections.emptyList();
    }

    JsonNode payload = objectMapper.readTree(innerJson);
    if (payload == null) {
        return Collections.emptyList();
    }

    JsonNode matches = payload.path(1).path(0).path(1).path(8).path(20).path(0).path(0);

    List<String> findImageList = new ArrayList<>();
    // Iterating a missing node yields no elements, so an absent match list gives an empty result.
    for (JsonNode match : matches) {
        JsonNode text = match.path(4);
        if (!text.isMissingNode()) {
            findImageList.add(text.asText());
        }
    }

    return findImageList;
}
  
/**
 * Sends the second request (the "see exact matches" step) to the Lens batchexecute endpoint and
 * returns the raw response body.
 *
 * <p>The payload is a fixed template with {@code imageKey} inserted twice and
 * {@code imageToken} inserted once; it is posted as the {@code f.req} form field.
 * NOTE(review): the template contains a literal space before the first {@code imageKey}
 * insertion - confirm that this is intended.
 *
 * @param imageKey image identifier extracted from the first response
 * @param imageToken image token extracted from the first response
 * @return the response body as text
 * @throws IOException if the HTTP request fails
 */
private static String getExactMatchesImageByHttpRequest(String imageKey, String imageToken) throws IOException {
    final String endpoint = "https://lens.google.com/_/LensWebStandaloneUi/data/batchexecute";

    // Request template; only the image key and token vary between calls.
    final String requestPayload = "[[[\"B7fdke\",\"[[\\\"18446744072281467874\\\",1,1],[null,null,null,null,null,null,[\\\" " + imageKey + "\\\"],[\\\"/lens-web-standalone-prod/" + imageKey + "\\\",[null,null,1000,1000]]],[null,null,null,null,3,[\\\"en\\\",null,\\\"US\\\",\\\"Asia/Hong_Kong\\\"],null,null,[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,1,null,null,null,1,null,null,null,null,null,null,1,1,null,null,null,null,1,null,1,null,null,1],[[null,1,1,1,1,1,1,null,null,null,1,1,1,1,null,null,null,1,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,1,null,null,null,1,null,null,null,null,null,null,1,1,1,null,1,1,null,null,null,null,null,0,null,null,null,[5,6,2],null,null,null,1,null,1,null,1,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,1,null,null,null,null,null,null,1,1,null,null,1,1,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,0,null,1]],[[[7]]],null,null,null,26,null,null,null,[797,929],[null,6],[null,14],null,[14],[null,\\\"\\\"]],null,null,null,null,null,null,null,null,null,5,null,null,null,[[\\\"region_search\\\",null,[[0.5,0.5,1,1,null,1],null,null,1]]],\\\"" + imageToken + "\\\",null,null,null,null,null,[[null,[]]],null,\\\"RkE1MTRERjktODBCOC00RDAzLUIzNzgtRTYwMTA3QzFBRjJF\\\"]\",null,\"generic\"]]]";

    // Browser-like request headers.
    Map<String, String> requestHeaders = new HashMap<>();
    requestHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");
    requestHeaders.put("Accept-Language", "en-US,en;q=0.5");

    Connection connection = Jsoup.connect(endpoint)
            .headers(requestHeaders)
            .data("f.req", requestPayload)
            // The response is not HTML, so the content-type check is disabled.
            .ignoreContentType(true)
            .method(Connection.Method.POST);

    Connection.Response reply = connection.execute();
    return reply.body();
}
  
/**
 * Extracts the image token from the {@code AF_initDataCallback} script embedded in the page.
 *
 * <p>The first script with a child node containing {@code key: 'ds:1'} together with
 * {@code hash: '1'} or {@code hash: '3'} is selected. Its first child node is trimmed by the
 * 20-character call prefix and the 2 trailing characters, the known unquoted property names are
 * quoted so the remainder parses as JSON, and element 19 of the {@code data} array is returned
 * with all double quotes removed.
 * NOTE(review): the textual {@code replace("data:", ...)} also rewrites any {@code data:}
 * occurrence inside string values (for example a {@code data:} URI) - confirm the payload
 * never contains one.
 *
 * @param objectMapper mapper used to parse the extracted JSON
 * @param doc the page returned by the first request
 * @return the token text, never {@code null}
 * @throws JsonProcessingException if the extracted script content is not valid JSON
 * @throws IllegalStateException if the script or the token slot cannot be found
 */
private static String getImageToken(ObjectMapper objectMapper, Document doc) throws JsonProcessingException {
    Elements candidates = doc.select("script:containsData(AF_initDataCallback)");
    Element scriptElement = null;

    // Stop at the first matching script; a plain break would only leave the inner loop.
    search:
    for (Element script : candidates) {
        for (Node node : script.childNodes()) {
            String content = node.toString();
            if (content.contains("key: 'ds:1'")
                    && (content.contains("hash: '1'") || content.contains("hash: '3'"))) {
                scriptElement = script;
                break search;
            }
        }
    }
    if (scriptElement == null) {
        LOGGER.debug(candidates.toString());
        // Fail with a clear message instead of the NullPointerException a dereference would raise.
        throw new IllegalStateException("AF_initDataCallback script with key 'ds:1' not found in Google Lens page");
    }

    String callbackSource = scriptElement.childNodes().get(0).toString();
    // 20 leading characters ("AF_initDataCallback(") and 2 trailing characters are stripped.
    if (callbackSource.length() < 22) {
        throw new IllegalStateException("AF_initDataCallback script is too short to contain a payload");
    }
    String callbackArgument = callbackSource.substring(20, callbackSource.length() - 2);

    // Quote the JavaScript object-literal keys so the text becomes valid JSON.
    String json = callbackArgument.replace("key: 'ds:1'", "\"key\": \"ds:1\"")
            .replace("hash: '1'", "\"hash\": \"1\"")
            .replace("hash: '3'", "\"hash\": \"3\"")
            .replace("data:", "\"data\":")
            .replace("sideChannel:", "\"sideChannel\":");

    JsonNode rootNode = objectMapper.readTree(json);
    // path() yields a "missing" node instead of null when the structure differs.
    JsonNode tokenNode = rootNode == null ? null : rootNode.path("data").path(19);
    if (tokenNode == null || tokenNode.isMissingNode() || tokenNode.isNull()) {
        throw new IllegalStateException("Image token (data[19]) not found in AF_initDataCallback payload");
    }
    return tokenNode.toString().replace("\"", "");
}
  
/**
 * Extracts the image key from the first {@code c-wiz} element of the page.
 *
 * <p>The key is the run of lowercase hex digits and dashes that follows
 * {@code lens-web-standalone-prod/} in the element's HTML.
 *
 * @param doc the page returned by the first request
 * @return the image key, or {@code null} when the page has no {@code c-wiz} element or the
 *     element does not contain the expected path
 */
private static String parsingImageKey(Document doc) {
    Element element = doc.select("c-wiz").first();
    if (element == null) {
        // first() returns null for an empty selection; report it the same way as "no match".
        return null;
    }

    // The element is rendered to its HTML string so attribute values are searched as well.
    Matcher matcher = Pattern.compile("lens-web-standalone-prod/([a-f0-9-]+)")
            .matcher(element.toString());
    return matcher.find() ? matcher.group(1) : null;
}
  
/**
 * Sends the first request (the "search" step): loads the Google Lens "upload by URL" page for
 * the given image and returns the parsed document.
 *
 * <p>NOTE(review): the query string carries a fixed {@code st=1716295332969} value, which looks
 * like a captured timestamp - confirm whether it needs to be current.
 *
 * @param imageUrl URL of the image to search for; it is URL-encoded as UTF-8 before being
 *     appended to the query string
 * @return the parsed response page
 * @throws IOException if the request fails
 */
private static Document sendRequestByImageUrl(String imageUrl) throws IOException {
    String encodedImageUrl = URLEncoder.encode(imageUrl, "UTF-8");
    String requestUrl = "https://lens.google.com/uploadbyurl?url="
            + encodedImageUrl
            + "&hl=en&re=df&st=1716295332969&vpw=797&vph=929&ep=gsbubu";

    // Browser-like request headers.
    Map<String, String> requestHeaders = new HashMap<>();
    requestHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");
    requestHeaders.put("Accept-Language", "en-US,en;q=0.5");

    Connection connection = Jsoup.connect(requestUrl).headers(requestHeaders);
    return connection.get();
}
posted @ 2024-12-02 19:55  rongbu2  阅读(53)  评论(0)    收藏  举报