public static List<String> getExactMatchesByGoogleCrawler(String imageUrl) {
try {
ObjectMapper objectMapper = new ObjectMapper();
// 发送请求并获取响应 (模拟点击 search) Document doc = sendRequestByImageUrl(imageUrl);
// 判断是否有 exact matches if (!doc.toString().contains("See exact matches")) {
LOGGER.info("没有exact matches");
return Collections.emptyList();
}
// 解析响应并获取 图片唯一标识
String imageKey = parsingImageKey(doc);
// 获取图片核心token
String imageToken = getImageToken(objectMapper, doc);
// 发送请求并获取响应 (模拟点击 see exact matches) String htmlResponseStr = getExactMatchesImageByHttpRequest(imageKey, imageToken);
// 获取精确匹配图片的文本
return getExactMatchesImageText(objectMapper, htmlResponseStr);
} catch (IOException e) {
ExceptionCollActionEvent event = new ExceptionCollActionEvent(GoogleLensCrawler.class, e);
SpringContextHolder.publishEvent(event);
LOGGER.error(e.getMessage(), e);
return Collections.emptyList();
}
}
/**
 * Extracts the text of every exact-match entry from the raw follow-up response body.
 *
 * <p>The body is trimmed (first 7 characters and the final character dropped) and
 * parsed as JSON; element 2 of the result is a string that is itself JSON and is
 * parsed again. The entries are read from the fixed path
 * {@code [1][0][1][8][20][0][0]} of that inner document, and element 4 of each
 * entry is returned as text.
 *
 * <p>Navigation is null-safe: when the body is too short or any hop of the path is
 * missing, an empty list is returned instead of throwing.
 *
 * @param objectMapper    Jackson mapper used for both parsing passes
 * @param htmlResponseStr raw response body of the follow-up request
 * @return texts of the matched entries, possibly empty
 * @throws JsonProcessingException if either parsing pass meets malformed JSON
 */
private static List<String> getExactMatchesImageText(ObjectMapper objectMapper, String htmlResponseStr) throws JsonProcessingException {
    // Strip the non-JSON wrapper characters.
    // NOTE(review): presumably the 7 leading characters are the anti-hijacking prefix plus
    // the outer '[' and the trailing one is the outer ']' - confirm against a live response.
    if (htmlResponseStr == null || htmlResponseStr.length() <= 8) {
        return Collections.emptyList();
    }
    JsonNode envelope = objectMapper.readTree(htmlResponseStr.substring(7, htmlResponseStr.length() - 1));
    // path(...) yields a "missing" node instead of null, so a changed layout cannot cause an NPE.
    String payload = envelope == null ? "" : envelope.path(2).asText();
    if (payload.isEmpty()) {
        return Collections.emptyList();
    }
    JsonNode innerRoot = objectMapper.readTree(payload);
    if (innerRoot == null) {
        return Collections.emptyList();
    }
    JsonNode matches = innerRoot.path(1).path(0).path(1).path(8).path(20).path(0).path(0);

    List<String> findImageList = new ArrayList<>();
    // Iterating a missing or scalar node yields no elements.
    for (JsonNode node : matches) {
        JsonNode text = node.get(4);
        if (text != null) {
            findImageList.add(text.asText());
        }
    }
    return findImageList;
}
/**
 * Sends the follow-up POST request (simulating a click on "see exact matches") and
 * returns the raw response body.
 *
 * <p>The request carries a single form field, {@code f.req}, whose value is a fixed
 * payload template with {@code imageKey} inserted twice and {@code imageToken}
 * inserted once. Neither value is escaped before insertion.
 *
 * @param imageKey   image identifier extracted from the Lens result page
 * @param imageToken image token extracted from the Lens result page
 * @return the response body as a string
 * @throws IOException if the HTTP request fails
 */
private static String getExactMatchesImageByHttpRequest(String imageKey, String imageToken) throws IOException {
    // NOTE(review): the first imageKey occurrence is preceded by a space inside the quoted
    // value (\" <imageKey>\") - confirm that this is intentional.
    String requestPayload = "[[[\"B7fdke\",\"[[\\\"18446744072281467874\\\",1,1],[null,null,null,null,null,null,[\\\" " + imageKey + "\\\"],[\\\"/lens-web-standalone-prod/" + imageKey + "\\\",[null,null,1000,1000]]],[null,null,null,null,3,[\\\"en\\\",null,\\\"US\\\",\\\"Asia/Hong_Kong\\\"],null,null,[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,1,null,null,null,1,null,null,null,null,null,null,1,1,null,null,null,null,1,null,1,null,null,1],[[null,1,1,1,1,1,1,null,null,null,1,1,1,1,null,null,null,1,null,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1,null,null,null,1,null,null,null,1,null,null,null,null,null,null,1,1,1,null,1,1,null,null,null,null,null,0,null,null,null,[5,6,2],null,null,null,1,null,1,null,1,null,null,null,null,null,null,null,null,1,null,null,null,null,null,null,1,null,null,null,null,null,null,1,1,null,null,1,1,null,null,null,null,null,null,1,null,null,null,null,null,null,null,null,null,0,null,1]],[[[7]]],null,null,null,26,null,null,null,[797,929],[null,6],[null,14],null,[14],[null,\\\"\\\"]],null,null,null,null,null,null,null,null,null,5,null,null,null,[[\\\"region_search\\\",null,[[0.5,0.5,1,1,null,1],null,null,1]]],\\\"" + imageToken + "\\\",null,null,null,null,null,[[null,[]]],null,\\\"RkE1MTRERjktODBCOC00RDAzLUIzNzgtRTYwMTA3QzFBRjJF\\\"]\",null,\"generic\"]]]";

    // Browser-like headers sent with the request.
    Map<String, String> requestHeaders = new HashMap<>();
    requestHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");
    requestHeaders.put("Accept-Language", "en-US,en;q=0.5");

    // Second request of the flow: POST the payload as the only form field.
    String endpoint = "https://lens.google.com/_/LensWebStandaloneUi/data/batchexecute";
    return Jsoup.connect(endpoint)
            .headers(requestHeaders)
            .data("f.req", requestPayload)
            .ignoreContentType(true)
            .method(Connection.Method.POST)
            .execute()
            .body();
}
/**
 * Extracts the image token from the {@code AF_initDataCallback} script keyed {@code ds:1}.
 *
 * <p>Scans every script whose data contains {@code AF_initDataCallback} for a child
 * node containing {@code key: 'ds:1'} together with {@code hash: '1'} or
 * {@code hash: '3'} (when several scripts match, the last one is used). The first
 * child of that script is trimmed to the object literal passed to the callback, the
 * literal's property names are rewritten into valid JSON, and element 19 of its
 * {@code data} array is returned with all double quotes removed.
 *
 * @param objectMapper Jackson mapper used to parse the rewritten literal
 * @param doc          parsed Lens result page
 * @return element 19 of the {@code data} array, serialized with double quotes stripped
 * @throws JsonProcessingException if the rewritten literal is not valid JSON
 * @throws IllegalStateException   if no matching script exists, the script is too short
 *                                 to contain the callback wrapper, or {@code data[19]} is absent
 */
private static String getImageToken(ObjectMapper objectMapper, Document doc) throws JsonProcessingException {
    Elements candidates = doc.select("script:containsData(AF_initDataCallback)");
    Element scriptElement = null;
    for (Element script : candidates) {
        for (Node node : script.childNodes()) {
            String content = node.toString();
            if (content.contains("key: 'ds:1'")
                    && (content.contains("hash: '1'") || content.contains("hash: '3'"))) {
                scriptElement = script;
                break;
            }
        }
    }
    if (scriptElement == null) {
        LOGGER.debug(candidates.toString());
        // Fail with a descriptive error instead of dereferencing null below.
        throw new IllegalStateException("No AF_initDataCallback script with key 'ds:1' and hash '1' or '3' found");
    }

    String callbackSource = scriptElement.childNodes().get(0).toString();
    // "AF_initDataCallback(" is 20 characters long; the two trailing characters are
    // presumably the closing ");" - TODO confirm against a live page.
    if (callbackSource.length() < 22) {
        throw new IllegalStateException("AF_initDataCallback script is too short to contain a payload");
    }
    String objectLiteral = callbackSource.substring(20, callbackSource.length() - 2);

    // Quote the unquoted JavaScript property names so that Jackson accepts the literal.
    // Only the first "data:" is rewritten: it is the property name, and any later
    // occurrence would sit inside the payload, where inserting quotes corrupts the JSON.
    String json = objectLiteral.replace("key: 'ds:1'", "\"key\": \"ds:1\"")
            .replace("hash: '1'", "\"hash\": \"1\"")
            .replace("hash: '3'", "\"hash\": \"3\"")
            .replaceFirst("data:", "\"data\":")
            .replace("sideChannel:", "\"sideChannel\":");

    JsonNode rootNode = objectMapper.readTree(json);
    JsonNode tokenNode = rootNode == null ? null : rootNode.path("data").path(19);
    if (tokenNode == null || tokenNode.isMissingNode()) {
        throw new IllegalStateException("Image token (data[19]) is missing from the ds:1 payload");
    }
    return tokenNode.toString().replace("\"", "");
}
/**
 * Extracts the image key from the first {@code c-wiz} element of the page.
 *
 * <p>The key is the run of lowercase hex digits and dashes that follows
 * {@code lens-web-standalone-prod/} in the element's serialized HTML.
 *
 * @param doc parsed Lens result page
 * @return the image key, or {@code null} when the page has no {@code c-wiz}
 *         element or the element contains no key
 */
private static String parsingImageKey(Document doc) {
    Element element = doc.select("c-wiz").first();
    if (element == null) {
        // first() returns null for an empty selection; report "not found" the same
        // way as a failed pattern match instead of throwing a NullPointerException.
        return null;
    }
    Pattern pattern = Pattern.compile("lens-web-standalone-prod/([a-f0-9-]+)");
    // Match against the element's full serialized HTML, not only its visible text.
    Matcher matcher = pattern.matcher(element.toString());
    return matcher.find() ? matcher.group(1) : null;
}
/**
 * Requests the Google Lens "upload by URL" page for an image and returns the parsed document.
 *
 * <p>The image URL is URL-encoded as UTF-8 and placed in the {@code url} query
 * parameter; the remaining query parameters are fixed values.
 *
 * @param imageUrl URL of the image to look up
 * @return the parsed response document
 * @throws IOException if encoding the URL or performing the HTTP GET fails
 */
private static Document sendRequestByImageUrl(String imageUrl) throws IOException {
    String encodedImageUrl = URLEncoder.encode(imageUrl, "UTF-8");
    String requestUrl = "https://lens.google.com/uploadbyurl?url="
            + encodedImageUrl
            + "&hl=en&re=df&st=1716295332969&vpw=797&vph=929&ep=gsbubu";

    // Browser-like request headers.
    Map<String, String> requestHeaders = new HashMap<>();
    requestHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");
    requestHeaders.put("Accept-Language", "en-US,en;q=0.5");

    return Jsoup.connect(requestUrl).headers(requestHeaders).get();
}