抓取https网页报错sun.security.validator.ValidatorException: PKIX path building failed解决
原文链接: https://www.cnblogs.com/zdz8207/p/java-https-ssl-jsoup.html
抓取https网页时,报错sun.security.validator.ValidatorException: PKIX path building failed 解决办法
原因是https证书问题,java抓取时忽略掉证书才能访问。jsoup在调用前先执行下以下忽略证书请求就可以了。
try {
//先调用下忽略https证书的再请求才可以
HttpsUrlValidator.retrieveResponseFromServer(url);
doc = Jsoup
.connect(url)
.header("User-Agent",rand_agents)
.timeout(10000).get();
body = doc.getElementsByTag("body").html();
} catch (Exception e) {
log.info(e.getMessage());
}
StringUtils.substringBetween("hello world", "l", "r");
//结果是: lo wo 第一个字符“l”与第一个字符“r”之间的字符串
datas.retainAll(arrs);
HttpsUrlValidator 类:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;
public class HttpsUrlValidator {
static HostnameVerifier hv = new HostnameVerifier() {
public boolean verify(String urlHostName, SSLSession session) {
System.out.println("Warning: URL Host: " + urlHostName + " vs. "
+ session.getPeerHost());
return true;
}
};
public final static String retrieveResponseFromServer(final String url) {
HttpURLConnection connection = null;
try {
URL validationUrl = new URL(url);
trustAllHttpsCertificates();
HttpsURLConnection.setDefaultHostnameVerifier(hv);
connection = (HttpURLConnection) validationUrl.openConnection();
final BufferedReader in = new BufferedReader(new InputStreamReader(
connection.getInputStream()));
String line;
final StringBuffer stringBuffer = new StringBuffer(255);
synchronized (stringBuffer) {
while ((line = in.readLine()) != null) {
stringBuffer.append(line);
stringBuffer.append("\n");
}
return stringBuffer.toString();
}
} catch (final IOException e) {
System.out.println(e.getMessage());
return null;
} catch (final Exception e1){
System.out.println(e1.getMessage());
return null;
}finally {
if (connection != null) {
connection.disconnect();
}
}
}
public static void trustAllHttpsCertificates() throws Exception {
javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[1];
javax.net.ssl.TrustManager tm = new miTM();
trustAllCerts[0] = tm;
javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext
.getInstance("SSL");
sc.init(null, trustAllCerts, null);
javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc
.getSocketFactory());
}
static class miTM implements javax.net.ssl.TrustManager,
javax.net.ssl.X509TrustManager {
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return null;
}
public boolean isServerTrusted(
java.security.cert.X509Certificate[] certs) {
return true;
}
public boolean isClientTrusted(
java.security.cert.X509Certificate[] certs) {
return true;
}
public void checkServerTrusted(
java.security.cert.X509Certificate[] certs, String authType)
throws java.security.cert.CertificateException {
return;
}
public void checkClientTrusted(
java.security.cert.X509Certificate[] certs, String authType)
throws java.security.cert.CertificateException {
return;
}
}
}


浙公网安备 33010602011771号