Open hj287678654 opened 5 years ago
替换掉 requester 可实现忽略SSL错误。
创建一个类用来处理SSL错误(代码来自网络)
/**
 * Static helpers that build the TLS objects needed to make an HTTP client
 * ignore certificate and host-name errors.
 *
 * <p><strong>Security warning:</strong> the socket factory and host-name
 * verifier produced here accept every certificate chain and every host name.
 * That removes all protection against man-in-the-middle attacks, so use them
 * only where that risk is acceptable (for example a crawler reading public pages).
 */
public class SSLSocketClient {

    /**
     * Builds a socket factory backed by a trust manager that accepts any
     * certificate chain.
     *
     * @return a socket factory that performs no certificate validation
     * @throws RuntimeException if the TLS context cannot be created or initialised
     */
    public static SSLSocketFactory getSSLSocketFactory() {
        try {
            // "TLS" is the standard SSLContext algorithm name; the original used "SSL".
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, getTrustManager(), new SecureRandom());
            return sslContext.getSocketFactory();
        } catch (java.security.GeneralSecurityException e) {
            // Same exception type as before so existing callers keep working.
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates the trust-all manager array handed to {@link SSLContext#init}.
     *
     * @return a one-element array holding a manager whose checks never fail
     */
    private static TrustManager[] getTrustManager() {
        return new TrustManager[]{
            new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] chain, String authType) {
                    // Intentionally empty: every client certificate is accepted.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] chain, String authType) {
                    // Intentionally empty: every server certificate is accepted.
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[0];
                }
            }
        };
    }

    /**
     * Returns a verifier that approves every host name.
     *
     * @return a verifier whose {@code verify} method always returns {@code true}
     */
    public static HostnameVerifier getHostnameVerifier() {
        return new HostnameVerifier() {
            @Override
            public boolean verify(String hostname, SSLSession session) {
                return true;
            }
        };
    }

    /**
     * Looks up the platform's default {@link X509TrustManager}.
     *
     * <p>Note that this is the default manager obtained from
     * {@link TrustManagerFactory}, not the trust-all manager used by
     * {@link #getSSLSocketFactory()}.
     *
     * <p>Earlier revisions printed a stack trace and returned {@code null} on
     * failure, which also swallowed the "unexpected trust managers" error
     * raised below. Failures are now reported to the caller instead.
     *
     * @return the single default X.509 trust manager, never {@code null}
     * @throws IllegalStateException if the default trust managers cannot be
     *         loaded, or if they are not exactly one {@link X509TrustManager}
     */
    public static X509TrustManager getX509TrustManager() {
        final TrustManager[] trustManagers;
        try {
            TrustManagerFactory trustManagerFactory =
                    TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm());
            // A null KeyStore makes the factory use the default trust store.
            trustManagerFactory.init((KeyStore) null);
            trustManagers = trustManagerFactory.getTrustManagers();
        } catch (java.security.GeneralSecurityException e) {
            throw new IllegalStateException("Unable to load the default trust managers", e);
        }
        // Checked outside the try block so this error is no longer caught and discarded.
        if (trustManagers.length != 1 || !(trustManagers[0] instanceof X509TrustManager)) {
            throw new IllegalStateException("Unexpected default trust managers:" + Arrays.toString(trustManagers));
        }
        return (X509TrustManager) trustManagers[0];
    }
}
创建一个类,名为 MyOkHttpRequester,继承原本的类,重写方法,忽略SSL证书错误
/**
 * Requester whose OkHttp client builder is wired with the socket factory and
 * host-name verifier supplied by {@code SSLSocketClient}.
 */
public class MyOkHttpRequester extends OkHttpRequester {

    /**
     * Assembles the client builder: redirects are not followed, the connect
     * and read timeouts come from {@code getConf()} (in milliseconds), and the
     * TLS settings come from {@code SSLSocketClient}.
     *
     * @return the configured builder
     */
    @Override
    public OkHttpClient.Builder createOkHttpClientBuilder() {
        OkHttpClient.Builder okHttpBuilder = new OkHttpClient.Builder();

        // Redirect handling is switched off for both plain and SSL redirects.
        okHttpBuilder.followRedirects(false);
        okHttpBuilder.followSslRedirects(false);

        okHttpBuilder.connectTimeout(getConf().getConnectTimeout(), TimeUnit.MILLISECONDS);
        okHttpBuilder.readTimeout(getConf().getReadTimeout(), TimeUnit.MILLISECONDS);

        // NOTE(review): the trust manager passed here is whatever
        // SSLSocketClient.getX509TrustManager() returns, which is not the same
        // manager the socket factory was built with -- confirm this is intended.
        okHttpBuilder.sslSocketFactory(
                SSLSocketClient.getSSLSocketFactory(),
                SSLSocketClient.getX509TrustManager());
        okHttpBuilder.hostnameVerifier(SSLSocketClient.getHostnameVerifier());

        return okHttpBuilder;
    }
}
在入口处替换,参考官方例子
public class DemoAutoNewsCrawler extends BreadthCrawler {
/**
* @param crawlPath crawlPath is the path of the directory which maintains
* information of this crawler
* @param autoParse if autoParse is true, BreadthCrawler will auto extract
* links which match regex rules from pages
*/
public DemoAutoNewsCrawler(String crawlPath, boolean autoParse) {
super(crawlPath, autoParse);
// 替换请求类,处理SSL错误
requester = new MyOkHttpRequester(); // ← 添加这行,偷梁换柱
/*start pages*/
String url = "http://github.blog/";
我使用的版本
<dependency>
<groupId>cn.edu.hfut.dmic.webcollector</groupId>
<artifactId>WebCollector</artifactId>
<version>2.74-alpha</version>
</dependency>
您好! 您的邮件已送到,我会及时答复,请耐心等待!
之前用 2.52 版本可以爬取,改成 2.73-alpha 版后,报错提示 sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target。(是改用 OkHttp 后导致的?)