在 0.10.3 版本下,如果目标页面是 .shtml 页面,会报如下错误:process request Request{url='https://www.prlife.com.cn/page/message/base/product/list/product_help_list.shtml', method='null', extras=null, priority=0, headers={}, cookies={}} error
java.lang.NullPointerException: null
at java.util.regex.Matcher.getTextLength(Matcher.java:1283)
at java.util.regex.Matcher.reset(Matcher.java:309)
at java.util.regex.Matcher.<init>(Matcher.java:229)
at java.util.regex.Pattern.matcher(Pattern.java:1093)
at us.codecraft.webmagic.utils.UrlUtils.getCharset(UrlUtils.java:119)
at us.codecraft.webmagic.utils.CharsetUtils.detectCharset(CharsetUtils.java:28)
at us.codecraft.webmagic.downloader.HttpClientDownloader.getHtmlCharset(HttpClientDownloader.java:128)
at us.codecraft.webmagic.downloader.HttpClientDownloader.handleResponse(HttpClientDownloader.java:112)
at us.codecraft.webmagic.downloader.HttpClientDownloader.download(HttpClientDownloader.java:82)
at us.codecraft.webmagic.Spider.processRequest(Spider.java:445)
at us.codecraft.webmagic.Spider.access$000(Spider.java:65)
at us.codecraft.webmagic.Spider$1.run(Spider.java:349)
at us.codecraft.webmagic.thread.CountableThreadPool$1.run(CountableThreadPool.java:74)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
在 0.10.3 版本下,如果目标页面是 .shtml 页面,会报如下错误:process request Request{url='https://www.prlife.com.cn/page/message/base/product/list/product_help_list.shtml', method='null', extras=null, priority=0, headers={}, cookies={}} error
java.lang.NullPointerException: null
at java.util.regex.Matcher.getTextLength(Matcher.java:1283)
at java.util.regex.Matcher.reset(Matcher.java:309)
at java.util.regex.Matcher.<init>(Matcher.java:229)
at java.util.regex.Pattern.matcher(Pattern.java:1093)
at us.codecraft.webmagic.utils.UrlUtils.getCharset(UrlUtils.java:119)
at us.codecraft.webmagic.utils.CharsetUtils.detectCharset(CharsetUtils.java:28)
at us.codecraft.webmagic.downloader.HttpClientDownloader.getHtmlCharset(HttpClientDownloader.java:128)
at us.codecraft.webmagic.downloader.HttpClientDownloader.handleResponse(HttpClientDownloader.java:112)
at us.codecraft.webmagic.downloader.HttpClientDownloader.download(HttpClientDownloader.java:82)
at us.codecraft.webmagic.Spider.processRequest(Spider.java:445)
at us.codecraft.webmagic.Spider.access$000(Spider.java:65)
at us.codecraft.webmagic.Spider$1.run(Spider.java:349)
at us.codecraft.webmagic.thread.CountableThreadPool$1.run(CountableThreadPool.java:74)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
切换回 0.10.2 版本后,该问题不再出现。