hankcs / hanlp-lucene-plugin

HanLP中文分词Lucene插件,支持包括Solr在内的基于Lucene的系统
http://www.hankcs.com/nlp/segment/full-text-retrieval-solr-integrated-hanlp-chinese-word-segmentation.html
Apache License 2.0
296 stars 99 forks source link

fix NoSuchElementException in Scanner.next() #8

Closed leeyc0 closed 8 years ago

leeyc0 commented 8 years ago

我用以下指令上傳一個 txt 檔

(solr-6.1.0)/bin/post -c test PA2016.txt

出現以下 exception:

2016-07-29 08:24:39.613 ERROR (qtp110456297-18) [   x:test] o.a.s.h.RequestHandlerBase org.apache.solr.common.SolrException: Exception writing document id /home/yclee/Desktop/aaa.txt to the index; possible analysis error.
    at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:181)
    at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:68)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.AddSchemaFieldsUpdateProcessorFactory$AddSchemaFieldsUpdateProcessor.processAdd(AddSchemaFieldsUpdateProcessorFactory.java:335)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.FieldNameMutatingUpdateProcessorFactory$1.processAdd(FieldNameMutatingUpdateProcessorFactory.java:74)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:939)
    at org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1094)
    at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:720)
    at org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:103)
    at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:48)
    at org.apache.solr.update.processor.AbstractDefaultValueUpdateProcessorFactory$DefaultValueUpdateProcessor.processAdd(AbstractDefaultValueUpdateProcessorFactory.java:93)
    at org.apache.solr.handler.extraction.ExtractingDocumentLoader.doAdd(ExtractingDocumentLoader.java:126)
    at org.apache.solr.handler.extraction.ExtractingDocumentLoader.addDoc(ExtractingDocumentLoader.java:131)
    at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:237)
    at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:69)
    at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:156)
    at org.apache.solr.core.SolrCore.execute(SolrCore.java:2036)
    at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:657)
    at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:464)
    at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:257)
    at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:208)
    at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1668)
    at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:581)
    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
    at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
    at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)
    at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1160)
    at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)
    at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)
    at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1092)
    at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
    at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213)
    at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119)
    at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)
    at org.eclipse.jetty.server.Server.handle(Server.java:518)
    at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:308)
    at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:244)
    at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)
    at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95)
    at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93)
    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceAndRun(ExecuteProduceConsume.java:246)
    at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:156)
    at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:654)
    at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:572)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.util.NoSuchElementException
    at java.util.Scanner.throwFor(Scanner.java:862)
    at java.util.Scanner.next(Scanner.java:1371)
    at com.hankcs.lucene.SegmentWrapper.next(SegmentWrapper.java:69)
    at com.hankcs.lucene.HanLPTokenizer.incrementToken(HanLPTokenizer.java:67)
    at org.apache.lucene.analysis.util.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:51)
    at org.apache.lucene.analysis.core.LowerCaseFilter.incrementToken(LowerCaseFilter.java:45)
    at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:714)
    at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:417)
    at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:373)
    at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:232)
    at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:449)
    at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1492)
    at org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:282)
    at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:214)
    at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:169)
    ... 56 more

這個 patch 是修正這個錯誤

leeyc0 commented 8 years ago

PA2016.txt

附上有關 txt 檔

hankcs commented 8 years ago

太感谢了