duydo / elasticsearch-analysis-vietnamese

Vietnamese Analysis Plugin for Elasticsearch
Apache License 2.0
505 stars 211 forks source link

Error when bulk indexing database using vnTokenizer #51

Closed nguyentienlong closed 5 years ago

nguyentienlong commented 5 years ago

I found this error in the log file when bulk indexing a database using the plugin. Any suggestions on how to debug this? I am running ES 5.6.6; the plugin info is:

qvB_lVV analysis-icu                      5.6.5
qvB_lVV elasticsearch-analysis-vietnamese 5.6.5
[2018-11-15T21:31:37,514][DEBUG][o.e.a.b.TransportShardBulkAction] [qvB_lVV] [lalafood_2018_11_16][0] failed to execute bulk item (index) BulkShardRequest [[lalafood_2018_11_16][0]] containing [21] requests
java.lang.StringIndexOutOfBoundsException: String index out of range: 8
    at java.lang.String.substring(String.java:1963) ~[?:1.8.0_181]
    at vn.hus.nlp.tokenizer.Tokenizer.getNextToken(Tokenizer.java:469) ~[?:?]
    at vn.hus.nlp.tokenizer.Tokenizer.tokenize(Tokenizer.java:214) ~[?:?]
    at org.apache.lucene.analysis.vi.VietnameseTokenizer.tokenize(VietnameseTokenizer.java:94) ~[?:?]
    at org.apache.lucene.analysis.vi.VietnameseTokenizer.reset(VietnameseTokenizer.java:142) ~[?:?]
    at org.apache.lucene.analysis.TokenFilter.reset(TokenFilter.java:70) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:742) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:447) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:403) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:232) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:478) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1571) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1316) ~[lucene-core-6.6.1.jar:6.6.1 9aa465a89b64ff2dabe7b4d50c472de32c298683 - varunthacker - 2017-08-29 21:54:39]
    at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:662) ~[elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.index.engine.InternalEngine.indexIntoLucene(InternalEngine.java:606) ~[elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:504) ~[elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:557) ~[elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:546) ~[elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.bulk.TransportShardBulkAction.executeIndexRequestOnPrimary(TransportShardBulkAction.java:492) ~[elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.bulk.TransportShardBulkAction.executeBulkItemRequest(TransportShardBulkAction.java:146) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnPrimary(TransportShardBulkAction.java:115) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnPrimary(TransportShardBulkAction.java:70) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryShardReference.perform(TransportReplicationAction.java:975) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryShardReference.perform(TransportReplicationAction.java:944) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.ReplicationOperation.execute(ReplicationOperation.java:113) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncPrimaryAction.onResponse(TransportReplicationAction.java:345) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncPrimaryAction.onResponse(TransportReplicationAction.java:270) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$1.onResponse(TransportReplicationAction.java:924) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$1.onResponse(TransportReplicationAction.java:921) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.index.shard.IndexShardOperationsLock.acquire(IndexShardOperationsLock.java:151) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.index.shard.IndexShard.acquirePrimaryOperationLock(IndexShard.java:1659) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction.acquirePrimaryShardReference(TransportReplicationAction.java:933) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction.access$500(TransportReplicationAction.java:92) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncPrimaryAction.doRun(TransportReplicationAction.java:291) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler.messageReceived(TransportReplicationAction.java:266) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler.messageReceived(TransportReplicationAction.java:248) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:69) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:654) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:638) [elasticsearch-5.6.5.jar:5.6.5]
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-5.6.5.jar:5.6.5]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_181]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_181]
    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_181]
duydo commented 5 years ago

Thanks for reporting the bug. It is related to https://github.com/duydo/elasticsearch-analysis-vietnamese/issues/52