Open wuzhenglin510 opened 4 years ago
报错提示: startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards startOffset=1,endOffset=2,lastStartOffset=4 for field 'nickname.pinyin'
创建索引的命令:
put /crm_fans_v1 { "settings": { "number_of_shards": 1, "analysis": { "analyzer": { "phone_analyzer": { "tokenizer": "phone_tokenizer" }, "pinyin_analyzer": { "tokenizer": "pinyin_tokenizer" }, "pinyin_search_analyzer": { "tokenizer": "pinyin_search_tokenizer" } }, "tokenizer": { "phone_tokenizer": { "type": "ngram", "min_gram": 2, "max_gram": 3, "token_chars": [ "digit" ] }, "pinyin_tokenizer": { "type": "pinyin", "keep_first_letter": false, "keep_separate_first_letter": true, "keep_full_pinyin": false, "lowercase": false, "keep_joined_full_pinyin": false, "remove_duplicated_term": false, "keep_none_chinese": true, "keep_none_chinese_together": false, "keep_none_chinese_in_first_letter": false, "ignore_pinyin_offset": false, "trim_whitespace": true }, "pinyin_search_tokenizer": { "type": "standard", "max_token_length": 1 } } } }, "mappings": { "_doc": { "properties": { "id": { "type": "long" }, "busId": { "type": "long" }, "deleted": { "type": "boolean" }, "nickname": { "type": "text", "term_vector": "with_positions_offsets", "fields": { "keyword": { "type": "keyword" }, "pinyin": { "type": "text", "analyzer": "pinyin_analyzer", "search_analyzer": "pinyin_search_analyzer", "term_vector": "with_positions_offsets" } } }, "mobilePhone": { "type": "text", "analyzer": "phone_analyzer", "term_vector": "with_positions_offsets", "fields": { "keyword": { "type": "keyword" } } }, "gender": { "type": "keyword" }, "customerId": { "type": "long" }, "customerType": { "type": "keyword" }, "subscribeTime": { "type": "long" }, "profile": { "type": "keyword" }, "openId": { "type": "keyword" }, "totalSendCouponTimes": { "type": "integer" }, "currentMonthSendCouponTimes": { "type": "integer" } } } } }
进行分词的命令: get /crm_fans_v1/_analyze { "text": ["Yang小波🔜🐑"], "analyzer": "pinyin_analyzer" }
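分词得到的错误数据(token 没有按 start_offset 递增的顺序返回,例如 "x" 的 start_offset=4 排在了 "a" 的 start_offset=1 之前,与上面报错中的 startOffset=1、lastStartOffset=4 对应):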
{ "tokens" : [ { "token" : "Y", "start_offset" : 0, "end_offset" : 1, "type" : "word", "position" : 0 }, { "token" : "x", "start_offset" : 4, "end_offset" : 5, "type" : "word", "position" : 0 }, { "token" : "a", "start_offset" : 1, "end_offset" : 2, "type" : "word", "position" : 1 }, { "token" : "b", "start_offset" : 5, "end_offset" : 6, "type" : "word", "position" : 1 }, { "token" : "n", "start_offset" : 2, "end_offset" : 3, "type" : "word", "position" : 2 }, { "token" : "g", "start_offset" : 3, "end_offset" : 4, "type" : "word", "position" : 3 } ] }
同样的请求多试几次后,又能得到正确的分词数据:
{ "tokens" : [ { "token" : "Y", "start_offset" : 0, "end_offset" : 1, "type" : "word", "position" : 0 }, { "token" : "a", "start_offset" : 1, "end_offset" : 2, "type" : "word", "position" : 1 }, { "token" : "n", "start_offset" : 2, "end_offset" : 3, "type" : "word", "position" : 2 }, { "token" : "g", "start_offset" : 3, "end_offset" : 4, "type" : "word", "position" : 3 }, { "token" : "x", "start_offset" : 4, "end_offset" : 5, "type" : "word", "position" : 3 }, { "token" : "b", "start_offset" : 5, "end_offset" : 6, "type" : "word", "position" : 4 } ] }
请用 master 分支的版本重新试一下,看看问题是否还存在。
报错提示: startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards startOffset=1,endOffset=2,lastStartOffset=4 for field 'nickname.pinyin'
创建索引的命令:
进行分词 get /crm_fans_v1/_analyze { "text": ["Yang小波🔜🐑"], "analyzer": "pinyin_analyzer" }
分词得到的错误数据
多试几次就得到正确的分词数据: