Open zhengpq opened 2 years ago
在实际使用中发现两个有趣的现象:
具体见下方:
索引
{ "settings": { "analysis": { "analyzer": { "pinyin_ik": { "tokenizer": "ik_smart", "filter": [ "py" ] }, "pinyin_keyword": { "tokenizer": "keyword", "filter": [ "py" ] }, "pinyin_keyword_full": { "tokenizer": "keyword", "filter": [ "py_full" ] } }, "filter": { "py": { "type": "pinyin", "keep_original": true, "keep_first_letter": true, "keep_full_pinyin": true, "keep_joined_full_pinyin": true, "keep_none_chinese_in_joined_full_pinyin": true, "limit_first_letter_length": 16 }, "py_full": { "type": "pinyin", "keep_original": true, "keep_first_letter": false, "keep_full_pinyin": true, "keep_joined_full_pinyin": true, "keep_none_chinese_in_joined_full_pinyin": true, "limit_first_letter_length": 16 } } } }, "mappings": { "properties": { "page_id": { "type": "integer" }, "page_title": { "type": "text", "analyzer": "pinyin", "search_analyzer": "ik_smart", "fields": { "suggest": { "type": "completion", "analyzer": "pinyin_keyword", "search_analyzer": "pinyin_keyword_full" } } } } } }
### 1、首字母没有办法匹配到
文章标题分析结果
POST wxad-page-new/_analyze { "analyzer": "pinyin_keyword", "text": "公众号流量主基础介绍" } // 结果,部分无关的 token 删除 { "tokens" : [ { "token" : "gzhllzjcjs", "start_offset" : 0, "end_offset" : 10, "type" : "word", "position" : 9 } ] }
搜索及结果
POST wxad-page-new/_search?pretty { "_source": { "includes": [ "page_title" ] }, "suggest": { "title-suggest": { "prefix": "gzhl", "completion": { "field": "page_title.suggest", "size": 100 } } } } // 结果 { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 0, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "suggest" : { "title-suggest" : [ { "text" : "gzhl", "offset" : 0, "length" : 4, "options" : [ ] } ] } }
### 2、输入全部拼音的问题
POST wxad-page-new/_analyze { "analyzer": "pinyin_keyword", "text": "公众号流量主基础介绍" } // 结果,部分无关的 token 删除 { "tokens" : [ { "token" : "gongzhonghaoliuliangzhujichujieshao", "start_offset" : 0, "end_offset" : 10, "type" : "word", "position" : 0 } ] }
搜索及结果——能够匹配到的情况
POST wxad-page-new/_search?pretty { "_source": { "includes": [ "page_title" ] }, "suggest": { "title-suggest": { "prefix": "gongzhong", "completion": { "field": "page_title.suggest", "size": 100 } } } } // 结果 { "took" : 4, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 0, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "suggest" : { "title-suggest" : [ { "text" : "gongzhong", "offset" : 0, "length" : 9, "options" : [ { "text" : "公众号流量主基础介绍", "_index" : "wxad-page-new", "_type" : "_doc", "_id" : "259", "_score" : 1.0, "_source" : { "page_title" : "公众号流量主基础介绍" } } ] } ] } }
搜索及结果——匹配不到的情况
POST wxad-page-new/_search?pretty { "_source": { "includes": [ "page_title" ] }, "suggest": { "title-suggest": { "prefix": "gongzhon", "completion": { "field": "page_title.suggest", "size": 100 } } } } // 结果 { "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 0, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "suggest" : { "title-suggest" : [ { "text" : "gongzhon", "offset" : 0, "length" : 8, "options" : [ ] } ] } }
差了一个 g 就匹配不到,让我感到很费解。
由于是新手,不知道这个是 pinyin 这个插件的特性还是 es 本身的一些特性,如果有哪位小伙伴知道这个问题的解决方案,麻烦告知一下,感谢!
在实际使用中发现两个有趣的现象:
具体见下方:
索引
### 1、首字母没有办法匹配到
文章标题分析结果
搜索及结果
### 2、输入全部拼音的问题
文章标题分析结果
搜索及结果——能够匹配到的情况
搜索及结果——匹配不到的情况
差了一个 g 就匹配不到,让我感到很费解。
由于是新手,不知道这个是 pinyin 这个插件的特性还是 es 本身的一些特性,如果有哪位小伙伴知道这个问题的解决方案,麻烦告知一下,感谢!