codelibs / elasticsearch-analysis-kuromoji-ipadic-neologd

Elasticsearch's Analyzer for Kuromoji with Neologd
Apache License 2.0
114 stars 26 forks source link

Got illegal_argument_exception when using kuromoji_neologd_tokenizer with synonym_graph #10

Open PyYoshi opened 6 years ago

PyYoshi commented 6 years ago

Environment

What is the problem?

Expected behavior

The index-creation request returns HTTP status 200.

Actual behavior

The index-creation request returns HTTP status 400 with an illegal_argument_exception ("failed to build synonyms").

Steps to reproduce the behavior

Use kuromoji_neologd_tokenizer with synonym_graph (fails with HTTP status 400)

request:

$ curl -X PUT  "localhost:9210/es63-reproduce-index?pretty" -H 'Content-Type: application/json' -d@reproduce.json

response:

{
  "error" : {
    "root_cause" : [
      {
        "type" : "illegal_argument_exception",
        "reason" : "failed to build synonyms"
      }
    ],
    "type" : "illegal_argument_exception",
    "reason" : "failed to build synonyms",
    "caused_by" : {
      "type" : "parse_exception",
      "reason" : "Invalid synonym rule at line 1",
      "caused_by" : {
        "type" : "illegal_argument_exception",
        "reason" : "term: 高等学校 analyzed to a token (高等学校) with position increment != 1 (got: 0)"
      }
    }
  },
  "status" : 400
}

reproduce.json:

{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "type": "synonym_graph",
            "synonyms": ["高校,高等学校"]
          }
        },
        "tokenizer": {
          "ja_text_tokenizer": {
            "type": "kuromoji_neologd_tokenizer",
            "mode": "search"
          }
        },
        "analyzer": {
          "ja_text_analyzer": {
            "tokenizer": "ja_text_tokenizer",
            "type": "custom",
            "filter": ["synonym"]
          }
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "dynamic": "strict",
      "properties": {
        "name": {
          "fielddata": true,
          "type": "text",
          "analyzer": "ja_text_analyzer"
        }
      }
    }
  }
}

Use kuromoji_tokenizer with synonym_graph (succeeds; shown for comparison)

request:

$ curl -X PUT  "localhost:9210/es63-reproduce-index?pretty" -H 'Content-Type: application/json' -d@ok.json

response:

{
  "acknowledged" : true,
  "shards_acknowledged" : true,
  "index" : "es63-reproduce-index"
}

ok.json:

{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "type": "synonym_graph",
            "synonyms": ["高校,高等学校"]
          }
        },
        "tokenizer": {
          "ja_text_tokenizer": {
            "type": "kuromoji_tokenizer",
            "mode": "search"
          }
        },
        "analyzer": {
          "ja_text_analyzer": {
            "tokenizer": "ja_text_tokenizer",
            "type": "custom",
            "filter": ["synonym"]
          }
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "dynamic": "strict",
      "properties": {
        "name": {
          "fielddata": true,
          "type": "text",
          "analyzer": "ja_text_analyzer"
        }
      }
    }
  }
}