johtani / elasticsearch-extended-analyze

Extend Analyze API Plugin for Elasticsearch
Apache License 2.0
43 stars 8 forks source link

Specify attributes parameter #6

Closed johtani closed 10 years ago

johtani commented 10 years ago

Add an attributes request parameter. The response includes only the specified attributes.

johtani commented 10 years ago

I think the attributes parameter should receive Attribute class names. Multiple attributes are specified as a comma-separated list.

Example.

curl -XPOST 'localhost:9200/_extended_analyze?tokenizer=kuromoji_tokenizer&filters=kuromoji_baseform&attributes=PartOfSpeechAttribute,ReadingAttribute&pretty' -d '寿司が美味しかった'
johtani commented 10 years ago

Like this. curl -XPOST 'localhost:9200/_extended_analyze?tokenizer=kuromoji_tokenizer&filters=kuromoji_baseform&attributes=ReadingAttribute,PartOfSpeechAttribute&pretty' -d '寿司が美味しかった' { "custom_analyzer" : true, "tokenizer" : { "kuromoji_tokenizer" : [ { "token" : "寿司", "start_offset" : 0, "end_offset" : 2, "type" : "word", "position" : 1, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "noun-common", "partOfSpeech" : "名詞-一般" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "sushi", "reading" : "スシ", "pronunciation (en)" : "sushi", "pronunciation" : "スシ" } } }, { "token" : "が", "start_offset" : 2, "end_offset" : 3, "type" : "word", "position" : 2, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "particle-case-misc", "partOfSpeech" : "助詞-格助詞-一般" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "ga", "reading" : "ガ", "pronunciation (en)" : "ga", "pronunciation" : "ガ" } } }, { "token" : "美味しかっ", "start_offset" : 3, "end_offset" : 8, "type" : "word", "position" : 3, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "adjective-main", "partOfSpeech" : "形容詞-自立" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "oishika", "reading" : "オイシカッ", "pronunciation (en)" : "oishika", "pronunciation" : "オイシカッ" } } }, { "token" : "た", "start_offset" : 8, "end_offset" : 9, "type" : "word", "position" : 4, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "auxiliary-verb", "partOfSpeech" : "助動詞" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "ta", "reading" : "タ", "pronunciation (en)" : "ta", "pronunciation" : "タ" } } } ] }, "tokenfilters" 
: [ { "kuromoji_baseform" : [ { "token" : "寿司", "start_offset" : 0, "end_offset" : 2, "type" : "word", "position" : 1, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "noun-common", "partOfSpeech" : "名詞-一般" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "sushi", "reading" : "スシ", "pronunciation (en)" : "sushi", "pronunciation" : "スシ" } } }, { "token" : "が", "start_offset" : 2, "end_offset" : 3, "type" : "word", "position" : 2, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "particle-case-misc", "partOfSpeech" : "助詞-格助詞-一般" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "ga", "reading" : "ガ", "pronunciation (en)" : "ga", "pronunciation" : "ガ" } } }, { "token" : "美味しい", "start_offset" : 3, "end_offset" : 8, "type" : "word", "position" : 3, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "adjective-main", "partOfSpeech" : "形容詞-自立" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "oishika", "reading" : "オイシカッ", "pronunciation (en)" : "oishika", "pronunciation" : "オイシカッ" } } }, { "token" : "た", "start_offset" : 8, "end_offset" : 9, "type" : "word", "position" : 4, "extended_attributes" : { "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute" : { "partOfSpeech (en)" : "auxiliary-verb", "partOfSpeech" : "助動詞" }, "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute" : { "reading (en)" : "ta", "reading" : "タ", "pronunciation (en)" : "ta", "pronunciation" : "タ" } } } ] } ] }