Open roytmana opened 4 months ago
8.14.3
No response
bundled
Windows 11
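Highlighting a field populated via copy_to produces garbled highlights: the <em> tags are placed around the wrong text (for example, a search for "fox" returns the highlight "<em>boa</em>t trip").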
PUT test-copy_to { "mappings": { "properties": { "text1": { "type": "text", "index_options": "offsets", "copy_to": ["combined"] }, "text2": { "type": "text", "index_options": "offsets", "copy_to": ["combined"] }, "combined": { "type": "text", "index_options": "offsets" } } } }
PUT test-copy_to/_doc/1 { "text1": ["quick brown fox", "jumped over high fence"], "text2": ["flying over", "boat trip"] }
GET test-copy_to/_search { "query": { "match": { "combined": "fox" } }, "highlight": { "fields": { "combined": {} } } }
{ "took": 1, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 1, "relation": "eq" }, "max_score": 0.2876821, "hits": [ { "_index": "test-copy_to", "_id": "1", "_score": 0.2876821, "_source": { "text1": [ "quick brown fox", "jumped over high fence" ], "text2": [ "flying over", "boat trip" ] }, "highlight": { "combined": [ "<em>boa</em>t trip" ] } } ] } }
"subjectTerms.combinedPreferred.name": [ "c<em>ounterna</em>rcotics", "can<em>nabis\u0000co</em>" ]
Result
{ "fields": { "subjectTerms.term.name": [ "drugs", "law enforcement agencies", "cannabis", "compliance oversight", "marijuana", "federal spending", "controlled substances", "state law", "drug enforcement" ], "subjectTerms.combinedPreferred.name": [ "marijuana", "cannabis", "state laws", "counternarcotics", "drugs", "law enforcement agencies", "cannabis", "compliance oversight", "marijuana", "federal spending", "controlled substances", "state law", "drug enforcement" ] }, "highlight": { "subjectTerms.term.name": [ "<em>cannabis</em>" ], "subjectTerms.combinedPreferred.name": [ "c<em>ounterna</em>rcotics", "can<em>nabis\u0000co</em>" ] } }
Query Snippet
{ "query": { "bool":{ "should" : [ { "multi_match": { "query": "marijuana", "type": "phrase", "fields":[ "subjectTerms.term.name^2", "subjectTerms.combinedPreferred.name", "subjectTerms.combinedPreferred.name.prefix" ] } } ] } }, "fields":[ "subjectTerms.combinedPreferred.name*", "subjectTerms.term.name*" ], "highlight": { "fragment_size": 200, "require_field_match":true, "fields": { "subjectTerms.term.name*": { "number_of_fragments":0 }, "subjectTerms.combinedPreferred.name": { "number_of_fragments":0 } } }, "size":500 }
Mapping Snippet
"subjectTerms": { "properties": { "frequency": { "type": "integer" }, "score": { "type": "integer" }, "term": { "properties": { "id": { "type": "long", "copy_to": [ "subjectTerms.combinedPreferred.id" ] }, "name": { "type": "text", "index_options": "offsets", "fields": { "keyword": { "type": "keyword" }, "lowercase": { "type": "keyword", "normalizer": "lowercase" }, "prefix": { "type": "text", "analyzer": "prefix", "search_analyzer": "prefix_search", "index_options": "offsets" } }, "copy_to": [ "subjectTerms.combinedPreferred.name" ] }, "preferredTerm": { "properties": { "id": { "type": "long", "copy_to": [ "subjectTerms.combinedPreferred.id" ] }, "name": { "type": "text", "index_options": "offsets", "fields": { "keyword": { "type": "keyword" }, "lowercase": { "type": "keyword", "normalizer": "lowercase" }, "prefix": { "type": "text", "analyzer": "prefix", "search_analyzer": "prefix_search", "index_options": "offsets" } }, "copy_to": [ "subjectTerms.combinedPreferred.name" ] } } }, "nonPreferredTerms": { "properties": { "id": { "type": "long", "copy_to": [ "subjectTerms.combinedPreferred.id" ] }, "name": { "type": "text", "index_options": "offsets", "fields": { "keyword": { "type": "keyword" }, "lowercase": { "type": "keyword", "normalizer": "lowercase" }, "prefix": { "type": "text", "analyzer": "prefix", "search_analyzer": "prefix_search", "index_options": "offsets" } }, "copy_to": [ "subjectTerms.combinedPreferred.name" ] } } } } }, "combinedPreferred": { "properties": { "id": { "type": "long" }, "name": { "type": "text", "index_options": "offsets", "fields": { "keyword": { "type": "keyword" }, "lowercase": { "type": "keyword", "normalizer": "lowercase" }, "prefix": { "type": "text", "analyzer": "prefix", "search_analyzer": "prefix_search", "index_options": "offsets" } } } } } } }
Pinging @elastic/es-search-relevance (Team:Search Relevance)
Elasticsearch Version
8.14.3
Installed Plugins
No response
Java Version
bundled
OS Version
Windows 11
Problem Description
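Highlighting a field populated via copy_to produces garbled highlights: the <em> tags are placed around the wrong text (for example, a search for "fox" returns the highlight "<em>boa</em>t trip").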
Steps to Reproduce
An extract from my original code (result, query snippet, and mapping snippet):
Result
Query Snippet
Mapping Snippet
No response