Closed alexwlchan closed 1 year ago
Observe the following error: https://api.wellcomecollection.org/catalogue/v2/works?workType=a%3C&aggregations=workType
This is coming from an unhandled error in the Elasticsearch response:
{ "error": { "root_cause": [ { "type": "x_content_parse_exception", "reason": "[1:349] [terms] failed to parse field [include]" } ], "type": "x_content_parse_exception", "reason": "[1:349] [terms] failed to parse field [include]", "caused_by": { "type": "illegal_argument_exception", "reason": "expected '>' at position 14" } }, "status": 400 }
which comes from this query:
POST /works-indexed-2023-06-09/_search/template { "source": " { {{#query}} \"query\": { \"bool\": { \"should\": [ { \"span_first\": { \"match\": { \"span_term\": { \"query.title.shingles\": \"{{query}}\" } }, \"end\": 1, \"boost\": 1000.0, \"_name\": \"start of title\" } }, { \"multi_match\": { \"query\": \"{{query}}\", \"fields\": [ \"query.id^1000.0\", \"query.identifiers.value^1000.0\", \"query.items.id^1000.0\", \"query.items.identifiers.value^1000.0\", \"query.images.id^1000.0\", \"query.images.identifiers.value^1000.0\", \"query.referenceNumber^1000.0\", \"query.allIdentifiers^1000.0\" ], \"type\": \"best_fields\", \"analyzer\": \"whitespace_analyzer\", \"operator\": \"Or\", \"_name\": \"identifiers\" } }, { \"dis_max\": { \"queries\": [ { \"multi_match\": { \"query\": \"{{query}}\", \"fields\": [ \"query.titlesAndContributors^100.0\", \"query.titlesAndContributors.english^100.0\", \"query.titlesAndContributors.shingles^100.0\" ], \"type\": \"best_fields\", \"minimum_should_match\": \"-30%\", \"operator\": \"Or\", \"_name\": \"title and contributor exact spellings\" } }, { \"multi_match\": { \"query\": \"{{query}}\", \"fields\": [ \"query.titlesAndContributors.arabic\", \"query.titlesAndContributors.bengali\", \"query.titlesAndContributors.french\", \"query.titlesAndContributors.german\", \"query.titlesAndContributors.hindi\", \"query.titlesAndContributors.italian\" ], \"type\": \"best_fields\", \"minimum_should_match\": \"-30%\", \"operator\": \"Or\", \"_name\": \"non-english titles and contributors\" } } ] } }, { \"bool\": { \"must\": [ { \"multi_match\": { \"query\": \"{{query}}\", \"fields\": [ \"query.collectionPath.path.clean\", \"query.collectionPath.label.cleanPath\", \"query.collectionPath.label\", \"query.collectionPath.path.keyword\" ], \"operator\": \"Or\", \"_name\": \"relations paths\" } } ], \"should\": [ { \"multi_match\": { \"query\": \"{{query}}\", \"fields\": [\"query.title^100.0\", \"query.description^10.0\"], \"type\": \"cross_fields\", 
\"operator\": \"Or\", \"_name\": \"relations text\" } } ] } }, { \"multi_match\": { \"query\": \"{{query}}\", \"fields\": [ \"query.contributors.agent.label^1000.0\", \"query.subjects.concepts.label^10.0\", \"query.genres.concepts.label^10.0\", \"query.production.label^10.0\", \"query.description\", \"query.physicalDescription\", \"query.languages.label\", \"query.edition\", \"query.notes.contents\", \"query.lettering\" ], \"type\": \"cross_fields\", \"minimum_should_match\": \"-30%\", \"operator\": \"Or\", \"_name\": \"data\" } }, { \"multi_match\": { \"query\": \"{{query}}\", \"fields\": [ \"query.title.shingles_cased^1000.0\", \"query.alternativeTitles.shingles_cased^100.0\", \"query.partOf.title.shingles_cased^10.0\" ], \"type\": \"most_fields\", \"minimum_should_match\": \"-30%\", \"operator\": \"Or\", \"_name\": \"shingles cased\" } } ], \"filter\": [ { \"term\": { \"type\": { \"value\": \"Visible\" } } } ], \"minimum_should_match\": \"1\" } } , {{/query}} \"from\": \"{{from}}\", \"size\": \"{{size}}\", \"_source\": { \"includes\": {{#toJson}}includes{{/toJson}} }, {{#aggs}} \"aggs\": {{#toJson}}aggs{{/toJson}}, {{/aggs}} {{#postFilter}} \"post_filter\": {{#toJson}}postFilter{{/toJson}}, {{/postFilter}} \"sort\": [ {{#sortByDate}} { \"query.production.dates.range.from\": { \"order\": \"{{sortByDate}}\" } }, {{/sortByDate}} {{#sortByScore}} { \"_score\": { \"order\": \"desc\" } }, {{/sortByScore}} { \"query.id\": { \"order\": \"asc\" } } ] } ", "params": { "query": null, "from": 0, "size": 10, "sortByDate": null, "sortByScore": false, "includes": [ "display", "type" ], "aggs": { "format": { "filter": { "bool": {} }, "aggs": { "format": { "terms": { "field": "aggregatableValues.workType", "size": 30, "order": [ { "_count": "desc" }, { "_key": "asc" } ] } }, "self": { "filter": { "terms": { "query.format.id": [ "a<" ] } }, "aggs": { "format": { "terms": { "field": "aggregatableValues.workType", "size": 30, "include": ".*(\\\"(a<)\\\").*", "min_doc_count": 0, 
"order": [ { "_count": "desc" }, { "_key": "asc" } ] } } } } } } }, "postFilter": { "bool": { "must": [ { "term": { "type": { "value": "Visible" } } }, { "terms": { "query.format.id": [ "a<" ] } } ] } } } }
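I suspect this part of the query is the issue; angle brackets are control characters in Elasticsearch's regex syntax, so the unescaped `<` in the `include` pattern is being parsed as the start of an operator rather than as a literal character (hence the "expected '>'" error above):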
"field": "aggregatableValues.workType", "size": 30, "include": ".*(\\\"(a<)\\\").*",
I originally overlooked the optional extra regex operators (failing to spot that ALL, which enables them, is the default)
Observe the following error: https://api.wellcomecollection.org/catalogue/v2/works?workType=a%3C&aggregations=workType
This is coming from an unhandled error in the Elasticsearch response:
which comes from this query:
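I suspect this part of the query is the issue; angle brackets are control characters in Elasticsearch's regex syntax, so the unescaped `<` in the `include` pattern is being parsed as the start of an operator rather than as a literal character: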