Closed jmrichardson closed 7 years ago
You can make sure fields are renamed properly with RenameTagger
or ReplaceTagger
, and you can use KeepOnlyTagger
to only keep what you want as well (all from Importer module).
Unfortunately, the Elasticsearch committer does not have a way to convert flat values into Elasticsearch nested objects. You can make a feature request for this here. If you do so, please suggest in that new ticket how this could be tackled if you have ideas (how to best convert flat values to nested objects).
I will test a few temporary solutions and let you know if they work until this feature potentially gets added.
Thanks
As a temporary fix (and potential idea to add to committer):
First delete the index if it already exists Create the index with just the nested field
PUT wmsearch
{
"mappings": {
"doc": {
"properties": {
"scope" : {
"type" : "nested",
"properties" : {
"level" : {
"type" : "integer"
},
"ancestors" : {
"type" : "keyword",
"index" : "true"
},
"value" : {
"type" : "keyword",
"index" : "true"
},
"order" : {
"type" : "integer"
}
}
}
}
}
}
}
Then start the collector. The end result is the fields created by the collector and the nested field (scope) persists:
{
"wmsearch": {
"mappings": {
"doc": {
"properties": {
"Application-Name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Application-Version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Character Count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Character-Count-With-Spaces": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Company": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Content-Encoding": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Content-Length": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Content-Type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Creation-Date": {
"type": "date"
},
"Edit-Time": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Last-Author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Last-Modified": {
"type": "date"
},
"Last-Printed": {
"type": "date"
},
"Last-Save-Date": {
"type": "date"
},
"Line-Count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Page-Count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Paragraph-Count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Revision-Number": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Template": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Word-Count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"X-Parsed-By": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"collector": {
"properties": {
"content-type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"filesize": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"is-crawl-new": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"lastmodified": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"content": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"cp:revision": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"crawl_date": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"creator": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"custom:_AdHocReviewCycleID": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"custom:_AuthorEmail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"custom:_AuthorEmailDisplayName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"custom:_EmailSubject": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"date": {
"type": "date"
},
"dc:creator": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dc:title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dcterms:created": {
"type": "date"
},
"dcterms:modified": {
"type": "date"
},
"document": {
"properties": {
"contentFamily": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"contentType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"filename": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"generatedTitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"reference": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"extended-properties:AppVersion": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"extended-properties:Application": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"extended-properties:Company": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"extended-properties:Template": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:character-count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:character-count-with-spaces": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:creation-date": {
"type": "date"
},
"meta:last-author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:line-count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:page-count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:paragraph-count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"meta:print-date": {
"type": "date"
},
"meta:save-date": {
"type": "date"
},
"meta:word-count": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"modified": {
"type": "date"
},
"scope": {
"type": "nested",
"properties": {
"ancestors": {
"type": "keyword"
},
"level": {
"type": "integer"
},
"order": {
"type": "integer"
},
"value": {
"type": "keyword"
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"xmpTPg:NPages": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
I copied your last comment to https://github.com/Norconex/committer-elasticsearch/issues/16
Closing as this issue will be tracked there.
Hi, I am looking for a way to create a nested field with the following structure in my elastic search ingested documents:
Can I do this in the importer? I don't see a way to manipulate the field mappings.
Thanks