django-es / django-elasticsearch-dsl

This is a package that allows indexing of django models in elasticsearch with elasticsearch-dsl-py.
Other
1.03k stars 264 forks source link

NestedField not working for ManyToManyField with ES 6.5.4 #157

Open georgesbiaux opened 5 years ago

georgesbiaux commented 5 years ago

Hi,

I am trying to migrate from ES 5.6.4 to ES 6.5.4.

I currently use

elasticsearch-dsl==6.1.0
django-elasticsearch-dsl==0.5.1

I have a model like this:

class RubricPage(models.Model):
    """Django model for a rubric page.

    A page carries a title, an optional background image, a default color,
    a creation timestamp, many-to-many links to sections and e-mail
    contacts, and a mandatory link to a ``Branch``.
    """

    # Human-readable page name, limited to 200 characters.
    title = models.CharField(verbose_name='Rubric page name', max_length=200)

    # Optional upload (null/blank allowed) restricted to PNG, JPEG and GIF.
    # 2 * 1024 * 1024 == 2097152, the limit announced in help_text.
    image = ContentTypeRestrictedFileField(
        verbose_name='Backgroung image',
        help_text='PNG, JPG, GIF (max: 2Mio)',
        content_types=['image/png', 'image/jpeg', 'image/gif'],
        max_upload_size=2 * 1024 * 1024,
        upload_to="%Y/%m/%d/%H/%M/%S",
        blank=True,
        null=True,
    )

    color = RGBColorField(verbose_name='Default color')

    # Filled in automatically when the row is first created (auto_now_add).
    created_at = models.DateTimeField(
        verbose_name='Rubric page creation date',
        auto_now_add=True,
    )

    # Many-to-many links; both may be left empty.
    # NOTE(review): SortedManyToManyField presumably keeps a user-defined
    # ordering of the related rows -- confirm against the sortedm2m package.
    sections = SortedManyToManyField(
        'api.SectionRubricPage',
        blank=True,
        verbose_name='Sections',
    )

    contacts = SortedManyToManyField(
        'api.EmailContact',
        blank=True,
        verbose_name='Contacts',
    )

    # Mandatory owning branch; deleting the branch cascades to its pages.
    # Rows created without an explicit branch fall back to pk 1.
    branch = models.ForeignKey(
        Branch,
        default=1,
        blank=False,
        null=False,
        on_delete=models.CASCADE,
    )

    class Meta:
        verbose_name_plural = 'Rubric pages'
        verbose_name = 'Rubric page'

    def __str__(self):
        """Return the page title as the textual representation."""
        return '{0}'.format(self.title)

And a Document like this

@ppr_index.doc_type
class RubricPageDocument(DocType):
    """Elasticsearch document describing how a ``RubricPage`` is indexed.

    Besides the model's ``title``, the document embeds the page's sections
    (each with its own nested blocks) and the owning branch as nested
    objects, so they can be searched with ``nested`` queries.
    """

    # Sections of the page; every section carries its own nested blocks.
    sections = fields.NestedField(properties={
        'blocks': fields.NestedField(properties={
            'content': fields.StringField(),
            'file_url': fields.StringField(),
            'pdf_content': fields.StringField(),
            'pk': fields.IntegerField(),
            'title': fields.StringField(),
        }),
        'content': fields.StringField(),
        'pk': fields.IntegerField(),
        'title': fields.StringField(),
    })

    # Owning branch, indexed with its primary key and name.
    branch = fields.NestedField(properties={
        'pk': fields.IntegerField(),
        'name': fields.StringField(),
    })

    class Meta:
        model = RubricPage
        # The fields of the model to be indexed in Elasticsearch
        fields = [
            'title',
        ]

        # Ensure the rubric index is updated when a block, a section or a
        # branch is updated. Branch must be listed here, otherwise the
        # Branch case of get_instances_from_related() is never reached and
        # the indexed branch data goes stale.
        related_models = [BlockSectionRubricPage, SectionRubricPage, Branch]

    def get_instances_from_related(self, related_instance):
        """
        Define how to retrieve the RubricPage instances to re-index
        from a SectionRubricPage, a BlockSectionRubricPage or a Branch.

        Returns a queryset of the affected ``RubricPage`` rows, or ``None``
        when ``related_instance`` is of none of the handled types.
        """
        if isinstance(related_instance, SectionRubricPage):
            # Reverse side of RubricPage.sections.
            return related_instance.rubricpage_set.all()
        if isinstance(related_instance, BlockSectionRubricPage):
            # Pages owning a section that contains this block.
            return RubricPage.objects.filter(sections__blocks__pk=related_instance.pk)
        if isinstance(related_instance, Branch):
            # Pages attached to this branch.
            return RubricPage.objects.filter(branch__pk=related_instance.pk)
        return None

The problem is when I try to search the index with this code:

def search_query_in_rubric_pages(query, branch):
    """Search the rubric-page index for ``query`` within one branch.

    A page matches when the query matches its title, one of its sections
    (title or content) or one of its section blocks (title, content or
    PDF content). Results are restricted to pages whose nested
    ``branch.name`` matches ``branch.name``, and the title is highlighted
    using the ``SEARCH_*`` settings.

    Returns the list of ``_format_hit(hit, 'rubric')`` for every hit of
    the response.
    """
    section_fields = ['sections.title', 'sections.content']
    block_fields = [
        'sections.blocks.title',
        'sections.blocks.content',
        'sections.blocks.pdf_content',
    ]

    # Top-level clause on the page title.
    title_clause = _build_match_query(query, 'title')

    # Nested clause on the sections, with inner hits on the same fields.
    section_matches = [
        _build_match_query(query, name) for name in section_fields
    ]
    sections_clause = Nested(
        path='sections',
        query=Bool(should=section_matches),
        inner_hits=_build_inner_hits(section_fields),
    )

    # Nested clause on the blocks of the sections.
    block_matches = [
        _build_match_query(query, name) for name in block_fields
    ]
    blocks_clause = Nested(
        path='sections.blocks',
        query=Bool(should=block_matches),
        inner_hits=_build_inner_hits(block_fields),
    )

    # Any one of the three clauses is enough for a page to match.
    combined = title_clause | sections_clause | blocks_clause

    search = (
        RubricPageDocument.search()
        .query(combined)
        # Keep only the pages of the requested branch.
        .filter(
            "nested",
            path="branch",
            query=Match(**{'branch.name': branch.name}),
        )
        # Add highlight
        .highlight(
            'title',
            fragment_size=settings.SEARCH_FRAGMENT_SIZE,
            pre_tags=settings.SEARCH_PRE_TAGS,
            post_tags=settings.SEARCH_POST_TAGS,
        )
    )

    formatted_hits = []
    for hit in search.execute():
        formatted_hits.append(_format_hit(hit, 'rubric'))
    return formatted_hits

I get a 400 error from ElasticSearch.

The ES error:

{
  "error": {
    "root_cause": [
      {
        "type": "query_shard_exception",
        "reason": "failed to create query: {\n  \"bool\" : {\n    \"filter\" : [\n      {\n        \"nested\" : {\n          \"query\" : {\n            \"match\" : {\n              \"branch.name\" : {\n                \"query\" : \"CNP_GROUP\",\n                \"operator\" : \"OR\",\n                \"prefix_length\" : 0,\n                \"max_expansions\" : 50,\n                \"fuzzy_transpositions\" : true,\n                \"lenient\" : false,\n                \"zero_terms_query\" : \"NONE\",\n                \"auto_generate_synonyms_phrase_query\" : true,\n                \"boost\" : 1.0\n              }\n            }\n          },\n          \"path\" : \"branch\",\n          \"ignore_unmapped\" : false,\n          \"score_mode\" : \"avg\",\n          \"boost\" : 1.0\n        }\n      }\n    ],\n    \"should\" : [\n      {\n        \"match\" : {\n          \"title\" : {\n            \"query\" : \"test\",\n            \"operator\" : \"OR\",\n            \"prefix_length\" : 0,\n            \"max_expansions\" : 50,\n            \"fuzzy_transpositions\" : true,\n            \"lenient\" : false,\n            \"zero_terms_query\" : \"NONE\",\n            \"auto_generate_synonyms_phrase_query\" : true,\n            \"boost\" : 1.0\n          }\n        }\n      },\n      {\n        \"nested\" : {\n          \"query\" : {\n            \"bool\" : {\n              \"should\" : [\n                {\n                  \"match\" : {\n                    \"sections.title\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n       
         },\n                {\n                  \"match\" : {\n                    \"sections.content\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                }\n              ],\n              \"adjust_pure_negative\" : true,\n              \"boost\" : 1.0\n            }\n          },\n          \"path\" : \"sections\",\n          \"ignore_unmapped\" : false,\n          \"score_mode\" : \"avg\",\n          \"boost\" : 1.0,\n          \"inner_hits\" : {\n            \"ignore_unmapped\" : false,\n            \"from\" : 0,\n            \"size\" : 15,\n            \"version\" : false,\n            \"explain\" : false,\n            \"track_scores\" : false,\n            \"highlight\" : {\n              \"fields\" : {\n                \"sections.title\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                },\n                \"sections.content\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                }\n              }\n            }\n          }\n        }\n      },\n      {\n        \"nested\" : {\n          \"query\" : {\n            \"bool\" : {\n              \"should\" : [\n                {\n                  \"match\" : {\n                    
\"sections.blocks.title\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                },\n                {\n                  \"match\" : {\n                    \"sections.blocks.content\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                },\n                {\n                  \"match\" : {\n                    \"sections.blocks.pdf_content\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                }\n              ],\n              \"adjust_pure_negative\" : true,\n              \"boost\" : 1.0\n            }\n          },\n          \"path\" : \"sections.blocks\",\n          \"ignore_unmapped\" : false,\n  
        \"score_mode\" : \"avg\",\n          \"boost\" : 1.0,\n          \"inner_hits\" : {\n            \"ignore_unmapped\" : false,\n            \"from\" : 0,\n            \"size\" : 15,\n            \"version\" : false,\n            \"explain\" : false,\n            \"track_scores\" : false,\n            \"highlight\" : {\n              \"fields\" : {\n                \"sections.blocks.content\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                },\n                \"sections.blocks.title\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                },\n                \"sections.blocks.pdf_content\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                }\n              }\n            }\n          }\n        }\n      }\n    ],\n    \"adjust_pure_negative\" : true,\n    \"minimum_should_match\" : \"1\",\n    \"boost\" : 1.0\n  }\n}",
        "index_uuid": "0R8of9sGRYS0FbFAL5aNoQ",
        "index": "ppr_index"
      }
    ],
    "type": "search_phase_execution_exception",
    "reason": "all shards failed",
    "phase": "query",
    "grouped": true,
    "failed_shards": [
      {
        "shard": 0,
        "index": "ppr_index",
        "node": "vhS-2kS4RlypGfNN-IzNGQ",
        "reason": {
          "type": "query_shard_exception",
          "reason": "failed to create query: {\n  \"bool\" : {\n    \"filter\" : [\n      {\n        \"nested\" : {\n          \"query\" : {\n            \"match\" : {\n              \"branch.name\" : {\n                \"query\" : \"CNP_GROUP\",\n                \"operator\" : \"OR\",\n                \"prefix_length\" : 0,\n                \"max_expansions\" : 50,\n                \"fuzzy_transpositions\" : true,\n                \"lenient\" : false,\n                \"zero_terms_query\" : \"NONE\",\n                \"auto_generate_synonyms_phrase_query\" : true,\n                \"boost\" : 1.0\n              }\n            }\n          },\n          \"path\" : \"branch\",\n          \"ignore_unmapped\" : false,\n          \"score_mode\" : \"avg\",\n          \"boost\" : 1.0\n        }\n      }\n    ],\n    \"should\" : [\n      {\n        \"match\" : {\n          \"title\" : {\n            \"query\" : \"test\",\n            \"operator\" : \"OR\",\n            \"prefix_length\" : 0,\n            \"max_expansions\" : 50,\n            \"fuzzy_transpositions\" : true,\n            \"lenient\" : false,\n            \"zero_terms_query\" : \"NONE\",\n            \"auto_generate_synonyms_phrase_query\" : true,\n            \"boost\" : 1.0\n          }\n        }\n      },\n      {\n        \"nested\" : {\n          \"query\" : {\n            \"bool\" : {\n              \"should\" : [\n                {\n                  \"match\" : {\n                    \"sections.title\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n     
           },\n                {\n                  \"match\" : {\n                    \"sections.content\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                }\n              ],\n              \"adjust_pure_negative\" : true,\n              \"boost\" : 1.0\n            }\n          },\n          \"path\" : \"sections\",\n          \"ignore_unmapped\" : false,\n          \"score_mode\" : \"avg\",\n          \"boost\" : 1.0,\n          \"inner_hits\" : {\n            \"ignore_unmapped\" : false,\n            \"from\" : 0,\n            \"size\" : 15,\n            \"version\" : false,\n            \"explain\" : false,\n            \"track_scores\" : false,\n            \"highlight\" : {\n              \"fields\" : {\n                \"sections.title\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                },\n                \"sections.content\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                }\n              }\n            }\n          }\n        }\n      },\n      {\n        \"nested\" : {\n          \"query\" : {\n            \"bool\" : {\n              \"should\" : [\n                {\n                  \"match\" : {\n                    
\"sections.blocks.title\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                },\n                {\n                  \"match\" : {\n                    \"sections.blocks.content\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                },\n                {\n                  \"match\" : {\n                    \"sections.blocks.pdf_content\" : {\n                      \"query\" : \"test\",\n                      \"operator\" : \"OR\",\n                      \"prefix_length\" : 0,\n                      \"max_expansions\" : 50,\n                      \"fuzzy_transpositions\" : true,\n                      \"lenient\" : false,\n                      \"zero_terms_query\" : \"NONE\",\n                      \"auto_generate_synonyms_phrase_query\" : true,\n                      \"boost\" : 1.0\n                    }\n                  }\n                }\n              ],\n              \"adjust_pure_negative\" : true,\n              \"boost\" : 1.0\n            }\n          },\n          \"path\" : \"sections.blocks\",\n          \"ignore_unmapped\" : false,\n  
        \"score_mode\" : \"avg\",\n          \"boost\" : 1.0,\n          \"inner_hits\" : {\n            \"ignore_unmapped\" : false,\n            \"from\" : 0,\n            \"size\" : 15,\n            \"version\" : false,\n            \"explain\" : false,\n            \"track_scores\" : false,\n            \"highlight\" : {\n              \"fields\" : {\n                \"sections.blocks.content\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                },\n                \"sections.blocks.title\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                },\n                \"sections.blocks.pdf_content\" : {\n                  \"pre_tags\" : [\n                    \"<b>\"\n                  ],\n                  \"post_tags\" : [\n                    \"</b>\"\n                  ],\n                  \"fragment_size\" : 60\n                }\n              }\n            }\n          }\n        }\n      }\n    ],\n    \"adjust_pure_negative\" : true,\n    \"minimum_should_match\" : \"1\",\n    \"boost\" : 1.0\n  }\n}",
          "index_uuid": "0R8of9sGRYS0FbFAL5aNoQ",
          "index": "ppr_index",
          "caused_by": {
            "type": "illegal_state_exception",
            "reason": "[nested] nested object under path [sections] is not of nested type"
          }
        }
      }
    ]
  },
  "status": 400
}

Basically, my sections property does not seem to be recognized as nested by ES, and when I check the ES schema, I get this:

{
    "ppr_index": {
        "aliases": {},
        "mappings": {
            "doc": {
                "properties": {
                    "action": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        },
                        "analyzer": "default",
                        "search_analyzer": "default_search"
                    },
                    "branch": {
                        "type": "nested",
                        "properties": {
                            "name": {
                                "type": "text",
                                "analyzer": "default",
                                "search_analyzer": "default_search"
                            },
                            "pk": {
                                "type": "integer"
                            }
                        }
                    },
                    "content": {
                        "type": "text",
                        "analyzer": "default",
                        "search_analyzer": "default_search"
                    },
                    "groups": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        },
                        "analyzer": "default",
                        "search_analyzer": "default_search"
                    },
                    "public": {
                        "type": "boolean"
                    },
                    "sections": {
                        // Here, I think a "type": "nested" property is missing
                        "properties": {
                            "blocks": {
                                "properties": {
                                    "content": {
                                        "type": "text",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        },
                                        "analyzer": "default",
                                        "search_analyzer": "default_search"
                                    },
                                    "file_url": {
                                        "type": "text",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        },
                                        "analyzer": "default",
                                        "search_analyzer": "default_search"
                                    },
                                    "pdf_content": {
                                        "type": "text",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        },
                                        "analyzer": "default",
                                        "search_analyzer": "default_search"
                                    },
                                    "pk": {
                                        "type": "long"
                                    },
                                    "title": {
                                        "type": "text",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        },
                                        "analyzer": "default",
                                        "search_analyzer": "default_search"
                                    }
                                }
                            },
                            "content": {
                                "type": "text",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256
                                    }
                                },
                                "analyzer": "default",
                                "search_analyzer": "default_search"
                            },
                            "pk": {
                                "type": "long"
                            },
                            "title": {
                                "type": "text",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256
                                    }
                                },
                                "analyzer": "default",
                                "search_analyzer": "default_search"
                            }
                        }
                    },
                    "test_url": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        },
                        "analyzer": "default",
                        "search_analyzer": "default_search"
                    },
                    "title": {
                        "type": "text",
                        "analyzer": "default",
                        "search_analyzer": "default_search"
                    }
                }
            }
        },
        "settings": {
            "index": {
                "number_of_shards": "1",
                "provided_name": "ppr_index",
                "creation_date": "1549905186169",
                "analysis": {
                    "filter": {
                        "drg_stop": {
                            "type": "stop",
                            "stopwords": "_french_"
                        }
                    },
                    "analyzer": {
                        "default_search": {
                            "filter": [
                                "standard",
                                "lowercase",
                                "drg_stop"
                            ],
                            "tokenizer": "standard"
                        }
                    }
                },
                "number_of_replicas": "0",
                "uuid": "QCKoCew4T7mA0Dsn6BC7Pg",
                "version": {
                    "created": "6050499"
                }
            }
        }
    }
}

As shown above, the "type": "nested" property is indeed missing for the many-to-many fields.

Any clue what's going on?

Thanks :)

georgesbiaux commented 5 years ago

FYI, this PR https://github.com/sabricot/django-elasticsearch-dsl/pull/136 seems to solve this issue.