tortoise / tortoise-orm

Familiar asyncio ORM for python, built with relations in mind
https://tortoise.github.io
Apache License 2.0
4.66k stars 389 forks source link

[help needed] Filter for list of relations does not work #756

Open spacemanspiff2007 opened 3 years ago

spacemanspiff2007 commented 3 years ago

I am trying to find an model that has all relations from the filter query. It can have more relations, but it can't have less. Unfortunately it seems the Q relations do not work here. How would I express id in m2m_relation?

from tortoise import fields, run_async, Tortoise
from tortoise.fields import ManyToManyRelation, ManyToManyField
from tortoise.models import Model
from tortoise.query_utils import Q

class MyModel(Model):
    id = fields.IntField(pk=True)
    properties: ManyToManyRelation["Property"]

class Property(Model):
    id = fields.IntField(pk=True)
    my_models: ManyToManyRelation["MyModel"] = ManyToManyField("models.MyModel", related_name='properties')

async def run():

    # Generate the schema
    await Tortoise.init({"connections": {"default_connection": {"engine": "tortoise.backends.sqlite", "credentials": {"file_path": ':memory:'},},}, "apps": {"models": {"models": ['__main__'], "default_connection": "default_connection"},},})
    await Tortoise.generate_schemas()

    m1 = await MyModel.create(id=1)
    m2 = await MyModel.create(id=2)
    p1 = await Property.create(id=1)
    p2 = await Property.create(id=2)

    await m1.properties.add(p1)
    await m2.properties.add(p1, p2)

    found = await MyModel.filter(
        Q(
            Q(properties__id=p1.id),
            Q(properties__id=p2.id)
        )
    )

    # Looking for [<MyModel 2>] because it has both relation p1 and p2 but get []
    print(found)

run_async(run())
spacemanspiff2007 commented 3 years ago

@long2ice Would you be so kind and give me a hint how I can express obj in relations with Q or filter?

long2ice commented 3 years ago

That's not a simple query, you can query twice and merge in your code

spacemanspiff2007 commented 3 years ago

Thank you for your reply.

I've written the corresponding query in SQL:

SELECT
    m.id, m.name 
FROM
    mymodel m
WHERE
    m.id in (select mymodel_id from property_mymodel where property_id = 1)
AND
    m.id in (select mymodel_id from property_mymodel where property_id = 2)       

Imho it's not that complex since all logic is in the where clause and tortoise.orm should be able to build something like that programmatically. Do we need a new object that does relational filtering or a new function that does this e.g.

R('mymodel', [1, 2])

query.filter_relations(properties=[1,2])

What do you think?

long2ice commented 3 years ago

We need subquery

spacemanspiff2007 commented 3 years ago

I've created the where clause with pypika. Can you point me in the direction how this can be integrated?


from pypika import Query, Tables, Criterion

mymodel, property, prop2model = Tables('mymodel', 'property', 'property_mymodel')

main_query = Query.from_(mymodel).select(mymodel.id)

relation_queries = []
for i in (1, 2): # <-- IDs of the relation
    q = Query.from_(prop2model).select(prop2model.mymodel_id).where(prop2model.property_id == i)
    relation_queries.append(mymodel.id.isin(q))

final_query = main_query.where(Criterion.all(relation_queries))

print(final_query.get_sql())
spacemanspiff2007 commented 3 years ago

As a Workaround I've created a relationship filter base on @AlwxSin snippets! Thanks @AlwxSin for your help and contribution!

usage:

await CriterionQuerySet(MyModel).has_relations('properties', [1, 2])
from __future__ import annotations

from copy import copy
from typing import Iterable, Any, List, Type, Literal

from pypika import Criterion, Table, Query

from tortoise import Model
from tortoise.fields.relational import ManyToManyFieldInstance
from tortoise.queryset import QuerySet

class CriterionQuerySet(QuerySet):
    __slots__ = QuerySet.__slots__ + ("_additional_and_criterions",)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._additional_and_criterions: List[Criterion] = []

    @classmethod
    def from_model(cls, model: Type[Model]) -> "CriterionQuerySet":
        """"""
        return cls(model)

    def add_and_criterion(self, criterion: Criterion) -> None:
        self._additional_and_criterions.append(criterion)

    def _clone(self):
        """We need to copy all method because Tortoise use Specific class"""
        # here can be queryset = self.__class__.__new__(self.__class__)
        queryset = CriterionQuerySet.__new__(CriterionQuerySet)

        queryset.fields = self.fields
        queryset.model = self.model
        queryset.query = self.query
        queryset.capabilities = self.capabilities
        queryset._prefetch_map = copy(self._prefetch_map)
        queryset._prefetch_queries = copy(self._prefetch_queries)
        queryset._single = self._single
        queryset._raise_does_not_exist = self._raise_does_not_exist
        queryset._db = self._db
        queryset._limit = self._limit
        queryset._offset = self._offset
        queryset._fields_for_select = self._fields_for_select
        queryset._filter_kwargs = copy(self._filter_kwargs)
        queryset._orderings = copy(self._orderings)
        queryset._joined_tables = copy(self._joined_tables)
        queryset._q_objects = copy(self._q_objects)
        queryset._distinct = self._distinct
        queryset._annotations = copy(self._annotations)
        queryset._having = copy(self._having)
        queryset._custom_filters = copy(self._custom_filters)
        queryset._group_bys = copy(self._group_bys)
        queryset._select_for_update = self._select_for_update
        queryset._select_for_update_nowait = self._select_for_update_nowait
        queryset._select_for_update_skip_locked = self._select_for_update_skip_locked
        queryset._select_for_update_of = self._select_for_update_of
        queryset._select_related = self._select_related
        queryset._select_related_idx = self._select_related_idx
        queryset._force_indexes = self._force_indexes
        queryset._use_indexes = self._use_indexes

        queryset._additional_and_criterions = self._additional_and_criterions
        return queryset

    def _make_query(self) -> None:
        super()._make_query()
        for criterion in self._additional_and_criterions:
            self.query._wheres &= criterion

    def _check_relations(self, field_name: str, pks: Iterable[Any], relation: Literal['isin', 'notin']):
        meta = self.model._meta
        pk_name = meta.pk_attr
        base_table: Table = meta.basetable
        # E.g. : base_table[pk_name].notin(q)
        relation_fnc = getattr(base_table[pk_name], relation)

        try:
            rel_field: ManyToManyFieldInstance = meta.fields_map[field_name]
        except KeyError:
            raise KeyError(f'Field {field_name} does not exist')

        rel_table: Table = Table(rel_field.through)

        fwd_key = rel_field.forward_key
        model_key = rel_field.backward_key
        for pk in pks:
            if isinstance(pk, Model):
                pk = pk.pk
            q = Query.from_(rel_table).select(rel_table[model_key]).where(rel_table[fwd_key] == pk)

            self.add_and_criterion(relation_fnc(q))

    def has_no_relations(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'notin')
        return self

    def has_relations(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'isin')
        return self
spacemanspiff2007 commented 3 years ago

Currently I am half way through a migration of a rather large project. I've postponed the complex logic because I wanted to get a feel and some knowledge for tortoise-orm. However I can't finish the migration because the lack of relational querying but I'm hesitating to throw all my work away and use another python orm that supports it.

@long2ice Is there any way that I can support you so tortoise-orm will support subqueries? Do you think it's realistic that tortoise-orm will support it, soon (2-6 weeks)?

spacemanspiff2007 commented 3 years ago

Hey @long2ice, many thanks for you quick addition of Subquery! I tried playing around with the dev branch but there still seems to be something missing/buggy.

If I test the first subquery it shows only Model1 instead of Model1 and Model2. If I add the second subquery it will show nothing at all instead of only Model 2

from tortoise import fields, run_async, Tortoise
from tortoise.expressions import Subquery
from tortoise.fields import ManyToManyRelation, ManyToManyField
from tortoise.models import Model

class MyModel(Model):
    id = fields.IntField(pk=True)
    name = fields.TextField()
    properties: ManyToManyRelation["Property"]

class Property(Model):
    id = fields.IntField(pk=True)
    my_models: ManyToManyRelation["MyModel"] = ManyToManyField("models.MyModel", related_name='properties')

async def run():

    # Generate the schema
    await Tortoise.init({"connections": {"default_connection": {"engine": "tortoise.backends.sqlite", "credentials": {"file_path": ':memory:'},},}, "apps": {"models": {"models": ['__main__'], "default_connection": "default_connection"},},})
    await Tortoise.generate_schemas()

    m1 = await MyModel.create(id=1, name='Model1')
    m2 = await MyModel.create(id=2, name='Model2')
    p1 = await Property.create(id=1)
    p2 = await Property.create(id=2)

    await m1.properties.add(p1)
    await m2.properties.add(p1, p2)
    print('Model1:', await m1.properties.all())    # Ouput: Model1: [<Property: 1>]
    print('Model2:', await m2.properties.all())    # Ouput: Model2: [<Property: 1>, <Property: 2>]
    print()

    sub1 = Subquery(MyModel.filter(properties__id=1).values("id"))
    sub2 = Subquery(MyModel.filter(properties__id=2).values("id"))

    query = MyModel.filter(pk__in=sub1)
    query = query.filter(pk__in=sub2)   # comment this line to check single subquery

    print(query.sql())
    print(await query.count())  # Output: 0
    print(await query)          # Output: []

run_async(run())

Also the generated SQL looks strange:

SELECT 
    "name","id" 
FROM 
    "mymodel" 
WHERE 
    "id" IN (SELECT "mymodel"."id" "id" FROM "mymodel" LEFT OUTER JOIN "property_mymodel" ON "mymodel"."id"="property_mymodel"."mymodel_id" LEFT OUTER JOIN "property" ON "property_mymodel"."property_id"="property"."id" WHERE "property"."id"=1) 
AND 
    "id" IN (SELECT "mymodel"."id" "id" FROM "mymodel" LEFT OUTER JOIN "property_mymodel" ON "mymodel"."id"="property_mymodel"."mymodel_id" LEFT OUTER JOIN "property" ON "property_mymodel"."property_id"="property"."id" WHERE "property"."id"=2)

The logic I am passing to the subquery is working fine:

    print(await MyModel.filter(properties__id=1).values("id"))  # Output: [{'id': 1}, {'id': 2}]
    print(await MyModel.filter(properties__id=2).values("id"))  # Output: [{'id': 2}]

What do you think about creating special filter functions for filtering relations?

spacemanspiff2007 commented 3 years ago

Additionally to the need to filter for

I had to filter for Models that do have no relation at all. I've updated the implementation to the following:

from __future__ import annotations

from copy import copy
from typing import Any, Iterable, List, Literal, Type

from pypika import Criterion, Query, Table
from tortoise import Model
from tortoise.fields.relational import ManyToManyFieldInstance
from tortoise.queryset import QuerySet

class CriterionQuerySet(QuerySet):
    __slots__ = QuerySet.__slots__ + ("_additional_and_criterions",)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._additional_and_criterions: List[Criterion] = []

    @classmethod
    def from_model(cls, model: Type[Model]) -> "CriterionQuerySet":
        """"""
        return cls(model)

    def add_and_criterion(self, criterion: Criterion) -> None:
        self._additional_and_criterions.append(criterion)

    def _clone(self):
        """We need to copy all method because Tortoise use Specific class"""
        # here can be queryset = self.__class__.__new__(self.__class__)
        queryset = CriterionQuerySet.__new__(CriterionQuerySet)

        queryset.fields = self.fields
        queryset.model = self.model
        queryset.query = self.query
        queryset.capabilities = self.capabilities
        queryset._prefetch_map = copy(self._prefetch_map)
        queryset._prefetch_queries = copy(self._prefetch_queries)
        queryset._single = self._single
        queryset._raise_does_not_exist = self._raise_does_not_exist
        queryset._db = self._db
        queryset._limit = self._limit
        queryset._offset = self._offset
        queryset._fields_for_select = self._fields_for_select
        queryset._filter_kwargs = copy(self._filter_kwargs)
        queryset._orderings = copy(self._orderings)
        queryset._joined_tables = copy(self._joined_tables)
        queryset._q_objects = copy(self._q_objects)
        queryset._distinct = self._distinct
        queryset._annotations = copy(self._annotations)
        queryset._having = copy(self._having)
        queryset._custom_filters = copy(self._custom_filters)
        queryset._group_bys = copy(self._group_bys)
        queryset._select_for_update = self._select_for_update
        queryset._select_for_update_nowait = self._select_for_update_nowait
        queryset._select_for_update_skip_locked = self._select_for_update_skip_locked
        queryset._select_for_update_of = self._select_for_update_of
        queryset._select_related = self._select_related
        queryset._select_related_idx = self._select_related_idx
        queryset._force_indexes = self._force_indexes
        queryset._use_indexes = self._use_indexes

        queryset._additional_and_criterions = self._additional_and_criterions
        return queryset

    def _make_query(self) -> None:
        super()._make_query()
        for criterion in self._additional_and_criterions:
            self.query._wheres &= criterion

    def _check_relations(self, field_name: str, pks: Iterable[Any], relation: Literal['isin', 'notin']):
        meta = self.model._meta
        pk_name = meta.pk_attr
        base_table: Table = meta.basetable
        # Dies enthält dann: base_table[pk_name].notin(q)
        relation_fnc = getattr(base_table[pk_name], relation)

        try:
            rel_field: ManyToManyFieldInstance = meta.fields_map[field_name]
        except KeyError:
            raise KeyError(f'Field {field_name} does not exist')

        rel_table: Table = Table(rel_field.through)

        fwd_key = rel_field.forward_key
        model_key = rel_field.backward_key
        for pk in pks:
            if isinstance(pk, Model):
                pk = pk.pk
            q = Query.from_(rel_table).select(rel_table[model_key]).where(rel_table[fwd_key] == pk)

            self.add_and_criterion(relation_fnc(q))

    def has_no_relations_with(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'notin')
        return self

    def has_relations_with(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'isin')
        return self

    def has_no_relations(self, field_name: str):
        meta = self.model._meta
        pk_name = meta.pk_attr
        base_table: Table = meta.basetable

        try:
            rel_field: ManyToManyFieldInstance = meta.fields_map[field_name]
        except KeyError:
            raise KeyError(f'Field {field_name} does not exist')
        rel_table: Table = Table(rel_field.through)
        model_key = rel_field.backward_key

        q = base_table[pk_name].notin(Query.from_(rel_table).select(rel_table[model_key]).distinct())
        self.add_and_criterion(q)
        return self

@long2ice I think it would be really nice an intuitive if these options for filtering relations would be exposed through the ManyToManyField and through the ManyToManyRelation. What do you think?

spacemanspiff2007 commented 3 years ago

Current implementation

from __future__ import annotations

from copy import copy
from typing import Any, Iterable, List, Literal, Type

from pypika import Criterion, Query, Table
from tortoise import Model
from tortoise.fields.relational import ManyToManyFieldInstance
from tortoise.queryset import QuerySet

class CriterionQuerySet(QuerySet):
    __slots__ = QuerySet.__slots__ + ("_additional_and_criterions",)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._additional_and_criterions: List[Criterion] = []

    @classmethod
    def from_model(cls, model: Type[Model]) -> "CriterionQuerySet":
        """"""
        return cls(model)

    def add_and_criterion(self, criterion: Criterion) -> None:
        self._additional_and_criterions.append(criterion)

    def _clone(self):
        """We need to copy all method because Tortoise use Specific class"""
        # here can be queryset = self.__class__.__new__(self.__class__)
        queryset = CriterionQuerySet.__new__(CriterionQuerySet)

        queryset.fields = self.fields
        queryset.model = self.model
        queryset.query = self.query
        queryset.capabilities = self.capabilities
        queryset._prefetch_map = copy(self._prefetch_map)
        queryset._prefetch_queries = copy(self._prefetch_queries)
        queryset._single = self._single
        queryset._raise_does_not_exist = self._raise_does_not_exist
        queryset._db = self._db
        queryset._limit = self._limit
        queryset._offset = self._offset
        queryset._fields_for_select = self._fields_for_select
        queryset._filter_kwargs = copy(self._filter_kwargs)
        queryset._orderings = copy(self._orderings)
        queryset._joined_tables = copy(self._joined_tables)
        queryset._q_objects = copy(self._q_objects)
        queryset._distinct = self._distinct
        queryset._annotations = copy(self._annotations)
        queryset._having = copy(self._having)
        queryset._custom_filters = copy(self._custom_filters)
        queryset._group_bys = copy(self._group_bys)
        queryset._select_for_update = self._select_for_update
        queryset._select_for_update_nowait = self._select_for_update_nowait
        queryset._select_for_update_skip_locked = self._select_for_update_skip_locked
        queryset._select_for_update_of = self._select_for_update_of
        queryset._select_related = self._select_related
        queryset._select_related_idx = self._select_related_idx
        queryset._force_indexes = self._force_indexes
        queryset._use_indexes = self._use_indexes

        queryset._additional_and_criterions = self._additional_and_criterions
        return queryset

    def _make_query(self) -> None:
        super()._make_query()
        for criterion in self._additional_and_criterions:
            self.query._wheres &= criterion

    def _check_relations(self, field_name: str, pks: Iterable[Any],
                         relation: Literal['isin', 'notin'], match: Literal['all', 'any'] = 'all'):
        meta = self.model._meta
        pk_name = meta.pk_attr
        base_table: Table = meta.basetable
        # Dies enthält dann: base_table[pk_name].notin(q)
        relation_fnc = getattr(base_table[pk_name], relation)

        try:
            rel_field: ManyToManyFieldInstance = meta.fields_map[field_name]
        except KeyError:
            raise KeyError(f'Field {field_name} does not exist')

        rel_table: Table = Table(rel_field.through)

        fwd_key = rel_field.forward_key
        model_key = rel_field.backward_key

        if match == 'all':
            for pk in pks:
                if isinstance(pk, Model):
                    pk = pk.pk
                q = Query.from_(rel_table).select(rel_table[model_key]).where(rel_table[fwd_key] == pk)

                self.add_and_criterion(relation_fnc(q))
        else:
            # match any
            _pks = []
            for pk in pks:
                if isinstance(pk, Model):
                    pk = pk.pk
                _pks.append(pk)

            q = Query.from_(rel_table).select(rel_table[model_key]).where(rel_table[fwd_key].isin(_pks))
            self.add_and_criterion(relation_fnc(q))

    def has_no_relations_with(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'notin')
        return self

    def has_no_relations_with_any(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'notin', 'any')
        return self

    def has_relations_with(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'isin')
        return self

    def has_relations_with_any(self, field_name: str, pks: Iterable[Any]):
        if not pks:
            return self
        self._check_relations(field_name, pks, 'isin', 'any')
        return self

    def has_no_relations(self, field_name: str):
        meta = self.model._meta
        pk_name = meta.pk_attr
        base_table: Table = meta.basetable

        try:
            rel_field: ManyToManyFieldInstance = meta.fields_map[field_name]
        except KeyError:
            raise KeyError(f'Field {field_name} does not exist')
        rel_table: Table = Table(rel_field.through)
        model_key = rel_field.backward_key

        q = base_table[pk_name].notin(Query.from_(rel_table).select(rel_table[model_key]).distinct())
        self.add_and_criterion(q)
        return self

@long2ice The interface now is like this and I think it's quite nice.

query = CriterionQuerySet(MyModel)
query.has_no_relations_with('field_name', iterable[Model or pk])
query.has_no_relations_with_any('field_name', iterable[Model or pk])
query.has_relations_with('field_name', iterable[Model or pk])
query.has_relations_with_any('field_name', iterable[Model or pk])
query.has_no_relations('field_name')

Would you be interested in a pull request?