googleapis / python-spanner-sqlalchemy

Apache License 2.0
38 stars 28 forks source link

Migrations drop and identically recreate certain foreign keys #271

Closed louimet closed 1 year ago

louimet commented 1 year ago

Hello! I'm opening a new issue, but this looks quite possibly related to issue 231.

Following the release of version 1.2.2, we observe a new, strange behavior in migrations regarding a slightly tricky foreign key we use.

In short, we are linking two entities together in a table, and we want to enforce that a certain property of entities A and B match. This worked perfectly well in 1.2.1.

In 1.2.2, the keys' enforcement actually still works, but every new migration we create drops that linking table's foreign keys and recreates them identically. This does not happen only once, but in every subsequent migration.

Here are the relevant package versions we use:

alembic 1.8.1 google-cloud-spanner 3.22.2 SQLAlchemy 1.4.41 sqlalchemy-spanner 1.2.2

And here's a minimalist setup that recreates the problem.

from __future__ import annotations

from sqlalchemy import Column, ForeignKey, ForeignKeyConstraint, PrimaryKeyConstraint, String
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import relationship

from shared.database_engine import Base

### Base's definition from shared.database_engine (it's pretty standard):
# from sqlalchemy import MetaData, create_engine
# from sqlalchemy.orm import declarative_base
# engine = create_engine(shared_settings.SPANNER_URL, pool_recycle=shared_settings.SPANNER_SESSION_RECYCLE_AGE)
# metadata = MetaData(bind=engine)
# Base = declarative_base(bind=engine, metadata=metadata)

class EnvironmentMixin:
    @declared_attr
    def environment_id(self) -> Column[String]:
        return Column(String, ForeignKey('environment.id'), nullable=False)

class Environment(Base):
    __tablename__ = 'environment'

    id = Column(String, primary_key=True)

class A(Base, EnvironmentMixin):
    __tablename__ = 'a'

    id = Column(String, primary_key=True)

class B(Base, EnvironmentMixin):
    __tablename__ = 'b'

    id = Column(String, primary_key=True)

class Link(Base, EnvironmentMixin):
    __tablename__ = 'link'
    __table_args__ = (
        # Enforce at DB-level that associated A and B must share the same Environment
        PrimaryKeyConstraint('environment_id', 'a_id', 'b_id', name='pk_test'),
        ForeignKeyConstraint(
            ['environment_id', 'a_id'],
            ['a.environment_id', 'a.id'],
            name='fk_1',
        ),
        ForeignKeyConstraint(
            ['environment_id', 'b_id'],
            ['b.environment_id', 'b.id'],
            name='fk_2',
        ),
    )

    a_id = Column(String, nullable=False)
    b_id = Column(String, nullable=False)

    a: A = relationship('A', foreign_keys=[a_id])
    b: B = relationship('B', foreign_keys=[b_id])

Using this file, I ran alembic revision --autogenerate for the initial setup, then alembic upgrade head, then immediately another alembic revision --autogenerate.

Our migrations are enclosed in an autocommit block following this issue.

Here's the initial generation:

"""setup

Revision ID: 485161853e62
Revises: 
Create Date: 2022-10-14 14:46:50.269607

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '485161853e62'
down_revision = None
branch_labels = None
depends_on = None

def upgrade():
    with op.get_context().autocommit_block():
        # ### commands auto generated by Alembic - please adjust! ###
        op.create_table('environment',
        sa.Column('id', sa.String(), nullable=False),
        sa.PrimaryKeyConstraint('id')
        )
        op.create_table('a',
        sa.Column('id', sa.String(), nullable=False),
        sa.Column('environment_id', sa.String(), nullable=False),
        sa.ForeignKeyConstraint(['environment_id'], ['environment.id'], ),
        sa.PrimaryKeyConstraint('id')
        )
        op.create_table('b',
        sa.Column('id', sa.String(), nullable=False),
        sa.Column('environment_id', sa.String(), nullable=False),
        sa.ForeignKeyConstraint(['environment_id'], ['environment.id'], ),
        sa.PrimaryKeyConstraint('id')
        )
        op.create_table('link',
        sa.Column('a_id', sa.String(), nullable=False),
        sa.Column('b_id', sa.String(), nullable=False),
        sa.Column('environment_id', sa.String(), nullable=False),
        sa.ForeignKeyConstraint(['environment_id', 'a_id'], ['a.environment_id', 'a.id'], name='fk_1'),
        sa.ForeignKeyConstraint(['environment_id', 'b_id'], ['b.environment_id', 'b.id'], name='fk_2'),
        sa.ForeignKeyConstraint(['environment_id'], ['environment.id'], ),
        sa.PrimaryKeyConstraint('environment_id', 'a_id', 'b_id', name='pk_test')
        )
        # ### end Alembic commands ###

def downgrade():
    with op.get_context().autocommit_block():
        # ### commands auto generated by Alembic - please adjust! ###
        op.drop_table('link')
        op.drop_table('b')
        op.drop_table('a')
        op.drop_table('environment')
        # ### end Alembic commands ###

And here's every subsequent autogenerated migration:

"""no modification

Revision ID: 508dda784ffa
Revises: 485161853e62
Create Date: 2022-10-14 14:47:00.349500

"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '508dda784ffa'
down_revision = '485161853e62'
branch_labels = None
depends_on = None

def upgrade():
    with op.get_context().autocommit_block():
        # ### commands auto generated by Alembic - please adjust! ###
        with op.batch_alter_table('link', schema=None) as batch_op:
            batch_op.drop_constraint('fk_2', type_='foreignkey')
            batch_op.drop_constraint('fk_1', type_='foreignkey')
            batch_op.create_foreign_key('fk_2', 'b', ['environment_id', 'b_id'], ['environment_id', 'id'])
            batch_op.create_foreign_key('fk_1', 'a', ['environment_id', 'a_id'], ['environment_id', 'id'])

        # ### end Alembic commands ###

def downgrade():
    with op.get_context().autocommit_block():
        # ### commands auto generated by Alembic - please adjust! ###
        with op.batch_alter_table('link', schema=None) as batch_op:
            batch_op.drop_constraint('fk_1', type_='foreignkey')
            batch_op.drop_constraint('fk_2', type_='foreignkey')
            batch_op.create_foreign_key('fk_1', 'a', ['environment_id', 'a_id'], ['id', 'environment_id'])
            batch_op.create_foreign_key('fk_2', 'b', ['environment_id', 'b_id'], ['id', 'environment_id'])

        # ### end Alembic commands ###
jstlaurent commented 1 year ago

I've created a repo with the issue reproduced. Hopefully that can be helpful in finding the cause.

IlyaFaer commented 1 year ago

@jstlaurent, @louimet, d'accord, I finally reproduced the error with this repo. Looks like a tiny thing somewhere didn't much between your and my configurations, so it was working on my end.

Now let's see what's really happening there...

IlyaFaer commented 1 year ago

@asthamohta, I think I see the problem. When SQLAlchemy introspects foreign keys, constrained columns are returned in another order:

Untitled

As Spanner doesn't support arrays ordering, it looks like we need to invent something here.