olirice / alembic_utils

An alembic/sqlalchemy extension for migrating sql views, functions, triggers, and policies
MIT License
193 stars 42 forks source link

Extremely slow #125

Open masylum opened 5 months ago

masylum commented 5 months ago

I have a bunch of functions, extensions and policies registered and it's impossible to run autogenerate anymore. It looks like there is something going exponential in the compare_registered_entities. the console is going crazy with hundreds of savepoints.

import asyncio
from logging.config import fileConfig
from typing import Any

from alembic import context
from alembic_utils.pg_grant_table import PGGrantTable
from alembic_utils.replaceable_entity import register_entities
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import create_async_engine

# load all the db models
import core.db.models  # noqa # pyright: ignore
from core.db.metadata import metadata
from core.db.url import db_url
from alembic_utils.pg_policy import PGPolicy
from alembic_utils.pg_function import PGFunction
from alembic_utils.pg_extension import PGExtension

extensions = [
    PGExtension(schema="public", signature="unaccent"),
    PGExtension(schema="public", signature="vector"),
]

functions = [
    PGFunction(
        schema="public",
        signature="i_unaccent(text)",
        definition="RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE AS $func$ SELECT public.unaccent('public.unaccent', $1) $func$",
    ),
    PGFunction(
        schema="public",
        signature="lang_to_regconfig(text)",
        definition="RETURNS regconfig LANGUAGE sql IMMUTABLE STRICT AS $$ SELECT $1::regconfig; $$",
    ),
    PGFunction(
        schema="public",
        signature="user_id()",
        definition="RETURNS text as $$ SELECT nullif(current_setting('request.jwt.claim.sub', true), ''); $$ language sql stable;",
    ),
]

policies = [
    PGPolicy(
        schema="public",
        signature="recent_publishers_private",
        on_entity="public.recent_publishers",
        definition="as PERMISSIVE for ALL to authenticated using (( SELECT (auth.uid())::text AS uid) = (user_id)::text) with check (( SELECT (auth.uid())::text AS uid) = (user_id)::text)",
    ),
    PGPolicy(
        schema="public",
        signature="publishers_authenticated",
        on_entity="public.publishers",
        definition="as PERMISSIVE for ALL to authenticated using (true)",
    ),
    PGPolicy(
        schema="public",
        signature="replicache_view_records_authenticated",
        on_entity="public.replicache_view_records",
        definition="as PERMISSIVE for ALL to authenticated using (true)",
    ),
    PGPolicy(
        schema="public",
        signature="bookmarks_authenticated",
        on_entity="public.bookmarks",
        definition="as PERMISSIVE for ALL to authenticated using ((( SELECT (auth.uid())::character varying AS uid))::text = (user_id)::text) with check ((( SELECT (auth.uid())::character varying AS uid))::text = (user_id)::text)",
    ),
    PGPolicy(
        schema="public",
        signature="stories_authenticated",
        on_entity="public.stories",
        definition="as PERMISSIVE for ALL to authenticated using (true)",
    ),
    PGPolicy(
        schema="public",
        signature="subscriptions_private",
        on_entity="public.subscriptions",
        definition="as PERMISSIVE for ALL to authenticated using (( SELECT (auth.uid())::text AS uid) = (user_id)::text) with check (( SELECT (auth.uid())::text AS uid) = (user_id)::text)",
    ),
    PGPolicy(
        schema="public",
        signature="alembic_version_authenticated",
        on_entity="public.alembic_version",
        definition="as PERMISSIVE for ALL to authenticated using (true)",
    ),
    PGPolicy(
        schema="public",
        signature="replicache_client_groups_private",
        on_entity="public.replicache_client_groups",
        definition="as PERMISSIVE for ALL to authenticated using (( SELECT (auth.uid())::text AS uid) = (user_id)::text) with check (( SELECT (auth.uid())::text AS uid) = (user_id)::text)",
    ),
    PGPolicy(
        schema="public",
        signature="replicache_clients_authenticated",
        on_entity="public.replicache_clients",
        definition="as PERMISSIVE for ALL to authenticated using (true)",
    ),
    PGPolicy(
        schema="public",
        signature="replicache_views_authenticated",
        on_entity="public.replicache_views",
        definition="as PERMISSIVE for ALL to authenticated using (true)",
    ),
    PGPolicy(
        schema="public",
        signature="users_public",
        on_entity="public.users",
        definition="as PERMISSIVE for ALL to public using (true)",
    ),
]

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

register_entities(entities=functions + extensions + policies)

if config.config_file_name is not None:
    fileConfig(config.config_file_name)

config.set_main_option("sqlalchemy.url", db_url())

def include_object(
    object: Any, name: Any, type_: Any, reflected: Any, compare_to: Any
) -> bool:
    if isinstance(object, PGGrantTable):
        return False
    return True

def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.
    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well.  By skipping the Engine creation
    we don't even need a DBAPI to be available.
    Calls to context.execute() here emit the given string to the
    script output.
    """
    context.configure(
        url=db_url(),
        target_metadata=metadata,
        literal_binds=True,
        compare_type=True,
        dialect_opts={"paramstyle": "named"},
        include_object=include_object,
    )

    with context.begin_transaction():
        context.run_migrations()

def do_run_migrations(connection: Connection) -> None:
    context.configure(
        connection=connection,
        target_metadata=metadata,
        include_object=include_object,
    )

    with context.begin_transaction():
        context.run_migrations()

async def run_migrations_online():
    """Run migrations in 'online' mode.
    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    connectable = create_async_engine(db_url(), echo=True, future=True)

    async with connectable.connect() as connection:
        await connection.run_sync(do_run_migrations)

if context.is_offline_mode():
    run_migrations_offline()
else:
    asyncio.run(run_migrations_online())
olirice commented 5 months ago

The savepoints are produced during dependency resolution and simulating entities to "diff" them. It admittedly is a lot more than you'd expect but they're light-weight and do not grow exponentially. The benefit of that approach is that you don't have to explicitly declare dependencies among entities.

The rationale for this approach is that --autogenerate doesn't really need to be fast

For context, a 400-ish entity project with lots of dependences among entities completes in < 1 minute.

Are you seeing slow (> 1 minute) runtimes, or just concerned about the savepoint logs?

gregyannett commented 4 months ago

I have also noticed that using alembic_utils greatly slows down my autogenerate times. I use PGTriggers to create triggers that refresh updated_at columns in all of my tables (currently around 150 tables) and autogenerate takes over 15 minutes to run. Creating these triggers in a revision file is much faster

olirice commented 4 months ago

I'm getting 71 seconds for a 150 trigger project with postgres running locally in docker. If you're seeing 15 minutes please open an issue with a reproducible example and I'd be happy to help debug