chdb-io / chdb

chDB is an in-process OLAP SQL Engine 🚀 powered by ClickHouse
https://clickhouse.com/chdb
Apache License 2.0
2.15k stars 75 forks source link

Already existing database called "default" but virgin chdb #269

Open LPauzies opened 2 months ago

LPauzies commented 2 months ago

I'm using :

I'm trying to run this Python code:


from pathlib import Path
import pytest
from typing import Generator, List, Optional

from chdb import dbapi
from chdb.dbapi.cursors import Cursor

def chdb_dbapi(dump_paths: List[Path], path: Optional[Path] = None) -> Generator[Cursor, None, None]:
    """
    Yield a chdb dbapi cursor, loading the SQL dumps unless a session path already exists.

    The existence of ``path`` is evaluated before the connection is opened. When
    ``path`` is given and exists, the cursor is yielded untouched; otherwise every
    file in ``dump_paths`` is read and executed on the cursor first.

    Args:
        dump_paths (List[Path]): The SQL dump files to execute, in order, on a fresh session
        path (Optional[Path]): The session path handed to ``dbapi.connect`` (as a string),
            or None to connect without a path

    Yields:
        Generator[Cursor, None, None]: The cursor
    """
    string_path = None if not path else str(path)
    # Evaluated up front so the check reflects the state before connecting.
    session_exists = bool(path and path.exists())

    with dbapi.connect(path=string_path) as cursor:
        if not session_exists:  # Fallback: populate the session from the dumps
            for dump_file in dump_paths:
                cursor.execute(dump_file.read_text())
        yield cursor

@pytest.fixture(scope="session")
def alembic_dump_path() -> Path:
    """Return the resolved path of the alembic SQL dump, relative to the current working directory."""
    return Path.cwd().joinpath("../../dumps/clickhouse_alembic_dump.sql").resolve()

@pytest.fixture(scope="session")
def clickhouse_chdb_dbapi(alembic_dump_path: Path) -> Generator[Cursor, None, None]:
    """Session-scoped fixture yielding the cursor produced by ``chdb_dbapi`` for the alembic dump."""
    dumps_to_load = [alembic_dump_path]
    yield from chdb_dbapi(dumps_to_load)

def test_clickhouse_session_dbapi(clickhouse_chdb_dbapi):
    """Check that ``alembic_version`` is among the tables listed for the ``default`` database.

    Args:
        clickhouse_chdb_dbapi: The cursor fixture on which the query is executed.
    """
    clickhouse_chdb_dbapi.execute("SHOW tables IN default;")
    rows = clickhouse_chdb_dbapi.fetchall()
    # Look at every returned row rather than only the first one: the check then
    # does not depend on the order in which tables are listed, and an empty
    # result fails the assertion instead of raising IndexError.
    # This should never change
    assert any("alembic_version" in row for row in rows)

Here is the content of "../../dumps/clickhouse_alembic_dump.sql" :

-- Create the database named "default".
-- NOTE(review): the DATABASE_ALREADY_EXISTS error reported for this dump
-- names this database, so this is presumably the failing statement.
CREATE DATABASE default;

-- Create the alembic_version table inside the "default" database.
CREATE TABLE default.alembic_version
(
    -- Version identifier, stored as a string.
    `version_num` String,
    -- Timestamp column; filled with now() when no value is supplied.
    `dt` DateTime DEFAULT now()
)
ENGINE = MergeTree()
ORDER BY tuple()
SETTINGS index_granularity = 8192;

When I run my test, it fails with this error:

ERROR tests/unittest/test_chdb_dbapi.py::test_clickhouse_session_dbapi - chdb.dbapi.err.InterfaceError: query err: Code: 82. DB::Exception: Database default already exists. (DATABASE_ALREADY_EXISTS)

When I try to check the structure of the "default" database before loading my .sql file :

def chdb_dbapi(dump_paths: List[Path], path: Optional[Path] = None) -> Generator[Cursor, None, None]:
    """
    Yield a chdb dbapi cursor, printing the known tables before each dump is loaded.

    The existence of ``path`` is evaluated before the connection is opened. When
    ``path`` is given and exists, the cursor is yielded untouched; otherwise, for
    every file in ``dump_paths``, the content of ``system.tables`` is printed as
    ``database.name`` lines and the dump is then read and executed on the cursor.

    Args:
        dump_paths (List[Path]): The SQL dump files to execute, in order, on a fresh session
        path (Optional[Path]): The session path handed to ``dbapi.connect`` (as a string),
            or None to connect without a path

    Yields:
        Generator[Cursor, None, None]: The cursor
    """
    string_path = None if not path else str(path)
    # Evaluated up front so the check reflects the state before connecting.
    session_exists = bool(path and path.exists())

    with dbapi.connect(path=string_path) as cursor:
        if not session_exists:  # Fallback: populate the session from the dumps
            for dump_file in dump_paths:
                # Diagnostic: list every table visible right before the dump is executed.
                cursor.execute("select database, name from system.tables;")
                for db_name, table_name in cursor.fetchall():
                    print(f"{db_name}.{table_name}")
                cursor.execute(dump_file.read_text())
        yield cursor

It prints the following, in which no table from a "default" database appears:

INFORMATION_SCHEMA.COLUMNS
INFORMATION_SCHEMA.KEY_COLUMN_USAGE
INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS
INFORMATION_SCHEMA.SCHEMATA
INFORMATION_SCHEMA.STATISTICS
INFORMATION_SCHEMA.TABLES
INFORMATION_SCHEMA.VIEWS
INFORMATION_SCHEMA.columns
INFORMATION_SCHEMA.key_column_usage
INFORMATION_SCHEMA.referential_constraints
INFORMATION_SCHEMA.schemata
INFORMATION_SCHEMA.statistics
INFORMATION_SCHEMA.tables
INFORMATION_SCHEMA.views
information_schema.COLUMNS
information_schema.KEY_COLUMN_USAGE
information_schema.REFERENTIAL_CONSTRAINTS
information_schema.SCHEMATA
information_schema.STATISTICS
information_schema.TABLES
information_schema.VIEWS
information_schema.columns
information_schema.key_column_usage
information_schema.referential_constraints
information_schema.schemata
information_schema.statistics
information_schema.tables
information_schema.views
system.aggregate_function_combinators
system.asynchronous_inserts
system.asynchronous_loader
system.backups
system.build_options
system.certificates
system.clusters
system.collations
system.columns
system.contributors
system.current_roles
system.dashboards
system.data_skipping_indices
system.data_type_families
system.database_engines
system.databases
system.detached_parts
system.dictionaries
system.disks
system.distributed_ddl_queue
system.distribution_queue
system.dns_cache
system.dropped_tables
system.dropped_tables_parts
system.enabled_roles
system.errors
system.events
system.filesystem_cache
system.filesystem_cache_settings
system.formats
system.functions
system.generateSeries
system.generate_series
system.grants
system.graphite_retentions
system.jemalloc_bins
system.kafka_consumers
system.keywords
system.licenses
system.macros
system.merge_tree_settings
system.merges
system.metrics
system.models
system.moves
system.mutations
system.mysql_binlogs
system.named_collections
system.numbers
system.numbers_mt
system.one
system.part_moves_between_shards
system.parts
system.parts_columns
system.privileges
system.processes
system.projection_parts
system.projection_parts_columns
system.query_cache
system.quota_limits
system.quota_usage
system.quotas
system.quotas_usage
system.remote_data_paths
system.replicas
system.replicated_fetches
system.replicated_merge_tree_settings
system.replication_queue
system.rocksdb
system.role_grants
system.roles
system.row_policies
system.s3queue
system.scheduler
system.schema_inference_cache
system.server_settings
system.settings
system.settings_changes
system.settings_profile_elements
system.settings_profiles
system.stack_trace
system.storage_policies
system.symbols
system.table_engines
system.table_functions
system.tables
system.time_zones
system.user_directories
system.user_processes
system.users
system.view_refreshes
system.warnings
system.zeros
system.zeros_mt

NB: This code worked in chdb 1.4.1, but it fails after upgrading. Is this a regression? Is it a bug?

auxten commented 2 months ago

ClickHouse has changed the default database name from "local" to "default". However, the default database is still not persistent. So the easiest fix is to use a name other than "default" for your database.