marcboeker / go-duckdb

go-duckdb provides a database/sql driver for the DuckDB database engine.
MIT License
623 stars 97 forks source link

Reopening DB fails if it contains tables created with columns having COLLATE. #159

Closed derkan closed 6 months ago

derkan commented 6 months ago

With this app:

package main

import (
    "context"
    "database/sql"
    "database/sql/driver"
    "fmt"
    "github.com/marcboeker/go-duckdb"
    "log"
)

type user struct {
    Name string
    Age  int
}

func main() {
    dsn := "./foo.db?access_mode=read_write&threads=4"
    connector, err := duckdb.NewConnector(dsn, func(execer driver.ExecerContext) error {
        bootQueries := []string{
            "INSTALL 'icu'",
            "LOAD 'icu'",
        }
        for _, qry := range bootQueries {
            _, err := execer.ExecContext(context.Background(), qry, nil)
            if err != nil {
                log.Println("boot query failed", qry, err)
                return err
            }
        }
        return nil
    })
    db := sql.OpenDB(connector)
    defer db.Close()

    if _, err = db.Exec("CREATE TABLE IF NOT EXISTS tbl(name VARCHAR COLLATE TR, age BIGINT);"); err != nil {
        log.Fatal(err)
    }
    db.Query("INSERT INTO tbl VALUES ('Erkan',40), ('İsmail', 10), ('Zeki', 3);")
    stmt, err := db.Prepare("SELECT * FROM tbl WHERE age > ?")
    if err != nil {
        log.Fatal(err)
    }

    rows, err := stmt.Query(1)
    if err != nil {
        log.Fatal(err)
    }
    defer rows.Close()
    for rows.Next() {
        u := new(user)
        if err = rows.Scan(&u.Name, &u.Age); err != nil {
            log.Fatal(err)
        }
        log.Printf("%s is %d years old\n", u.Name, u.Age)
    }
}

At first run no problems. But closing and running for second time fails with:

Exception in WAL playback: Catalog Error: Collation with name "tr" is not in the catalog, but it exists in the icu extension.
Please try installing and loading the icu extension:
INSTALL icu;
LOAD icu;
marcboeker commented 6 months ago

Thank you for the report. The error occurs because the database is already created and depends on the icu extension. When it's opened the second time, the bootQueries are executed after the database is opened. However, the open call already produces an exception at this point. We can't execute the plugin installation before the database is opened, which presents a chicken/egg problem.

You can reproduce this issue by opening foo.db using the DuckDB CLI tool, duckdb foo.db.

One possible hack is to open a database in memory, load the extensions, and attach foo.db from disk. For example:

package main

import (
    "context"
    "database/sql"
    "database/sql/driver"
    "fmt"
    "log"

    "github.com/marcboeker/go-duckdb"
)

type user struct {
    Name string
    Age  int
}

func main() {
    dsn := ""
    connector, err := duckdb.NewConnector(dsn, func(execer driver.ExecerContext) error {
        bootQueries := []string{
            "INSTALL 'icu'",
            "LOAD 'icu'",
            "ATTACH 'foo.db'",
            "USE 'foo'",
        }

        for _, qry := range bootQueries {
            _, err := execer.ExecContext(context.Background(), qry, nil)
            if err != nil {
                fmt.Println("hello")
                log.Println("boot query failed", qry, err)
                return err
            }
        }
        return nil
    })
    if err != nil {
        log.Fatal(err)
    }
    defer connector.Close()

    db := sql.OpenDB(connector)
    defer db.Close()

    conn, err := db.Conn(context.Background())
    if err != nil {
        log.Fatal(err)
    }
    defer conn.Close()

    if _, err := conn.ExecContext(context.Background(), "CREATE TABLE IF NOT EXISTS tbl(name VARCHAR COLLATE TR, age BIGINT);"); err != nil {
        log.Fatal(err)
    }

    _, err = conn.ExecContext(context.Background(), "INSERT INTO tbl VALUES ('Erkan',40), ('İsmail', 10), ('Zeki', 3);")
    if err != nil {
        log.Fatal(nil)
    }

    stmt, err := conn.PrepareContext(context.Background(), "SELECT * FROM tbl WHERE age > ?")
    if err != nil {
        log.Fatal(err)
    }
    defer stmt.Close()

    rows, err := stmt.QueryContext(context.Background(), 2)
    if err != nil {
        log.Fatal(err)
    }
    defer rows.Close()

    for rows.Next() {
        u := new(user)
        if err = rows.Scan(&u.Name, &u.Age); err != nil {
            log.Fatal(err)
        }
        log.Printf("%s is %d years old\n", u.Name, u.Age)
    }

    _, err = conn.ExecContext(context.Background(), "USE memory; DETACH foo")
    if err != nil {
        log.Fatal(err)
    }
}

Perhaps the DuckDB maintainers are aware of how to handle this issue. However, adding the autoload parameters to the DSN does not automatically load the icu extension.

@taniabogatsch, do you know a better way to circumvent this issue?

taniabogatsch commented 6 months ago

Hi @marcboeker @derkan, this is a known bug in DuckDB. We have an issue open here. We've reopened it and plan to fix it in our current post-release feature freeze.

marcboeker commented 6 months ago

Thanks @taniabogatsch, so I will close this issue.

derkan commented 6 months ago

Thank you @marcboeker & @taniabogatsch. For work around I'm defining as default collate in session level.