googleapis / go-sql-spanner

Google Cloud Spanner driver for Go's database/sql package.
Apache License 2.0
105 stars 25 forks source link

"invalid session pool" error when db.SetConnMaxLifetime is set #288

Closed elek closed 2 months ago

elek commented 3 months ago

Environment details

Steps to reproduce

Looks like, if we set db.SetConnMaxLifetime, the session is invalidated after a while, and couldn't be used any more...

Go supposed to close the connection, but remove it from the free pool and replace it with a fresh one.

The following code (which just stes the ConnMaxLifetime and running the hello world) will fail eventually.

But not with pgx (cockroach). It works well, even if the connection / session is invalidated.

// Copyright 2021 Google LLC All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
    "context"
    "database/sql"
    "fmt"
    _ "github.com/googleapis/go-sql-spanner"
    _ "github.com/jackc/pgx/v5/stdlib"
    "time"
)

// Simple sample application that shows how to use the Spanner Go sql driver.
// Execute the sample with the command `go run main.go` from this directory.
func helloWorld() error {
    ctx := context.Background()
    // same thing with cockroach
    //db, err := sql.Open("pgx", "postgres://root@localhost:26257/env1?sslmode=disable")
    db, err := sql.Open("spanner", "projects/test-project/instances/test-instance/databases/db")
satellite")
    if err != nil {
        return fmt.Errorf("failed to open database connection: %v\n", err)
    }
    defer db.Close()
    db.SetConnMaxLifetime(time.Second * 20)

    for i := 0; i < 10000; i++ {
        err := selectOnce(ctx, db)
        if err != nil {
            return err
        }
        time.Sleep(1 * time.Second)
    }
    return nil
}

func selectOnce(ctx context.Context, db *sql.DB) error {
    rows, err := db.QueryContext(ctx, "SELECT 'Hello World!'")
    if err != nil {
        return fmt.Errorf("failed to execute query: %v", err)
    }
    defer rows.Close()

    var msg string
    for rows.Next() {
        if err := rows.Scan(&msg); err != nil {
            return fmt.Errorf("failed to scan row values: %v", err)
        }
        fmt.Printf("%s\n", msg)
    }
    if err := rows.Err(); err != nil {
        return fmt.Errorf("failed to execute query: %v", err)
    }
    return nil
}

func main() {
    err := helloWorld()
    if err != nil {
        panic(err)
    }
}
egonelbre commented 2 months ago

It looks like the problem does not occur when there's some other connection open:

package main

import (
    "context"
    "database/sql"
    "fmt"
    "time"

    _ "github.com/googleapis/go-sql-spanner"
    "github.com/googleapis/go-sql-spanner/examples"
)

// Simple sample application that shows how to use the Spanner Go sql driver.
//
// Execute the sample with the command `go run main.go` from this directory.
func helloWorld(projectId, instanceId, databaseId string) error {
    ctx := context.Background()
    db, err := sql.Open("spanner", fmt.Sprintf("projects/%s/instances/%s/databases/%s", projectId, instanceId, databaseId))
    if err != nil {
        return fmt.Errorf("failed to open database connection: %v\n", err)
    }
    defer db.Close()

    db.SetConnMaxLifetime(time.Second * 10)

    tx, err := db.BeginTx(ctx, &sql.TxOptions{})
    if err != nil {
        return fmt.Errorf("failed to start database connection: %v\n", err)
    }
    defer tx.Rollback()

    for i := 0; i < 100000; i++ {
        rows, err := db.QueryContext(ctx, "SELECT 'Hello World!'")
        if err != nil {
            return fmt.Errorf("failed to execute query: %v", err)
        }
        defer rows.Close()

        var msg string
        for rows.Next() {
            if err := rows.Scan(&msg); err != nil {
                return fmt.Errorf("failed to scan row values: %v", err)
            }
            fmt.Printf("%s\n", msg)

            time.Sleep(time.Second)
        }
        if err := rows.Err(); err != nil {
            return fmt.Errorf("failed to execute query: %v", err)
        }
    }

    return nil
}

func main() {
    examples.RunSampleOnEmulator(helloWorld)
}

My guess is that the issue stems from this (https://github.com/googleapis/go-sql-spanner/blob/698932dea7d4d975021f02241fdaa51d206b733f/driver.go#L952):

func (c *conn) Close() error {
    // Check if this is the last open connection of the connector.
    if count := atomic.AddInt32(&c.connector.connCount, -1); count > 0 {
        return nil
    }

    // This was the last connection. Remove the connector and close the Spanner clients.
    c.connector.driver.mu.Lock()
    delete(c.connector.driver.connectors, c.connector.dsn) // <-- this ends up deleting the connector, even though it's still in use
    c.connector.driver.mu.Unlock()

    c.client.Close()
    return c.adminClient.Close()
}

Even though the last connection is closed the database can still have the connector open.

PS: I think I have a fix for it.