rs / zerolog

Zero Allocation JSON Logger
MIT License
10.7k stars 572 forks source link

please help to resolve race-condition in zerolog #555

Open Slach opened 1 year ago

Slach commented 1 year ago

@rs i have race-condition ;( https://github.com/Altinity/clickhouse-backup/actions/runs/5259410860/jobs/9504979925#step:7:2222

WARNING: DATA RACE
Write at 0x00c000109440 by goroutine 30:
  github.com/rs/zerolog.newEvent()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.29.1/event.go:60 +0xd9
  github.com/rs/zerolog.(*Logger).newEvent()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.29.1/log.go:453 +0xec
  github.com/rs/zerolog.(*Logger).Info()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.29.1/log.go:334 +0x59
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).LogQuery()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:917 +0x2e4
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).Query()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:887 +0x193
  github.com/Altinity/clickhouse-backup/pkg/partition.GetPartitionId()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/partition/partition.go:60 +0x1058
  github.com/Altinity/clickhouse-backup/pkg/filesystemhelper.CreatePartitionsToBackupMap()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/filesystemhelper/filesystemhelper.go:305 +0x14d9
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:386 +0x33f7
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:196 +0x8b2
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/sync@v0.2.0/errgroup/errgroup.go:75 +0x133

Previous read at 0x00c000109440 by goroutine 31:
  github.com/rs/zerolog.putEvent()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.29.1/event.go:40 +0x3c
  github.com/rs/zerolog.(*Event).write()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.29.1/event.go:81 +0x339
  github.com/rs/zerolog.(*Event).msg()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.29.1/event.go:149 +0x55d
  github.com/rs/zerolog.(*Event).Msg()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.29.1/event.go:108 +0x64
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).LogQuery()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:922 +0xc71
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).Query()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:887 +0x193
  github.com/Altinity/clickhouse-backup/pkg/partition.GetPartitionId()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/partition/partition.go:60 +0x1058
  github.com/Altinity/clickhouse-backup/pkg/filesystemhelper.CreatePartitionsToBackupMap()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/filesystemhelper/filesystemhelper.go:305 +0x14d9
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:386 +0x33f7
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:196 +0x8b2
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/sync@v0.2.0/errgroup/errgroup.go:75 +0x133      

when try to use zerolog with multiple go-routines + errorgroup

LogQuery() related code

import (
    "context"
    "fmt"
    "github.com/rs/zerolog"
    "github.com/rs/zerolog/log"
    "github.com/rs/zerolog/pkgerrors"
    "github.com/urfave/cli"
    stdlog "log"
    "os"
)
...
    zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs
    zerolog.ErrorStackMarshaler = pkgerrors.MarshalStack
    log.Logger = zerolog.New(zerolog.SyncWriter(os.Stdout)).With().Timestamp().Caller().Logger()
    stdlog.SetOutput(log.Logger)
...
type ClickHouse struct {
    Config               *config.ClickHouseConfig
    conn                 driver.Conn
    disks                []Disk
    Logger               zerolog.Logger
    version              int
    isPartsColumnPresent int8
    IsOpen               bool
}

...
    ch := &clickhouse.ClickHouse{
        Config: &api.config.ClickHouse,
        Logger: log.With().Str("logger", "clickhouse").Logger(),
    }
...
func (ch *ClickHouse) LogQuery(query string, args ...interface{}) string {
    var logF *zerolog.Event
    // zerolog is not thread-safe https://github.com/rs/zerolog/issues/242 ;(
    logger := ch.Logger.With().Logger()
    if !ch.Config.LogSQLQueries {
        logF = logger.Debug()
    } else {
        logF = logger.Info()
    }
    if len(args) > 0 {
        logF.Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(fmt.Sprintf("%s with args %v", query, args)))
    } else {
        logF.Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(query))
    }
    return query
}

How race condition possible here? i redefine log.Logger once in main function

and after it use copy of logger

Logger: log.With().Str("logger", "clickhouse").Logger(), and copy of copy logger := ch.Logger.With().Logger()

during execution of With() it shall create copy of Logger

what i'm doing wrong?

sruehl commented 1 year ago

@Slach could you try this?

func (ch *ClickHouse) LogQuery(query string, args ...interface{}) string {
    level := zerolog.InfoLevel
    if !ch.Config.LogSQLQueries {
                level = zerolog.DebugLevel
    }
    if len(args) > 0 {
        ch.Logger.WithLevel(level).Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(fmt.Sprintf("%s with args %v", query, args)))
    } else {
            ch.Logger.WithLevel(level).Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(query))
    }
    return query
}

Trying to confirm if that might help.

Slach commented 1 year ago

@sruehl I can't thank you enough, it works, you save many debug hours

Slach commented 1 year ago

=( but found race condition in other place

WARNING: DATA RACE
Write at 0x00c000448655 by goroutine 34:
  github.com/rs/zerolog/internal/json.Encoder.AppendString()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.30.0/internal/json/string.go:63 +0x71d
  github.com/rs/zerolog/internal/json.Encoder.AppendKey()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.30.0/internal/json/base.go:18 +0x275
  github.com/rs/zerolog.(*Event).Str()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.30.0/event.go:256 +0xea
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:412 +0x3c24
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:197 +0x44e
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/sync@v0.3.0/errgroup/errgroup.go:75 +0x133

Previous write at 0x00c000448655 by goroutine 33:
  github.com/rs/zerolog/internal/json.Encoder.AppendString()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.30.0/internal/json/string.go:63 +0x71d
  github.com/rs/zerolog/internal/json.Encoder.AppendKey()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.30.0/internal/json/base.go:18 +0x275
  github.com/rs/zerolog.(*Event).Str()
      /home/runner/go/pkg/mod/github.com/rs/zerolog@v1.30.0/event.go:256 +0xea
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:412 +0x3c24
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:197 +0x44e
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/sync@v0.3.0/errgroup/errgroup.go:75 +0x133

backup/download.go:412

// simple import zerolog
import (
    "github.com/rs/zerolog/log"
)

func (b *Backuper) downloadTableMetadata(ctx context.Context, backupName string, disks []clickhouse.Disk, tableTitle metadata.TableTitle, schemaOnly bool, partitions []string) (*metadata.TableMetadata, uint64, error) {

...
    log.Info().
        Str("operation", "download_metadata").
        Str("backup", backupName).
        Str("duration", utils.HumanizeDuration(time.Since(start))).
        Str("size", utils.FormatBytes(size)).
        Msg("done")
    return &tableMetadata, size, nil
}

just one function without instances of zerolog.Logger, which called inside pool of go-routines

    downloadSemaphore := semaphore.NewWeighted(int64(b.cfg.General.DownloadConcurrency))
    metadataGroup, metadataCtx := errgroup.WithContext(ctx)
    for i, t := range tablesForDownload {
...
        metadataGroup.Go(func() error {
            defer downloadSemaphore.Release(1)
            if err != nil {
                return err
            }
        })
    }
    if err := metadataGroup.Wait(); err != nil {
        return fmt.Errorf("one of Download Metadata go-routine return error: %v", err)
    }

Any ideas why it happen? and how to resolve it?

main.go with zerolog initialization

import (
    stdlog "log"
    "github.com/rs/zerolog"
    "github.com/rs/zerolog/log"
    "github.com/rs/zerolog/pkgerrors"
)

func main() {
    zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs
    zerolog.ErrorStackMarshaler = pkgerrors.MarshalStack
    consoleWriter := zerolog.ConsoleWriter{Out: os.Stdout, NoColor: true, TimeFormat: "2006-01-02 15:04:05.000"}
    log.Logger = zerolog.New(zerolog.SyncWriter(consoleWriter)).With().Timestamp().Caller().Logger()
    stdlog.SetOutput(log.Logger)
...
}