segmentio / parquet-go

Go library to read/write Parquet files
https://pkg.go.dev/github.com/segmentio/parquet-go
Apache License 2.0
341 stars 102 forks source link

A struct where two tags specify the same column name causes records to silently not be written #486

Open andruwm opened 1 year ago

andruwm commented 1 year ago

I embarrassingly came across this through a careless copy + paste, and spent more time debugging than I care to admit. The generic writer does not return an error, and the count returned from Write even equals the count of objects passed in. The result, however, is a parquet file with no objects.

A simple reproduction:

package main

import (
    "bytes"
    "io"
    "log"
    "strings"

    "github.com/segmentio/parquet-go"
)

// ExampleWorks is the control case: each field is tagged with a distinct
// parquet column name ("foo" and "bar").
type ExampleWorks struct {
    Foo string `parquet:"foo"`
    Bar string `parquet:"bar"`
}

// ExampleDoesntWork is the failing case: both fields are tagged with the
// same parquet column name ("foo"). The duplicate tag is intentional; it is
// what this program exercises.
type ExampleDoesntWork struct {
    Foo string `parquet:"foo"`
    Bar string `parquet:"foo"` // same column name as Foo, on purpose
}

func main() {
    readWritePrint([]ExampleWorks{{Foo: "Hello", Bar: "World"}})
    readWritePrint([]ExampleDoesntWork{{Foo: "Hello", Bar: "World"}})
}

// readWritePrint writes input to an in-memory buffer with a generic parquet
// writer, reads the buffer back with a generic parquet reader, and logs the
// rows it got.
//
// A write error, a short write, a close error, or a read error other than
// io.EOF terminates the program via log.Fatal. Reading back fewer rows than
// were written is only logged, so the (possibly zero-valued) rows are still
// printed afterwards.
func readWritePrint[T any](input []T) {
    log.Printf("testing type %T", input)

    buf := new(bytes.Buffer)
    writer := parquet.NewGenericWriter[T](buf)

    written, err := writer.Write(input)
    switch {
    case err != nil:
        log.Fatal(err)
    case written != len(input):
        log.Fatal("expected write but got none")
    }

    // Close the writer before reading the buffer back.
    if err := writer.Close(); err != nil {
        log.Fatal(err)
    }

    reader := parquet.NewGenericReader[T](strings.NewReader(buf.String()))

    rows := make([]T, len(input))
    got, err := reader.Read(rows)
    // io.EOF is not treated as a failure here.
    if !(err == nil || err == io.EOF) {
        log.Fatal(err)
    }
    if got < len(input) {
        log.Println("expected read rows but got none")
    }

    log.Printf("%+v", rows)
}
$ go run ./main
# 2023/03/16 12:51:34 testing type []main.ExampleWorks
# 2023/03/16 12:51:34 [{Foo:Hello Bar:World}]
# 2023/03/16 12:51:34 testing type []main.ExampleDoesntWork
# 2023/03/16 12:51:34 expected read rows but got none
# 2023/03/16 12:51:34 [{Foo: Bar:}]