Embarrassingly, I ran into this after a careless copy + paste (two struct fields ended up with the same `parquet` tag) and spent more time debugging it than I care to admit. The generic writer does not return an error, and the count returned from `Write` even equals the number of objects passed in. The result, however, is a parquet file with no objects.
A simple reproduction:
package main
import (
"bytes"
"io"
"log"
"strings"
"github.com/segmentio/parquet-go"
)
// ExampleWorks is the control case for the reproduction: each field is tagged
// with its own distinct parquet column name ("foo" and "bar").
type ExampleWorks struct {
Foo string `parquet:"foo"`
Bar string `parquet:"bar"`
}
// ExampleDoesntWork is the failing case for the reproduction: both fields are
// tagged with the same parquet column name ("foo"), mimicking a copy + paste
// mistake. The duplicate tag is intentional here; do not "fix" it.
type ExampleDoesntWork struct {
Foo string `parquet:"foo"`
// Bar deliberately reuses the "foo" tag to trigger the reported behaviour.
Bar string `parquet:"foo"`
}
func main() {
readWritePrint([]ExampleWorks{{Foo: "Hello", Bar: "World"}})
readWritePrint([]ExampleDoesntWork{{Foo: "Hello", Bar: "World"}})
}
// readWritePrint writes input to an in-memory buffer with a generic parquet
// writer, reads the buffer back with a generic parquet reader, and logs what
// came out.
//
// Any write, close, or non-EOF read error terminates the program via
// log.Fatal, as does a write count that differs from len(input). Reading back
// fewer rows than were written is only logged, so the (possibly zero-valued)
// result slice is still printed afterwards.
func readWritePrint[T any](input []T) {
	log.Printf("testing type %T", input)

	// Write phase: serialize every element into the in-memory buffer.
	var buf bytes.Buffer
	writer := parquet.NewGenericWriter[T](&buf)
	written, err := writer.Write(input)
	if err != nil {
		log.Fatal(err)
	}
	if written != len(input) {
		log.Fatal("expected write but got none")
	}
	if err := writer.Close(); err != nil {
		log.Fatal(err)
	}

	// Read phase: decode the bytes just written into a slice of the same length.
	source := strings.NewReader(buf.String())
	reader := parquet.NewGenericReader[T](source)
	rows := make([]T, len(input))
	count, err := reader.Read(rows)
	// io.EOF is tolerated here; only other errors are fatal.
	if !(err == nil || err == io.EOF) {
		log.Fatal(err)
	}
	if count < len(input) {
		log.Println("expected read rows but got none")
	}
	log.Printf("%+v", rows)
}
$ go run ./main
# 2023/03/16 12:51:34 testing type []main.ExampleWorks
# 2023/03/16 12:51:34 [{Foo:Hello Bar:World}]
# 2023/03/16 12:51:34 testing type []main.ExampleDoesntWork
# 2023/03/16 12:51:34 expected read rows but got none
# 2023/03/16 12:51:34 [{Foo: Bar:}]
Embarrassingly, I ran into this after a careless copy + paste and spent more time debugging it than I care to admit. The generic writer does not return an error, and the count returned from `Write` even equals the number of objects passed in. The result, however, is a parquet file with no objects. A simple reproduction: