xitongsys / parquet-go

pure golang library for reading/writing parquet file
Apache License 2.0
1.27k stars 293 forks source link

arrow schema with timestamp_s is wrong #441

Open jaredzhou opened 2 years ago

jaredzhou commented 2 years ago
    // Reproduction: build an Arrow record with one second-resolution timestamp
    // column and write it to a local parquet file through the Arrow writer.

    // Schema with a single nullable field "time" typed as Timestamp_s, the
    // second-resolution unit this issue is about.
    schema := arrow.NewSchema([]arrow.Field{{
        Name:     "time",
        Type:     arrow.FixedWidthTypes.Timestamp_s,
        Nullable: true,
        Metadata: arrow.NewMetadata(nil, nil),
    }}, nil)
    fmt.Printf("schema: %v\n", schema)
    // Build a one-row record; the value appended is the current Unix time in
    // seconds, matching the declared Timestamp_s unit.
    builder := array.NewRecordBuilder(memory.DefaultAllocator, schema)
    builder.Field(0).(*array.TimestampBuilder).Append(arrow.Timestamp(time.Now().Unix()))
    rec := builder.NewRecord()
    // Open the output file "a.parquet" in the current working directory.
    fw, err := local.NewLocalFileWriter("a.parquet")
    if err != nil {

        fmt.Printf("Can't create file;%s", err.Error())
        return
    }
    // Create the parquet writer from the record's Arrow schema.
    // NOTE(review): the third argument (1) is presumably the writer's
    // parallelism level — confirm against the parquet-go writer API.
    w, err := writer.NewArrowWriter(rec.Schema(), fw, 1)
    if err != nil {

        fmt.Printf("Can't create parquet writer;%s", err.Error())
        return
    }
    // Write the record; no error is returned here even though the column uses
    // Timestamp_s, which is the behavior being reported.
    if err = w.WriteArrow(rec); err != nil {

        fmt.Printf("WriteArrow error;%s", err.Error())
        return
    }
    // NOTE(review): WriteStop presumably flushes buffered data and finalizes
    // the parquet file — confirm against the parquet-go writer API.
    if err = w.WriteStop(); err != nil {
        fmt.Printf("WriteStop error;%s", err.Error())
        return
    }
    log.Println("Write Finished")
    // NOTE(review): Close is only reached on the success path; every early
    // return above skips it, and its return value (if any) is not checked.
    fw.Close()

timestamp_s is not supported, so an error should be reported here. Instead, it is currently treated as timestamp_ms, so the data written to the parquet file is wrong.