xitongsys / parquet-go

Pure Go library for reading and writing parquet files
Apache License 2.0
1.25k stars 294 forks source link

in memory reader #481

Closed Arnold1 closed 2 years ago

Arnold1 commented 2 years ago

Hi,

I have a []byte that holds the contents of a parquet file - how can I read it using an in-memory reader?

Thanks

hangxie commented 2 years ago

Have you tried https://github.com/xitongsys/parquet-go-source/blob/master/buffer/buffer.go?

Arnold1 commented 2 years ago

@hangxie how would you write the following with the BufferFile?

// LoadParquetData decodes an in-memory parquet file into a slice of Foo.
//
// content holds the raw bytes of the parquet file. For every row read, the
// "value" column is copied into Foo.Value when it is present and holds a
// []byte; otherwise the corresponding Foo is left at its zero value so that
// result indexes still line up with row indexes.
//
// The returned slice contains exactly one element per row that was actually
// read. If the reader reports io.EOF before NumRows rows have been produced,
// the result is shorter than NumRows rather than padded with empty entries.
//
// An error is returned when the reader cannot be created or when reading a
// row fails for any reason other than io.EOF; the slice is nil in that case.
func LoadParquetData(content []byte) ([]Foo, error) {
    fr, err := goparquet.NewFileReader(bytes.NewReader(content))
    if err != nil {
        return nil, err
    }

    rows := fr.NumRows()
    // Reserve capacity up front but grow the length only as rows are read,
    // so an early EOF cannot leave unread, zero-valued entries in the result.
    records := make([]Foo, 0, rows)

    for i := int64(0); i < rows; i++ {
        row, err := fr.NextRow()
        if err == io.EOF {
            break
        }
        if err != nil {
            return nil, errors.Wrap(err, "Reading record failed")
        }

        var rec Foo
        // A missing key yields a nil interface, for which the type assertion
        // simply reports ok == false.
        if raw, ok := row["value"].([]byte); ok {
            rec.Value = string(raw)
        }
        records = append(records, rec)
    }

    return records, nil
}