Closed: martindurant closed this issue 1 year ago.
Original: https://github.com/dask/dask/issues/10601 cc @jrbourbeau
```
_______________________________ test_timestamp96 _______________________________
[gw1] linux -- Python 3.10.12 /usr/share/miniconda3/envs/test-environment/bin/python3.10

tmpdir = local('/tmp/pytest-of-runner/pytest-0/popen-gw1/test_timestamp960')

    @FASTPARQUET_MARK
    def test_timestamp96(tmpdir):
        fn = str(tmpdir)
        df = pd.DataFrame({"a": [pd.to_datetime("now", utc=True)]})
        ddf = dd.from_pandas(df, 1)
        ddf.to_parquet(fn, engine="fastparquet", write_index=False, times="int96")
        pf = fastparquet.ParquetFile(fn)
        assert pf._schema[1].type == fastparquet.parquet_thrift.Type.INT96
>       out = dd.read_parquet(fn, engine="fastparquet", index=False).compute()

dask/dataframe/io/tests/test_parquet.py:1883:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dask/base.py:342: in compute
    (result,) = compute(self, traverse=False, **kwargs)
dask/base.py:628: in compute
    results = schedule(dsk, keys, **kwargs)
dask/dataframe/io/parquet/core.py:96: in __call__
    return read_parquet_part(
dask/dataframe/io/parquet/core.py:654: in read_parquet_part
    dfs = [
dask/dataframe/io/parquet/core.py:655: in <listcomp>
    func(
dask/dataframe/io/parquet/fastparquet.py:1075: in read_partition
    return cls.pf_to_pandas(
dask/dataframe/io/parquet/fastparquet.py:1115: in pf_to_pandas
    df, views = pf.pre_allocate(size, columns, categories, index)
/usr/share/miniconda3/envs/test-environment/lib/python3.10/site-packages/fastparquet/api.py:797: in pre_allocate
    df, arrs = _pre_allocate(size, columns, categories, index, cats,
/usr/share/miniconda3/envs/test-environment/lib/python3.10/site-packages/fastparquet/api.py:1051: in _pre_allocate
    df, views = dataframe.empty(dtypes, size, cols=cols, index_names=index,
/usr/share/miniconda3/envs/test-environment/lib/python3.10/site-packages/fastparquet/dataframe.py:202: in empty
    values = type(bvalues)._from_sequence(values, copy=False, dtype=bvalues.dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

>   ???
E   ValueError: Buffer has wrong number of dimensions (expected 1, got 2)

pandas/_libs/tslibs/tzconversion.pyx:187: ValueError
```
Regression due to https://github.com/dask/fastparquet/pull/893 @jbrockmendel
(duplicated in #896)
Original: https://github.com/dask/dask/issues/10601 cc @jrbourbeau
Regression due to https://github.com/dask/fastparquet/pull/893 @jbrockmendel