chdb-io / chdb

chDB is an in-process OLAP SQL Engine 🚀 powered by ClickHouse
https://clickhouse.com/docs/en/chdb
Apache License 2.0
2.03k stars 72 forks source link

Let pandas choose backend itself #39

Closed: auxten closed this 1 year ago

auxten commented 1 year ago

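Fixes the error below, found in this Colab notebook: https://colab.research.google.com/drive/1y68cJrS9qD3dTnPRLYFDvk7L136eIv1p?usp=sharing#scrollTo=hlfihpIB6_fI

`pandas_read_parquet` always passes `dtype_backend='pyarrow'` to `pd.read_parquet`. The pandas version installed in the notebook has no `dtype_backend` parameter on `read_parquet`, so the argument falls into `**kwargs` and is forwarded to pyarrow's `read_table()`, which rejects it: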

TypeError                                 Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/IPython/core/formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

6 frames
/usr/local/lib/python3.10/dist-packages/IPython/lib/pretty.py in pretty(self, obj)
    392                         if cls is not object \
    393                                 and callable(cls.__dict__.get('__repr__')):
--> 394                             return _repr_pprint(obj, self, cycle)
    395 
    396             return _default_pprint(obj, self, cycle)

/usr/local/lib/python3.10/dist-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
    698     """A pprint that just redirects to the normal repr function."""
    699     # Find newlines and replace them with p.break_()
--> 700     output = repr(obj)
    701     lines = output.splitlines()
    702     with p.group():

/usr/local/lib/python3.10/dist-packages/chdb/dataframe/query.py in __repr__(self)
     88 
     89     def __repr__(self):
---> 90         return repr(self.to_pandas())
     91 
     92     def __str__(self):

/usr/local/lib/python3.10/dist-packages/chdb/dataframe/query.py in to_pandas(self)
     47                 # wrap bytes to ReadBuffer
     48                 pq_reader = BytesIO(self._parquet_memoryview.tobytes())
---> 49                 return pandas_read_parquet(pq_reader)
     50             elif self._parquet_path is not None:
     51                 return pandas_read_parquet(self._parquet_path)

/usr/local/lib/python3.10/dist-packages/chdb/dataframe/query.py in pandas_read_parquet(path)
    188 def pandas_read_parquet(path) -> pd.DataFrame:
    189     if pd.__version__[0] >= '1':
--> 190         return pd.read_parquet(path, engine='pyarrow', dtype_backend='pyarrow')
    191     else:
    192         return pd.read_parquet(path)

/usr/local/lib/python3.10/dist-packages/pandas/io/parquet.py in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, **kwargs)
    501             msg += (
    502                 "Use dtype_backend='numpy_nullable' instead of use_nullable_dtype=True."
--> 503             )
    504         warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
    505     else:

/usr/local/lib/python3.10/dist-packages/pandas/io/parquet.py in read(self, path, columns, use_nullable_dtypes, storage_options, **kwargs)
    249     def write(
    250         self,
--> 251         df: DataFrame,
    252         path,
    253         compression: Literal["snappy", "gzip", "brotli"] | None = "snappy",

TypeError: read_table() got an unexpected keyword argument 'dtype_backend'