ploomber / jupysql

Better SQL in Jupyter. 📊
https://jupysql.ploomber.io
Apache License 2.0
717 stars 76 forks source link

Clickhouse boxplot error #86

Closed idomic closed 9 months ago

idomic commented 1 year ago

When connecting to clickhouse via the driver, and trying to get the box plot, there's an error due to SQL mismatch under the hood: Can follow this tutorial to reproduce.

Command

%sqlplot boxplot --table trips --column trip_distance

Stack:

---------------------------------------------------------------------------
DatabaseException                         Traceback (most recent call last)
Cell In[39], line 1
----> 1 get_ipython().run_line_magic('sqlplot', 'boxplot --table trips --column trip_distance')

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2369, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
   2367     kwargs['local_ns'] = self.get_local_scope(stack_depth)
   2368 with self.builtin_trap:
-> 2369     result = fn(*args, **kwargs)
   2370 return result

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sql/magic_plot.py:70, in SqlPlotMagic.execute(self, line, cell, local_ns)
     65     raise ValueError(
     66         "Missing the first argument, must be: 'histogram' or 'boxplot'"
     67     )
     69 if cmd.args.line[0] in {"box", "boxplot"}:
---> 70     return plot.boxplot(
     71         table=cmd.args.table,
     72         column=column,
     73         with_=cmd.args.with_,
     74         orient=cmd.args.orient,
     75         conn=None,
     76     )
     77 elif cmd.args.line[0] in {"hist", "histogram"}:
     78     return plot.histogram(
     79         table=cmd.args.table,
     80         column=column,
   (...)
     83         conn=None,
     84     )

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/ploomber_core/dependencies.py:50, in requires.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
     44     fn_name = name or f.__name__
     46     raise ImportError(
     47         _make_requires_error_message(missing_pkgs, fn_name, extra_msg)
     48     )
---> 50 return f(*args, **kwargs)

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sql/plot.py:250, in boxplot(table, column, orient, with_, conn)
    247 set_label = ax.set_ylabel if vert else ax.set_xlabel
    249 if isinstance(column, str):
--> 250     stats = [_boxplot_stats(conn, table, column, with_=with_)]
    251     ax.bxp(stats, vert=vert)
    252     ax.set_title(f"{column!r} from {table!r}")

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sql/plot.py:137, in _boxplot_stats(con, table, column, whis, autorange, with_)
    134 stats = dict()
    136 # arithmetic mean
--> 137 s_stats = _summary_stats(con, table, column, with_=with_)
    139 stats["mean"] = s_stats["mean"]
    140 q1, med, q3 = s_stats["q1"], s_stats["med"], s_stats["q3"]

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sql/plot.py:40, in _summary_stats(con, table, column, with_)
     37 if with_:
     38     query = str(store.render(query, with_=with_))
---> 40 values = con.execute(query).fetchone()
     41 keys = ["q1", "med", "q3", "mean", "N"]
     42 return {k: float(v) for k, v in zip(keys, values)}

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sqlalchemy/engine/base.py:1365, in Connection.execute(self, statement, *multiparams, **params)
   1356 if isinstance(statement, util.string_types):
   1357     util.warn_deprecated_20(
   1358         "Passing a string to Connection.execute() is "
   1359         "deprecated and will be removed in version 2.0.  Use the "
   (...)
   1362         "driver-level SQL string."
   1363     )
-> 1365     return self._exec_driver_sql(
   1366         statement,
   1367         multiparams,
   1368         params,
   1369         _EMPTY_EXECUTION_OPTS,
   1370         future=False,
   1371     )
   1373 try:
   1374     meth = statement._execute_on_connection

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sqlalchemy/engine/base.py:1669, in Connection._exec_driver_sql(self, statement, multiparams, params, execution_options, future)
   1659         (
   1660             statement,
   1661             distilled_params,
   (...)
   1665             statement, distilled_parameters, execution_options
   1666         )
   1668 dialect = self.dialect
-> 1669 ret = self._execute_context(
   1670     dialect,
   1671     dialect.execution_ctx_cls._init_statement,
   1672     statement,
   1673     distilled_parameters,
   1674     execution_options,
   1675     statement,
   1676     distilled_parameters,
   1677 )
   1679 if not future:
   1680     if self._has_events or self.engine._has_events:

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sqlalchemy/engine/base.py:1943, in Connection._execute_context(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)
   1940             branched.close()
   1942 except BaseException as e:
-> 1943     self._handle_dbapi_exception(
   1944         e, statement, parameters, cursor, context
   1945     )
   1947 return result

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sqlalchemy/engine/base.py:2128, in Connection._handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   2124         util.raise_(
   2125             sqlalchemy_exception, with_traceback=exc_info[2], from_=e
   2126         )
   2127     else:
-> 2128         util.raise_(exc_info[1], with_traceback=exc_info[2])
   2130 finally:
   2131     del self._reentrant_error

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sqlalchemy/util/compat.py:211, in raise_(***failed resolving arguments***)
    208     exception.__cause__ = replace_context
    210 try:
--> 211     raise exception
    212 finally:
    213     # credit to
    214     # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
    215     # as the __traceback__ object creates a cycle
    216     del exception, replace_context, from_, with_traceback

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/sqlalchemy/engine/base.py:1900, in Connection._execute_context(self, dialect, constructor, statement, parameters, execution_options, *args, **kw)
   1898                 break
   1899     if not evt_handled:
-> 1900         self.dialect.do_execute(
   1901             cursor, statement, parameters, context
   1902         )
   1904 if self._has_events or self.engine._has_events:
   1905     self.dispatch.after_cursor_execute(
   1906         self,
   1907         cursor,
   (...)
   1911         context.executemany,
   1912     )

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/clickhouse_sqlalchemy/drivers/base.py:416, in ClickHouseDialect.do_execute(self, cursor, statement, parameters, context)
    415 def do_execute(self, cursor, statement, parameters, context=None):
--> 416     cursor.execute(statement, parameters, context=context)

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/clickhouse_sqlalchemy/drivers/http/connector.py:117, in Cursor.execute(self, operation, parameters, context)
    114 params = {'query_id': self._query_id}
    115 response_gen = transport.execute(raw_sql, params=params)
--> 117 self._process_response(response_gen)
    118 self._end_query()

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/clickhouse_sqlalchemy/drivers/http/connector.py:216, in Cursor._process_response(self, response)
    213 def _process_response(self, response):
    214     response = iter(response)
--> 216     self._columns = next(response, None)
    217     self._types = next(response, None)
    218     self._response = response

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/clickhouse_sqlalchemy/drivers/http/transport.py:133, in RequestsTransport.execute(self, query, params)
    128 def execute(self, query, params=None):
    129     """
    130     Query is returning rows and these rows should be parsed or
    131     there is nothing to return.
    132     """
--> 133     r = self._send(query, params=params, stream=True)
    134     lines = r.iter_lines()
    135     try:

File ~/opt/miniconda3/envs/py310/lib/python3.10/site-packages/clickhouse_sqlalchemy/drivers/http/transport.py:179, in RequestsTransport._send(self, data, params, stream)
    177     orig = HTTPException(r.text)
    178     orig.code = r.status_code
--> 179     raise DatabaseException(orig)
    180 return r

DatabaseException: Orig exception: Code: 62. DB::Exception: Syntax error: failed at position 44 ('(') (line 3, col 36): (ORDER BY "trip_distance") AS q1,
percentile_disc(0.50) WITHIN GROUP (ORDER BY "trip_distance") AS med,
percentile_disc(0.75) WITHIN GROUP (ORDER BY "trip_dista. Expected one of: BY, end of query. (SYNTAX_ERROR) (version 22.13.1.2766 (official build))
edublancas commented 1 year ago

when working on the listing tables/columns (https://github.com/ploomber/jupysql-plugin/issues/22), I found that sqlalchemy has a Python API that allows to write a single version of a query (in Python), and export it to different SQL dialects; we might want to explore this and re-write our queries using SQLAlchemy so they are automatically converted and we don't face these issues

edublancas commented 1 year ago

we should add clickhouse to the integration tests: https://hub.docker.com/r/clickhouse/clickhouse-server/