ploomber / jupysql

Better SQL in Jupyter. 📊
https://jupysql.ploomber.io
Apache License 2.0
705 stars 75 forks source link

Add support for files such as `.csv` for `sqlcmd columns` #824

Closed mehtamohit013 closed 1 year ago

mehtamohit013 commented 1 year ago

Add support for files such as .csv for sqlcmd columns

Dataset: https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques File: train.csv

%load_ext sql
%sql duckdb://
%sqlcmd columns --table train.csv

Stack Trace:

---------------------------------------------------------------------------
NoSuchTableError                          Traceback (most recent call last)
Cell In[4], line 1
----> 1 get_ipython().run_line_magic('sqlcmd', 'columns --table train.csv')

File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2417](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2417), in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
   2415     kwargs['local_ns'] = self.get_local_scope(stack_depth)
   2416 with self.builtin_trap:
-> 2417     result = fn(*args, **kwargs)
   2419 # The code below prevents the output from being displayed
   2420 # when using magics with decodator @output_can_be_silenced
   2421 # when the last Python token in the expression is a ';'.
   2422 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False):

File [~/ploomber/jupysql/src/sql/magic_cmd.py:89](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/magic_cmd.py:89), in SqlCmdMagic._validate_execute_inputs(self, line)
     86     if command in COMMANDS_SQLALCHEMY_ONLY:
     87         util.support_only_sql_alchemy_connection(f"%sqlcmd {command}")
---> 89     return self.execute(command, others)
     90 else:
     91     raise exceptions.UsageError(
     92         f"%sqlcmd has no command: {command!r}. "
     93         "Valid commands are: {}".format(
     94             ", ".join(AVAILABLE_SQLCMD_COMMANDS)
     95         )
     96     )

File [~/ploomber/jupysql/src/sql/magic_cmd.py:116](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/magic_cmd.py:116), in SqlCmdMagic.execute(self, cmd_name, others, cell, local_ns)
    114 cmd = router.get(cmd_name)
    115 if cmd:
--> 116     return cmd(others)

File [~/ploomber/jupysql/src/sql/cmd/columns.py:29](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/cmd/columns.py:29), in columns(others)
     26 parser.add_argument("-s", "--schema", type=str, help="Schema name", required=False)
     28 args = parser.parse_args(others)
---> 29 return inspect.get_columns(name=sanitize_identifier(args.table), schema=args.schema)

File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/telemetry/telemetry.py:754](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/telemetry/telemetry.py:754), in Telemetry.log_call.<locals>._log_call.<locals>.wrapper(*args, **kwargs)
    752             result = func(_payload, *args, **kwargs)
    753     else:
--> 754         result = func(*args, **kwargs)
    755 except Exception as e:
    756     metadata_error = {
    757         # can we log None to posthog?
    758         "type": getattr(e, "type_", None),
   (...)
    761         **_payload,
    762     }

File [~/ploomber/jupysql/src/sql/inspect.py:486](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/inspect.py:486), in get_columns(name, schema)
    483 @telemetry.log_call()
    484 def get_columns(name, schema=None):
    485     """Get column names for a given connection"""
--> 486     return Columns(name, schema)

File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/exceptions.py:109](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/exceptions.py:109), in modify_exceptions.<locals>.wrapper(*args, **kwargs)
    106 @wraps(fn)
    107 def wrapper(*args, **kwargs):
    108     try:
--> 109         return fn(*args, **kwargs)
    110     except (ValueError, TypeError) as e:
    111         _add_community_link(e)

File [~/ploomber/jupysql/src/sql/inspect.py:182](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/inspect.py:182), in Columns.__init__(self, name, schema, conn)
    177 inspector = _get_inspector(conn)
    179 # this returns a list of dictionaries. e.g.,
    180 # [{"name": "column_a", "type": "INT"}
    181 #  {"name": "column_b", "type": "FLOAT"}]
--> 182 columns = inspector.get_columns(name, schema) or []
    184 self._table = PrettyTable()
    185 self._table.field_names = _get_row_with_most_keys(columns)

File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:859](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:859), in Inspector.get_columns(self, table_name, schema, **kw)
    835 r"""Return information about columns in ``table_name``.
    836 
    837 Given a string ``table_name`` and an optional string ``schema``,
   (...)
    855 
    856 """
    858 with self._operation_context() as conn:
--> 859     col_defs = self.dialect.get_columns(
    860         conn, table_name, schema, info_cache=self.info_cache, **kw
    861     )
    862 if col_defs:
    863     self._instantiate_types([col_defs])

File <string>:2, in get_columns(self, connection, table_name, schema, **kw)

File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:97](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:97), in cache(fn, self, con, *args, **kw)
     95 ret: _R = info_cache.get(key)
     96 if ret is None:
---> 97     ret = fn(self, con, *args, **kw)
     98     info_cache[key] = ret
     99 return ret

File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3481](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3481), in PGDialect.get_columns(self, connection, table_name, schema, **kw)
   3471 @reflection.cache
   3472 def get_columns(self, connection, table_name, schema=None, **kw):
   3473     data = self.get_multi_columns(
   3474         connection,
   3475         schema=schema,
   (...)
   3479         **kw,
   3480     )
-> 3481     return self._value_or_raise(data, table_name, schema)

File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3449](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3449), in PGDialect._value_or_raise(self, data, table, schema)
   3447     return dict(data)[(schema, table)]
   3448 except KeyError:
-> 3449     raise exc.NoSuchTableError(
   3450         f"{schema}.{table}" if schema else table
   3451     ) from None

NoSuchTableError: train.csv
edublancas commented 1 year ago

this is expected. %sqlcmd columns uses sqlalchemy's API to retrieve the information. and csv files are not registered as tables. the best thing we can do here is to show a more meaningful error message