Closed mehtamohit013 closed 1 year ago
Add support for files such as .csv for sqlcmd columns
.csv
sqlcmd columns
Dataset: https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques File: train.csv
%load_ext sql %sql duckdb:// %sqlcmd columns --table train.csv
Stack Trace:
--------------------------------------------------------------------------- NoSuchTableError Traceback (most recent call last) Cell In[4], line 1 ----> 1 get_ipython().run_line_magic('sqlcmd', 'columns --table train.csv') File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2417](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2417), in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth) 2415 kwargs['local_ns'] = self.get_local_scope(stack_depth) 2416 with self.builtin_trap: -> 2417 result = fn(*args, **kwargs) 2419 # The code below prevents the output from being displayed 2420 # when using magics with decodator @output_can_be_silenced 2421 # when the last Python token in the expression is a ';'. 2422 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False): File [~/ploomber/jupysql/src/sql/magic_cmd.py:89](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/magic_cmd.py:89), in SqlCmdMagic._validate_execute_inputs(self, line) 86 if command in COMMANDS_SQLALCHEMY_ONLY: 87 util.support_only_sql_alchemy_connection(f"%sqlcmd {command}") ---> 89 return self.execute(command, others) 90 else: 91 raise exceptions.UsageError( 92 f"%sqlcmd has no command: {command!r}. " 93 "Valid commands are: {}".format( 94 ", ".join(AVAILABLE_SQLCMD_COMMANDS) 95 ) 96 ) File [~/ploomber/jupysql/src/sql/magic_cmd.py:116](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/magic_cmd.py:116), in SqlCmdMagic.execute(self, cmd_name, others, cell, local_ns) 114 cmd = router.get(cmd_name) 115 if cmd: --> 116 return cmd(others) File [~/ploomber/jupysql/src/sql/cmd/columns.py:29](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/cmd/columns.py:29), in columns(others) 26 parser.add_argument("-s", "--schema", type=str, help="Schema name", required=False) 28 args = parser.parse_args(others) ---> 29 return inspect.get_columns(name=sanitize_identifier(args.table), schema=args.schema) File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/telemetry/telemetry.py:754](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/telemetry/telemetry.py:754), in Telemetry.log_call.<locals>._log_call.<locals>.wrapper(*args, **kwargs) 752 result = func(_payload, *args, **kwargs) 753 else: --> 754 result = func(*args, **kwargs) 755 except Exception as e: 756 metadata_error = { 757 # can we log None to posthog? 758 "type": getattr(e, "type_", None), (...) 761 **_payload, 762 } File [~/ploomber/jupysql/src/sql/inspect.py:486](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/inspect.py:486), in get_columns(name, schema) 483 @telemetry.log_call() 484 def get_columns(name, schema=None): 485 """Get column names for a given connection""" --> 486 return Columns(name, schema) File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/exceptions.py:109](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/ploomber_core/exceptions.py:109), in modify_exceptions.<locals>.wrapper(*args, **kwargs) 106 @wraps(fn) 107 def wrapper(*args, **kwargs): 108 try: --> 109 return fn(*args, **kwargs) 110 except (ValueError, TypeError) as e: 111 _add_community_link(e) File [~/ploomber/jupysql/src/sql/inspect.py:182](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/ploomber/jupysql/src/sql/inspect.py:182), in Columns.__init__(self, name, schema, conn) 177 inspector = _get_inspector(conn) 179 # this returns a list of dictionaries. e.g., 180 # [{"name": "column_a", "type": "INT"} 181 # {"name": "column_b", "type": "FLOAT"}] --> 182 columns = inspector.get_columns(name, schema) or [] 184 self._table = PrettyTable() 185 self._table.field_names = _get_row_with_most_keys(columns) File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:859](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:859), in Inspector.get_columns(self, table_name, schema, **kw) 835 r"""Return information about columns in ``table_name``. 836 837 Given a string ``table_name`` and an optional string ``schema``, (...) 855 856 """ 858 with self._operation_context() as conn: --> 859 col_defs = self.dialect.get_columns( 860 conn, table_name, schema, info_cache=self.info_cache, **kw 861 ) 862 if col_defs: 863 self._instantiate_types([col_defs]) File <string>:2, in get_columns(self, connection, table_name, schema, **kw) File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:97](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/engine/reflection.py:97), in cache(fn, self, con, *args, **kw) 95 ret: _R = info_cache.get(key) 96 if ret is None: ---> 97 ret = fn(self, con, *args, **kw) 98 info_cache[key] = ret 99 return ret File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3481](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3481), in PGDialect.get_columns(self, connection, table_name, schema, **kw) 3471 @reflection.cache 3472 def get_columns(self, connection, table_name, schema=None, **kw): 3473 data = self.get_multi_columns( 3474 connection, 3475 schema=schema, (...) 3479 **kw, 3480 ) -> 3481 return self._value_or_raise(data, table_name, schema) File [~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3449](https://file+.vscode-resource.vscode-cdn.net/home/mohit/ploomber/jupysql/~/mambaforge/envs/jupysql/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/base.py:3449), in PGDialect._value_or_raise(self, data, table, schema) 3447 return dict(data)[(schema, table)] 3448 except KeyError: -> 3449 raise exc.NoSuchTableError( 3450 f"{schema}.{table}" if schema else table 3451 ) from None NoSuchTableError: train.csv
this is expected. %sqlcmd columns uses sqlalchemy's API to retrieve the information. and csv files are not registered as tables. the best thing we can do here is to show a more meaningful error message
%sqlcmd columns
Add support for files such as
.csv
forsqlcmd columns
Dataset: https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques File: train.csv
Stack Trace: