Open dfolch opened 6 years ago
I am getting this same error when queries a dataframe with around 72,000 rows.
yes performance on even modest data size is bad
@dfolch Part of the performance issue with this package is that it uses SqLite as a backend, which means that you're essentially not using one of the main benefits of pandas, which is in memory computation. As a solution to this problem, a few months back I created a package called dataframe_sql, which solves this problem by parsing the sql and translating it to native pandas operations. Hope this helps!
I'm getting an
OperationalError
when selecting from a large dataframe (full error message at bottom). The error seems to be related to the size of the dataframe not the size of the selection.Error message
```python --------------------------------------------------------------------------- OperationalError Traceback (most recent call last) ~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args) 1192 parameters, -> 1193 context) 1194 except BaseException as e: ~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context) 506 def do_execute(self, cursor, statement, parameters, context=None): --> 507 cursor.execute(statement, parameters) 508 OperationalError: too many SQL variables The above exception was the direct cause of the following exception: OperationalError Traceback (most recent call last) in ()
1 df = pd.DataFrame(np.random.randint(0,10,size=(60000,5)))
----> 2 select = sqldf('SELECT "1", "2" FROM df')
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/pandasql/sqldf.py in sqldf(query, env, db_uri)
154 >>> sqldf("select avg(x) from df;", locals())
155 """
--> 156 return PandaSQL(db_uri)(query, env)
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/pandasql/sqldf.py in __call__(self, query, env)
56 continue
57 self.loaded_tables.add(table_name)
---> 58 write_table(env[table_name], table_name, conn)
59
60 try:
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/pandasql/sqldf.py in write_table(df, tablename, conn)
119 message='The provided table name \'%s\' is not found exactly as such in the database' % tablename)
120 to_sql(df, name=tablename, con=conn,
--> 121 index=not any(name is None for name in df.index.names)) # load index into db if all levels are named
122
123
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/pandas/io/sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype)
448 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
449 index_label=index_label, schema=schema,
--> 450 chunksize=chunksize, dtype=dtype)
451
452
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1147 schema=schema, dtype=dtype)
1148 table.create()
-> 1149 table.insert(chunksize)
1150 if (not name.isdigit() and not name.islower()):
1151 # check for potentially case sensitivity issues (GH7815)
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/pandas/io/sql.py in insert(self, chunksize)
661
662 chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
--> 663 self._execute_insert(conn, keys, chunk_iter)
664
665 def _query_iterator(self, result, chunksize, columns, coerce_float=True,
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/pandas/io/sql.py in _execute_insert(self, conn, keys, data_iter)
636 """Insert data into this table with database connection"""
637 data = [{k: v for k, v in zip(keys, row)} for row in data_iter]
--> 638 conn.execute(*self.insert_statement(data, conn))
639
640 def insert(self, chunksize=None):
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params)
946 raise exc.ObjectNotExecutableError(object)
947 else:
--> 948 return meth(self, multiparams, params)
949
950 def _execute_function(self, func, multiparams, params):
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
267 def _execute_on_connection(self, connection, multiparams, params):
268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
270 else:
271 raise exc.ObjectNotExecutableError(self)
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
1058 compiled_sql,
1059 distilled_params,
-> 1060 compiled_sql, distilled_params
1061 )
1062 if self._has_events or self.engine._has_events:
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1198 parameters,
1199 cursor,
-> 1200 context)
1201
1202 if self._has_events or self.engine._has_events:
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1411 util.raise_from_cause(
1412 sqlalchemy_exception,
-> 1413 exc_info
1414 )
1415 else:
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/util/compat.py in raise_from_cause(exception, exc_info)
201 exc_type, exc_value, exc_tb = exc_info
202 cause = exc_value if exc_value is not exception else None
--> 203 reraise(type(exception), exception, tb=exc_tb, cause=cause)
204
205 if py3k:
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
184 value.__cause__ = cause
185 if value.__traceback__ is not tb:
--> 186 raise value.with_traceback(tb)
187 raise value
188
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1191 statement,
1192 parameters,
-> 1193 context)
1194 except BaseException as e:
1195 self._handle_dbapi_exception(
~/anaconda2/envs/urban_prac4/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
505
506 def do_execute(self, cursor, statement, parameters, context=None):
--> 507 cursor.execute(statement, parameters)
508
509 def do_execute_no_params(self, cursor, statement, context=None):
OperationalError: (sqlite3.OperationalError) too many SQL variables [SQL: 'INSERT INTO df ("0", "1", "2", "3", "4") VALUES (?, ?, ?, ?, ?
...
...
(Background on this error at: http://sqlalche.me/e/e3q8)
```