# Reproduction snippet: load a gzip-compressed, header-less CSV from `s3fp`
# and query it with pandasql.
# NOTE(review): `dd` is presumably dask.dataframe and `ps` pandasql; the
# imports are not part of this snippet -- confirm.
dfp = dd.read_csv(s3fp,
compression='gzip',
header=None,
sep=',',
quotechar='"',
# NOTE(review): presumably None because a gzip file cannot be split into
# blocks -- confirm against the dask read_csv documentation.
blocksize=None,
# Request object dtype for every column instead of per-column inference.
dtype=object,
# No header row in the file, so supply 60 column names: A0 .. A59.
names=['A' + str(i) for i in range(60)]
)
# This is the call that fails in the traceback below: pandasql's write_table
# reads `df.index.names`, and the dask Index raises
# AttributeError: 'Index' object has no attribute 'names'.
ps.sqldf('select * from dfp limit 10;',globals())
AttributeError Traceback (most recent call last)
in ()
----> 1 ps.sqldf('select * from dfp limit 10;',globals())
C:\Miniconda2\envs\py27\lib\site-packages\pandasql\sqldf.pyc in sqldf(query, env, db_uri)
154 >>> sqldf("select avg(x) from df;", locals())
155 """
--> 156 return PandaSQL(db_uri)(query, env)
C:\Miniconda2\envs\py27\lib\site-packages\pandasql\sqldf.pyc in __call__(self, query, env)
56 continue
57 self.loaded_tables.add(table_name)
---> 58 write_table(env[table_name], table_name, conn)
59
60 try:
C:\Miniconda2\envs\py27\lib\site-packages\pandasql\sqldf.pyc in write_table(df, tablename, conn)
119 message='The provided table name \'%s\' is not found exactly as such in the database' % tablename)
120 to_sql(df, name=tablename, con=conn,
--> 121 index=not any(name is None for name in df.index.names)) # load index into db if all levels are named
122
123
C:\Miniconda2\envs\py27\lib\site-packages\dask\dataframe\core.pyc in __getattr__(self, key)
2246 elif key in self._dt_attributes:
2247 return getattr(self.dt, key)
-> 2248 raise AttributeError("'Index' object has no attribute %r" % key)
2249
2250 def __dir__(self):
AttributeError: 'Index' object has no attribute 'names'
Hi,
When I try to use pandasql with a dask.dataframe, I get the error below. Do you have any idea how to use pandasql with dask?
# Reproduction script for the pandasql + dask AttributeError reported here.
# NOTE(review): `dd` is presumably dask.dataframe and `ps` pandasql; the
# imports are not part of this snippet -- confirm.

# Location of the gzip-compressed CSV input (path elided by the reporter).
s3fp = "s3://bucket/.../xxx00103_1prt/xxx00103_1prt.csv.gz"

# Load the header-less CSV with 60 object-typed columns named A0 .. A59.
dfp = dd.read_csv(
    s3fp,
    compression='gzip',
    header=None,
    sep=',',
    quotechar='"',
    # NOTE(review): presumably None because a gzip file cannot be split into
    # blocks -- confirm against the dask read_csv documentation.
    blocksize=None,
    dtype=object,
    names=['A' + str(i) for i in range(60)],
)

# Query the frame through pandasql. This statement was fused onto the end of
# the read_csv call above; it is a separate statement and is the one that
# raises the reported AttributeError.
ps.sqldf('select * from dfp limit 10;', globals())
AttributeError Traceback (most recent call last)