sfu-db / dataprep

Open-source low-code data preparation library in Python. Collect, clean and visualize your data in Python with a few lines of code.
http://dataprep.ai
MIT License
2.01k stars 204 forks source link

AttributeError: 'Series' object has no attribute 'len' #775

Closed naveen-marthala closed 2 years ago

naveen-marthala commented 2 years ago

Describe the bug When I tried to create an EDA report, I got the following error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
C:\Users\NAVEEN~1.MAR\AppData\Local\Temp/ipykernel_14588/1372974063.py in <module>
----> 1 report = dp_eda.create_report(df=raw_data)
      2 # report.show_browser()

~\Miniconda3\envs\my_venv\lib\site-packages\dataprep\eda\create_report\__init__.py in create_report(df, config, display, title, mode, progress)
     66         "resources": INLINE.render(),
     67         "title": title,
---> 68         "components": format_report(df, cfg, mode, progress),
     69     }
     70     template_base = ENV_LOADER.get_template("base.html")

~\Miniconda3\envs\my_venv\lib\site-packages\dataprep\eda\create_report\formatter.py in format_report(df, cfg, mode, progress)
     74         if mode == "basic":
     75             edaframe = EDAFrame(df)
---> 76             comps = format_basic(edaframe, cfg)
     77         # elif mode == "full":
     78         #     comps = format_full(df)

~\Miniconda3\envs\my_venv\lib\site-packages\dataprep\eda\create_report\formatter.py in format_basic(df, cfg)
    284             category=RuntimeWarning,
    285         )
--> 286         (data,) = dask.compute(data)
    287 
    288     res_overview = _format_overview(data, cfg)

~\Miniconda3\envs\my_venv\lib\site-packages\dask\base.py in compute(*args, **kwargs)
    450         postcomputes.append(x.__dask_postcompute__())
    451 
--> 452     results = schedule(dsk, keys, **kwargs)
    453     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    454 

~\Miniconda3\envs\my_venv\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     74                 pools[thread][num_workers] = pool
     75 
---> 76     results = get_async(
     77         pool.apply_async,
     78         len(pool._pool),

~\Miniconda3\envs\my_venv\lib\site-packages\dask\local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    484                         _execute_task(task, data)  # Re-execute locally
    485                     else:
--> 486                         raise_exception(exc, tb)
    487                 res, worker_id = loads(res_info)
    488                 state["cache"][key] = res

~\Miniconda3\envs\my_venv\lib\site-packages\dask\local.py in reraise(exc, tb)
    314     if exc.__traceback__ is not tb:
    315         raise exc.with_traceback(tb)
--> 316     raise exc
    317 
    318 

~\Miniconda3\envs\my_venv\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    220     try:
    221         task, data = loads(task_info)
--> 222         result = _execute_task(task, data)
    223         id = get_id()
    224         result = dumps((result, id))

~\Miniconda3\envs\my_venv\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\Miniconda3\envs\my_venv\lib\site-packages\dask\optimization.py in __call__(self, *args)
    959         if not len(args) == len(self.inkeys):
    960             raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 961         return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
    962 
    963     def __reduce__(self):

~\Miniconda3\envs\my_venv\lib\site-packages\dask\core.py in get(dsk, out, cache)
    149     for key in toposort(dsk):
    150         task = dsk[key]
--> 151         result = _execute_task(task, cache)
    152         cache[key] = result
    153     result = _execute_task(out, cache)

~\Miniconda3\envs\my_venv\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

~\Miniconda3\envs\my_venv\lib\site-packages\dask\utils.py in apply(func, args, kwargs)
     27 def apply(func, args, kwargs=None):
     28     if kwargs:
---> 29         return func(*args, **kwargs)
     30     else:
     31         return func(*args)

~\Miniconda3\envs\my_venv\lib\site-packages\dask\dataframe\core.py in apply_and_enforce(*args, **kwargs)
   5296     func = kwargs.pop("_func")
   5297     meta = kwargs.pop("_meta")
-> 5298     df = func(*args, **kwargs)
   5299     if is_dataframe_like(df) or is_series_like(df) or is_index_like(df):
   5300         if not len(df):

~\Miniconda3\envs\my_venv\lib\site-packages\dask\dataframe\accessor.py in _delegate_method(obj, accessor, attr, args, kwargs)
     46     @staticmethod
     47     def _delegate_method(obj, accessor, attr, args, kwargs):
---> 48         out = getattr(getattr(obj, accessor, obj), attr)(*args, **kwargs)
     49         return maybe_wrap_pandas(obj, out)
     50 

~\Miniconda3\envs\my_venv\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
   5485         ):
   5486             return self[name]
-> 5487         return object.__getattribute__(self, name)
   5488 
   5489     def __setattr__(self, name: str, value) -> None:

AttributeError: 'Series' object has no attribute 'len'

To Reproduce

from dataprep import eda as dp_eda
report = dp_eda.create_report(df=raw_data)

(raw_data is a pandas dataframe with varying data types with 350+ columns and 60K+ rows)

Expected behavior I was expecting the report to be built and displayed.

Screenshots If applicable, add screenshots to help explain your problem.

Desktop (please complete the following information):

Additional context Add any other context about the problem here. versions of some packages in my venv:

pip list

Package                           Version
--------------------------------- ----------
...
dask                              2.30.0
dataprep                          0.4.1
...
ipykernel                         6.6.0
ipython                           7.30.1
ipython-genutils                  0.2.0
ipywidgets                        7.6.5
itsdangerous                      1.1.0
...
jupyter-client                    7.1.0
jupyter-contrib-core              0.3.3
jupyter-contrib-nbextensions      0.5.1
jupyter-core                      4.9.1
...
matplotlib                        3.4.0
matplotlib-inline                 0.1.3
...
nltk                              3.6.3
...
numpy                             1.21.5
openpyxl                          3.0.9
...
pandas                            1.3.5
pandocfilters                     1.5.0
...
pip                               21.2.2
plotly                            5.5.0
ply                               3.11
probableparsing                   0.0.1
...
regex                             2020.11.13
scikit-learn                      1.0.1
scipy                             1.7.3
seaborn                           0.11.2
...
setuptools                        58.0.4
...
naveen-marthala commented 2 years ago

I went through the CI file in the .github folder and learned that python-3.7 is what dataprep is being tested on. So, I created a new venv in miniconda with python-3.7.11, and the error persisted.

and here are the versions of all packages in venv:

Package                           Version
--------------------------------- ----------
aiohttp                           3.8.1
aiosignal                         1.2.0
argcomplete                       2.0.0
argon2-cffi                       21.3.0
argon2-cffi-bindings              21.2.0
asttokens                         2.0.5
async-generator                   1.10
async-timeout                     4.0.2
asynctest                         0.13.0
attrs                             21.4.0
backcall                          0.2.0
backports.functools-lru-cache     1.6.4
bleach                            4.1.0
bokeh                             2.4.2
Bottleneck                        1.3.2
certifi                           2021.10.8
cffi                              1.15.0
charset-normalizer                2.0.10
click                             7.1.2
cloudpickle                       2.0.0
colorama                          0.4.4
cycler                            0.11.0
dask                              2.30.0
dataprep                          0.4.1
debugpy                           1.5.1
decorator                         5.1.0
defusedxml                        0.7.1
entrypoints                       0.3
executing                         0.8.2
Flask                             1.1.4
Flask-Cors                        3.0.10
flit_core                         3.6.0
fonttools                         4.28.5
frozenlist                        1.2.0
fsspec                            2021.11.1
future                            0.18.2
idna                              3.3
importlib-metadata                4.10.0
importlib-resources               5.4.0
ipykernel                         6.6.1
ipython                           7.30.1
ipython-genutils                  0.2.0
ipywidgets                        7.6.5
itsdangerous                      1.1.0
jedi                              0.18.1
Jinja2                            3.0.3
joblib                            1.1.0
jsonpath-ng                       1.5.3
jsonschema                        4.3.3
jupyter                           1.0.0
jupyter-client                    7.1.0
jupyter-console                   6.4.0
jupyter-contrib-core              0.3.3
jupyter-contrib-nbextensions      0.5.1
jupyter-core                      4.9.1
jupyter-highlight-selected-word   0.2.0
jupyter-http-over-ws              0.0.8
jupyter-latex-envs                1.4.6
jupyter-nbextensions-configurator 0.4.1
jupyterlab-pygments               0.1.2
jupyterlab-widgets                1.0.2
kiwisolver                        1.3.2
levenshtein                       0.12.0
locket                            0.2.1
lxml                              4.7.1
MarkupSafe                        2.0.1
matplotlib                        3.5.1
matplotlib-inline                 0.1.3
Metaphone                         0.6
mistune                           0.8.4
multidict                         5.2.0
nbclient                          0.5.9
nbconvert                         6.4.0
nbformat                          5.1.3
nest-asyncio                      1.5.4
nltk                              3.6.3
notebook                          6.4.6
numpy                             1.21.5
packaging                         21.3
pandas                            1.3.5
pandocfilters                     1.5.0
parso                             0.8.3
partd                             1.2.0
pickleshare                       0.7.5
Pillow                            9.0.0
pip                               21.2.4
ply                               3.11
probableparsing                   0.0.1
prometheus-client                 0.12.0
prompt-toolkit                    3.0.24
pure-eval                         0.2.1
pycparser                         2.21
pydantic                          1.9.0
Pygments                          2.11.1
pyparsing                         3.0.6
pyrsistent                        0.18.0
python-crfsuite                   0.9.7
python-dateutil                   2.8.2
python-stdnum                     1.17
pytz                              2021.3
pywin32                           303
pywinpty                          1.1.6
PyYAML                            6.0
pyzmq                             22.3.0
qtconsole                         5.2.2
QtPy                              2.0.0
regex                             2020.11.13
scipy                             1.7.3
Send2Trash                        1.8.0
setuptools                        58.0.4
six                               1.16.0
terminado                         0.12.1
testpath                          0.5.0
toolz                             0.11.2
tornado                           6.1
tqdm                              4.62.3
traitlets                         5.1.1
typing_extensions                 4.0.1
usaddress                         0.5.10
varname                           0.8.1
wcwidth                           0.2.5
webencodings                      0.5.1
Werkzeug                          1.0.1
wheel                             0.37.0
widgetsnbextension                3.5.2
wincertstore                      0.2
wordcloud                         1.8.1
yarl                              1.7.2
zipp                              3.7.0
jinglinpeng commented 2 years ago

Hi @naveen-marthala , thanks a lot for the bug report! Is it possible to provide the dataset so we can reproduce the error?

naveen-marthala commented 2 years ago

@jinglinpeng, sorry, it is not possible for me to provide the dataset, since it is under NDA. Thanks though.

naveen-marthala commented 2 years ago

I am closing the issue since I won't be able to help solve it.