ydataai / ydata-profiling

1 Line of code data quality profiling & exploratory data analysis for Pandas and Spark DataFrames.
https://docs.profiling.ydata.ai
MIT License
12.47k stars 1.68k forks source link

index -9223372036854775808 is out of bounds for axis 0 with size 2 #1313

Open zhoujianch opened 1 year ago

zhoujianch commented 1 year ago

Current Behaviour

IndexError Traceback (most recent call last)

in ----> 1 profile_report.get_description() ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs) 1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs) 1032 check_argument_types(memo) -> 1033 retval = func(*args, **kwargs) 1034 try: 1035 check_return_type(retval, memo) ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in get_description(self) 315 Dict containing a description for each variable in the DataFrame. 316 """ --> 317 return self.description_set 318 319 def get_rejected_variables(self) -> set: ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs) 1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs) 1032 check_argument_types(memo) -> 1033 retval = func(*args, **kwargs) 1034 try: 1035 check_return_type(retval, memo) ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in description_set(self) 251 self.summarizer, 252 self.typeset, --> 253 self._sample, 254 ) 255 return self._description_set ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/describe.py in describe(config, df, summarizer, typeset, sample) 70 pbar.total += len(df.columns) 71 series_description = get_series_descriptions( ---> 72 config, df, summarizer, typeset, pbar 73 ) 74 ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs) 313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))] 314 try: --> 315 return func(*args, **kwargs) 316 except TypeError as ex: 317 raise DispatchError(f"Function {func.__code__}") from ex ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_get_series_descriptions(config, df, summarizer, typeset, pbar) 98 with multiprocessing.pool.ThreadPool(pool_size) as executor: 99 for i, (column, description) in enumerate( --> 100 
executor.imap_unordered(multiprocess_1d, args) 101 ): 102 pbar.set_postfix_str(f"Describe variable:{column}") ~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in next(self, timeout) 746 if success: 747 return value --> 748 raise value 749 750 __next__ = next # XXX ~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception) 119 job, i, func, args, kwds = task 120 try: --> 121 result = (True, func(*args, **kwds)) 122 except Exception as e: 123 if wrap_exception and func is not _helper_reraises_exception: ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in multiprocess_1d(args) 77 """ 78 column, series = args ---> 79 return column, describe_1d(config, series, summarizer, typeset) 80 81 pool_size = config.pool_size ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs) 313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))] 314 try: --> 315 return func(*args, **kwargs) 316 except TypeError as ex: 317 raise DispatchError(f"Function {func.__code__}") from ex ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_describe_1d(config, series, summarizer, typeset) 55 56 typeset.type_schema[series.name] = vtype ---> 57 return summarizer.summarize(config, series, dtype=vtype) 58 59 ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summarizer.py in summarize(self, config, series, dtype) 37 object: 38 """ ---> 39 _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)}) 40 return summary 41 ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in handle(self, dtype, *args, **kwargs) 60 funcs = self.mapping.get(dtype, []) 61 op = compose(funcs) ---> 62 return op(*args) 63 64 
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x) 19 return f(*x) 20 else: ---> 21 return f(*res) 22 23 return func2 ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x) 19 return f(*x) 20 else: ---> 21 return f(*res) 22 23 return func2 ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x) 19 return f(*x) 20 else: ---> 21 return f(*res) 22 23 return func2 ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x) 15 def func(f: Callable, g: Callable) -> Callable: 16 def func2(*x) -> Any: ---> 17 res = g(*x) 18 if type(res) == bool: 19 return f(*x) ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs) 313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))] 314 try: --> 315 return func(*args, **kwargs) 316 except TypeError as ex: 317 raise DispatchError(f"Function {func.__code__}") from ex ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary) 63 if not summary["hashable"]: 64 return config, series, summary ---> 65 return fn(config, series, summary) 66 67 return inner ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary) 80 series = series.dropna() 81 ---> 82 return fn(config, series, summary) 83 84 return inner ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/describe_numeric_pandas.py in pandas_describe_numeric_1d(config, series, summary) 118 119 if chi_squared_threshold > 0.0: --> 120 stats["chi_squared"] = chi_square(finite_values) 121 122 stats["range"] = stats["max"] - stats["min"] ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in chi_square(values, histogram) 50 ) -> dict: 51 if histogram 
is None: ---> 52 histogram, _ = np.histogram(values, bins="auto") 53 return dict(chisquare(histogram)._asdict()) 54 <__array_function__ internals> in histogram(*args, **kwargs) ~/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/lib/histograms.py in histogram(a, bins, range, normed, weights, density) 854 # The index computation is not guaranteed to give exactly 855 # consistent results within ~1 ULP of the bin edges. --> 856 decrement = tmp_a < bin_edges[indices] 857 indices[decrement] -= 1 858 # The last bin includes the right edge. The other bins do not.

IndexError: index -9223372036854775808 is out of bounds for axis 0 with size 2

### Expected Behaviour

return data profiling for this table

### Data Description

```Text
   SUM_TIMER_READ_WRITE
0     10950043000000000
```

### Code that reproduces the bug

```Python
import pandas as pd
from ydata_profiling import ProfileReport

b = {'SUM_TIMER_READ_WRITE': [10950043000000000]}
table = pd.DataFrame.from_dict(b)
profile_report = ProfileReport(
    table, progress_bar=False, infer_dtypes=False, missing_diagrams=None, correlations=None, interactions=None,
    # duplicates=None,
    samples=None)
description = profile_report.get_description()
```

### pandas-profiling version

v4.1.1

### Dependencies

```Text
pandas==1.3.5
ydata-profiling==4.1.1
```

### OS

Linux dsp-X299-WU8 5.15.0-69-generic #76~20.04.1-Ubuntu SMP Mon Mar 20 15:54:19 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

### Checklist

- [X] There is not yet another bug report for this issue in the [issue tracker](https://github.com/ydataai/pandas-profiling/issues)
- [X] The problem is reproducible from this bug report. [This guide](http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) can help to craft a minimal bug report.
- [X] The issue has not been resolved by the entries listed under [Common Issues](https://pandas-profiling.ydata.ai/docs/master/pages/support_contrib/common_issues.html).

fabclmnt commented 1 year ago

Hi @zhoujianch,

Thank you for opening this issue. I just wanted to double-check with you: does the table you want to generate the report for contain only a single value, as in the example you've provided here?