tompollard / tableone

Create "Table 1" for research papers in Python
https://pypi.python.org/pypi/tableone/
MIT License
161 stars 38 forks source link

Not specifying data as categorical gives an obscure error #36

Closed alistairewj closed 6 years ago

alistairewj commented 6 years ago
url="https://raw.githubusercontent.com/tompollard/data/master/primary-biliary-cirrhosis/pbc.csv"
data_pbc=pd.read_csv(url)

# 6 categories of age based on decade
data_pbc['age_group'] = data_pbc['age'].map(lambda x: int(x/10))
columns = ['age_group', 'age', 'sex', 'albumin', 'ast']
categorical = ['age_group']
table = TableOne(data_pbc, columns=columns, categorical=categorical)

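Traceback (a non-numeric column that is not listed in `categorical` — most likely `sex` here — is treated as continuous, so the mean/std summary is computed on strings and fails):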

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-10-1311713e7539> in <module>()
      1 columns = ['age_group', 'age', 'sex', 'albumin', 'ast']
      2 categorical = ['age_group']
----> 3 table = TableOne(data_pbc, columns=columns, categorical=categorical, limit=3)

/home/alistairewj/git/tableone/tableone.py in __init__(self, data, columns, categorical, groupby, nonnormal, pval, pval_adjust, isnull, ddof, labels, sort, limit)
    136         # create tables of continuous and categorical variables
    137         if self._continuous:
--> 138             self.cont_describe = self._create_cont_describe(data)
    139             self.cont_table = self._create_cont_table(data)
    140 

/home/alistairewj/git/tableone/tableone.py in _create_cont_describe(self, data)
    244             # if no groupby, just add single group column
    245             df_cont = data[self._continuous].apply(pd.to_numeric,
--> 246                 errors='ignore').apply(aggfuncs).T
    247             df_cont.columns.name = 'overall'
    248             df_cont.columns = pd.MultiIndex.from_product([df_cont.columns,

/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
   4816         # dispatch to agg
   4817         if axis == 0 and isinstance(func, (list, dict)):
-> 4818             return self.aggregate(func, axis=axis, *args, **kwds)
   4819 
   4820         if len(self.columns) == 0 and len(self.index) == 0:

/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in aggregate(self, func, axis, *args, **kwargs)
   4740         if axis == 0:
   4741             try:
-> 4742                 result, how = self._aggregate(func, axis=0, *args, **kwargs)
   4743             except TypeError:
   4744                 pass

/usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate(self, arg, *args, **kwargs)
    537             return self._aggregate_multiple_funcs(arg,
    538                                                   _level=_level,
--> 539                                                   _axis=_axis), None
    540         else:
    541             result = None

/usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate_multiple_funcs(self, arg, _level, _axis)
    582                 try:
    583                     colg = self._gotitem(col, ndim=1, subset=obj[col])
--> 584                     results.append(colg.aggregate(arg))
    585                     keys.append(col)
    586                 except (TypeError, DataError):

/usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in aggregate(self, func, axis, *args, **kwargs)
   2358     def aggregate(self, func, axis=0, *args, **kwargs):
   2359         axis = self._get_axis_number(axis)
-> 2360         result, how = self._aggregate(func, *args, **kwargs)
   2361         if result is None:
   2362 

/usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate(self, arg, *args, **kwargs)
    537             return self._aggregate_multiple_funcs(arg,
    538                                                   _level=_level,
--> 539                                                   _axis=_axis), None
    540         else:
    541             result = None

/usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate_multiple_funcs(self, arg, _level, _axis)
    567                 try:
    568                     colg = self._gotitem(obj.name, ndim=1, subset=obj)
--> 569                     results.append(colg.aggregate(a))
    570 
    571                     # make sure we find a good name

/usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in aggregate(self, func, axis, *args, **kwargs)
   2377                 result = self.apply(func, *args, **kwargs)
   2378             except (ValueError, AttributeError, TypeError):
-> 2379                 result = func(self, *args, **kwargs)
   2380 
   2381         return result

/home/alistairewj/git/tableone/tableone.py in _t1_summary(self, x)
    215                 np.nanpercentile(x.values,25), np.nanpercentile(x.values,75))
    216         else:
--> 217             return '{:.2f} ({:.2f})'.format(np.nanmean(x.values),
    218                 np.nanstd(x.values,ddof=self._ddof))
    219 

/usr/local/lib/python2.7/dist-packages/numpy/lib/nanfunctions.pyc in nanmean(a, axis, dtype, out, keepdims)
    822     cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims)
    823     tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
--> 824     avg = _divide_by_count(tot, cnt, out=out)
    825 
    826     isbad = (cnt == 0)

/usr/local/lib/python2.7/dist-packages/numpy/lib/nanfunctions.pyc in _divide_by_count(a, b, out)
    141         else:
    142             if out is None:
--> 143                 return a.dtype.type(a / b)
    144             else:
    145                 # This is questionable, but currently a numpy scalar can

AttributeError: 'str' object has no attribute 'dtype'

tompollard commented 6 years ago

Fixed in https://github.com/tompollard/tableone/pull/48