Status: Closed. alistairewj closed this issue 6 years ago.
url="https://raw.githubusercontent.com/tompollard/data/master/primary-biliary-cirrhosis/pbc.csv"
data_pbc=pd.read_csv(url)
# 6 categories of age based on decade
data_pbc['age_group'] = data_pbc['age'].map(lambda x: int(x/10))
columns = ['age_group', 'age', 'sex', 'albumin', 'ast']
categorical = ['age_group']
table = TableOne(data_pbc, columns=columns, categorical=categorical)
Traceback (note that the failing call shown below also passes `limit=3`):
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-10-1311713e7539> in <module>() 1 columns = ['age_group', 'age', 'sex', 'albumin', 'ast'] 2 categorical = ['age_group'] ----> 3 table = TableOne(data_pbc, columns=columns, categorical=categorical, limit=3) /home/alistairewj/git/tableone/tableone.py in __init__(self, data, columns, categorical, groupby, nonnormal, pval, pval_adjust, isnull, ddof, labels, sort, limit) 136 # create tables of continuous and categorical variables 137 if self._continuous: --> 138 self.cont_describe = self._create_cont_describe(data) 139 self.cont_table = self._create_cont_table(data) 140 /home/alistairewj/git/tableone/tableone.py in _create_cont_describe(self, data) 244 # if no groupby, just add single group column 245 df_cont = data[self._continuous].apply(pd.to_numeric, --> 246 errors='ignore').apply(aggfuncs).T 247 df_cont.columns.name = 'overall' 248 df_cont.columns = pd.MultiIndex.from_product([df_cont.columns, /usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in apply(self, func, axis, broadcast, raw, reduce, args, **kwds) 4816 # dispatch to agg 4817 if axis == 0 and isinstance(func, (list, dict)): -> 4818 return self.aggregate(func, axis=axis, *args, **kwds) 4819 4820 if len(self.columns) == 0 and len(self.index) == 0: /usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in aggregate(self, func, axis, *args, **kwargs) 4740 if axis == 0: 4741 try: -> 4742 result, how = self._aggregate(func, axis=0, *args, **kwargs) 4743 except TypeError: 4744 pass /usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate(self, arg, *args, **kwargs) 537 return self._aggregate_multiple_funcs(arg, 538 _level=_level, --> 539 _axis=_axis), None 540 else: 541 result = None /usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate_multiple_funcs(self, arg, _level, _axis) 582 try: 583 colg = self._gotitem(col, ndim=1, 
subset=obj[col]) --> 584 results.append(colg.aggregate(arg)) 585 keys.append(col) 586 except (TypeError, DataError): /usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in aggregate(self, func, axis, *args, **kwargs) 2358 def aggregate(self, func, axis=0, *args, **kwargs): 2359 axis = self._get_axis_number(axis) -> 2360 result, how = self._aggregate(func, *args, **kwargs) 2361 if result is None: 2362 /usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate(self, arg, *args, **kwargs) 537 return self._aggregate_multiple_funcs(arg, 538 _level=_level, --> 539 _axis=_axis), None 540 else: 541 result = None /usr/local/lib/python2.7/dist-packages/pandas/core/base.pyc in _aggregate_multiple_funcs(self, arg, _level, _axis) 567 try: 568 colg = self._gotitem(obj.name, ndim=1, subset=obj) --> 569 results.append(colg.aggregate(a)) 570 571 # make sure we find a good name /usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in aggregate(self, func, axis, *args, **kwargs) 2377 result = self.apply(func, *args, **kwargs) 2378 except (ValueError, AttributeError, TypeError): -> 2379 result = func(self, *args, **kwargs) 2380 2381 return result /home/alistairewj/git/tableone/tableone.py in _t1_summary(self, x) 215 np.nanpercentile(x.values,25), np.nanpercentile(x.values,75)) 216 else: --> 217 return '{:.2f} ({:.2f})'.format(np.nanmean(x.values), 218 np.nanstd(x.values,ddof=self._ddof)) 219 /usr/local/lib/python2.7/dist-packages/numpy/lib/nanfunctions.pyc in nanmean(a, axis, dtype, out, keepdims) 822 cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) 823 tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) --> 824 avg = _divide_by_count(tot, cnt, out=out) 825 826 isbad = (cnt == 0) /usr/local/lib/python2.7/dist-packages/numpy/lib/nanfunctions.pyc in _divide_by_count(a, b, out) 141 else: 142 if out is None: --> 143 return a.dtype.type(a / b) 144 else: 145 # This is questionable, but currently a numpy scalar can 
AttributeError: 'str' object has no attribute 'dtype'
Fixed in https://github.com/tompollard/tableone/pull/48
traceback: