ddbourgin / numpy-ml

Machine learning, in numpy
https://numpy-ml.readthedocs.io/
GNU General Public License v3.0
15.35k stars 3.72k forks source link

error in DecisionTree #58

Open Gewissta opened 4 years ago

Gewissta commented 4 years ago

```python
data = pd.read_csv('Data/Bankloan.csv', sep=';')
for i in ['debtinc', 'creddebt', 'othdebt']:
    data[i] = data[i].str.replace(',', '.').astype('float')
train, test, y_train, y_test = train_test_split(data.drop('default', axis=1), data['default'], test_size=0.3, stratify=data['default'], random_state=42)
X_train = pd.get_dummies(train)
X_test = pd.get_dummies(test)
tree = DecisionTree(seed=42, max_depth=4, n_feats=2)
tree.fit(X_train.values, y_train.values)
```


```
ValueError                                Traceback (most recent call last)

in
      1 tree = DecisionTree(seed=42, max_depth=4, n_feats=2)
----> 2 tree.fit(X_train.values, y_train.values)

in fit(self, X, Y)
     78         self.n_classes = max(Y) + 1 if self.classifier else None
     79         self.n_feats = X.shape[1] if not self.n_feats else min(self.n_feats, X.shape[1])
---> 80         self.root = self._grow(X, Y)
     81
     82     def predict(self, X):

in _grow(self, X, Y, cur_depth)
    138
    139         # grow the children that result from the split
--> 140         left = self._grow(X[l, :], Y[l], cur_depth)
    141         right = self._grow(X[r, :], Y[r], cur_depth)
    142         return Node(left, right, (feat, thresh))

in _grow(self, X, Y, cur_depth)
    139         # grow the children that result from the split
    140         left = self._grow(X[l, :], Y[l], cur_depth)
--> 141         right = self._grow(X[r, :], Y[r], cur_depth)
    142         return Node(left, right, (feat, thresh))
    143

in _grow(self, X, Y, cur_depth)
    139         # grow the children that result from the split
    140         left = self._grow(X[l, :], Y[l], cur_depth)
--> 141         right = self._grow(X[r, :], Y[r], cur_depth)
    142         return Node(left, right, (feat, thresh))
    143

in _grow(self, X, Y, cur_depth)
    133
    134         # greedily select the best split according to `criterion`
--> 135         feat, thresh = self._segment(X, Y, feat_idxs)
    136         l = np.argwhere(X[:, feat] <= thresh).flatten()
    137         r = np.argwhere(X[:, feat] > thresh).flatten()

in _segment(self, X, Y, feat_idxs)
    155             gains = np.array([self._impurity_gain(Y, t, vals) for t in thresholds])
    156
--> 157             if gains.max() > best_gain:
    158                 split_idx = i
    159                 best_gain = gains.max()

/anaconda3/lib/python3.7/site-packages/numpy/core/_methods.py in _amax(a, axis, out, keepdims, initial, where)
     28 def _amax(a, axis=None, out=None, keepdims=False,
     29           initial=_NoValue, where=True):
---> 30     return umr_maximum(a, axis, None, out, keepdims, initial, where)
     31
     32 def _amin(a, axis=None, out=None, keepdims=False,

ValueError: zero-size array to reduction operation maximum which has no identity
```

Link to dataset: https://drive.google.com/file/d/1lj7qUyG7BOV6cAGm8-tDNUqS62IEgk5p/view?usp=sharing