Closed datNurd closed 6 years ago
imblearn SMOTE throws error with n_jobs > 1
sm = SMOTE(random_state=12,kind="svm",svm_estimator=svm.SVC(C=0.1,kernel="linear"),n_jobs = 6)
X_res, y_res = sm.fit_sample(X, y)
<!-- If the code is too long, feel free to put it in a public gist and link it in the issue: https://gist.github.com -->
Error: multiprocessing.pool.RemoteTraceback: """ Traceback (most recent call last): File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 350, in __call__ return self.func(*args, **kwargs) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__ return [func(*args, **kwargs) for func, args, kwargs in self.items] File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp> return [func(*args, **kwargs) for func, args, kwargs in self.items] File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py", line 223, in euclidean_distances X, Y = check_pairwise_arrays(X, Y) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py", line 110, in check_pairwise_arrays warn_on_dtype=warn_on_dtype, estimator=estimator) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py", line 431, in check_array force_all_finite) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py", line 296, in _ensure_sparse_format spmatrix = spmatrix.astype(dtype) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py", line 71, in astype self._deduped_data().astype(dtype, casting=casting, copy=copy), File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py", line 34, in _deduped_data self.sum_duplicates() File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py", line 1009, in sum_duplicates self.sort_indices() File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py", line 1055, in sort_indices self.indices, self.data) ValueError: WRITEBACKIFCOPY base is read-only During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/lib/python3.5/multiprocessing/pool.py", line 119, in worker 
result = (True, func(*args, **kwds)) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 359, in __call__ raise TransportableException(text, e_type) sklearn.externals.joblib.my_exceptions.TransportableException: TransportableException ___________________________________________________________________________ ValueError Wed Apr 4 09:09:04 2018 PID: 20131 Python 3.5.2: /home/ubuntu/ML/venv/bin/python ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] self.items = [(<function euclidean_distances>, (<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>), {'squared': True})] 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] func = <function euclidean_distances> args = (<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, <39391x117239 sparse matrix of type '<class 'num... 
stored elements in Compressed Sparse Row format>) kwargs = {'squared': True} 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in euclidean_distances(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>, Y_norm_squared=None, squared=True, X_norm_squared=None) 218 219 See also 220 -------- 221 paired_distances : distances betweens pairs of elements of X and Y. 222 """ --> 223 X, Y = check_pairwise_arrays(X, Y) X = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> Y = <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format> 224 225 if X_norm_squared is not None: 226 XX = check_array(X_norm_squared) 227 if XX.shape == (1, X.shape[0]): ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in check_pairwise_arrays(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<39391x117239 sparse matrix of type '<class 'num... 
stored elements in Compressed Sparse Row format>, precomputed=False, dtype=<class 'float'>) 105 if Y is X or Y is None: 106 X = Y = check_array(X, accept_sparse='csr', dtype=dtype, 107 warn_on_dtype=warn_on_dtype, estimator=estimator) 108 else: 109 X = check_array(X, accept_sparse='csr', dtype=dtype, --> 110 warn_on_dtype=warn_on_dtype, estimator=estimator) warn_on_dtype = False estimator = 'check_pairwise_arrays' 111 Y = check_array(Y, accept_sparse='csr', dtype=dtype, 112 warn_on_dtype=warn_on_dtype, estimator=estimator) 113 114 if precomputed: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py in check_array(array=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, accept_sparse='csr', dtype=<class 'float'>, order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, ensure_min_features=1, warn_on_dtype=False, estimator='check_pairwise_arrays') 426 estimator_name = "Estimator" 427 context = " by %s" % estimator_name if estimator is not None else "" 428 429 if sp.issparse(array): 430 array = _ensure_sparse_format(array, accept_sparse, dtype, copy, --> 431 force_all_finite) force_all_finite = True 432 else: 433 array = np.array(array, dtype=dtype, order=order, copy=copy) 434 435 if ensure_2d: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py in _ensure_sparse_format(spmatrix=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, accept_sparse=['csr'], dtype=<class 'float'>, copy=False, force_all_finite=True) 291 "boolean or list of strings. 
You provided " 292 "'accept_sparse={}'.".format(accept_sparse)) 293 294 if dtype != spmatrix.dtype: 295 # convert dtype --> 296 spmatrix = spmatrix.astype(dtype) spmatrix = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> spmatrix.astype = <bound method _data_matrix.astype of <5409x11723...stored elements in Compressed Sparse Row format>> dtype = <class 'float'> 297 elif copy and not changed_format: 298 # force copy 299 spmatrix = spmatrix.copy() 300 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py in astype(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, dtype=dtype('float64'), casting='unsafe', copy=True) 66 67 def astype(self, dtype, casting='unsafe', copy=True): 68 dtype = np.dtype(dtype) 69 if self.dtype != dtype: 70 return self._with_data( ---> 71 self._deduped_data().astype(dtype, casting=casting, copy=copy), self._deduped_data.astype = undefined dtype = dtype('float64') casting = 'unsafe' copy = True 72 copy=copy) 73 elif copy: 74 return self.copy() 75 else: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py in _deduped_data(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 29 self.data.dtype = newtype 30 dtype = property(fget=_get_dtype, fset=_set_dtype) 31 32 def _deduped_data(self): 33 if hasattr(self, 'sum_duplicates'): ---> 34 self.sum_duplicates() self.sum_duplicates = <bound method _cs_matrix.sum_duplicates of <5409...stored elements in Compressed Sparse Row format>> 35 return self.data 36 37 def __abs__(self): 38 return self._with_data(abs(self._deduped_data())) ........................................................................... 
/home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py in sum_duplicates(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 1004 1005 The is an *in place* operation 1006 """ 1007 if self.has_canonical_format: 1008 return -> 1009 self.sort_indices() self.sort_indices = <bound method _cs_matrix.sort_indices of <5409x1...stored elements in Compressed Sparse Row format>> 1010 1011 M, N = self._swap(self.shape) 1012 _sparsetools.csr_sum_duplicates(M, N, self.indptr, self.indices, 1013 self.data) ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py in sort_indices(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 1050 """Sort the indices of this matrix *in place* 1051 """ 1052 1053 if not self.has_sorted_indices: 1054 _sparsetools.csr_sort_indices(len(self.indptr) - 1, self.indptr, -> 1055 self.indices, self.data) self.indices = array([110400, 110390, 110345, ..., 18292, 13241, 13236], dtype=int32) self.data = memmap([1, 1, 2, ..., 1, 1, 1]) 1056 self.has_sorted_indices = True 1057 1058 def prune(self): 1059 """Remove empty space after all non-zero elements. 
ValueError: WRITEBACKIFCOPY base is read-only ___________________________________________________________________________ """ The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 699, in retrieve self._output.extend(job.get(timeout=self.timeout)) File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get raise self._value sklearn.externals.joblib.my_exceptions.TransportableException: TransportableException ___________________________________________________________________________ ValueError Wed Apr 4 09:09:04 2018 PID: 20131 Python 3.5.2: /home/ubuntu/ML/venv/bin/python ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] self.items = [(<function euclidean_distances>, (<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>), {'squared': True})] 132 133 def __len__(self): 134 return self._size 135 ........................................................................... 
/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] func = <function euclidean_distances> args = (<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>) kwargs = {'squared': True} 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in euclidean_distances(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>, Y_norm_squared=None, squared=True, X_norm_squared=None) 218 219 See also 220 -------- 221 paired_distances : distances betweens pairs of elements of X and Y. 222 """ --> 223 X, Y = check_pairwise_arrays(X, Y) X = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> Y = <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format> 224 225 if X_norm_squared is not None: 226 XX = check_array(X_norm_squared) 227 if XX.shape == (1, X.shape[0]): ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in check_pairwise_arrays(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<39391x117239 sparse matrix of type '<class 'num... 
stored elements in Compressed Sparse Row format>, precomputed=False, dtype=<class 'float'>) 105 if Y is X or Y is None: 106 X = Y = check_array(X, accept_sparse='csr', dtype=dtype, 107 warn_on_dtype=warn_on_dtype, estimator=estimator) 108 else: 109 X = check_array(X, accept_sparse='csr', dtype=dtype, --> 110 warn_on_dtype=warn_on_dtype, estimator=estimator) warn_on_dtype = False estimator = 'check_pairwise_arrays' 111 Y = check_array(Y, accept_sparse='csr', dtype=dtype, 112 warn_on_dtype=warn_on_dtype, estimator=estimator) 113 114 if precomputed: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py in check_array(array=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, accept_sparse='csr', dtype=<class 'float'>, order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, ensure_min_features=1, warn_on_dtype=False, estimator='check_pairwise_arrays') 426 estimator_name = "Estimator" 427 context = " by %s" % estimator_name if estimator is not None else "" 428 429 if sp.issparse(array): 430 array = _ensure_sparse_format(array, accept_sparse, dtype, copy, --> 431 force_all_finite) force_all_finite = True 432 else: 433 array = np.array(array, dtype=dtype, order=order, copy=copy) 434 435 if ensure_2d: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py in _ensure_sparse_format(spmatrix=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, accept_sparse=['csr'], dtype=<class 'float'>, copy=False, force_all_finite=True) 291 "boolean or list of strings. 
You provided " 292 "'accept_sparse={}'.".format(accept_sparse)) 293 294 if dtype != spmatrix.dtype: 295 # convert dtype --> 296 spmatrix = spmatrix.astype(dtype) spmatrix = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> spmatrix.astype = <bound method _data_matrix.astype of <5409x11723...stored elements in Compressed Sparse Row format>> dtype = <class 'float'> 297 elif copy and not changed_format: 298 # force copy 299 spmatrix = spmatrix.copy() 300 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py in astype(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, dtype=dtype('float64'), casting='unsafe', copy=True) 66 67 def astype(self, dtype, casting='unsafe', copy=True): 68 dtype = np.dtype(dtype) 69 if self.dtype != dtype: 70 return self._with_data( ---> 71 self._deduped_data().astype(dtype, casting=casting, copy=copy), self._deduped_data.astype = undefined dtype = dtype('float64') casting = 'unsafe' copy = True 72 copy=copy) 73 elif copy: 74 return self.copy() 75 else: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py in _deduped_data(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 29 self.data.dtype = newtype 30 dtype = property(fget=_get_dtype, fset=_set_dtype) 31 32 def _deduped_data(self): 33 if hasattr(self, 'sum_duplicates'): ---> 34 self.sum_duplicates() self.sum_duplicates = <bound method _cs_matrix.sum_duplicates of <5409...stored elements in Compressed Sparse Row format>> 35 return self.data 36 37 def __abs__(self): 38 return self._with_data(abs(self._deduped_data())) ........................................................................... 
/home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py in sum_duplicates(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 1004 1005 The is an *in place* operation 1006 """ 1007 if self.has_canonical_format: 1008 return -> 1009 self.sort_indices() self.sort_indices = <bound method _cs_matrix.sort_indices of <5409x1...stored elements in Compressed Sparse Row format>> 1010 1011 M, N = self._swap(self.shape) 1012 _sparsetools.csr_sum_duplicates(M, N, self.indptr, self.indices, 1013 self.data) ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py in sort_indices(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 1050 """Sort the indices of this matrix *in place* 1051 """ 1052 1053 if not self.has_sorted_indices: 1054 _sparsetools.csr_sort_indices(len(self.indptr) - 1, self.indptr, -> 1055 self.indices, self.data) self.indices = array([110400, 110390, 110345, ..., 18292, 13241, 13236], dtype=int32) self.data = memmap([1, 1, 2, ..., 1, 1, 1]) 1056 self.has_sorted_indices = True 1057 1058 def prune(self): 1059 """Remove empty space after all non-zero elements. 
ValueError: WRITEBACKIFCOPY base is read-only ___________________________________________________________________________ During handling of the above exception, another exception occurred: Traceback (most recent call last): File "sampling.py", line 42, in <module> sample_smote(X,y,class_names) File "sampling.py", line 16, in sample_smote X_res, y_res = sm.fit_sample(X, y) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/base.py", line 88, in fit_sample return self.fit(X, y).sample(X, y) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/base.py", line 64, in sample return self._sample(X, y) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/over_sampling/smote.py", line 598, in _sample return self._sample_svm(X, y) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/over_sampling/smote.py", line 513, in _sample_svm kind='noise') File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/over_sampling/smote.py", line 202, in _in_danger_noise x = self.nn_m_.kneighbors(samples, return_distance=False)[:, 1:] File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/neighbors/base.py", line 357, in kneighbors n_jobs=n_jobs, squared=True) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py", line 1247, in pairwise_distances return _parallel_pairwise(X, Y, func, n_jobs, **kwds) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py", line 1096, in _parallel_pairwise for s in gen_even_slices(Y.shape[0], n_jobs)) File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 789, in __call__ self.retrieve() File "/home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 740, in retrieve raise exception sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError ___________________________________________________________________________ Multiprocessing exception: 
........................................................................... /home/ubuntu/ML/sampling.py in <module>() 37 X = vectorizer.fit_transform(msgs) 38 y = train.messagetype_num.values 39 class_names = train.messagetype.values 40 print(datetime.datetime.now().time()) 41 sys.stdout.flush() ---> 42 sample_smote(X,y,class_names) ........................................................................... /home/ubuntu/ML/sampling.py in sample_smote(X=<236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format>, y=array([8, 7, 5, ..., 8, 1, 1]), class_names=array(['Debit', 'Credit', 'Warning', ..., 'Debit...ayment_due', 'Payment_due'], dtype=object)) 11 12 def sample_smote(X,y,class_names): 13 print('Original dataset shape {}'.format(Counter(class_names))) 14 sys.stdout.flush() 15 sm = SMOTE(random_state=12,kind="svm",svm_estimator=svm.SVC(C=0.1,kernel="linear"),n_jobs=6) ---> 16 X_res, y_res = sm.fit_sample(X, y) X_res = undefined y_res = undefined sm.fit_sample = <bound method SamplerMixin.fit_sample of SMOTE(k...ne, shrinking=True, tol=0.001, verbose=False))> X = <236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format> y = array([8, 7, 5, ..., 8, 1, 1]) 17 print(datetime.datetime.now().time()) 18 sys.stdout.flush() 19 print('Resampled dataset shape {}'.format(Counter(y_res))) 20 save_classifier = open("messagetype_X_res.pickle","wb") ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/base.py in fit_sample(self=SMOTE(k=None, k_neighbors=5, kind='svm', m=None,...one, shrinking=True, tol=0.001, verbose=False)), X=<236345x117239 sparse matrix of type '<class 'nu... 
stored elements in Compressed Sparse Row format>, y=array([8, 7, 5, ..., 8, 1, 1])) 83 y_resampled : array-like, shape (n_samples_new,) 84 The corresponding label of `X_resampled` 85 86 """ 87 ---> 88 return self.fit(X, y).sample(X, y) self.fit = <bound method BaseSampler.fit of SMOTE(k=None, k...ne, shrinking=True, tol=0.001, verbose=False))> X = <236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format> y.sample = undefined y = array([8, 7, 5, ..., 8, 1, 1]) 89 90 @abstractmethod 91 def _sample(self, X, y): 92 """Resample the dataset. ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/base.py in sample(self=SMOTE(k=None, k_neighbors=5, kind='svm', m=None,...one, shrinking=True, tol=0.001, verbose=False)), X=<236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format>, y=array([8, 7, 5, ..., 8, 1, 1])) 59 X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']) 60 61 check_is_fitted(self, 'ratio_') 62 self._check_X_y(X, y) 63 ---> 64 return self._sample(X, y) self._sample = <bound method SMOTE._sample of SMOTE(k=None, k_n...ne, shrinking=True, tol=0.001, verbose=False))> X = <236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format> y = array([8, 7, 5, ..., 8, 1, 1]) 65 66 def fit_sample(self, X, y): 67 """Fit the statistics and resample the data directly. 68 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/over_sampling/smote.py in _sample(self=SMOTE(k=None, k_neighbors=5, kind='svm', m=None,...one, shrinking=True, tol=0.001, verbose=False)), X=<236345x117239 sparse matrix of type '<class 'nu... 
stored elements in Compressed Sparse Row format>, y=array([8, 7, 5, ..., 8, 1, 1])) 593 if self.kind == 'regular': 594 return self._sample_regular(X, y) 595 elif self.kind == 'borderline1' or self.kind == 'borderline2': 596 return self._sample_borderline(X, y) 597 elif self.kind == 'svm': --> 598 return self._sample_svm(X, y) self._sample_svm = <bound method SMOTE._sample_svm of SMOTE(k=None,...ne, shrinking=True, tol=0.001, verbose=False))> X = <236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format> y = array([8, 7, 5, ..., 8, 1, 1]) ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/over_sampling/smote.py in _sample_svm(self=SMOTE(k=None, k_neighbors=5, kind='svm', m=None,...one, shrinking=True, tol=0.001, verbose=False)), X=<236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format>, y=array([8, 7, 5, ..., 8, 1, 1])) 508 y[self.svm_estimator_.support_] == class_sample] 509 support_vector = safe_indexing(X, support_index) 510 511 self.nn_m_.fit(X) 512 noise_bool = self._in_danger_noise(support_vector, class_sample, y, --> 513 kind='noise') 514 support_vector = safe_indexing( 515 support_vector, 516 np.flatnonzero(np.logical_not(noise_bool))) 517 danger_bool = self._in_danger_noise(support_vector, class_sample, ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/imblearn/over_sampling/smote.py in _in_danger_noise(self=SMOTE(k=None, k_neighbors=5, kind='svm', m=None,...one, shrinking=True, tol=0.001, verbose=False)), samples=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, target_class=0, y=array([8, 7, 5, ..., 8, 1, 1]), kind='noise') 197 ------- 198 output : ndarray, shape (n_samples,) 199 A boolean array where True refer to samples in danger or noise. 
200 201 """ --> 202 x = self.nn_m_.kneighbors(samples, return_distance=False)[:, 1:] x = undefined self.nn_m_.kneighbors = <bound method KNeighborsMixin.kneighbors of Near...None, n_jobs=6, n_neighbors=11, p=2, radius=1.0)> samples = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> 203 nn_label = (y[x] != target_class).astype(int) 204 n_maj = np.sum(nn_label, axis=1) 205 206 if kind == 'danger': ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/neighbors/base.py in kneighbors(self=NearestNeighbors(algorithm='auto', leaf_size=30,...=None, n_jobs=6, n_neighbors=11, p=2, radius=1.0), X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, n_neighbors=11, return_distance=False) 352 n_jobs = _get_n_jobs(self.n_jobs) 353 if self._fit_method == 'brute': 354 # for efficiency, use squared euclidean distances 355 if self.effective_metric_ == 'euclidean': 356 dist = pairwise_distances(X, self._fit_X, 'euclidean', --> 357 n_jobs=n_jobs, squared=True) n_jobs = 6 358 else: 359 dist = pairwise_distances( 360 X, self._fit_X, self.effective_metric_, n_jobs=n_jobs, 361 **self.effective_metric_params_) ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in pairwise_distances(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<236345x117239 sparse matrix of type '<class 'nu... 
stored elements in Compressed Sparse Row format>, metric='euclidean', n_jobs=6, **kwds={'squared': True}) 1242 if n_jobs == 1 and X is Y: 1243 return distance.squareform(distance.pdist(X, metric=metric, 1244 **kwds)) 1245 func = partial(distance.cdist, metric=metric, **kwds) 1246 -> 1247 return _parallel_pairwise(X, Y, func, n_jobs, **kwds) X = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> Y = <236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format> func = <function euclidean_distances> n_jobs = 6 kwds = {'squared': True} 1248 1249 1250 # These distances recquire boolean arrays, when using scipy.spatial.distance 1251 PAIRWISE_BOOLEAN_FUNCTIONS = [ ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in _parallel_pairwise(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<236345x117239 sparse matrix of type '<class 'nu... stored elements in Compressed Sparse Row format>, func=<function euclidean_distances>, n_jobs=6, **kwds={'squared': True}) 1091 1092 # TODO: in some cases, backend='threading' may be appropriate 1093 fd = delayed(func) 1094 ret = Parallel(n_jobs=n_jobs, verbose=0)( 1095 fd(X, Y[s], **kwds) -> 1096 for s in gen_even_slices(Y.shape[0], n_jobs)) Y.shape = (236345, 117239) n_jobs = 6 1097 1098 return np.hstack(ret) 1099 1100 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=6), iterable=<generator object _parallel_pairwise.<locals>.<genexpr>>) 784 if pre_dispatch == "all" or n_jobs == 1: 785 # The iterable was consumed all at once by the above for loop. 786 # No need to wait for async callbacks to trigger to 787 # consumption. 
788 self._iterating = False --> 789 self.retrieve() self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=6)> 790 # Make sure that we get a last message telling us we are done 791 elapsed_time = time.time() - self._start_time 792 self._print('Done %3i out of %3i | elapsed: %s finished', 793 (len(self._output), len(self._output), --------------------------------------------------------------------------- Sub-process traceback: --------------------------------------------------------------------------- ValueError Wed Apr 4 09:09:04 2018 PID: 20131 Python 3.5.2: /home/ubuntu/ML/venv/bin/python ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] self.items = [(<function euclidean_distances>, (<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>), {'squared': True})] 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] func = <function euclidean_distances> args = (<5409x117239 sparse matrix of type '<class 'nump... 
stored elements in Compressed Sparse Row format>, <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>) kwargs = {'squared': True} 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in euclidean_distances(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>, Y_norm_squared=None, squared=True, X_norm_squared=None) 218 219 See also 220 -------- 221 paired_distances : distances betweens pairs of elements of X and Y. 222 """ --> 223 X, Y = check_pairwise_arrays(X, Y) X = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> Y = <39391x117239 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format> 224 225 if X_norm_squared is not None: 226 XX = check_array(X_norm_squared) 227 if XX.shape == (1, X.shape[0]): ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/metrics/pairwise.py in check_pairwise_arrays(X=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, Y=<39391x117239 sparse matrix of type '<class 'num... 
stored elements in Compressed Sparse Row format>, precomputed=False, dtype=<class 'float'>) 105 if Y is X or Y is None: 106 X = Y = check_array(X, accept_sparse='csr', dtype=dtype, 107 warn_on_dtype=warn_on_dtype, estimator=estimator) 108 else: 109 X = check_array(X, accept_sparse='csr', dtype=dtype, --> 110 warn_on_dtype=warn_on_dtype, estimator=estimator) warn_on_dtype = False estimator = 'check_pairwise_arrays' 111 Y = check_array(Y, accept_sparse='csr', dtype=dtype, 112 warn_on_dtype=warn_on_dtype, estimator=estimator) 113 114 if precomputed: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py in check_array(array=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, accept_sparse='csr', dtype=<class 'float'>, order=None, copy=False, force_all_finite=True, ensure_2d=True, allow_nd=False, ensure_min_samples=1, ensure_min_features=1, warn_on_dtype=False, estimator='check_pairwise_arrays') 426 estimator_name = "Estimator" 427 context = " by %s" % estimator_name if estimator is not None else "" 428 429 if sp.issparse(array): 430 array = _ensure_sparse_format(array, accept_sparse, dtype, copy, --> 431 force_all_finite) force_all_finite = True 432 else: 433 array = np.array(array, dtype=dtype, order=order, copy=copy) 434 435 if ensure_2d: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/sklearn/utils/validation.py in _ensure_sparse_format(spmatrix=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, accept_sparse=['csr'], dtype=<class 'float'>, copy=False, force_all_finite=True) 291 "boolean or list of strings. 
You provided " 292 "'accept_sparse={}'.".format(accept_sparse)) 293 294 if dtype != spmatrix.dtype: 295 # convert dtype --> 296 spmatrix = spmatrix.astype(dtype) spmatrix = <5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format> spmatrix.astype = <bound method _data_matrix.astype of <5409x11723...stored elements in Compressed Sparse Row format>> dtype = <class 'float'> 297 elif copy and not changed_format: 298 # force copy 299 spmatrix = spmatrix.copy() 300 ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py in astype(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>, dtype=dtype('float64'), casting='unsafe', copy=True) 66 67 def astype(self, dtype, casting='unsafe', copy=True): 68 dtype = np.dtype(dtype) 69 if self.dtype != dtype: 70 return self._with_data( ---> 71 self._deduped_data().astype(dtype, casting=casting, copy=copy), self._deduped_data.astype = undefined dtype = dtype('float64') casting = 'unsafe' copy = True 72 copy=copy) 73 elif copy: 74 return self.copy() 75 else: ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/data.py in _deduped_data(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 29 self.data.dtype = newtype 30 dtype = property(fget=_get_dtype, fset=_set_dtype) 31 32 def _deduped_data(self): 33 if hasattr(self, 'sum_duplicates'): ---> 34 self.sum_duplicates() self.sum_duplicates = <bound method _cs_matrix.sum_duplicates of <5409...stored elements in Compressed Sparse Row format>> 35 return self.data 36 37 def __abs__(self): 38 return self._with_data(abs(self._deduped_data())) ........................................................................... 
/home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py in sum_duplicates(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 1004 1005 The is an *in place* operation 1006 """ 1007 if self.has_canonical_format: 1008 return -> 1009 self.sort_indices() self.sort_indices = <bound method _cs_matrix.sort_indices of <5409x1...stored elements in Compressed Sparse Row format>> 1010 1011 M, N = self._swap(self.shape) 1012 _sparsetools.csr_sum_duplicates(M, N, self.indptr, self.indices, 1013 self.data) ........................................................................... /home/ubuntu/ML/venv/lib/python3.5/site-packages/scipy/sparse/compressed.py in sort_indices(self=<5409x117239 sparse matrix of type '<class 'nump... stored elements in Compressed Sparse Row format>) 1050 """Sort the indices of this matrix *in place* 1051 """ 1052 1053 if not self.has_sorted_indices: 1054 _sparsetools.csr_sort_indices(len(self.indptr) - 1, self.indptr, -> 1055 self.indices, self.data) self.indices = array([110400, 110390, 110345, ..., 18292, 13241, 13236], dtype=int32) self.data = memmap([1, 1, 2, ..., 1, 1, 1]) 1056 self.has_sorted_indices = True 1057 1058 def prune(self): 1059 """Remove empty space after all non-zero elements. ValueError: WRITEBACKIFCOPY base is read-only ___________________________________________________________________________
<----- Version----->
Linux-4.4.0-1052-aws-x86_64-with-Ubuntu-16.04-xenial Python 3.5.2 (default, Nov 23 2017, 16:37:01) [GCC 5.4.0 20160609] NumPy 1.14.2 SciPy 1.0.1 Scikit-Learn 0.19.1
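Workaround: set `n_jobs=1`. This is a joblib issue: with `n_jobs > 1`, the sparse matrix data reaches the worker processes as a read-only memmap, so SciPy's in-place `sort_indices` fails with `ValueError: WRITEBACKIFCOPY base is read-only`.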
n_jobs=1
joblib
Description
imblearn SMOTE throws error with n_jobs > 1
<!-- If the code is too long, feel free to put it in a public gist and link it in the issue: https://gist.github.com -->
Expected Results
Actual Results
Steps/Code to Reproduce
<----- Version----->
Versions