mljar / mljar-supervised

Python package for AutoML on Tabular Data with Feature Engineering, Hyper-Parameters Tuning, Explanations and Automatic Documentation
https://mljar.com
MIT License

Invalid filename in EDA if the feature name contains a forbidden character #181

Closed: peter-WeiZhang closed this issue 4 years ago

peter-WeiZhang commented 4 years ago

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
automl = AutoML()
automl.fit(X_train, y_train)

After running the code, it raises an error like this:

AutoML directory: AutoML_9
The task is regression with evaluation metric rmse
AutoML will use algorithms: ['Baseline', 'Linear', 'Decision Tree', 'Random Forest', 'Xgboost', 'Neural Network']
AutoML will ensemble available models
2020-09-10 18:59:15,591 supervised.preprocessing.eda ERROR There was an issue when running EDA. [Errno 22] Invalid argument: 'AutoML_9\EDA\Spd*LSBW.png'
AutoML steps: ['simple_algorithms', 'default_algorithms', 'ensemble']

The above exception was the direct cause of the following exception:

BrokenProcessPool                         Traceback (most recent call last)
in
      5 # explain_level=0
      6 )
----> 7 automl.fit(X_train, y_train)

~\AppData\Roaming\Python\Python36\site-packages\supervised\automl.py in fit(self, X, y)
    276             self : AutoML object
    277         """
--> 278         return self._fit(X, y)
    279
    280     def predict(self, X):

~\AppData\Roaming\Python\Python36\site-packages\supervised\base_automl.py in _fit(self, X, y)
    668
    669         except Exception as e:
--> 670             raise e
    671         finally:
    672             if self._X_path is not None:

~\AppData\Roaming\Python\Python36\site-packages\supervised\base_automl.py in _fit(self, X, y)
    655                     trained = self.ensemble_step(is_stacked=params["is_stacked"])
    656                 else:
--> 657                     trained = self.train_model(params)
    658
    659                 params["status"] = "trained" if trained else "skipped"

~\AppData\Roaming\Python\Python36\site-packages\supervised\base_automl.py in train_model(self, params)
    227             f"Train model #{len(self._models)+1} / Model name: {params['name']}"
    228         )
--> 229         mf.train(model_path)
    230
    231         # save the model

~\AppData\Roaming\Python\Python36\site-packages\supervised\model_framework.py in train(self, model_path)
    176                 metric_name=self.get_metric_name(),
    177                 ml_task=self._ml_task,
--> 178                 explain_level=self._explain_level,
    179             )
    180

~\AppData\Roaming\Python\Python36\site-packages\supervised\algorithms\linear.py in interpret(self, X_train, y_train, X_validation, y_validation, model_file_path, learner_name, target_name, class_names, metric_name, ml_task, explain_level)
    137             metric_name,
    138             ml_task,
--> 139             explain_level,
    140         )
    141         if explain_level == 0:

~\AppData\Roaming\Python\Python36\site-packages\supervised\algorithms\algorithm.py in interpret(self, X_train, y_train, X_validation, y_validation, model_file_path, learner_name, target_name, class_names, metric_name, ml_task, explain_level)
     77             learner_name,
     78             metric_name,
---> 79             ml_task,
     80         )
     81         if explain_level > 1:

~\AppData\Roaming\Python\Python36\site-packages\supervised\utils\importance.py in compute_and_plot(model, X_validation, y_validation, model_file_path, learner_name, metric_name, ml_task)
     58             n_jobs=-1,  # all cores
     59             random_state=12,
---> 60             n_repeats=5,  # default
     61         )
     62

D:\Anaconda3\envs\mljar\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     70                           FutureWarning)
     71         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72         return f(**kwargs)
     73     return inner_f
     74

D:\Anaconda3\envs\mljar\lib\site-packages\sklearn\inspection\_permutation_importance.py in permutation_importance(estimator, X, y, scoring, n_repeats, n_jobs, random_state)
    135     scores = Parallel(n_jobs=n_jobs)(delayed(_calculate_permutation_scores)(
    136         estimator, X, y, col_idx, random_seed, n_repeats, scorer
--> 137     ) for col_idx in range(X.shape[1]))
    138
    139     importances = baseline_score - np.array(scores)

D:\Anaconda3\envs\mljar\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1015
   1016             with self._backend.retrieval_context():
-> 1017                 self.retrieve()
   1018             # Make sure that we get a last message telling us we are done
   1019             elapsed_time = time.time() - self._start_time

D:\Anaconda3\envs\mljar\lib\site-packages\joblib\parallel.py in retrieve(self)
    907             try:
    908                 if getattr(self._backend, 'supports_timeout', False):
--> 909                     self._output.extend(job.get(timeout=self.timeout))
    910                 else:
    911                     self._output.extend(job.get())

D:\Anaconda3\envs\mljar\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
    560         AsyncResults.get from multiprocessing."""
    561         try:
--> 562             return future.result(timeout=timeout)
    563         except LokyTimeoutError:
    564             raise TimeoutError()

D:\Anaconda3\envs\mljar\lib\concurrent\futures\_base.py in result(self, timeout)
    430                 raise CancelledError()
    431             elif self._state == FINISHED:
--> 432                 return self.__get_result()
    433             else:
    434                 raise TimeoutError()

D:\Anaconda3\envs\mljar\lib\concurrent\futures\_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.

pplonski commented 4 years ago

Hey @peter-WeiZhang! Thank you for reporting.

It looks like a bug. The Automated EDA is trying to create a plot for your feature Spd*LSBW, and the resulting file name contains a * character, which is forbidden. We will fix it in the next (0.7.2) release.

For now, please change the name of your feature so that it does not contain *, perhaps replacing it with _?
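For example, something like this should work as a stopgap (a minimal sketch, assuming pandas DataFrames as in your snippet; the replacement character is only a suggestion):

from supervised.automl import AutoML

# Replace the forbidden '*' character in the column names before fitting,
# so that the EDA plot file names become valid.
X_train = X_train.rename(columns=lambda c: c.replace("*", "_"))
X_test = X_test.rename(columns=lambda c: c.replace("*", "_"))

automl = AutoML()
automl.fit(X_train, y_train)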

Please let me know if it works after the column name change.

peter-WeiZhang commented 4 years ago

Hey, I changed the column name to SpdXLSBW, but it still raises an error.

The task is regression with evaluation metric rmse
AutoML will use algorithms: ['Baseline', 'Linear', 'Decision Tree', 'Random Forest', 'Xgboost', 'Neural Network']
AutoML will ensemble available models
AutoML steps: ['simple_algorithms', 'default_algorithms', 'ensemble']

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "D:\Anaconda3\envs\mljar\lib\site-packages\joblib\externals\loky\process_executor.py", line 391, in _process_worker
    call_item = call_queue.get(block=True, timeout=timeout)
  File "D:\Anaconda3\envs\mljar\lib\multiprocessing\queues.py", line 113, in get
    return _ForkingPickler.loads(res)
  File "C:\Users\ZW\AppData\Roaming\Python\Python36\site-packages\supervised\__init__.py", line 3, in <module>
    from supervised.automl import AutoML
  File "C:\Users\ZW\AppData\Roaming\Python\Python36\site-packages\supervised\automl.py", line 3, in <module>
    from supervised.base_automl import BaseAutoML
  File "C:\Users\ZW\AppData\Roaming\Python\Python36\site-packages\supervised\base_automl.py", line 17, in <module>
    from supervised.algorithms.registry import AlgorithmsRegistry
  File "C:\Users\ZW\AppData\Roaming\Python\Python36\site-packages\supervised\algorithms\registry.py", line 71, in <module>
    import supervised.algorithms.nn
  File "C:\Users\ZW\AppData\Roaming\Python\Python36\site-packages\supervised\algorithms\nn.py", line 11, in <module>
    import keras
  File "D:\Anaconda3\envs\mljar\lib\site-packages\keras\__init__.py", line 3, in <module>
    from . import utils
  File "D:\Anaconda3\envs\mljar\lib\site-packages\keras\utils\__init__.py", line 6, in <module>
    from . import conv_utils
  File "D:\Anaconda3\envs\mljar\lib\site-packages\keras\utils\conv_utils.py", line 9, in <module>
    from .. import backend as K
  File "D:\Anaconda3\envs\mljar\lib\site-packages\keras\backend\__init__.py", line 1, in <module>
    from .load_backend import epsilon
  File "D:\Anaconda3\envs\mljar\lib\site-packages\keras\backend\load_backend.py", line 90, in <module>
    from .tensorflow_backend import *
  File "D:\Anaconda3\envs\mljar\lib\site-packages\keras\backend\tensorflow_backend.py", line 5, in <module>
    import tensorflow as tf
  File "D:\Anaconda3\envs\mljar\lib\site-packages\tensorflow\__init__.py", line 41, in <module>
    from tensorflow.python.tools import module_util as _module_util
  File "D:\Anaconda3\envs\mljar\lib\site-packages\tensorflow\python\__init__.py", line 50, in <module>
    from tensorflow.python import pywrap_tensorflow
  File "D:\Anaconda3\envs\mljar\lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 69, in <module>
    raise ImportError(msg)
ImportError: Traceback (most recent call last):
  File "D:\Anaconda3\envs\mljar\lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 58, in <module>
    from tensorflow.python.pywrap_tensorflow_internal import *
  File "D:\Anaconda3\envs\mljar\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 28, in <module>
    _pywrap_tensorflow_internal = swig_import_helper()
  File "D:\Anaconda3\envs\mljar\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 24, in swig_import_helper
    _mod = imp.load_module('_pywrap_tensorflow_internal', fp, pathname, description)
  File "D:\Anaconda3\envs\mljar\lib\imp.py", line 243, in load_module
    return load_dynamic(name, filename, file)
  File "D:\Anaconda3\envs\mljar\lib\imp.py", line 343, in load_dynamic
    return _load(spec)
ImportError: DLL load failed: 页面文件太小，无法完成操作。(The paging file is too small for this operation to complete.)

Failed to load the native TensorFlow runtime.

See https://www.tensorflow.org/install/errors

for some common reasons and solutions. Include the entire stack trace above this error message when asking for help.

The above exception was the direct cause of the following exception:

BrokenProcessPool                         Traceback (most recent call last)
in
      5 # explain_level=0
      6 )
----> 7 automl.fit(X_train, y_train)

~\AppData\Roaming\Python\Python36\site-packages\supervised\automl.py in fit(self, X, y)
    276             self : AutoML object
    277         """
--> 278         return self._fit(X, y)
    279
    280     def predict(self, X):

~\AppData\Roaming\Python\Python36\site-packages\supervised\base_automl.py in _fit(self, X, y)
    668
    669         except Exception as e:
--> 670             raise e
    671         finally:
    672             if self._X_path is not None:

~\AppData\Roaming\Python\Python36\site-packages\supervised\base_automl.py in _fit(self, X, y)
    655                     trained = self.ensemble_step(is_stacked=params["is_stacked"])
    656                 else:
--> 657                     trained = self.train_model(params)
    658
    659                 params["status"] = "trained" if trained else "skipped"

~\AppData\Roaming\Python\Python36\site-packages\supervised\base_automl.py in train_model(self, params)
    227             f"Train model #{len(self._models)+1} / Model name: {params['name']}"
    228         )
--> 229         mf.train(model_path)
    230
    231         # save the model

~\AppData\Roaming\Python\Python36\site-packages\supervised\model_framework.py in train(self, model_path)
    176                 metric_name=self.get_metric_name(),
    177                 ml_task=self._ml_task,
--> 178                 explain_level=self._explain_level,
    179             )
    180

~\AppData\Roaming\Python\Python36\site-packages\supervised\algorithms\algorithm.py in interpret(self, X_train, y_train, X_validation, y_validation, model_file_path, learner_name, target_name, class_names, metric_name, ml_task, explain_level)
     77             learner_name,
     78             metric_name,
---> 79             ml_task,
     80         )
     81         if explain_level > 1:

~\AppData\Roaming\Python\Python36\site-packages\supervised\utils\importance.py in compute_and_plot(model, X_validation, y_validation, model_file_path, learner_name, metric_name, ml_task)
     58             n_jobs=-1,  # all cores
     59             random_state=12,
---> 60             n_repeats=5,  # default
     61         )
     62

D:\Anaconda3\envs\mljar\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     70                           FutureWarning)
     71         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72         return f(**kwargs)
     73     return inner_f
     74

D:\Anaconda3\envs\mljar\lib\site-packages\sklearn\inspection\_permutation_importance.py in permutation_importance(estimator, X, y, scoring, n_repeats, n_jobs, random_state)
    135     scores = Parallel(n_jobs=n_jobs)(delayed(_calculate_permutation_scores)(
    136         estimator, X, y, col_idx, random_seed, n_repeats, scorer
--> 137     ) for col_idx in range(X.shape[1]))
    138
    139     importances = baseline_score - np.array(scores)

D:\Anaconda3\envs\mljar\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1015
   1016             with self._backend.retrieval_context():
-> 1017                 self.retrieve()
   1018             # Make sure that we get a last message telling us we are done
   1019             elapsed_time = time.time() - self._start_time

D:\Anaconda3\envs\mljar\lib\site-packages\joblib\parallel.py in retrieve(self)
    907             try:
    908                 if getattr(self._backend, 'supports_timeout', False):
--> 909                     self._output.extend(job.get(timeout=self.timeout))
    910                 else:
    911                     self._output.extend(job.get())

D:\Anaconda3\envs\mljar\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
    560         AsyncResults.get from multiprocessing."""
    561         try:
--> 562             return future.result(timeout=timeout)
    563         except LokyTimeoutError:
    564             raise TimeoutError()

D:\Anaconda3\envs\mljar\lib\concurrent\futures\_base.py in result(self, timeout)
    430                 raise CancelledError()
    431             elif self._state == FINISHED:
--> 432                 return self.__get_result()
    433             else:
    434                 raise TimeoutError()

D:\Anaconda3\envs\mljar\lib\concurrent\futures\_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.

peter-WeiZhang commented 4 years ago

I still use the column name 'Spd*LSBW', and when I run it with the code below, it works. Thanks for your quick response!!

automl = AutoML(
    algorithms=["LightGBM"],
    tuning_mode='Perfect',
    # ml_task="regression",
    # explain_level=0
)
automl.fit(X_train, y_train)

peter-WeiZhang commented 4 years ago

> Hey @peter-WeiZhang! Thank you for reporting.
>
> It looks like a bug. The Automated EDA is trying to create a plot for your feature Spd*LSBW, and the resulting file name contains a * character, which is forbidden. We will fix it in the next (0.7.2) release.
>
> For now, please change the name of your feature so that it does not contain *, perhaps replacing it with _?
>
> Please let me know if it works after the column name change.

Hey @pplonski, it works well when I set explain_level=0, maybe this can help you figure out the issue faster~~

automl = AutoML(
    mode="Perform",
    algorithms=["LightGBM"],
    # tuning_mode='Perfect',
    # ml_task="regression",
    explain_level=0
)

pplonski commented 4 years ago

If you use mode Perform or Compete then EDA is switched off. Setting explain_level=1 or 0 also switches off the EDA. That's why it works.
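For reference, a minimal sketch of those configurations (constructor arguments as used in the snippets above; the exact defaults may differ between versions):

from supervised.automl import AutoML

# EDA is generated: Explain mode with the full explain level
automl_with_eda = AutoML(mode="Explain", explain_level=2)

# EDA is switched off: Perform/Compete modes, or explain_level of 0 or 1
automl_no_eda = AutoML(mode="Perform")
automl_no_eda_2 = AutoML(explain_level=0)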

However, I don't understand why TensorFlow is not loading.

Is your dataset private or public?

pplonski commented 4 years ago

There are 2 problems with this issue:

  1. Problem with the forbidden character in the file name
  2. Problem with the installation

Solutions:

  1. If there is a * character in a column name, the automated EDA tries to create a plot using the column name as the file name. However, some characters are forbidden in file names: https://www.mtu.edu/umc/services/digital/writing/characters-avoid/ I will fix this problem as part of this issue; I'm planning to do it today (see the sketch after this list).
  2. The second problem is with TensorFlow dependencies. It looks like TF pulls in some old dependencies and breaks the installation. Please see the discussion in https://github.com/mljar/mljar-supervised/issues/167 - to work around it, please try to fix the installation manually (I don't know the exact steps). To have a fix for everyone, we need to wait for a newer version of TF or switch to the scikit-learn MLP implementation.
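For the first point, the fix will most likely boil down to sanitizing the column name before building the plot file path. A rough sketch of the idea (the helper name and the character set are illustrative, not the actual implementation):

import os
import re

def eda_plot_path(results_dir, column_name):
    # Replace characters that are invalid in file names on common platforms
    # (e.g. * ? " < > | : / \) with an underscore before saving the EDA plot.
    safe_name = re.sub(r'[*?"<>|:/\\]', "_", column_name)
    return os.path.join(results_dir, "EDA", safe_name + ".png")

# eda_plot_path("AutoML_9", "Spd*LSBW") -> 'AutoML_9\EDA\Spd_LSBW.png' on Windows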
pplonski commented 4 years ago

The first problem, with the forbidden characters in the file names (based on column names) in the Automated EDA, is fixed and will be included in release 0.7.2.