ETA444 / datasafari

DataSafari simplifies complex data science tasks into straightforward, powerful one-liners.
https://datasafari.dev
GNU General Public License v3.0
2 stars 0 forks source link

Implement error handling for model_recommendation_core() #109

Closed ETA444 closed 4 months ago

ETA444 commented 4 months ago

Error Handling in model_recommendation_core()

Type Validations

# Type validation for the arguments of model_recommendation_core().
# Each guard raises as soon as an argument fails, in the order listed.

# Training features and target.
if not isinstance(x_train, (pd.DataFrame, np.ndarray)):
    raise TypeError(
        "model_recommendation_core(): 'x_train' must be a pandas DataFrame or NumPy ndarray."
    )
if not isinstance(y_train, (pd.Series, np.ndarray)):
    raise TypeError(
        "model_recommendation_core(): 'y_train' must be a pandas Series or NumPy ndarray."
    )

# The task must be one of the two supported string labels. The isinstance test
# comes first so the membership test only ever runs on a string.
if not (isinstance(task_type, str) and task_type in ('classification', 'regression')):
    raise ValueError(
        "model_recommendation_core(): 'task_type' must be either 'classification' or 'regression'."
    )

# Container of metric names; the individual items are not inspected here.
if not isinstance(priority_metrics, list):
    raise TypeError(
        "model_recommendation_core(): 'priority_metrics' must be a list of scoring metric names."
    )

# Integer-valued settings.
if not isinstance(cv, int):
    raise TypeError("model_recommendation_core(): 'cv' must be an integer.")
if not (isinstance(n_top_models, int) and n_top_models > 0):
    raise ValueError(
        "model_recommendation_core(): 'n_top_models' must be an integer greater than 0."
    )
if not isinstance(verbose, int):
    raise TypeError("model_recommendation_core(): 'verbose' must be an integer value.")

Value Validations

# Value validation for the arguments of model_recommendation_core().
# NOTE(review): written to run after the type validations, so attribute access
# such as `.size` / `.shape` presumes those checks already passed — confirm at
# the call site.

# Training data must be non-empty and have matching row counts.
if x_train.size == 0:
    raise ValueError("model_recommendation_core(): 'x_train' cannot be empty.")
if y_train.size == 0:
    raise ValueError("model_recommendation_core(): 'y_train' cannot be empty.")
if x_train.shape[0] != y_train.shape[0]:
    raise ValueError("model_recommendation_core(): 'x_train' and 'y_train' must have the same number of rows.")

# Check the element types BEFORE looking for duplicates: set() hashes every
# item, so an unhashable entry (e.g. a nested list) would otherwise escape as a
# bare "unhashable type" TypeError instead of the descriptive error below.
if not all(isinstance(metric, str) for metric in priority_metrics):
    raise ValueError("model_recommendation_core(): All items in 'priority_metrics' must be strings representing metric names.")
if len(priority_metrics) != len(set(priority_metrics)):
    raise ValueError("model_recommendation_core(): 'priority_metrics' should not contain duplicate values.")

# A metric is accepted when it appears among the values of either scoring
# mapping.
# NOTE(review): the check is not narrowed by `task_type`, so a metric listed
# only for the other task passes here — confirm that is intended.
valid_metrics = set(scoring_classification.values()) | set(scoring_regression.values())
invalid_metrics = [metric for metric in priority_metrics if metric not in valid_metrics]
if invalid_metrics:
    valid_metric_list = ", ".join(sorted(valid_metrics))
    raise ValueError(f"model_recommendation_core(): Invalid metric(s) in 'priority_metrics': {', '.join(invalid_metrics)}.\n\nValid metrics are: {valid_metric_list}.")

# Cannot request more top models than exist for the chosen task.
if task_type == 'classification' and n_top_models > len(models_classification):
    raise ValueError(f"model_recommendation_core(): 'n_top_models' cannot exceed the number of available classification models ({len(models_classification)}).")
if task_type == 'regression' and n_top_models > len(models_regression):
    raise ValueError(f"model_recommendation_core(): 'n_top_models' cannot exceed the number of available regression models ({len(models_regression)}).")