ETA444 / datasafari

DataSafari simplifies complex data science tasks into straightforward, powerful one-liners.
https://datasafari.dev
GNU General Public License v3.0
2 stars 0 forks source link

Implement error handling for data_preprocessing_core() #99

Closed ETA444 closed 4 months ago

ETA444 commented 4 months ago

Error Handling in data_preprocessing_core()

Type Validations

# Type validation for data_preprocessing_core(): reject wrongly typed arguments
# before any data is touched. Checks run in a fixed order, so the first
# offending argument is the one that gets reported.
if not isinstance(df, pd.DataFrame):
    raise TypeError("data_preprocessing_core(): The 'df' parameter must be a pandas DataFrame.")

# 'x_cols' has to be a list, and every entry in it has to be a string.
x_cols_is_list_of_str = isinstance(x_cols, list) and all(isinstance(name, str) for name in x_cols)
if not x_cols_is_list_of_str:
    raise TypeError("data_preprocessing_core(): The 'x_cols' parameter must be a list of strings representing column names.")

# Plain isinstance checks, expressed as (value, required type, error message)
# and evaluated top to bottom.
scalar_type_checks = (
    (y_col, str, "data_preprocessing_core(): The 'y_col' parameter must be a string representing the target column name."),
    (data_state, str, "data_preprocessing_core(): The 'data_state' parameter must be a string."),
    (test_size, float, "data_preprocessing_core(): The 'test_size' parameter must be a float."),
    (random_state, int, "data_preprocessing_core(): The 'random_state' parameter must be an integer."),
    (verbose, int, "data_preprocessing_core(): 'verbose' must be an integer value."),
)
for arg_value, expected_type, type_error_message in scalar_type_checks:
    if not isinstance(arg_value, expected_type):
        raise TypeError(type_error_message)

# Duck-typed check: the imputer is accepted if it exposes 'fit_transform',
# or failing that, both 'fit' and 'transform'.
if not hasattr(numeric_imputer, 'fit_transform') and not (
    hasattr(numeric_imputer, 'fit') and hasattr(numeric_imputer, 'transform')
):
    raise TypeError("data_preprocessing_core(): The 'numeric_imputer' must support 'fit_transform' or both 'fit' and 'transform' methods.")
# Similar checks for other transformers and vectorizers...

Value Validations

# Value validation for data_preprocessing_core(): the checks below inspect the
# contents of the arguments and raise ValueError on the first violation.

# An empty DataFrame is rejected outright.
if df.empty:
    raise ValueError("data_preprocessing_core(): The input DataFrame is empty.")

# 'data_state' is compared case-insensitively against the two supported states.
if data_state.lower() not in ['unprocessed', 'preprocessed']:
    raise ValueError("data_preprocessing_core(): The 'data_state' parameter must be either 'unprocessed' or 'preprocessed'.")

# 'test_size' must lie strictly between 0 and 1 (both endpoints excluded).
if not 0 < test_size < 1:
    raise ValueError("data_preprocessing_core(): The 'test_size' parameter must be a float between 0 and 1.")

# The target column must exist in the DataFrame.
if y_col not in df.columns:
    raise ValueError(f"data_preprocessing_core(): The specified target column '{y_col}' is not present in the DataFrame.")

# Every requested feature column must exist as well; all missing ones are
# reported together in a single error.
missing_cols = set(x_cols) - set(df.columns)
if missing_cols:
    # Sets have no defined iteration order, so sort the names to keep the
    # error message deterministic across runs.
    raise ValueError(f"data_preprocessing_core(): The following feature columns are not present in the DataFrame: {', '.join(sorted(missing_cols))}")