print(f"R2 score on Train : {train_r2_score}")
print(f"R2 score on Test : {test_r2_score}")
Error
SnowparkSQLException: (1304): 01b83a41-0004-21f5-0001-b2ff000694a6: 100357 (P0000): Python Interpreter Error: Traceback (most recent call last): File "/home/udf/7298028005/udf_py_1901916001.zip/udf_py_1901916001.py", line 429, in compute return func(session,arg1,arg2,arg3,arg4) File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py", line 801, in _distributed_search from sklearn.utils.validation import _check_fit_params, indexable ImportError: cannot import name '_check_fit_params' from 'sklearn.utils.validation' (/usr/lib/python_udf/40aaaca3c394c0b2751774fb0239ba44f43487a3f73ef6f34480e87d2cf97f94/lib/python3.9/site-packages/sklearn/utils/validation.py) in function SNOWPARK_TEMP_PROCEDURE_12AC5SCX2R with handler udf_py_1901916001.compute
Traceback:
File "Cell [cell3]", line 33, in <module>
model.fit(train_df)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/_internal/telemetry.py", line 527, in wrap
return ctx.run(execute_func_with_statement_params)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/_internal/telemetry.py", line 503, in execute_func_with_statement_params
result = func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/framework/base.py", line 440, in fit
return self._fit(dataset)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/model_selection/grid_search_cv.py", line 328, in _fit
self._sklearn_object = model_trainer.train()
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py", line 1149, in train
return self.fit_search_snowpark_enable_efficient_memory_usage(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py", line 1099, in fit_search_snowpark_enable_efficient_memory_usage
sproc_export_file_name = _distributed_search(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/stored_procedure.py", line 131, in call
return df._internal_collect_with_tag(statement_params=statement_params)[0][
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/telemetry.py", line 167, in wrap
result = func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/dataframe.py", line 651, in _internal_collect_with_tag_no_telemetry
return self._session._conn.execute(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 555, in execute
result_set, result_meta = self.get_result_set(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py", line 208, in wrap
raise ne.with_traceback(tb) from None
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py", line 139, in wrap
return func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 662, in get_result_set
result = self.run_query(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 126, in wrap
raise ex
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 120, in wrap
return func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 462, in run_query
raise ex
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 447, in run_query
results_cursor = self.execute_and_notify_query_listener(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/_internal/telemetry.py", line 183, in wrapper
return func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 398, in execute_and_notify_query_listener
results_cursor = self._cursor.execute(query, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/cursor.py", line 1018, in execute
Error.errorhandler_wrapper(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/errors.py", line 232, in errorhandler_wrapper
handed_over = Error.hand_to_other_handler(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/errors.py", line 287, in hand_to_other_handler
cursor.errorhandler(connection, cursor, error_class, error_value)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/errors.py", line 165, in default_errorhandler
raise error_class(
When executing it, I encountered the following error:
Can someone help me, please?
**Code :** CROSS_VALIDATION_FOLDS = 10 POLYNOMIAL_FEATURES_DEGREE = 2
Create train and test Snowpark DataFrames
train_df, test_df = session.table("MARKETING_BUDGETS_FEATURES").random_split(weights=[0.8, 0.2], seed=0)
Preprocess the Numeric columns
We apply PolynomialFeatures and StandardScaler preprocessing steps to the numeric columns
NOTE: High degrees can cause overfitting.
numeric_features = ['SEARCH_ENGINE','SOCIAL_MEDIA','VIDEO','EMAIL'] numeric_transformer = Pipeline(steps=[('poly',PolynomialFeatures(degree = POLYNOMIAL_FEATURES_DEGREE)),('scaler', StandardScaler())])
Combine the preprocessed step together using the Column Transformer module
preprocessor = ColumnTransformer( transformers=[ ('num', numeric_transformer, numeric_features)])
The next step is to integrate the features we just preprocessed with our Machine Learning algorithm to enable us to build a model
pipeline = Pipeline(steps=[('preprocessor', preprocessor),('classifier', LinearRegression())]) parameteres = {}
Use GridSearch to find the best fitting model based on number_of_folds folds
model = GridSearchCV( estimator=pipeline, param_grid=parameteres, cv=CROSS_VALIDATION_FOLDS, label_cols=["REVENUE"], output_cols=["PREDICTED_REVENUE"], verbose=2 )
Fit and Score
model.fit(train_df) train_r2_score = model.score(train_df) test_r2_score = model.score(test_df)
R2 score on train and test datasets
print(f"R2 score on Train : {train_r2_score}") print(f"R2 score on Test : {test_r2_score}")
Error
SnowparkSQLException: (1304): 01b83a41-0004-21f5-0001-b2ff000694a6: 100357 (P0000): Python Interpreter Error: Traceback (most recent call last): File "/home/udf/7298028005/udf_py_1901916001.zip/udf_py_1901916001.py", line 429, in compute return func(session,arg1,arg2,arg3,arg4) File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py", line 801, in _distributed_search from sklearn.utils.validation import _check_fit_params, indexable ImportError: cannot import name '_check_fit_params' from 'sklearn.utils.validation' (/usr/lib/python_udf/40aaaca3c394c0b2751774fb0239ba44f43487a3f73ef6f34480e87d2cf97f94/lib/python3.9/site-packages/sklearn/utils/validation.py) in function SNOWPARK_TEMP_PROCEDURE_12AC5SCX2R with handler udf_py_1901916001.compute Traceback: File "Cell [cell3]", line 33, in
model.fit(train_df)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/_internal/telemetry.py", line 527, in wrap
return ctx.run(execute_func_with_statement_params)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/_internal/telemetry.py", line 503, in execute_func_with_statement_params
result = func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/framework/base.py", line 440, in fit
return self._fit(dataset)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/model_selection/grid_search_cv.py", line 328, in _fit
self._sklearn_object = model_trainer.train()
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py", line 1149, in train
return self.fit_search_snowpark_enable_efficient_memory_usage(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py", line 1099, in fit_search_snowpark_enable_efficient_memory_usage
sproc_export_file_name = _distributed_search(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/stored_procedure.py", line 131, in call
return df._internal_collect_with_tag(statement_params=statement_params)[0][
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/telemetry.py", line 167, in wrap
result = func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/dataframe.py", line 651, in _internal_collect_with_tag_no_telemetry
return self._session._conn.execute(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 555, in execute
result_set, result_meta = self.get_result_set(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py", line 208, in wrap
raise ne.with_traceback(tb) from None
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py", line 139, in wrap
return func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 662, in get_result_set
result = self.run_query(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 126, in wrap
raise ex
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 120, in wrap
return func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 462, in run_query
raise ex
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 447, in run_query
results_cursor = self.execute_and_notify_query_listener(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/ml/_internal/telemetry.py", line 183, in wrapper
return func(*args, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py", line 398, in execute_and_notify_query_listener
results_cursor = self._cursor.execute(query, **kwargs)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/cursor.py", line 1018, in execute
Error.errorhandler_wrapper(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/errors.py", line 232, in errorhandler_wrapper
handed_over = Error.hand_to_other_handler(
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/errors.py", line 287, in hand_to_other_handler
cursor.errorhandler(connection, cursor, error_class, error_value)
File "/usr/lib/python_udf/15cd5df74ab7fd683d821c71e148956b28181c9e87413b0b6bffb8d2cd402e55/lib/python3.9/site-packages/snowflake/connector/errors.py", line 165, in default_errorhandler
raise error_class(