Closed sfc-gh-klim closed 1 month ago
When running the notebook locally/externally, the cell below failed with the following error while attempting to create a temporary stored procedure.
Notebook in question: 3_snowflake_ml_model_training_inference.ipynb. Cell in question:
grid_search = GridSearchCV( estimator=XGBRegressor(), param_grid={ "n_estimators":[100, 200, 300, 400, 500], "learning_rate":[0.1, 0.2, 0.3, 0.4, 0.5], }, n_jobs = -1, scoring="neg_mean_absolute_percentage_error", input_cols=CATEGORICAL_COLUMNS_OE+NUMERICAL_COLUMNS, label_cols=LABEL_COLUMNS, output_cols=OUTPUT_COLUMNS ) # Train grid_search.fit(train_df)
Full stacktrace:
SnowparkSQLException Traceback (most recent call last) File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/_internal/telemetry.py:394, in send_api_usage_telemetry.<locals>.decorator.<locals>.wrap(*args, **kwargs) 393 try: --> 394 res = func(*args, **kwargs) 395 except Exception as e: File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/framework/base.py:431, in BaseEstimator.fit(self, dataset) 430 lineage_utils.set_data_sources(self, data_sources) --> 431 return self._fit(dataset) File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/model_selection/grid_search_cv.py:328, in GridSearchCV._fit(self, dataset) 319 model_trainer = ModelTrainerBuilder.build( 320 estimator=self._sklearn_object, 321 dataset=dataset, (...) 326 subproject=_SUBPROJECT, 327 ) --> 328 self._sklearn_object = model_trainer.train() 329 self._is_fitted = True File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py:1163, in DistributedHPOTrainer.train(self) 1151 return self.fit_search_snowpark_enable_efficient_memory_usage( 1152 param_grid=param_grid, 1153 dataset=self.dataset, (...) 
1160 sample_weight_col=self.sample_weight_col, 1161 ) -> 1163 return self.fit_search_snowpark( 1164 param_grid=param_grid, 1165 dataset=self.dataset, 1166 session=self.session, 1167 estimator=self.estimator, 1168 dependencies=relaxed_dependencies, 1169 udf_imports=["sklearn"], 1170 input_cols=self.input_cols, 1171 label_cols=self.label_cols, 1172 sample_weight_col=self.sample_weight_col, 1173 ) File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py:670, in DistributedHPOTrainer.fit_search_snowpark(self, param_grid, dataset, session, estimator, dependencies, udf_imports, input_cols, label_cols, sample_weight_col) 668 return str(os.path.basename(local_result_file_name)) --> 670 sproc_export_file_name = _distributed_search( 671 session, 672 imports, 673 stage_estimator_file_name, 674 input_cols, 675 label_cols, 676 ) 678 local_estimator_path = temp_file_utils.get_temp_file_path() File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/stored_procedure.py:127, in StoredProcedure.__call__(self, session, statement_params, *args) 126 df = session.sql(query) --> 127 return df._internal_collect_with_tag(statement_params=statement_params)[0][ 128 0 129 ] 130 else: File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/telemetry.py:145, in df_collect_api_telemetry.<locals>.wrap(*args, **kwargs) 144 with args[0]._session.query_history() as query_history: --> 145 result = func(*args, **kwargs) 146 plan = args[0]._select_statement or args[0]._plan File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/dataframe.py:645, in DataFrame._internal_collect_with_tag_no_telemetry(self, statement_params, block, data_type, log_on_exception, case_sensitive) 633 def _internal_collect_with_tag_no_telemetry( 634 self, 635 *, (...) 
643 # we should always call this method instead of collect(), to make sure the 644 # query tag is set properly. --> 645 return self._session._conn.execute( 646 self._plan, 647 block=block, 648 data_type=data_type, 649 _statement_params=create_or_update_statement_params_with_query_tag( 650 statement_params or self._statement_params, 651 self._session.query_tag, 652 SKIP_LEVELS_THREE, 653 ), 654 log_on_exception=log_on_exception, 655 case_sensitive=case_sensitive, 656 ) File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:510, in ServerConnection.execute(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs) 507 raise NotImplementedError( 508 "Async query is not supported in stored procedure yet" 509 ) --> 510 result_set, result_meta = self.get_result_set( 511 plan, 512 to_pandas, 513 to_iter, 514 **kwargs, 515 block=block, 516 data_type=data_type, 517 log_on_exception=log_on_exception, 518 case_sensitive=case_sensitive, 519 ) 520 if not block: File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:191, in SnowflakePlan.Decorator.wrap_exception.<locals>.wrap(*args, **kwargs) 188 ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR( 189 e 190 ) --> 191 raise ne.with_traceback(tb) from None File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:122, in SnowflakePlan.Decorator.wrap_exception.<locals>.wrap(*args, **kwargs) 121 try: --> 122 return func(*args, **kwargs) 123 except snowflake.connector.errors.ProgrammingError as e: File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:612, in ServerConnection.get_result_set(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs) 611 final_query = 
final_query.replace(holder, id_) --> 612 result = self.run_query( 613 final_query, 614 to_pandas, 615 to_iter and (i == len(plan.queries) - 1), 616 is_ddl_on_temp_object=query.is_ddl_on_temp_object, 617 block=not is_last, 618 data_type=data_type, 619 async_job_plan=plan, 620 log_on_exception=log_on_exception, 621 case_sensitive=case_sensitive, 622 params=query.params, 623 **kwargs, 624 ) 625 placeholders[query.query_id_place_holder] = ( 626 result["sfqid"] if not is_last else result.query_id 627 ) File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:123, in ServerConnection._Decorator.wrap_exception.<locals>.wrap(*args, **kwargs) 122 except Exception as ex: --> 123 raise ex File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:117, in ServerConnection._Decorator.wrap_exception.<locals>.wrap(*args, **kwargs) 116 try: --> 117 return func(*args, **kwargs) 118 except ReauthenticationRequest as ex: File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:417, in ServerConnection.run_query(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, num_statements, **kwargs) 416 logger.error(f"Failed to execute query{query_id_log} {query}\n{ex}") --> 417 raise ex 419 # fetch_pandas_all/batches() only works for SELECT statements 420 # We call fetchall() if fetch_pandas_all/batches() fails, 421 # because when the query plan has multiple queries, it will 422 # have non-select statements, and it shouldn't fail if the user 423 # calls to_pandas() to execute the query. 
File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:402, in ServerConnection.run_query(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, num_statements, **kwargs) 401 if block: --> 402 results_cursor = self.execute_and_notify_query_listener( 403 query, params=params, **kwargs 404 ) 405 logger.debug(f"Execute query [queryID: {results_cursor.sfqid}] {query}") File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:354, in ServerConnection.execute_and_notify_query_listener(self, query, **kwargs) 351 def execute_and_notify_query_listener( 352 self, query: str, **kwargs: Any 353 ) -> SnowflakeCursor: --> 354 results_cursor = self._cursor.execute(query, **kwargs) 355 self.notify_query_listeners( 356 QueryRecord(results_cursor.sfqid, results_cursor.query) 357 ) File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/cursor.py:1080, in SnowflakeCursor.execute(self, command, params, _bind_stage, timeout, _exec_async, _no_retry, _do_reset, _put_callback, _put_azure_callback, _put_callback_output_stream, _get_callback, _get_azure_callback, _get_callback_output_stream, _show_progress_bar, _statement_params, _is_internal, _describe_only, _no_results, _is_put_get, _raise_put_get_error, _force_put_overwrite, _skip_upload_on_content_match, file_stream, num_statements) 1079 error_class = IntegrityError if is_integrity_error else ProgrammingError -> 1080 Error.errorhandler_wrapper(self.connection, self, error_class, errvalue) 1081 return self File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/errors.py:290, in Error.errorhandler_wrapper(connection, cursor, error_class, error_value) 274 """Error handler wrapper that calls the errorhandler method. 275 276 Args: (...) 
287 exception to the first handler in that order. 288 """ --> 290 handed_over = Error.hand_to_other_handler( 291 connection, 292 cursor, 293 error_class, 294 error_value, 295 ) 296 if not handed_over: File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/errors.py:345, in Error.hand_to_other_handler(connection, cursor, error_class, error_value) 344 cursor.messages.append((error_class, error_value)) --> 345 cursor.errorhandler(connection, cursor, error_class, error_value) 346 return True File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/errors.py:221, in Error.default_errorhandler(connection, cursor, error_class, error_value) 220 done_format_msg = error_value.get("done_format_msg") --> 221 raise error_class( 222 msg=error_value.get("msg"), 223 errno=None if errno is None else int(errno), 224 sqlstate=error_value.get("sqlstate"), 225 sfqid=error_value.get("sfqid"), 226 query=error_value.get("query"), 227 done_format_msg=( 228 None if done_format_msg is None else bool(done_format_msg) 229 ), 230 connection=connection, 231 cursor=cursor, 232 ) SnowparkSQLException: (1304): 01b631ff-0905-2416-003f-5b8300c97c0a: 100357 (P0000): Cannot create a Python function with the specified packages. Please check your packages specification and try again. The above exception was the direct cause of the following exception: SnowparkSQLException Traceback (most recent call last) Cell In[13], line 15 1 grid_search = GridSearchCV( 2 estimator=XGBRegressor(), 3 param_grid={ (...) 
11 output_cols=OUTPUT_COLUMNS 12 ) 14 # Train ---> 15 grid_search.fit(train_df) File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/_internal/telemetry.py:416, in send_api_usage_telemetry.<locals>.decorator.<locals>.wrap(*args, **kwargs) 414 raise me.original_exception from None 415 else: --> 416 raise me.original_exception from e 417 else: 418 return update_stmt_params_if_snowpark_df(res, statement_params) SnowparkSQLException: (1300) (1304): 01b631ff-0905-2416-003f-5b8300c97c0a: 100357 (P0000): Cannot create a Python function with the specified packages. Please check your packages specification and try again. Packages in the pyenv (Python 3.11.9), from `pip list`:
Package Version
absl-py 1.4.0 aiobotocore 2.13.1 aiohappyeyeballs 2.3.5 aiohttp 3.10.1 aioitertools 0.11.0 aiosignal 1.3.1 anyio 3.7.1 appnope 0.1.4 argon2-cffi 23.1.0 argon2-cffi-bindings 21.2.0 arrow 1.3.0 asn1crypto 1.5.1 asttokens 2.4.1 async-lru 2.0.4 attrs 24.2.0 Babel 2.15.0 beautifulsoup4 4.12.3 bleach 6.1.0 botocore 1.34.131 cachetools 4.2.2 certifi 2024.7.4 cffi 1.17.0 charset-normalizer 3.3.2 cloudpickle 2.2.1 comm 0.2.2 contourpy 1.2.1 cramjam 2.8.3 cryptography 42.0.8 cycler 0.12.1 debugpy 1.8.5 decorator 5.1.1 defusedxml 0.7.1 executing 2.0.1 fastjsonschema 2.20.0 fastparquet 2023.10.0 filelock 3.15.4 fonttools 4.53.1 fqdn 1.5.1 frozenlist 1.4.1 fsspec 2023.12.2 h11 0.14.0 httpcore 1.0.5 httpx 0.27.0 idna 3.7 importlib_metadata 8.2.0 importlib_resources 6.4.0 ipykernel 6.29.5 ipython 8.26.0 ipywidgets 8.1.3 isoduration 20.11.0 jaraco.classes 3.4.0 jedi 0.19.1 Jinja2 3.1.4 jmespath 1.0.1 joblib 1.4.2 json5 0.9.25 jsonpointer 3.0.0 jsonschema 4.23.0 jsonschema-specifications 2023.12.1 jupyter 1.0.0 jupyter_client 8.6.2 jupyter-console 6.6.3 jupyter_core 5.7.2 jupyter-events 0.10.0 jupyter-lsp 2.2.5 jupyter_server 2.14.2 jupyter_server_terminals 0.5.3 jupyterlab 4.2.4 jupyterlab_pygments 0.3.0 jupyterlab_server 2.27.3 jupyterlab_widgets 3.0.11 keyring 24.3.1 kiwisolver 1.4.5 MarkupSafe 2.1.5 matplotlib 3.8.4 matplotlib-inline 0.1.7 mistune 3.0.2 more-itertools 10.4.0 multidict 6.0.5 nbclient 0.10.0 nbconvert 7.16.4 nbformat 5.10.4 nest-asyncio 1.6.0 notebook 7.2.1 notebook_shim 0.2.4 numpy 1.26.4 overrides 7.7.0 packaging 23.2 pandas 2.2.2 pandocfilters 1.5.1 parso 0.8.4 pexpect 4.9.0 pillow 10.4.0 pip 24.0 platformdirs 4.2.2 prometheus_client 0.20.0 prompt_toolkit 3.0.47 psutil 6.0.0 ptyprocess 0.7.0 pure_eval 0.2.3 pyarrow 10.0.1 pycparser 2.22 Pygments 2.18.0 PyJWT 2.9.0 pyOpenSSL 24.2.1 pyparsing 3.1.2 python-dateutil 2.9.0.post0 python-json-logger 2.0.7 pytimeparse 1.1.8 pytz 2024.1 PyYAML 6.0.2 pyzmq 26.1.0 qtconsole 5.5.2 QtPy 2.4.1 referencing 0.35.1 requests 
2.32.3 retrying 1.3.4 rfc3339-validator 0.1.4 rfc3986-validator 0.1.1 rpds-py 0.20.0 s3fs 2023.12.2 scikit-learn 1.3.0 scipy 1.14.0 seaborn 0.13.2 Send2Trash 1.8.3 setuptools 65.5.0 six 1.16.0 sniffio 1.3.1 snowflake-connector-python 3.10.0 snowflake-ml-python 1.5.4 snowflake-snowpark-python 1.17.0 sortedcontainers 2.4.0 soupsieve 2.5 sqlparse 0.5.1 stack-data 0.6.3 terminado 0.18.1 threadpoolctl 3.5.0 tinycss2 1.3.0 tomlkit 0.13.0 tornado 6.4.1 traitlets 5.14.3 types-python-dateutil 2.9.0.20240316 typing_extensions 4.12.2 tzdata 2024.1 uri-template 1.3.0 urllib3 2.2.2 wcwidth 0.2.13 webcolors 24.6.0 webencodings 0.5.1 websocket-client 1.8.0 wheel 0.44.0 widgetsnbextension 4.0.11 wrapt 1.16.0 xgboost 1.7.6 yarl 1.9.4 zipp 3.19.2
Resolved after pinning numpy to 1.23.5 (the environment listed above had numpy 1.26.4 installed).
When running the notebook locally/externally, the cell below failed with the following error while attempting to create a temporary stored procedure.
Notebook in question: 3_snowflake_ml_model_training_inference.ipynb Cell in question:
Full stacktrace:
Package Version
absl-py 1.4.0 aiobotocore 2.13.1 aiohappyeyeballs 2.3.5 aiohttp 3.10.1 aioitertools 0.11.0 aiosignal 1.3.1 anyio 3.7.1 appnope 0.1.4 argon2-cffi 23.1.0 argon2-cffi-bindings 21.2.0 arrow 1.3.0 asn1crypto 1.5.1 asttokens 2.4.1 async-lru 2.0.4 attrs 24.2.0 Babel 2.15.0 beautifulsoup4 4.12.3 bleach 6.1.0 botocore 1.34.131 cachetools 4.2.2 certifi 2024.7.4 cffi 1.17.0 charset-normalizer 3.3.2 cloudpickle 2.2.1 comm 0.2.2 contourpy 1.2.1 cramjam 2.8.3 cryptography 42.0.8 cycler 0.12.1 debugpy 1.8.5 decorator 5.1.1 defusedxml 0.7.1 executing 2.0.1 fastjsonschema 2.20.0 fastparquet 2023.10.0 filelock 3.15.4 fonttools 4.53.1 fqdn 1.5.1 frozenlist 1.4.1 fsspec 2023.12.2 h11 0.14.0 httpcore 1.0.5 httpx 0.27.0 idna 3.7 importlib_metadata 8.2.0 importlib_resources 6.4.0 ipykernel 6.29.5 ipython 8.26.0 ipywidgets 8.1.3 isoduration 20.11.0 jaraco.classes 3.4.0 jedi 0.19.1 Jinja2 3.1.4 jmespath 1.0.1 joblib 1.4.2 json5 0.9.25 jsonpointer 3.0.0 jsonschema 4.23.0 jsonschema-specifications 2023.12.1 jupyter 1.0.0 jupyter_client 8.6.2 jupyter-console 6.6.3 jupyter_core 5.7.2 jupyter-events 0.10.0 jupyter-lsp 2.2.5 jupyter_server 2.14.2 jupyter_server_terminals 0.5.3 jupyterlab 4.2.4 jupyterlab_pygments 0.3.0 jupyterlab_server 2.27.3 jupyterlab_widgets 3.0.11 keyring 24.3.1 kiwisolver 1.4.5 MarkupSafe 2.1.5 matplotlib 3.8.4 matplotlib-inline 0.1.7 mistune 3.0.2 more-itertools 10.4.0 multidict 6.0.5 nbclient 0.10.0 nbconvert 7.16.4 nbformat 5.10.4 nest-asyncio 1.6.0 notebook 7.2.1 notebook_shim 0.2.4 numpy 1.26.4 overrides 7.7.0 packaging 23.2 pandas 2.2.2 pandocfilters 1.5.1 parso 0.8.4 pexpect 4.9.0 pillow 10.4.0 pip 24.0 platformdirs 4.2.2 prometheus_client 0.20.0 prompt_toolkit 3.0.47 psutil 6.0.0 ptyprocess 0.7.0 pure_eval 0.2.3 pyarrow 10.0.1 pycparser 2.22 Pygments 2.18.0 PyJWT 2.9.0 pyOpenSSL 24.2.1 pyparsing 3.1.2 python-dateutil 2.9.0.post0 python-json-logger 2.0.7 pytimeparse 1.1.8 pytz 2024.1 PyYAML 6.0.2 pyzmq 26.1.0 qtconsole 5.5.2 QtPy 2.4.1 referencing 0.35.1 requests 
2.32.3 retrying 1.3.4 rfc3339-validator 0.1.4 rfc3986-validator 0.1.1 rpds-py 0.20.0 s3fs 2023.12.2 scikit-learn 1.3.0 scipy 1.14.0 seaborn 0.13.2 Send2Trash 1.8.3 setuptools 65.5.0 six 1.16.0 sniffio 1.3.1 snowflake-connector-python 3.10.0 snowflake-ml-python 1.5.4 snowflake-snowpark-python 1.17.0 sortedcontainers 2.4.0 soupsieve 2.5 sqlparse 0.5.1 stack-data 0.6.3 terminado 0.18.1 threadpoolctl 3.5.0 tinycss2 1.3.0 tomlkit 0.13.0 tornado 6.4.1 traitlets 5.14.3 types-python-dateutil 2.9.0.20240316 typing_extensions 4.12.2 tzdata 2024.1 uri-template 1.3.0 urllib3 2.2.2 wcwidth 0.2.13 webcolors 24.6.0 webencodings 0.5.1 websocket-client 1.8.0 wheel 0.44.0 widgetsnbextension 4.0.11 wrapt 1.16.0 xgboost 1.7.6 yarl 1.9.4 zipp 3.19.2