Snowflake-Labs / sfguide-intro-to-machine-learning-with-snowflake-ml-for-python

Apache License 2.0
54 stars 111 forks source link

Exception: Cannot create a Python function with the specified packages. Please check your packages specification and try again. #24

Closed sfc-gh-klim closed 1 month ago

sfc-gh-klim commented 1 month ago

When running the notebook locally/externally, I hit the error below: the call failed while attempting to create a temporary stored procedure.

Notebook in question: `3_snowflake_ml_model_training_inference.ipynb`

Cell in question:

grid_search = GridSearchCV(
    estimator=XGBRegressor(),
    param_grid={
        "n_estimators":[100, 200, 300, 400, 500],
        "learning_rate":[0.1, 0.2, 0.3, 0.4, 0.5],
    },
    n_jobs = -1,
    scoring="neg_mean_absolute_percentage_error",
    input_cols=CATEGORICAL_COLUMNS_OE+NUMERICAL_COLUMNS,
    label_cols=LABEL_COLUMNS,
    output_cols=OUTPUT_COLUMNS
)

# Train
grid_search.fit(train_df)

Full stacktrace:

SnowparkSQLException                      Traceback (most recent call last)
File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/_internal/telemetry.py:394, in send_api_usage_telemetry.<locals>.decorator.<locals>.wrap(*args, **kwargs)
    393 try:
--> 394     res = func(*args, **kwargs)
    395 except Exception as e:

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/framework/base.py:431, in BaseEstimator.fit(self, dataset)
    430 lineage_utils.set_data_sources(self, data_sources)
--> 431 return self._fit(dataset)

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/model_selection/grid_search_cv.py:328, in GridSearchCV._fit(self, dataset)
    319 model_trainer = ModelTrainerBuilder.build(
    320     estimator=self._sklearn_object,
    321     dataset=dataset,
   (...)
    326     subproject=_SUBPROJECT,
    327 )
--> 328 self._sklearn_object = model_trainer.train()
    329 self._is_fitted = True

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py:1163, in DistributedHPOTrainer.train(self)
   1151     return self.fit_search_snowpark_enable_efficient_memory_usage(
   1152         param_grid=param_grid,
   1153         dataset=self.dataset,
   (...)
   1160         sample_weight_col=self.sample_weight_col,
   1161     )
-> 1163 return self.fit_search_snowpark(
   1164     param_grid=param_grid,
   1165     dataset=self.dataset,
   1166     session=self.session,
   1167     estimator=self.estimator,
   1168     dependencies=relaxed_dependencies,
   1169     udf_imports=["sklearn"],
   1170     input_cols=self.input_cols,
   1171     label_cols=self.label_cols,
   1172     sample_weight_col=self.sample_weight_col,
   1173 )

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py:670, in DistributedHPOTrainer.fit_search_snowpark(self, param_grid, dataset, session, estimator, dependencies, udf_imports, input_cols, label_cols, sample_weight_col)
    668     return str(os.path.basename(local_result_file_name))
--> 670 sproc_export_file_name = _distributed_search(
    671     session,
    672     imports,
    673     stage_estimator_file_name,
    674     input_cols,
    675     label_cols,
    676 )
    678 local_estimator_path = temp_file_utils.get_temp_file_path()

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/stored_procedure.py:127, in StoredProcedure.__call__(self, session, statement_params, *args)
    126     df = session.sql(query)
--> 127     return df._internal_collect_with_tag(statement_params=statement_params)[0][
    128         0
    129     ]
    130 else:

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/telemetry.py:145, in df_collect_api_telemetry.<locals>.wrap(*args, **kwargs)
    144 with args[0]._session.query_history() as query_history:
--> 145     result = func(*args, **kwargs)
    146 plan = args[0]._select_statement or args[0]._plan

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/dataframe.py:645, in DataFrame._internal_collect_with_tag_no_telemetry(self, statement_params, block, data_type, log_on_exception, case_sensitive)
    633 def _internal_collect_with_tag_no_telemetry(
    634     self,
    635     *,
   (...)
    643     # we should always call this method instead of collect(), to make sure the
    644     # query tag is set properly.
--> 645     return self._session._conn.execute(
    646         self._plan,
    647         block=block,
    648         data_type=data_type,
    649         _statement_params=create_or_update_statement_params_with_query_tag(
    650             statement_params or self._statement_params,
    651             self._session.query_tag,
    652             SKIP_LEVELS_THREE,
    653         ),
    654         log_on_exception=log_on_exception,
    655         case_sensitive=case_sensitive,
    656     )

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:510, in ServerConnection.execute(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs)
    507     raise NotImplementedError(
    508         "Async query is not supported in stored procedure yet"
    509     )
--> 510 result_set, result_meta = self.get_result_set(
    511     plan,
    512     to_pandas,
    513     to_iter,
    514     **kwargs,
    515     block=block,
    516     data_type=data_type,
    517     log_on_exception=log_on_exception,
    518     case_sensitive=case_sensitive,
    519 )
    520 if not block:

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:191, in SnowflakePlan.Decorator.wrap_exception.<locals>.wrap(*args, **kwargs)
    188 ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
    189     e
    190 )
--> 191 raise ne.with_traceback(tb) from None

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:122, in SnowflakePlan.Decorator.wrap_exception.<locals>.wrap(*args, **kwargs)
    121 try:
--> 122     return func(*args, **kwargs)
    123 except snowflake.connector.errors.ProgrammingError as e:

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:612, in ServerConnection.get_result_set(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs)
    611     final_query = final_query.replace(holder, id_)
--> 612 result = self.run_query(
    613     final_query,
    614     to_pandas,
    615     to_iter and (i == len(plan.queries) - 1),
    616     is_ddl_on_temp_object=query.is_ddl_on_temp_object,
    617     block=not is_last,
    618     data_type=data_type,
    619     async_job_plan=plan,
    620     log_on_exception=log_on_exception,
    621     case_sensitive=case_sensitive,
    622     params=query.params,
    623     **kwargs,
    624 )
    625 placeholders[query.query_id_place_holder] = (
    626     result["sfqid"] if not is_last else result.query_id
    627 )

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:123, in ServerConnection._Decorator.wrap_exception.<locals>.wrap(*args, **kwargs)
    122 except Exception as ex:
--> 123     raise ex

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:117, in ServerConnection._Decorator.wrap_exception.<locals>.wrap(*args, **kwargs)
    116 try:
--> 117     return func(*args, **kwargs)
    118 except ReauthenticationRequest as ex:

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:417, in ServerConnection.run_query(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, num_statements, **kwargs)
    416         logger.error(f"Failed to execute query{query_id_log} {query}\n{ex}")
--> 417     raise ex
    419 # fetch_pandas_all/batches() only works for SELECT statements
    420 # We call fetchall() if fetch_pandas_all/batches() fails,
    421 # because when the query plan has multiple queries, it will
    422 # have non-select statements, and it shouldn't fail if the user
    423 # calls to_pandas() to execute the query.

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:402, in ServerConnection.run_query(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, num_statements, **kwargs)
    401 if block:
--> 402     results_cursor = self.execute_and_notify_query_listener(
    403         query, params=params, **kwargs
    404     )
    405     logger.debug(f"Execute query [queryID: {results_cursor.sfqid}] {query}")

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/snowpark/_internal/server_connection.py:354, in ServerConnection.execute_and_notify_query_listener(self, query, **kwargs)
    351 def execute_and_notify_query_listener(
    352     self, query: str, **kwargs: Any
    353 ) -> SnowflakeCursor:
--> 354     results_cursor = self._cursor.execute(query, **kwargs)
    355     self.notify_query_listeners(
    356         QueryRecord(results_cursor.sfqid, results_cursor.query)
    357     )

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/cursor.py:1080, in SnowflakeCursor.execute(self, command, params, _bind_stage, timeout, _exec_async, _no_retry, _do_reset, _put_callback, _put_azure_callback, _put_callback_output_stream, _get_callback, _get_azure_callback, _get_callback_output_stream, _show_progress_bar, _statement_params, _is_internal, _describe_only, _no_results, _is_put_get, _raise_put_get_error, _force_put_overwrite, _skip_upload_on_content_match, file_stream, num_statements)
   1079     error_class = IntegrityError if is_integrity_error else ProgrammingError
-> 1080     Error.errorhandler_wrapper(self.connection, self, error_class, errvalue)
   1081 return self

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/errors.py:290, in Error.errorhandler_wrapper(connection, cursor, error_class, error_value)
    274 """Error handler wrapper that calls the errorhandler method.
    275 
    276 Args:
   (...)
    287     exception to the first handler in that order.
    288 """
--> 290 handed_over = Error.hand_to_other_handler(
    291     connection,
    292     cursor,
    293     error_class,
    294     error_value,
    295 )
    296 if not handed_over:

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/errors.py:345, in Error.hand_to_other_handler(connection, cursor, error_class, error_value)
    344 cursor.messages.append((error_class, error_value))
--> 345 cursor.errorhandler(connection, cursor, error_class, error_value)
    346 return True

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/connector/errors.py:221, in Error.default_errorhandler(connection, cursor, error_class, error_value)
    220 done_format_msg = error_value.get("done_format_msg")
--> 221 raise error_class(
    222     msg=error_value.get("msg"),
    223     errno=None if errno is None else int(errno),
    224     sqlstate=error_value.get("sqlstate"),
    225     sfqid=error_value.get("sfqid"),
    226     query=error_value.get("query"),
    227     done_format_msg=(
    228         None if done_format_msg is None else bool(done_format_msg)
    229     ),
    230     connection=connection,
    231     cursor=cursor,
    232 )

SnowparkSQLException: (1304): 01b631ff-0905-2416-003f-5b8300c97c0a: 100357 (P0000): Cannot create a Python function with the specified packages. Please check your packages specification and try again.

The above exception was the direct cause of the following exception:

SnowparkSQLException                      Traceback (most recent call last)
Cell In[13], line 15
      1 grid_search = GridSearchCV(
      2     estimator=XGBRegressor(),
      3     param_grid={
   (...)
     11     output_cols=OUTPUT_COLUMNS
     12 )
     14 # Train
---> 15 grid_search.fit(train_df)

File ~/.pyenv/versions/3.11.9/envs/ml/lib/python3.11/site-packages/snowflake/ml/_internal/telemetry.py:416, in send_api_usage_telemetry.<locals>.decorator.<locals>.wrap(*args, **kwargs)
    414         raise me.original_exception from None
    415     else:
--> 416         raise me.original_exception from e
    417 else:
    418     return update_stmt_params_if_snowpark_df(res, statement_params)

SnowparkSQLException: (1300) (1304): 01b631ff-0905-2416-003f-5b8300c97c0a: 100357 (P0000): Cannot create a Python function with the specified packages. Please check your packages specification and try again.

Packages in the pyenv environment (Python 3.11.9), as reported by `pip list`:

Package Version


absl-py 1.4.0
aiobotocore 2.13.1
aiohappyeyeballs 2.3.5
aiohttp 3.10.1
aioitertools 0.11.0
aiosignal 1.3.1
anyio 3.7.1
appnope 0.1.4
argon2-cffi 23.1.0
argon2-cffi-bindings 21.2.0
arrow 1.3.0
asn1crypto 1.5.1
asttokens 2.4.1
async-lru 2.0.4
attrs 24.2.0
Babel 2.15.0
beautifulsoup4 4.12.3
bleach 6.1.0
botocore 1.34.131
cachetools 4.2.2
certifi 2024.7.4
cffi 1.17.0
charset-normalizer 3.3.2
cloudpickle 2.2.1
comm 0.2.2
contourpy 1.2.1
cramjam 2.8.3
cryptography 42.0.8
cycler 0.12.1
debugpy 1.8.5
decorator 5.1.1
defusedxml 0.7.1
executing 2.0.1
fastjsonschema 2.20.0
fastparquet 2023.10.0
filelock 3.15.4
fonttools 4.53.1
fqdn 1.5.1
frozenlist 1.4.1
fsspec 2023.12.2
h11 0.14.0
httpcore 1.0.5
httpx 0.27.0
idna 3.7
importlib_metadata 8.2.0
importlib_resources 6.4.0
ipykernel 6.29.5
ipython 8.26.0
ipywidgets 8.1.3
isoduration 20.11.0
jaraco.classes 3.4.0
jedi 0.19.1
Jinja2 3.1.4
jmespath 1.0.1
joblib 1.4.2
json5 0.9.25
jsonpointer 3.0.0
jsonschema 4.23.0
jsonschema-specifications 2023.12.1
jupyter 1.0.0
jupyter_client 8.6.2
jupyter-console 6.6.3
jupyter_core 5.7.2
jupyter-events 0.10.0
jupyter-lsp 2.2.5
jupyter_server 2.14.2
jupyter_server_terminals 0.5.3
jupyterlab 4.2.4
jupyterlab_pygments 0.3.0
jupyterlab_server 2.27.3
jupyterlab_widgets 3.0.11
keyring 24.3.1
kiwisolver 1.4.5
MarkupSafe 2.1.5
matplotlib 3.8.4
matplotlib-inline 0.1.7
mistune 3.0.2
more-itertools 10.4.0
multidict 6.0.5
nbclient 0.10.0
nbconvert 7.16.4
nbformat 5.10.4
nest-asyncio 1.6.0
notebook 7.2.1
notebook_shim 0.2.4
numpy 1.26.4
overrides 7.7.0
packaging 23.2
pandas 2.2.2
pandocfilters 1.5.1
parso 0.8.4
pexpect 4.9.0
pillow 10.4.0
pip 24.0
platformdirs 4.2.2
prometheus_client 0.20.0
prompt_toolkit 3.0.47
psutil 6.0.0
ptyprocess 0.7.0
pure_eval 0.2.3
pyarrow 10.0.1
pycparser 2.22
Pygments 2.18.0
PyJWT 2.9.0
pyOpenSSL 24.2.1
pyparsing 3.1.2
python-dateutil 2.9.0.post0
python-json-logger 2.0.7
pytimeparse 1.1.8
pytz 2024.1
PyYAML 6.0.2
pyzmq 26.1.0
qtconsole 5.5.2
QtPy 2.4.1
referencing 0.35.1
requests 2.32.3
retrying 1.3.4
rfc3339-validator 0.1.4
rfc3986-validator 0.1.1
rpds-py 0.20.0
s3fs 2023.12.2
scikit-learn 1.3.0
scipy 1.14.0
seaborn 0.13.2
Send2Trash 1.8.3
setuptools 65.5.0
six 1.16.0
sniffio 1.3.1
snowflake-connector-python 3.10.0
snowflake-ml-python 1.5.4
snowflake-snowpark-python 1.17.0
sortedcontainers 2.4.0
soupsieve 2.5
sqlparse 0.5.1
stack-data 0.6.3
terminado 0.18.1
threadpoolctl 3.5.0
tinycss2 1.3.0
tomlkit 0.13.0
tornado 6.4.1
traitlets 5.14.3
types-python-dateutil 2.9.0.20240316
typing_extensions 4.12.2
tzdata 2024.1
uri-template 1.3.0
urllib3 2.2.2
wcwidth 0.2.13
webcolors 24.6.0
webencodings 0.5.1
websocket-client 1.8.0
wheel 0.44.0
widgetsnbextension 4.0.11
wrapt 1.16.0
xgboost 1.7.6
yarl 1.9.4
zipp 3.19.2

sfc-gh-klim commented 1 month ago

Resolved after pinning numpy to 1.23.5 (the environment above had numpy 1.26.4).