snowflakedb / snowflake-ml-python

Apache License 2.0
38 stars 8 forks source link

SnowparkSQLException with snowml transform jobs #32

Closed mbkupfer closed 1 year ago

mbkupfer commented 1 year ago

Just trying out snowparkml and I'm running into an issue where it is creating a temporary internal stage that is not authorized. I see the stage exists, and I'm able to access other stages in that same schema as well. The difference is that this is an internal temporary stage, while the others are just internal, not temporary.

Exception:

---------------------------------------------------------------------------
SnowparkSQLException                      Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_872976\3643504582.py in <cell line: 1>()
----> 1 ct.fit(churn_feats_smb).transform(churn_feats_smb).show()

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\ml\_internal\telemetry.py in wrap(*args, **kwargs)
    300             )
    301             try:
--> 302                 res = func(*args, **kwargs)
    303             except Exception as e:
    304                 error = repr(e)

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\ml\modeling\compose\column_transformer.py in fit(self, dataset)
    323             self._fit_pandas(dataset)
    324         elif isinstance(dataset, DataFrame):
--> 325             self._fit_snowpark(dataset)
    326         else:
    327             raise TypeError(

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\ml\modeling\compose\column_transformer.py in _fit_snowpark(self, dataset)
    386         )
    387         # Put locally serialized transform on stage.
--> 388         session.file.put(
    389             local_transform_file_name,
    390             stage_transform_file_name,

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\file_operation.py in put(self, local_file_name, stage_location, parallel, auto_compress, source_compression, overwrite, statement_params)
    135                 options,
    136             )
--> 137             put_result = snowflake.snowpark.dataframe.DataFrame(
    138                 self._session, plan
    139             )._internal_collect_with_tag(statement_params=statement_params)

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\telemetry.py in wrap(*args, **kwargs)
    137     def wrap(*args, **kwargs):
    138         with args[0]._session.query_history() as query_history:
--> 139             result = func(*args, **kwargs)
    140         plan = args[0]._select_statement or args[0]._plan
    141         api_calls = [

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\dataframe.py in _internal_collect_with_tag_no_telemetry(self, statement_params, block, data_type, log_on_exception, case_sensitive)
    632         # we should always call this method instead of collect(), to make sure the
    633         # query tag is set properly.
--> 634         return self._session._conn.execute(
    635             self._plan,
    636             block=block,

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\server_connection.py in execute(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs)
    440                 "Async query is not supported in stored procedure yet"
    441             )
--> 442         result_set, result_meta = self.get_result_set(
    443             plan,
    444             to_pandas,

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\analyzer\snowflake_plan.py in wrap(*args, **kwargs)
    178                             e
    179                         )
--> 180                         raise ne.with_traceback(tb) from None
    181 
    182             return wrap

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\analyzer\snowflake_plan.py in wrap(*args, **kwargs)
    108             def wrap(*args, **kwargs):
    109                 try:
--> 110                     return func(*args, **kwargs)
    111                 except snowflake.connector.errors.ProgrammingError as e:
    112                     query = None

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\server_connection.py in get_result_set(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs)
    548                         for holder, id_ in placeholders.items():
    549                             final_query = final_query.replace(holder, id_)
--> 550                         result = self.run_query(
    551                             final_query,
    552                             to_pandas,

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\server_connection.py in wrap(*args, **kwargs)
    100                     )
    101                 except Exception as ex:
--> 102                     raise ex
    103 
    104             return wrap

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\server_connection.py in wrap(*args, **kwargs)
     94                     raise SnowparkClientExceptionMessages.SERVER_SESSION_HAS_BEEN_CLOSED()
     95                 try:
---> 96                     return func(*args, **kwargs)
     97                 except ReauthenticationRequest as ex:
     98                     raise SnowparkClientExceptionMessages.SERVER_SESSION_EXPIRED(

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\server_connection.py in run_query(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, **kwargs)
    363                 query_id_log = f" [queryID: {ex.sfqid}]" if hasattr(ex, "sfqid") else ""
    364                 logger.error(f"Failed to execute query{query_id_log} {query}\n{ex}")
--> 365             raise ex
    366 
    367         # fetch_pandas_all/batches() only works for SELECT statements

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\snowpark\_internal\server_connection.py in run_query(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, **kwargs)
    344                 kwargs["_statement_params"]["SNOWPARK_SKIP_TXN_COMMIT_IN_DDL"] = True
    345             if block:
--> 346                 results_cursor = self._cursor.execute(query, params=params, **kwargs)
    347                 self.notify_query_listeners(
    348                     QueryRecord(results_cursor.sfqid, results_cursor.query)

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\connector\cursor.py in execute(self, command, params, _bind_stage, timeout, _exec_async, _no_retry, _do_reset, _put_callback, _put_azure_callback, _put_callback_output_stream, _get_callback, _get_azure_callback, _get_callback_output_stream, _show_progress_bar, _statement_params, _is_internal, _describe_only, _no_results, _is_put_get, _raise_put_get_error, _force_put_overwrite, _skip_upload_on_content_match, file_stream, num_statements)
    908             )  # NULL result in a non-nullable column
    909             error_class = IntegrityError if is_integrity_error else ProgrammingError
--> 910             Error.errorhandler_wrapper(self.connection, self, error_class, errvalue)
    911         return self
    912 

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\connector\errors.py in errorhandler_wrapper(connection, cursor, error_class, error_value)
    288         """
    289 
--> 290         handed_over = Error.hand_to_other_handler(
    291             connection,
    292             cursor,

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\connector\errors.py in hand_to_other_handler(connection, cursor, error_class, error_value)
    343         if cursor is not None:
    344             cursor.messages.append((error_class, error_value))
--> 345             cursor.errorhandler(connection, cursor, error_class, error_value)
    346             return True
    347         elif connection is not None:

~\AppData\Local\miniconda3\envs\py38\lib\site-packages\snowflake\connector\errors.py in default_errorhandler(connection, cursor, error_class, error_value)
    219         errno = error_value.get("errno")
    220         done_format_msg = error_value.get("done_format_msg")
--> 221         raise error_class(
    222             msg=error_value.get("msg"),
    223             errno=None if errno is None else int(errno),

SnowparkSQLException: (1304): 01ae0538-0403-a5cd-0000-bc035b4e6bc6: 002003 (02000): SQL compilation error:
Stage 'DATABASE.SCHEMA."SNOWML_TRANSFORM_1096664C_4044_485E_9432_D635381B6A58   MPIY8AYSJQ"' does not exist or not authorized.
mbkupfer commented 1 year ago

And here is the failed query when pulled up in the query history log:

PUT 'file://C:/Users/MAXIM~1.KUP/AppData/Local/Temp/tmpexh0yyze' '@SNOWML_TRANSFORM_04C37AFE_AB72_4239_AEE7_7B6876D9DF46\tmpexh0yyze'  
parallel = 4 source_compression = 'AUTO_DETECT' auto_compress = False overwrite = True
SQL compilation error: Stage 'SANDBOX.BI."SNOWML_TRANSFORM_04C37AFE_AB72_4239_AEE7_7B6876D9DF46 MPEXH0YYZE"' does not exist or not authorized.

Not sure if it's a red herring, but the printed exception seems to be treating the \t of \tmpexh0yyze as a tab character.

sfc-gh-snandamuri commented 1 year ago

@mbkupfer May I know which version of snowflake-ml-python you are using? If you are not already using the latest version, could you please upgrade to snowflake-ml-python==1.0.4 (https://pypi.org/project/snowflake-ml-python/1.0.4/) and test again?

mbkupfer commented 1 year ago

Ah yes, I was using 1.0.2. Upgrading to 1.0.4 did the trick! Thanks for the reply @sfc-gh-snandamuri