logicalclocks / hopsworks-tutorials

Tutorials for the Hopsworks Platform
GNU Affero General Public License v3.0
251 stars 89 forks source link

Air quality Advanced Tutorial [feature view creation] #252

Open Solab5 opened 7 months ago

Solab5 commented 7 months ago

In the third notebook, calling selected_features.show(5) fails with an OperationalError for some reason. The same error also occurs when trying to create the training data.

This is the error:

OperationalError Traceback (most recent call last) File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2266, in SQLiteDatabase.execute(self, sql, params) 2265 try: -> 2266 cur.execute(sql, *args) 2267 return cur

File ~/mambaforge/lib/python3.10/site-packages/pyhive/hive.py:408, in Cursor.execute(self, operation, parameters, **kwargs) 407 response = self._connection.client.ExecuteStatement(req) --> 408 _check_status(response) 409 self._operationHandle = response.operationHandle

File ~/mambaforge/lib/python3.10/site-packages/pyhive/hive.py:538, in _check_status(response) 537 if response.status.statusCode != ttypes.TStatusCode.SUCCESS_STATUS: --> 538 raise OperationalError(response)

OperationalError: TExecuteStatementResp(status=TStatus(statusCode=3, infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:343', 'org.apache.hive.service.cli.operation.SQLOperation:runQuery:SQLOperation.java:232', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:269', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:255', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:541', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:516', 'sun.reflect.GeneratedMethodAccessor268:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:498', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:422', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1821', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy53:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:281', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:712', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1557', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1542', 
'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:286', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1149', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:624', 'java.lang.Thread:run:Thread.java:750'], sqlState='08S01', errorCode=1, errorMessage='Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask'), operationHandle=None)

During handling of the above exception, another exception occurred:

NotSupportedError Traceback (most recent call last) File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2270, in SQLiteDatabase.execute(self, sql, params) 2269 try: -> 2270 self.con.rollback() 2271 except Exception as inner_exc: # pragma: no cover

File ~/mambaforge/lib/python3.10/site-packages/pyhive/hive.py:285, in Connection.rollback(self) 284 def rollback(self): --> 285 raise NotSupportedError("Hive does not have transactions")

NotSupportedError: Hive does not have transactions

The above exception was the direct cause of the following exception:

DatabaseError Traceback (most recent call last) Cell In[11], line 2 1 # # Uncomment this if you would like to view your selected features ----> 2 selected_features.show(5)

File ~/mambaforge/lib/python3.10/site-packages/hsfs/constructor/query.py:182, in Query.show(self, n, online) 179 read_options = {} 180 sql_query, online_conn = self._prep_read(online, read_options) --> 182 return engine.get_instance().show( 183 sql_query, self._feature_store_name, n, online_conn, read_options 184 )

File ~/mambaforge/lib/python3.10/site-packages/hsfs/engine/python.py:317, in Engine.show(self, sql_query, feature_store, n, online_conn, read_options) 316 def show(self, sql_query, feature_store, n, online_conn, read_options={}): --> 317 return self.sql( 318 sql_query, feature_store, online_conn, "default", read_options 319 ).head(n)

File ~/mambaforge/lib/python3.10/site-packages/hsfs/engine/python.py:106, in Engine.sql(self, sql_query, feature_store, online_conn, dataframe_type, read_options, schema) 96 def sql( 97 self, 98 sql_query, (...) 103 schema=None, 104 ): 105 if not online_conn: --> 106 return self._sql_offline( 107 sql_query, 108 feature_store, 109 dataframe_type, 110 schema, 111 hive_config=read_options.get("hive_config") if read_options else None, 112 ) 113 else: 114 return self._jdbc( 115 sql_query, online_conn, dataframe_type, read_options, schema 116 )

File ~/mambaforge/lib/python3.10/site-packages/hsfs/engine/python.py:144, in Engine._sql_offline(self, sql_query, feature_store, dataframe_type, schema, hive_config) 142 with warnings.catch_warnings(): 143 warnings.simplefilter("ignore", UserWarning) --> 144 result_df = util.run_with_loading_animation( 145 "Reading data from Hopsworks, using Hive", 146 pd.read_sql, 147 sql_query, 148 hive_conn, 149 ) 151 if schema: 152 result_df = Engine.cast_columns(result_df, schema)

File ~/mambaforge/lib/python3.10/site-packages/hsfs/util.py:345, in run_with_loading_animation(message, func, *args, **kwargs) 342 end = None 344 try: --> 345 result = func(*args, **kwargs) 346 end = time.time() 347 return result

File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:654, in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize, dtype_backend, dtype) 652 with pandasSQL_builder(con) as pandas_sql: 653 if isinstance(pandas_sql, SQLiteDatabase): --> 654 return pandas_sql.read_query( 655 sql, 656 index_col=index_col, 657 params=params, 658 coerce_float=coerce_float, 659 parse_dates=parse_dates, 660 chunksize=chunksize, 661 dtype_backend=dtype_backend, 662 dtype=dtype, 663 ) 665 try: 666 _is_table_name = pandas_sql.has_table(sql)

File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2330, in SQLiteDatabase.read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype, dtype_backend) 2319 def read_query( 2320 self, 2321 sql, (...) 2328 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy", 2329 ) -> DataFrame | Iterator[DataFrame]: -> 2330 cursor = self.execute(sql, params) 2331 columns = [col_desc[0] for col_desc in cursor.description] 2333 if chunksize is not None:

File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2275, in SQLiteDatabase.execute(self, sql, params) 2271 except Exception as inner_exc: # pragma: no cover 2272 ex = DatabaseError( 2273 f"Execution failed on sql: {sql}\n{exc}\nunable to rollback" 2274 ) -> 2275 raise ex from inner_exc 2277 ex = DatabaseError(f"Execution failed on sql '{sql}': {exc}") 2278 raise ex from exc

DatabaseError: Execution failed on sql: WITH right_fg0 AS (SELECT * FROM (SELECT fg1.city_name city_name, fg1.date date, fg1.pm2_5 pm2_5, fg1.pm_2_5_previous_1_day pm_2_5_previous_1_day, fg1.pm_2_5_previous_2_day pm_2_5_previous_2_day, fg1.pm_2_5_previous_3_day pm_2_5_previous_3_day, fg1.pm_2_5_previous_4_day pm_2_5_previous_4_day, fg1.pm_2_5_previous_5_day pm_2_5_previous_5_day, fg1.pm_2_5_previous_6_day pm_2_5_previous_6_day, fg1.pm_2_5_previous_7_day pm_2_5_previous_7_day, fg1.mean_7_days mean_7_days, fg1.mean_14_days mean_14_days, fg1.mean_28_days mean_28_days, fg1.std_7_days std_7_days, fg1.exp_mean_7_days exp_mean_7_days, fg1.exp_std_7_days exp_std_7_days, fg1.std_14_days std_14_days, fg1.exp_mean_14_days exp_mean_14_days, fg1.exp_std_14_days exp_std_14_days, fg1.std_28_days std_28_days, fg1.exp_mean_28_days exp_mean_28_days, fg1.exp_std_28_days exp_std_28_days, fg1.year year, fg1.day_of_month day_of_month, fg1.month month, fg1.day_of_week day_of_week, fg1.is_weekend is_weekend, fg1.sin_day_of_year sin_day_of_year, fg1.cos_day_of_year cos_day_of_year, fg1.sin_day_of_week sin_day_of_week, fg1.cos_day_of_week cos_day_of_week, fg1.unix_time unix_time, fg1.city_name join_pk_city_name, fg1.unix_time join_pk_unix_time, fg1.unix_time join_evt_unix_time, fg0.temperature_max temperature_max, fg0.temperature_min temperature_min, fg0.precipitation_sum precipitation_sum, fg0.rain_sum rain_sum, fg0.snowfall_sum snowfall_sum, fg0.precipitation_hours precipitation_hours, fg0.wind_speed_max wind_speed_max, fg0.wind_gusts_max wind_gusts_max, fg0.wind_direction_dominant wind_direction_dominant, RANK() OVER (PARTITION BY fg1.city_name, fg1.date, fg1.unix_time ORDER BY fg0.unix_time DESC) pit_rank_hopsworks FROM soll_featurestore.air_quality_1 fg1 INNER JOIN soll_featurestore.weather_1 fg0 ON fg1.city_name = fg0.city_name AND fg1.date = fg0.date AND fg1.unix_time >= fg0.unix_time) NA WHERE pit_rank_hopsworks = 1) (SELECT right_fg0.city_name city_name, right_fg0.date date, 
right_fg0.pm2_5 pm2_5, right_fg0.pm_2_5_previous_1_day pm_2_5_previous_1_day, right_fg0.pm_2_5_previous_2_day pm_2_5_previous_2_day, right_fg0.pm_2_5_previous_3_day pm_2_5_previous_3_day, right_fg0.pm_2_5_previous_4_day pm_2_5_previous_4_day, right_fg0.pm_2_5_previous_5_day pm_2_5_previous_5_day, right_fg0.pm_2_5_previous_6_day pm_2_5_previous_6_day, right_fg0.pm_2_5_previous_7_day pm_2_5_previous_7_day, right_fg0.mean_7_days mean_7_days, right_fg0.mean_14_days mean_14_days, right_fg0.mean_28_days mean_28_days, right_fg0.std_7_days std_7_days, right_fg0.exp_mean_7_days exp_mean_7_days, right_fg0.exp_std_7_days exp_std_7_days, right_fg0.std_14_days std_14_days, right_fg0.exp_mean_14_days exp_mean_14_days, right_fg0.exp_std_14_days exp_std_14_days, right_fg0.std_28_days std_28_days, right_fg0.exp_mean_28_days exp_mean_28_days, right_fg0.exp_std_28_days exp_std_28_days, right_fg0.year year, right_fg0.day_of_month day_of_month, right_fg0.month month, right_fg0.day_of_week day_of_week, right_fg0.is_weekend is_weekend, right_fg0.sin_day_of_year sin_day_of_year, right_fg0.cos_day_of_year cos_day_of_year, right_fg0.sin_day_of_week sin_day_of_week, right_fg0.cos_day_of_week cos_day_of_week, right_fg0.unix_time unix_time, right_fg0.temperature_max temperature_max, right_fg0.temperature_min temperature_min, right_fg0.precipitation_sum precipitation_sum, right_fg0.rain_sum rain_sum, right_fg0.snowfall_sum snowfall_sum, right_fg0.precipitation_hours precipitation_hours, right_fg0.wind_speed_max wind_speed_max, right_fg0.wind_gusts_max wind_gusts_max, right_fg0.wind_direction_dominant wind_direction_dominant FROM right_fg0) TExecuteStatementResp(status=TStatus(statusCode=3, infoMessages=['org.apache.hive.service.cli.HiveSQLException:Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:343', 
'org.apache.hive.service.cli.operation.SQLOperation:runQuery:SQLOperation.java:232', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:269', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:255', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:541', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:516', 'sun.reflect.GeneratedMethodAccessor268:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:498', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:422', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1821', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy53:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:281', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:712', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1557', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1542', 'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:286', 
'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1149', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:624', 'java.lang.Thread:run:Thread.java:750'], sqlState='08S01', errorCode=1, errorMessage='Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask'), operationHandle=None) unable to rollback