In the third notebook, calling `selected_features.show(5)` fails with an `OperationalError`.
The same error also occurs when trying to create the training data.
This is the error:
OperationalError Traceback (most recent call last)
File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2266, in SQLiteDatabase.execute(self, sql, params)
2265 try:
-> 2266 cur.execute(sql, *args)
2267 return cur
File ~/mambaforge/lib/python3.10/site-packages/pyhive/hive.py:408, in Cursor.execute(self, operation, parameters, **kwargs)
407 response = self._connection.client.ExecuteStatement(req)
--> 408 _check_status(response)
409 self._operationHandle = response.operationHandle
File ~/mambaforge/lib/python3.10/site-packages/pyhive/hive.py:538, in _check_status(response)
537 if response.status.statusCode != ttypes.TStatusCode.SUCCESS_STATUS:
--> 538 raise OperationalError(response)
OperationalError: TExecuteStatementResp(status=TStatus(statusCode=3, infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:343', 'org.apache.hive.service.cli.operation.SQLOperation:runQuery:SQLOperation.java:232', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:269', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:255', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:541', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:516', 'sun.reflect.GeneratedMethodAccessor268:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:498', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:422', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1821', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy53:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:281', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:712', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1557', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1542', 
'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:286', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1149', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:624', 'java.lang.Thread:run:Thread.java:750'], sqlState='08S01', errorCode=1, errorMessage='Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask'), operationHandle=None)
During handling of the above exception, another exception occurred:
NotSupportedError Traceback (most recent call last)
File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2270, in SQLiteDatabase.execute(self, sql, params)
2269 try:
-> 2270 self.con.rollback()
2271 except Exception as inner_exc: # pragma: no cover
File ~/mambaforge/lib/python3.10/site-packages/pyhive/hive.py:285, in Connection.rollback(self)
284 def rollback(self):
--> 285 raise NotSupportedError("Hive does not have transactions")
NotSupportedError: Hive does not have transactions
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
Cell In[11], line 2
1 # # Uncomment this if you would like to view your selected features
----> 2 selected_features.show(5)
File ~/mambaforge/lib/python3.10/site-packages/hsfs/constructor/query.py:182, in Query.show(self, n, online)
179 read_options = {}
180 sql_query, online_conn = self._prep_read(online, read_options)
--> 182 return engine.get_instance().show(
183 sql_query, self._feature_store_name, n, online_conn, read_options
184 )
File ~/mambaforge/lib/python3.10/site-packages/hsfs/engine/python.py:317, in Engine.show(self, sql_query, feature_store, n, online_conn, read_options)
316 def show(self, sql_query, feature_store, n, online_conn, read_options={}):
--> 317 return self.sql(
318 sql_query, feature_store, online_conn, "default", read_options
319 ).head(n)
File ~/mambaforge/lib/python3.10/site-packages/hsfs/engine/python.py:106, in Engine.sql(self, sql_query, feature_store, online_conn, dataframe_type, read_options, schema)
96 def sql(
97 self,
98 sql_query,
(...)
103 schema=None,
104 ):
105 if not online_conn:
--> 106 return self._sql_offline(
107 sql_query,
108 feature_store,
109 dataframe_type,
110 schema,
111 hive_config=read_options.get("hive_config") if read_options else None,
112 )
113 else:
114 return self._jdbc(
115 sql_query, online_conn, dataframe_type, read_options, schema
116 )
File ~/mambaforge/lib/python3.10/site-packages/hsfs/engine/python.py:144, in Engine._sql_offline(self, sql_query, feature_store, dataframe_type, schema, hive_config)
142 with warnings.catch_warnings():
143 warnings.simplefilter("ignore", UserWarning)
--> 144 result_df = util.run_with_loading_animation(
145 "Reading data from Hopsworks, using Hive",
146 pd.read_sql,
147 sql_query,
148 hive_conn,
149 )
151 if schema:
152 result_df = Engine.cast_columns(result_df, schema)
File ~/mambaforge/lib/python3.10/site-packages/hsfs/util.py:345, in run_with_loading_animation(message, func, *args, **kwargs)
342 end = None
344 try:
--> 345 result = func(*args, **kwargs)
346 end = time.time()
347 return result
File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:654, in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize, dtype_backend, dtype)
652 with pandasSQL_builder(con) as pandas_sql:
653 if isinstance(pandas_sql, SQLiteDatabase):
--> 654 return pandas_sql.read_query(
655 sql,
656 index_col=index_col,
657 params=params,
658 coerce_float=coerce_float,
659 parse_dates=parse_dates,
660 chunksize=chunksize,
661 dtype_backend=dtype_backend,
662 dtype=dtype,
663 )
665 try:
666 _is_table_name = pandas_sql.has_table(sql)
File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2330, in SQLiteDatabase.read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype, dtype_backend)
2319 def read_query(
2320 self,
2321 sql,
(...)
2328 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
2329 ) -> DataFrame | Iterator[DataFrame]:
-> 2330 cursor = self.execute(sql, params)
2331 columns = [col_desc[0] for col_desc in cursor.description]
2333 if chunksize is not None:
File ~/mambaforge/lib/python3.10/site-packages/pandas/io/sql.py:2275, in SQLiteDatabase.execute(self, sql, params)
2271 except Exception as inner_exc: # pragma: no cover
2272 ex = DatabaseError(
2273 f"Execution failed on sql: {sql}\n{exc}\nunable to rollback"
2274 )
-> 2275 raise ex from inner_exc
2277 ex = DatabaseError(f"Execution failed on sql '{sql}': {exc}")
2278 raise ex from exc
DatabaseError: Execution failed on sql: WITH right_fg0 AS (SELECT *
FROM (SELECT fg1
.city_name
city_name
, fg1
.date
date
, fg1
.pm2_5
pm2_5
, fg1
.pm_2_5_previous_1_day
pm_2_5_previous_1_day
, fg1
.pm_2_5_previous_2_day
pm_2_5_previous_2_day
, fg1
.pm_2_5_previous_3_day
pm_2_5_previous_3_day
, fg1
.pm_2_5_previous_4_day
pm_2_5_previous_4_day
, fg1
.pm_2_5_previous_5_day
pm_2_5_previous_5_day
, fg1
.pm_2_5_previous_6_day
pm_2_5_previous_6_day
, fg1
.pm_2_5_previous_7_day
pm_2_5_previous_7_day
, fg1
.mean_7_days
mean_7_days
, fg1
.mean_14_days
mean_14_days
, fg1
.mean_28_days
mean_28_days
, fg1
.std_7_days
std_7_days
, fg1
.exp_mean_7_days
exp_mean_7_days
, fg1
.exp_std_7_days
exp_std_7_days
, fg1
.std_14_days
std_14_days
, fg1
.exp_mean_14_days
exp_mean_14_days
, fg1
.exp_std_14_days
exp_std_14_days
, fg1
.std_28_days
std_28_days
, fg1
.exp_mean_28_days
exp_mean_28_days
, fg1
.exp_std_28_days
exp_std_28_days
, fg1
.year
year
, fg1
.day_of_month
day_of_month
, fg1
.month
month
, fg1
.day_of_week
day_of_week
, fg1
.is_weekend
is_weekend
, fg1
.sin_day_of_year
sin_day_of_year
, fg1
.cos_day_of_year
cos_day_of_year
, fg1
.sin_day_of_week
sin_day_of_week
, fg1
.cos_day_of_week
cos_day_of_week
, fg1
.unix_time
unix_time
, fg1
.city_name
join_pk_city_name
, fg1
.unix_time
join_pk_unix_time
, fg1
.unix_time
join_evt_unix_time
, fg0
.temperature_max
temperature_max
, fg0
.temperature_min
temperature_min
, fg0
.precipitation_sum
precipitation_sum
, fg0
.rain_sum
rain_sum
, fg0
.snowfall_sum
snowfall_sum
, fg0
.precipitation_hours
precipitation_hours
, fg0
.wind_speed_max
wind_speed_max
, fg0
.wind_gusts_max
wind_gusts_max
, fg0
.wind_direction_dominant
wind_direction_dominant
, RANK() OVER (PARTITION BY fg1
.city_name
, fg1
.date
, fg1
.unix_time
ORDER BY fg0
.unix_time
DESC) pit_rank_hopsworks
FROM soll_featurestore
.air_quality_1
fg1
INNER JOIN soll_featurestore
.weather_1
fg0
ON fg1
.city_name
= fg0
.city_name
AND fg1
.date
= fg0
.date
AND fg1
.unix_time
>= fg0
.unix_time
) NA
WHERE pit_rank_hopsworks
= 1) (SELECT right_fg0
.city_name
city_name
, right_fg0
.date
date
, right_fg0
.pm2_5
pm2_5
, right_fg0
.pm_2_5_previous_1_day
pm_2_5_previous_1_day
, right_fg0
.pm_2_5_previous_2_day
pm_2_5_previous_2_day
, right_fg0
.pm_2_5_previous_3_day
pm_2_5_previous_3_day
, right_fg0
.pm_2_5_previous_4_day
pm_2_5_previous_4_day
, right_fg0
.pm_2_5_previous_5_day
pm_2_5_previous_5_day
, right_fg0
.pm_2_5_previous_6_day
pm_2_5_previous_6_day
, right_fg0
.pm_2_5_previous_7_day
pm_2_5_previous_7_day
, right_fg0
.mean_7_days
mean_7_days
, right_fg0
.mean_14_days
mean_14_days
, right_fg0
.mean_28_days
mean_28_days
, right_fg0
.std_7_days
std_7_days
, right_fg0
.exp_mean_7_days
exp_mean_7_days
, right_fg0
.exp_std_7_days
exp_std_7_days
, right_fg0
.std_14_days
std_14_days
, right_fg0
.exp_mean_14_days
exp_mean_14_days
, right_fg0
.exp_std_14_days
exp_std_14_days
, right_fg0
.std_28_days
std_28_days
, right_fg0
.exp_mean_28_days
exp_mean_28_days
, right_fg0
.exp_std_28_days
exp_std_28_days
, right_fg0
.year
year
, right_fg0
.day_of_month
day_of_month
, right_fg0
.month
month
, right_fg0
.day_of_week
day_of_week
, right_fg0
.is_weekend
is_weekend
, right_fg0
.sin_day_of_year
sin_day_of_year
, right_fg0
.cos_day_of_year
cos_day_of_year
, right_fg0
.sin_day_of_week
sin_day_of_week
, right_fg0
.cos_day_of_week
cos_day_of_week
, right_fg0
.unix_time
unix_time
, right_fg0
.temperature_max
temperature_max
, right_fg0
.temperature_min
temperature_min
, right_fg0
.precipitation_sum
precipitation_sum
, right_fg0
.rain_sum
rain_sum
, right_fg0
.snowfall_sum
snowfall_sum
, right_fg0
.precipitation_hours
precipitation_hours
, right_fg0
.wind_speed_max
wind_speed_max
, right_fg0
.wind_gusts_max
wind_gusts_max
, right_fg0
.wind_direction_dominant
wind_direction_dominant
FROM right_fg0)
TExecuteStatementResp(status=TStatus(statusCode=3, infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:343', 'org.apache.hive.service.cli.operation.SQLOperation:runQuery:SQLOperation.java:232', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:269', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:255', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:541', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:516', 'sun.reflect.GeneratedMethodAccessor268:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:498', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:422', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1821', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy53:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:281', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:712', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1557', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1542', 
'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:286', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1149', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:624', 'java.lang.Thread:run:Thread.java:750'], sqlState='08S01', errorCode=1, errorMessage='Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.tez.TezTask'), operationHandle=None)
unable to rollback