devjain47 opened this issue 1 year ago
Here is a tutorial: https://www.youtube.com/watch?v=-_TFB0Gh3TY
@devjain47 Were you able to resolve this issue?
Hi @ad1happy2go, so far I have been unable to figure out a proper solution. If you have any suggestions, please let me know.
@soumilshah1995 Can you help here, please?
Let me search for my code snippet; I know I have done this on AWS.
Found the code
Here is the code: https://github.com/soumilshah1995/code-snippets/blob/main/schema_evol_lab.ipynb
Here is the video guide: https://www.youtube.com/watch?v=-_TFB0Gh3TY
@devjain47, let me know if my code helps.
@devjain47 Any updates here, please?
I am getting an error while trying to rename a column in a Hudi table.
```
%etl
%session_id_prefix native-hudi-dataframe-
%glue_version 3.0
%idle_timeout 2880
%worker_type G.2X
%number_of_workers 20
%%configure
{
    "--conf": "spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.hive.convertMetastoreParquet=false --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog --conf spark.sql.legacy.pathOptionBehavior.enabled= true --conf spark.sql.extensions= org.apache.spark.sql.hudi.HoodieSparkSessionExtension",
    "--datalake-formats": "hudi",
    "--enable-metrics": "true",
    "--enable-spark-ui": "true",
    "--spark-event-logs-path": "s3://aws-glue-assets-441369475256-us-west-1/sparkHistoryLogs/",
    "--enable-job-insights": "true",
    "--enable-glue-datacatalog": "true",
    "--enable-continuous-cloudwatch-log": "true",
    "--job-bookmark-option": "job-bookmark-disable"
}
```

```python
import logging
import sys

from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.dynamicframe import DynamicFrame

sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
logger = glueContext.get_logger()

db_name = "dev_mesh"
table_name_test = "data_quality_pass_results"
query = (
    f"ALTER TABLE {db_name}.{table_name_test} "
    f"RENAME COLUMN PartialPaymentLetterOfExpalnDate TO PartialPaymentLetterOfExplanDate"
)
spark.sql(query)
print(f"Query Executed: {query}")

print_hudi_table(BUCKET=BUCKET)
```
Stack trace:

```
Py4JJavaError: An error occurred while calling o97.sql.
: org.apache.spark.SparkException: Cannot find catalog plugin class for catalog 'spark_catalog': org.apache.spark.sql.hudi.catalog.HoodieCatalog
    at org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:66)
    at org.apache.spark.sql.connector.catalog.CatalogManager.loadV2SessionCatalog(CatalogManager.scala:66)
    at org.apache.spark.sql.connector.catalog.CatalogManager.$anonfun$v2SessionCatalog$2(CatalogManager.scala:85)
    at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:82)
    at org.apache.spark.sql.connector.catalog.CatalogManager.$anonfun$v2SessionCatalog$1(CatalogManager.scala:85)
    at scala.Option.map(Option.scala:146)
    at org.apache.spark.sql.connector.catalog.CatalogManager.v2SessionCatalog(CatalogManager.scala:84)
    at org.apache.spark.sql.connector.catalog.CatalogManager.catalog(CatalogManager.scala:50)
    at org.apache.spark.sql.connector.catalog.CatalogManager.currentCatalog(CatalogManager.scala:117)
    at org.apache.spark.sql.connector.catalog.LookupCatalog.currentCatalog(LookupCatalog.scala:35)
    at org.apache.spark.sql.connector.catalog.LookupCatalog.currentCatalog$(LookupCatalog.scala:35)
    at org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.currentCatalog(ResolveCatalogs.scala:29)
    at org.apache.spark.sql.connector.catalog.LookupCatalog$CatalogAndIdentifier$.unapply(LookupCatalog.scala:131)
    at org.apache.spark.sql.connector.catalog.LookupCatalog$NonSessionCatalogAndIdentifier$.unapply(LookupCatalog.scala:78)
    at org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$NonSessionCatalogAndTable$.unapply(ResolveCatalogs.scala:220)
    at org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:93)
    at org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:34)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$2(AnalysisHelper.scala:108)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$1(AnalysisHelper.scala:108)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:221)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown(AnalysisHelper.scala:106)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown$(AnalysisHelper.scala:104)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDown(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:73)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:72)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:34)
    at org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:29)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:216)
    at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:122)
    at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:118)
    at scala.collection.immutable.List.foldLeft(List.scala:85)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:213)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:205)
    at scala.collection.immutable.List.foreach(List.scala:388)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:205)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:196)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:190)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:155)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:183)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:183)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:174)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:228)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:173)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:73)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:192)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:163)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:163)
    at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:73)
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:71)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:63)
    at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
    at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:750)
```