AbsaOSS / spline

Data Lineage Tracking And Visualization Solution
https://absaoss.github.io/spline/
Apache License 2.0
596 stars 154 forks source link

Need to rewrite Scala code that gets notebook details for Databricks #1185

Closed zacayd closed 1 year ago

zacayd commented 1 year ago

Hi, I have this code to get the notebook details into the Spline agent:

However, from version 1.0.0 this is no longer supported. Is there a way to rewrite it so that it is supported on 1.0.0?

%scala
// Databricks notebook cell: enables Spline lineage tracking and attaches details about the
// running notebook (URL, user, path, name, mounts, timestamp) to every captured execution plan
// as the "notebookInfo" extra-metadata entry.
import scala.util.Try

import com.databricks.dbutils_v1.DBUtilsHolder.dbutils
import za.co.absa.spline.harvester.conf.StandardSplineConfigurationStack
import za.co.absa.spline.harvester.extra.UserExtraMetadataProvider
import za.co.absa.spline.harvester.extra.UserExtraAppendingPostProcessingFilter
import za.co.absa.spline.harvester.HarvestingContext
import org.apache.commons.configuration.Configuration
import za.co.absa.spline.harvester.SparkLineageInitializer._
import za.co.absa.spline.harvester.conf.DefaultSplineConfigurer
import za.co.absa.spline.producer.model.v1_1._
import za.co.absa.spline.producer.model._
import scala.util.parsing.json.JSON
import scala.concurrent.duration.Duration
import scala.util.{Failure, Success, Try}

val splineConf: Configuration = StandardSplineConfigurationStack(spark)

spark.enableLineageTracking(new DefaultSplineConfigurer(spark, splineConf) {
  //override protected def userExtraMetadataProvider = new UserExtraMetaDataProvider {
  //val test = dbutils.notebook.getContext.notebookPath

  // Notebook context serialized as JSON by dbutils; parsed below into nested maps.
  val notebookInformationJson = dbutils.notebook.getContext.toJson

  // The top-level values are themselves maps ("tags", "extraContext"), hence Map[String, Any].
  // Fail with an explicit message instead of an opaque ClassCastException when parsing fails.
  val outerMap: Map[String, Any] = JSON.parseFull(notebookInformationJson) match {
    case Some(parsed: Map[_, _]) => parsed.asInstanceOf[Map[String, Any]]
    case other =>
      throw new IllegalStateException(s"Cannot parse notebook context JSON, got: $other")
  }
  val tagMap = outerMap("tags").asInstanceOf[Map[String, String]]
  val extraContextMap = outerMap("extraContext").asInstanceOf[Map[String, String]]
  val notebookPath = extraContextMap("notebook_path").split("/")
  val workspaceUrl = tagMap("browserHostName")

  // Falls back to the path found in the JSON context when the typed accessor yields nothing
  // (presumably both carry the same notebook path; verify against the Databricks runtime in use).
  val workspaceName =
    dbutils.notebook().getContext().notebookPath.getOrElse(extraContextMap("notebook_path"))

  val orgId = tagMap("orgId")
  val browserHash = tagMap("browserHash")
  val notebookURL = s"$workspaceUrl/?o=$orgId$browserHash"
  val user = tagMap("user")

  // Last path segment; `split` returns an empty array for a path made only of separators,
  // so avoid indexing with `size - 1`.
  val name = notebookPath.lastOption.getOrElse("")

  val notebookInfo: Map[String, Any] = Map(
    "notebookURL" -> notebookURL,
    "user" -> user,
    "workspaceName" -> workspaceName,
    "workspaceUrl" -> workspaceUrl,
    "name" -> name,
    "mounts" -> dbutils.fs.ls("/FileStore/tables").map(_.path),
    "timestamp" -> System.currentTimeMillis
  )
  val notebookInfoJson = scala.util.parsing.json.JSONObject(notebookInfo)

  // Supplies the extra metadata; only the execution plan receives the notebook details,
  // the other hooks return placeholder entries.
  override protected def maybeUserExtraMetadataProvider: Option[UserExtraMetadataProvider] =
    Some(new UserExtraMetadataProvider() {
      override def forExecEvent(event: ExecutionEvent, ctx: HarvestingContext): Map[String, Any] =
        Map("foo" -> "bar1")

      override def forExecPlan(plan: ExecutionPlan, ctx: HarvestingContext): Map[String, Any] =
        Map("notebookInfo" -> notebookInfoJson)

      override def forOperation(op: ReadOperation, ctx: HarvestingContext): Map[String, Any] =
        Map("foo" -> "bar3")

      override def forOperation(op: WriteOperation, ctx: HarvestingContext): Map[String, Any] =
        Map("foo" -> "bar4")

      override def forOperation(op: DataOperation, ctx: HarvestingContext): Map[String, Any] =
        Map("foo" -> "bar5")
    })
})