AbsaOSS / spline-spark-agent

Spline agent for Apache Spark
https://absaoss.github.io/spline/
Apache License 2.0
185 stars 95 forks source link

Error loading dispatcher plugin? #636

Closed jinmu0410 closed 1 year ago

jinmu0410 commented 1 year ago
23/03/28 11:05:34 INFO AppInfoParser: Kafka version: 2.3.0
23/03/28 11:05:34 INFO AppInfoParser: Kafka commitId: fc1aaa116b661c8a
23/03/28 11:05:34 INFO AppInfoParser: Kafka startTimeMs: 1679972734018
23/03/28 11:05:34 INFO Metadata: [Producer clientId=producer-2] Cluster ID: DK2tcVuISxKUMJkoT0D0zg
23/03/28 11:05:34 INFO KafkaProducer: [Producer clientId=producer-2] Closing the Kafka producer with timeoutMillis = 9223372036854775807 ms.
23/03/28 11:05:34 ERROR ApplicationMaster: User class threw exception: java.lang.ExceptionInInitializerError
java.lang.ExceptionInInitializerError
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry.<init>(AutoDiscoveryPluginRegistry.scala:51)
    at za.co.absa.spline.agent.SplineAgent$.create(SplineAgent.scala:66)
    at za.co.absa.spline.harvester.SparkLineageInitializer.createListener(SparkLineageInitializer.scala:162)
    at za.co.absa.spline.harvester.SparkLineageInitializer.$anonfun$createListener$6(SparkLineageInitializer.scala:139)
    at za.co.absa.spline.harvester.SparkLineageInitializer.withErrorHandling(SparkLineageInitializer.scala:176)
    at za.co.absa.spline.harvester.SparkLineageInitializer.createListener(SparkLineageInitializer.scala:138)
    at za.co.absa.spline.harvester.listener.SplineQueryExecutionListener.<init>(SplineQueryExecutionListener.scala:37)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
    at org.apache.spark.util.Utils$.$anonfun$loadExtensions$1(Utils.scala:2930)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:293)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:293)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:290)
    at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
    at org.apache.spark.util.Utils$.loadExtensions(Utils.scala:2919)
    at org.apache.spark.sql.util.ExecutionListenerManager.$anonfun$new$2(QueryExecutionListener.scala:90)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.internal.SQLConf$.withExistingConf(SQLConf.scala:158)
    at org.apache.spark.sql.util.ExecutionListenerManager.$anonfun$new$1(QueryExecutionListener.scala:90)
    at org.apache.spark.sql.util.ExecutionListenerManager.$anonfun$new$1$adapted(QueryExecutionListener.scala:88)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.sql.util.ExecutionListenerManager.<init>(QueryExecutionListener.scala:88)
    at org.apache.spark.sql.internal.BaseSessionStateBuilder.$anonfun$listenerManager$2(BaseSessionStateBuilder.scala:336)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.internal.BaseSessionStateBuilder.listenerManager(BaseSessionStateBuilder.scala:336)
    at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:364)
    at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1175)
    at org.apache.spark.sql.SparkSession.$anonfun$sessionState$2(SparkSession.scala:162)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:160)
    at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:157)
    at org.apache.spark.sql.DataFrameReader.<init>(DataFrameReader.scala:698)
    at org.apache.spark.sql.SparkSession.read(SparkSession.scala:662)
    at com.hs.sdi.utils.DeltaMulTableSDIJob.$anonfun$createDataFrame$1(DeltaMulTableSDIJob.scala:390)
    at scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)
    at scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)
    at scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)
    at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)
    at scala.collection.mutable.HashMap.foreach(HashMap.scala:149)
    at com.hs.sdi.utils.DeltaMulTableSDIJob.createDataFrame(DeltaMulTableSDIJob.scala:387)
    at com.hs.sdi.utils.DeltaMulTableSDIJob.calculation(DeltaMulTableSDIJob.scala:480)
    at com.hs.sdi.DeltaMulTableSDIJobMain$.main(DeltaMulTableSDIJobMain.scala:67)
    at com.hs.sdi.DeltaMulTableSDIJobMain.main(DeltaMulTableSDIJobMain.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:739)
Caused by: io.github.classgraph.ClassGraphException: Uncaught exception during scan
    at io.github.classgraph.ClassGraph.scan(ClassGraph.java:1558)
    at io.github.classgraph.ClassGraph.scan(ClassGraph.java:1575)
    at io.github.classgraph.ClassGraph.scan(ClassGraph.java:1588)
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry$.$anonfun$PluginClasses$2(AutoDiscoveryPluginRegistry.scala:96)
    at za.co.absa.commons.lang.ARM$.using(ARM.scala:30)
    at za.co.absa.commons.lang.ARM$ResourceWrapper.flatMap(ARM.scala:43)
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry$.<init>(AutoDiscoveryPluginRegistry.scala:96)
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry$.<clinit>(AutoDiscoveryPluginRegistry.scala)
    ... 53 more
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
    at nonapi.io.github.classgraph.fileslice.reader.ClassfileReader.<init>(ClassfileReader.java:141)
    at io.github.classgraph.ClasspathElementZip$1.openClassfile(ClasspathElementZip.java:409)
    at io.github.classgraph.Classfile.<init>(Classfile.java:1925)
    at io.github.classgraph.Scanner$ClassfileScannerWorkUnitProcessor.processWorkUnit(Scanner.java:741)
    at io.github.classgraph.Scanner$ClassfileScannerWorkUnitProcessor.processWorkUnit(Scanner.java:664)
    at nonapi.io.github.classgraph.concurrency.WorkQueue.runWorkLoop(WorkQueue.java:246)
    at nonapi.io.github.classgraph.concurrency.WorkQueue.access$000(WorkQueue.java:50)
    at nonapi.io.github.classgraph.concurrency.WorkQueue$1.call(WorkQueue.java:201)
    at nonapi.io.github.classgraph.concurrency.WorkQueue$1.call(WorkQueue.java:198)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
23/03/28 11:05:34 INFO ApplicationMaster: Final app status: FAILED, exitCode: 15, (reason: User class threw exception: java.lang.ExceptionInInitializerError
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry.<init>(AutoDiscoveryPluginRegistry.scala:51)
    at za.co.absa.spline.agent.SplineAgent$.create(SplineAgent.scala:66)
    at za.co.absa.spline.harvester.SparkLineageInitializer.createListener(SparkLineageInitializer.scala:162)
    at za.co.absa.spline.harvester.SparkLineageInitializer.$anonfun$createListener$6(SparkLineageInitializer.scala:139)
    at za.co.absa.spline.harvester.SparkLineageInitializer.withErrorHandling(SparkLineageInitializer.scala:176)
    at za.co.absa.spline.harvester.SparkLineageInitializer.createListener(SparkLineageInitializer.scala:138)
    at za.co.absa.spline.harvester.listener.SplineQueryExecutionListener.<init>(SplineQueryExecutionListener.scala:37)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
    at org.apache.spark.util.Utils$.$anonfun$loadExtensions$1(Utils.scala:2930)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:293)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:293)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:290)
    at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
    at org.apache.spark.util.Utils$.loadExtensions(Utils.scala:2919)
    at org.apache.spark.sql.util.ExecutionListenerManager.$anonfun$new$2(QueryExecutionListener.scala:90)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.internal.SQLConf$.withExistingConf(SQLConf.scala:158)
    at org.apache.spark.sql.util.ExecutionListenerManager.$anonfun$new$1(QueryExecutionListener.scala:90)
    at org.apache.spark.sql.util.ExecutionListenerManager.$anonfun$new$1$adapted(QueryExecutionListener.scala:88)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.sql.util.ExecutionListenerManager.<init>(QueryExecutionListener.scala:88)
    at org.apache.spark.sql.internal.BaseSessionStateBuilder.$anonfun$listenerManager$2(BaseSessionStateBuilder.scala:336)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.internal.BaseSessionStateBuilder.listenerManager(BaseSessionStateBuilder.scala:336)
    at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:364)
    at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1175)
    at org.apache.spark.sql.SparkSession.$anonfun$sessionState$2(SparkSession.scala:162)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:160)
    at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:157)
    at org.apache.spark.sql.DataFrameReader.<init>(DataFrameReader.scala:698)
    at org.apache.spark.sql.SparkSession.read(SparkSession.scala:662)
    at com.hs.sdi.utils.DeltaMulTableSDIJob.$anonfun$createDataFrame$1(DeltaMulTableSDIJob.scala:390)
    at scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)
    at scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)
    at scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)
    at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)
    at scala.collection.mutable.HashMap.foreach(HashMap.scala:149)
    at com.hs.sdi.utils.DeltaMulTableSDIJob.createDataFrame(DeltaMulTableSDIJob.scala:387)
    at com.hs.sdi.utils.DeltaMulTableSDIJob.calculation(DeltaMulTableSDIJob.scala:480)
    at com.hs.sdi.DeltaMulTableSDIJobMain$.main(DeltaMulTableSDIJobMain.scala:67)
    at com.hs.sdi.DeltaMulTableSDIJobMain.main(DeltaMulTableSDIJobMain.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:739)
Caused by: io.github.classgraph.ClassGraphException: Uncaught exception during scan
    at io.github.classgraph.ClassGraph.scan(ClassGraph.java:1558)
    at io.github.classgraph.ClassGraph.scan(ClassGraph.java:1575)
    at io.github.classgraph.ClassGraph.scan(ClassGraph.java:1588)
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry$.$anonfun$PluginClasses$2(AutoDiscoveryPluginRegistry.scala:96)
    at za.co.absa.commons.lang.ARM$.using(ARM.scala:30)
    at za.co.absa.commons.lang.ARM$ResourceWrapper.flatMap(ARM.scala:43)
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry$.<init>(AutoDiscoveryPluginRegistry.scala:96)
    at za.co.absa.spline.harvester.plugin.registry.AutoDiscoveryPluginRegistry$.<clinit>(AutoDiscoveryPluginRegistry.scala)
    ... 53 more
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
    at nonapi.io.github.classgraph.fileslice.reader.ClassfileReader.<init>(ClassfileReader.java:141)
    at io.github.classgraph.ClasspathElementZip$1.openClassfile(ClasspathElementZip.java:409)
    at io.github.classgraph.Classfile.<init>(Classfile.java:1925)
    at io.github.classgraph.Scanner$ClassfileScannerWorkUnitProcessor.processWorkUnit(Scanner.java:741)
    at io.github.classgraph.Scanner$ClassfileScannerWorkUnitProcessor.processWorkUnit(Scanner.java:664)
    at nonapi.io.github.classgraph.concurrency.WorkQueue.runWorkLoop(WorkQueue.java:246)
    at nonapi.io.github.classgraph.concurrency.WorkQueue.access$000(WorkQueue.java:50)
    at nonapi.io.github.classgraph.concurrency.WorkQueue$1.call(WorkQueue.java:201)
    at nonapi.io.github.classgraph.concurrency.WorkQueue$1.call(WorkQueue.java:198)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
)
cerveada commented 1 year ago

@JinJiDeJinMudo do you know what caused the issue?

wajda commented 1 year ago

It's GC overhead, as far as I can see. Too many classes and too little memory in the process, I guess?

jinmu0410 commented 1 year ago

@cerveada https://github.com/classgraph/classgraph/issues/338