could not parse main worker host and port correctly.
24/01/09 15:06:12.357 dag-scheduler-event-loop INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 9 (MapPartitionsRDD[39] at reduce at LightGBMBase.scala:490) (first 15 tasks are for partitions Vector(0))
24/01/09 15:06:12.357 dag-scheduler-event-loop INFO YarnClusterScheduler: Adding task set 9.0 with 1 tasks
24/01/09 15:06:12.361 dispatcher-CoarseGrainedScheduler INFO TaskSetManager: Starting task 0.0 in stage 9.0 (TID 36, n203-129-119.byted.org, executor 1, partition 0, NODE_LOCAL, 8224 bytes, 1 cores)
24/01/09 15:06:12.394 dispatcher-BlockManagerMaster INFO BlockManagerInfo: Added broadcast_9_piece0 in memory on n203-129-119.byted.org:45605 (size: 77.4 KiB, free: 9.4 GiB)
24/01/09 15:06:21.398 pool-44-thread-1 INFO LightGBMClassifier: driver received socket from task: fdbd:dc01:ff:100:c084:9d17:34e2:b14e:12404
24/01/09 15:06:21.400 pool-44-thread-1 INFO LightGBMClassifier: driver writing back to all connections: fdbd:dc01:ff:100:c084:9d17:34e2:b14e:12404
24/01/09 15:06:21.401 pool-44-thread-1 INFO LightGBMClassifier: driver closing all sockets and server socket
24/01/09 15:06:22.401 task-result-getter-0 WARN TaskSetManager: Lost task 0.0 in stage 9.0 (TID 36, n203-129-119.byted.org, executor 1): java.lang.Exception: Error: could not parse main worker host and port correctly
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getMainWorkerPort(TrainUtils.scala:311)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getReturnBooster(TrainUtils.scala:361)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.trainLightGBM(LightGBMBase.scala:372)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.$anonfun$innerTrain$4(LightGBMBase.scala:485)
at org.apache.spark.sql.execution.MapPartitionsExec.$anonfun$doExecute$3(objects.scala:195)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:887)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:887)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:130)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:501)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1580)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:504)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Code to reproduce issue
from synapse.ml.lightgbm import LightGBMClassifier
lgbm_model = LightGBMClassifier(
featuresCol='post_feature_vec', labelCol='label'
)
lgbm_model = lgbm_model.fit(df_samples)
### Other info / logs
```Py4JJavaError Traceback (most recent call last)
/data01/yodel/datacache/usercache/lizonghao/filecache/1704783267379-195867/launch_ipykernel.py in <module>
5 featuresCol='post_feature_vec', labelCol='label'
6 )
----> 7 lgbm_model = lgbm_model.fit(df_samples)
/opt/tiger/yodel/container/pyspark.zip/pyspark/ml/base.py in fit(self, dataset, params)
127 return self.copy(params)._fit(dataset)
128 else:
--> 129 return self._fit(dataset)
130 else:
131 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
/opt/tiger/yodel/container/com.microsoft.azure_synapseml-lightgbm_2.12-0.9.5.jar/synapse/ml/lightgbm/LightGBMClassifier.py in _fit(self, dataset)
1467
1468 def _fit(self, dataset):
-> 1469 java_model = self._fit_java(dataset)
1470 return self._create_model(java_model)
1471
/opt/tiger/yodel/container/pyspark.zip/pyspark/ml/wrapper.py in _fit_java(self, dataset)
320 """
321 self._transfer_params_to_java()
--> 322 return self._java_obj.fit(dataset._jdf)
323
324 def _fit(self, dataset):
/opt/tiger/yodel/container/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
/opt/tiger/yodel/container/pyspark.zip/pyspark/sql/utils.py in deco(*a, **kw)
126 def deco(*a, **kw):
127 try:
--> 128 return f(*a, **kw)
129 except py4j.protocol.Py4JJavaError as e:
130 converted = convert_exception(e.java_exception)
/opt/tiger/yodel/container/py4j-0.10.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o1299.fit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 9.0 failed 8 times, most recent failure: Lost task 0.7 in stage 9.0 (TID 43, n203-129-119.byted.org, executor 1): java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:589)
at java.net.Socket.connect(Socket.java:538)
at java.net.Socket.<init>(Socket.java:434)
at java.net.Socket.<init>(Socket.java:211)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getNetworkInitNodes(TrainUtils.scala:240)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.$anonfun$getNetworkInfo$2(TrainUtils.scala:341)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:24)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.$anonfun$getNetworkInfo$1(TrainUtils.scala:341)
at com.microsoft.azure.synapse.ml.core.env.StreamUtilities$.using(StreamUtilities.scala:29)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getNetworkInfo(TrainUtils.scala:337)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.trainLightGBM(LightGBMBase.scala:358)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.$anonfun$innerTrain$4(LightGBMBase.scala:485)
at org.apache.spark.sql.execution.MapPartitionsExec.$anonfun$doExecute$3(objects.scala:195)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:887)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:887)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:130)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:501)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1580)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:504)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2173)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2121)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2119)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2119)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1046)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1046)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1046)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2357)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2303)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2292)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:842)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2439)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2534)
at org.apache.spark.rdd.RDD.$anonfun$reduce$1(RDD.scala:1109)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:393)
at org.apache.spark.rdd.RDD.reduce(RDD.scala:1091)
at org.apache.spark.sql.Dataset.$anonfun$reduce$1(Dataset.scala:1809)
at org.apache.spark.sql.Dataset.$anonfun$withNewRDDExecutionId$1(Dataset.scala:3689)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:115)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:189)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:89)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:825)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
at org.apache.spark.sql.Dataset.withNewRDDExecutionId(Dataset.scala:3687)
at org.apache.spark.sql.Dataset.reduce(Dataset.scala:1809)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.innerTrain(LightGBMBase.scala:490)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.innerTrain$(LightGBMBase.scala:447)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier.innerTrain(LightGBMClassifier.scala:26)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.$anonfun$train$1(LightGBMBase.scala:63)
at com.microsoft.azure.synapse.ml.logging.BasicLogging.logVerb(BasicLogging.scala:63)
at com.microsoft.azure.synapse.ml.logging.BasicLogging.logVerb$(BasicLogging.scala:60)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier.logVerb(LightGBMClassifier.scala:26)
at com.microsoft.azure.synapse.ml.logging.BasicLogging.logTrain(BasicLogging.scala:49)
at com.microsoft.azure.synapse.ml.logging.BasicLogging.logTrain$(BasicLogging.scala:48)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier.logTrain(LightGBMClassifier.scala:26)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.train(LightGBMBase.scala:44)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.train$(LightGBMBase.scala:43)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier.train(LightGBMClassifier.scala:26)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassifier.train(LightGBMClassifier.scala:26)
at org.apache.spark.ml.Predictor.fit(Predictor.scala:150)
at org.apache.spark.ml.Predictor.fit(Predictor.scala:114)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:589)
at java.net.Socket.connect(Socket.java:538)
at java.net.Socket.<init>(Socket.java:434)
at java.net.Socket.<init>(Socket.java:211)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getNetworkInitNodes(TrainUtils.scala:240)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.$anonfun$getNetworkInfo$2(TrainUtils.scala:341)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:24)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.core.utils.FaultToleranceUtils$.retryWithTimeout(FaultToleranceUtils.scala:29)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.$anonfun$getNetworkInfo$1(TrainUtils.scala:341)
at com.microsoft.azure.synapse.ml.core.env.StreamUtilities$.using(StreamUtilities.scala:29)
at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getNetworkInfo(TrainUtils.scala:337)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.trainLightGBM(LightGBMBase.scala:358)
at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.$anonfun$innerTrain$4(LightGBMBase.scala:485)
at org.apache.spark.sql.execution.MapPartitionsExec.$anonfun$doExecute$3(objects.scala:195)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:887)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:887)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:318)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:130)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:501)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1580)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:504)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
... 1 more
```
### What component(s) does this bug affect?
- [ ] `area/cognitive`: Cognitive project
- [ ] `area/core`: Core project
- [ ] `area/deep-learning`: DeepLearning project
- [X] `area/lightgbm`: Lightgbm project
- [ ] `area/opencv`: Opencv project
- [ ] `area/vw`: VW project
- [ ] `area/website`: Website
- [ ] `area/build`: Project build system
- [ ] `area/notebooks`: Samples under notebooks folder
- [ ] `area/docker`: Docker usage
- [ ] `area/models`: models related issue
### What language(s) does this bug affect?
- [ ] `language/scala`: Scala source code
- [X] `language/python`: Pyspark APIs
- [ ] `language/r`: R APIs
- [ ] `language/csharp`: .NET APIs
- [ ] `language/new`: Proposals for new client languages
### What integration(s) does this bug affect?
- [ ] `integrations/synapse`: Azure Synapse integrations
- [ ] `integrations/azureml`: Azure ML integrations
- [ ] `integrations/databricks`: Databricks integrations
Hey @LiDaiY :wave:!
Thank you so much for reporting the issue/feature request :rotating_light:.
Someone from SynapseML Team will be looking to triage this issue soon.
We appreciate your patience.
SynapseML version
synapseml-lightgbm_2.12:0.9.5
System information
Describe the problem
could not parse main worker host and port correctly.
24/01/09 15:06:12.357 dag-scheduler-event-loop INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 9 (MapPartitionsRDD[39] at reduce at LightGBMBase.scala:490) (first 15 tasks are for partitions Vector(0)) 24/01/09 15:06:12.357 dag-scheduler-event-loop INFO YarnClusterScheduler: Adding task set 9.0 with 1 tasks 24/01/09 15:06:12.361 dispatcher-CoarseGrainedScheduler INFO TaskSetManager: Starting task 0.0 in stage 9.0 (TID 36, n203-129-119.byted.org, executor 1, partition 0, NODE_LOCAL, 8224 bytes, 1 cores) 24/01/09 15:06:12.394 dispatcher-BlockManagerMaster INFO BlockManagerInfo: Added broadcast_9_piece0 in memory on n203-129-119.byted.org:45605 (size: 77.4 KiB, free: 9.4 GiB) 24/01/09 15:06:21.398 pool-44-thread-1 INFO LightGBMClassifier: driver received socket from task: fdbd:dc01:ff:100:c084:9d17:34e2:b14e:12404 24/01/09 15:06:21.400 pool-44-thread-1 INFO LightGBMClassifier: driver writing back to all connections: fdbd:dc01:ff:100:c084:9d17:34e2:b14e:12404 24/01/09 15:06:21.401 pool-44-thread-1 INFO LightGBMClassifier: driver closing all sockets and server socket 24/01/09 15:06:22.401 task-result-getter-0 WARN TaskSetManager: Lost task 0.0 in stage 9.0 (TID 36, n203-129-119.byted.org, executor 1): java.lang.Exception: Error: could not parse main worker host and port correctly at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getMainWorkerPort(TrainUtils.scala:311) at com.microsoft.azure.synapse.ml.lightgbm.TrainUtils$.getReturnBooster(TrainUtils.scala:361) at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.trainLightGBM(LightGBMBase.scala:372) at com.microsoft.azure.synapse.ml.lightgbm.LightGBMBase.$anonfun$innerTrain$4(LightGBMBase.scala:485) at org.apache.spark.sql.execution.MapPartitionsExec.$anonfun$doExecute$3(objects.scala:195) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:887) at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:887) at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354) at org.apache.spark.rdd.RDD.iterator(RDD.scala:318) at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:55) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354) at org.apache.spark.rdd.RDD.iterator(RDD.scala:318) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:354) at org.apache.spark.rdd.RDD.iterator(RDD.scala:318) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:130) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:501) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1580) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:504) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:748)
Code to reproduce issue