Open-EO / openeo-geotrellis-extensions

Java/Scala extensions for Geotrellis, for use with OpenEO GeoPySpark backend.
Apache License 2.0
5 stars 3 forks source link

NPE in SHub /process request #293

Open bossie opened 4 months ago

bossie commented 4 months ago

I've seen this a couple of times in the past; in this case it failed a unit test and the build:

org.openeo.geotrellissentinelhub.PyramidFactoryTest.testGamma0

org.apache.spark.SparkException: 
Job aborted due to stage failure: Task 5 in stage 153.0 failed 1 times, most recent failure: Lost task 5.0 in stage 153.0 (TID 286) (localhost executor driver): java.lang.NullPointerException: inputStream
    at java.base/java.util.Objects.requireNonNull(Objects.java:246)
    at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1305)
    at org.apache.commons.io.IOUtils.copy(IOUtils.java:978)
    at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1282)
    at org.apache.commons.io.IOUtils.copy(IOUtils.java:953)
    at org.apache.commons.io.IOUtils.toByteArray(IOUtils.java:2405)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8(ProcessApi.scala:200)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8$adapted(ProcessApi.scala:184)
    at scalaj.http.HttpRequest.$anonfun$toResponse$17(Http.scala:422)
    at scala.Option.getOrElse(Option.scala:189)
    at scalaj.http.HttpRequest.$anonfun$toResponse$14(Http.scala:414)
    at scala.Option.getOrElse(Option.scala:189)
    at scalaj.http.HttpRequest.toResponse(Http.scala:414)
    at scalaj.http.HttpRequest.doConnection(Http.scala:368)
    at scalaj.http.HttpRequest.exec(Http.scala:343)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$7(ProcessApi.scala:184)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi$.$anonfun$withRetryAfterRetries$10(ProcessApi.scala:88)
    at net.jodah.failsafe.Functions.lambda$get$0(Functions.java:46)
    at net.jodah.failsafe.RetryPolicyExecutor.lambda$supply$0(RetryPolicyExecutor.java:65)
    at net.jodah.failsafe.RetryPolicyExecutor.lambda$supply$0(RetryPolicyExecutor.java:65)
    at net.jodah.failsafe.Execution.executeSync(Execution.java:128)
    at net.jodah.failsafe.FailsafeExecutor.call(FailsafeExecutor.java:378)
    at net.jodah.failsafe.FailsafeExecutor.get(FailsafeExecutor.java:68)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi$.withRetryAfterRetries(ProcessApi.scala:88)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.getTile(ProcessApi.scala:183)
    at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$layer$13(PyramidFactory.scala:163)
    at org.openeo.geotrellissentinelhub.MemoizedAuthApiAccessTokenAuthorizer.authorized(Authorizer.scala:112)
    at org.openeo.geotrellissentinelhub.PyramidFactory.authorized(PyramidFactory.scala:77)
    at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$layer$12(PyramidFactory.scala:160)
    at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:513)
    at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:168)
    at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:101)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
    at org.apache.spark.scheduler.Task.run(Task.scala:139)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2785)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2721)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2720)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2720)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1206)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1206)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1206)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2984)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2923)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2912)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:971)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2263)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2284)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2303)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2328)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1266)
    at org.openeo.geotrellissentinelhub.PyramidFactoryTest.testLayer(PyramidFactoryTest.scala:319)
    at org.openeo.geotrellissentinelhub.PyramidFactoryTest.testGamma0(PyramidFactoryTest.scala:146)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:566)
    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
    at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
    at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
    at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:54)
    at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
    at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
    at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
    at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
    at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
    at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
    at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
    at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
    at org.junit.runner.JUnitCore.run(JUnitCore.java:115)
    at org.junit.vintage.engine.execution.RunnerExecutor.execute(RunnerExecutor.java:42)
    at org.junit.vintage.engine.VintageTestEngine.executeAllChildren(VintageTestEngine.java:80)
    at org.junit.vintage.engine.VintageTestEngine.execute(VintageTestEngine.java:72)
    at org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:220)
    at org.junit.platform.launcher.core.DefaultLauncher.lambda$execute$6(DefaultLauncher.java:188)
    at org.junit.platform.launcher.core.DefaultLauncher.withInterceptedStreams(DefaultLauncher.java:202)
    at org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:181)
    at org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:128)
    at org.apache.maven.surefire.junitplatform.JUnitPlatformProvider.invokeAllTests(JUnitPlatformProvider.java:150)
    at org.apache.maven.surefire.junitplatform.JUnitPlatformProvider.invoke(JUnitPlatformProvider.java:124)
    at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
    at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
    at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
    at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
Caused by: java.lang.NullPointerException: inputStream
    at java.base/java.util.Objects.requireNonNull(Objects.java:246)
    at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1305)
    at org.apache.commons.io.IOUtils.copy(IOUtils.java:978)
    at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1282)
    at org.apache.commons.io.IOUtils.copy(IOUtils.java:953)
    at org.apache.commons.io.IOUtils.toByteArray(IOUtils.java:2405)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8(ProcessApi.scala:200)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$8$adapted(ProcessApi.scala:184)
    at scalaj.http.HttpRequest.$anonfun$toResponse$17(Http.scala:422)
    at scala.Option.getOrElse(Option.scala:189)
    at scalaj.http.HttpRequest.$anonfun$toResponse$14(Http.scala:414)
    at scala.Option.getOrElse(Option.scala:189)
    at scalaj.http.HttpRequest.toResponse(Http.scala:414)
    at scalaj.http.HttpRequest.doConnection(Http.scala:368)
    at scalaj.http.HttpRequest.exec(Http.scala:343)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.$anonfun$getTile$7(ProcessApi.scala:184)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi$.$anonfun$withRetryAfterRetries$10(ProcessApi.scala:88)
    at net.jodah.failsafe.Functions.lambda$get$0(Functions.java:46)
    at net.jodah.failsafe.RetryPolicyExecutor.lambda$supply$0(RetryPolicyExecutor.java:65)
    at net.jodah.failsafe.RetryPolicyExecutor.lambda$supply$0(RetryPolicyExecutor.java:65)
    at net.jodah.failsafe.Execution.executeSync(Execution.java:128)
    at net.jodah.failsafe.FailsafeExecutor.call(FailsafeExecutor.java:378)
    at net.jodah.failsafe.FailsafeExecutor.get(FailsafeExecutor.java:68)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi$.withRetryAfterRetries(ProcessApi.scala:88)
    at org.openeo.geotrellissentinelhub.DefaultProcessApi.getTile(ProcessApi.scala:183)
    at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$layer$13(PyramidFactory.scala:163)
    at org.openeo.geotrellissentinelhub.MemoizedAuthApiAccessTokenAuthorizer.authorized(Authorizer.scala:112)
    at org.openeo.geotrellissentinelhub.PyramidFactory.authorized(PyramidFactory.scala:77)
    at org.openeo.geotrellissentinelhub.PyramidFactory.$anonfun$layer$12(PyramidFactory.scala:160)
    at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:513)
    at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:168)
    at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:101)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
    at org.apache.spark.scheduler.Task.run(Task.scala:139)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)

Judging from the code, we get a 200 response back, but the InputStream handed to our response parser is null, so there is nothing to read from:

https://github.com/Open-EO/openeo-geotrellis-extensions/blob/2a1cd0d8486a194189f9f7cd7cb3d4b301ba5874/geotrellis-sentinelhub/src/main/scala/org/openeo/geotrellissentinelhub/ProcessApi.scala#L185-L201

It is not clear whether this is a bug in our code, a bug in a library such as scalaj, or something peculiar about the response that SHub sent back. It might be appropriate to log and retry, but it is unclear whether processing units (PUs) are deducted for this 200 response.

There is a 500 response in the test's output that might indicate a temporary problem at SHub: shub_npe.txt