apache / pinot

Apache Pinot - A realtime distributed OLAP datastore
https://pinot.apache.org/
Apache License 2.0
5.38k stars 1.26k forks source link

segment download from gcs as deep store doesn't work. #5884

Open laxmanchekka opened 4 years ago

laxmanchekka commented 4 years ago

Pulling a segment from the deep store (GCS) fails with the following exception:

2020/08/17 18:40:28.418 ERROR [WebApplicationExceptionMapper] [grizzly-http-server-1] Server error:
com.google.cloud.storage.StorageException: /var/pinot/controller/data/temp/fileDownloadTemp/rawServiceView/rawServiceView__0__19__20200812T1540Z-977383100601623
    at com.google.cloud.storage.Blob.downloadTo(Blob.java:219) ~[pinot-gcs-0.4.0-SNAPSHOT-shaded.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at com.google.cloud.storage.Blob.downloadTo(Blob.java:260) ~[pinot-gcs-0.4.0-SNAPSHOT-shaded.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.apache.pinot.plugin.filesystem.GcsPinotFS.copyToLocalFile(GcsPinotFS.java:329) ~[pinot-gcs-0.4.0-SNAPSHOT-shaded.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.apache.pinot.controller.api.resources.PinotSegmentUploadDownloadRestletResource.downloadSegment(PinotSegmentUploadDownloadRestletResource.java:162) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_265]
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_265]
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_265]
    at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_265]
    at org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory.lambda$static$0(ResourceMethodInvocationHandlerFactory.java:52) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:124) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:167) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$ResponseOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:176) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:79) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:469) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:391) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:80) ~[pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.ServerRuntime$1.run(ServerRuntime.java:253) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.internal.Errors$1.call(Errors.java:248) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.internal.Errors$1.call(Errors.java:244) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.internal.Errors.process(Errors.java:292) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.internal.Errors.process(Errors.java:274) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.internal.Errors.process(Errors.java:244) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:265) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:232) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:679) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpContainer.service(GrizzlyHttpContainer.java:353) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.grizzly.http.server.HttpHandler$1.run(HttpHandler.java:200) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.grizzly.threadpool.AbstractThreadPool$Worker.doWork(AbstractThreadPool.java:569) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at org.glassfish.grizzly.threadpool.AbstractThreadPool$Worker.run(AbstractThreadPool.java:549) [pinot-all-0.4.0-SNAPSHOT-jar-with-dependencies.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_265]
Caused by: java.nio.file.NoSuchFileException: /var/pinot/controller/data/temp/fileDownloadTemp/rawServiceView/rawServiceView__0__19__20200812T1540Z-977383100601623
    at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) ~[?:1.8.0_265]
    at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) ~[?:1.8.0_265]
    at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) ~[?:1.8.0_265]
    at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214) ~[?:1.8.0_265]
    at java.nio.file.spi.FileSystemProvider.newOutputStream(FileSystemProvider.java:434) ~[?:1.8.0_265]
    at java.nio.file.Files.newOutputStream(Files.java:216) ~[?:1.8.0_265]
    at com.google.cloud.storage.Blob.downloadTo(Blob.java:216) ~[pinot-gcs-0.4.0-SNAPSHOT-shaded.jar:0.4.0-SNAPSHOT-889889e2020f0fcbd2ef316b7fd7fe3eb985c65a]
    ... 29 more

The GCS library expects the destination file's parent directory to already exist, and fails when that directory doesn't exist. See: https://github.com/apache/incubator-pinot/blob/master/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentUploadDownloadRestletResource.java#L169

We also clean up the temp directories on every startup, which means downloading from GCS never works.

xiangfu0 commented 4 years ago

@KKcorps @elonazoulay