since a few days our Historical Nodes on AWS have temporarily problems connecting to S3. The Data are still written in S3, but from time to time follwoing error occurs (I replaced sensitive Information with <>):
ERROR-Message:
Failed on try 1, retrying in 739ms.
org.jets3t.service.ServiceException: Request Error: \<BUCKETNAME>.s3.eu-central-1.amazonaws.com: Name or service not known
at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRequest(RestStorageService.java:625) ~[jets3t-0.9.4.jar:0.9.4]
at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRequest(RestStorageService.java:279) ~[jets3t-0.9.4.jar:0.9.4]
at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRestHead(RestStorageService.java:1052) ~[jets3t-0.9.4.jar:0.9.4]
at org.jets3t.service.impl.rest.httpclient.RestStorageService.getObjectImpl(RestStorageService.java:2264) ~[jets3t-0.9.4.jar:0.9.4]
at org.jets3t.service.impl.rest.httpclient.RestStorageService.getObjectDetailsImpl(RestStorageService.java:2193) ~[jets3t-0.9.4.jar:0.9.4]
at org.jets3t.service.StorageService.getObjectDetails(StorageService.java:1120) ~[jets3t-0.9.4.jar:0.9.4]
at org.jets3t.service.StorageService.getObjectDetails(StorageService.java:575) ~[jets3t-0.9.4.jar:0.9.4]
at io.druid.storage.s3.S3Utils.isObjectInBucket(S3Utils.java:96) ~[?:?]
at io.druid.storage.s3.S3DataSegmentPuller$4.call(S3DataSegmentPuller.java:318) ~[?:?]
at io.druid.storage.s3.S3DataSegmentPuller$4.call(S3DataSegmentPuller.java:314) ~[?:?]
at io.druid.java.util.common.RetryUtils.retry(RetryUtils.java:63) [java-util-0.12.3.jar:0.12.3]
at io.druid.java.util.common.RetryUtils.retry(RetryUtils.java:81) [java-util-0.12.3.jar:0.12.3]
at io.druid.storage.s3.S3Utils.retryS3Operation(S3Utils.java:89) [druid-s3-extensions-0.12.3.jar:0.12.3]
at io.druid.storage.s3.S3DataSegmentPuller.isObjectInBucket(S3DataSegmentPuller.java:312) [druid-s3-extensions-0.12.3.jar:0.12.3]
at io.druid.storage.s3.S3DataSegmentPuller.getSegmentFiles(S3DataSegmentPuller.java:176) [druid-s3-extensions-0.12.3.jar:0.12.3]
at io.druid.storage.s3.S3LoadSpec.loadSegment(S3LoadSpec.java:60) [druid-s3-extensions-0.12.3.jar:0.12.3]
at io.druid.segment.loading.SegmentLoaderLocalCacheManager.loadInLocation(SegmentLoaderLocalCacheManager.java:205) [druid-server-0.12.3.jar:0.12.3]
at io.druid.segment.loading.SegmentLoaderLocalCacheManager.loadInLocationWithStartMarker(SegmentLoaderLocalCacheManager.java:193) [druid-server-0.12.3.jar:0.12.3]
at io.druid.segment.loading.SegmentLoaderLocalCacheManager.loadSegmentWithRetry(SegmentLoaderLocalCacheManager.java:151) [druid-server-0.12.3.jar:0.12.3]
at io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegmentFiles(SegmentLoaderLocalCacheManager.java:133) [druid-server-0.12.3.jar:0.12.3]
at io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:108) [druid-server-0.12.3.jar:0.12.3]
at io.druid.server.SegmentManager.getAdapter(SegmentManager.java:196) [druid-server-0.12.3.jar:0.12.3]
at io.druid.server.SegmentManager.loadSegment(SegmentManager.java:157) [druid-server-0.12.3.jar:0.12.3]
at io.druid.server.coordination.SegmentLoadDropHandler.loadSegment(SegmentLoadDropHandler.java:261) [druid-server-0.12.3.jar:0.12.3]
at io.druid.server.coordination.SegmentLoadDropHandler.addSegment(SegmentLoadDropHandler.java:307) [druid-server-0.12.3.jar:0.12.3]
at io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:47) [druid-server-0.12.3.jar:0.12.3]
at io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:118) [druid-server-0.12.3.jar:0.12.3]
at org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:520) [curator-recipes-4.0.0.jar:4.0.0]
at org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:514) [curator-recipes-4.0.0.jar:4.0.0]
at org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) [curator-framework-4.0.0.jar:4.0.0]
at org.apache.curator.shaded.com.google.common.util.concurrent.MoreExecutors$DirectExecutorService.execute(MoreExecutors.java:296) [curator-client-4.0.0.jar:?]
at org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) [curator-framework-4.0.0.jar:4.0.0]
at org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:512) [curator-recipes-4.0.0.jar:4.0.0]
at org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) [curator-recipes-4.0.0.jar:4.0.0]
at org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:771) [curator-recipes-4.0.0.jar:4.0.0]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_181]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_181]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_181]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_181]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_181]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_181]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_181]
Caused by: java.net.UnknownHostException: \<BUCKETNAME>.s3.eu-central-1.amazonaws.com: Name or service not known
at java.net.Inet4AddressImpl.lookupAllHostAddr(Native Method) ~[?:1.8.0_181]
at java.net.InetAddress$2.lookupAllHostAddr(InetAddress.java:928) ~[?:1.8.0_181]
at java.net.InetAddress.getAddressesFromNameService(InetAddress.java:1323) ~[?:1.8.0_181]
at java.net.InetAddress.getAllByName0(InetAddress.java:1276) ~[?:1.8.0_181]
at java.net.InetAddress.getAllByName(InetAddress.java:1192) ~[?:1.8.0_181]
at java.net.InetAddress.getAllByName(InetAddress.java:1126) ~[?:1.8.0_181]
at org.apache.http.impl.conn.SystemDefaultDnsResolver.resolve(SystemDefaultDnsResolver.java:45) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.conn.DefaultClientConnectionOperator.resolveHostname(DefaultClientConnectionOperator.java:259) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.conn.DefaultClientConnectionOperator.openConnection(DefaultClientConnectionOperator.java:159) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.conn.AbstractPoolEntry.open(AbstractPoolEntry.java:144) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.conn.AbstractPooledConnAdapter.open(AbstractPooledConnAdapter.java:131) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.client.DefaultRequestDirector.tryConnect(DefaultRequestDirector.java:611) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:446) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:882) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) ~[httpclient-4.5.1.jar:4.5.1]
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) ~[httpclient-4.5.1.jar:4.5.1]
at org.jets3t.service.impl.rest.httpclient.RestStorageService.performRequest(RestStorageService.java:328) ~[jets3t-0.9.4.jar:0.9.4]
... 41 more
We are using currently Druid 0.12.3 with following configuration:
# Maximum Amount of Heap space to use for the string dictionary during merging (broker, historical, middlemanager)
druid.query.groupBy.maxMergingDictionarySize = 250000000
druid.query.groupBy.maxOnDiskStorage = 4294967296
Hi,
since a few days our Historical Nodes on AWS have temporarily problems connecting to S3. The Data are still written in S3, but from time to time follwoing error occurs (I replaced sensitive Information with <>):
ERROR-Message:
We are using currently Druid 0.12.3 with following configuration:
common.runtime.properties:
jets3t.properties:
Does anybody else experience this behavior? I gladly provide more information/configuration to solve this issue.
Thanks, Stephan