Hi @mjakubowski84, I want to write this data to a MinIO bucket (code below) using the following Hadoop configuration:
// Sample record type that is written to the Parquet file.
case class User(userId: String, name: String, created: java.sql.Timestamp)

// Single-row data set used to reproduce the problem.
val users: Iterable[User] = Seq(
  User("hippo", "leger", new java.sql.Timestamp(1L))
)

// Hadoop configuration for the S3A connector pointing at a custom endpoint.
val hadoopConf = new Configuration()
hadoopConf.set("fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem")
hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
hadoopConf.set("fs.s3a.connection.ssl.enabled", "true")
// NOTE(review): the reported error resolves the host "bucket.some-endpoint", i.e. the
// bucket name is still prefixed to the endpoint even though path-style access is
// requested here. Presumably the hadoop-aws version on the classpath does not
// honour this option - TODO confirm the hadoop-aws version in use.
hadoopConf.set("fs.s3a.path.style.access", "true")
hadoopConf.set("fs.s3a.endpoint", "some-endpoint") // in my code I put the real endpoint
hadoopConf.set("fs.s3a.access.key", "access_key") // in my code I put the real access key
hadoopConf.set("fs.s3a.secret.key", "secret_key") // in my code I put the real secret key
Exception in thread "main" com.amazonaws.AmazonClientException: Unable to execute HTTP request: bucket.some-endpoint: nodename nor servname provided, or not known at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:454) at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:232) at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3528) at com.amazonaws.services.s3.AmazonS3Client.headBucket(AmazonS3Client.java:1031) at com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:994) at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:297) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669) at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373) at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295) at org.apache.parquet.hadoop.util.HadoopOutputFile.fromPath(HadoopOutputFile.java:58) at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:677) at com.github.mjakubowski84.parquet4s.ParquetWriter$.internalWriter(ParquetWriter.scala:129) at com.github.mjakubowski84.parquet4s.ParquetWriterImpl.<init>(ParquetWriter.scala:186) at com.github.mjakubowski84.parquet4s.ParquetWriter$BuilderImpl.build(ParquetWriter.scala:111) at com.github.mjakubowski84.parquet4s.ParquetWriter$BuilderImpl.writeAndClose(ParquetWriter.scala:113) at WriteParquetFile$.delayedEndpoint$WriteParquetFile$1(WriteParquetFile.scala:64) at WriteParquetFile$delayedInit$body.apply(WriteParquetFile.scala:5) at scala.Function0.apply$mcV$sp(Function0.scala:42) at scala.Function0.apply$mcV$sp$(Function0.scala:42) at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:17) at scala.App.$anonfun$main$1(App.scala:98) at 
scala.App.$anonfun$main$1$adapted(App.scala:98) at scala.collection.IterableOnceOps.foreach(IterableOnce.scala:575) at scala.collection.IterableOnceOps.foreach$(IterableOnce.scala:573) at scala.collection.AbstractIterable.foreach(Iterable.scala:933) at scala.App.main(App.scala:98) at scala.App.main$(App.scala:96) at WriteParquetFile$.main(WriteParquetFile.scala:5) at WriteParquetFile.main(WriteParquetFile.scala) Caused by: java.net.UnknownHostException: bucket.some-endpoint: nodename nor servname provided, or not known at java.net.Inet6AddressImpl.lookupAllHostAddr(Native Method) at java.net.InetAddress$2.lookupAllHostAddr(InetAddress.java:929) at java.net.InetAddress.getAddressesFromNameService(InetAddress.java:1324) at java.net.InetAddress.getAllByName0(InetAddress.java:1277) at java.net.InetAddress.getAllByName(InetAddress.java:1193) at java.net.InetAddress.getAllByName(InetAddress.java:1127) at org.apache.http.impl.conn.SystemDefaultDnsResolver.resolve(SystemDefaultDnsResolver.java:45) at org.apache.http.impl.conn.DefaultClientConnectionOperator.resolveHostname(DefaultClientConnectionOperator.java:278) at org.apache.http.impl.conn.DefaultClientConnectionOperator.openConnection(DefaultClientConnectionOperator.java:162) at org.apache.http.impl.conn.ManagedClientConnectionImpl.open(ManagedClientConnectionImpl.java:294) at org.apache.http.impl.client.DefaultRequestDirector.tryConnect(DefaultRequestDirector.java:643) at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:479) at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:906) at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:805) at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:384) ... 31 more
Hi @mjakubowski84, I want to write this data to a MinIO bucket (code below) using the following Hadoop configuration:
// Sample record type that is written to the Parquet file.
case class User(userId: String, name: String, created: java.sql.Timestamp)

// Single-row data set used to reproduce the problem.
val users: Iterable[User] = Seq(
  User("hippo", "leger", new java.sql.Timestamp(1L))
)

// Hadoop configuration for the S3A connector pointing at a custom endpoint.
// Each setting is on its own line so that a trailing `//` comment cannot
// swallow the statements that follow it.
val hadoopConf = new Configuration()
hadoopConf.set("fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem")
hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
hadoopConf.set("fs.s3a.connection.ssl.enabled", "true")
// NOTE(review): the reported error resolves the host "bucket.some-endpoint", i.e. the
// bucket name is still prefixed to the endpoint even though path-style access is
// requested here. Presumably the hadoop-aws version on the classpath does not
// honour this option - TODO confirm the hadoop-aws version in use.
hadoopConf.set("fs.s3a.path.style.access", "true")
hadoopConf.set("fs.s3a.endpoint", "some-endpoint") // in my code I put the real endpoint
hadoopConf.set("fs.s3a.access.key", "access_key") // in my code I put the real access key
hadoopConf.set("fs.s3a.secret.key", "secret_key") // in my code I put the real secret key

// Writer options: Snappy compression plus the S3A-enabled Hadoop configuration.
val writerOptions = ParquetWriter.Options(
  compressionCodecName = CompressionCodecName.SNAPPY,
  hadoopConf = hadoopConf
)

// Write all users to a single Parquet file in the bucket and close the writer.
ParquetWriter
  .of[User]
  .options(writerOptions)
  .writeAndClose(Path("s3a://bucket/label_file.parquet"), users)
Exception in thread "main" com.amazonaws.AmazonClientException: Unable to execute HTTP request: bucket.some-endpoint: nodename nor servname provided, or not known at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:454) at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:232) at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3528) at com.amazonaws.services.s3.AmazonS3Client.headBucket(AmazonS3Client.java:1031) at com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:994) at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:297) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669) at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373) at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295) at org.apache.parquet.hadoop.util.HadoopOutputFile.fromPath(HadoopOutputFile.java:58) at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:677) at com.github.mjakubowski84.parquet4s.ParquetWriter$.internalWriter(ParquetWriter.scala:129) at com.github.mjakubowski84.parquet4s.ParquetWriterImpl.<init>(ParquetWriter.scala:186) at com.github.mjakubowski84.parquet4s.ParquetWriter$BuilderImpl.build(ParquetWriter.scala:111) at com.github.mjakubowski84.parquet4s.ParquetWriter$BuilderImpl.writeAndClose(ParquetWriter.scala:113) at WriteParquetFile$.delayedEndpoint$WriteParquetFile$1(WriteParquetFile.scala:64) at WriteParquetFile$delayedInit$body.apply(WriteParquetFile.scala:5) at scala.Function0.apply$mcV$sp(Function0.scala:42) at scala.Function0.apply$mcV$sp$(Function0.scala:42) at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:17) at scala.App.$anonfun$main$1(App.scala:98) at 
scala.App.$anonfun$main$1$adapted(App.scala:98) at scala.collection.IterableOnceOps.foreach(IterableOnce.scala:575) at scala.collection.IterableOnceOps.foreach$(IterableOnce.scala:573) at scala.collection.AbstractIterable.foreach(Iterable.scala:933) at scala.App.main(App.scala:98) at scala.App.main$(App.scala:96) at WriteParquetFile$.main(WriteParquetFile.scala:5) at WriteParquetFile.main(WriteParquetFile.scala) Caused by: java.net.UnknownHostException: bucket.some-endpoint: nodename nor servname provided, or not known at java.net.Inet6AddressImpl.lookupAllHostAddr(Native Method) at java.net.InetAddress$2.lookupAllHostAddr(InetAddress.java:929) at java.net.InetAddress.getAddressesFromNameService(InetAddress.java:1324) at java.net.InetAddress.getAllByName0(InetAddress.java:1277) at java.net.InetAddress.getAllByName(InetAddress.java:1193) at java.net.InetAddress.getAllByName(InetAddress.java:1127) at org.apache.http.impl.conn.SystemDefaultDnsResolver.resolve(SystemDefaultDnsResolver.java:45) at org.apache.http.impl.conn.DefaultClientConnectionOperator.resolveHostname(DefaultClientConnectionOperator.java:278) at org.apache.http.impl.conn.DefaultClientConnectionOperator.openConnection(DefaultClientConnectionOperator.java:162) at org.apache.http.impl.conn.ManagedClientConnectionImpl.open(ManagedClientConnectionImpl.java:294) at org.apache.http.impl.client.DefaultRequestDirector.tryConnect(DefaultRequestDirector.java:643) at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:479) at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:906) at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:805) at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:384) ... 31 more