# Downloader stage: fetch the connector archives and the extra Hive/Iceberg jars,
# then make them world-readable so a later stage can copy them unchanged.
# The base tag is pinned so rebuilds do not silently pick up a new Alpine release.
FROM alpine:3.18 AS downloader

# NOTE(review): these downloads are not integrity-checked. Once the expected
# digests are known, prefer `ADD --checksum=sha256:<digest> <url> <dest>` (BuildKit).
ADD https://github.com/streamnative/pulsar-io-lakehouse/releases/download/v3.1.0.4/pulsar-io-lakehouse-3.1.0.4-cloud.nar /tmp/pulsar-io-lakehouse-3.1.0.4-cloud.nar
ADD https://github.com/streamnative/pulsar-io-lakehouse/releases/download/v3.1.0.4/pulsar-io-lakehouse-3.1.0.4.nar /tmp/pulsar-io-lakehouse-3.1.0.4.nar
ADD https://repo1.maven.org/maven2/org/apache/hive/hive-metastore/3.1.2/hive-metastore-3.1.2.jar /tmp/hive-metastore-3.1.2.jar
# Fetched from the canonical Maven Central path (same artifact as the
# search.maven.org "remotecontent" link, without the extra redirect).
ADD https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-hive-runtime/0.13.2/iceberg-hive-runtime-0.13.2.jar /tmp/iceberg-hive-runtime-0.13.2.jar

# Files added from a URL are created with mode 600; relax them to 644 in a
# single layer instead of one layer per file.
RUN chmod 644 \
    /tmp/hive-metastore-3.1.2.jar \
    /tmp/iceberg-hive-runtime-0.13.2.jar \
    /tmp/pulsar-io-lakehouse-3.1.0.4-cloud.nar \
    /tmp/pulsar-io-lakehouse-3.1.0.4.nar
# Final image: the stock Pulsar "all" distribution plus the downloaded artifacts.
FROM apachepulsar/pulsar-all:3.1.0

# Lakehouse connector archives (cloud and regular builds) go next to the
# bundled connectors.
COPY --from=downloader \
    /tmp/pulsar-io-lakehouse-3.1.0.4-cloud.nar \
    /tmp/pulsar-io-lakehouse-3.1.0.4.nar \
    /pulsar/connectors/

# Extra Hive/Iceberg jars are placed in the Pulsar lib directory.
# NOTE(review): the reported NoClassDefFoundError for
# org/apache/hadoop/hive/metastore/api/UnknownDBException suggests these jars
# either are not visible to the connector's class loader or do not contain
# that class; verify before relying on this layout.
COPY --from=downloader \
    /tmp/hive-metastore-3.1.2.jar \
    /tmp/iceberg-hive-runtime-0.13.2.jar \
    /pulsar/lib/

# Local iceberg.json from the build context, stored alongside the connectors.
COPY ./iceberg.json /pulsar/connectors/iceberg.json
Error Log
2023-10-03T20:26:59,826+0000 [lakehouse-io-1-1] ERROR org.apache.pulsar.ecosystem.io.lakehouse.sink.SinkWriter - process record failed.
java.lang.IllegalArgumentException: Cannot initialize Catalog implementation org.apache.iceberg.hive.HiveCatalog: Cannot find constructor for interface org.apache.iceberg.catalog.Catalog
Missing org.apache.iceberg.hive.HiveCatalog [java.lang.NoClassDefFoundError: org/apache/hadoop/hive/metastore/api/UnknownDBException]
at org.apache.iceberg.CatalogUtil.loadCatalog(CatalogUtil.java:182) ~[iceberg-core-0.13.1.jar:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.iceberg.CatalogLoader$HiveCatalogLoader.loadCatalog(CatalogLoader.java:107) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.iceberg.TableLoader$CatalogTableLoader.<init>(TableLoader.java:124) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.iceberg.TableLoader$CatalogTableLoader.<init>(TableLoader.java:112) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.iceberg.TableLoader.fromCatalog(TableLoader.java:48) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.iceberg.IcebergWriter.<init>(IcebergWriter.java:83) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.LakehouseWriter.getWriter(LakehouseWriter.java:41) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.SinkWriter.getOrCreateWriter(SinkWriter.java:148) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.ecosystem.io.lakehouse.sink.SinkWriter.run(SinkWriter.java:104) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) ~[?:?]
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) ~[netty-common-4.1.77.Final.jar:4.1.77.Final]
at java.lang.Thread.run(Thread.java:833) ~[?:?]
Caused by: java.lang.NoSuchMethodException: Cannot find constructor for interface org.apache.iceberg.catalog.Catalog
Missing org.apache.iceberg.hive.HiveCatalog [java.lang.NoClassDefFoundError: org/apache/hadoop/hive/metastore/api/UnknownDBException]
at org.apache.iceberg.common.DynConstructors$Builder.buildChecked(DynConstructors.java:227) ~[iceberg-common-0.13.1.jar:?]
at org.apache.iceberg.CatalogUtil.loadCatalog(CatalogUtil.java:180) ~[iceberg-core-0.13.1.jar:?]
... 12 more
2023-10-03T20:27:01,514+0000 [unifiyadkinville/ifs/icerberg_sink-0] ERROR org.apache.pulsar.ecosystem.io.lakehouse.SinkConnector - Exit caused by lakehouse writer stop working
2023-10-03T20:27:01,514+0000 [unifiyadkinville/ifs/icerberg_sink-0] INFO org.apache.pulsar.functions.instance.JavaInstanceRunnable - Encountered exception in sink write:
org.apache.pulsar.ecosystem.io.lakehouse.exception.LakehouseConnectorException: Exit caused by lakehouse writer stop working
at org.apache.pulsar.ecosystem.io.lakehouse.SinkConnector.write(SinkConnector.java:83) ~[cZDQlLbu3-KT07yEI4R2DQ/:?]
at org.apache.pulsar.functions.instance.JavaInstanceRunnable.sendOutputMessage(JavaInstanceRunnable.java:439) ~[?:?]
at org.apache.pulsar.functions.instance.JavaInstanceRunnable.handleResult(JavaInstanceRunnable.java:401) ~[?:?]
at org.apache.pulsar.functions.instance.JavaInstanceRunnable.run(JavaInstanceRunnable.java:341) ~[?:?]
at java.lang.Thread.run(Thread.java:833) ~[?:?]
2023-10-03T20:27:01,519+0000 [unifiyadkinville/ifs/icerberg_sink-0] ERROR org.apache.pulsar.functions.instance.JavaInstanceRunnable - [unifiyadkinville/ifs/icerberg_sink:0] Uncaught exception in Java Instance
java.lang.RuntimeException: Failed to process message: 69:48:-1:151
at org.apache.pulsar.functions.source.PulsarSource.lambda$buildRecord$6(PulsarSource.java:155) ~[org.apache.pulsar-pulsar-functions-instance-3.1.0.jar:3.1.0]
at org.apache.pulsar.functions.source.PulsarRecord.fail(PulsarRecord.java:133) ~[org.apache.pulsar-pulsar-functions-instance-3.1.0.jar:3.1.0]
at org.apache.pulsar.functions.instance.JavaInstanceRunnable.sendOutputMessage(JavaInstanceRunnable.java:444) ~[org.apache.pulsar-pulsar-functions-instance-3.1.0.jar:3.1.0]
at org.apache.pulsar.functions.instance.JavaInstanceRunnable.handleResult(JavaInstanceRunnable.java:401) ~[org.apache.pulsar-pulsar-functions-instance-3.1.0.jar:3.1.0]
at org.apache.pulsar.functions.instance.JavaInstanceRunnable.run(JavaInstanceRunnable.java:341) ~[org.apache.pulsar-pulsar-functions-instance-3.1.0.jar:3.1.0]
at java.lang.Thread.run(Thread.java:833) ~[?:?]
2023-10-03T20:27:01,520+0000 [unifiyadkinville/ifs/icerberg_sink-0] INFO org.apache.pulsar.functions.instance.JavaInstanceRunnable - Closing instance
Additional context
I haven't added any additional sinks, sources, or Java libraries to the Pulsar image, as this was not mentioned in the documentation. Let me know if I am missing something.
The sink does not work with the Iceberg Hive catalog: a dependency required by the Hive catalog is missing.
Missing org.apache.iceberg.hive.HiveCatalog
To Reproduce: steps to reproduce the behavior:
Missing org.apache.iceberg.hive.HiveCatalog [java.lang.NoClassDefFoundError: org/apache/hadoop/hive/metastore/api/UnknownDBException]
Expected behavior: The sink should work and tables should be created in MinIO.
Sink Configuration
Custom Pulsar Image
Error Log
Environment
Additional context: I haven't added any additional sinks, sources, or Java libraries to the Pulsar image, as this was not mentioned in the documentation. Let me know if I am missing something.