getindata / kafka-connect-iceberg-sink

Apache License 2.0
77 stars 30 forks source link

Connector Crashed for some reason #16

Open rjdp opened 1 year ago

rjdp commented 1 year ago

Version : 0.2.1

Logs


ksql> show connectors
>;

 Connector Name         | Type   | Class                                                | Status                      
----------------------------------------------------------------------------------------------------------------------
 data-compliance-source | SOURCE | com.mongodb.kafka.connect.MongoSourceConnector       | RUNNING (1/1 tasks RUNNING) 
 data-compliance-sink   | SINK   | com.getindata.kafka.connect.iceberg.sink.IcebergSink | WARNING (0/1 tasks RUNNING) 
----------------------------------------------------------------------------------------------------------------------

>DESCRIBE CONNECTOR  "data-compliance-sink";

Name                 : data-compliance-sink
Class                : com.getindata.kafka.connect.iceberg.sink.IcebergSink
Type                 : sink
State                : RUNNING
WorkerId             : ksqldb-server:8083

 Task ID | State  | Error Trace                                                                                                                                                    
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 0       | FAILED | org.apache.kafka.connect.errors.ConnectException: Exiting WorkerSinkTask due to unrecoverable exception.
    at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:618)
    at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:334)
    at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:235)
    at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:204)
    at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:201)
    at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:256)
    at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
    at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.io.UncheckedIOException: java.io.IOException: Resetting to invalid mark
    at software.amazon.awssdk.utils.FunctionalUtils.asRuntimeException(FunctionalUtils.java:180)
    at software.amazon.awssdk.utils.FunctionalUtils.lambda$safeRunnable$5(FunctionalUtils.java:126)
    at software.amazon.awssdk.utils.FunctionalUtils.invokeSafely(FunctionalUtils.java:140)
    at software.amazon.awssdk.core.sync.RequestBody.lambda$fromInputStream$1(RequestBody.java:135)
    at software.amazon.awssdk.core.internal.handler.BaseClientHandler.lambda$getBody$3(BaseClientHandler.java:137)
    at software.amazon.awssdk.core.internal.http.StreamManagingStage$ClosingStreamProvider.newStream(StreamManagingStage.java:78)
    at software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient$RequestCallable.call(UrlConnectionHttpClient.java:298)
    at software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient$RequestCallable.call(UrlConnectionHttpClient.java:269)
    at software.amazon.awssdk.core.internal.util.MetricUtils.measureDurationUnsafe(MetricUtils.java:63)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.MakeHttpRequestStage.executeHttpRequest(MakeHttpRequestStage.java:77)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.MakeHttpRequestStage.execute(MakeHttpRequestStage.java:56)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.MakeHttpRequestStage.execute(MakeHttpRequestStage.java:39)
    at software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
    at software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
    at software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
    at software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptTimeoutTrackingStage.execute(ApiCallAttemptTimeoutTrackingStage.java:73)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptTimeoutTrackingStage.execute(ApiCallAttemptTimeoutTrackingStage.java:42)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.TimeoutExceptionHandlingStage.execute(TimeoutExceptionHandlingStage.java:78)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.TimeoutExceptionHandlingStage.execute(TimeoutExceptionHandlingStage.java:40)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptMetricCollectionStage.execute(ApiCallAttemptMetricCollectionStage.java:50)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptMetricCollectionStage.execute(ApiCallAttemptMetricCollectionStage.java:36)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.RetryableStage.execute(RetryableStage.java:81)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.RetryableStage.execute(RetryableStage.java:36)
    at software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
    at software.amazon.awssdk.core.internal.http.StreamManagingStage.execute(StreamManagingStage.java:56)
    at software.amazon.awssdk.core.internal.http.StreamManagingStage.execute(StreamManagingStage.java:36)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallTimeoutTrackingStage.executeWithTimer(ApiCallTimeoutTrackingStage.java:80)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallTimeoutTrackingStage.execute(ApiCallTimeoutTrackingStage.java:60)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallTimeoutTrackingStage.execute(ApiCallTimeoutTrackingStage.java:42)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallMetricCollectionStage.execute(ApiCallMetricCollectionStage.java:48)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallMetricCollectionStage.execute(ApiCallMetricCollectionStage.java:31)
    at software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
    at software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ExecutionFailureExceptionReportingStage.execute(ExecutionFailureExceptionReportingStage.java:37)
    at software.amazon.awssdk.core.internal.http.pipeline.stages.ExecutionFailureExceptionReportingStage.execute(ExecutionFailureExceptionReportingStage.java:26)
    at software.amazon.awssdk.core.internal.http.AmazonSyncHttpClient$RequestExecutionBuilderImpl.execute(AmazonSyncHttpClient.java:193)
    at software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.invoke(BaseSyncClientHandler.java:103)
    at software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.doExecute(BaseSyncClientHandler.java:171)
    at software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.lambda$execute$1(BaseSyncClientHandler.java:82)
    at software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.measureApiCallSuccess(BaseSyncClientHandler.java:179)
    at software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.execute(BaseSyncClientHandler.java:76)
    at software.amazon.awssdk.core.client.handler.SdkSyncClientHandler.execute(SdkSyncClientHandler.java:45)
    at software.amazon.awssdk.awscore.client.handler.AwsSyncClientHandler.execute(AwsSyncClientHandler.java:56)
    at software.amazon.awssdk.services.s3.DefaultS3Client.putObject(DefaultS3Client.java:9326)
    at org.apache.iceberg.aws.s3.S3OutputStream.completeUploads(S3OutputStream.java:422)
    at org.apache.iceberg.aws.s3.S3OutputStream.close(S3OutputStream.java:267)
    at java.base/sun.nio.cs.StreamEncoder.implClose(StreamEncoder.java:341)
    at java.base/sun.nio.cs.StreamEncoder.close(StreamEncoder.java:161)
    at java.base/java.io.OutputStreamWriter.close(OutputStreamWriter.java:255)
    at org.apache.iceberg.TableMetadataParser.internalWrite(TableMetadataParser.java:127)
    at org.apache.iceberg.TableMetadataParser.overwrite(TableMetadataParser.java:110)
    at org.apache.iceberg.BaseMetastoreTableOperations.writeNewMetadata(BaseMetastoreTableOperations.java:162)
    at org.apache.iceberg.aws.glue.GlueTableOperations.doCommit(GlueTableOperations.java:129)
    at org.apache.iceberg.BaseMetastoreTableOperations.commit(BaseMetastoreTableOperations.java:133)
    at org.apache.iceberg.SnapshotProducer.lambda$commit$2(SnapshotProducer.java:357)
    at org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:402)
    at org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:212)
    at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:196)
    at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:189)
    at org.apache.iceberg.SnapshotProducer.commit(SnapshotProducer.java:331)
    at com.getindata.kafka.connect.iceberg.sink.tableoperator.IcebergTableOperator.addToTablePerSchema(IcebergTableOperator.java:171)
    at com.getindata.kafka.connect.iceberg.sink.tableoperator.IcebergTableOperator.addToTable(IcebergTableOperator.java:69)
    at com.getindata.kafka.connect.iceberg.sink.IcebergChangeConsumer.accept(IcebergChangeConsumer.java:51)
    at com.getindata.kafka.connect.iceberg.sink.IcebergSinkTask.put(IcebergSinkTask.java:38)
    at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:584)
    ... 10 more
Caused by: java.io.IOException: Resetting to invalid mark
    at java.base/java.io.BufferedInputStream.reset(BufferedInputStream.java:454)
    at software.amazon.awssdk.core.io.SdkFilterInputStream.reset(SdkFilterInputStream.java:96)
    at software.amazon.awssdk.utils.FunctionalUtils.lambda$safeRunnable$5(FunctionalUtils.java:124)
    ... 74 more

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
ksql> 
gliter commented 1 year ago

Is there any other log as this looks like recovery and not root cause. or could you provide configuration that replicates it?