streamnative / pulsar-archived

Apache Pulsar - distributed pub-sub messaging system
https://pulsar.apache.org
Apache License 2.0
73 stars 25 forks source link

ISSUE-9007: How to resolve BK Error while recovering ledger #1909

Open sijie opened 3 years ago

sijie commented 3 years ago

Original Issue: apache/pulsar#9007


Pulsar broker version: 2.7.0; BookKeeper version: 4.10.0.


Pulsar broker error log when a client produces a message:

15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-0-0] ERROR org.apache.bookkeeper.client.BookKeeperAdmin - BK error opening ledger: 73176
org.apache.bookkeeper.client.BKException$BKReadException: Error while reading ledger
        at org.apache.bookkeeper.client.BKException.create(BKException.java:62) ~[org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.BookKeeperAdmin$6.openComplete(BookKeeperAdmin.java:751) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerOpenOp.openComplete(LedgerOpenOp.java:232) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerOpenOp$2.readLastConfirmedComplete(LedgerOpenOp.java:215) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerHandle$10.readLastConfirmedDataComplete(LedgerHandle.java:1405) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.ReadLastConfirmedOp.readEntryComplete(ReadLastConfirmedOp.java:117) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion$1.readEntryComplete(PerChannelBookieClient.java:1829) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion.handleReadResponse(PerChannelBookieClient.java:1910) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion.handleV3Response(PerChannelBookieClient.java:1885) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$3.safeRun(PerChannelBookieClient.java:1446) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.common.util.SafeRunnable.run(SafeRunnable.java:36) [org.apache.bookkeeper-bookkeeper-common-4.10.0.jar:4.10.0]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_121]
        at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) [io.netty-netty-common-4.1.48.Final.jar:4.1.48.Final]
        at java.lang.Thread.run(Thread.java:745) [?:1.8.0_121]
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-0-0] ERROR org.apache.bookkeeper.proto.BookkeeperInternalCallbacks - Error in multi callback : -1
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-7-0] DEBUG org.apache.bookkeeper.proto.PerChannelBookieClient - Got Read response from bookie:10.33.50.71:3181 rc:EUA, ledger:0:entry:0:entryLength:0
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-0-0] DEBUG org.apache.bookkeeper.proto.PerChannelBookieClient - Got Read response from bookie:10.33.50.144:3181 rc:EUA, ledger:0:entry:0:entryLength:0
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-0-0] DEBUG org.apache.bookkeeper.proto.PerChannelBookieClient - Got Read response from bookie:10.33.50.88:3181 rc:ENOENTRY, ledger:73176:entry:-1:entryLength:0
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-7-0] ERROR org.apache.bookkeeper.client.BookKeeperAdmin - BK error opening ledger: 73175
org.apache.bookkeeper.client.BKException$BKReadException: Error while reading ledger
        at org.apache.bookkeeper.client.BKException.create(BKException.java:62) ~[org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.BookKeeperAdmin$6.openComplete(BookKeeperAdmin.java:751) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerOpenOp.openComplete(LedgerOpenOp.java:232) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerOpenOp$2.readLastConfirmedComplete(LedgerOpenOp.java:215) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerHandle$10.readLastConfirmedDataComplete(LedgerHandle.java:1405) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.ReadLastConfirmedOp.readEntryComplete(ReadLastConfirmedOp.java:117) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion$1.readEntryComplete(PerChannelBookieClient.java:1829) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion.handleReadResponse(PerChannelBookieClient.java:1910) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion.handleV3Response(PerChannelBookieClient.java:1885) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$3.safeRun(PerChannelBookieClient.java:1446) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.common.util.SafeRunnable.run(SafeRunnable.java:36) [org.apache.bookkeeper-bookkeeper-common-4.10.0.jar:4.10.0]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_121]
        at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) [io.netty-netty-common-4.1.48.Final.jar:4.1.48.Final]
        at java.lang.Thread.run(Thread.java:745) [?:1.8.0_121]
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-7-0] ERROR org.apache.bookkeeper.proto.BookkeeperInternalCallbacks - Error in multi callback : -1

I tried running the BookKeeper recover command:

bookkeeper shell recover 10.33.50.144:3181

It fails with the same error:

15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-0-0] DEBUG org.apache.bookkeeper.proto.PerChannelBookieClient - Got Read response from bookie:10.33.50.88:3181 rc:ENOENTRY, ledger:73176:entry:-1:entryLength:0
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-7-0] ERROR org.apache.bookkeeper.client.BookKeeperAdmin - BK error opening ledger: 73175
org.apache.bookkeeper.client.BKException$BKReadException: Error while reading ledger
        at org.apache.bookkeeper.client.BKException.create(BKException.java:62) ~[org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.BookKeeperAdmin$6.openComplete(BookKeeperAdmin.java:751) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerOpenOp.openComplete(LedgerOpenOp.java:232) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerOpenOp$2.readLastConfirmedComplete(LedgerOpenOp.java:215) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.LedgerHandle$10.readLastConfirmedDataComplete(LedgerHandle.java:1405) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.client.ReadLastConfirmedOp.readEntryComplete(ReadLastConfirmedOp.java:117) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion$1.readEntryComplete(PerChannelBookieClient.java:1829) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion.handleReadResponse(PerChannelBookieClient.java:1910) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$ReadCompletion.handleV3Response(PerChannelBookieClient.java:1885) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.proto.PerChannelBookieClient$3.safeRun(PerChannelBookieClient.java:1446) [org.apache.bookkeeper-bookkeeper-server-4.10.0.jar:4.10.0]
        at org.apache.bookkeeper.common.util.SafeRunnable.run(SafeRunnable.java:36) [org.apache.bookkeeper-bookkeeper-common-4.10.0.jar:4.10.0]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_121]
        at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) [io.netty-netty-common-4.1.48.Final.jar:4.1.48.Final]
        at java.lang.Thread.run(Thread.java:745) [?:1.8.0_121]
15:01:12.364 [BookKeeperClientWorker-OrderedExecutor-7-0] ERROR org.apache.bookkeeper.proto.BookkeeperInternalCallbacks - Error in multi callback : -1
bhasvij commented 2 years ago

Is there any workaround for this? We are facing the same issue in an AWS production setup.

bhasvij commented 2 years ago

We are facing this issue in the following scenario:

We have 4 bookies and are using a (4,3,1) persistence configuration. We stopped one of the bookies, and we are seeing that the recovery process keeps throwing the above error.