OpenTSDB / opentsdb

A scalable, distributed Time Series Database.
http://opentsdb.net
GNU Lesser General Public License v2.1
5k stars 1.25k forks source link

A deadlock occurs when an HBase region server restarts or becomes unstable #978

Closed opsun closed 7 years ago

opsun commented 7 years ago
Found one Java-level deadlock:
=============================
"AsyncHBase Timer RPC Timeout Timer #3":
  waiting to lock monitor 0x00007f5d5810e4e8 (object 0x00000003fb02bc80, a org.hbase.async.RegionClient),
  which is held by "AsyncHBase I/O Worker #8"
"AsyncHBase I/O Worker #8":
  waiting to lock monitor 0x00007f5d5000d5d8 (object 0x0000000409f3f848, a org.hbase.async.RegionClient),
  which is held by "AsyncHBase I/O Worker #67"
"AsyncHBase I/O Worker #67":
  waiting to lock monitor 0x00007f5d5810e4e8 (object 0x00000003fb02bc80, a org.hbase.async.RegionClient),
  which is held by "AsyncHBase I/O Worker #8"

Java stack information for the threads listed above:
===================================================
"AsyncHBase Timer RPC Timeout Timer #3":
    at org.hbase.async.RegionClient.toString(RegionClient.java:2028)
    - waiting to lock <0x00000003fb02bc80> (a org.hbase.async.RegionClient)
    at java.lang.String.valueOf(String.java:2847)
    at java.lang.StringBuilder.append(StringBuilder.java:128)
    at org.hbase.async.HBaseRpc$TimeoutTask.run(HBaseRpc.java:632)
    at org.jboss.netty.util.HashedWheelTimer$HashedWheelTimeout.expire(HashedWheelTimer.java:556)
    at org.jboss.netty.util.HashedWheelTimer$HashedWheelBucket.expireTimeouts(HashedWheelTimer.java:632)
    at org.jboss.netty.util.HashedWheelTimer$Worker.run(HashedWheelTimer.java:369)
    at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
    at java.lang.Thread.run(Thread.java:745)
"AsyncHBase I/O Worker #8":
    at org.hbase.async.RegionClient.toString(RegionClient.java:2028)
    - waiting to lock <0x0000000409f3f848> (a org.hbase.async.RegionClient)
    at java.lang.String.valueOf(String.java:2847)
    at java.lang.StringBuilder.append(StringBuilder.java:128)
    at org.hbase.async.HBaseClient.removeClientFromCache(HBaseClient.java:3296)
    at org.hbase.async.HBaseClient.access$2200(HBaseClient.java:190)
    at org.hbase.async.HBaseClient$RegionClientPipeline.handleDisconnect(HBaseClient.java:3164)
    - locked <0x00000003fb02bc80> (a org.hbase.async.RegionClient)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3109)
    at org.jboss.netty.channel.Channels.close(Channels.java:812)
    at org.hbase.async.RegionClient.exceptionCaught(RegionClient.java:1239)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:112)
    at org.hbase.async.RegionClient.handleUpstream(RegionClient.java:1223)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.channel.SimpleChannelHandler.exceptionCaught(SimpleChannelHandler.java:156)
    at org.jboss.netty.channel.SimpleChannelHandler.handleUpstream(SimpleChannelHandler.java:130)
    at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:36)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.exceptionCaught(SimpleChannelUpstreamHandler.java:153)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:112)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:559)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendUpstream(HBaseClient.java:3121)
    at org.jboss.netty.channel.Channels.fireExceptionCaught(Channels.java:525)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.write0(AbstractNioWorker.java:291)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.writeFromUserCode(AbstractNioWorker.java:146)
    at org.jboss.netty.channel.socket.nio.NioClientSocketPipelineSink.eventSunk(NioClientSocketPipelineSink.java:84)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendDownstream(DefaultChannelPipeline.java:779)
    at org.jboss.netty.channel.SimpleChannelHandler.writeRequested(SimpleChannelHandler.java:292)
    at org.jboss.netty.channel.SimpleChannelHandler.handleDownstream(SimpleChannelHandler.java:254)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendDownstream(DefaultChannelPipeline.java:591)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendDownstream(DefaultChannelPipeline.java:582)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3111)
    at org.jboss.netty.channel.Channels.write(Channels.java:704)
    at org.jboss.netty.channel.Channels.write(Channels.java:671)
    at org.hbase.async.RegionClient.sendRpc(RegionClient.java:1016)
    at org.hbase.async.HBaseClient.sendRpcToRegion(HBaseClient.java:1964)
    at org.hbase.async.RegionClient.retryEdit(RegionClient.java:945)
    at org.hbase.async.RegionClient.access$800(RegionClient.java:88)
    at org.hbase.async.RegionClient$1SingleEditErrback.call(RegionClient.java:960)
    at org.hbase.async.RegionClient$1SingleEditErrback.call(RegionClient.java:955)
    at com.stumbleupon.async.Deferred.doCall(Deferred.java:1278)
    at com.stumbleupon.async.Deferred.runCallbacks(Deferred.java:1257)
    at com.stumbleupon.async.Deferred.callback(Deferred.java:1005)
    at org.hbase.async.HBaseRpc.callback(HBaseRpc.java:712)
    at org.hbase.async.RegionClient.decode(RegionClient.java:1533)
    at org.hbase.async.RegionClient.decode(RegionClient.java:88)
    at org.jboss.netty.handler.codec.replay.ReplayingDecoder.callDecode(ReplayingDecoder.java:500)
    at org.jboss.netty.handler.codec.replay.ReplayingDecoder.messageReceived(ReplayingDecoder.java:485)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70)
    at org.hbase.async.RegionClient.handleUpstream(RegionClient.java:1223)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.channel.SimpleChannelHandler.messageReceived(SimpleChannelHandler.java:142)
    at org.jboss.netty.channel.SimpleChannelHandler.handleUpstream(SimpleChannelHandler.java:88)
    at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:36)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.handler.timeout.IdleStateHandler.messageReceived(IdleStateHandler.java:294)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:559)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendUpstream(HBaseClient.java:3121)
    at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:268)
    at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:255)
    at org.jboss.netty.channel.socket.nio.NioWorker.read(NioWorker.java:88)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.process(AbstractNioWorker.java:108)
    at org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:318)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.run(AbstractNioWorker.java:89)
    at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:178)
    at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
    at org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
"AsyncHBase I/O Worker #67":
    at org.hbase.async.RegionClient.toString(RegionClient.java:2028)
    - waiting to lock <0x00000003fb02bc80> (a org.hbase.async.RegionClient)
    at java.lang.String.valueOf(String.java:2847)
    at java.lang.StringBuilder.append(StringBuilder.java:128)
    at org.hbase.async.HBaseClient.removeClientFromCache(HBaseClient.java:3296)
    at org.hbase.async.HBaseClient.access$2200(HBaseClient.java:190)
    at org.hbase.async.HBaseClient$RegionClientPipeline.handleDisconnect(HBaseClient.java:3164)
    - locked <0x0000000409f3f848> (a org.hbase.async.RegionClient)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3109)
    at org.jboss.netty.channel.Channels.close(Channels.java:812)
    at org.hbase.async.RegionClient.exceptionCaught(RegionClient.java:1239)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:112)
    at org.hbase.async.RegionClient.handleUpstream(RegionClient.java:1223)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.channel.SimpleChannelHandler.exceptionCaught(SimpleChannelHandler.java:156)
    at org.jboss.netty.channel.SimpleChannelHandler.handleUpstream(SimpleChannelHandler.java:130)
    at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:36)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.exceptionCaught(SimpleChannelUpstreamHandler.java:153)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:112)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:559)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendUpstream(HBaseClient.java:3121)
    at org.jboss.netty.channel.Channels.fireExceptionCaught(Channels.java:525)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.write0(AbstractNioWorker.java:291)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.writeFromUserCode(AbstractNioWorker.java:146)
    at org.jboss.netty.channel.socket.nio.NioClientSocketPipelineSink.eventSunk(NioClientSocketPipelineSink.java:84)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendDownstream(DefaultChannelPipeline.java:779)
    at org.jboss.netty.channel.SimpleChannelHandler.writeRequested(SimpleChannelHandler.java:292)
    at org.jboss.netty.channel.SimpleChannelHandler.handleDownstream(SimpleChannelHandler.java:254)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendDownstream(DefaultChannelPipeline.java:591)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendDownstream(DefaultChannelPipeline.java:582)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendDownstream(HBaseClient.java:3111)
    at org.jboss.netty.channel.Channels.write(Channels.java:704)
    at org.jboss.netty.channel.Channels.write(Channels.java:671)
    at org.hbase.async.RegionClient.sendRpc(RegionClient.java:1016)
    at org.hbase.async.HBaseClient.sendRpcToRegion(HBaseClient.java:1964)
    at org.hbase.async.RegionClient.retryEdit(RegionClient.java:945)
    at org.hbase.async.RegionClient.access$800(RegionClient.java:88)
    at org.hbase.async.RegionClient$1MultiActionCallback.handleException(RegionClient.java:918)
    at org.hbase.async.RegionClient$1MultiActionCallback.call(RegionClient.java:867)
    at com.stumbleupon.async.Deferred.doCall(Deferred.java:1278)
    at com.stumbleupon.async.Deferred.runCallbacks(Deferred.java:1257)
    at com.stumbleupon.async.Deferred.callback(Deferred.java:1005)
    at org.hbase.async.HBaseRpc.callback(HBaseRpc.java:712)
    at org.hbase.async.RegionClient.decode(RegionClient.java:1533)
    at org.hbase.async.RegionClient.decode(RegionClient.java:88)
    at org.jboss.netty.handler.codec.replay.ReplayingDecoder.callDecode(ReplayingDecoder.java:500)
    at org.jboss.netty.handler.codec.replay.ReplayingDecoder.messageReceived(ReplayingDecoder.java:485)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70)
    at org.hbase.async.RegionClient.handleUpstream(RegionClient.java:1223)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.channel.SimpleChannelHandler.messageReceived(SimpleChannelHandler.java:142)
    at org.jboss.netty.channel.SimpleChannelHandler.handleUpstream(SimpleChannelHandler.java:88)
    at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:36)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791)
    at org.jboss.netty.handler.timeout.IdleStateHandler.messageReceived(IdleStateHandler.java:294)
    at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564)
    at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:559)
    at org.hbase.async.HBaseClient$RegionClientPipeline.sendUpstream(HBaseClient.java:3121)
    at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:268)
    at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:255)
    at org.jboss.netty.channel.socket.nio.NioWorker.read(NioWorker.java:88)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.process(AbstractNioWorker.java:108)
    at org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:318)
    at org.jboss.netty.channel.socket.nio.AbstractNioWorker.run(AbstractNioWorker.java:89)
    at org.jboss.netty.channel.socket.nio.NioWorker.run(NioWorker.java:178)
    at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
    at org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)

Found 1 deadlock.
manolama commented 7 years ago

Fixed in AsyncHBase 1.7.2 and later, so this should be good. Thanks!