Open BruceOuyang opened 2 years ago
常见能够引起CPU100%异常的情况:
排查步骤
1) 使用 top 命令找到占用CPU高的Java进程PID
[root@localhost ~]# top
top - 14:00:50 up 4 days, 22:50, 3 users, load average: 0.22, 0.18, 0.15
Tasks: 131 total, 2 running, 129 sleeping, 0 stopped, 0 zombie
%Cpu(s): 0.3 us, 0.5 sy, 0.0 ni, 99.0 id, 0.0 wa, 0.0 hi, 0.2 si, 0.0 st
KiB Mem : 8581928 total, 869088 free, 6752996 used, 959844 buff/cache
KiB Swap: 4390908 total, 4390908 free, 0 used. 1557144 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
6894 root 20 0 4914804 1.0g 15248 S 1.0 12.8 77:45.27 java
9 root 20 0 0 0 0 R 0.3 0.0 8:16.12 rcu_sched
687 root 20 0 264972 4864 3736 S 0.3 0.1 8:32.06 vmtoolsd
1914 root 20 0 869520 35528 6088 S 0.3 0.4 10:06.24 BT-Task
2107 mysql 20 0 3090688 727384 11120 S 0.3 8.5 14:41.44 mysqld
86629 root 20 0 162104 2328 1596 R 0.3 0.0 0:00.64 top
127200 devops 20 0 718388 24200 6416 S 0.3 0.3 13:15.48 node_exporter
1 root 20 0 125476 3992 2608 S 0.0 0.0 2:16.30 systemd
2) 根据进程ID找到占用CPU高的线程
[root@localhost ~]# ps -mp 6894 -o THREAD,tid | sort -r
USER %CPU PRI SCNT WCHAN USER SYSTEM TID
root 1.1 - - - - - -
root 0.2 19 - futex_ - - 7101
root 0.2 19 - futex_ - - 7100
root 0.2 19 - futex_ - - 7099
3) 将指定的线程ID输出为16进制格式
[root@localhost ~]# printf "%x\n" 7101
1bbd
4) 根据16进制格式的线程ID查找线程堆栈信息
[root@localhost ~]# jstack 6894 | grep 1bbd -A 50
"SimplePauseDetectorThread_2" #54 daemon prio=5 os_prio=0 tid=0x0000560f69ae5800 nid=0x1bbd waiting on condition [0x00007f3dd0ee8000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at java.lang.Thread.sleep(Thread.java:340)
at java.util.concurrent.TimeUnit.sleep(TimeUnit.java:386)
at org.LatencyUtils.TimeServices.sleepNanos(TimeServices.java:62)
at org.LatencyUtils.SimplePauseDetector$SimplePauseDetectorThread.run(SimplePauseDetector.java:116)
"SimplePauseDetectorThread_1" #53 daemon prio=5 os_prio=0 tid=0x0000560f69ae3800 nid=0x1bbc waiting on condition [0x00007f3dd0fe9000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at java.lang.Thread.sleep(Thread.java:340)
at java.util.concurrent.TimeUnit.sleep(TimeUnit.java:386)
at org.LatencyUtils.TimeServices.sleepNanos(TimeServices.java:62)
at org.LatencyUtils.SimplePauseDetector$SimplePauseDetectorThread.run(SimplePauseDetector.java:116)
"SimplePauseDetectorThread_0" #52 daemon prio=5 os_prio=0 tid=0x0000560f69a33000 nid=0x1bbb waiting on condition [0x00007f3dd10ea000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at java.lang.Thread.sleep(Thread.java:340)
at java.util.concurrent.TimeUnit.sleep(TimeUnit.java:386)
at org.LatencyUtils.TimeServices.sleepNanos(TimeServices.java:62)
at org.LatencyUtils.SimplePauseDetector$SimplePauseDetectorThread.run(SimplePauseDetector.java:116)
"Thread-7" #51 daemon prio=5 os_prio=0 tid=0x0000560f69a72800 nid=0x1bba waiting on condition [0x00007f3dd11eb000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x0000000741da57e8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.LatencyUtils.PauseDetector$PauseDetectorThread.run(PauseDetector.java:85)
"lettuce-nioEventLoop-4-1" #50 daemon prio=5 os_prio=0 tid=0x00007f3e2e39e000 nid=0x1bb8 runnable [0x00007f3dd22ed000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
- locked <0x0000000740ce6880> (a io.netty.channel.nio.SelectedSelectionKeySet)
- locked <0x0000000740d599a8> (a java.util.Collections$UnmodifiableSet)
- locked <0x0000000740ce78a8> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:101)
at io.netty.channel.nio.SelectedSelectionKeySetSelector.select(SelectedSelectionKeySetSelector.java:68)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:805)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:748)
使用 MAT(Eclipse Memory Analyzer Tool)分析内存泄漏
可以使用以下命令查看使用内存最多的5个进程
或者
或者