"http-nio-8080-exec-177" #441 daemon prio=5 os_prio=0 tid=0x00002ae00812e800 nid=0x6183 waiting for monitor entry [0x00002ae042f6b000]
java.lang.Thread.State: BLOCKED (on object monitor)
at sun.security.provider.NativePRNG$RandomIO.implNextBytes(NativePRNG.java:543)
- waiting to lock <0x0000000700ad0ca0> (a java.lang.Object)
at sun.security.provider.NativePRNG$RandomIO.access$400(NativePRNG.java:331)
at sun.security.provider.NativePRNG$Blocking.engineNextBytes(NativePRNG.java:268)
at java.security.SecureRandom.nextBytes(SecureRandom.java:455)
at java.util.UUID.randomUUID(UUID.java:145)
......
package java.util;
import java.security.*;
/**
 * Excerpt of {@code java.util.UUID} showing only the random UUID factory and
 * the holder of the generator it draws from; the rest of the class is omitted.
 */
public final class UUID implements java.io.Serializable, Comparable<UUID> {
/**
 * Creates a UUID from 16 bytes supplied by the shared {@link SecureRandom},
 * after stamping the version and variant bits into bytes 6 and 8.
 *
 * @return a new {@code UUID} built from the adjusted random bytes
 */
public static UUID randomUUID() {
// Every call reuses the single generator instance owned by Holder.
SecureRandom ng = Holder.numberGenerator;
byte[] randomBytes = new byte[16];
// NOTE(review): the accompanying thread dump shows a request thread BLOCKED
// below this call, inside NativePRNG$RandomIO.implNextBytes, waiting for a monitor.
ng.nextBytes(randomBytes);
// Keep the low nibble of byte 6 and write 0x4 into its high nibble.
randomBytes[6] &= 0x0f; /* clear version */
randomBytes[6] |= 0x40; /* set to version 4 */
// Keep the low six bits of byte 8 and set its top two bits to binary 10.
randomBytes[8] &= 0x3f; /* clear variant */
randomBytes[8] |= 0x80; /* set to IETF variant */
return new UUID(randomBytes);
}
/*
 * The random number generator used by this class to create random
 * based UUIDs. In a holder class to defer initialization until needed.
 */
private static class Holder {
// Created once, when Holder is first initialized, via the no-arg SecureRandom constructor.
static final SecureRandom numberGenerator = new SecureRandom();
}
}
安装haveged补足熵(The haveged project is an attempt to provide an easy-to-use, unpredictable random number generator based upon an adaptation of the HAVEGE algorithm. Haveged was created to remedy low-entropy conditions in the Linux random device that can occur under some workloads, especially on headless servers.)
// Recommended approach: request the "NativePRNGNonBlocking" algorithm by name.
SecureRandom sr = SecureRandom.getInstance("NativePRNGNonBlocking");
// [DANGEROUS] Method added in JDK 8; after running for a long time or under frequent
// calls, the entropy pool easily runs short and the call blocks.
// (Alternative to the statement above, not meant to be used together with it.)
SecureRandom sr = SecureRandom.getInstanceStrong();
今日值班收到线上告警,如下(部分删减):
赶紧去查看了那台机器的堆栈信息:
可以看出是 java.security.SecureRandom.nextBytes 方法导致了线程阻塞,服务使用了公司的分布式tracing框架,它底层使用 java.util.UUID 生成traceId。
1、问题分析
Linux环境下,SecureRandom.nextBytes生成随机数依赖于操作系统提供的随机数据:/dev/random和/dev/urandom,默认使用的是/dev/random,而当/dev/random文件中没有随机种子/熵(键盘和鼠标输入以及磁盘活动可以产生所需的随机性或熵)时,会阻塞线程。
Linux内核采用熵来描述数据的随机性,熵(entropy)是描述系统混乱无序程度的物理量,一个系统的熵越大则说明该系统的有序性越差,即不确定性越大。内核维护了一个熵池用来收集来自设备驱动程序和其它来源的环境噪音。
简言之就是键盘和鼠标的输入以及磁盘活动可以产生所需的随机性或熵。
但在一台缺乏此类活动的服务器上,就可能会出现问题:当系统熵池中的熵不足时,就会阻塞当前线程。
Linux的随机数
random设备提供了2个字符设备供用户态进程使用:/dev/random 和 /dev/urandom。
2、解决方案
有2种方式:
使用
-Djava.security.egd=file:/dev/./urandom
强制使用/dev/urandom
这个文件,避免阻塞现象(注:对 SecureRandom.getInstanceStrong() 不生效)。参考链接:https://www.synopsys.com/blogs/software-security/proper-use-of-javas-securerandom/
参考资料
https://hongjiang.info/java8-nativeprng-blocking/
https://stackoverflow.com/questions/137212/how-to-deal-with-a-slow-securerandom-generator
https://stackoverflow.com/questions/11051205/difference-between-java-util-random-and-java-security-securerandom
https://cloud.tencent.com/developer/article/1549509
https://segmentfault.com/a/1190000039268233
https://tersesystems.com/blog/2015/12/17/the-right-way-to-use-securerandom/
https://bugs.openjdk.java.net/browse/JDK-8098581