Closed semistone closed 4 years ago
test in single-mode and it doesn't happen. only in cluster mode.
could reproduce by
import org.redisson.Redisson;
import org.redisson.api.BatchOptions;
import org.redisson.api.RBatch;
import org.redisson.api.RedissonClient;
import org.redisson.client.codec.Codec;
import org.redisson.codec.JsonJacksonCodec;
import org.redisson.config.Config;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CompletionStage;
public class TestRedis {
static RedissonClient client;
public static void main(String[] args) throws InterruptedException {
new TestRedis();
}
public TestRedis() throws InterruptedException {
initRedisCluster(); // test cluster
//initRedisSingle(); // test single
initData();
crash();
Thread.sleep(10000);
test();
Thread.sleep(100000000);
}
private void initData () {
for (int i = 0 ; i < 10000; i++) {
client.getBucket("api:rapid:test:test"+i).set("testx"+i);
}
}
private void test() {
new Thread(() -> {
try {
while (true) {
System.out.println("start test");
try {
System.out.println("=========== test thread ==================");
//System.out.println("get normal mode " + client.getBucket("api:rapid:test:test10").get());
get().thenAccept((map) ->{
System.out.println("get batch mode");
map.forEach((k, v)-> {
System.out.print("k:" + k + " v:"+ v + ",");
});
});
System.out.println("end test");
Thread.sleep(2000);
} catch (Exception e) {
e.printStackTrace();
}
}
} catch (Exception e) {
e.printStackTrace();
}
}).start();
}
private void crash () {
for (int i =0 ; i< 100 ; i++) {
System.out.println("start thread "+ i);
final int ii = i;
new Thread(new Runnable() {
@Override
public void run() {
for (int j = 0 ; j < 1000; j++) {
get();
}
System.out.println("finish thread "+ ii);
}
}).start();
}
}
private void initRedisSingle() {
Config config = new Config();
Codec jsonCodec = new JsonJacksonCodec();
config.setCodec(jsonCodec);
config.useSingleServer()
.setAddress("redis://127.0.0.1:6379")
.setConnectTimeout(500).setPingConnectionInterval(2000)
.setTimeout(100).setRetryAttempts(0).setRetryInterval(20);
client = Redisson.create(config);
}
private void initRedisCluster() {
Config config = new Config();
Codec jsonCodec = new JsonJacksonCodec();
config.setCodec(jsonCodec);
config.useClusterServers()
.setConnectTimeout(500).setPingConnectionInterval(2000)
.addNodeAddress("redis://127.0.0.1:30001", "redis://127.0.0.1:30002", "redis://127.0.0.1:30003",
"redis://127.0.0.1:30004", "redis://127.0.0.1:30005", "redis://127.0.0.1:30006")
.setMasterConnectionMinimumIdleSize(1).setMasterConnectionPoolSize(1)
.setSlaveConnectionMinimumIdleSize(1).setSlaveConnectionPoolSize(1)
.setTimeout(100).setRetryAttempts(0).setRetryInterval(20);
client = Redisson.create(config);
}
public CompletionStage<HashMap<String, String>> get() {
RBatch batch = client.createBatch(BatchOptions.defaults());
List<String> keys = new ArrayList();
for (int i = 0; i< 100; i++) {
String key = "api:rapid:test:test"+ (5 * 100 + i);
keys.add(key);
batch.getBucket(key).getAsync();
}
return batch.executeAsync().thenApply(objects -> {
List<?> responses = objects.getResponses();
if (responses.size() != keys.size()) {
throw new IllegalStateException("Cache bulk get returned less keys than requested");
}
final Iterator<?> iterator = responses.iterator();
HashMap<String, String> result = new HashMap<>();
for (String key : keys) {
final Object value = iterator.next();
result.put(key, (String)value);
}
return result;
});
}
}
Did you stop any Redis node during test execution?
Thanks for the provided test. Unfortunately it didn't help to reproduce the issue.
I attached used java class and log output.
it shows the normal response. if replace initRedisCluster(); => initRedisSingle();
the log didn't show exception :( but it also didn't return the result.
Did you stop any Redis node during test execution?
no
in my test I use Redis cluster
in my test I use Redis cluster
=========== test thread ================== end test start test =========== test thread ==================
is not normal, normal response is
=========== test thread ================== get normal mode testx10end test get batch mode k:api:rapid:test:test558 v:testx558,k:api:rapid:test:test559 v:testx559,k:api:rapid:test:test554 v:testx554,k:api:rapid:test:test555 v:testx555,k:api:rapid:test:test556 v:testx556,k:api:rapid:test:test557 v:testx557,k:api:rapid:test:test550 v:testx550,k:api:rapid:test:test551 v:testx551,k:api:rapid:test:test552 v:testx552,k:api:rapid:test:test553 v:testx553,k:api:rapid:test:test547 v:testx547,k:api:rapid:test:test548 v:testx548,k:api:rapid:test:test549 v:testx549,k:api:rapid:test:test543 v:testx543,k:api:rapid:test:test544 v:testx544,k:api:rapid:test:test545 v:testx545,k:api:rapid:test:test546 v:testx546,k:api:rapid:test:test540 v:testx540,k:api:rapid:test:test541 v:testx541,k:api:rapid:test:test542 v:testx542,k:api:rapid:test:test536 v:testx536,k:api:rapid:test:test537 v:testx537,k:api:rapid:test:test538 v:testx538,k:api:rapid:test:test539 v:testx539,k:api:rapid:test:test532 v:testx532,k:api:rapid:test:test533 v:testx533,k:api:rapid:test:test534 v:testx534,k:api:rapid:test:test535 v:testx535,k:api:rapid:test:test530 v:testx530,k:api:rapid:test:test531 v:testx531,k:api:rapid:test:test529 v:testx529,k:api:rapid:test:test525 v:testx525,k:api:rapid:test:test526 v:testx526,k:api:rapid:test:test527 v:testx527,k:api:rapid:test:test528 v:testx528,k:api:rapid:test:test521 v:testx521,k:api:rapid:test:test522 v:testx522,k:api:rapid:test:test523 v:testx523,k:api:rapid:test:test524 v:testx524,k:api:rapid:test:test520 v:testx520,k:api:rapid:test:test518 v:testx518,k:api:rapid:test:test519 v:testx519,k:api:rapid:test:test514 v:testx514,k:api:rapid:test:test515 v:testx515,k:api:rapid:test:test516 v:testx516,k:api:rapid:test:test517 v:testx517,k:api:rapid:test:test510 v:testx510,k:api:rapid:test:test598 v:testx598,k:api:rapid:test:test511 v:testx511,k:api:rapid:test:test599 v:testx599,k:api:rapid:test:test512 v:testx512,k:api:rapid:test:test513 v:testx513,k:api:rapid:test:test594 v:testx594,k:api:rapid:test:test595 v:testx595,k:api:rapid:test:test596 v:testx596,k:api:rapid:test:test597 v:testx597,k:api:rapid:test:test590 v:testx590,k:api:rapid:test:test591 v:testx591,k:api:rapid:test:test592 v:testx592,k:api:rapid:test:test593 v:testx593,k:api:rapid:test:test507 v:testx507,k:api:rapid:test:test508 v:testx508,k:api:rapid:test:test509 v:testx509,k:api:rapid:test:test503 v:testx503,k:api:rapid:test:test504 v:testx504,k:api:rapid:test:test505 v:testx505,k:api:rapid:test:test506 v:testx506,k:api:rapid:test:test587 v:testx587,k:api:rapid:test:test500 v:testx500,k:api:rapid:test:test588 v:testx588,k:api:rapid:test:test501 v:testx501,k:api:rapid:test:test589 v:testx589,k:api:rapid:test:test502 v:testx502,k:api:rapid:test:test583 v:testx583,k:api:rapid:test:test584 v:testx584,k:api:rapid:test:test585 v:testx585,k:api:rapid:test:test586 v:testx586,k:api:rapid:test:test580 v:testx580,k:api:rapid:test:test581 v:testx581,k:api:rapid:test:test582 v:testx582,k:api:rapid:test:test576 v:testx576,k:api:rapid:test:test577 v:testx577,k:api:rapid:test:test578 v:testx578,k:api:rapid:test:test579 v:testx579,k:api:rapid:test:test572 v:testx572,k:api:rapid:test:test573 v:testx573,k:api:rapid:test:test574 v:testx574,k:api:rapid:test:test575 v:testx575,k:api:rapid:test:test570 v:testx570,k:api:rapid:test:test571 v:testx571,k:api:rapid:test:test569 v:testx569,k:api:rapid:test:test565 v:testx565,k:api:rapid:test:test566 v:testx566,k:api:rapid:test:test567 v:testx567,k:api:rapid:test:test568 v:testx568,k:api:rapid:test:test561 v:testx561,k:api:rapid:test:test562 v:testx562,k:api:rapid:test:test563 v:testx563,k:api:rapid:test:test564 v:testx564,k:api:rapid:test:test560 v:testx560,start test =========== test thread ==================
I don't know why there is no exception.
You don't catch it.
You need to add into get() method:
batch.executeAsync().exceptionally(e -> {
e.printStackTrace();
return null;
});
I reproduced the issue. It affects only RBatch objects in cluster mode. Thanks for test case
I reproduced the issue. It affects only RBatch objects in cluster mode. Thanks for test case
Can you inform us which operation or param will trigger this bug and is there any workaround?
@nicreve
workaround is to comment out this code block:
Fixed!
Relate to https://github.com/redisson/redisson/issues/2043
Expected behavior
When using a fixed connection pool of min=64 and max=64 to every node (masters and slaves) on cluster config. Redisson is able to open a healthy connection pool of 64 to each of the nodes
Actual behavior
Connection never recover
Redis version
4.0.8 and use utils/create-cluster to create local cluster
Redission version
3.11.5
Reproduce step
Redisson configuration
and request controller, each HTTP GET request will fetch 100 keys from Redis cluster in batch mode.
stress test 100 concurrency
ab -n 10000 -c 100 http://localhost:8080/rapid-samples-minimal-spring-boot/get/5
just after running about 20 seconds and stop the stress test tool.
then I call curl http://localhost:8080/rapid-samples-minimal-spring-boot/get/5 again it will already return an error and never recover. error log is
Connection status, still connected.