spotahome / redis-operator

Redis Operator creates/configures/manages high availability redis with sentinel automatic failover atop Kubernetes.
Apache License 2.0
1.49k stars 356 forks source link

[redis error] detected child with unmatched pid #615

Closed tabrizu891 closed 11 months ago

tabrizu891 commented 1 year ago

Redis Version: Redis server v=6.0.16 sha=00000000:0 malloc=jemalloc-5.1.0 bits=64 build=f7b6ec69a8fa9847
Spotahome Chart Version: 3.2.4
Operator Version: redis-operator:v1.2.2

Hi, we see this warning in our Redis Sentinel clusters that have a save config. When this happens, a failover is performed and we have an interruption of several minutes. In this case the master has 9 slaves and the DB contains around 20M keys. What does it mean, and how can we fix it?

1:S 12 Jun 2023 05:10:32.064 * 1000 changes in 10800 seconds. Saving...
1:S 12 Jun 2023 05:10:32.212 * Background saving started by pid 218143
1:S 12 Jun 2023 05:10:32.313 # Warning, detected child with unmatched pid: 181469
1:S 12 Jun 2023 05:10:32.413 # Warning, detected child with unmatched pid: 181470
1:S 12 Jun 2023 05:10:32.513 # Warning, detected child with unmatched pid: 181467
1:S 12 Jun 2023 05:10:32.613 # Warning, detected child with unmatched pid: 181472
1:S 12 Jun 2023 05:10:32.714 # Warning, detected child with unmatched pid: 181473
1:S 12 Jun 2023 05:10:32.814 # Warning, detected child with unmatched pid: 181474
1:S 12 Jun 2023 05:10:32.915 # Warning, detected child with unmatched pid: 181481
1:S 12 Jun 2023 05:10:33.015 # Warning, detected child with unmatched pid: 181485
1:S 12 Jun 2023 05:10:33.116 # Warning, detected child with unmatched pid: 181486
1:S 12 Jun 2023 05:10:33.216 # Warning, detected child with unmatched pid: 181487
1:S 12 Jun 2023 05:10:33.320 # Warning, detected child with unmatched pid: 181488
1:S 12 Jun 2023 05:10:33.422 # Warning, detected child with unmatched pid: 181484
1:S 12 Jun 2023 05:10:33.523 # Warning, detected child with unmatched pid: 181507
1:S 12 Jun 2023 05:10:33.626 # Warning, detected child with unmatched pid: 181505
1:S 12 Jun 2023 05:10:33.726 # Warning, detected child with unmatched pid: 181509
1:S 12 Jun 2023 05:10:33.826 # Warning, detected child with unmatched pid: 181511
1:S 12 Jun 2023 05:10:33.927 # Warning, detected child with unmatched pid: 181512
1:S 12 Jun 2023 05:10:34.028 # Warning, detected child with unmatched pid: 181508
1:S 12 Jun 2023 05:10:34.128 # Warning, detected child with unmatched pid: 181524
1:S 12 Jun 2023 05:10:34.230 # Warning, detected child with unmatched pid: 181525
1:S 12 Jun 2023 05:10:34.332 # Warning, detected child with unmatched pid: 181526
1:S 12 Jun 2023 05:10:34.432 # Warning, detected child with unmatched pid: 181527
1:S 12 Jun 2023 05:10:34.532 # Warning, detected child with unmatched pid: 181523
1:S 12 Jun 2023 05:10:34.632 # Warning, detected child with unmatched pid: 181520
1:S 12 Jun 2023 05:10:34.732 # Warning, detected child with unmatched pid: 181546
1:S 12 Jun 2023 05:10:34.832 # Warning, detected child with unmatched pid: 181547
1:S 12 Jun 2023 05:10:34.934 # Warning, detected child with unmatched pid: 181544
1:S 12 Jun 2023 05:10:35.036 # Warning, detected child with unmatched pid: 181549
1:S 12 Jun 2023 05:10:35.136 # Warning, detected child with unmatched pid: 181550
1:S 12 Jun 2023 05:10:35.237 # Warning, detected child with unmatched pid: 181551
218143:C 12 Jun 2023 05:11:43.024 * DB saved on disk
218143:C 12 Jun 2023 05:11:43.245 * RDB: 172 MB of memory used by copy-on-write
1:S 12 Jun 2023 05:11:43.602 * Background saving terminated with success

Redis Config:

  1) "rdbchecksum"
  2) "yes"
  3) "daemonize"
  4) "no"
  5) "io-threads-do-reads"
  6) "no"
  7) "lua-replicate-commands"
  8) "yes"
  9) "always-show-logo"
 10) "no"
 11) "protected-mode"
 12) "no"
 13) "rdbcompression"
 14) "yes"
 15) "rdb-del-sync-files"
 16) "no"
 17) "activerehashing"
 18) "yes"
 19) "stop-writes-on-bgsave-error"
 20) "no"
 21) "dynamic-hz"
 22) "yes"
 23) "lazyfree-lazy-eviction"
 24) "no"
 25) "lazyfree-lazy-expire"
 26) "no"
 27) "lazyfree-lazy-server-del"
 28) "no"
 29) "lazyfree-lazy-user-del"
 30) "no"
 31) "repl-disable-tcp-nodelay"
 32) "no"
 33) "repl-diskless-sync"
 34) "no"
 35) "gopher-enabled"
 36) "no"
 37) "aof-rewrite-incremental-fsync"
 38) "yes"
 39) "no-appendfsync-on-rewrite"
 40) "no"
 41) "cluster-require-full-coverage"
 42) "yes"
 43) "rdb-save-incremental-fsync"
 44) "yes"
 45) "aof-load-truncated"
 46) "yes"
 47) "aof-use-rdb-preamble"
 48) "yes"
 49) "cluster-replica-no-failover"
 50) "no"
 51) "cluster-slave-no-failover"
 52) "no"
 53) "replica-lazy-flush"
 54) "no"
 55) "slave-lazy-flush"
 56) "no"
 57) "replica-serve-stale-data"
 58) "yes"
 59) "slave-serve-stale-data"
 60) "yes"
 61) "replica-read-only"
 62) "yes"
 63) "slave-read-only"
 64) "yes"
 65) "replica-ignore-maxmemory"
 66) "yes"
 67) "slave-ignore-maxmemory"
 68) "yes"
 69) "jemalloc-bg-thread"
 70) "yes"
 71) "activedefrag"
 72) "no"
 73) "syslog-enabled"
 74) "no"
 75) "cluster-enabled"
 76) "no"
 77) "appendonly"
 78) "no"
 79) "cluster-allow-reads-when-down"
 80) "no"
 81) "aclfile"
 82) ""
 83) "unixsocket"
 84) ""
 85) "pidfile"
 86) ""
 87) "replica-announce-ip"
 88) ""
 89) "slave-announce-ip"
 90) ""
 91) "masteruser"
 92) ""
 93) "masterauth"
 94) "XXXXXXXXXXXXXXXX"
 95) "cluster-announce-ip"
 96) ""
 97) "syslog-ident"
 98) "redis"
 99) "dbfilename"
100) "dump.rdb"
101) "appendfilename"
102) "appendonly.aof"
103) "server_cpulist"
104) ""
105) "bio_cpulist"
106) ""
107) "aof_rewrite_cpulist"
108) ""
109) "bgsave_cpulist"
110) ""
111) "ignore-warnings"
112) "ARM64-COW-BUG"
113) "supervised"
114) "no"
115) "syslog-facility"
116) "local0"
117) "repl-diskless-load"
118) "disabled"
119) "loglevel"
120) "notice"
121) "maxmemory-policy"
122) "noeviction"
123) "appendfsync"
124) "everysec"
125) "oom-score-adj"
126) "no"
127) "databases"
128) "16"
129) "port"
130) "6379"
131) "io-threads"
132) "1"
133) "auto-aof-rewrite-percentage"
134) "100"
135) "cluster-replica-validity-factor"
136) "10"
137) "cluster-slave-validity-factor"
138) "10"
139) "list-max-ziplist-size"
140) "-2"
141) "tcp-keepalive"
142) "60"
143) "cluster-migration-barrier"
144) "1"
145) "active-defrag-cycle-min"
146) "1"
147) "active-defrag-cycle-max"
148) "25"
149) "active-defrag-threshold-lower"
150) "10"
151) "active-defrag-threshold-upper"
152) "100"
153) "lfu-log-factor"
154) "10"
155) "lfu-decay-time"
156) "1"
157) "replica-priority"
158) "100"
159) "slave-priority"
160) "100"
161) "repl-diskless-sync-delay"
162) "5"
163) "maxmemory-samples"
164) "5"
165) "timeout"
166) "60"
167) "replica-announce-port"
168) "0"
169) "slave-announce-port"
170) "0"
171) "tcp-backlog"
172) "511"
173) "cluster-announce-bus-port"
174) "0"
175) "cluster-announce-port"
176) "0"
177) "repl-timeout"
178) "60"
179) "repl-ping-replica-period"
180) "10"
181) "repl-ping-slave-period"
182) "10"
183) "list-compress-depth"
184) "0"
185) "rdb-key-save-delay"
186) "0"
187) "key-load-delay"
188) "0"
189) "active-expire-effort"
190) "1"
191) "hz"
192) "10"
193) "min-replicas-to-write"
194) "0"
195) "min-slaves-to-write"
196) "0"
197) "min-replicas-max-lag"
198) "10"
199) "min-slaves-max-lag"
200) "10"
201) "maxclients"
202) "10000"
203) "active-defrag-max-scan-fields"
204) "1000"
205) "slowlog-max-len"
206) "128"
207) "acllog-max-len"
208) "128"
209) "lua-time-limit"
210) "5000"
211) "cluster-node-timeout"
212) "15000"
213) "slowlog-log-slower-than"
214) "10000"
215) "latency-monitor-threshold"
216) "0"
217) "proto-max-bulk-len"
218) "536870912"
219) "stream-node-max-entries"
220) "100"
221) "repl-backlog-size"
222) "1048576"
223) "maxmemory"
224) "8000000000"
225) "hash-max-ziplist-entries"
226) "512"
227) "set-max-intset-entries"
228) "512"
229) "zset-max-ziplist-entries"
230) "128"
231) "active-defrag-ignore-bytes"
232) "104857600"
233) "hash-max-ziplist-value"
234) "64"
235) "stream-node-max-bytes"
236) "4096"
237) "zset-max-ziplist-value"
238) "64"
239) "hll-sparse-max-bytes"
240) "3000"
241) "tracking-table-max-keys"
242) "1000000"
243) "repl-backlog-ttl"
244) "3600"
245) "auto-aof-rewrite-min-size"
246) "67108864"
247) "tls-port"
248) "0"
249) "tls-session-cache-size"
250) "20480"
251) "tls-session-cache-timeout"
252) "300"
253) "tls-cluster"
254) "no"
255) "tls-replication"
256) "no"
257) "tls-auth-clients"
258) "yes"
259) "tls-prefer-server-ciphers"
260) "no"
261) "tls-session-caching"
262) "yes"
263) "tls-cert-file"
264) ""
265) "tls-key-file"
266) ""
267) "tls-dh-params-file"
268) ""
269) "tls-ca-cert-file"
270) ""
271) "tls-ca-cert-dir"
272) ""
273) "tls-protocols"
274) ""
275) "tls-ciphers"
276) ""
277) "tls-ciphersuites"
278) ""
279) "logfile"
280) ""
281) "client-query-buffer-limit"
282) "1073741824"
283) "watchdog-period"
284) "0"
285) "dir"
286) "/data"
287) "save"
288) "10800 1000"
289) "client-output-buffer-limit"
290) "normal 0 0 0 slave 536870912 536870912 0 pubsub 33554432 8388608 60"
291) "unixsocketperm"
292) "0"
293) "slaveof"
294) "10.0.11.179 6379"
295) "notify-keyspace-events"
296) ""
297) "bind"
298) ""
299) "requirepass"
300) "XXXXXXXXXXXXXXXX"
301) "oom-score-adj-values"
302) "0 200 800"
github-actions[bot] commented 11 months ago

This issue is stale because it has been open for 45 days with no activity.

github-actions[bot] commented 11 months ago

This issue was closed because it has been inactive for 14 days since being marked as stale.