signal18 / replication-manager

Signal 18 repman - Replication Manager for MySQL / MariaDB / Percona Server
https://signal18.io/products/srm
GNU General Public License v3.0

Replication Manager gradually occupies more memory in the OS and needs optimization #440

Open Manoharan-NMS opened 2 years ago

Manoharan-NMS commented 2 years ago

The defined memory is 256 MB / 1024 MB. Over a period of 1 or 2 days it occupies more memory, around 1 GB and above. Within a week it occupies around 2 GB of memory, even though the defined limit is 1 GB or 256 MB.

File Name: /etc/replication-manager/cluster.d/cluster1.toml

Default condition

prov-orchestrator = "onpremise"
prov-db-tags = "innodb,noquerycache,slow,pfs,pkg,linux,smallredolog,logtotable"
prov-db-memory = "256"
prov-db-memory-shared-pct = "threads:16,innodb:60,myisam:10,aria:10,rocksdb:1,tokudb:1,s3:1,archive:1,querycache:0"
prov-db-disk-size = "1"
prov-db-cpu-cores = "1"
prov-db-disk-iops = "300"

Modified

prov-orchestrator = "onpremise"
prov-db-tags = "innodb,noquerycache,slow,pfs,pkg,linux,smallredolog,logtotable"
prov-db-memory = "1024"
prov-db-memory-shared-pct = "threads:16,innodb:60,myisam:10,aria:10,rocksdb:1,tokudb:1,s3:1,archive:1,querycache:0"
prov-db-disk-size = "1"
prov-db-cpu-cores = "1"
prov-db-disk-iops = "300"
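
For sizing intuition, assuming replication-manager applies each prov-db-memory-shared-pct entry as a percentage of prov-db-memory when it generates the database configuration (an assumption, not confirmed in this thread), the modified values would split roughly as follows:

# Rough split of prov-db-memory = "1024" (MB) by prov-db-memory-shared-pct,
# assuming each entry is a percentage of the total budget:
#   threads:16    -> ~164 MB for per-thread buffers
#   innodb:60     -> ~614 MB for the InnoDB buffer pool
#   myisam:10     -> ~102 MB for the MyISAM key cache
#   aria:10       -> ~102 MB for the Aria page cache
#   rocksdb/tokudb/s3/archive:1 -> ~10 MB each
#   querycache:0  -> query cache disabled (consistent with the noquerycache tag)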

Memory Details

sudo pmap 10593
10593:   /usr/bin/replication-manager-osc monitor
0000000000400000   23656K r-x-- replication-manager-osc
0000000001b1a000   24944K r---- replication-manager-osc
0000000003376000     764K rw--- replication-manager-osc
0000000003435000     356K rw--- [ anon ]
00000000053a7000     132K rw--- [ anon ]
000000c000000000    4096K rw--- [ anon ]
000000c000400000   12288K rw--- [ anon ]
000000c001000000    4096K rw--- [ anon ]
000000c001400000    4096K rw--- [ anon ]
000000c001800000   40960K ----- [ anon ]
00007f1d14000000     132K rw--- [ anon ]
00007f1d14021000   65404K ----- [ anon ]
00007f1d1c000000     132K rw--- [ anon ]
00007f1d1c021000   65404K ----- [ anon ]
00007f1d20000000     132K rw--- [ anon ]
00007f1d20021000   65404K ----- [ anon ]
00007f1d24000000     132K rw--- [ anon ]
00007f1d24021000   65404K ----- [ anon ]
00007f1d2c000000     132K rw--- [ anon ]
00007f1d2c021000   65404K ----- [ anon ]
00007f1d30000000     132K rw--- [ anon ]
00007f1d30021000   65404K ----- [ anon ]
00007f1d34000000     132K rw--- [ anon ]
00007f1d34021000   65404K ----- [ anon ]
00007f1d38000000     132K rw--- [ anon ]
00007f1d38021000   65404K ----- [ anon ]
00007f1d3c000000     132K rw--- [ anon ]
00007f1d3c021000   65404K ----- [ anon ]
00007f1d40000000     132K rw--- [ anon ]
00007f1d40021000   65404K ----- [ anon ]
00007f1d44000000     132K rw--- [ anon ]
00007f1d44021000   65404K ----- [ anon ]
00007f1d48000000     132K rw--- [ anon ]
00007f1d48021000   65404K ----- [ anon ]
00007f1d4c000000     132K rw--- [ anon ]
00007f1d4c021000   65404K ----- [ anon ]
00007f1d50cfa000     768K rw--- [ anon ]
00007f1d50dba000       4K ----- [ anon ]
00007f1d50dbb000    8448K rw--- [ anon ]
00007f1d515fb000       4K ----- [ anon ]
00007f1d515fc000    9984K rw--- [ anon ]
00007f1d51fbc000       4K ----- [ anon ]
00007f1d51fbd000    8192K rw--- [ anon ]
00007f1d527bd000       4K ----- [ anon ]
00007f1d527be000    8448K rw--- [ anon ]
00007f1d52ffe000       4K ----- [ anon ]
00007f1d52fff000    8192K rw--- [ anon ]
00007f1d537ff000       4K ----- [ anon ]
00007f1d53800000    8192K rw--- [ anon ]
00007f1d54000000     132K rw--- [ anon ]
00007f1d54021000   65404K ----- [ anon ]
00007f1d58019000       4K ----- [ anon ]
00007f1d5801a000    8192K rw--- [ anon ]
00007f1d5881a000       4K ----- [ anon ]
00007f1d5881b000    8192K rw--- [ anon ]
00007f1d5901b000       4K ----- [ anon ]
00007f1d5901c000    8448K rw--- [ anon ]
00007f1d5985c000      44K r-x-- libnss_files-2.28.so
00007f1d59867000    2048K ----- libnss_files-2.28.so
00007f1d59a67000       4K r---- libnss_files-2.28.so
00007f1d59a68000       4K rw--- libnss_files-2.28.so
00007f1d59a69000      24K rw--- [ anon ]
00007f1d59a6f000    8212K r--s- passwd
00007f1d5a274000      32K r-x-- libnss_sss.so.2
00007f1d5a27c000    2044K ----- libnss_sss.so.2
00007f1d5a47b000       4K r---- libnss_sss.so.2
00007f1d5a47c000       4K rw--- libnss_sss.so.2
00007f1d5a47d000     256K rw--- [ anon ]
00007f1d5a4bd000       4K ----- [ anon ]
00007f1d5a4be000    8448K rw--- [ anon ]
00007f1d5acfe000       4K ----- [ anon ]
00007f1d5acff000    8192K rw--- [ anon ]
00007f1d5b4ff000       4K ----- [ anon ]
00007f1d5b500000    8448K rw--- [ anon ]
00007f1d5bd40000       4K ----- [ anon ]
00007f1d5bd41000    9600K rw--- [ anon ]
00007f1d5c6a1000       4K ----- [ anon ]
00007f1d5c6a2000   44100K rw--- [ anon ]
00007f1d5f1b3000  263680K ----- [ anon ]
00007f1d6f333000       4K rw--- [ anon ]
00007f1d6f334000  293564K ----- [ anon ]
00007f1d811e3000       4K rw--- [ anon ]
00007f1d811e4000   36692K ----- [ anon ]
00007f1d835b9000       4K rw--- [ anon ]
00007f1d835ba000    4068K ----- [ anon ]
00007f1d839b3000    1768K r-x-- libc-2.28.so
00007f1d83b6d000    2048K ----- libc-2.28.so
00007f1d83d6d000      16K r---- libc-2.28.so
00007f1d83d71000       8K rw--- libc-2.28.so
00007f1d83d73000      16K rw--- [ anon ]
00007f1d83d77000     108K r-x-- libpthread-2.28.so
00007f1d83d92000    2044K ----- libpthread-2.28.so
00007f1d83f91000       4K r---- libpthread-2.28.so
00007f1d83f92000       4K rw--- libpthread-2.28.so
00007f1d83f93000      16K rw--- [ anon ]
00007f1d83f97000     160K r-x-- ld-2.28.so
00007f1d83fd5000     448K rw--- [ anon ]
00007f1d84045000     512K ----- [ anon ]
00007f1d840c5000       4K rw--- [ anon ]
00007f1d840c6000     508K ----- [ anon ]
00007f1d84145000     404K rw--- [ anon ]
00007f1d841af000      64K rw--- [ anon ]
00007f1d841bf000       4K r---- ld-2.28.so
00007f1d841c0000       4K rw--- ld-2.28.so
00007f1d841c1000       4K rw--- [ anon ]
00007ffe6ccaa000     132K rw--- [ stack ]
00007ffe6cd3f000      12K r---- [ anon ]
00007ffe6cd42000       8K r-x-- [ anon ]
ffffffffff600000       4K r-x-- [ anon ]
total           1807784K
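
Worth noting when reading the dump: the "-----" mappings have no access permissions and are most likely address space reserved by the Go runtime and glibc malloc arenas rather than memory that is actually resident, so the ~1.7 GB pmap total is virtual size, not the resident set. A quick way to compare the two, using the same PID as above:

# Compare virtual size (VSZ) with resident set size (RSS) for the monitor process
ps -o pid,vsz,rss,comm -p 10593
# Or read the same figures straight from the kernel
grep -E 'VmSize|VmRSS' /proc/10593/status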

svaroqui commented 2 years ago

Hello, are you using the config files generated by replication-manager? Is this an OpenSVC cluster?

Manoharan-NMS commented 2 years ago

Hello, are you using the config files generated by replication-manager? Is this an OpenSVC cluster?

Yes.

Is there any way to optimize the memory?

svaroqui commented 2 years ago

Most of the parameters that control memory do not work dynamically and probably need a restart of the databases, once you have set up replication-manager to upload the config file via SSH or via an init container and to stop/start the databases from the replication-manager server itself.

svaroqui commented 2 years ago

Can you run ls in /etc/mysql/conf.d on the database servers and confirm you have many files coming from replication-manager there?

svaroqui commented 2 years ago

Oh, you are talking about the replication-manager memory itself.

svaroqui commented 2 years ago

prov-db-memory = "256" is for the configuration of the databases, not for replication-manager.

Manoharan-NMS commented 2 years ago

Oh, you are talking about the replication-manager memory itself.

Yes. I'm asking why the replication-manager service gradually occupies more memory over time, and how to optimize that.

Manoharan-NMS commented 2 years ago

prov-db-memory = "256" is for the configuration of the databases, not for replication-manager.

If I increase this property, how can I improve my replication-manager performance, or help replication-manager control the database sync faster?

Manoharan-NMS commented 2 years ago

prov-db-memory = "256" is for the configuration of the databases, not for replication-manager.

Why do we need to control DB memory from replication-manager? We have already defined the memory in my.cnf itself.

svaroqui commented 2 years ago

There is no way to optimize the memory of replication-manager itself. Can you attach the full config files? A feature that is well known to consume memory is the monitoring of metrics: graphite-embedded = true and graphite-metrics = true.
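
If those two options are enabled (they do not appear in the config excerpt posted later in this thread, so this is only a guess at the fix being suggested), they could be switched off in the cluster configuration:

# Assuming the embedded metric collector is what is consuming memory,
# disabling it in the cluster .toml should shrink replication-manager's own footprint
graphite-embedded = false
graphite-metrics = false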

svaroqui commented 2 years ago

Replication-manager is used by some clients and users to configure databases with best practices, so it can deploy a full cluster on orchestrators like OpenSVC or SlapOS and manage the database and proxy configuration for the user.

svaroqui commented 2 years ago

I'm still interested to see whether replication-manager's memory keeps growing over time or whether it reaches a limit, which I think it will.
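
A minimal way to answer that question is to sample the resident set size on a schedule and see whether it plateaus; a rough sketch, assuming the PID from the pmap output above and an arbitrary log path:

# Append a timestamped RSS sample (in kB) every 10 minutes;
# replace 10593 with the current replication-manager PID
while true; do
    echo "$(date '+%F %T') $(awk '/VmRSS/ {print $2}' /proc/10593/status) kB" >> /tmp/repman-rss.log
    sleep 600
done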

Manoharan-NMS commented 2 years ago

I'm still interested to see whether replication-manager's memory keeps growing over time or whether it reaches a limit, which I think it will.

Replication Manager config file:

cat repltest.toml

[repltest]
title = "repltest"
prov-orchestrator = "onpremise"
prov-db-tags = "innodb,noquerycache,slow,pfs,pkg,linux,smallredolog,logtotable"
#prov-db-memory = "256"
prov-db-memory = "1024"
prov-db-memory-shared-pct = "threads:16,innodb:60,myisam:10,aria:10,rocksdb:1,tokudb:1,s3:1,archive:1,querycache:0"
prov-db-disk-size = "1"
prov-db-cpu-cores = "1"
prov-db-disk-iops = "300"

db-servers-hosts = "192.168.9.42:3315,192.168.9.42:3316,192.168.9.42:3317"
db-servers-prefered-master = "192.168.9.42:3315,192.168.9.42:3316"
#db-servers-prefered-master = "192.168.9.42:3317,192.168.9.42:3316"
#db-servers-prefered-master = "192.168.9.42:3317,192.168.9.42:3315"
#db-servers-prefered-master = "192.168.9.42:3317"
db-servers-credential = "test:test"
db-servers-connect-timeout = 1
replication-credential = "test:test"
#db-servers-ignored-hosts="192.168.9.42:3315"
#db-servers-ignored-hosts="192.168.9.42:3316"
#db-servers-ignored-hosts="192.168.9.42:3317,192.168.9.42:3316"
db-servers-ignored-hosts="192.168.9.42:3317"
#db-servers-ignored-hosts="192.168.9.42:3316,192.168.9.42:3315"

verbose = false
log-failed-election  = true
log-level = 3
log-rotate-max-age = 7
log-rotate-max-backup = 7
log-rotate-max-size = 5
log-sql-in-monitoring   = true
log-sst = true

##############
## TOPOLOGY ##
##############

replication-multi-master = false
replication-multi-tier-slave = false

#############
## BACKUPS ##
#############

backup-streaming = false
backup-streaming-aws-access-key-id = "admin"
backup-streaming-aws-access-secret = "xxxx"
backup-streaming-endpoint= "https://s3.signal18.io/"
backup-streaming-region= "fr-1"
backup-streaming-bucket= "repman"

backup-restic = false
backup-restic-aws =  false
backup-physical-type = "mariabackup"
backup-logical-type = "mysqldump"
backup-restic-aws-access-secret = "xxxx"
backup-restic-password = "xxxx"
backup-restic-binary-path = "/usr/local/bin/restic"

monitoring-scheduler = true
scheduler-db-servers-logical-backup  = false
scheduler-db-servers-logical-backup-cron= "0 0 1 * * 6"
scheduler-db-servers-logs   =  false
scheduler-db-servers-logs-cron = "0 0 * * * *"
scheduler-db-servers-logs-table-keep = 4
scheduler-db-servers-logs-table-rotate  = false
scheduler-db-servers-logs-table-rotate-cron = "0 0 0/6 * * *"
scheduler-db-servers-optimize  = false
scheduler-db-servers-optimize-cron = "0 0 3 1 * 5"
scheduler-db-servers-physical-backup = false
scheduler-db-servers-physical-backup-cron = "0 0 0 * * *"

##############
## FAILOVER ##
##############

failover-mode = "automatic"
failover-pre-script = "/home/pre_failover.expect"
failover-post-script = "/home/post_failover.expect"

## Slaves will re-enter with read-only

failover-readonly-state = true
failover-event-scheduler = false
failover-event-status = false

## Failover after N failures detection

failover-falsepositive-ping-counter = 5

## Cancel failover if already N failover
## Cancel failover if last failover was N seconds before
## Cancel failover in semi-sync when one slave is not in sync
## Cancel failover if one slave receive master heartbeat
## Cancel failover when replication delay is more than N seconds

failover-limit = 0
failover-time-limit = 0
failover-at-sync = false
failover-max-slave-delay = 30
failover-restart-unsafe = false

# failover-falsepositive-heartbeat = true
# failover-falsepositive-heartbeat-timeout = 3
# failover-falsepositive-maxscale = false
# failover-falsepositive-maxscale-timeout = 14
# failover-falsepositive-external = false
# failover-falsepositive-external-port = 80

################
## SWITCHOVER ##
################

## In switchover Wait N milliseconds before killing long running transactions
## Cancel switchover if transaction running more than N seconds
## Cancel switchover if write query running more than N seconds
## Cancel switchover if one of the slaves is not synced based on GTID equality

switchover-wait-kill = 5000
switchover-wait-trx = 10
switchover-wait-write-query = 10
switchover-at-equal-gtid = false
switchover-at-sync = false
switchover-max-slave-delay = 30

############
## REJOIN ##
############

autorejoin = true
autorejoin-script = ""
autorejoin-semisync = true
autorejoin-backup-binlog = true
autorejoin-flashback = true
autorejoin-mysqldump = true

## Added Manually. By default autorejoin-flashback-on-unsync = false
autorejoin-flashback-on-unsync = true

####################
## CHECKS & FORCE ##
####################

check-replication-filters = true
check-binlog-filters = true
check-replication-state = true

force-slave-heartbeat= false
force-slave-heartbeat-retry = 5
force-slave-heartbeat-time = 3
force-slave-gtid-mode = false
force-slave-semisync = false
force-slave-failover-readonly-state = false
force-binlog-row = false
force-binlog-annotate = false
force-binlog-slowqueries = false
force-binlog-compress = false
force-binlog-checksum = false
force-inmemory-binlog-cache-size = false
force-disk-relaylog-size-limit = false
force-disk-relaylog-size-limit-size = 1000000000
force-sync-binlog = false
force-sync-innodb = false

##############
## MAXSCALE ##
##############

## for a 2-node cluster, maxscale can be driven by replication manager

maxscale = false
maxscale-binlog = false
maxscale-servers = "192.168.0.201"
maxscale-port = 4003
maxscale-user = "admin"
maxscale-pass = "mariadb"

## When true, replication manager drives the maxscale server state
## Not required unless multiple maxscale or release does not support detect_stale_slave

maxscale-disable-monitor = false

## maxinfo|maxadmin

maxscale-get-info-method = "maxadmin"
maxscale-maxinfo-port = 4002

maxscale-write-port = 4007
maxscale-read-port = 4008
maxscale-read-write-port = 4006
maxscale-binlog-port = 4000

#############
## HAPROXY ##
#############

## Wrapper mode unless maxscale or proxysql required to be located with replication-manager

haproxy = false
haproxy-binary-path = "/usr/sbin/haproxy"

## Read write traffic
## Read only load balance least connection traffic
haproxy-write-port = 3306
haproxy-read-port = 3307

####################
## SHARDING PROXY ##
####################

mdbshardproxy = false
mdbshardproxy-hosts = "127.0.0.1:3306"
mdbshardproxy-user = "root:mariadb"