matrix-org / synapse

Synapse: Matrix homeserver written in Python/Twisted.
https://matrix-org.github.io/synapse
Apache License 2.0
11.79k stars 2.13k forks source link

Sending a message fails after rebooting a machine that has Synapse workers set up #10501

Closed jg2312 closed 2 years ago

jg2312 commented 3 years ago

Description

Sending a message fails after rebooting the machine.

Steps to reproduce

Expectation: sending a message should always work after restarting synapse and the workers.

Version information

federation1

worker_app: "synapse.app.generic_worker"
worker_name: "federation1"

# The replication listener on the main synapse process.
worker_replication_host: "localhost"
worker_replication_http_port: 9093

worker_listeners:
 - type: http
   port: 8085
   x_forwarded: true
   bind_addresses:
    - '::'
   resources:
     - names:
       - metrics
       - federation

worker_log_config: "/etc/matrix-synapse/log.yaml"

federation2

worker_app: "synapse.app.generic_worker"
worker_name: "federation2"

# The replication listener on the main synapse process.
worker_replication_host: "localhost"
worker_replication_http_port: 9093

worker_listeners:
 - type: http
   port: 8085
   x_forwarded: true
   bind_addresses:
    - '::'
   resources:
     - names:
       - metrics
       - federation

worker_log_config: "/etc/matrix-synapse/log.yaml"

client1

worker_app: "synapse.app.generic_worker"
worker_name: "client1"

# The replication listener on the main synapse process.
worker_replication_host: "localhost"
worker_replication_http_port: 9093

worker_listeners:
 - type: http
   port: 8084
   x_forwarded: true
   bind_addresses:
    - '::'
   resources:
     - names:
       - metrics
       - client

worker_log_config: "/etc/matrix-synapse/log.yaml"

sync1

worker_app: "synapse.app.generic_worker"
worker_name: "sync1"

# The replication listener on the main synapse process.
worker_replication_host: "localhost"
worker_replication_http_port: 9093

worker_listeners:
 - type: http
   port: 8083
   x_forwarded: true
   bind_addresses:
    - '::'
   resources:
     - names:
       - metrics
       - client

worker_log_config: "/etc/matrix-synapse/log.yaml"

event1

worker_app: "synapse.app.generic_worker"
worker_name: "event1"

# The replication listener on the main synapse process.
worker_replication_host: "localhost"
worker_replication_http_port: 9093

worker_listeners:
 - type: http
   port: 8082
   x_forwarded: true
   bind_addresses:
    - '::'
   resources:
     - names:
       - metrics

worker_log_config: "/etc/matrix-synapse/log.yaml"

background

worker_app: "synapse.app.generic_worker"
worker_name: "background"

# The replication listener on the main synapse process.
worker_replication_host: "localhost"
worker_replication_http_port: 9093

worker_listeners:
 - type: http
   port: 8086
   x_forwarded: true
   bind_addresses:
    - '::'
   resources:
     - names:
       - metrics

worker_log_config: "/etc/matrix-synapse/log.yaml"

register1

worker_app: "synapse.app.generic_worker"
worker_name: "register1"

# The replication listener on the main synapse process.
worker_replication_host: "localhost"
worker_replication_http_port: 9093

worker_listeners:
 - type: http
   port: 8088
   x_forwarded: true
   bind_addresses:
    - '::'
   resources:
     - names:
       - metrics
       - client

worker_log_config: "/etc/matrix-synapse/log.yaml"

nginx.config

......
location ~ /_matrix|/_synapse {
        ......
        proxy_pass http://localhost:8008;

        # Inbound federation.
        location ~ ^/_matrix/federation/v1/send/ {
        proxy_pass http://inbound_federation_workers;
        }

        # Anything else federation.
        location ~ (^/_matrix/federation/v1/event/|^/_matrix/federation/v1/state/|^/_matrix/federation/v1/state_ids/|^/_matrix/federation/v1/backfill/|^/_matrix/federation/v1/get_missing_events/|^/_matrix/federation/v1/publicRooms|^/_matrix/federation/v1/query/|^/_matrix/federation/v1/make_join/|^/_matrix/federation/v1/make_leave/|^/_matrix/federation/v1/send_join/|^/_matrix/federation/v2/send_join/|^/_matrix/federation/v1/send_leave/|^/_matrix/federation/v2/send_leave/|^/_matrix/federation/v1/invite/|^/_matrix/federation/v2/invite/|^/_matrix/federation/v1/query_auth/|^/_matrix/federation/v1/event_auth/|^/_matrix/federation/v1/exchange_third_party_invite/|^/_matrix/federation/v1/user/devices/|^/_matrix/federation/v1/get_groups_publicised$|^/_matrix/key/v2/query) {
        proxy_pass http://federation_workers;
        }
        # Syncing
        location ~ (^/_matrix/client/(v2_alpha|r0)/sync$|^/_matrix/client/(api/v1|v2_alpha|r0)/events$|^/_matrix/client/(api/v1|r0)/initialSync$|^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$) {
        proxy_pass http://sync_workers;
        }
        #event
        location ~ (^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$|^/_matrix/client/(api/v1|r0|unstable)/join/|^/_matrix/client/(api/v1|r0|unstable)/profile/) {

        proxy_pass http://event_sending_workers;
        }
        #client
        location ~ (^/_matrix/client/(api/v1|r0|unstable)/publicRooms$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$|^/_matrix/client/(api/v1|r0|unstable)/account/3pid$|^/_matrix/client/(api/v1|r0|unstable)/devices$|^/_matrix/client/(api/v1|r0|unstable)/keys/query$|^/_matrix/client/(api/v1|r0|unstable)/keys/changes$|^/_matrix/client/versions$|^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$|^/_matrix/client/(api/v1|r0|unstable)/joined_groups$|^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$|^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/event/|^/_matrix/client/(api/v1|r0|unstable)/joined_rooms$|^/_matrix/client/(api/v1|r0|unstable)/search$) {
        proxy_pass http://client_workers;
        }
        #register
        location ~ (^/_matrix/client/(api/v1|r0|unstable)/login$|^/_matrix/client/(r0|unstable)/register$) {
        proxy_pass http://register_workers;
        }
        }

upstream

upstream inbound_federation_workers {
    server localhost:8085;
    server localhost:8087;
ip_hash;
}
upstream sync_workers {
    server localhost:8083;
ip_hash;
}
upstream federation_workers {
    server localhost:8085;
    server localhost:8087;
}
upstream client_workers {
    server localhost:8084;
}
upstream event_sending_workers {
    server localhost:8082;
}
upstream register_workers {
    server localhost:8088;
}

matrix-synapse-worker@.service

[Unit]
Description=Synapse %i
AssertPathExists=/etc/matrix-synapse/workers/%i.yaml

# This service should be restarted when the synapse target is restarted.
PartOf=matrix-synapse.target

# if this is started at the same time as the main, let the main process start
# first, to initialise the database schema.
After=matrix-synapse.service

[Service]
Type=notify
NotifyAccess=main
User=matrix-synapse
WorkingDirectory=/var/lib/matrix-synapse
EnvironmentFile=/etc/default/matrix-synapse
ExecStart=/usr/bin/python3 -m synapse.app.generic_worker --config-path=/etc/matrix-synapse/homeserver.yaml --config-path=/etc/matrix-synapse/conf.d/ --config-path=/etc/matrix-synapse/workers/%i.yaml
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=3
SyslogIdentifier=matrix-synapse-%i

[Install]
WantedBy=matrix-synapse.target
dklimpel commented 3 years ago

Did you check that redis is starting first?

jg2312 commented 3 years ago

Did you check that redis is starting first?

Yes, redis is working. I also restarted redis.

reivilibre commented 3 years ago

I think the emphasis needed was: Is redis starting first?

For example, in systemd, you can add something like Requires=redis.service together with After=redis.service (Requires= alone does not impose any start-up ordering) so that Synapse only starts up after redis is already started.

jg2312 commented 3 years ago

I think the emphasis needed was: Is redis starting first?

For example, in systemd, you can add something like Requires=redis.service together with After=redis.service (Requires= alone does not impose any start-up ordering) so that Synapse only starts up after redis is already started.

Thank you for the reply. I started redis first, before the other services (matrix-synapse and the workers), but the issue still occurs.

erikjohnston commented 3 years ago

Expectation: sending a message should always work after restarting synapse and the workers.

What failure are you seeing? Are you seeing errors or timeouts or something else? Does it start working after some time? It'd also be helpful to see your nginx access logs to see what's going on.

As well as redis you'll need to wait for the DB to come back up

You probably also want to have different log configs for the different workers, as I don't know how well it works trying to log to the same file from multiple workers (assuming your log config logs to a file).

It's worth comparing the logs to see when Synapse finished starting compared with when the box itself finished booting. It's possible Synapse is taking a long time to start up (e.g. because it's waiting for the DB to be ready?)

jg2312 commented 3 years ago

When RAM usage is over 800MB and the stacked CPU usage of purge_history is over 371%, the synapse main processor does not work.

VCPU 16, RAM 200G synapse with workers.(no background worker)

event_cache_size: 60K

caches:
   global_factor: 3.5
   per_cache_factors:
     #get_users_who_share_room_with_user: 2.0

synapse_cpu_ram synapse_background synapse_stack_cpu

reivilibre commented 2 years ago

Are you still having this issue?

If so, please could we have some logs?

Can you be a bit more precise with what you mean than 'synapse main processor does not work' — any specific requests that are failing?

Do you have nginx error logs for example (as a starting point to see which requests are failing)?

cremesk commented 2 years ago

@jg2312 it looks like your nginx config is not correct.

You wrote that you use it like this:


location ~ /_matrix|/_synapse {
......
proxy_pass http://localhost:8008;
    # Inbound federation.
    location ~ ^/_matrix/federation/v1/send/ {
    proxy_pass http://inbound_federation_workers;
    }

    # Anything else federation.
    location ~ (^/_matrix/federation/v1/event/|^/_matrix/federation/v1/state/|^/_matrix/federation/v1/state_ids/|^/_matrix/federation/v1/backfill/|^/_matrix/federation/v1/get_missing_events/|^/_matrix/federation/v1/publicRooms|^/_matrix/federation/v1/query/|^/_matrix/federation/v1/make_join/|^/_matrix/federation/v1/make_leave/|^/_matrix/federation/v1/send_join/|^/_matrix/federation/v2/send_join/|^/_matrix/federation/v1/send_leave/|^/_matrix/federation/v2/send_leave/|^/_matrix/federation/v1/invite/|^/_matrix/federation/v2/invite/|^/_matrix/federation/v1/query_auth/|^/_matrix/federation/v1/event_auth/|^/_matrix/federation/v1/exchange_third_party_invite/|^/_matrix/federation/v1/user/devices/|^/_matrix/federation/v1/get_groups_publicised$|^/_matrix/key/v2/query) {
    proxy_pass http://federation_workers;
    }
    # Syncing
    location ~ (^/_matrix/client/(v2_alpha|r0)/sync$|^/_matrix/client/(api/v1|v2_alpha|r0)/events$|^/_matrix/client/(api/v1|r0)/initialSync$|^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$) {
    proxy_pass http://sync_workers;
    }
    #event
    location ~ (^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$|^/_matrix/client/(api/v1|r0|unstable)/join/|^/_matrix/client/(api/v1|r0|unstable)/profile/) {

    proxy_pass http://event_sending_workers;
    }
    #client
    location ~ (^/_matrix/client/(api/v1|r0|unstable)/publicRooms$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$|^/_matrix/client/(api/v1|r0|unstable)/account/3pid$|^/_matrix/client/(api/v1|r0|unstable)/devices$|^/_matrix/client/(api/v1|r0|unstable)/keys/query$|^/_matrix/client/(api/v1|r0|unstable)/keys/changes$|^/_matrix/client/versions$|^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$|^/_matrix/client/(api/v1|r0|unstable)/joined_groups$|^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$|^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/event/|^/_matrix/client/(api/v1|r0|unstable)/joined_rooms$|^/_matrix/client/(api/v1|r0|unstable)/search$) {
    proxy_pass http://client_workers;
    }
    #register
    location ~ (^/_matrix/client/(api/v1|r0|unstable)/login$|^/_matrix/client/(r0|unstable)/register$) {
    proxy_pass http://register_workers;
    }

}


However, the locations for the workers must be outside the matrix location.
E.g.:

# Inbound federation.

location ~ ^/_matrix/federation/v1/send/ { proxy_pass http://inbound_federation_workers; }

# Anything else federation.

location ~ (^/_matrix/federation/v1/event/|^/_matrix/federation/v1/state/|^/_matrix/federation/v1/state_ids/|^/_matrix/federation/v1/backfill/|^/_matrix/federation/v1/get_missing_events/|^/_matrix/federation/v1/publicRooms|^/_matrix/federation/v1/query/|^/_matrix/federation/v1/make_join/|^/_matrix/federation/v1/make_leave/|^/_matrix/federation/v1/send_join/|^/_matrix/federation/v2/send_join/|^/_matrix/federation/v1/send_leave/|^/_matrix/federation/v2/send_leave/|^/_matrix/federation/v1/invite/|^/_matrix/federation/v2/invite/|^/_matrix/federation/v1/query_auth/|^/_matrix/federation/v1/event_auth/|^/_matrix/federation/v1/exchange_third_party_invite/|^/_matrix/federation/v1/user/devices/|^/_matrix/federation/v1/get_groups_publicised$|^/_matrix/key/v2/query) { proxy_pass http://federation_workers; }

# Syncing

location ~ (^/_matrix/client/(v2_alpha|r0)/sync$|^/_matrix/client/(api/v1|v2_alpha|r0)/events$|^/_matrix/client/(api/v1|r0)/initialSync$|^/_matrix/client/(api/v1|r0)/rooms/[^/]+/initialSync$) { proxy_pass http://sync_workers; }

#event

location ~ (^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/redact|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state/|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$|^/_matrix/client/(api/v1|r0|unstable)/join/|^/_matrix/client/(api/v1|r0|unstable)/profile/) {

    proxy_pass http://event_sending_workers;

}

#client

location ~ (^/_matrix/client/(api/v1|r0|unstable)/publicRooms$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/joined_members$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/context/.*$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$|^/_matrix/client/(api/v1|r0|unstable)/account/3pid$|^/_matrix/client/(api/v1|r0|unstable)/devices$|^/_matrix/client/(api/v1|r0|unstable)/keys/query$|^/_matrix/client/(api/v1|r0|unstable)/keys/changes$|^/_matrix/client/versions$|^/_matrix/client/(api/v1|r0|unstable)/voip/turnServer$|^/_matrix/client/(api/v1|r0|unstable)/joined_groups$|^/_matrix/client/(api/v1|r0|unstable)/publicised_groups$|^/_matrix/client/(api/v1|r0|unstable)/publicised_groups/|^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/event/|^/_matrix/client/(api/v1|r0|unstable)/joined_rooms$|^/_matrix/client/(api/v1|r0|unstable)/search$) { proxy_pass http://client_workers; }

#register

    location ~ (^/_matrix/client/(api/v1|r0|unstable)/login$|^/_matrix/client/(r0|unstable)/register$) {
    proxy_pass http://register_workers;

}

location ~ ^(/_matrix|/_synapse/client) { ...... proxy_pass http://localhost:8008; }

clokep commented 2 years ago

Closing due to lack of response. If you're still seeing this issue, please double check your nginx config and provide the requested logs! Thanks!