raiden-network / scenario-player

MIT License
1 stars 16 forks source link

bf7 run-5041 failed (MatrixSyncMaxTimeoutReached) #694

Closed konradkonrad closed 3 years ago

konradkonrad commented 3 years ago
{
  "args": [
    "/usr/local/bin/raiden",
    "--accept-disclaimer",
    "--datadir",
    "/data/scenarios/bf7_long_path/node_5041_003",
    "--keystore-path",
    "/data/scenarios/bf7_long_path/node_5041_003/keys",
    "--address",
    "0xc77a82A0d452C8D42E7d4414816419fa74f2d5c3",
    "--password-file",
    "/data/scenarios/bf7_long_path/node_5041_003/password.txt",
    "--network-id",
    "5",
    "--sync-check",
    "--gas-price",
    "fast",
    "--eth-rpc-endpoint",
    "http://parity.goerli.ethnodes.brainbot.com:8545",
    "--log-config",
    ":info,raiden:debug,raiden_contracts:debug,raiden.api.rest.pywsgi:warning",
    "--log-json",
    "--log-file",
    "/data/scenarios/bf7_long_path/node_5041_003/run-5041.log",
    "--disable-debug-logfile",
    "--matrix-server",
    "https://transport.transport04.raiden.network",
    "--api-address",
    "127.0.0.1:33385",
    "--no-web-ui",
    "--pathfinding-service-address",
    "https://pfs.transport01.raiden.network",
    "--development-environment",
    "unstable",
    "--pathfinding-max-fee",
    "50000000000000000",
    "--proportional-fee",
    "0x59105441977ecD9d805A4f5b060E34676F50F806",
    "0",
    "--gas-price",
    "fast",
    "--proportional-imbalance-fee",
    "0x59105441977ecD9d805A4f5b060E34676F50F806",
    "0",
    "--environment-type",
    "development",
    "--enable-monitoring",
    "--routing-mode",
    "pfs"
  ],
  "exit_code": 1,
  "event": "Process died! Bailing out.",
  "logger": "raiden.utils.nursery",
  "level": "error",
  "timestamp": "2021-08-18 18:36:33.301668"
}

node_5041_0*/*.stderr

--------- Starting ---------
--------- Starting ---------
--------- Starting ---------
KeyboardInterrupt
2021-08-18T18:36:33Z
--------- Starting ---------
Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/client.py", line 317, in listen_forever
    self._sync(timeout_ms, latency_ms)
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/client.py", line 516, in _sync
    f"Time between syncs exceeded timeout:  "
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.
2021-08-18T18:36:06Z <Greenlet "GMatrixClient.sync_worker user_id:@0xc77a82a0d452c8d42e7d4414816419fa74f2d5c3:transport.transport04.raiden.network" at 0x7aa72a58fb90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x7aa72a6b9e50>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached

Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/transport.py", line 566, in _run
    self._broadcast_worker()
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/transport.py", line 763, in _broadcast_worker
    self._broadcast_event.wait(self._config.retry_interval_initial)
  File "src/gevent/event.py", line 163, in gevent._gevent_cevent.Event.wait
  File "src/gevent/_abstract_linkable.py", line 521, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait
  File "src/gevent/_abstract_linkable.py", line 487, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 490, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 442, in gevent._gevent_c_abstract_linkable.AbstractLinkable._AbstractLinkable__wait_to_be_notified
  File "src/gevent/_abstract_linkable.py", line 451, in gevent._gevent_c_abstract_linkable.AbstractLinkable._switch_to_hub
  File "src/gevent/_greenlet_primitives.py", line 61, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_greenlet_primitives.py", line 65, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_gevent_c_greenlet_primitives.pxd", line 35, in gevent._gevent_c_greenlet_primitives._greenlet_switch
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.
2021-08-18T18:36:22Z <Greenlet "MatrixTransport._run node:0xc77a82A0d452C8D42E7d4414816419fa74f2d5c3" at 0x7aa73d7d03b0: _run> failed with MatrixSyncMaxTimeoutReached

Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/raiden_service.py", line 506, in _run
    self.stop_event.wait()
  File "src/gevent/event.py", line 163, in gevent._gevent_cevent.Event.wait
  File "src/gevent/_abstract_linkable.py", line 521, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait
  File "src/gevent/_abstract_linkable.py", line 487, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 490, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 442, in gevent._gevent_c_abstract_linkable.AbstractLinkable._AbstractLinkable__wait_to_be_notified
  File "src/gevent/_abstract_linkable.py", line 451, in gevent._gevent_c_abstract_linkable.AbstractLinkable._switch_to_hub
  File "src/gevent/_greenlet_primitives.py", line 61, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_greenlet_primitives.py", line 65, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_gevent_c_greenlet_primitives.pxd", line 35, in gevent._gevent_c_greenlet_primitives._greenlet_switch
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.
2021-08-18T18:36:31Z <Greenlet "RaidenService._run node:0xc77a82A0d452C8D42E7d4414816419fa74f2d5c3" at 0x7aa73d7d0950: _run> failed with MatrixSyncMaxTimeoutReached

Traceback (most recent call last):
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/ui/cli.py", line 721, in _run
    run_services(kwargs)
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/ui/runners.py", line 92, in run_services
    signal_received = stop_event.get()
  File "src/gevent/event.py", line 329, in gevent._gevent_cevent.AsyncResult.get
  File "src/gevent/event.py", line 359, in gevent._gevent_cevent.AsyncResult.get
  File "src/gevent/event.py", line 347, in gevent._gevent_cevent.AsyncResult.get
  File "src/gevent/event.py", line 327, in gevent._gevent_cevent.AsyncResult._raise_exception
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/gevent/_compat.py", line 65, in reraise
    raise value.with_traceback(tb)
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/raiden_service.py", line 506, in _run
    self.stop_event.wait()
  File "src/gevent/event.py", line 163, in gevent._gevent_cevent.Event.wait
  File "src/gevent/_abstract_linkable.py", line 521, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait
  File "src/gevent/_abstract_linkable.py", line 487, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 490, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 442, in gevent._gevent_c_abstract_linkable.AbstractLinkable._AbstractLinkable__wait_to_be_notified
  File "src/gevent/_abstract_linkable.py", line 451, in gevent._gevent_c_abstract_linkable.AbstractLinkable._switch_to_hub
  File "src/gevent/_greenlet_primitives.py", line 61, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_greenlet_primitives.py", line 65, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_gevent_c_greenlet_primitives.pxd", line 35, in gevent._gevent_c_greenlet_primitives._greenlet_switch
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.
ReturnCode.FATAL
--------- Starting ---------
--------- Starting ---------
Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/client.py", line 317, in listen_forever
    self._sync(timeout_ms, latency_ms)
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/client.py", line 516, in _sync
    f"Time between syncs exceeded timeout:  "
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.73739075288177s > 75s. The thread had 742 context_switches, and idled 0.45586254870474174% of the time.
2021-08-18T18:36:31Z <Greenlet "GMatrixClient.sync_worker user_id:@0x0a46e18e39d68d04f4dac5668a001da1da3f3b12:transport.transport02.raiden.network" at 0x7ed7011d0b90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x7ed7012e0150>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached

Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/transport.py", line 566, in _run
    self._broadcast_worker()
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/transport.py", line 763, in _broadcast_worker
    self._broadcast_event.wait(self._config.retry_interval_initial)
  File "src/gevent/event.py", line 163, in gevent._gevent_cevent.Event.wait
  File "src/gevent/_abstract_linkable.py", line 521, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait
  File "src/gevent/_abstract_linkable.py", line 487, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 490, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 442, in gevent._gevent_c_abstract_linkable.AbstractLinkable._AbstractLinkable__wait_to_be_notified
  File "src/gevent/_abstract_linkable.py", line 451, in gevent._gevent_c_abstract_linkable.AbstractLinkable._switch_to_hub
  File "src/gevent/_greenlet_primitives.py", line 61, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_greenlet_primitives.py", line 65, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_gevent_c_greenlet_primitives.pxd", line 35, in gevent._gevent_c_greenlet_primitives._greenlet_switch
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.73739075288177s > 75s. The thread had 742 context_switches, and idled 0.45586254870474174% of the time.
2021-08-18T18:36:46Z <Greenlet "MatrixTransport._run node:0x0A46E18E39D68d04f4DAC5668a001DA1dA3F3b12" at 0x7ed7144123b0: _run> failed with MatrixSyncMaxTimeoutReached

--------- Starting ---------
--------- Starting ---------
--------- Starting ---------
--------- Starting ---------
--------- Starting ---------
KeyboardInterrupt
2021-08-18T18:36:33Z
--------- Starting ---------
--------- Starting ---------
--------- Starting ---------
Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/client.py", line 317, in listen_forever
    self._sync(timeout_ms, latency_ms)
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/client.py", line 516, in _sync
    f"Time between syncs exceeded timeout:  "
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.62216652557254s > 75s. The thread had 613 context_switches, and idled 0.4720526172878252% of the time.
2021-08-18T18:36:31Z <Greenlet "GMatrixClient.sync_worker user_id:@0xb88ec54c0ff4ee9ddc3a52113d6ee10d9f97820e:transport.transport02.raiden.network" at 0x7863e67efb90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x7863e68b4190>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached

Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 906, in gevent._gevent_cgreenlet.Greenlet.run
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/transport.py", line 566, in _run
    self._broadcast_worker()
  File "/root/.cache/pypoetry/virtualenvs/scenario-player-BbLh1Wrj-py3.7/lib/python3.7/site-packages/raiden/network/transport/matrix/transport.py", line 763, in _broadcast_worker
    self._broadcast_event.wait(self._config.retry_interval_initial)
  File "src/gevent/event.py", line 163, in gevent._gevent_cevent.Event.wait
  File "src/gevent/_abstract_linkable.py", line 521, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait
  File "src/gevent/_abstract_linkable.py", line 487, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 490, in gevent._gevent_c_abstract_linkable.AbstractLinkable._wait_core
  File "src/gevent/_abstract_linkable.py", line 442, in gevent._gevent_c_abstract_linkable.AbstractLinkable._AbstractLinkable__wait_to_be_notified
  File "src/gevent/_abstract_linkable.py", line 451, in gevent._gevent_c_abstract_linkable.AbstractLinkable._switch_to_hub
  File "src/gevent/_greenlet_primitives.py", line 61, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_greenlet_primitives.py", line 65, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
  File "src/gevent/_gevent_c_greenlet_primitives.pxd", line 35, in gevent._gevent_c_greenlet_primitives._greenlet_switch
raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.62216652557254s > 75s. The thread had 613 context_switches, and idled 0.4720526172878252% of the time.
2021-08-18T18:36:46Z <Greenlet "MatrixTransport._run node:0xb88EC54c0Ff4Ee9Ddc3A52113D6ee10d9f97820E" at 0x7863f9a39830: _run> failed with MatrixSyncMaxTimeoutReached

--------- Starting ---------

node_5041_003/run-5041.stdout

WARNING
Your account's balance is below the estimated gas reserve of 0.005557824039080913 eth. This may lead to a loss of of funds because your account will be unable to perform on-chain transactions. Please add funds to your account as soon as possible.
FATAL: An unexpected exception occurred. A traceback has been written to /tmp/raiden-exception-2021-08-18T18-36nuoi_i_s.txt
Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.

(gas reserve WARNING)

node_5041_011/run-5041.stdout

WARNING
Your account's balance is below the estimated gas reserve of 0.005557824039080913 eth. This may lead to a loss of of funds because your account will be unable to perform on-chain transactions. Please add funds to your account as soon as possible.

(gas reserve WARNING)

karlb commented 3 years ago

Beware that the idle percentages above are wrong. See https://github.com/raiden-network/raiden/pull/7300.

ezdac commented 3 years ago

Possibly related: https://github.com/raiden-network/raiden/issues/7141

fredo commented 3 years ago

looking at the logs of failing node node_5041_003, I suspect that the Matrix service wasn't reachable or rather stalled.

Node 003 - connected to t04

Here is a snippet of the latest sync iterations:

command: grep "18:3[456]" run-5041.log | grep "raiden.network.transport.matrix" | jq -c "{event, timestamp, sync_iteration}"

{"event":"api.sync returned","timestamp":"2021-08-18 18:34:46.571640","sync_iteration":24}
{"event":"Sync returned","timestamp":"2021-08-18 18:34:46.572060","sync_iteration":null}
{"event":"Sync called","timestamp":"2021-08-18 18:34:46.572329","sync_iteration":25}
{"event":"Calling api.sync","timestamp":"2021-08-18 18:34:46.572512","sync_iteration":25}
{"event":"Handling Matrix response","timestamp":"2021-08-18 18:34:46.574249","sync_iteration":null}
{"event":"Processed queued Matrix responses","timestamp":"2021-08-18 18:34:46.574554","sync_iteration":null}
{"event":"api.sync returned","timestamp":"2021-08-18 18:34:46.623525","sync_iteration":25}
{"event":"Sync returned","timestamp":"2021-08-18 18:34:46.623983","sync_iteration":null}
{"event":"Sync called","timestamp":"2021-08-18 18:34:46.624359","sync_iteration":26}
{"event":"Calling api.sync","timestamp":"2021-08-18 18:34:46.624594","sync_iteration":26}
{"event":"Handling Matrix response","timestamp":"2021-08-18 18:34:46.626455","sync_iteration":null}
{"event":"Processed queued Matrix responses","timestamp":"2021-08-18 18:34:46.626761","sync_iteration":null}
{"event":"api.sync returned","timestamp":"2021-08-18 18:36:06.538441","sync_iteration":26}
{"event":"Sync returned","timestamp":"2021-08-18 18:36:06.538841","sync_iteration":null}
{"event":"Sync called","timestamp":"2021-08-18 18:36:06.539094","sync_iteration":27}
{"event":"Exception thrown during sync","timestamp":"2021-08-18 18:36:06.540085","sync_iteration":null}
{"event":"Handling Matrix response","timestamp":"2021-08-18 18:36:06.541944","sync_iteration":null}
{"event":"Processed queued Matrix responses","timestamp":"2021-08-18 18:36:06.542217","sync_iteration":null}
{"event":"Runnable subtask died!","timestamp":"2021-08-18 18:36:06.542627","sync_iteration":null}
{"event":"Matrix stopping","timestamp":"2021-08-18 18:36:06.543247","sync_iteration":null}
{"event":"Closing WebRTC connections","timestamp":"2021-08-18 18:36:06.543499","sync_iteration":null}
{"event":"Waiting on handle greenlet","timestamp":"2021-08-18 18:36:06.543701","sync_iteration":null}
{"event":"Handling worker exiting, stop is set","timestamp":"2021-08-18 18:36:06.543896","sync_iteration":null}
{"event":"Listener greenlet exited","timestamp":"2021-08-18 18:36:06.544096","sync_iteration":null}
{"event":"Waiting on own greenlets","timestamp":"2021-08-18 18:36:06.544252","sync_iteration":null}
{"event":"Got http _send exception, waiting then retrying","timestamp":"2021-08-18 18:36:06.549095","sync_iteration":null}
{"event":"Got http _send exception, waiting then retrying","timestamp":"2021-08-18 18:36:11.561029","sync_iteration":null}
{"event":"Transport performance report","timestamp":"2021-08-18 18:36:22.653072","sync_iteration":null}
{"event":"Matrix stopped","timestamp":"2021-08-18 18:36:22.653585","sync_iteration":null}

As one can see, everything works fine until iteration 26 which times out.

Stalled sync

iteration: 26 start: 18:34:46.624594 end: 18:36:06.540085

Note, that at the end time the timeout exception was thrown

other api calls

At 18:36:06 there are two other Matrix requests which return with an MatrixRequestError

{
  "wait_for": 5,
  "_exception": "MatrixRequestError('503: Service Unavailable')",
  "event": "Got http _send exception, waiting then retrying",
  "logger": "raiden.network.transport.matrix.client",
  "level": "debug",
  "greenlet_name": "MatrixTransport._run node:0xc77a82A0d452C8D42E7d4414816419fa74f2d5c3",
  "timestamp": "2021-08-18 18:36:06.549095"
}
{
  "wait_for": 10,
  "_exception": "MatrixRequestError('503: Service Unavailable')",
  "event": "Got http _send exception, waiting then retrying",
  "logger": "raiden.network.transport.matrix.client",
  "level": "debug",
  "greenlet_name": "MatrixTransport._run node:0xc77a82A0d452C8D42E7d4414816419fa74f2d5c3",
  "timestamp": "2021-08-18 18:36:11.561029"
}

Other nodes (i.e. Node 007) connected to t04

In the meantime other nodes had the same "stalling" effect but the timeout wasnt hit. See this example from node 007.

Stalled sync

iteration: 26 start: 18:34:56.204447 end: 18:36:06.552607

As one can see the 80 seconds timeout was not hit. And the sync returned. Addiotnally there were serverside problems acknowldeged afterwards:

{
  "node": "0xf696e68c1e0c6fc5f7900e5ddcb43cb2b0eb02f8",
  "user_id": "@0xf696e68c1e0c6fc5f7900e5ddcb43cb2b0eb02f8:transport.transport04.raiden.network",
  "wait_for": 10,
  "event": "Problem occurred serverside. Waiting",
  "logger": "raiden.network.transport.matrix.client",
  "level": "warning",
  "greenlet_name": "GMatrixClient.sync_worker user_id:@0xf696e68c1e0c6fc5f7900e5ddcb43cb2b0eb02f8:transport.transport04.raiden.network",
  "timestamp": "2021-08-18 18:36:11.572211"
}

Unfortunately, the exact same errors happen at two other nodes which are connected to another transport.

We have this failure seen for

This leads me to the suspicion that the transports stalled the sync calls for at least 70 seconds. Maybe the timeout of Node 3 caused the Matrix server to return the sync call from node 7 as this happened very closely in time. Afterwards, the server continued to have problems as node 7 was not able to call another sync successfully.

fredo commented 3 years ago

@konradkonrad have you seen this error happening on any other scenario run at another point in time?

konradkonrad commented 3 years ago

Found some other instances:

ag MatrixSyncMaxTimeoutReached /data/scenario-player/scenarios/*/node_*/*.stderr
/data/scenario-player/scenarios/bf2_long_running/node_5055_001/run-5055.stderr
8:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  106.26962391287088s > 75s. The thread had 4380 context_switches, and idled 0.47740187871785295% of the time.
9:2021-08-18T17:40:48Z <Greenlet "GMatrixClient.sync_worker user_id:@0xfe3198e8904a5b5cb4e4dcdfb6b0345e6a886d5d:transport.transport01.raiden.network" at 0x7ac7db403b90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x7ac7db50fb50>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached
26:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  106.26962391287088s > 75s. The thread had 4380 context_switches, and idled 0.47740187871785295% of the time.
27:2021-08-18T17:41:24Z <Greenlet "MatrixTransport._run node:0xfe3198E8904A5b5CB4E4DCDfB6b0345e6A886D5d" at 0x7ac7ee6c2830: _run> failed with MatrixSyncMaxTimeoutReached

/data/scenario-player/scenarios/bf3_multi_directional_payment/node_5042_004/run-5042.stderr
8:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  83.15178690478206s > 75s. The thread had 788 context_switches, and idled 0.37430622823079246% of the time.
9:2021-08-18T14:52:03Z <Greenlet "GMatrixClient.sync_worker user_id:@0x15d0d153861cfde2e33c037c845d27e82a6a5ba5:transport.transport01.raiden.network" at 0x718f608bbb90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x718f60a0d290>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached
26:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  83.15178690478206s > 75s. The thread had 788 context_switches, and idled 0.37430622823079246% of the time.
27:2021-08-18T14:52:18Z <Greenlet "MatrixTransport._run node:0x15d0d153861CfDE2E33C037C845D27E82a6A5Ba5" at 0x718f73b703b0: _run> failed with MatrixSyncMaxTimeoutReached
42:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  83.15178690478206s > 75s. The thread had 788 context_switches, and idled 0.37430622823079246% of the time.
43:2021-08-18T14:52:19Z <Greenlet "RaidenService._run node:0x15d0d153861CfDE2E33C037C845D27E82a6A5Ba5" at 0x718f73b70950: _run> failed with MatrixSyncMaxTimeoutReached
68:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  83.15178690478206s > 75s. The thread had 788 context_switches, and idled 0.37430622823079246% of the time.

/data/scenario-player/scenarios/bf7_long_path/node_5041_003/run-5041.stderr
8:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.
9:2021-08-18T18:36:06Z <Greenlet "GMatrixClient.sync_worker user_id:@0xc77a82a0d452c8d42e7d4414816419fa74f2d5c3:transport.transport04.raiden.network" at 0x7aa72a58fb90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x7aa72a6b9e50>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached
26:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.
27:2021-08-18T18:36:22Z <Greenlet "MatrixTransport._run node:0xc77a82A0d452C8D42E7d4414816419fa74f2d5c3" at 0x7aa73d7d03b0: _run> failed with MatrixSyncMaxTimeoutReached
42:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.
43:2021-08-18T18:36:31Z <Greenlet "RaidenService._run node:0xc77a82A0d452C8D42E7d4414816419fa74f2d5c3" at 0x7aa73d7d0950: _run> failed with MatrixSyncMaxTimeoutReached
68:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  79.91467213630676s > 75s. The thread had 1116 context_switches, and idled 0.3888515555619618% of the time.

/data/scenario-player/scenarios/bf7_long_path/node_5041_005/run-5041.stderr
8:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.73739075288177s > 75s. The thread had 742 context_switches, and idled 0.45586254870474174% of the time.
9:2021-08-18T18:36:31Z <Greenlet "GMatrixClient.sync_worker user_id:@0x0a46e18e39d68d04f4dac5668a001da1da3f3b12:transport.transport02.raiden.network" at 0x7ed7011d0b90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x7ed7012e0150>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached
26:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.73739075288177s > 75s. The thread had 742 context_switches, and idled 0.45586254870474174% of the time.
27:2021-08-18T18:36:46Z <Greenlet "MatrixTransport._run node:0x0A46E18E39D68d04f4DAC5668a001DA1dA3F3b12" at 0x7ed7144123b0: _run> failed with MatrixSyncMaxTimeoutReached

/data/scenario-player/scenarios/bf7_long_path/node_5041_013/run-5041.stderr
8:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.62216652557254s > 75s. The thread had 613 context_switches, and idled 0.4720526172878252% of the time.
9:2021-08-18T18:36:31Z <Greenlet "GMatrixClient.sync_worker user_id:@0xb88ec54c0ff4ee9ddc3a52113d6ee10d9f97820e:transport.transport02.raiden.network" at 0x7863e67efb90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x7863e68b4190>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached
26:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  78.62216652557254s > 75s. The thread had 613 context_switches, and idled 0.4720526172878252% of the time.
27:2021-08-18T18:36:46Z <Greenlet "MatrixTransport._run node:0xb88EC54c0Ff4Ee9Ddc3A52113D6ee10d9f97820E" at 0x7863f9a39830: _run> failed with MatrixSyncMaxTimeoutReached
root@kronk:~# 
ag MatrixSyncMaxTimeoutReached /data/scenario-player/scenarios/*/node_*/*.stderr
/data/scenario-player/scenarios/bf1_basic_functionality/node_5058_003/run-5058.stderr
30:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  87.18671032041311s > 75s. The thread had 4633 context_switches, and idled 0.49593128018567073% of the time.
31:2021-08-18T17:40:47Z <Greenlet "GMatrixClient.sync_worker user_id:@0x2eda704ab7d747f32b1378dfb1bae5f3dfbdad6f:transport.transport04.raiden.network" at 0x76e0727edb90: <bound method GMatrixClient.listen_forever of <raiden.network.transport.matrix.client.GMatrixClient object at 0x76e07293c490>>(60000, 15000, None)> failed with MatrixSyncMaxTimeoutReached
48:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  87.18671032041311s > 75s. The thread had 4633 context_switches, and idled 0.49593128018567073% of the time.
49:2021-08-18T17:40:47Z <Greenlet "MatrixTransport._run node:0x2edA704ab7D747F32b1378dFB1BAe5F3DfBDAD6f" at 0x76e085a9b830: _run> failed with MatrixSyncMaxTimeoutReached
64:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  87.18671032041311s > 75s. The thread had 4633 context_switches, and idled 0.49593128018567073% of the time.
65:2021-08-18T17:40:56Z <Greenlet "RaidenService._run node:0x2edA704ab7D747F32b1378dFB1BAe5F3DfBDAD6f" at 0x76e085a9b950: _run> failed with MatrixSyncMaxTimeoutReached
90:raiden.exceptions.MatrixSyncMaxTimeoutReached: Time between syncs exceeded timeout:  87.18671032041311s > 75s. The thread had 4633 context_switches, and idled 0.49593128018567073% of the time.
root@yzma:~# 
fredo commented 3 years ago

looking from the timestamp they all happened on 2021-08-18 during that day

fredo commented 3 years ago

closed in favor of threading fix in presence router