Obviously very bad, as it seems to have resulted in multiple FTS transfers stepping on each other's toes
2024-09-18 12:30:48 UTC DataManagement/FTS3Agent/FTS3DB ERROR: persistOperation: unexpected exception
Traceback (most recent call last):
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
self.dialect.do_execute(
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/default.py", line 924, in do_execute
cursor.execute(statement, parameters)
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/MySQLdb/cursors.py", line 206, in execute
res = self._query(query)
^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/MySQLdb/cursors.py", line 319, in _query
db.query(q)
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/MySQLdb/connections.py", line 259, in query
_mysql.connection.query(self, query)
MySQLdb._exceptions.OperationalError: (1213, 'Deadlock found when trying to get lock; try restarting transaction')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/DataManagementSystem/DB/FTS3DB.py", line 247, in persistOperation
session.commit()
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 2017, in commit
trans.commit(_to_root=True)
File "<string>", line 2, in commit
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/state_changes.py", line 139, in _go
ret_value = fn(self, *arg, **kw)
^^^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 1302, in commit
self._prepare_impl()
File "<string>", line 2, in _prepare_impl
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/state_changes.py", line 139, in _go
ret_value = fn(self, *arg, **kw)
^^^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 1277, in _prepare_impl
self.session.flush()
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 4341, in flush
self._flush(objects)
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 4476, in _flush
with util.safe_reraise():
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/util/langhelpers.py", line 146, in __exit__
raise exc_value.with_traceback(exc_tb)
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/session.py", line 4437, in _flush
flush_context.execute()
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/unitofwork.py", line 466, in execute
rec.execute(self)
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/unitofwork.py", line 642, in execute
util.preloaded.orm_persistence.save_obj(
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/persistence.py", line 93, in save_obj
_emit_insert_statements(
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/orm/persistence.py", line 1233, in _emit_insert_statements
result = connection.execute(
^^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1418, in execute
return meth(
^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/sql/elements.py", line 515, in _execute_on_connection
return connection._execute_clauseelement(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1640, in _execute_clauseelement
ret = self._execute_context(
^^^^^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1846, in _execute_context
return self._exec_single_context(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1986, in _exec_single_context
self._handle_dbapi_exception(
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 2353, in _handle_dbapi_exception
raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
self.dialect.do_execute(
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/sqlalchemy/engine/default.py", line 924, in do_execute
cursor.execute(statement, parameters)
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/MySQLdb/cursors.py", line 206, in execute
res = self._query(query)
^^^^^^^^^^^^^^^^^^
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/MySQLdb/cursors.py", line 319, in _query
db.query(q)
File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/MySQLdb/connections.py", line 259, in query
_mysql.connection.query(self, query)
sqlalchemy.exc.OperationalError: (MySQLdb._exceptions.OperationalError) (1213, 'Deadlock found when trying to get lock; try restarting transaction')
[SQL: INSERT INTO `Jobs` (`operationID`, `submitTime`, `lastUpdate`, `lastMonitor`, completeness, username, `userGroup`, `ftsGUID`, `ftsServer`, error, status, assignment) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)]
[parameters: (8840397, datetime.datetime(2024, 9, 18, 12, 30, 46), datetime.datetime(2024, 9, 18, 12, 30, 46), datetime.datetime(2024, 9, 18, 12, 30, 46), None, 'lbprods', 'lhcb_prmgr', 'd48f1c5a-75b9-11ef-bbfb-fa163e8e0048', 'https://fts3-lhcb.cern.ch:8446', None, 'Submitted', None)]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
2024-09-18 12:30:48 UTC DataManagement/FTS3Agent/submit/8840413/RAL-BUFFER_RAL-RAW INFO: Got GUID d5c1de82-75b9-11ef-ac94-fa163e5a69c7
2024-09-18 12:30:48 UTC DataManagement/FTS3Agent/treatOperation/8840397 ERROR: Could not persist operation {'OK': False, 'Errno': 0, 'Message': "persistOperation: unexpected exception (MySQLdb._exceptions.OperationalError) (1213, 'Deadlock found when trying to get lock; try restarting transaction')\n[SQL: INSERT INTO `Jobs` (`operationID`, `submitTime`, `lastUpdate`, `lastMonitor`, completeness, username, `userGroup`, `ftsGUID`, `ftsServer`, error, status, assignment) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)]\n[parameters: (8840397, datetime.datetime(2024, 9, 18, 12, 30, 46), datetime.datetime(2024, 9, 18, 12, 30, 46), datetime.datetime(2024, 9, 18, 12, 30, 46), None, 'lbprods', 'lhcb_prmgr', 'd48f1c5a-75b9-11ef-bbfb-fa163e8e0048', 'https://fts3-lhcb.cern.ch:8446', None, 'Submitted', None)]\n(Background on this error at: https://sqlalche.me/e/20/e3q8)", 'CallStack': [' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/threading.py", line 1002, in _bootstrap\n self._bootstrap_inner()\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/threading.py", line 1045, in _bootstrap_inner\n self.run()\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/threading.py", line 982, in run\n self._target(*self._args, **self._kwargs)\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/multiprocessing/pool.py", line 125, in worker\n result = (True, func(*args, **kwds))\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/DataManagementSystem/Agent/FTS3Agent.py", line 513, in _treatOperation\n res = self.fts3db.persistOperation(operation)\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/DataManagementSystem/DB/FTS3DB.py", line 255, in persistOperation\n return S_ERROR(f"persistOperation: unexpected exception {e}")\n']}
2024-09-18 14:39:56 UTC DataManagement/FTS3Agent/monitorJobs ERROR: Could not retrieve ftsJobs from the DB {'OK': False, 'Errno': 0, 'Message': "getAllActiveJobs: unexpected exception : (MySQLdb._exceptions.OperationalError) (1213, 'Deadlock found when trying to get lock; try restarting transaction')\n[SQL: SELECT `Jobs`.`jobID` AS `Jobs_jobID`, `Jobs`.`operationID` AS `Jobs_operationID`, `Jobs`.`submitTime` AS `Jobs_submitTime`, `Jobs`.`lastUpdate` AS `Jobs_lastUpdate`, `Jobs`.`lastMonitor` AS `Jobs_lastMonitor`, `Jobs`.completeness AS `Jobs_completeness`, `Jobs`.username AS `Jobs_username`, `Jobs`.`userGroup` AS `Jobs_userGroup`, `Jobs`.`ftsGUID` AS `Jobs_ftsGUID`, `Jobs`.`ftsServer` AS `Jobs_ftsServer`, `Jobs`.error AS `Jobs_error`, `Jobs`.status AS `Jobs_status`, `Jobs`.assignment AS `Jobs_assignment` \nFROM `Jobs` INNER JOIN `Operations` ON `Operations`.`operationID` = `Jobs`.`operationID` \nWHERE `Jobs`.status IN (%s, %s, %s, %s, %s) AND `Jobs`.assignment IS NULL AND `Operations`.assignment IS NULL AND `Jobs`.`lastMonitor` < %s ORDER BY `Jobs`.`lastMonitor` ASC \n LIMIT %s FOR UPDATE]\n[parameters: ('Submitted', 'Staging', 'Archiving', 'Ready', 'Active', datetime.datetime(2024, 9, 18, 14, 28, 22, 250137), 20)]\n(Background on this error at: https://sqlalche.me/e/20/e3q8)", 'CallStack': [' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/bin/dirac-agent", line 8, in <module>\n sys.exit(main())\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/Base/Script.py", line 74, in __call__\n return entrypointFunc._func()\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/scripts/dirac_agent.py", line 39, in main\n agentReactor.go()\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/Base/AgentReactor.py", line 144, in go\n timeToNext = self.__scheduler.executeNextTask()\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/Utilities/ThreadScheduler.py", line 127, in executeNextTask\n self.__executeTask(taskId)\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/Utilities/ThreadScheduler.py", line 180, in __executeTask\n task["func"](*task["args"])\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/Base/AgentModule.py", line 350, in am_go\n cycleResult = self.__executeModuleCycle()\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/Base/AgentModule.py", line 420, in __executeModuleCycle\n result = self.am_secureCall(executors[0][0], executors[0][1])\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/Core/Base/AgentModule.py", line 310, in am_secureCall\n result = functor(*args)\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/DataManagementSystem/Agent/FTS3Agent.py", line 656, in execute\n res = self.monitorJobsLoop()\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/DataManagementSystem/Agent/FTS3Agent.py", line 320, in monitorJobsLoop\n res = self.fts3db.getActiveJobs(\n', ' File "/opt/dirac/versions/v11.0.46-1723623328/Linux-x86_64/lib/python3.11/site-packages/DIRAC/DataManagementSystem/DB/FTS3DB.py", line 349, in getActiveJobs\n return S_ERROR(f"getAllActiveJobs: unexpected exception : {e}")\n']}
Obviously very bad, as it seems to have resulted in multiple FTS transfers stepping on each other's toes