Closed aclum closed 2 months ago
Here's a copy/paste of the error stack trace shown on Dagit:
File "/usr/local/lib/python3.10/site-packages/dagster/_core/errors.py", line 287, in user_code_error_boundary
yield
File "/usr/local/lib/python3.10/site-packages/dagster/_grpc/impl.py", line 393, in get_external_sensor_execution
return sensor_def.evaluate_tick(sensor_context)
File "/usr/local/lib/python3.10/site-packages/dagster/_core/definitions/sensor_definition.py", line 788, in evaluate_tick
result = self._evaluation_fn(context)
File "/usr/local/lib/python3.10/site-packages/dagster/_core/definitions/sensor_definition.py", line 1110, in _wrapped_fn
result.append(next(raw_evaluation_result))
File "/opt/dagster/lib/nmdc_runtime/site/repository.py", line 351, in claim_and_run_apply_changesheet_jobs
jobs = [Job(**d) for d in mdb.jobs.find({"workflow.id": "apply-changesheet-1.0.0"})]
File "/opt/dagster/lib/nmdc_runtime/site/repository.py", line 351, in <listcomp>
jobs = [Job(**d) for d in mdb.jobs.find({"workflow.id": "apply-changesheet-1.0.0"})]
File "/usr/local/lib/python3.10/site-packages/pymongo/cursor.py", line 1243, in next
if len(self.__data) or self._refresh():
File "/usr/local/lib/python3.10/site-packages/pymongo/cursor.py", line 1160, in _refresh
self.__send_message(q)
File "/usr/local/lib/python3.10/site-packages/pymongo/cursor.py", line 1039, in __send_message
response = client._run_operation(
File "/usr/local/lib/python3.10/site-packages/pymongo/_csot.py", line 108, in csot_wrapper
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/pymongo/mongo_client.py", line 1431, in _run_operation
return self._retryable_read(
File "/usr/local/lib/python3.10/site-packages/pymongo/mongo_client.py", line 1540, in _retryable_read
return self._retry_internal(
File "/usr/local/lib/python3.10/site-packages/pymongo/_csot.py", line 108, in csot_wrapper
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/pymongo/mongo_client.py", line 1507, in _retry_internal
).run()
File "/usr/local/lib/python3.10/site-packages/pymongo/mongo_client.py", line 2353, in run
return self._read() if self._is_read else self._write()
File "/usr/local/lib/python3.10/site-packages/pymongo/mongo_client.py", line 2485, in _read
with self._client._conn_from_server(self._read_pref, self._server, self._session) as (
File "/usr/local/lib/python3.10/contextlib.py", line 135, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.10/site-packages/pymongo/mongo_client.py", line 1357, in _conn_from_server
with self._checkout(server, session) as conn:
File "/usr/local/lib/python3.10/contextlib.py", line 135, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.10/site-packages/pymongo/mongo_client.py", line 1266, in _checkout
with server.checkout(handler=err_handler) as conn:
File "/usr/local/lib/python3.10/contextlib.py", line 135, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.10/site-packages/pymongo/pool.py", line 1767, in checkout
conn = self._get_conn(checkout_started_time, handler=handler)
File "/usr/local/lib/python3.10/site-packages/pymongo/pool.py", line 1926, in _get_conn
conn = self.connect(handler=handler)
File "/usr/local/lib/python3.10/site-packages/pymongo/pool.py", line 1729, in connect
conn.authenticate()
File "/usr/local/lib/python3.10/site-packages/pymongo/pool.py", line 1099, in authenticate
auth.authenticate(creds, self, reauthenticate=reauthenticate)
File "/usr/local/lib/python3.10/site-packages/pymongo/auth.py", line 656, in authenticate
auth_func(credentials, conn)
File "/usr/local/lib/python3.10/site-packages/pymongo/auth.py", line 558, in _authenticate_default
return _authenticate_scram(credentials, conn, "SCRAM-SHA-256")
File "/usr/local/lib/python3.10/site-packages/pymongo/auth.py", line 338, in _authenticate_scram
res = conn.command(source, cmd)
File "/usr/local/lib/python3.10/site-packages/pymongo/helpers.py", line 342, in inner
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/pymongo/pool.py", line 989, in command
return command(
File "/usr/local/lib/python3.10/site-packages/pymongo/network.py", line 212, in command
helpers._check_command_response(
File "/usr/local/lib/python3.10/site-packages/pymongo/helpers.py", line 248, in _check_command_response
raise OperationFailure(errmsg, code, response, max_wire_version)
I see `pymongo/auth.py` in there. I'll verify that the credentials Dagster is trying to use to access Mongo are correct.
Fixed.
Dagster was getting the Mongo username from a ConfigMap where it was set to `root` instead of the username that the Runtime is using. Meanwhile, Dagster was using the Mongo password associated with the username that the Runtime was using. So, the username and password did not match.
To fix this, I explicitly defined an environment variable on the Dagster workload and populated it (dynamically) with the username that the Runtime is using. At that point, the username and password were consistent with one another. As a result, Dagster was able to access the database, as shown here.
Confirmed fixed.
Describe the bug When debugging another issue, I found there are failures in dagit-berkeley with claim_and_run_apply_changesheet_jobs. It appears to be a permission error. To Reproduce Steps to reproduce the behavior:
Expected behavior See behavior on dev https://dagit-dev.microbiomedata.org/locations/repo@nmdc_runtime.site.repository%3Arepo/sensors/claim_and_run_apply_changesheet_jobs The latest tick should most frequently have a status of "0 runs requested".
Acceptance Criteria No failures for claim_and_run_apply_changesheet_jobs