Graylog2 / graylog2-server

Free and open log management
https://www.graylog.org

Graylog does not recover from a primary MongoDB failure #16401

Closed by ErmakovDmitriy 1 year ago

ErmakovDmitriy commented 1 year ago

We use Graylog deployed on Kubernetes (Rancher) with the https://artifacthub.io/packages/helm/kong-z/graylog Helm chart.

To keep the service available, we deployed it as follows (a rough sketch of the install commands is shown after the list):

  1. 2 Graylog Pods;
  2. 2 MongoDB (https://artifacthub.io/packages/helm/bitnami/mongodb) Pods + 1 Arbiter Pod (ReplicaSet mode);
  3. 3 Opensearch Pods.
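
Roughly, the installation looked like this. This is a sketch from memory: the repository aliases and --set values are placeholders and may not match the charts' exact keys.

# MongoDB replica set: 2 data-bearing members + 1 arbiter (Bitnami chart)
$ helm install mongo bitnami/mongodb \
    --set architecture=replicaset \
    --set replicaCount=2 \
    --set arbiter.enabled=true

# OpenSearch with 3 nodes
$ helm install opensearch opensearch/opensearch --set replicas=3

# Graylog with 2 replicas (kong-z chart)
$ helm install graylog-test kong-z/graylog --set graylog.replicas=2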

Here is the current state:

$ kubectl get pod       
NAME                                                              READY   STATUS    RESTARTS   AGE
graylog-test-0                                                    2/2     Running   0          20h
mongo-mongodb-0                                                   2/2     Running   0          20h
mongo-mongodb-1                                                   2/2     Running   0          23h
mongo-mongodb-arbiter-0                                           1/1     Running   0          23h
opensearch-cluster-master-0                                       1/1     Running   0          39h
opensearch-cluster-master-1                                       1/1     Running   0          39h
opensearch-cluster-master-2                                       1/1     Running   0          39h
opensearch-exporter-prometheus-elasticsearch-exporter-5ccdn8hhc   1/1     Running   0          45h

Here is the MongoDB status under normal operations:

{
  set: 'rs0',
  date: ISODate("2023-09-08T05:56:57.597Z"),
  myState: 2,
  term: Long("28"),
  syncSourceHost: 'mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017',
  syncSourceId: 0,
  heartbeatIntervalMillis: Long("2000"),
  majorityVoteCount: 2,
  writeMajorityCount: 2,
  votingMembersCount: 3,
  writableVotingMembersCount: 2,
  optimes: {
    lastCommittedOpTime: { ts: Timestamp({ t: 1694152617, i: 4 }), t: Long("28") },
    lastCommittedWallTime: ISODate("2023-09-08T05:56:57.519Z"),
    readConcernMajorityOpTime: { ts: Timestamp({ t: 1694152617, i: 4 }), t: Long("28") },
    appliedOpTime: { ts: Timestamp({ t: 1694152617, i: 4 }), t: Long("28") },
    durableOpTime: { ts: Timestamp({ t: 1694152617, i: 4 }), t: Long("28") },
    lastAppliedWallTime: ISODate("2023-09-08T05:56:57.519Z"),
    lastDurableWallTime: ISODate("2023-09-08T05:56:57.519Z")
  },
  lastStableRecoveryTimestamp: Timestamp({ t: 1694152603, i: 2 }),
  electionParticipantMetrics: {
    votedForCandidate: false,
    electionTerm: Long("28"),
    lastVoteDate: ISODate("2023-09-07T11:34:02.121Z"),
    electionCandidateMemberId: 0,
    voteReason: "candidate's data is staler than mine. candidate's last applied OpTime: { ts: Timestamp(1694086442, 16), t: 27 }, my last applied OpTime: { ts: Timestamp(1694086442, 18), t: 27 }",
    lastAppliedOpTimeAtElection: { ts: Timestamp({ t: 1694086442, i: 18 }), t: Long("27") },
    maxAppliedOpTimeInSet: { ts: Timestamp({ t: 1694086442, i: 18 }), t: Long("27") },
    priorityAtElection: 1,
    newTermStartDate: ISODate("2023-09-07T11:34:02.123Z"),
    newTermAppliedDate: ISODate("2023-09-07T11:34:13.157Z")
  },
  members: [
    {
      _id: 0,
      name: 'mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017',
      health: 1,
      state: 1,
      stateStr: 'PRIMARY',
      uptime: 66182,
      optime: { ts: Timestamp({ t: 1694152616, i: 4 }), t: Long("28") },
      optimeDurable: { ts: Timestamp({ t: 1694152616, i: 4 }), t: Long("28") },
      optimeDate: ISODate("2023-09-08T05:56:56.000Z"),
      optimeDurableDate: ISODate("2023-09-08T05:56:56.000Z"),
      lastAppliedWallTime: ISODate("2023-09-08T05:56:56.519Z"),
      lastDurableWallTime: ISODate("2023-09-08T05:56:56.519Z"),
      lastHeartbeat: ISODate("2023-09-08T05:56:57.121Z"),
      lastHeartbeatRecv: ISODate("2023-09-08T05:56:56.256Z"),
      pingMs: Long("0"),
      lastHeartbeatMessage: '',
      syncSourceHost: '',
      syncSourceId: -1,
      infoMessage: '',
      electionTime: Timestamp({ t: 1694086442, i: 19 }),
      electionDate: ISODate("2023-09-07T11:34:02.000Z"),
      configVersion: 93217,
      configTerm: -1
    },
    {
      _id: 2,
      name: 'mongo-mongodb-1.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017',
      health: 1,
      state: 2,
      stateStr: 'SECONDARY',
      uptime: 84387,
      optime: { ts: Timestamp({ t: 1694152617, i: 4 }), t: Long("28") },
      optimeDate: ISODate("2023-09-08T05:56:57.000Z"),
      lastAppliedWallTime: ISODate("2023-09-08T05:56:57.519Z"),
      lastDurableWallTime: ISODate("2023-09-08T05:56:57.519Z"),
      syncSourceHost: 'mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017',
      syncSourceId: 0,
      infoMessage: '',
      configVersion: 93217,
      configTerm: -1,
      self: true,
      lastHeartbeatMessage: ''
    },
    {
      _id: 4,
      name: 'mongo-mongodb-arbiter-0.mongo-mongodb-arbiter-headless.graylog-test.svc.cluster.local:27017',
      health: 1,
      state: 7,
      stateStr: 'ARBITER',
      uptime: 84343,
      lastHeartbeat: ISODate("2023-09-08T05:56:56.450Z"),
      lastHeartbeatRecv: ISODate("2023-09-08T05:56:56.357Z"),
      pingMs: Long("0"),
      lastHeartbeatMessage: '',
      syncSourceHost: '',
      syncSourceId: -1,
      infoMessage: '',
      configVersion: 93217,
      configTerm: -1
    }
  ],
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1694152617, i: 4 }),
    signature: {
      hash: Binary(Buffer.from("b75b9c7237f7b7e47c72d4b50f9b4c030a2167dc", "hex"), 0),
      keyId: Long("7275644989919461383")
    }
  },
  operationTime: Timestamp({ t: 1694152617, i: 4 })
}

This topology, from my understanding, is supposed to survive the failure of either one Graylog Pod or one of the MongoDB replica Pods.
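
For context, the rs.status() output above already shows votingMembersCount: 3 but writableVotingMembersCount: 2: the arbiter votes in elections but never acknowledges writes. A quick way to read those counters (credentials are placeholders):

# credentials are placeholders
$ mongosh "mongodb://root:rootpassword@mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017/admin" \
    --quiet \
    --eval 'const s = rs.status(); printjson({ voting: s.votingMembersCount, writable: s.writableVotingMembersCount, writeMajority: s.writeMajorityCount })'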

Expected Behavior

When I delete the primary MongoDB Pod and make it unschedulable, so that Kubernetes cannot restart it (to simulate its failure), the MongoDB replica and arbiter complain for a while that the primary is lost, and then the replica is promoted to primary.
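
Roughly, the simulation looks like this (the node name is a placeholder; because the Pod uses local storage, it is pinned to that node and cannot be rescheduled elsewhere):

# placeholder for the node that hosts mongo-mongodb-0
$ kubectl cordon worker-node-1
# delete the current primary; the StatefulSet cannot reschedule it on the cordoned node
$ kubectl delete pod mongo-mongodb-0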

I expect Graylog to connect to the new primary and continue operation.

Current Behavior

After the primary MongoDB is killed, the secondary is promoted to be the new primary.

When I connect to the replica with MongoDB Compass or mongosh, I can see that the graylog database is there (MongoDB output is provided below in this section).

Graylog also complains for a while that it cannot contact MongoDB; it then discovers that the secondary has become primary, but it does not return to operation until the original primary MongoDB is started again. "Does not return to operation" means the Web UI never finishes loading and stays in the "loading" state. This behavior persists for at least an hour (I did not try waiting longer).

MongoDB re-elects the secondary as the new primary (status taken from mongo-mongodb-1, since the normal-operation primary mongo-mongodb-0 is dead):

{
  set: 'rs0',
  date: ISODate("2023-09-08T06:32:11.019Z"),
  myState: 1,
  term: Long("33"),
  syncSourceHost: '',
  syncSourceId: -1,
  heartbeatIntervalMillis: Long("2000"),
  majorityVoteCount: 2,
  writeMajorityCount: 2,
  votingMembersCount: 3,
  writableVotingMembersCount: 2,
  optimes: {
    lastCommittedOpTime: { ts: Timestamp({ t: 1694154276, i: 2 }), t: Long("33") },
    lastCommittedWallTime: ISODate("2023-09-08T06:24:36.841Z"),
    readConcernMajorityOpTime: { ts: Timestamp({ t: 1694154276, i: 2 }), t: Long("33") },
    appliedOpTime: { ts: Timestamp({ t: 1694154722, i: 1 }), t: Long("33") },
    durableOpTime: { ts: Timestamp({ t: 1694154722, i: 1 }), t: Long("33") },
    lastAppliedWallTime: ISODate("2023-09-08T06:32:02.350Z"),
    lastDurableWallTime: ISODate("2023-09-08T06:32:02.350Z")
  },
  lastStableRecoveryTimestamp: Timestamp({ t: 1694154276, i: 2 }),
  electionCandidateMetrics: {
    lastElectionReason: 'electionTimeout',
    lastElectionDate: ISODate("2023-09-08T06:24:32.337Z"),
    electionTerm: Long("33"),
    lastCommittedOpTimeAtElection: { ts: Timestamp({ t: 1694154261, i: 4 }), t: Long("31") },
    lastSeenOpTimeAtElection: { ts: Timestamp({ t: 1694154261, i: 4 }), t: Long("31") },
    numVotesNeeded: 2,
    priorityAtElection: 1,
    electionTimeoutMillis: Long("10000"),
    numCatchUpOps: Long("0"),
    newTermStartDate: ISODate("2023-09-08T06:24:32.341Z"),
    wMajorityWriteAvailabilityDate: ISODate("2023-09-08T06:24:32.962Z")
  },
  electionParticipantMetrics: {
    votedForCandidate: true,
    electionTerm: Long("31"),
    lastVoteDate: ISODate("2023-09-08T06:10:40.068Z"),
    electionCandidateMemberId: 0,
    voteReason: '',
    lastAppliedOpTimeAtElection: { ts: Timestamp({ t: 1694153439, i: 4 }), t: Long("30") },
    maxAppliedOpTimeInSet: { ts: Timestamp({ t: 1694153439, i: 4 }), t: Long("30") },
    priorityAtElection: 1
  },
  members: [
    {
      _id: 0,
      name: 'mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017',
      health: 0,
      state: 8,
      stateStr: '(not reachable/healthy)',
      uptime: 0,
      optime: { ts: Timestamp({ t: 0, i: 0 }), t: Long("-1") },
      optimeDurable: { ts: Timestamp({ t: 0, i: 0 }), t: Long("-1") },
      optimeDate: ISODate("1970-01-01T00:00:00.000Z"),
      optimeDurableDate: ISODate("1970-01-01T00:00:00.000Z"),
      lastAppliedWallTime: ISODate("2023-09-08T06:24:36.841Z"),
      lastDurableWallTime: ISODate("2023-09-08T06:24:36.841Z"),
      lastHeartbeat: ISODate("2023-09-08T06:32:10.422Z"),
      lastHeartbeatRecv: ISODate("2023-09-08T06:24:37.339Z"),
      pingMs: Long("0"),
      lastHeartbeatMessage: 'Error connecting to mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017 :: caused by :: Could not find address for mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017: SocketException: Host not found (authoritative)',
      syncSourceHost: '',
      syncSourceId: -1,
      infoMessage: '',
      configVersion: 93217,
      configTerm: -1
    },
    {
      _id: 2,
      name: 'mongo-mongodb-1.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017',
      health: 1,
      state: 1,
      stateStr: 'PRIMARY',
      uptime: 86501,
      optime: { ts: Timestamp({ t: 1694154722, i: 1 }), t: Long("33") },
      optimeDate: ISODate("2023-09-08T06:32:02.000Z"),
      lastAppliedWallTime: ISODate("2023-09-08T06:32:02.350Z"),
      lastDurableWallTime: ISODate("2023-09-08T06:32:02.350Z"),
      syncSourceHost: '',
      syncSourceId: -1,
      infoMessage: '',
      electionTime: Timestamp({ t: 1694154272, i: 1 }),
      electionDate: ISODate("2023-09-08T06:24:32.000Z"),
      configVersion: 93217,
      configTerm: -1,
      self: true,
      lastHeartbeatMessage: ''
    },
    {
      _id: 4,
      name: 'mongo-mongodb-arbiter-0.mongo-mongodb-arbiter-headless.graylog-test.svc.cluster.local:27017',
      health: 1,
      state: 7,
      stateStr: 'ARBITER',
      uptime: 86457,
      lastHeartbeat: ISODate("2023-09-08T06:32:10.519Z"),
      lastHeartbeatRecv: ISODate("2023-09-08T06:32:10.421Z"),
      pingMs: Long("0"),
      lastHeartbeatMessage: '',
      syncSourceHost: '',
      syncSourceId: -1,
      infoMessage: '',
      configVersion: 93217,
      configTerm: -1
    }
  ],
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1694154722, i: 1 }),
    signature: {
      hash: Binary(Buffer.from("b2ef0f7fcb9b989340f7c6ddc2e6ff4cd56db4c7", "hex"), 0),
      keyId: Long("7275644989919461383")
    }
  },
  operationTime: Timestamp({ t: 1694154722, i: 1 })
}

Graylog's logs are attached as a file: graylog.txt. Line 180: Graylog discovers that the primary MongoDB is stopping. Line 515: Graylog's monitor connects to the new primary and reports that its status is REPLICA_SET_PRIMARY.

Steps to Reproduce (for bugs)

  1. Deploy MongoDB as 2 replicas + 1 arbiter
  2. Deploy Opensearch
  3. Deploy Graylog to use the MongoDB
  4. Stop the primary MongoDB
  5. Observe the error:

[screenshot of the error attached in the original issue]
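
After step 4, the failover itself can be confirmed from the surviving member, for example (the container name and credentials are assumptions on my side):

$ kubectl exec mongo-mongodb-1 -c mongodb -- mongosh --quiet \
    -u root -p rootpassword --authenticationDatabase admin \
    --eval 'rs.status().members.map(m => m.name + ": " + m.stateStr)'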

Context

By design, we use Kubernetes nodes with local storage only, so that our logging solution does not depend on external systems (logging should be available for troubleshooting when everything else is broken).

For that reason we rely on application-level high availability: 2 Graylog replicas + Kubernetes load balancing, 2 MongoDB replicas, and 3 OpenSearch nodes (each index has 1-2 replicas).

The fact that Graylog fails to "reconnect" to a new primary MongoDB makes this goal somewhat difficult to achieve.

Your Environment

Graylog configuration is attached: graylog.conf.txt

Environment variables:

GRAYLOG_ELASTICSEARCH_HOSTS: https://admin:password@opensearch-cluster-master-0.opensearch-cluster-master-headless.graylog-test.svc.cluster.local:9200,https://admin:password@opensearch-cluster-master-1.opensearch-cluster-master-headless.graylog-test.svc.cluster.local:9200,https://admin:password@opensearch-cluster-master-2.opensearch-cluster-master-headless.graylog-test.svc.cluster.local:9200

GRAYLOG_MONGODB_URI: mongodb://graylog:password@mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017,mongo-mongodb-1.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017/graylog?replicaSet=rs0

MongoDB is configured as replicaSet=rs0.
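
For what it is worth, which member the driver currently sees as primary can be checked with the same connection string (the password is a placeholder):

$ mongosh "mongodb://graylog:password@mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017,mongo-mongodb-1.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017/graylog?replicaSet=rs0" \
    --quiet --eval 'db.hello().primary'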

moesterheld commented 1 year ago

@ErmakovDmitriy In your MongoDB setup, the default write concern should be set to 1 (see https://www.mongodb.com/docs/manual/reference/mongodb-defaults/#default-write-concern). Can you please check that it is set accordingly by issuing db.adminCommand({getDefaultRWConcern: 1})? If you need to adjust it, you can use db.adminCommand({ "setDefaultRWConcern": 1, "defaultWriteConcern": { "w": 1 } });

I can reproduce the issue if this is not set correctly. Also, in a case like that, other applications such as MongoDB Compass will appear to freeze on write operations.
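
For reference, the check can also be scripted, for example (credentials and hostname are placeholders):

$ mongosh "mongodb://root:rootpassword@mongo-mongodb-0.mongo-mongodb-headless.graylog-test.svc.cluster.local:27017/admin" \
    --quiet --eval 'printjson(db.adminCommand({ getDefaultRWConcern: 1 }).defaultWriteConcern)'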

ErmakovDmitriy commented 1 year ago

Hi @moesterheld ,

Thank you for your explanation. Yes, I had the write concern set to "majority" (see below). After I changed it as you recommended, it seems to work.

May I ask whether it is possible to add this information to the official Graylog deployment documentation? There may be more people with near-zero knowledge of how MongoDB works, and this information could help them.

rs0 [direct: primary] test> db.adminCommand({getDefaultRWConcern:1})
{
  defaultReadConcern: { level: 'local' },
  defaultWriteConcern: { w: 'majority', wtimeout: 0 },
  updateOpTime: Timestamp({ t: 1694067864, i: 1 }),
  updateWallClockTime: ISODate("2023-09-07T06:24:29.549Z"),
  defaultWriteConcernSource: 'global',
  defaultReadConcernSource: 'implicit',
  localUpdateWallClockTime: ISODate("2023-09-08T06:49:24.548Z"),
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1695372883, i: 4 }),
    signature: {
      hash: Binary(Buffer.from("1f9cf21ed677f937cdda1c219a9b3d6e720a25b2", "hex"), 0),
      keyId: Long("7275644989919461383")
    }
  },
  operationTime: Timestamp({ t: 1695372883, i: 4 })
}
rs0 [direct: primary] test> 

rs0 [direct: primary] test> db.adminCommand({ "setDefaultRWConcern": 1, "defaultWriteConcern": { "w": 1 } });
{
  defaultReadConcern: { level: 'local' },
  defaultWriteConcern: { w: 1, wtimeout: 0 },
  updateOpTime: Timestamp({ t: 1695372905, i: 2 }),
  updateWallClockTime: ISODate("2023-09-22T08:55:05.503Z"),
  defaultWriteConcernSource: 'global',
  defaultReadConcernSource: 'implicit',
  localUpdateWallClockTime: ISODate("2023-09-22T08:55:05.504Z"),
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1695372905, i: 3 }),
    signature: {
      hash: Binary(Buffer.from("fae4045750fbc04915eade51cf494999ff97a7c5", "hex"), 0),
      keyId: Long("7275644989919461383")
    }
  },
  operationTime: Timestamp({ t: 1695372905, i: 3 })
}
rs0 [direct: primary] test> 

rs0 [direct: primary] test> db.adminCommand({getDefaultRWConcern:1})
{
  defaultReadConcern: { level: 'local' },
  defaultWriteConcern: { w: 1, wtimeout: 0 },
  updateOpTime: Timestamp({ t: 1695372905, i: 2 }),
  updateWallClockTime: ISODate("2023-09-22T08:55:05.503Z"),
  defaultWriteConcernSource: 'global',
  defaultReadConcernSource: 'implicit',
  localUpdateWallClockTime: ISODate("2023-09-22T08:55:05.504Z"),
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1695372909, i: 4 }),
    signature: {
      hash: Binary(Buffer.from("f4de4c21ef847ee0d294c1272a3dbe54eccc82f3", "hex"), 0),
      keyId: Long("7275644989919461383")
    }
  },
  operationTime: Timestamp({ t: 1695372909, i: 4 })
}
rs0 [direct: primary] test> 
moesterheld commented 1 year ago

@ErmakovDmitriy Glad to help. I will forward the request to update the documentation to our docs team.