globaldothealth / list

Repository for Global.health: a data science initiative to enable rapid sharing of trusted and open public health data to advance the response to infectious diseases.
MIT License
39 stars 7 forks source link

Catch bulk write errors when document validation fails #994

Closed attwad closed 3 years ago

attwad commented 3 years ago

Looking at prod logs, I see:

BulkWriteError: Document failed validation
    at UnorderedBulkOperation.handleWriteError (/usr/src/app/node_modules/mongodb/lib/bulk/common.js:1257:9)
    at UnorderedBulkOperation.handleWriteError (/usr/src/app/node_modules/mongodb/lib/bulk/unordered.js:117:18)
    at resultHandler (/usr/src/app/node_modules/mongodb/lib/bulk/common.js:521:23)
    at handler (/usr/src/app/node_modules/mongodb/lib/core/sdam/topology.js:942:24)
    at /usr/src/app/node_modules/mongodb/lib/cmap/connection_pool.js:356:13
    at handleOperationResult (/usr/src/app/node_modules/mongodb/lib/core/sdam/server.js:558:5)
    at MessageStream.messageHandler (/usr/src/app/node_modules/mongodb/lib/cmap/connection.js:275:5)
    at MessageStream.emit (events.js:315:20)
    at processIncomingData (/usr/src/app/node_modules/mongodb/lib/cmap/message_stream.js:144:12)
    at MessageStream._write (/usr/src/app/node_modules/mongodb/lib/cmap/message_stream.js:42:5)
    at doWrite (_stream_writable.js:403:12)
    at writeOrBuffer (_stream_writable.js:387:5)
    at MessageStream.Writable.write (_stream_writable.js:318:11)
    at TLSSocket.ondata (_stream_readable.js:716:22)
    at TLSSocket.emit (events.js:315:20)
    at addChunk (_stream_readable.js:295:12) {
  driver: true,
  code: 121,
  writeErrors: [
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    WriteError { err: [Object] },
    ... 900 more items
  ],
  result: BulkWriteResult {
    result: {
      ok: 1,
      writeErrors: [Array],
      writeConcernErrors: [],
      insertedIds: [],
      nInserted: 0,
      nUpserted: 0,
      nMatched: 0,
      nModified: 0,
      nRemoved: 0,
      upserted: [],
      lastOp: [Object]
    }
  }
}

We're not looking at writeErrors, so all these details end up being swallowed :(

FYI @axmb

attwad commented 3 years ago

Here's an actual document schema error:

{
    "name": "BulkWriteError",
    "driver": true,
    "code": 121,
    "writeErrors": [
        {
            "code": 121,
            "index": 0,
            "errmsg": "Document failed validation",
            "op": {
                "q": {
                    "caseReference.sourceId": "5f5b5ae6def27b58cfb60471",
                    "caseReference.sourceEntryId": "1"
                },
                "u": {
                    "$set": {
                        "revisionMetadata": {
                            "revisionNumber": 0,
                            "creationMetadata": {
                                "curator": "ingestion@covid-19-map-277002.iam.gserviceaccount.com",
                                "date": "2020-09-11T12:24:08.719Z"
                            }
                        },
                        "symptoms": {
                            "values": [],
                            "status": "Symptomatic"
                        },
                        "notes": "Imported case",
                        "events": [
                            {
                                "name": "confirmed",
                                "dateRange": {
                                    "start": "2020-01-23T00:00:00.000Z",
                                    "end": "2020-01-23T00:00:00.000Z"
                                }
                            },
                            {
                                "name": "onsetSymptoms",
                                "dateRange": {
                                    "start": "2020-01-21T00:00:00.000Z",
                                    "end": "2020-01-21T00:00:00.000Z"
                                }
                            },
                            {
                                "name": "outcome",
                                "value": "Recovered"
                            }
                        ],
                        "demographics": {
                            "nationalities": [],
                            "gender": "Male",
                            "ageRange": {
                                "start": 39,
                                "end": 39
                            }
                        },
                        "location": {
                            "country": "China",
                            "administrativeAreaLevel1": "Hong Kong",
                            "geoResolution": "Admin1",
                            "name": "Hong Kong",
                            "geometry": {
                                "longitude": 114.15861,
                                "latitude": 22.27833
                            }
                        },
                        "caseReference": {
                            "uploadIds": [
                                "5f5b6c51d5b11efa26bae2c5"
                            ],
                            "verificationStatus": "UNVERIFIED",
                            "sourceId": "5f5b5ae6def27b58cfb60471",
                            "sourceEntryId": "1",
                            "sourceUrl": "http://www.chp.gov.hk/files/misc/enhanced_sur_covid_19_eng.csv",
                            "additionalSources": []
                        }
                    }
                },
                "multi": false,
                "upsert": true
            }
        }
    ]
}

I can't figure out what's wrong with this document tbh...

attwad commented 3 years ago

The document is valid; we just need to push the new schema to prod. @axmb will revive a script we had for doing that and push whenever convenient.

attwad commented 3 years ago

Also, as for the original issue that I reported: the individual errors are logged by the lambda (this is how I got the example above). This is good enough; I don't think we want to keep those in mongo.

attwad commented 3 years ago

Fixed by #997