microsoft / ghcrawler

Crawl GitHub APIs and store the discovered orgs, repos, commits, ...
MIT License
373 stars 90 forks source link

mongodb: _metadata.links.self.href missing index #133

Closed grooverdan closed 6 years ago

grooverdan commented 6 years ago
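Problem: writes to the `ghcrawler.commit` collection are slow. `mongotop` shows about 4 seconds of write time in a 5-second sample:

 docker exec -ti docker_mongo_1 mongotop 5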
2018-06-07T05:21:41.610+0000    connected to: 127.0.0.1

                            ns     total    read     write    2018-06-07T05:21:46Z
              ghcrawler.commit    4009ms     7ms    4002ms                        
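...

Example of one slow update: the query on `_metadata.links.self.href` is planned as a `COLLSCAN` that examines 117266 documents and matches none: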
{
    "op" : "update",
    "ns" : "ghcrawler.commit",
    "command" : {
        "q" : {
            "_metadata.links.self.href" : "urn:repo:19816070:commit:492e7b081168f1922ef6409ebba77dbf30638185"
        },
        "u" : {
....
    "millis" : 153,
    "planSummary" : "COLLSCAN",
    "execStats" : {
        "stage" : "UPDATE",
        "nReturned" : 0,
        "executionTimeMillisEstimate" : 150,
        "works" : 117269,
        "advanced" : 0,
        "needTime" : 117268,
        "needYield" : 0,
        "saveState" : 916,
        "restoreState" : 916,
        "isEOF" : 1,
        "invalidates" : 0,
        "nMatched" : 0,
        "nWouldModify" : 0,
        "nInvalidateSkips" : 0,
        "wouldInsert" : true,
        "fastmodinsert" : true,
        "inputStage" : {
            "stage" : "COLLSCAN",
            "filter" : {
                "_metadata.links.self.href" : {
                    "$eq" : "urn:repo:19816070:commit:492e7b081168f1922ef6409ebba77dbf30638185"
                }
            },
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 150,
            "works" : 117268,
            "advanced" : 0,
            "needTime" : 117267,
            "needYield" : 0,
            "saveState" : 916,
            "restoreState" : 916,
            "isEOF" : 1,
            "invalidates" : 0,
            "direction" : "forward",
            "docsExamined" : 117266
        }
    },
    "ts" : ISODate("2018-06-07T05:22:13.836Z"),
    "client" : "172.18.0.5",
    "allUsers" : [ ],

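Solution: create a hashed index on `_metadata.links.self.href` so the update's query no longer needs a collection scan: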

> db.commit.createIndex( { "_metadata.links.self.href":  "hashed" } )
{
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 2,
    "numIndexesAfter" : 3,
    "ok" : 1
}

After creating the index, `mongotop` reports:

                            ns    total    read    write    2018-06-07T05:40:28Z
              ghcrawler.commit      8ms     8ms      0ms