mbdavid / LiteDB

LiteDB - A .NET NoSQL Document Store in a single data file
http://www.litedb.org
MIT License
8.36k stars 1.22k forks source link

[BUG]OrderBy not using index #2427

Closed mirror222 closed 4 months ago

mirror222 commented 4 months ago

LiteDB 5.0.18 Net 8.0/6.0

I have a large table with over 500,000 records, and I have created indexes on the LabelIds and InternalDate fields. I am now encountering a performance issue, as shown in the following code:

internal class CThreads
{
        [BsonId]
        public int UID { get; set; }       
        public List<string> LabelIds { get; set; } = new List<string>();
        public string From { get; set; }
        public string Subject { get; set; }
        public string Snippet { get; set; }
        public long InternalDate { get; set; }
        public int IsDeleted { get; set; }
}
var connStr = new ConnectionString(@"filename='test5.db9'");

using (var _conn = new LiteDatabase(connStr))
{

    var _dbThreads = _conn.GetCollection<CThreads>();
    _dbThreads.EnsureIndex("date_index", x => x.InternalDate);
    _dbThreads.EnsureIndex("label_index", x => x.LabelIds);
    _dbThreads.EnsureIndex("subject_index", x => x.Subject);

    _conn.BeginTrans();
    var items = new List<CThreads>();
    for (var i = 100000; i < 600000; i++)
    {
        var item = new CThreads()
        {
            LabelIds = new List<string> { "NOTICE" },
            From = "Customer " + i.ToString(),
            Subject = "Subject " + i.ToString(),
            Snippet = "Snippet data " + i.ToString(),
            InternalDate = 1708300201000 + i * 10,
            IsDeleted = 0
        };
        items.Add(item);
        if (i % 50000 == 0)
        {
            _dbThreads.InsertBulk(items);
            items.Clear();
        }
    }
    _conn.Commit();

    //Where only test
    var watch = Stopwatch.StartNew();
    watch.Start();
    var query = _dbThreads.Query().Where(x => x.LabelIds.Contains("NOTICE"));               
    var results = query.Skip(1000).Limit(15).ToList();                 
    watch.Stop();
    Console.WriteLine($"Where ONLY : {watch.ElapsedMilliseconds} ms "); 

    //OrderBy only test
    watch.Restart();
    query = _dbThreads.Query();
    query = query.OrderByDescending(x => x.InternalDate);
    results = query.Skip(1000).Limit(15).ToList();
    watch.Stop();
    Console.WriteLine($"OrderBy ONLY: {watch.ElapsedMilliseconds} ms "); 

    //Where and OrderBy test
    watch.Restart();
    query = _dbThreads.Query();
    query = query.Where(x => x.LabelIds.Contains("NOTICE")).OrderByDescending(x => x.InternalDate);
    results = query.Skip(1000).Limit(15).ToList();
    watch.Stop();
    Console.WriteLine($"Both Where and OrderBy : {watch.ElapsedMilliseconds} ms "); 

}

The execution results are shown in the following figure:

image

I guess that LiteDB first uses the index to perform the conditional query, and then sorts (FileSort)?

mbdavid commented 4 months ago

Hi @mirror222

I guess that LiteDB first uses the index to perform the conditional query, and then sorts (FileSort)?

That's it. You can see explain plan in your query using EXPLAIN (in sql) or using GetPlan()

var queryPlan = _dbThreads.Query().Where(x => x.LabelIds.Contains("NOTICE")).GetPlan();
Print(queryPlan);

void Print(BsonDocument doc)
{
    var sb = new StringBuilder();
    var sw = new StringWriter(sb);
    var w = new JsonWriter(sw);
    w.Pretty = true;
    w.Serialize(doc);
    Console.WriteLine(sb);
}
{
    "collection": "CThreads",
    "snaphost": "read",
    "pipe": "queryPipe",
    "index":
    {
        "name": "label_index",
        "expr": "$.LabelIds[*]",
        "order": 1,
        "mode": "INDEX SEEK(label_index = \"NOTICE\")",
        "cost": 10
    },
    "lookup":
    {
        "loader": "document",
        "fields": "$"
    },
    "select":
    {
        "expr": "$",
        "all": false
    }
}
Where ONLY : 26 ms
{
    "collection": "CThreads",
    "snaphost": "read",
    "pipe": "queryPipe",
    "index":
    {
        "name": "date_index",
        "expr": "$.InternalDate",
        "order": -1,
        "mode": "FULL INDEX SCAN(date_index)",
        "cost": 100
    },
    "lookup":
    {
        "loader": "document",
        "fields": "$"
    },
    "select":
    {
        "expr": "$",
        "all": false
    }
}
OrderBy ONLY: 5 ms
{
    "collection": "CThreads",
    "snaphost": "read",
    "pipe": "queryPipe",
    "index":
    {
        "name": "label_index",
        "expr": "$.LabelIds[*]",
        "order": 1,
        "mode": "INDEX SEEK(label_index = \"NOTICE\")",
        "cost": 10
    },
    "lookup":
    {
        "loader": "document",
        "fields": "$"
    },
    "orderBy":
    {
        "expr": "$.InternalDate",
        "order": -1
    },
    "select":
    {
        "expr": "$",
        "all": false
    }
}
Both Where and OrderBy : 1239 ms