shuxinqin / Chloe

A lightweight and high-performance Object/Relational Mapping(ORM) library for .NET --C#
https://github.com/shuxinqin/Chloe/wiki
MIT License
1.52k stars 457 forks source link

分库查询性能慢,如何优化? #397

Open Johnsonscottboger opened 4 hours ago

Johnsonscottboger commented 4 hours ago

您好,我根据Wiki中的分库demo写了一个测试程序,经过调试后已经能正常分库插入和查询,但是发现分库查询性能比单库查询之后再手动合并的速度要慢一倍之多,不知道我的程序该如何优化,请博主指出。

测试程序实现的功能是:创建一个名为 Person 的表,表字段包括自增Id, CreateTime创建时间。在将Person对象插入数据库时,根据CreateTime中的年份字段插入不同的分库。使用 SQLite 实现,例如 2023-1-1 插入test-sharding_2023.db2024-1-1 插入test-sharding_2024.db

下面是代码: 因网络原因,纯手动粘贴,对不齐问题还请见谅。

[Table]
public class Person
{
    [AutoIncrement]
    [Column(IsPrimaryKey = true)]
    public int Id { get; set; }

    public string Name { get; set; }

    public int Age { get; set; }

    public DateTime CreateTime { get; set; }
}
internal class PersonDbContextFactory : IDbContextProviderFactory
{
    private int _index;
    private string _dbFile;

    public PersonDbContextFactory(string dbFile, int index)
    {
            _index = index;
            _dbFile = dbFile;
    }

    public IDbContext CreateDbContext()
    {
            var connString = GetConnectionString(_dbFile, _index);
            var context = new SQLiteContext(() =>
            {
                return new SqliteConnection(connString);
            });
            return context;
    }

    public IDbContextProvider CreateDbContextProvider()
    {
            var connString = GetConnectionString(_dbFile, _index);
            var provider = new SQLiteContextProvider(() =>
            {
                var connection = new SqliteConnection(connString);
                return connection;
            });
            return provider;
    }

        public void CreateTable<T>(IDbContext context)
        {
            var generator = new SQLiteTableGenerator(context);
            generator.CreateTable(typeof(T));
        }

        private string GetConnectionString(string dbFile, int index)
        {
            var name = Path.GetFileNameWithoutExtension(dbFile);
            var extension = Path.GetExtension(dbFile);
            var newName = $"{name}_{index}{extension}";
            return $"Data Source={newName}";
        }
}
    internal class PersonCreateTimeRoutingStrategy : RoutingStrategy<DateTime>
    {
        public PersonCreateTimeRoutingStrategy(IShardingRoute route) : base(route)
        {
        }
        public override IEnumerable<RouteTable> ForEqual(DateTime value)
        {
            var year = value.Year.ToString();
            return this.Route.GetTables().Where(p => p.DataSource.Name == year);
        }

        //其他的实现省略
    }
    internal class PersonCreateTimeShardingRoute<T> : IShardingRoute
    {
        private IEnumerable<int> _years;
        private IRoutingStrategy _routingStrategy;
        public IEnumerable<RouteTable> AllTables { get; set; }

        public PersonCreateTimeShardingRoute(string dbFile, IEnumerable<int> years)
        {
            _years = years;
            _routingStrategy = new PersonCreateTimeRoutingStrategy(this);
            var tables = new List<RouteTable>();
            foreach (var year in _years)
            {
                var dbContextFactory = new PersonDbContextFactory(dbFile, year);
                var dbContext = dbContextFactory.CreateDbContext();
                dbContextFactory.CreateTable<T>(dbContext);

                var routeTable = new RouteTable()
                {
                    Name = typeof(T).Name,
                    DataSource = new RouteDataSource()
                    {
                        Name = year.ToString(),
                        DbContextProviderFactory = dbContextFactory,
                      }
                };
                tables.Add(routeTable);
            }
            this.AllTables = tables;
        }

        public IRoutingStrategy GetStrategy(MemberInfo member)
        {
            if (member.Name.Equals("createtime", StringComparison.OrdinalIgnoreCase))
                return _routingStrategy;
            return null;
        }

        public IEnumerable<RouteTable> GetTables()
        {
            return this.AllTables;
        }

        public SortResult SortTables(List<RouteTable> tables, List<Ordering> orderings)
        {
            return new SortResult() { IsOrdered = false, Tables = tables };
        }
}

使用:

var context = new DbContext();
var shardingConfigBuilder = new ShardingConfigBuilder<Person>();
shardingConfigBuilder.HasShardingKey(p => p.CreateTime);
shardingConfigBuilder.HasRoute(new PersonCreateTimeShardingRoute<Person>("test-sharding.db", new int[] { 2020, 2021, 2022, 2023, 2024 }));
var shardingConfig = shardingConfigBuilder.Build();

context.HasShardingConfig(typeof(Person), shardingConfig);

//数据插入,首次执行时需要
//context.UseTransaction(() =>
//{
//    for (int i = 0; i < 100000; i++)
//    {
//        var person1 = new Person { Name = "Alice", Age = 30, CreateTime = new DateTime(2020, 1, 1) };
//       var person2 = new Person { Name = "Alice", Age = 30, CreateTime = new DateTime(2021, 1, 1) };
//        var person3 = new Person { Name = "Alice", Age = 30, CreateTime = new DateTime(2022, 1, 1) };
//        var person4 = new Person { Name = "Alice", Age = 30, CreateTime = new DateTime(2023, 1, 1) };
//        var person5 = new Person { Name = "Alice", Age = 30, CreateTime = new DateTime(2024, 1, 1) };

//        context.Insert(person1);
//        context.Insert(person2);
//        context.Insert(person3);
//        context.Insert(person4);
//        context.Insert(person5);
//    }
//});

//分库查询
//计时开始
 var list0 = context.Query<Person>().Where(p => p.Id > 0).ToList();
 Console.WriteLine($"{list0.Count}");
//计时结束   约 1758 ms

//单库查询后手动合并
var singleContext1 = new SQLiteContext(() =>
{
    var connection = new SqliteConnection($"Data Source=test-sharding_2020.db");
    return connection;
});
var singleContext2 = new SQLiteContext(() =>
{
    var connection = new SqliteConnection($"Data Source=test-sharding_2021.db");
    return connection;
});
var singleContext3 = new SQLiteContext(() =>
{
    var connection = new SqliteConnection($"Data Source=test-sharding_2022.db");
    return connection;
});
var singleContext4 = new SQLiteContext(() =>
{
    var connection = new SqliteConnection($"Data Source=test-sharding_2023.db");
    return connection;
});
var singleContext5 = new SQLiteContext(() =>
{
    var connection = new SqliteConnection($"Data Source=test-sharding_2024.db");
    return connection;
});

//计时开始
var list1 = singleContext1.Query<Person>().Where(p => p.Id > 0).ToList();
var list2 = singleContext2.Query<Person>().Where(p => p.Id > 0).ToList();
var list3 = singleContext3.Query<Person>().Where(p => p.Id > 0).ToList();
var list4 = singleContext4.Query<Person>().Where(p => p.Id > 0).ToList();
var list5 = singleContext5.Query<Person>().Where(p => p.Id > 0).ToList();
var list = list1.Concat(list2).Concat(list3).Concat(list4).Concat(list5).ToList();
Console.WriteLine($"{list.Count}");
//计时结束  约 872 ms

如上代码,单库查询时已经ToList尽可能模拟分库查询的操作,但多次测试结果表示分库查询耗时是单库查询的两倍以上。请问是否可以继续优化。

shuxinqin commented 3 hours ago

嗯,我测下。 每个库的数据有多大?10W? 是否可以把sqlite的数据文件和你的测试demo压缩上传上来呢?

Johnsonscottboger commented 2 hours ago

对的,每个库10W条数据,很抱歉我在公司内网环境写的测试,没法办法外网上传。