dotnet / efcore

EF Core is a modern object-database mapper for .NET. It supports LINQ queries, change tracking, updates, and schema migrations.
https://docs.microsoft.com/ef/
MIT License
13.65k stars 3.15k forks source link

Left Join (GroupJoin-DefaultIfEmpty-SelectMany) will not be translated with ExpressionVisitor if no cache can hit #32742

Closed universorum closed 8 months ago

universorum commented 8 months ago

I'm writing the extension method for Left Join Linq. In normal GroupJoin-DefaultIfEmpty-SelectMany case, we need to pass the Expression<Func<TRight, TLeft, TTemp>> and Expression<Func<(TRight, TLeft), TLeft, TResult?>> to make Left Join query. I want to wrap it so caller who can pass the single argument Expression<Func<TRight, TLeft>> to do same functional.

I'm noticed that the translate will fail if I pass the Expression that created by ExpressionVisitor.

If I compile query that using the Expression that maked with same parameter name directly before using the Expression that created by ExpressionVisitor, It will compiled. Change the name of parameter will get failed again.

using Microsoft.EntityFrameworkCore;
namespace EfLab;
public class Parent
{
    public int Id { get; set; }
}

public class Chlid
{
    public int Id       { get; set; }
    public int ParentId { get; set; }
}

public class Context : DbContext
{
    public DbSet<Parent> Parents { get; set; }
    public DbSet<Chlid>  Chlids  { get; set; }

    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
    {
        base.OnConfiguring(optionsBuilder);

        optionsBuilder.UseSqlite("Data Source=InMemorySample;Mode=Memory;Cache=Shared");
    }
}

public static class QueryableExtension
{
    public static IQueryable<TResult> LeftJoin<TRight, TLeft, TKey, TResult>(
        this IQueryable<TRight>                  rightQueryable,
        IEnumerable<TLeft>                       leftQueryable,
        Expression<Func<TRight, TKey>>           rightKeySelector,
        Expression<Func<TLeft, TKey>>            leftKeySelector,
        Expression<Func<TRight, TLeft, TResult>> resultSelector)
    {
        var selector = ParameterReplacer.Rewrite(resultSelector);
        return rightQueryable
            .GroupJoin(leftQueryable,
                rightKeySelector,
                leftKeySelector,
                (right, left) => new Group<TRight, TLeft> { Right = right, Left = left })
            .SelectMany(t => t.Left.DefaultIfEmpty()!, selector);
    }

    public class Group<TRight, TLeft>
    {
        public TRight             Right { get; init; }
        public IEnumerable<TLeft> Left  { get; init; }
    }

    private class ParameterReplacer : ExpressionVisitor
    {
        private readonly ParameterExpression _oldParam;
        private readonly Expression          _replacement;

        private static PropertyInfo? _info;

        private ParameterReplacer(ParameterExpression oldParam, Expression replacement)
        {
            _oldParam    = oldParam;
            _replacement = replacement;
        }

        protected override Expression VisitParameter(ParameterExpression node)
        {
            return node == _oldParam ? _replacement : node;
        }

        public static Expression<Func<Group<TRight, TLeft>, TLeft, TResult>> Rewrite<TRight, TLeft, TResult>(
            Expression<Func<TRight, TLeft, TResult>> resultSelector)
        {
            _info ??= typeof(Group<TRight, TLeft>).GetProperty(nameof(Group<TRight, TLeft>.Right),
                BindingFlags.Public | BindingFlags.Instance)!;

            var oldParam    = resultSelector.Parameters[0];
            var newParam    = Expression.Parameter(typeof(Group<TRight, TLeft>), oldParam.Name);
            var memberParam = Expression.MakeMemberAccess(newParam, _info);
            var leftParam   = Expression.Parameter(typeof(TLeft), resultSelector.Parameters[1].Name);

            var replacer = new ParameterReplacer(oldParam, memberParam);

            var replacedBody = replacer.Visit(resultSelector.Body);
            var result =
                Expression.Lambda<Func<Group<TRight, TLeft>, TLeft, TResult>>(replacedBody, newParam, leftParam);
            return result;
        }
    }
}
using EfLab;
using Microsoft.EntityFrameworkCore;

using var shared = new Context();
shared.Database.EnsureCreated();

// No cache can hit, Fail.
using (var context = new Context())
{
    var query = context.Parents.LeftJoin(context.Chlids, b => b.Id, p => p.ParentId, (r, l) => l);

    Console.WriteLine(query.Expression.ToString());
    try { Console.WriteLine(query.ToQueryString()); }
    catch (Exception e) { Console.WriteLine(e); }
}

Console.WriteLine();

// Make Expression directly with same name of parameter, work fine.
using (var context = new Context())
{
    var query = context.Parents
        .GroupJoin(context.Chlids,
            b => b.Id,
            p => p.ParentId,
            (right, left) => new QueryableExtension.Group<Parent, Chlid> { Right = right, Left = left })
        .SelectMany(t => t.Left.DefaultIfEmpty(), (r, l) => l);

    Console.WriteLine(query.Expression.ToString());
    Console.WriteLine(query.ToQueryString());
}

Console.WriteLine();

// Create the Expression via ExpressionVisitor with same name of parameter, work fine.
using (var context = new Context())
{
    var query = context.Parents.LeftJoin(context.Chlids, b => b.Id, p => p.ParentId, (r, l) => l);

    Console.WriteLine(query.Expression.ToString());
    Console.WriteLine(query.ToQueryString());
}

Console.WriteLine();

// Create the Expression via ExpressionVisitor but change the name of parameter, get failed.
using (var context = new Context())
{
    var query = context.Parents.LeftJoin(context.Chlids, b => b.Id, p => p.ParentId, (r2, l2) => l2);

    Console.WriteLine(query.Expression.ToString());
    try { Console.WriteLine(query.ToQueryString()); }
    catch (Exception e) { Console.WriteLine(e); }
}

Stack traces

System.InvalidOperationException: The LINQ expression 'l2' could not be translated. Either rewrite the query in a form that can be translated, or switch to client evaluation explicitly by inserting a call to 'AsEnumerable', 'AsAsyncEnumerable', 'ToList', or 'ToListAsync'. See https://go.microsoft.com/fwlink/?linkid=2101038 for more information.
   at Microsoft.EntityFrameworkCore.Query.RelationalSqlTranslatingExpressionVisitor.VisitParameter(ParameterExpression parameterExpression)
   at Microsoft.EntityFrameworkCore.Query.RelationalSqlTranslatingExpressionVisitor.TranslateInternal(Expression expression, Boolean applyDefaultTypeMapping)
   at Microsoft.EntityFrameworkCore.Query.RelationalSqlTranslatingExpressionVisitor.TranslateProjection(Expression expression, Boolean applyDefaultTypeMapping)
   at Microsoft.EntityFrameworkCore.Query.Internal.RelationalProjectionBindingExpressionVisitor.Visit(Expression expression)
   at Microsoft.EntityFrameworkCore.Query.Internal.RelationalProjectionBindingExpressionVisitor.Translate(SelectExpression selectExpression, Expression expression)
   at Microsoft.EntityFrameworkCore.Query.RelationalQueryableMethodTranslatingExpressionVisitor.TranslateSelect(ShapedQueryExpression source, LambdaExpression selector)
   at Microsoft.EntityFrameworkCore.Query.QueryableMethodTranslatingExpressionVisitor.VisitMethodCall(MethodCallExpression methodCallExpression)
   at Microsoft.EntityFrameworkCore.Query.RelationalQueryableMethodTranslatingExpressionVisitor.VisitMethodCall(MethodCallExpression methodCallExpression)
   at Microsoft.EntityFrameworkCore.Query.QueryableMethodTranslatingExpressionVisitor.Translate(Expression expression)
   at Microsoft.EntityFrameworkCore.Query.RelationalQueryableMethodTranslatingExpressionVisitor.Translate(Expression expression)
   at Microsoft.EntityFrameworkCore.Query.QueryCompilationContext.CreateQueryExecutor[TResult](Expression query)
   at Microsoft.EntityFrameworkCore.Storage.Database.CompileQuery[TResult](Expression query, Boolean async)
   at Microsoft.EntityFrameworkCore.Query.Internal.QueryCompiler.CompileQueryCore[TResult](IDatabase database, Expression query, IModel model, Boolean async)
   at Microsoft.EntityFrameworkCore.Query.Internal.QueryCompiler.<>c__DisplayClass9_0`1.<Execute>b__0()
   at Microsoft.EntityFrameworkCore.Query.Internal.CompiledQueryCache.GetOrAddQuery[TResult](Object cacheKey, Func`1 compiler)
   at Microsoft.EntityFrameworkCore.Query.Internal.QueryCompiler.Execute[TResult](Expression query)
   at Microsoft.EntityFrameworkCore.Query.Internal.EntityQueryProvider.Execute[TResult](Expression expression)
   at Microsoft.EntityFrameworkCore.EntityFrameworkQueryableExtensions.ToQueryString(IQueryable source)
   at Program.<Main>$(String[] args) in .\EfLab\EfLab\Program.cs

Expression

All 4 expression have same structure

.Call System.Linq.Queryable.SelectMany(
    .Call System.Linq.Queryable.GroupJoin(
        .Extension<Microsoft.EntityFrameworkCore.Query.EntityQueryRootExpression>,
        .Extension<Microsoft.EntityFrameworkCore.Query.EntityQueryRootExpression>,
        '(.Lambda #Lambda1<System.Func`2[EfLab.Parent,System.Int32]>),
        '(.Lambda #Lambda2<System.Func`2[EfLab.Chlid,System.Int32]>),
        '(.Lambda #Lambda3<System.Func`3[EfLab.Parent,System.Collections.Generic.IEnumerable`1[EfLab.Chlid],EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid]]>))
    ,
    '(.Lambda #Lambda4<System.Func`2[EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid],System.Collections.Generic.IEnumerable`1[EfLab.Chlid]]>),
    '(.Lambda #Lambda5<System.Func`3[EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid],EfLab.Chlid,EfLab.Chlid]>))

.Lambda #Lambda1<System.Func`2[EfLab.Parent,System.Int32]>(EfLab.Parent $b) {
    $b.Id
}

.Lambda #Lambda2<System.Func`2[EfLab.Chlid,System.Int32]>(EfLab.Chlid $p) {
    $p.ParentId
}

.Lambda #Lambda3<System.Func`3[EfLab.Parent,System.Collections.Generic.IEnumerable`1[EfLab.Chlid],EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid]]>(
    EfLab.Parent $right,
    System.Collections.Generic.IEnumerable`1[EfLab.Chlid] $left) {
    .New EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid](){
        Right = $right,
        Left = $left
    }
}

.Lambda #Lambda4<System.Func`2[EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid],System.Collections.Generic.IEnumerable`1[EfLab.Chlid]]>(EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid] $t)
{
    .Call System.Linq.Enumerable.DefaultIfEmpty($t.Left)
}

.Lambda #Lambda5<System.Func`3[EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid],EfLab.Chlid,EfLab.Chlid]>(
    EfLab.QueryableExtension+Group`2[EfLab.Parent,EfLab.Chlid] $r,
    EfLab.Chlid $l) {
    $l
}

Provider and version information

EF Core version: 7.0.14/8.0.0 Database provider: Find in SqlServer(EF7.0.14), Repro in SQLite(EF7.0.14/8.0.0) Target framework: Find .NET6.0(EF7.0.14), Repro in .NET6.0(EF7.0.14) / .NET8.0(EF7.0.14/8.0.0) Operating system: Windows 11 IDE: Rider 2023.3.2

roji commented 8 months ago

@universorum I haven't fully looked into the above, but are you able to provide a repro without using your extension method, i.e. only using the standard LINQ operators (GroupJoin, SelectMany...)? If so, that would probably indeed be an EF bug which we should fix. Otherwise, if the problem isn't reproducible via the standard operators, then that would be a bug in your extension nor producing the correct tree.

universorum commented 8 months ago

I created a new instance of parameter that are not rewrite by mistake.