dotnet / efcore

EF Core is a modern object-database mapper for .NET. It supports LINQ queries, change tracking, updates, and schema migrations.
https://docs.microsoft.com/ef/
MIT License
13.77k stars 3.18k forks source link

Redundant inner SELECTs when loading Many-To-Many association #32541

Open nettashamir-allocate opened 11 months ago

nettashamir-allocate commented 11 months ago

Description

In our project we use explicit lazy loading of all associations. When loading entities via a many-to-many association (something like Airports - Airlines where an Airport can host many Airlines and an Airline can fly from many Airports) I would expect SQL of the form:

SELECT [a].[ID], [a].[Name], [a0].[AirlineID], [a0].[AirportID], [a1].[ID], [a1].[Name]
FROM [Airport] AS [a]
INNER JOIN [Airport_Airline] AS [a0] ON [a0].[AirportID] = [a].[ID]
LEFT JOIN [Airline] AS [a1] ON [a1].[ID] = [a0].[AirlineID]
WHERE [a].[ID] = 1
ORDER BY [a0].[AirportID], [a0].[AirlineID]

but instead I get:

SELECT [t].[ID], [t].[Name], [a].[ID], [t].[AirlineID], [t].[AirportID], [t0].[AirlineID], [t0].[AirportID], [t0].[ID], [t0].[Name]
FROM [Airport] AS [a]
INNER JOIN (
    SELECT [a1].[ID], [a1].[Name], [a0].[AirlineID], [a0].[AirportID]
    FROM [Airport_Airline] AS [a0]
    INNER JOIN [Airline] AS [a1] ON [a0].[AirlineID] = [a1].[ID]
) AS [t] ON [a].[ID] = [t].[AirportID]
LEFT JOIN (
    SELECT [a2].[AirlineID], [a2].[AirportID], [a3].[ID], [a3].[Name]
    FROM [Airport_Airline] AS [a2]
    INNER JOIN [Airport] AS [a3] ON [a2].[AirportID] = [a3].[ID]
    WHERE [a3].[ID] = 1
) AS [t0] ON [t].[ID] = [t0].[AirlineID]
WHERE [a].[ID] = 1
ORDER BY [a].[ID], [t].[AirlineID], [t].[AirportID], [t].[ID], [t0].[AirlineID], [t0].[AirportID]

This is overly complex and takes twice as long as the simple version.

Full Repro Code

public class Airport {
    public int ID { get; set; }
    public string Name { get; set; }
    public IEnumerable<Airline> Airlines { get; set; }
}

public class Airline {
    public int ID { get; set; }
    public string Name { get; set; }
    public IEnumerable<Airport> Airports { get; set; }
}

public class AirportMappings : IEntityTypeConfiguration<Airport> {
    public void Configure(EntityTypeBuilder<Airport> entity) {
        entity.ToTable("Airport");
        entity.HasKey(e => e.ID);
        entity.Property(e => e.Name);

        entity.HasMany("Airlines")
            .WithMany("Airports")
            .UsingEntity(
                "Airport_Airline",
                typeof(Dictionary<string, object>),
                entityThis => entityThis
                    .HasOne(typeof(Airline))
                    .WithMany()
                    .HasForeignKey("AirlineID")
                    .HasConstraintName("FK_Airport_Airline_AirlineID"),
                entityOther => entityOther
                    .HasOne(typeof(Airport))
                    .WithMany()
                    .HasForeignKey("AirportID")
                    .HasConstraintName("FK_Airport_Airline_AirportID"));
    }
}

public class Airline {
    public int ID { get; set; }
    public string Name { get; set; }
    public IEnumerable<Airport> Airports { get; set; }
}

public class AirlineMappings : IEntityTypeConfiguration<Airline> {
    public void Configure(EntityTypeBuilder<Airline> entity) {
        entity.ToTable("Airline");
        entity.HasKey(e => e.ID);
        entity.Property(e => e.Name);
    }
}

public class TestDbContext : DbContext {
    public DbSet<Airport> Airports { get; set; }
    public DbSet<Airline> Airlines { get; set; }

    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder) {
        optionsBuilder.LogTo(Console.WriteLine, LogLevel.Information).UseSqlServer(CONNECTION_STRING);
    }

    protected override void OnModelCreating(ModelBuilder modelBuilder) {
        modelBuilder.ApplyConfiguration(new AirportMappings());
        modelBuilder.ApplyConfiguration(new AirlineMappings());
    }
}

Tests

public class ReproTests {
    private TestDbContext context;

    [OneTimeSetUp]
    public void OneTimeSetUp() {
        context = new TestDbContext();
        context.Database.EnsureCreated();
        SetupTestData();
        context.Dispose();
    }

    [SetUp]
    public void Setup() { context = new TestDbContext(); }

    [TearDown]
    public void TearDown() { context.Dispose(); }

    [Test]
    public void GetManyFromEnd1_SqlIncludesRedundantSelects() {
        var airport = context.Airports.First();
        context.Entry(airport).Collection("Airlines").Load();
        _ = airport.Airlines.ToList();
    }

    [Test]
    public void GetManyFromEnd2_SqlIncludesRedundantSelects() {
        var airline = context.Airlines.First();

        // Lazy load the many-to-many association
        context.Entry(airline).Collection("Airports").Load();
        _ = airline.Airports.ToList();
    }

    private void SetupTestData() {
        var airport = new Airport();
        context.Add(airport);

        var airline = new Airline();
        context.Add(airline);

        airport.Airlines = new List<Airline> { airline };
        context.SaveChanges();
    }
}

Generated SQL

          SELECT [t].[ID], [t].[Name], [a].[ID], [t].[AirlineID], [t].[AirportID], [t0].[AirlineID], [t0].[AirportID], [t0].[ID], [t0].[Name]
          FROM [Airport] AS [a]
          INNER JOIN (
              SELECT [a1].[ID], [a1].[Name], [a0].[AirlineID], [a0].[AirportID]
              FROM [Airport_Airline] AS [a0]
              INNER JOIN [Airline] AS [a1] ON [a0].[AirlineID] = [a1].[ID]
          ) AS [t] ON [a].[ID] = [t].[AirportID]
          LEFT JOIN (
              SELECT [a2].[AirlineID], [a2].[AirportID], [a3].[ID], [a3].[Name]
              FROM [Airport_Airline] AS [a2]
              INNER JOIN [Airport] AS [a3] ON [a2].[AirportID] = [a3].[ID]
              WHERE [a3].[ID] = @__p_0
          ) AS [t0] ON [t].[ID] = [t0].[AirlineID]
          WHERE [a].[ID] = @__p_0
          ORDER BY [a].[ID], [t].[AirlineID], [t].[AirportID], [t].[ID], [t0].[AirlineID], [t0].[AirportID]

Mirror image sql is generated when queried from the other end.

This results in a query plan which looks like:

image

Whereas this SQL:

SELECT [a].[ID], [a].[Name], [a0].[AirlineID], [a0].[AirportID], [a1].[ID], [a1].[Name]
FROM [Airport] AS [a]
INNER JOIN [Airport_Airline] AS [a0] ON [a0].[AirportID] = [a].[ID]
LEFT JOIN [Airline] AS [a1] ON [a1].[ID] = [a0].[AirlineID]
WHERE [a].[ID] = 1
ORDER BY [a0].[AirportID], [a0].[AirlineID]

results in this query plan: image

Repro Project

RedundantJoins-CrossLinks.zip

Include provider and version information

EF Core version: 7.0.5 Database provider: Microsoft.EntityFrameworkCore.SqlServer Target framework: .NET 6.0 Operating system: WIndows 11 IDE: Visual Studio 2022 17.7

ajcvickers commented 10 months ago

/cc @roji @maumar

maumar commented 10 months ago

many-to-many loader produces the following query:

DbSet<Airport>()
    .Where(e => EF.Property<int>(e, "ID") == __p_0)
    .SelectMany(e => e.Airlines)
    .NotQuiteInclude(e => EF.Property<IEnumerable<Airport>>(e, "Airports")
        .Where(e => EF.Property<int>(e, "ID") == __p_0))
    .AsTracking()

which then gets converted in nav expansion to:

DbSet<Airport>()
    .Where(a => EF.Property<int>(a, "ID") == __p_0)
    .SelectMany(
        collectionSelector: a => DbSet<Dictionary<string, object>>("Airport_Airline")
            .Where(a0 => EF.Property<int?>(a, "ID") != null && object.Equals(
                objA: (object)EF.Property<int?>(a, "ID"), 
                objB: (object)EF.Property<int?>(a0, "AirportID")))
            .Join(
                inner: DbSet<Airline>(), 
                outerKeySelector: a0 => (object)EF.Property<int?>(a0, "AirlineID"), 
                innerKeySelector: a1 => (object)EF.Property<int?>(a1, "ID"), 
                resultSelector: (a0, a1) => new TransparentIdentifier<Dictionary<string, object>, Airline>(
                    Outer = a0, 
                    Inner = a1
                ))
            .Select(ti => ti.Inner), 
        resultSelector: (a, c) => new TransparentIdentifier<Airport, Airline>(
            Outer = a, 
            Inner = c
        ))
    .Select(ti0 => IncludeExpression(
        EntityExpression:
        ti0.Inner, 
        NavigationExpression:
        MaterializeCollectionNavigation(
            Navigation: Airline.Airports,
            subquery: DbSet<Dictionary<string, object>>("Airport_Airline")
                .Where(a2 => EF.Property<int?>(ti0.Inner, "ID") != null && object.Equals(
                    objA: (object)EF.Property<int?>(ti0.Inner, "ID"), 
                    objB: (object)EF.Property<int?>(a2, "AirlineID")))
                .Join(
                    inner: DbSet<Airport>(), 
                    outerKeySelector: a2 => (object)EF.Property<int?>(a2, "AirportID"), 
                    innerKeySelector: a3 => (object)EF.Property<int?>(a3, "ID"), 
                    resultSelector: (a2, a3) => new TransparentIdentifier<Dictionary<string, object>, Airport>(
                        Outer = a2, 
                        Inner = a3
                    ))
                .Where(ti1 => EF.Property<int>(ti1.Inner, "ID") == __p_0)
                .Select(ti1 => IncludeExpandingExpressionVisitor.FetchJoinEntity<Dictionary<string, object>, Airport>(
                    joinEntity: ti1.Outer, 
                    targetEntity: ti1.Inner))), Airports)
    )

when we Load Airlines from Airport, we not only query Airport -> AirportAirline -> Airline, but also include back references from Airline to Airport. That's where the extra complexity and work is coming from

CheloXL commented 8 months ago

Is there any news on this? I have a similar problem, and is aggravated by having an owned collection in the main entity, making the query to the db loads up tons of unused data.

nettashamir-allocate commented 5 months ago

when we Load Airlines from Airport, we not only query Airport -> AirportAirline -> Airline, but also include back references from Airline to Airport. That's where the extra complexity and work is coming from

@maumar Is there any way to stop it from including the back references if I don't want to fetch them eagerly?

maumar commented 5 months ago

@nettashamir-allocate you can use regular Include query, like so:

var airport = context.Airports.First();
_ = context.Airports.Where(a => a.ID == airport.ID).Include(x => x.Airlines).ToList();

which produces sql like this:

SELECT [a].[ID], [a].[Name], [s].[AirlineID], [s].[AirportID], [s].[ID], [s].[Name]
FROM [Airport] AS [a]
LEFT JOIN (
    SELECT [a0].[AirlineID], [a0].[AirportID], [a1].[ID], [a1].[Name]
    FROM [Airport_Airline] AS [a0]
    INNER JOIN [Airline] AS [a1] ON [a0].[AirlineID] = [a1].[ID]
) AS [s] ON [a].[ID] = [s].[AirportID]
WHERE [a].[ID] = @__airport_ID_0
ORDER BY [a].[ID], [s].[AirlineID], [s].[AirportID]

N'@__airport_ID_0 int',@__airport_ID_0=1

Note that this will only populate Airlines navigation of the selected Airport with all the airlines, however airlines themselves will only point to this one airport, even if there should be other airports associated with it.