Why is a foreach loop on an array slower than a LINQ call?

feO2x commented 6 years ago

Consider the following Benchmark class:

[CoreJob, ClrJob]
[MemoryDiagnoser]
public class IsSubsetOfGist
{
    [Params(10, 100, 1000, 10_000, 100_000, 1_000_000)]
    public int NumberOfItems;

    public int[] SubsetArray;
    public int[] SupersetArray;

    [GlobalSetup]
    public void GlobalArraySetup()
    {
        SupersetArray = new int[NumberOfItems];
        for (var i = 0; i < NumberOfItems; i++)
        {
            SupersetArray[i] = i + 1;
        }

        SubsetArray = new int[5];
        for (var i = 0; i < 5; i++)
        {
            SubsetArray[i] = SupersetArray[SupersetArray.Length - 1 - i];
        }
    }

    [Benchmark(Baseline = true)]
    public bool ArrayWithForLoops() => IsSubsetOfWithForLoops(SubsetArray, SupersetArray);

    private static bool IsSubsetOfWithForLoops<T>(T[] array, T[] superset, IEqualityComparer<T> comparer = null)
    {
        comparer = comparer ?? EqualityComparer<T>.Default;

        for (var i = 0; i < array.Length; i++)
        {
            var currentItem = array[i];
            var wasItemFound = false;
            for (var j = 0; j < superset.Length; j++)
            {
                if (!comparer.Equals(superset[i], currentItem))
                    continue;

                wasItemFound = true;
                break;
            }

            if (!wasItemFound)
                return false;
        }

        return true;
    }

    [Benchmark]
    public bool ArrayWithLinq() => SubsetArray.All(SupersetArray.Contains);

    [Benchmark]
    public bool ArrayWithForeachLoops() => IsSubsetOfWithForeachLoops(SubsetArray, SupersetArray);

    private static bool IsSubsetOfWithForeachLoops<T>(T[] set, T[] superset, IEqualityComparer<T> comparer = null)
    {
        comparer = comparer ?? EqualityComparer<T>.Default;
        foreach (var itemFromSet in set)
        {
            var wasItemFound = false;
            foreach (var itemFromSuperset in superset)
            {
                if (comparer.Equals(itemFromSet, itemFromSuperset))
                {
                    wasItemFound = true;
                    break;
                }
            }

            if (!wasItemFound)
                return false;
        }

        return true;
    }
}

In these benchmarks, I test different ways to determine if a collection is a subset of another collection (without checking for duplicate items). SubsetArray always contains the last 5 items of SupersetArray in reverse order (to mimic the worst case scenario from the number of loop runs). I expected that the versions with the for loops and foreach loops are faster than the call to the LINQ methods Enumerable.All and Enumerable.Contains, however they are not:


BenchmarkDotNet=v0.10.11, OS=Windows 10 Redstone 3 [1709, Fall Creators Update] (10.0.16299.125)
Processor=Intel Core i5-6300U CPU 2.40GHz (Skylake), ProcessorCount=4
Frequency=2437500 Hz, Resolution=410.2564 ns, Timer=TSC
.NET Core SDK=2.1.2
  [Host] : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT
  Clr    : .NET Framework 4.7 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.2600.0
  Core   : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT

Method	Job	Runtime	NumberOfItems	Mean	Error	StdDev	Scaled	ScaledSD	Gen 0	Allocated
ArrayWithForLoops	Clr	Clr	10	38.97 ns	0.1858 ns	0.1647 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	10	321.30 ns	1.0796 ns	1.0098 ns	8.24	0.04	0.0606	96 B
ArrayWithForeachLoops	Clr	Clr	10	131.90 ns	0.4581 ns	0.4285 ns	3.38	0.02	-	0 B
ArrayWithForLoops	Core	Core	10	31.81 ns	0.0935 ns	0.0875 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	10	276.83 ns	1.9921 ns	1.8634 ns	8.70	0.06	0.0606	96 B
ArrayWithForeachLoops	Core	Core	10	125.05 ns	0.4460 ns	0.4172 ns	3.93	0.02	-	0 B
ArrayWithForLoops	Clr	Clr	100	305.43 ns	1.8793 ns	1.7579 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	100	689.30 ns	3.1709 ns	2.9661 ns	2.26	0.02	0.0601	96 B
ArrayWithForeachLoops	Clr	Clr	100	1,476.23 ns	12.7996 ns	11.9728 ns	4.83	0.05	-	0 B
ArrayWithForLoops	Core	Core	100	298.21 ns	1.4164 ns	1.2556 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	100	680.31 ns	4.7340 ns	4.4282 ns	2.28	0.02	0.0601	96 B
ArrayWithForeachLoops	Core	Core	100	1,442.54 ns	3.3149 ns	3.1008 ns	4.84	0.02	-	0 B
ArrayWithForLoops	Clr	Clr	1000	2,900.18 ns	12.7123 ns	11.2691 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	1000	3,352.68 ns	22.9503 ns	19.1645 ns	1.16	0.01	0.0572	96 B
ArrayWithForeachLoops	Clr	Clr	1000	14,304.57 ns	59.9615 ns	56.0880 ns	4.93	0.03	-	0 B
ArrayWithForLoops	Core	Core	1000	2,862.33 ns	12.5162 ns	11.0953 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	1000	3,892.44 ns	12.1110 ns	11.3287 ns	1.36	0.01	0.0534	96 B
ArrayWithForeachLoops	Core	Core	1000	14,197.90 ns	72.6340 ns	64.3881 ns	4.96	0.03	-	0 B
ArrayWithForLoops	Clr	Clr	10000	32,064.52 ns	104.7986 ns	98.0287 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	10000	30,438.20 ns	387.8124 ns	362.7600 ns	0.95	0.01	-	96 B
ArrayWithForeachLoops	Clr	Clr	10000	142,249.81 ns	1,119.4741 ns	1,047.1567 ns	4.44	0.03	-	0 B
ArrayWithForLoops	Core	Core	10000	28,367.15 ns	46.8529 ns	39.1243 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	10000	35,950.57 ns	65.9271 ns	58.4427 ns	1.27	0.00	-	96 B
ArrayWithForeachLoops	Core	Core	10000	142,462.62 ns	649.8010 ns	607.8242 ns	5.02	0.02	-	0 B
ArrayWithForLoops	Clr	Clr	100000	320,891.14 ns	1,262.2017 ns	1,180.6642 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	100000	302,454.62 ns	1,715.1904 ns	1,604.3900 ns	0.94	0.01	-	100 B
ArrayWithForeachLoops	Clr	Clr	100000	1,428,956.23 ns	11,414.0670 ns	9,531.2620 ns	4.45	0.03	-	0 B
ArrayWithForLoops	Core	Core	100000	284,467.21 ns	1,791.0643 ns	1,675.3625 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	100000	356,758.14 ns	834.7999 ns	780.8722 ns	1.25	0.01	-	96 B
ArrayWithForeachLoops	Core	Core	100000	1,416,342.79 ns	3,701.5172 ns	3,462.4011 ns	4.98	0.03	-	0 B
ArrayWithForLoops	Clr	Clr	1000000	3,181,027.99 ns	20,028.1485 ns	18,734.3407 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	1000000	3,231,952.75 ns	10,953.6190 ns	10,246.0211 ns	1.02	0.01	-	128 B
ArrayWithForeachLoops	Clr	Clr	1000000	14,555,322.97 ns	43,249.8432 ns	40,455.9262 ns	4.58	0.03	-	0 B
ArrayWithForLoops	Core	Core	1000000	2,830,874.57 ns	11,429.7118 ns	10,691.3585 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	1000000	3,801,994.66 ns	11,227.0771 ns	9,952.5104 ns	1.34	0.01	-	96 B
ArrayWithForeachLoops	Core	Core	1000000	14,704,212.82 ns	71,842.9255 ns	67,201.9106 ns	5.19	0.03	-	0 B

How can that be? Am I setting up my benchmarks in a wrong way? I expected the for loop variant to be the fastest, and the foreach variant to be at least a little bit faster than the LINQ calls, especially as foreach on arrays does not allocate any objects. But the foreach loops are way slower - which confuses me as Enumerable.All and Enumerable.Contains are implemented in basically the same way. The LINQ version is sometimes even faster than the for-loop version in the CLR.

Thanks for your help in advance.

IsSubsetOfGist.log

redknightlois commented 6 years ago

The only way to know for sure is looking at the assembly generated for all of those. You can use the disassembly support of BenchmarkDotNet to look at it. Without looking at the assembly there are a few things that can cause it. The likely cause is that the returned enumerator is not a struct enumerator, therefore, the calls to MoveNext() and the other support functions are not inlined.

feO2x commented 6 years ago

@redknightlois after activating the [DisassemblyDiagnoser(printIL:true, printAsm:false)], the results of the important methods IsSubsetOfWithForeachLoops and IsSubsetOfWithForLoops are not printed out (I guess because they are not the marked with the BenchmarkAttribute but called indirectly?). However, when using ILDasm, I can see no calls to the IEnumerator<T> methods - the IL looks like the following (identical for .NET and .NET Core):

.method private hidebysig static bool  IsSubsetOfWithForeachLoops<T>(!!T[] set,
                                                                     !!T[] superset,
                                                                     [opt] class [mscorlib]System.Collections.Generic.IEqualityComparer`1<!!T> comparer) cil managed
{
  .param [3] = nullref
  // Code size       93 (0x5d)
  .maxstack  3
  .locals init (!!T[] V_0,
           int32 V_1,
           !!T V_2,
           bool V_3,
           !!T[] V_4,
           int32 V_5,
           !!T V_6)
  IL_0000:  ldarg.2
  IL_0001:  dup
  IL_0002:  brtrue.s   IL_000a
  IL_0004:  pop
  IL_0005:  call       class [mscorlib]System.Collections.Generic.EqualityComparer`1<!0> class [mscorlib]System.Collections.Generic.EqualityComparer`1<!!T>::get_Default()
  IL_000a:  starg.s    comparer
  IL_000c:  ldarg.0
  IL_000d:  stloc.0
  IL_000e:  ldc.i4.0
  IL_000f:  stloc.1
  IL_0010:  br.s       IL_0055
  IL_0012:  ldloc.0
  IL_0013:  ldloc.1
  IL_0014:  ldelem     !!T
  IL_0019:  stloc.2
  IL_001a:  ldc.i4.0
  IL_001b:  stloc.3
  IL_001c:  ldarg.1
  IL_001d:  stloc.s    V_4
  IL_001f:  ldc.i4.0
  IL_0020:  stloc.s    V_5
  IL_0022:  br.s       IL_0044
  IL_0024:  ldloc.s    V_4
  IL_0026:  ldloc.s    V_5
  IL_0028:  ldelem     !!T
  IL_002d:  stloc.s    V_6
  IL_002f:  ldarg.2
  IL_0030:  ldloc.2
  IL_0031:  ldloc.s    V_6
  IL_0033:  callvirt   instance bool class [mscorlib]System.Collections.Generic.IEqualityComparer`1<!!T>::Equals(!0,
                                                                                                                 !0)
  IL_0038:  brfalse.s  IL_003e
  IL_003a:  ldc.i4.1
  IL_003b:  stloc.3
  IL_003c:  br.s       IL_004c
  IL_003e:  ldloc.s    V_5
  IL_0040:  ldc.i4.1
  IL_0041:  add
  IL_0042:  stloc.s    V_5
  IL_0044:  ldloc.s    V_5
  IL_0046:  ldloc.s    V_4
  IL_0048:  ldlen
  IL_0049:  conv.i4
  IL_004a:  blt.s      IL_0024
  IL_004c:  ldloc.3
  IL_004d:  brtrue.s   IL_0051
  IL_004f:  ldc.i4.0
  IL_0050:  ret
  IL_0051:  ldloc.1
  IL_0052:  ldc.i4.1
  IL_0053:  add
  IL_0054:  stloc.1
  IL_0055:  ldloc.1
  IL_0056:  ldloc.0
  IL_0057:  ldlen
  IL_0058:  conv.i4
  IL_0059:  blt.s      IL_0012
  IL_005b:  ldc.i4.1
  IL_005c:  ret
} // end of method IsSubsetOfGist::IsSubsetOfWithForeachLoops

Thus I would assume that the foreach loops are optimized and should perform at least as good as the LINQ methods.

redknightlois commented 6 years ago

@feO2x When benchmarking you shouldn't wrap the calls, you may be skewing your results via an extra call; which for many benchmarking workloads is high enough to account for the lion share of your total time and/or standard deviation.

I just realized you are passing around arrays of integers over a generic type. The LINQ implementation relies on a few tricks that would outperform a typical foreach by a while, for example, devirtualization of the Default comparer when passing a null value (which you are passing to). That means that you may actually benchmarking oranges and apples and the results kinda suggest that (there is no apparent cause for the difference).

Again difficult to know from IL because that depends on what assembler instructions the JIT is emitting there. For example, if the devirtualization is not happening, that is enough to account for the massive difference you are looking at there.

EDIT: For reference https://github.com/dotnet/corefx/commit/4cc7cd68cd544c8703d740ced9a31cd9b16479e3#diff-acc6cdce907ecf103772cf4bdc0e95a0R26

feO2x commented 6 years ago

@redknightlois Thanks for your insights about devirtualizing - maybe I should give a little bit of context about why I structured the benchmarks the way they are. I am the author of Light.GuardClauses, an assertion library for checking parameter values in production code. I'm currently considering whether it's useful to boost the performance of collection assertions - of course, these assertions are implemented in a generic way like the methods above are.

I'm not that good in reading assembler language - but I will check if I can get hold of the assembler instructions.

redknightlois commented 6 years ago

@feO2x I see. I have been looking at the code of GuardClauses and many of the patterns used internally are definitely not 'free' in the general sense. There are many tricks you can use to achieve almost zero overhead if you are willing to trick the JIT into emitting the proper code. I explained some of the techniques at DotNext Moscow 2017 (live streaming available at their YouTube channel https://youtu.be/FMktdWexo8A?t=6175) there are a few more that would be necessary but you can make it almost zero overhead if you are interested into going there.

feO2x commented 6 years ago

@redknightlois Thanks - I will take a look. I have much to learn about performance optimizations. Will close this issue for now. Can I come back to you if I have further questions?

redknightlois commented 6 years ago

Sure, I do 8 hours of performance consulting for Open Source projects as community work for free every month. Some like to write blogs, I am more of a 'get my hands dirty' kind of guy... essentially I despise writing :D ... So ping me on my twitter if you want GuardClauses to be scheduled for January /February spot (which I didn't open yet ;) ).

mikedn commented 6 years ago

The proper question here is not why foreach is slower than LINQ but rather why foreach is slower than for. In the case of arrays foreach and for should generate similar code and thus similar timings. If the timings are so different then there's a good chance that the 2 versions aren't comparable.

Indeed, this line appears to be incorrect:

if (!comparer.Equals(superset[i], currentItem))

It should probably be superset[j] instead of superset[i].

feO2x commented 6 years ago

@mikedn You are completely correct, there is a bug in my for-loop benchmark. After fixing it, the for loop version is as slow as the foreach version:


BenchmarkDotNet=v0.10.11, OS=Windows 10 Redstone 3 [1709, Fall Creators Update] (10.0.16299.125)
Processor=Intel Core i5-6300U CPU 2.40GHz (Skylake), ProcessorCount=4
Frequency=2437500 Hz, Resolution=410.2564 ns, Timer=TSC
.NET Core SDK=2.1.2
  [Host] : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT
  Clr    : .NET Framework 4.7 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.2600.0
  Core   : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT

Method	Job	Runtime	NumberOfItems	Mean	Error	StdDev	Scaled	ScaledSD	Gen 0	Allocated
ArrayWithForLoops	Clr	Clr	10	132.1 ns	0.5414 ns	0.4799 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	10	318.5 ns	2.0388 ns	1.8073 ns	2.41	0.02	0.0606	96 B
ArrayWithForeachLoops	Clr	Clr	10	132.8 ns	0.5085 ns	0.4756 ns	1.01	0.00	-	0 B
ArrayWithForLoops	Core	Core	10	120.2 ns	0.2715 ns	0.2540 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	10	266.0 ns	2.0088 ns	1.8790 ns	2.21	0.02	0.0606	96 B
ArrayWithForeachLoops	Core	Core	10	112.1 ns	0.4089 ns	0.3825 ns	0.93	0.00	-	0 B
ArrayWithForLoops	Clr	Clr	100	1,471.5 ns	26.4787 ns	22.1109 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	100	681.1 ns	4.5160 ns	3.7711 ns	0.46	0.01	0.0601	96 B
ArrayWithForeachLoops	Clr	Clr	100	1,458.3 ns	7.0498 ns	5.5040 ns	0.99	0.01	-	0 B
ArrayWithForLoops	Core	Core	100	1,446.6 ns	7.4547 ns	6.6084 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	100	809.8 ns	10.0747 ns	9.4238 ns	0.56	0.01	0.0601	96 B
ArrayWithForeachLoops	Core	Core	100	1,282.7 ns	4.6168 ns	4.3186 ns	0.89	0.00	-	0 B
ArrayWithForLoops	Clr	Clr	1000	14,300.6 ns	63.4179 ns	56.2183 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	1000	3,339.2 ns	20.9335 ns	17.4804 ns	0.23	0.00	0.0572	96 B
ArrayWithForeachLoops	Clr	Clr	1000	14,257.6 ns	46.0916 ns	43.1141 ns	1.00	0.00	-	0 B
ArrayWithForLoops	Core	Core	1000	14,233.8 ns	87.3339 ns	81.6922 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	1000	3,951.1 ns	13.7111 ns	12.8254 ns	0.28	0.00	0.0534	96 B
ArrayWithForeachLoops	Core	Core	1000	12,524.4 ns	28.8571 ns	26.9929 ns	0.88	0.01	-	0 B
ArrayWithForLoops	Clr	Clr	10000	142,812.0 ns	449.4916 ns	420.4547 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	10000	30,494.6 ns	120.6997 ns	100.7897 ns	0.21	0.00	-	96 B
ArrayWithForeachLoops	Clr	Clr	10000	142,572.6 ns	657.7825 ns	615.2901 ns	1.00	0.01	-	0 B
ArrayWithForLoops	Core	Core	10000	142,693.7 ns	430.9256 ns	403.0880 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	10000	36,163.4 ns	96.1260 ns	85.2132 ns	0.25	0.00	-	96 B
ArrayWithForeachLoops	Core	Core	10000	124,936.0 ns	294.8500 ns	275.8028 ns	0.88	0.00	-	0 B
ArrayWithForLoops	Clr	Clr	100000	1,427,927.9 ns	6,193.1063 ns	5,793.0349 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	100000	301,327.9 ns	885.6928 ns	785.1436 ns	0.21	0.00	-	100 B
ArrayWithForeachLoops	Clr	Clr	100000	1,427,722.5 ns	7,160.2840 ns	6,697.7335 ns	1.00	0.01	-	0 B
ArrayWithForLoops	Core	Core	100000	1,433,723.0 ns	5,300.5856 ns	4,958.1706 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	100000	358,312.9 ns	1,399.4997 ns	1,309.0928 ns	0.25	0.00	-	96 B
ArrayWithForeachLoops	Core	Core	100000	1,250,089.9 ns	6,911.6685 ns	6,465.1784 ns	0.87	0.01	-	0 B
ArrayWithForLoops	Clr	Clr	1000000	14,622,995.1 ns	63,250.9134 ns	59,164.9378 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	1000000	3,227,347.0 ns	14,015.2056 ns	12,424.1135 ns	0.22	0.00	-	128 B
ArrayWithForeachLoops	Clr	Clr	1000000	14,738,577.8 ns	28,660.7134 ns	22,376.3978 ns	1.01	0.00	-	0 B
ArrayWithForLoops	Core	Core	1000000	14,589,810.0 ns	42,640.7048 ns	39,886.1378 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	1000000	3,789,296.6 ns	15,291.9963 ns	12,769.5082 ns	0.26	0.00	-	96 B
ArrayWithForeachLoops	Core	Core	1000000	12,903,099.5 ns	39,239.6372 ns	36,704.7774 ns	0.88	0.00	-	0 B

mikedn commented 6 years ago

So now both for and foreach are slow, funny.

The LINQ version is faster because it uses the Contains overload without an IEqualityComparer<T> parameter. If you change it to use the other overload then it will probably be slow as well.

redknightlois commented 6 years ago

@mikedn afaik the changes to the operation were done so it could devirtualize the comparer and because it is an int, it would only add a single check to the code. You may need to use a custom comparer to actually not being able to devirtualize and be as slow as those 2.

feO2x commented 6 years ago

FWIW, it suffices to create a lambda and pass null as the comparer to decrease the performance of the LINQ version:

SubsetArray.All(i => SupersetArray.Contains(i, null));


BenchmarkDotNet=v0.10.11, OS=Windows 10 Redstone 3 [1709, Fall Creators Update] (10.0.16299.125)
Processor=Intel Core i5-6300U CPU 2.40GHz (Skylake), ProcessorCount=4
Frequency=2437498 Hz, Resolution=410.2567 ns, Timer=TSC
.NET Core SDK=2.1.2
  [Host] : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT
  Clr    : .NET Framework 4.7 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.2600.0
  Core   : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT

Method	Job	Runtime	NumberOfItems	Mean	Error	StdDev	Scaled	ScaledSD	Gen 0	Allocated
ArrayWithForLoops	Clr	Clr	10	127.5 ns	0.6032 ns	0.5037 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	10	628.0 ns	9.0563 ns	8.0282 ns	4.92	0.06	0.1621	256 B
ArrayWithForeachLoops	Clr	Clr	10	139.9 ns	0.6873 ns	0.5740 ns	1.10	0.01	-	0 B
ArrayWithForLoops	Core	Core	10	133.8 ns	2.1991 ns	1.9494 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	10	581.3 ns	3.6555 ns	3.2405 ns	4.35	0.06	0.1621	256 B
ArrayWithForeachLoops	Core	Core	10	125.0 ns	1.4068 ns	1.2471 ns	0.94	0.02	-	0 B
ArrayWithForLoops	Clr	Clr	100	1,464.1 ns	17.2244 ns	13.4477 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	100	4,722.7 ns	47.8080 ns	44.7197 ns	3.23	0.04	0.1602	256 B
ArrayWithForeachLoops	Clr	Clr	100	1,637.1 ns	16.8034 ns	15.7179 ns	1.12	0.01	-	0 B
ArrayWithForLoops	Core	Core	100	1,620.1 ns	13.9337 ns	11.6353 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	100	4,663.8 ns	20.7348 ns	16.1884 ns	2.88	0.02	0.1602	256 B
ArrayWithForeachLoops	Core	Core	100	1,463.2 ns	10.7066 ns	9.4911 ns	0.90	0.01	-	0 B
ArrayWithForLoops	Clr	Clr	1000	14,466.2 ns	138.0810 ns	122.4052 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	1000	45,050.0 ns	256.5118 ns	227.3910 ns	3.11	0.03	0.1221	257 B
ArrayWithForeachLoops	Clr	Clr	1000	16,220.3 ns	108.1038 ns	90.2716 ns	1.12	0.01	-	0 B
ArrayWithForLoops	Core	Core	1000	15,691.8 ns	173.4613 ns	153.7689 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	1000	45,022.9 ns	224.8520 ns	187.7616 ns	2.87	0.03	0.1221	256 B
ArrayWithForeachLoops	Core	Core	1000	15,040.7 ns	160.2104 ns	149.8609 ns	0.96	0.01	-	0 B
ArrayWithForLoops	Clr	Clr	10000	143,433.2 ns	667.3433 ns	521.0177 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	10000	449,157.9 ns	3,467.6669 ns	3,073.9961 ns	3.13	0.02	-	260 B
ArrayWithForeachLoops	Clr	Clr	10000	162,755.0 ns	883.6883 ns	826.6025 ns	1.13	0.01	-	0 B
ArrayWithForLoops	Core	Core	10000	157,583.6 ns	1,586.7188 ns	1,484.2176 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	10000	446,518.7 ns	2,945.0956 ns	2,754.8439 ns	2.83	0.03	-	256 B
ArrayWithForeachLoops	Core	Core	10000	148,659.9 ns	774.2595 ns	686.3608 ns	0.94	0.01	-	0 B
ArrayWithForLoops	Clr	Clr	100000	1,443,379.0 ns	3,643.9711 ns	2,844.9727 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	100000	4,521,913.9 ns	35,326.2630 ns	31,315.8089 ns	3.13	0.02	-	320 B
ArrayWithForeachLoops	Clr	Clr	100000	1,619,285.5 ns	9,384.6966 ns	8,778.4502 ns	1.12	0.01	-	0 B
ArrayWithForLoops	Core	Core	100000	1,629,428.2 ns	8,392.3427 ns	7,850.2018 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	100000	4,489,078.2 ns	26,777.5918 ns	23,737.6354 ns	2.76	0.02	-	256 B
ArrayWithForeachLoops	Core	Core	100000	1,447,246.3 ns	3,888.5543 ns	3,035.9272 ns	0.89	0.00	-	0 B
ArrayWithForLoops	Clr	Clr	1000000	14,903,893.6 ns	65,066.3644 ns	57,679.6316 ns	1.00	0.00	-	0 B
ArrayWithLinq	Clr	Clr	1000000	45,719,124.6 ns	659,323.2631 ns	550,564.7327 ns	3.07	0.04	-	512 B
ArrayWithForeachLoops	Clr	Clr	1000000	16,750,741.9 ns	137,658.8803 ns	128,766.1895 ns	1.12	0.01	-	0 B
ArrayWithForLoops	Core	Core	1000000	16,716,489.7 ns	104,164.0436 ns	92,338.7024 ns	1.00	0.00	-	0 B
ArrayWithLinq	Core	Core	1000000	45,882,191.5 ns	485,470.1228 ns	454,109.0099 ns	2.74	0.03	-	256 B
ArrayWithForeachLoops	Core	Core	1000000	14,956,303.7 ns	107,229.4869 ns	100,302.5188 ns	0.89	0.01	-	0 B

lazyloader commented 6 years ago

My guess is that it has something to do with the fact that LINQ uses less lines of code, so the compiler is able to process the request faster.

dotnet / BenchmarkDotNet

Why is a foreach loop on an array slower than a LINQ call? #610