Zonciu / Box2DSharp

A C# port of Box2D
MIT License
544 stars 102 forks source link

Performance when using `in` arguments with Vector2 #7

Closed grimaldini closed 4 years ago

grimaldini commented 4 years ago

Hi,

I noticed that you use the `in` keyword when passing a `Vector2` to methods, presumably to avoid copying the struct for performance reasons. However, `Vector2` is not a `readonly struct`, so I believe the compiler makes defensive copies of the `Vector2` anyway, which defeats the purpose. Example: https://github.com/Zonciu/Box2DSharp/blob/master/src/Common/MathUtils.cs#L12

More about in arguments with mutable structures: https://docs.microsoft.com/en-us/dotnet/csharp/write-safe-efficient-code#avoid-mutable-structs-as-an-in-argument

Zonciu commented 4 years ago

I wrote a benchmark following this article: https://devblogs.microsoft.com/premier-developer/the-in-modifier-and-the-readonly-structs-in-c/ The results are interesting...

Benchmark.LargeStructTest-report


BenchmarkDotNet=v0.12.1, OS=Windows 10.0.18363.900 (1909/November2018Update/19H2)
Intel Core i7-4790K CPU 4.00GHz (Haswell), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=3.1.301
  [Host]     : .NET Core 3.1.5 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.27001), X64 RyuJIT
  DefaultJob : .NET Core 3.1.5 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.27001), X64 RyuJIT

| Method | Mean | Error | StdDev |
|---|---:|---:|---:|
| PropertyAggregatePassedByValue | 56.96 ms | 0.244 ms | 0.228 ms |
| PropertyAggregatePassedByIn | 57.45 ms | 0.469 ms | 0.438 ms |
| FieldAggregatePassedByValue | 56.98 ms | 0.151 ms | 0.141 ms |
| FieldAggregatePassedByIn | 57.06 ms | 0.215 ms | 0.202 ms |

Benchmark.ReadonlyTest-report


BenchmarkDotNet=v0.12.1, OS=Windows 10.0.18363.900 (1909/November2018Update/19H2)
Intel Core i7-4790K CPU 4.00GHz (Haswell), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=3.1.301
  [Host]     : .NET Core 3.1.5 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.27001), X64 RyuJIT
  DefaultJob : .NET Core 3.1.5 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.27001), X64 RyuJIT

| Method | Mean | Error | StdDev |
|---|---:|---:|---:|
| AggregateForNonReadOnlyField | 57.61 ms | 0.471 ms | 0.440 ms |
| AggregateForReadOnlyField | 57.19 ms | 0.233 ms | 0.206 ms |

Benchmark.Vector2Test-report


BenchmarkDotNet=v0.12.1, OS=Windows 10.0.18363.900 (1909/November2018Update/19H2)
Intel Core i7-4790K CPU 4.00GHz (Haswell), 1 CPU, 8 logical and 4 physical cores
.NET Core SDK=3.1.301
  [Host]     : .NET Core 3.1.5 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.27001), X64 RyuJIT
  DefaultJob : .NET Core 3.1.5 (CoreCLR 4.700.20.26901, CoreFX 4.700.20.27001), X64 RyuJIT

| Method | Mean | Error | StdDev |
|---|---:|---:|---:|
| Test | 121.5 ms | 0.55 ms | 0.52 ms |
| InTestImplicit | 108.9 ms | 0.44 ms | 0.35 ms |
| InTestExplicit | 108.8 ms | 0.44 ms | 0.39 ms |
| RefTest | 109.7 ms | 0.95 ms | 0.89 ms |

Code

using System;
using System.Linq;
using System.Numerics;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

namespace Benchmark
{
    /// <summary>
    /// Console entry point of the benchmark project.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Passes the assembly that declares <see cref="Program"/> to
        /// <c>BenchmarkRunner.Run</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args) => BenchmarkRunner.Run(typeof(Program).Assembly);
    }

    /// <summary>
    /// Benchmarks four ways of handing two <see cref="Vector2"/> instance fields to a
    /// 2D cross-product helper: by value, by <c>in</c> (with and without the modifier at
    /// the call site) and by <c>ref</c>.
    /// </summary>
    public class Vector2Test
    {
        // Loop input; every benchmark adds each element to its running total.
        private int[] _data;

        // The two operands handed to the cross-product helpers.
        private Vector2 _a;

        private Vector2 _b;

        /// <summary>
        /// Fills both vectors with values drawn from an unseeded <see cref="Random"/> and
        /// builds the array holding the integers 1 through 100,000,000.
        /// </summary>
        [GlobalSetup]
        public void Setup()
        {
            var random = new Random();

            // Draw order matters for reproducing the original sequence: a.X, a.Y, b.X, b.Y.
            float ax = random.Next();
            float ay = random.Next();
            _a = new Vector2(ax, ay);

            float bx = random.Next();
            float by = random.Next();
            _b = new Vector2(bx, by);

            _data = Enumerable.Range(1, 100_000_000).ToArray();
        }

        /// <summary>
        /// Sums <c>element + Cross(a, b)</c> over the data array, passing both vectors by value.
        /// </summary>
        /// <returns>The accumulated single-precision total.</returns>
        [Benchmark]
        public float Test()
        {
            float total = 0f;
            foreach (int value in _data)
            {
                total += value + Cross(_a, _b);
            }

            return total;
        }

        /// <summary>
        /// Same loop as <see cref="Test"/>, calling <see cref="CrossIn"/> without writing
        /// the <c>in</c> modifier at the call site.
        /// </summary>
        /// <returns>The accumulated single-precision total.</returns>
        [Benchmark]
        public float InTestImplicit()
        {
            float total = 0f;
            foreach (int value in _data)
            {
                total += value + CrossIn(_a, _b);
            }

            return total;
        }

        /// <summary>
        /// Same loop as <see cref="Test"/>, calling <see cref="CrossIn"/> with an explicit
        /// <c>in</c> modifier on both arguments.
        /// </summary>
        /// <returns>The accumulated single-precision total.</returns>
        [Benchmark]
        public float InTestExplicit()
        {
            float total = 0f;
            foreach (int value in _data)
            {
                total += value + CrossIn(in _a, in _b);
            }

            return total;
        }

        /// <summary>
        /// Same loop as <see cref="Test"/>, calling <see cref="CrossRef"/> with both
        /// vectors passed by <c>ref</c>.
        /// </summary>
        /// <returns>The accumulated single-precision total.</returns>
        [Benchmark]
        public float RefTest()
        {
            float total = 0f;
            foreach (int value in _data)
            {
                total += value + CrossRef(ref _a, ref _b);
            }

            return total;
        }

        /// <summary>Computes <c>a.X * b.Y - a.Y * b.X</c> with by-value parameters.</summary>
        /// <param name="a">Left operand.</param>
        /// <param name="b">Right operand.</param>
        /// <returns>The scalar 2D cross product.</returns>
        public static float Cross(Vector2 a, Vector2 b) => a.X * b.Y - a.Y * b.X;

        /// <summary>Computes <c>a.X * b.Y - a.Y * b.X</c> with <c>in</c> parameters.</summary>
        /// <param name="a">Left operand.</param>
        /// <param name="b">Right operand.</param>
        /// <returns>The scalar 2D cross product.</returns>
        public static float CrossIn(in Vector2 a, in Vector2 b) => a.X * b.Y - a.Y * b.X;

        /// <summary>Computes <c>a.X * b.Y - a.Y * b.X</c> with <c>ref</c> parameters.</summary>
        /// <param name="a">Left operand; only read.</param>
        /// <param name="b">Right operand; only read.</param>
        /// <returns>The scalar 2D cross product.</returns>
        public static float CrossRef(ref Vector2 a, ref Vector2 b) => a.X * b.Y - a.Y * b.X;
    }

    /// <summary>
    /// Benchmarks passing a multi-field struct to a local function by value versus by
    /// <c>in</c>, once for a struct that exposes its value through a get-only property
    /// and once for a struct that exposes it through a public field.
    /// </summary>
    public class LargeStructTest
    {
        /// <summary>
        /// Struct with four <c>long</c> fields and a get-only <see cref="N"/> property.
        /// </summary>
        public struct FairlyLargeStructWithProperty
        {
            // Never read; presumably present only to enlarge the struct.
            private readonly long l1, l2, l3, l4;

            /// <summary>The value supplied to the constructor.</summary>
            public int N { get; }

            /// <summary>Zero-initializes the struct and then stores <paramref name="n"/>.</summary>
            /// <param name="n">Value exposed through <see cref="N"/>.</param>
            public FairlyLargeStructWithProperty(int n)
                : this()
            {
                N = n;
            }
        }

        /// <summary>
        /// Struct with four <c>long</c> fields and a public mutable <see cref="N"/> field.
        /// </summary>
        public struct FairlyLargeStructWithField
        {
            // Never read; presumably present only to enlarge the struct.
            private readonly long l1, l2, l3, l4;

            /// <summary>The value supplied to the constructor.</summary>
            public int N;

            /// <summary>Zero-initializes the struct and then stores <paramref name="n"/>.</summary>
            /// <param name="n">Value stored in <see cref="N"/>.</param>
            public FairlyLargeStructWithField(int n)
                : this()
            {
                N = n;
            }
        }

        // The integers 1 through 100,000,000; shared by every benchmark in this class.
        private readonly int[] _data = Enumerable.Range(1, 100_000_000).ToArray();

        /// <summary>
        /// Sums <c>element + N</c> over the data array with the property-based struct passed by value.
        /// </summary>
        /// <returns>The accumulated <c>int</c> total.</returns>
        [Benchmark]
        public int PropertyAggregatePassedByValue()
        {
            int Aggregate(FairlyLargeStructWithProperty payload)
            {
                int sum = 0;
                foreach (int item in _data)
                {
                    sum += item + payload.N;
                }

                return sum;
            }

            return Aggregate(new FairlyLargeStructWithProperty(42));
        }

        /// <summary>
        /// Sums <c>element + N</c> over the data array with the property-based struct passed by <c>in</c>.
        /// </summary>
        /// <returns>The accumulated <c>int</c> total.</returns>
        [Benchmark]
        public int PropertyAggregatePassedByIn()
        {
            int Aggregate(in FairlyLargeStructWithProperty payload)
            {
                int sum = 0;
                foreach (int item in _data)
                {
                    sum += item + payload.N;
                }

                return sum;
            }

            return Aggregate(new FairlyLargeStructWithProperty(42));
        }

        /// <summary>
        /// Sums <c>element + N</c> over the data array with the field-based struct passed by value.
        /// </summary>
        /// <returns>The accumulated <c>int</c> total.</returns>
        [Benchmark]
        public int FieldAggregatePassedByValue()
        {
            int Aggregate(FairlyLargeStructWithField payload)
            {
                int sum = 0;
                foreach (int item in _data)
                {
                    sum += item + payload.N;
                }

                return sum;
            }

            return Aggregate(new FairlyLargeStructWithField(42));
        }

        /// <summary>
        /// Sums <c>element + N</c> over the data array with the field-based struct passed by <c>in</c>.
        /// </summary>
        /// <returns>The accumulated <c>int</c> total.</returns>
        [Benchmark]
        public int FieldAggregatePassedByIn()
        {
            int Aggregate(in FairlyLargeStructWithField payload)
            {
                int sum = 0;
                foreach (int item in _data)
                {
                    sum += item + payload.N;
                }

                return sum;
            }

            return Aggregate(new FairlyLargeStructWithField(42));
        }
    }

    /// <summary>
    /// Benchmarks reading a struct's property through a mutable instance field versus
    /// through a <c>readonly</c> instance field.
    /// </summary>
    public class ReadonlyTest
    {
        /// <summary>
        /// Struct with four <c>long</c> fields and a get-only <see cref="N"/> property.
        /// </summary>
        public struct FairlyLargeStruct
        {
            // Never read; presumably present only to enlarge the struct.
            private readonly long l1, l2, l3, l4;

            /// <summary>The value supplied to the constructor.</summary>
            public int N { get; }

            /// <summary>Zero-initializes the struct and then stores <paramref name="n"/>.</summary>
            /// <param name="n">Value exposed through <see cref="N"/>.</param>
            public FairlyLargeStruct(int n)
                : this()
            {
                N = n;
            }
        }

        // Same value in both fields; only the readonly modifier on the field differs.
        private FairlyLargeStruct _nonReadOnlyStruct = new FairlyLargeStruct(42);

        private readonly FairlyLargeStruct _readOnlyStruct = new FairlyLargeStruct(42);

        // The integers 1 through 100,000,000; shared by both benchmarks.
        private readonly int[] _data = Enumerable.Range(1, 100_000_000).ToArray();

        /// <summary>
        /// Sums <c>element + N</c> over the data array, reading <c>N</c> from the mutable field.
        /// </summary>
        /// <returns>The accumulated <c>int</c> total.</returns>
        [Benchmark]
        public int AggregateForNonReadOnlyField()
        {
            int sum = 0;
            foreach (int item in _data)
            {
                sum += item + _nonReadOnlyStruct.N;
            }

            return sum;
        }

        /// <summary>
        /// Sums <c>element + N</c> over the data array, reading <c>N</c> from the readonly field.
        /// </summary>
        /// <returns>The accumulated <c>int</c> total.</returns>
        [Benchmark]
        public int AggregateForReadOnlyField()
        {
            int sum = 0;
            foreach (int item in _data)
            {
                sum += item + _readOnlyStruct.N;
            }

            return sum;
        }
    }
}
grimaldini commented 4 years ago

Very interesting: the results still somehow show that `in` performs better. I don't quite understand why. I also tried writing a whole new `Vector2` that is fully readonly, and that didn't yield any significant gains either.