dotnet / runtime

.NET is a cross-platform runtime for cloud, mobile, desktop, and IoT apps.
https://docs.microsoft.com/dotnet/core/
MIT License
15.03k stars 4.67k forks source link

Performance degradation in .Net 9 #108058

Open iSazonov opened 1 week ago

iSazonov commented 1 week ago

Description

I ran a performance comparison of my simple project and was surprised that the code runs noticeably slower on .Net 9.0 than on .Net 8.0. (35 ms vs 28 ms on my notebook) Moreover, a code (not my code) that I used as baseline (BinaryTrieBase) works almost the same, but my code is slower. The project: BinaryTrie.zip

Regression?

Yes.

Data

BenchmarkDotNet v0.14.0, Windows 10 (10.0.17763.6054/1809/October2018Update/Redstone5)
Intel Core i5-7400 CPU 3.00GHz (Kaby Lake), 1 CPU, 4 logical and 4 physical cores
.NET SDK 9.0.100-rc.1.24452.12
  [Host]     : .NET 8.0.8 (8.0.824.36612), X64 RyuJIT AVX2
  Job-AXRZBO : .NET 8.0.8 (8.0.824.36612), X64 RyuJIT AVX2
  Job-TZZJOW : .NET 9.0.0 (9.0.24.43107), X64 RyuJIT AVX2

IterationCount=32  

| Method              | Runtime  | Mean     | Error    | StdDev   | Ratio | Allocated | Alloc Ratio |
|-------------------- |--------- |---------:|---------:|---------:|------:|----------:|------------:|
| BinaryTrieBool      | .NET 8.0 | 27.94 ns | 0.136 ns | 0.211 ns |  0.40 |         - |          NA |
| BinaryTrieIPAddress | .NET 8.0 | 28.31 ns | 0.169 ns | 0.252 ns |  0.41 |         - |          NA |
| BinaryTrieBase      | .NET 8.0 | 69.29 ns | 0.392 ns | 0.599 ns |  1.00 |         - |          NA |
| BinaryTrieBool      | .NET 9.0 | 35.24 ns | 0.173 ns | 0.269 ns |  0.51 |         - |          NA |
| BinaryTrieIPAddress | .NET 9.0 | 35.36 ns | 0.132 ns | 0.202 ns |  0.51 |         - |          NA |
| BinaryTrieBase      | .NET 9.0 | 70.04 ns | 0.172 ns | 0.236 ns |  1.01 |         - |          NA |
EgorBo commented 1 week ago

@EgorBot -intel -arm64 --runtimes net8.0 net9.0

using System.Net;
using System.Net.Sockets;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args);

public class IPNetworkCollection
{
    private readonly TrieNode _root = new();
    internal void Add(IPNetwork network)
    {
        var ipAddressBytes = network.BaseAddress.GetAddressBytes();
        var prefixLength = network.PrefixLength;
        TrieNode currentNode = _root;
        for (int i = 0; i < prefixLength; i++)
        {
            int bit = GetBit(ipAddressBytes, i);
            if (currentNode.Children[bit] == null)
                currentNode.Children[bit] = new TrieNode();
            currentNode = currentNode.Children[bit];
        }
        currentNode.IsTerminal = true;
    }
    private static int GetBit(byte[] bytes, int index)
    {
        int byteIndex = index / 8;
        int bitIndex = index % 8;
        return (bytes[byteIndex] >> (7 - bitIndex)) & 1;
    }
    private class TrieNode
    {
        public TrieNode[] Children { get; } = new TrieNode[2];
        public bool IsTerminal { get; set; }
    }
}

[MemoryDiagnoser]
public class Benchmarks
{
    private IPAddress _a4 = null!;
    private IPBinaryTrie<bool> _binaryTrieBool = null!;
    private IPBinaryTrie<IPAddress> _binaryTrieIPAddress = null!;
    private IPNetworkCollection _bbelius = null!;
    [GlobalSetup]
    public void Setup()
    {
        _a4 = IPAddress.Parse("28.0.18.3");
        _binaryTrieBool = new();
        _binaryTrieBool.AddOrUpdate(IPNetwork.Parse("28.0.18.0/24"), true);
        _binaryTrieIPAddress = new();
        _binaryTrieIPAddress.AddOrUpdate(IPNetwork.Parse("28.0.18.0/24"), IPAddress.Parse("1.1.1.1"));
        _bbelius = new();
        _bbelius.Add(IPNetwork.Parse("28.0.18.0/24"));
    }
    [Benchmark] public void BinaryTrieBool() => _binaryTrieBool.Lookup(_a4);
}

public sealed class IPBinaryTrie<TLeaf>
{
    private class Node<TNodeLeaf>
    {
        public Node<TNodeLeaf>? Branch0;
        public Node<TNodeLeaf>? Branch1;
        public bool IsLeaf;
        public TNodeLeaf? NodeLeaf;
    }
    private readonly object _lock = new();
    private Node<TLeaf> _rootIPv4;
    private Node<TLeaf> _rootIPv6;
    public IPBinaryTrie()
    {
        _rootIPv4 = new Node<TLeaf>();
        _rootIPv6 = new Node<TLeaf>();
    }
    public TLeaf? Lookup(IPAddress address)
    {
        Span<byte> addressBuffer = stackalloc byte[16];
        address.TryWriteBytes(addressBuffer, out int bytesWritten);
        addressBuffer = addressBuffer.Slice(0, bytesWritten);
        Node<TLeaf> trieRoot = address.AddressFamily == AddressFamily.InterNetwork ? _rootIPv4 : _rootIPv6;
        TLeaf? result = LookupCore(addressBuffer, trieRoot);
        return result;
    }
    private static TLeaf? LookupCore(ReadOnlySpan<byte> addressBuffer, Node<TLeaf> root)
    {
        Node<TLeaf> currentNode = root;
        TLeaf? result = default;
        int currentBit = 0;
        for (int i = 0; i < addressBuffer.Length; i++)
        {
            for (int j = 7; j >= 0; j--)
            {
                currentBit++;
                int bit = (addressBuffer[i] >> j) & 1;
                Node<TLeaf>? branch = bit == 0 ? currentNode.Branch0 : currentNode.Branch1;
                if (branch is null)
                    return result;

                currentNode = branch;
                if (currentNode.IsLeaf)
                    result = currentNode.NodeLeaf;
            }
        }
        return result;
    }
    public void AddOrUpdate(IPNetwork network, TLeaf leaf)
    {
        Span<byte> addressBuffer = stackalloc byte[16];
        network.BaseAddress.TryWriteBytes(addressBuffer, out int bytesWritten);
        addressBuffer = addressBuffer.Slice(0, bytesWritten);
        lock (_lock)
        {
            Node<TLeaf> trieRoot = network.BaseAddress.AddressFamily == AddressFamily.InterNetwork ? _rootIPv4 : _rootIPv6;
            AddOrUpdateCore(addressBuffer, network.PrefixLength, trieRoot, leaf);
        }
    }
    private static void AddOrUpdateCore(ReadOnlySpan<byte> addressBuffer, int prefixLength, Node<TLeaf> root, TLeaf leaf)
    {
        Node<TLeaf> currentNode = root;
        int currentBit = 0;
        for (int i = 0; i < addressBuffer.Length; i++)
        {
            for (int j = 7; j >= 0; j--)
            {
                int bit = (addressBuffer[i] >> j) & 1;
                ref Node<TLeaf>? branch = ref (bit == 0 ? ref currentNode.Branch0 : ref currentNode.Branch1);
                branch ??= new Node<TLeaf>();
                currentNode = branch;
                if (++currentBit == prefixLength)
                {
                    currentNode.NodeLeaf = leaf;
                    currentNode.IsLeaf = true;
                    return;
                }
            }
        }
    }
}
iSazonov commented 5 days ago

Looking the bot results I guess it is something specific for my cpu so I share disasm results. results.zip

JulieLeeMSFT commented 4 days ago

@EgorBo, please review the disasm from @iSazonov.

Looking the bot results I guess it is something specific for my cpu so I share disasm results. results.zip