Cysharp / MemoryPack

Zero encoding extreme performance binary serializer for C# and Unity.
MIT License
3.29k stars 193 forks source link

Why HyperSerializer faster for simple objects? #103

Closed pairbit closed 1 year ago

pairbit commented 1 year ago

image

<!-- Console project hosting the MemoryPack vs. HyperSerializer benchmark. -->
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <!-- Built as an executable: the program's top-level statements launch the benchmark run. -->
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <!-- The program has no explicit `using System;`, so Guid, DateTime and InvalidOperationException rely on implicit usings. -->
    <ImplicitUsings>enable</ImplicitUsings>
    <!-- Nullable reference types are on; the program uses `string?`, `Person?` and the `!` operator. -->
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <!-- Benchmark harness plus the two serializers being compared, pinned to exact versions. -->
    <PackageReference Include="BenchmarkDotNet" Version="0.13.2" />
    <PackageReference Include="HyperSerializer" Version="1.0.13" />
    <PackageReference Include="MemoryPack" Version="1.9.6" />
  </ItemGroup>

</Project>
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Order;
using Hyper;
using MemoryPack;

// Sanity check before benchmarking: round-trip the sample person through both
// serializers and verify each copy equals the original, so the benchmark never
// measures a serializer that silently produces wrong data.
var bench = new SerializeBenchmark();
var person = bench.person;

var p1 = bench.MemoryPack_Copy();
var p2 = bench.HyperSerializer_Copy();

// Person is a record, so Equals compares member values rather than references.
if (!person.Equals(p1))
{
    throw new InvalidOperationException("Person != MemoryPack");
}

// p1 cannot be null here: a null p1 would already have failed the check above.
if (!p1!.Equals(p2))
{
    throw new InvalidOperationException("MemoryPack != HyperSerializer");
}

BenchmarkDotNet.Running.BenchmarkRunner.Run(typeof(SerializeBenchmark));

/// <summary>
/// Small sample payload serialized by the benchmarks: one Guid, three nullable
/// strings, an int, a bool and a DateTime.
/// </summary>
/// <remarks>
/// Declared as a record, so equality is value-based; the round-trip sanity check
/// in the program relies on that.
/// NOTE(review): the type is presumably <c>partial</c> so that MemoryPack's
/// generator can add the formatter members for <c>[MemoryPackable]</c>; confirm.
/// NOTE(review): do not reorder the properties without re-checking the serialized
/// layout; both serializers appear to emit members in declaration order.
/// </remarks>
[MemoryPackable]
public partial record Person
{
    public Guid Id { get; set; }
    public string? Name { get; set; }
    public int Age { get; set; }
    public bool IsDeleted { get; set; }
    public DateTime Created { get; set; }
    public string? Mother { get; set; }
    public string? Father { get; set; }
}

/// <summary>
/// BenchmarkDotNet suite comparing MemoryPack and HyperSerializer on a single
/// small <see cref="Person"/>: serialize, deserialize, and a full round trip ("copy").
/// </summary>
[MemoryDiagnoser]
[MinColumn, MaxColumn]
[Orderer(SummaryOrderPolicy.FastestToSlowest, MethodOrderPolicy.Declared)]
public class SerializeBenchmark
{
    // The value every benchmark serializes; internal so the program's sanity check can read it.
    internal readonly Person person;

    // Payloads produced once in the constructor so the deserialize benchmarks
    // measure decoding only.
    private readonly byte[] _personBytes_MemoryPack;
    private readonly byte[] _personBytes_HyperSerializer;

    /// <summary>Creates the sample person and pre-serializes it with both libraries.</summary>
    public SerializeBenchmark()
    {
        person = CreateSamplePerson();

        _personBytes_MemoryPack = MemoryPack_Serialize();
        _personBytes_HyperSerializer = HyperSerializer_Serialize().ToArray();
    }

    /// <summary>Builds the fixed sample value used by all benchmarks.</summary>
    private static Person CreateSamplePerson()
    {
        var sample = new Person();
        sample.Id = new Guid("a4711a09-cc9b-4681-b13f-b5f46b61f5d4");
        sample.Name = "John";
        sample.Age = 32;
        sample.IsDeleted = false;
        sample.Created = new DateTime(1990, 9, 8);
        sample.Mother = "Inna";
        sample.Father = "Mike";
        return sample;
    }

    /// <summary>Serializes the sample person with MemoryPack.</summary>
    [Benchmark]
    public byte[] MemoryPack_Serialize()
    {
        return MemoryPackSerializer.Serialize(in person);
    }

    /// <summary>Deserializes the pre-built MemoryPack payload.</summary>
    [Benchmark]
    public Person? MemoryPack_Deserialize()
    {
        return MemoryPackSerializer.Deserialize<Person>(_personBytes_MemoryPack);
    }

    /// <summary>Round trip through MemoryPack: serialize, then deserialize the result.</summary>
    [Benchmark]
    public Person? MemoryPack_Copy()
    {
        var payload = MemoryPackSerializer.Serialize(in person);
        return MemoryPackSerializer.Deserialize<Person>(payload);
    }

    /// <summary>Serializes the sample person with HyperSerializer.</summary>
    [Benchmark]
    public Span<byte> HyperSerializer_Serialize()
    {
        return HyperSerializer<Person>.Serialize(person);
    }

    /// <summary>Deserializes the pre-built HyperSerializer payload.</summary>
    [Benchmark]
    public Person HyperSerializer_Deserialize()
    {
        return HyperSerializer<Person>.Deserialize(_personBytes_HyperSerializer);
    }

    /// <summary>Round trip through HyperSerializer: serialize, then deserialize the result.</summary>
    [Benchmark]
    public Person HyperSerializer_Copy()
    {
        var payload = HyperSerializer<Person>.Serialize(person);
        return HyperSerializer<Person>.Deserialize(payload);
    }
}
neuecc commented 1 year ago

please provide more details

pairbit commented 1 year ago

added benchmark

neuecc commented 1 year ago

Thanks, it's too big a difference, so it's natural to be concerned.

Decompiled result of Hyper

// Decompiled HyperSerializer serializer for Person: computes the exact payload size
// up front, allocates one byte[] of that size, and writes every member into it in
// declaration order.
//
// Cursor idiom used on every write: `offset += offsetWritten` first advances past the
// previously written field, then `offsetWritten = N` records the size of the field being
// written now. This depends on C# evaluating method arguments left to right, so the
// statement order and argument order must not be changed.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Span<byte> Serialize(Person obj)
{
    var offset = 0;
    var offsetWritten = 0;
    // 41 is the fixed-size part: Guid (16) + int (4) + bool (1) + DateTime (8)
    // + three 4-byte string length prefixes (12). Each non-null string then adds
    // Length * sizeof(char) bytes; a null string adds nothing.
    var len = 41 + (obj.Name?.Length ?? 0) * Unsafe.SizeOf<System.Char>() + (obj.Mother?.Length ?? 0) * Unsafe.SizeOf<System.Char>() + (obj.Father?.Length ?? 0) * Unsafe.SizeOf<System.Char>();
    Span<byte> bytes = new byte[len];
    var _Id = (System.Guid)obj.Id; MemoryMarshal.Write(bytes.Slice(offset += offsetWritten, offsetWritten = 16), ref _Id);
    // String encoding: a 4-byte prefix holding the byte length (negative when the string
    // is null), followed by the raw UTF-16 chars. The payload copy is skipped for both
    // null and empty strings, in which case the cursor is not advanced past the prefix
    // until the next write.
    int _Name = (obj.Name?.Length ?? -1) * Unsafe.SizeOf<System.Char>(); MemoryMarshal.Write(bytes.Slice(offset += offsetWritten, offsetWritten = 4), ref _Name);
    if (_Name > 0) { var b = bytes.Slice(offset += offsetWritten, offsetWritten = _Name); MemoryMarshal.Cast<char, byte>(obj.Name.AsSpan()).CopyTo(b); }
    var _Age = (System.Int32)obj.Age; MemoryMarshal.Write(bytes.Slice(offset += offsetWritten, offsetWritten = 4), ref _Age);
    var _IsDeleted = (System.Boolean)obj.IsDeleted; MemoryMarshal.Write(bytes.Slice(offset += offsetWritten, offsetWritten = 1), ref _IsDeleted);
    var _Created = (System.DateTime)obj.Created; MemoryMarshal.Write(bytes.Slice(offset += offsetWritten, offsetWritten = 8), ref _Created);
    // Mother and Father use the same prefix-plus-chars encoding as Name.
    int _Mother = (obj.Mother?.Length ?? -1) * Unsafe.SizeOf<System.Char>(); MemoryMarshal.Write(bytes.Slice(offset += offsetWritten, offsetWritten = 4), ref _Mother);
    if (_Mother > 0) { var b = bytes.Slice(offset += offsetWritten, offsetWritten = _Mother); MemoryMarshal.Cast<char, byte>(obj.Mother.AsSpan()).CopyTo(b); }
    int _Father = (obj.Father?.Length ?? -1) * Unsafe.SizeOf<System.Char>(); MemoryMarshal.Write(bytes.Slice(offset += offsetWritten, offsetWritten = 4), ref _Father);
    if (_Father > 0) { var b = bytes.Slice(offset += offsetWritten, offsetWritten = _Father); MemoryMarshal.Cast<char, byte>(obj.Father.AsSpan()).CopyTo(b); }

    return bytes;

}
// Decompiled HyperSerializer deserializer for Person: reads the members back from
// `bytes` in the same order and with the same field sizes the serializer wrote them.
//
// Uses the same cursor idiom as the serializer: `offset += offsetWritten` skips the
// previously read field, then `offsetWritten = N` records the size of the field being
// read now. This depends on left-to-right argument evaluation, so order must be kept.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Person Deserialize(ReadOnlySpan<byte> bytes)
{
    Person obj = new();
    var offset = 0;
    var offsetWritten = 0;
    // len0 is never read in this listing.
    int len0 = 0;
    obj.Id = (System.Guid)MemoryMarshal.Read<System.Guid>(bytes.Slice(offset += offsetWritten, offsetWritten = 16));
    // Strings: read the 4-byte length prefix; a negative value means null, otherwise
    // that many bytes are reinterpreted as chars and materialized as a string
    // (a zero-length slice yields an empty string).
    var _Name = (Int32)MemoryMarshal.Read<Int32>(bytes.Slice(offset += offsetWritten, offsetWritten = 4));
    obj.Name = (_Name >= 0) ? MemoryMarshal.Cast<byte, char>(bytes.Slice(offset += offsetWritten, offsetWritten = _Name)).ToString() : null;
    obj.Age = (System.Int32)MemoryMarshal.Read<System.Int32>(bytes.Slice(offset += offsetWritten, offsetWritten = 4));
    obj.IsDeleted = (System.Boolean)MemoryMarshal.Read<System.Boolean>(bytes.Slice(offset += offsetWritten, offsetWritten = 1));
    obj.Created = (System.DateTime)MemoryMarshal.Read<System.DateTime>(bytes.Slice(offset += offsetWritten, offsetWritten = 8));
    // Mother and Father are decoded exactly like Name.
    var _Mother = (Int32)MemoryMarshal.Read<Int32>(bytes.Slice(offset += offsetWritten, offsetWritten = 4));
    obj.Mother = (_Mother >= 0) ? MemoryMarshal.Cast<byte, char>(bytes.Slice(offset += offsetWritten, offsetWritten = _Mother)).ToString() : null;
    var _Father = (Int32)MemoryMarshal.Read<Int32>(bytes.Slice(offset += offsetWritten, offsetWritten = 4));
    obj.Father = (_Father >= 0) ? MemoryMarshal.Cast<byte, char>(bytes.Slice(offset += offsetWritten, offsetWritten = _Father)).ToString() : null;

    return obj;
}

Decompiled result of MemoryPack (edited, slimmed)

// Generated MemoryPack serializer for Person (trimmed excerpt): writes a null marker
// for a null value, otherwise an object header followed by the members in declaration
// order through the MemoryPackWriter.
static void IMemoryPackable<Person>.Serialize<TBufferWriter>(ref MemoryPackWriter<TBufferWriter> writer, scoped ref Person? value) 
{
    if (value == null)
    {
        writer.WriteNullObjectHeader();
        // The END label this jumps to is not part of the trimmed excerpt.
        goto END;
    }

    // Header and first member are written in a single call.
    // NOTE(review): 7 is presumably the member count (Person has seven properties); confirm.
    writer.WriteUnmanagedWithObjectHeader(7, value.@Id);
    writer.WriteString(value.@Name);
    // The three unmanaged members between the strings are written in one batched call.
    writer.WriteUnmanaged(value.@Age, value.@IsDeleted, value.@Created);
    writer.WriteString(value.@Mother);
    writer.WriteString(value.@Father);
}

// Generated MemoryPack deserializer for Person (trimmed excerpt): reads the object
// header, reads the members into locals in the same order the serializer wrote them,
// then constructs a new Person from those locals.
static void IMemoryPackable<Person>.Deserialize(ref MemoryPackReader reader, scoped ref Person? value)
{
    // A failed header read produces a default (null) value and skips member reading.
    if (!reader.TryReadObjectHeader(out var count))
    {
        value = default!;
        goto END;
    }

    // One temporary per member; they are assigned on the path below and consumed at NEW.
    global::System.Guid __Id;
    string __Name;
    int __Age;
    bool __IsDeleted;
    global::System.DateTime __Created;
    string __Mother;
    string __Father;

    // Path shown here: header count is 7 and no existing instance was passed in.
    if (count == 7)
    {
        if (value == null)
        {
            reader.ReadUnmanaged(out __Id);
            __Name = reader.ReadString();
            // The three unmanaged members between the strings are read in one batched call.
            reader.ReadUnmanaged(out __Age, out __IsDeleted, out __Created);
            __Mother = reader.ReadString();
            __Father = reader.ReadString();

            goto NEW;
        }
    }
    // trimmed other code (versioning, SET:, etc...)
    // NOTE(review): the removed code presumably covers the other count/value cases, which
    // this excerpt leaves without assignments to the locals above.

NEW:
    value = new Person()
    {
        @Id = __Id,
        @Name = __Name,
        @Age = __Age,
        @IsDeleted = __IsDeleted,
        @Created = __Created,
        @Mother = __Mother,
        @Father = __Father
    };

END:
    return;
}

Both formats are very similar in that they are sequential and write/read the memory data itself as much as possible. However, there are several factors that make a difference in performance.

  1. Hyper's string codec is UTF16 but MemoryPack's default is UTF8

While there are significant advantages to handling UTF-16 as-is, MemoryPack chose UTF-8 because UTF-16 doubles the payload size for ASCII text. A 50% reduction is a level of compression that would be very difficult to achieve with a general-purpose compression library.

Also, if the payload becomes large enough to be allocated on the Large Object Heap (LOH), there will be a significant performance loss. So the 50% size reduction matters for performance as well.

You can opt into UTF-16 by selecting MemoryPackOptions.Utf16.

  2. Avoid final buffer copy

In MemoryPack, after writing to MemoryPackWriter, if a byte[] is to be retrieved, it is ToArrayed at the end. This is because the final buffer size is not known during the serialization process. If you had an array of objects and you were calculating the size of all of them, you would run the traversal twice.

Hyper has the strong restriction that types that require computation are Unsupported, and there is always the assumption that serialized types are fixed-length. Therefore, you can write directly to the final buffer size byte[].

Even in MemoryPack, in .NET 7, if the types are all fixed length, we write to a fixed byte[]. However, String is treated as variable-length (UTF8!), so the example type does not pass such an optimization.

  3. Avoid reader/writer

MemoryPack passes through the type MemoryPackWriter/Reader for input/output flexibility (IBufferWriter and ReadOnlySequence support). This has significant performance advantages as well, for example, it can connect directly to Kestrel's PipeBody.

Hyper is completely inline because it is dedicated to reading and writing byte[] only. Coupled with the fixed-length format specification, this has the advantage of avoiding length checks.

MemoryPackWriter/Reader also has optimizations that try to avoid length checks whenever possible, such as writer.WriteUnmanaged(value.@Age, value.@IsDeleted, value.@Created); but when a String or another variable-length member sits in between, the optimization breaks down.


I do not consider Hyper's specification to be a competitor because it is not suitable for a general-purpose serializer. However, I would not like to see a 3x performance difference (even if I change to UTF16). I would like to optimize it a bit more.

pairbit commented 1 year ago

thanks for such a detailed answer