dotnet / runtime

.NET is a cross-platform runtime for cloud, mobile, desktop, and IoT apps.
https://docs.microsoft.com/dotnet/core/
MIT License
14.67k stars 4.58k forks source link

[API Proposal]: Expose `AVX512 FP16` and `AVX` `F16C` ISA #98820

Open anthonycanino opened 5 months ago

anthonycanino commented 5 months ago

Background and motivation

This proposal enables hardware acceleration of FP16 operations through Intel's FP16 ISAs. Both F16C and AVX512 FP16 are covered.

We use the System.Half datatype — the managed implementation of FP16 in .NET — as the foundational datatype for the FP16 operations. Note that this proposal exposes the API for later implementation; implementing it will require some internal .NET rework, particularly around the use of System.Half as a hardware-accelerated vectorized data type.

API Proposal


// Proposed intrinsics surface for the F16C ISA: conversions between packed
// half-precision (Half) vectors and packed single-precision (float) vectors.
// The members are bodiless API-proposal signatures; each group is preceded by
// the instruction mnemonic it is meant to map to.
// NOTE(review): the issue title describes this ISA as `AVX` `F16C`, while the
// class derives from Avx2 — confirm the intended base class.
class F16c : Avx2
{
    // vcvtph2ps
    // Half -> float. Both overloads take a 128-bit Half vector and differ only
    // in the width of the float vector they return (128-bit vs 256-bit).
    public static Vector128<float> ConvertToVector128Single(Vector128<Half> value);
    public static Vector256<float> ConvertToVector256Single(Vector128<Half> value);

    // vcvtps2ph
    // float -> Half. Both overloads return a 128-bit Half vector, from a
    // 128-bit or a 256-bit float source.
    // `control` is presumably the instruction's immediate (rounding control) — TODO confirm.
    public static Vector128<Half> ConvertToVector128Half(Vector128<float> value, byte control);
    public static Vector128<Half> ConvertToVector128Half(Vector256<float> value, byte control);
}

class Avx512Fp16 : Avx512F
{
    // vaddph
    public static Vector512<Half> Add(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Add(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vdivph
    public static Vector512<Half> Divide(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Divide(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vgetexpph
    public static Vector512<Half> GetExponent(Vector512<Half> value);

    // vgetmantph
    public static Vector512<Half> GetMantissa(Vector512<Half> value, byte control);

    // vmaxph
    public static Vector512<Half> Max(Vector512<Half> left, Vector512<Half> right);

    // vminph
    // Operands are named left/right, matching the Max declaration above.
    public static Vector512<Half> Min(Vector512<Half> left, Vector512<Half> right);

    // vmulph
    public static Vector512<Half> Multiply(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Multiply(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vrcpph
    public static Vector512<Half> Reciprocal(Vector512<Half> value);

    // vreduceph
    // NOTE(review): this is declared as returning a scalar Half, whereas GetMantissa and
    // RoundScale — which share the same (vector, byte control) shape — return a vector.
    // Confirm the intended return type. ReduceScalar and the VL.Reduce overloads are
    // declared the same way.
    public static Half Reduce(Vector512<Half> left, byte control);

    // vrndscaleph
    public static Vector512<Half> RoundScale(Vector512<Half> left, byte control);

    // vrsqrtph
    public static Vector512<Half> ReciprocalSqrt(Vector512<Half> value);
    public static Vector512<Half> ReciprocalSqrt(Vector512<Half> value, FloatRoundingMode mode);

    // vscalefph
    public static Vector512<Half> Scale(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Scale(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vsqrtph
    public static Vector512<Half> Sqrt(Vector512<Half> value);
    public static Vector512<Half> Sqrt(Vector512<Half> value, FloatRoundingMode mode);

    // vsubph
    public static Vector512<Half> Subtract(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Subtract(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vfmaddcph
    // NOTE(review): unlike FusedComplexMultiplyAddConjugate below, no FloatRoundingMode
    // overload is declared for this method — confirm whether that is intentional.
    public static Vector512<Half> FusedComplexMultiplyAdd(Vector512<Half> addend, Vector512<Half> left, Vector512<Half> right);

    // vfcmaddcph
    public static Vector512<Half> FusedComplexMultiplyAddConjugate(Vector512<Half> addend, Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> FusedComplexMultiplyAddConjugate(Vector512<Half> addend, Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vfmulcph
    public static Vector512<Half> ComplexMultiply(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> ComplexMultiply(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vfcmulcph
    // Conjugate counterpart of ComplexMultiply; the second overload additionally takes a FloatRoundingMode.
    public static Vector512<Half> ComplexMultiplyConjugate(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> ComplexMultiplyConjugate(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // vfmaddsubXXXph
    // Three-operand fused multiply with alternating add/subtract; the second overload
    // additionally takes a FloatRoundingMode, like the other fused-multiply groups.
    public static Vector512<Half> FusedMultiplyAddSubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplyAddSubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);

    // vfmsubaddXXXph
    public static Vector512<Half> FusedMultiplySubtractAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplySubtractAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);

    // vfmaddXXXph
    public static Vector512<Half> FusedMultiplyAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplyAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);

    // vfmsubXXXph
    public static Vector512<Half> FusedMultiplySubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplySubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);

    // vfnmaddXXXph
    public static Vector512<Half> FusedMultiplyAddNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplyAddNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);

    // vfnmsubXXXph
    public static Vector512<Half> FusedMultiplySubtractNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplySubtractNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);

    // vcmpph
    public static Vector512<Half> Compare(Vector512<Half> left, Vector512<Half> right, FloatComparisonMode mode);
    public static Vector512<Half> CompareEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareGreaterThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareGreaterThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareLessThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareLessThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotGreaterThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotGreaterThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotLessThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotLessThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareOrdered(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareUnordered(Vector512<Half> left, Vector512<Half> right);

    // vfpclassph
    public static Vector512<Half> Classify(Vector512<Half> value, byte control);

    // vcvtw2ph
    public static Vector512<Half> ConvertToVector512Half(Vector512<short> value);
    public static Vector512<Half> ConvertToVector512Half(Vector512<short> value, FloatRoundingMode mode);

    // vcvtdq2ph
    public static Vector256<Half> ConvertToVector256Half(Vector512<int> value);
    public static Vector256<Half> ConvertToVector256Half(Vector512<int> value, FloatRoundingMode mode);

    // vcvtqq2ph
    public static Vector128<Half> ConvertToVector128Half(Vector512<long> value);
    public static Vector128<Half> ConvertToVector128Half(Vector512<long> value, FloatRoundingMode mode);

    // vcvtuw2ph
    public static Vector512<Half> ConvertToVector512Half(Vector512<ushort> value);
    public static Vector512<Half> ConvertToVector512Half(Vector512<ushort> value, FloatRoundingMode mode);

    // vcvtudq2ph
    public static Vector256<Half> ConvertToVector256Half(Vector512<uint> value);
    public static Vector256<Half> ConvertToVector256Half(Vector512<uint> value, FloatRoundingMode mode);

    // vcvtuqq2ph
    public static Vector128<Half> ConvertToVector128Half(Vector512<ulong> value);
    public static Vector128<Half> ConvertToVector128Half(Vector512<ulong> value, FloatRoundingMode mode);

    // vcvtps2ph
    public static Vector256<Half> ConvertToVector256Half(Vector512<float> value);
    public static Vector256<Half> ConvertToVector256Half(Vector512<float> value, FloatRoundingMode mode);

    // vcvtpd2ph
    public static Vector128<Half> ConvertToVector128Half(Vector512<double> value);
    public static Vector128<Half> ConvertToVector128Half(Vector512<double> value, FloatRoundingMode mode);

    // vcvtph2w
    // Half -> short, same vector width in and out; the second overload additionally takes a FloatRoundingMode.
    public static Vector512<short> ConvertToVector512Int16(Vector512<Half> value);
    public static Vector512<short> ConvertToVector512Int16(Vector512<Half> value, FloatRoundingMode mode);

    // vcvttph2w
    public static Vector512<short> ConvertToVector512Int16WithTruncation(Vector512<Half> value);

    // vcvtph2dq
    public static Vector512<int> ConvertToVector512Int32(Vector256<Half> value);
    public static Vector512<int> ConvertToVector512Int32(Vector256<Half> value, FloatRoundingMode mode);

    // vcvttph2dq
    public static Vector512<int> ConvertToVector512Int32WithTruncation(Vector256<Half> value);

    // vcvtph2qq
    public static Vector512<long> ConvertToVector512Int64(Vector128<Half> value);
    public static Vector512<long> ConvertToVector512Int64(Vector128<Half> value, FloatRoundingMode mode);

    // vcvttph2qq
    public static Vector512<long> ConvertToVector512Int64WithTruncation(Vector128<Half> value);

    // vcvtph2uw
    public static Vector512<ushort> ConvertToVector512UInt16(Vector512<Half> value);
    public static Vector512<ushort> ConvertToVector512UInt16(Vector512<Half> value, FloatRoundingMode mode);

    // vcvttph2uw
    public static Vector512<ushort> ConvertToVector512UInt16WithTruncation(Vector512<Half> value);

    // vcvtph2udq
    public static Vector512<uint> ConvertToVector512UInt32(Vector256<Half> value);
    public static Vector512<uint> ConvertToVector512UInt32(Vector256<Half> value, FloatRoundingMode mode);

    // vcvttph2udq
    public static Vector512<uint> ConvertToVector512UInt32WithTruncation(Vector256<Half> value);

    // vcvtph2uqq
    public static Vector512<ulong> ConvertToVector512UInt64(Vector128<Half> value);
    public static Vector512<ulong> ConvertToVector512UInt64(Vector128<Half> value, FloatRoundingMode mode);

    // vcvttph2uqq
    public static Vector512<ulong> ConvertToVector512UInt64WithTruncation(Vector128<Half> value);

    // vcvtph2ps
    public static Vector512<float> ConvertToVector512Single(Vector256<Half> value);

    // vcvtph2pd
    public static Vector512<double> ConvertToVector512Double(Vector128<Half> value);
    public static Vector512<double> ConvertToVector512Double(Vector128<Half> value, FloatRoundingMode mode);

    // SCALAR Ops

    // vaddsh
    public static Vector128<Half> AddScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> AddScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vdivsh
    public static Vector128<Half> DivideScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> DivideScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vgetexpsh
    public static Vector128<Half> GetExponentScalar(Vector128<Half> value);

    // vgetmantsh
    public static Vector128<Half> GetMantissaScalar(Vector128<Half> value, byte control);

    // vmaxsh
    public static Vector128<Half> MaxScalar(Vector128<Half> left, Vector128<Half> right);

    // vminsh
    // Operands are named left/right, matching the MaxScalar declaration above.
    public static Vector128<Half> MinScalar(Vector128<Half> left, Vector128<Half> right);

    // vmulsh
    public static Vector128<Half> MultiplyScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> MultiplyScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vrcpsh
    public static Vector128<Half> ReciprocalScalar(Vector128<Half> value);

    // vreducesh
    public static Half ReduceScalar(Vector128<Half> left, byte control);

    // vrndscalesh
    public static Vector128<Half> RoundScaleScalar(Vector128<Half> left, byte control);

    // vrsqrtsh
    public static Vector128<Half> ReciprocalSqrtScalar(Vector128<Half> value);

    // vscalefsh
    public static Vector128<Half> ScaleScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> ScaleScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vsqrtsh
    public static Vector128<Half> SqrtScalar(Vector128<Half> value);
    public static Vector128<Half> SqrtScalar(Vector128<Half> value, FloatRoundingMode mode);

    // vsubsh
    public static Vector128<Half> SubtractScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> SubtractScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vfmaddcsh
    public static Vector128<Half> FusedComplexMultiplyAddScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> FusedComplexMultiplyAddScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vfcmaddcsh
    public static Vector128<Half> FusedComplexMultiplyAddConjugateScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> FusedComplexMultiplyAddConjugateScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vfmulcsh
    public static Vector128<Half> ComplexMultiplyScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> ComplexMultiplyScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vfcmulcsh
    public static Vector128<Half> ComplexMultiplyConjugateScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> ComplexMultiplyConjugateScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // vfmaddsubXXXsh
    public static Vector128<Half> FusedMultiplyAddSubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplyAddSubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);

    // vfmsubaddXXXsh
    public static Vector128<Half> FusedMultiplySubtractAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplySubtractAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);

    // vfmaddXXXsh
    public static Vector128<Half> FusedMultiplyAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplyAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);

    // vfmsubXXXsh
    public static Vector128<Half> FusedMultiplySubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplySubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);

    // vfnmaddXXXsh
    public static Vector128<Half> FusedMultiplyAddNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplyAddNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);

    // vfnmsubXXXsh
    public static Vector128<Half> FusedMultiplySubtractNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplySubtractNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);

    // vcmpsh
    public static Vector128<Half> CompareScalar(Vector128<Half> left, Vector128<Half> right, FloatComparisonMode mode);
    public static Vector128<Half> CompareEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareGreaterThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareGreaterThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareLessThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareLessThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotGreaterThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotGreaterThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotLessThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotLessThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareOrderedScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareUnorderedScalar(Vector128<Half> left, Vector128<Half> right);

    // vcomish
    public static bool CompareScalarOrderedEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedGreaterThan(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedLessThan(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedLessThanOrEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedNotEqual(Vector128<Half> left, Vector128<Half> right);

    // vucomish
    // Scalar comparisons returning bool. Operands are Vector128<Half>, matching the
    // parallel CompareScalarOrdered* (vcomish) group and every other scalar op in this class.
    public static bool CompareScalarUnorderedEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarUnorderedGreaterThan(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarUnorderedLessThan(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarUnorderedNotEqual(Vector128<Half> left, Vector128<Half> right);

    // vfpclasssh
    public static Vector128<Half> ClassifyScalar(Vector128<Half> value, byte control);

    // vcvtsi2sh
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, int value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, int value, FloatRoundingMode mode);

    // vcvtusi2sh
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, uint value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, uint value, FloatRoundingMode mode);

    // vcvtss2sh
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<float> value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<float> value, FloatRoundingMode mode);

    // vcvtsd2sh
    // double -> Half scalar conversion; `upper` is a Vector128<Half> passed alongside the
    // converted value. The second overload additionally takes a FloatRoundingMode.
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<double> value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<double> value, FloatRoundingMode mode);

    // vcvtsh2si
    public static int ConvertToInt32(Vector128<Half> value);
    public static int ConvertToInt32(Vector128<Half> value, FloatRoundingMode mode);

    // vcvttsh2si
    public static int ConvertToInt32WithTruncation(Vector128<Half> value);

    // vcvtsh2usi
    public static uint ConvertToUInt32(Vector128<Half> value);
    public static uint ConvertToUInt32(Vector128<Half> value, FloatRoundingMode mode);

    // vcvttsh2usi
    public static uint ConvertToUInt32WithTruncation(Vector128<Half> value);

    // vcvtsh2ss
    public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<Half> value);
    public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<Half> value, FloatRoundingMode mode);

    // vcvtsh2sd
    public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, Vector128<Half> value);
    public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, Vector128<Half> value, FloatRoundingMode mode);

    // Nested surface holding the scalar conversions between Half and 64-bit integers
    // (long / ulong). The 32-bit counterparts (int / uint) are declared on the enclosing class.
    // NOTE(review): the 32-bit counterparts also declare FloatRoundingMode overloads;
    // none are declared here — confirm whether that is intentional.
    class X64 : Avx512F.X64
    {
        // vcvtsi2sh
        // long -> Half; `upper` is a Vector128<Half> passed alongside the converted value.
        public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, long value);

        // vcvtusi2sh
        // ulong -> Half; same shape as the signed overload above.
        public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, ulong value);

        // vcvtsh2si
        // Half -> long.
        public static long ConvertToInt64(Vector128<Half> value);

        // vcvttsh2si
        // Half -> long, truncating variant.
        public static long ConvertToInt64WithTruncation(Vector128<Half> value);

        // vcvtsh2usi
        // Half -> ulong.
        public static ulong ConvertToUInt64(Vector128<Half> value);

        // vcvttsh2usi
        // Half -> ulong, truncating variant.
        public static ulong ConvertToUInt64WithTruncation(Vector128<Half> value);
    }

    class VL : Avx512F.VL
    {
        // vaddph
        public static Vector128<Half> Add(Vector128<Half> left, Vector128<Half> right);

        // vdivph
        public static Vector128<Half> Divide(Vector128<Half> left, Vector128<Half> right);

        // vgetexpph
        public static Vector128<Half> GetExponent(Vector128<Half> value);

        // vgetmantph
        public static Vector128<Half> GetMantissa(Vector128<Half> value, byte control);

        // vmaxph
        public static Vector128<Half> Max(Vector128<Half> left, Vector128<Half> right);

        // vminph
        // Operands are named left/right, matching the Max declaration above.
        public static Vector128<Half> Min(Vector128<Half> left, Vector128<Half> right);

        // vmulph
        public static Vector128<Half> Multiply(Vector128<Half> left, Vector128<Half> right);

        // vrcpph
        public static Vector128<Half> Reciprocal(Vector128<Half> value);

        // vreduceph
        public static Half Reduce(Vector128<Half> left, byte control);

        // vrndscaleph
        public static Vector128<Half> RoundScale(Vector128<Half> left, byte control);

        // vrsqrtph
        public static Vector128<Half> ReciprocalSqrt(Vector128<Half> value);

        // vscalefph
        // NOTE(review): the 512-bit Scale overloads on the enclosing class take either no
        // extra argument or a FloatRoundingMode; this one takes a `byte control` instead —
        // confirm whether the extra parameter is intended.
        public static Vector128<Half> Scale(Vector128<Half> left, Vector128<Half> right, byte control);

        // vsqrtph
        public static Vector128<Half> Sqrt(Vector128<Half> value);

        // vsubph
        public static Vector128<Half> Subtract(Vector128<Half> left, Vector128<Half> right);

        // vfmaddcph
        public static Vector128<Half> FusedComplexMultiplyAdd(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right);

        // vfcmaddcph
        public static Vector128<Half> FusedComplexMultiplyAddConjugate(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right);

        // vfmulcph
        public static Vector128<Half> ComplexMultiply(Vector128<Half> left, Vector128<Half> right);

        // vfcmulcph
        public static Vector128<Half> ComplexMultiplyConjugate(Vector128<Half> left, Vector128<Half> right);

        // vfmaddsubXXXph
        public static Vector128<Half> FusedMultiplyAddSubtract(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);

        // vfmsubaddXXXph
        public static Vector128<Half> FusedMultiplySubtractAdd(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);

        // vfmaddXXXph
        public static Vector128<Half> FusedMultiplyAdd(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);

        // vfmsubXXXph
        public static Vector128<Half> FusedMultiplySubtract(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);

        // vfnmaddXXXph
        public static Vector128<Half> FusedMultiplyAddNegated(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);

        // vfnmsubXXXph
        public static Vector128<Half> FusedMultiplySubtractNegated(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);

        // vcmpph
        public static Vector128<Half> Compare(Vector128<Half> left, Vector128<Half> right, FloatComparisonMode mode);
        public static Vector128<Half> CompareEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareGreaterThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareGreaterThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareLessThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareLessThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotGreaterThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotGreaterThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotLessThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotLessThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareOrdered(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareUnordered(Vector128<Half> left, Vector128<Half> right);

        // vfpclassph
        public static Vector128<Half> Classify(Vector128<Half> value, byte control);

        // vcvtw2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<short> value);

        // vcvtdq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<int> value);

        // vcvtqq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<long> value);

        // vcvtuw2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<ushort> value);

        // vcvtudq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<uint> value);

        // vcvtuqq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<ulong> value);

        // vcvtps2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<float> value);

        // vcvtpd2ph
        public static Vector128<Half> ConvertToVector128Half(Vector128<double> value);

        // vcvtph2w
        public static Vector128<short> ConvertToVector128Int16(Vector128<Half> value);

        // vcvttph2w
        public static Vector128<short> ConvertToVector128Int16WithTruncation(Vector128<Half> value);

        // vcvtph2dq
        public static Vector128<int> ConvertToVector128Int32(Vector128<Half> value);

        // vcvttph2dq
        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<Half> value);

        // vcvtph2qq
        public static Vector128<long> ConvertToVector128Int64(Vector128<Half> value);

        // vcvttph2qq
        public static Vector128<long> ConvertToVector128Int64WithTruncation(Vector128<Half> value);

        // vcvtph2uw
        public static Vector128<ushort> ConvertToVector128UInt16(Vector128<Half> value);

        // vcvttph2uw
        public static Vector128<ushort> ConvertToVector128UInt16WithTruncation(Vector128<Half> value);

        // vcvtph2udq
        public static Vector128<uint> ConvertToVector128UInt32(Vector128<Half> value);

        // vcvttph2udq
        public static Vector128<uint> ConvertToVector128UInt32WithTruncation(Vector128<Half> value);

        // vcvtph2uqq
        public static Vector128<ulong> ConvertToVector128UInt64(Vector128<Half> value);

        // vcvttph2uqq
        public static Vector128<ulong> ConvertToVector128UInt64WithTruncation(Vector128<Half> value);

        // vcvtph2ps
        public static Vector128<float> ConvertToVector128Single(Vector128<Half> value);

        // vcvtph2pd
        public static Vector128<double> ConvertToVector128Double(Vector128<Half> value);

        // 256
        // vaddph
        public static Vector256<Half> Add(Vector256<Half> left, Vector256<Half> right);

        // vdivph
        public static Vector256<Half> Divide(Vector256<Half> left, Vector256<Half> right);

        // vgetexpph
        public static Vector256<Half> GetExponent(Vector256<Half> value);

        // vgetmantph
        public static Vector256<Half> GetMantissa(Vector256<Half> value, byte control);

        // vmaxph
        public static Vector256<Half> Max(Vector256<Half> left, Vector256<Half> right);

        // vminph
        // Operands are named left/right, matching the Max declaration above.
        public static Vector256<Half> Min(Vector256<Half> left, Vector256<Half> right);

        // vmulph
        public static Vector256<Half> Multiply(Vector256<Half> left, Vector256<Half> right);

        // vrcpph
        public static Vector256<Half> Reciprocal(Vector256<Half> value);

        // vreduceph
        public static Half Reduce(Vector256<Half> left, byte control);

        // vrndscaleph
        public static Vector256<Half> RoundScale(Vector256<Half> left, byte control);

        // vrsqrtph
        public static Vector256<Half> ReciprocalSqrt(Vector256<Half> value);

        // vscalefph
        // NOTE(review): the 512-bit Scale overloads on the enclosing class take either no
        // extra argument or a FloatRoundingMode; this one takes a `byte control` instead —
        // confirm whether the extra parameter is intended.
        public static Vector256<Half> Scale(Vector256<Half> left, Vector256<Half> right, byte control);

        // vsqrtph
        public static Vector256<Half> Sqrt(Vector256<Half> value);

        // vsubph
        public static Vector256<Half> Subtract(Vector256<Half> left, Vector256<Half> right);

        // vfmaddcph
        public static Vector256<Half> FusedComplexMultiplyAdd(Vector256<Half> addend, Vector256<Half> left, Vector256<Half> right);

        // vfcmaddcph
        public static Vector256<Half> FusedComplexMultiplyAddConjugate(Vector256<Half> addend, Vector256<Half> left, Vector256<Half> right);

        // vfmulcph
        public static Vector256<Half> ComplexMultiply(Vector256<Half> left, Vector256<Half> right);

        // vfcmulcph
        public static Vector256<Half> ComplexMultiplyConjugate(Vector256<Half> left, Vector256<Half> right);

        // vfmaddsubXXXph
        public static Vector256<Half> FusedMultiplyAddSubtract(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);

        // vfmsubaddXXXph
        public static Vector256<Half> FusedMultiplySubtractAdd(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);

        // vfmaddXXXph
        public static Vector256<Half> FusedMultiplyAdd(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);

        // vfmsubXXXph
        public static Vector256<Half> FusedMultiplySubtract(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);

        // vfnmaddXXXph
        public static Vector256<Half> FusedMultiplyAddNegated(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);

        // vfnmsubXXXph
        public static Vector256<Half> FusedMultiplySubtractNegated(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);

        // vcmpph
        // Every overload in this group returns Vector256<Half> (a vector result, not a mask type).
        // Only Compare takes an explicit FloatComparisonMode; the named overloads below take none
        // and presumably each fix one comparison predicate — confirm the exact mode per overload.
        public static Vector256<Half> Compare(Vector256<Half> left, Vector256<Half> right, FloatComparisonMode mode);
        public static Vector256<Half> CompareEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareGreaterThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareGreaterThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareLessThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareLessThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotGreaterThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotGreaterThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotLessThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotLessThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareOrdered(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareUnordered(Vector256<Half> left, Vector256<Half> right);

        // vfpclassph
        // NOTE(review): the meaning of the individual `control` bits is not described in this
        // proposal — confirm against the instruction reference before documenting the public API.
        public static Vector256<Half> Classify(Vector256<Half> value, byte control);

        // vcvtw2ph
        // Conversions to Half. In this group, 16-bit sources (short/ushort) keep the 256-bit
        // width, while 32-bit and 64-bit sources return a Vector128<Half>.
        // NOTE(review): a Vector256 of 64-bit elements holds fewer elements than a Vector128<Half>
        // has lanes; presumably the unused upper lanes are zeroed — confirm.
        public static Vector256<Half> ConvertToVector256Half(Vector256<short> value);

        // vcvtdq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector256<int> value);

        // vcvtqq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector256<long> value);

        // vcvtuw2ph
        public static Vector256<Half> ConvertToVector256Half(Vector256<ushort> value);

        // vcvtudq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector256<uint> value);

        // vcvtuqq2ph
        public static Vector128<Half> ConvertToVector128Half(Vector256<ulong> value);

        // vcvtps2phx
        public static Vector128<Half> ConvertToVector128Half(Vector256<float> value);

        // vcvtpd2ph
        public static Vector128<Half> ConvertToVector128Half(Vector256<double> value);

        // vcvtph2w
        // Conversions from Half. In this group, 16-bit results (short/ushort) take a
        // Vector256<Half>, while every wider result type takes a Vector128<Half> source.
        // NOTE(review): for 64-bit results a Vector256 holds fewer elements than a Vector128<Half>
        // has lanes; presumably only the low lanes are consumed — confirm.
        public static Vector256<short> ConvertToVector256Int16(Vector256<Half> value);

        // vcvttph2w
        public static Vector256<short> ConvertToVector256Int16WithTruncation(Vector256<Half> value);

        // vcvtph2dq
        public static Vector256<int> ConvertToVector256Int32(Vector128<Half> value);

        // vcvttph2dq
        public static Vector256<int> ConvertToVector256Int32WithTruncation(Vector128<Half> value);

        // vcvtph2qq
        public static Vector256<long> ConvertToVector256Int64(Vector128<Half> value);

        // vcvttph2qq
        public static Vector256<long> ConvertToVector256Int64WithTruncation(Vector128<Half> value);

        // vcvtph2uw
        public static Vector256<ushort> ConvertToVector256UInt16(Vector256<Half> value);

        // vcvttph2uw
        public static Vector256<ushort> ConvertToVector256UInt16WithTruncation(Vector256<Half> value);

        // vcvtph2udq
        public static Vector256<uint> ConvertToVector256UInt32(Vector128<Half> value);

        // vcvttph2udq
        public static Vector256<uint> ConvertToVector256UInt32WithTruncation(Vector128<Half> value);

        // vcvtph2uqq
        public static Vector256<ulong> ConvertToVector256UInt64(Vector128<Half> value);

        // vcvttph2uqq
        public static Vector256<ulong> ConvertToVector256UInt64WithTruncation(Vector128<Half> value);

        // vcvtph2psx
        public static Vector256<float> ConvertToVector256Single(Vector128<Half> value);

        // vcvtph2pd
        public static Vector256<double> ConvertToVector256Double(Vector128<Half> value);
    }
}

API Usage

N/A

Alternative Designs

N/A

Risks

N/A

ghost commented 5 months ago

Tagging subscribers to this area: @dotnet/area-system-runtime-intrinsics See info in area-owners.md if you want to be subscribed.

Issue Details
### Background and motivation This proposal allows to accelerate `FP16` operations through Intel's `FP16` ISAs. Both `F16c` and `AVX512 FP16` are covered. We use `System.Half` datatype --- the managed implementation of `FP16` in .NET --- as the foundational datatype for the `FP16` operations. Note that this proposal exposes the API for latter implementation, but it will require some .NET internal rework, particularly around the use of `System.Half` as a hardware accelerated vectorized data type. ### API Proposal ```csharp class F16c : Avx2 { // vcvtph2ps public static Vector128 ConvertToSingle(Vector128 value); public static Vector258 ConvertToSingle(Vector258 value); // vcvtps2ph public static Vector128 ConvertToVector128Half(Vector128 value, byte control); public static Vector258 ConvertToVector256Half(Vector258 value, byte control); } class Avx512Fp16 : Avx512F { // vaddph public static Vector512 Add(Vector512 left, Vector512 right); // vdivph public static Vector512 Divide(Vector512 left, Vector512 right); // vgetexpph public static Vector512 GetExponent(Vector512 value); // vgetmantph public static Vector512 GetMantissa(Vector512 value, byte control); // vmaxph public static Vector512 Max(Vector512 left, Vector512 right); // vminph public static Vector512 Min(Vector512 value, Vector512 right); // vminph public static Vector512 Multiply(Vector512 left, Vector512 right); // vrcpph public static Vector512 Reciprocal(Vector512 value); // vreduceph public static Half Reduce(Vector512 left, byte control); // vrndscaleph public static Vector512 RoundScale(Vector512 left, byte control); // vrsqrtph public static Vector512 ReciprocalSqrt(Vector512 value); // vscalefph public static Vector512 FloatingPointScale(Vector512 left, Vector512 right, byte control); // vsqrtph public static Vector512 Sqrt(Vector512 value); // vsubph public static Vector512 Subtract(Vector512 left, Vector512 right); // vfmaddpch public static Vector512 FusedComplexMultiplyAdd(Vector512 left, 
Vector512 right); // vfcmaddpch public static Vector512 FusedComplexMultiplyAddConjugate(Vector512 left, Vector512 right); // vfmulpch public static Vector512 ComplexMultiply(Vector512 left, Vector512 right); // vfcmulpch public static V0ector512 ComplexMultiplyConjugate(Vector512 left, Vector512 right); // vfmaddsubXXXph public static Vector512 FusedMultiplyAddSubtract(Vector512 a, Vector512 b, Vector512 c); // vfmsubaddXXXph public static Vector512 FusedMultiplySubtractAdd(Vector512 a, Vector512 b, Vector512 c); // vfmaddXXXph public static Vector512 FusedMultiplyAdd(Vector512 a, Vector512 b, Vector512 c); // vfmsubXXXph public static Vector512 FusedMultiplySubtract(Vector512 a, Vector512 b, Vector512 c); // vfnmaddXXXph public static Vector512 FusedMultiplyAddNegated(Vector512 a, Vector512 b, Vector512 c); // vfnmsubXXXph public static Vector512 FusedMultiplySubtractNegated(Vector512 a, Vector512 b, Vector512 c); // vcmpph public static Vector512 Compare(Vector512 left, Vector512 right, FloatComparisonMode mode); public static Vector512 CompareGreaterThan(Vector512 left, Vector512 right); public static Vector512 CompareGreaterThanOrEqual(Vector512 left, Vector512 right); public static Vector512 CompareLessThan(Vector512 left, Vector512 right); public static Vector512 CompareLessThanOrEqual(Vector512 left, Vector512 right); public static Vector512 CompareNotEqual(Vector512 left, Vector512 right); public static Vector512 CompareNotGreaterThan(Vector512 left, Vector512 right); public static Vector512 CompareNotGreaterThanOrEqual(Vector512 left, Vector512 right); public static Vector512 CompareNotLessThan(Vector512 left, Vector512 right); public static Vector512 CompareNotLessThanOrEqual(Vector512 left, Vector512 right); public static Vector512 CompareOrdered(Vector512 left, Vector512 right); public static Vector512 CompareUnordered(Vector512 left, Vector512 right); // vfpclassph public static Vector512 CheckFloatingPointClass(Vector512 value, byte control); // 
vcvtw2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtdq2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtqq2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtuw2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtudq2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtuqq2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtps2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtpd2ph public static Vector512 ConvertToVector512Half(Vector512 value); // vcvtph2w public static Vector512 ConvertToVector512Int16(Vector512 value); // vcvtph2dq public static Vector512 ConvertToVector512Int32(Vector512 value); // vcvtph2qq public static Vector512 ConvertToVector512Int64(Vector512 value); // vcvtph2uw public static Vector512 ConvertToVector512UInt16(Vector512 value); // vcvtph2udq public static Vector512 ConvertToVector512UInt32(Vector512 value); // vcvtph2uqq public static Vector512 ConvertToVector512UInt64(Vector512 value); // vcvtph2ps public static Vector512 ConvertToVector512Single(Vector512 value); // vcvtph2pd public static Vector512 ConvertToVector512Double(Vector512 value); // SCALAR Ops // vaddsh public static Vector128 AddScalar(Vector128 left, Vector128 right); // vdivsh public static Vector128 DivideScalar(Vector128 left, Vector128 right); // vgetexpss public static Vector128 GetExponentScalar(Vector128 value); // vgetmantsh public static Vector128 GetMantissaScalar(Vector128 value, byte control); // vmaxpsh public static Vector128 MaxScalar(Vector128 left, Vector128 right); // vminsh public static Vector128 MinScalar(Vector128 value, Vector128 right); // vmulsh public static Vector128 MultiplyScalar(Vector128 left, Vector128 right); // vrcpsh public static Vector128 ReciprocalScalar(Vector128 value); // vreducesh public static Half ReduceScalar(Vector128 left, byte control); // vrndscalesh 
public static Vector128 RoundScaleScalar(Vector128 left, byte control); // vrsqrtsh public static Vector128 ReciprocalSqrtScalar(Vector128 value); // vscalefsh public static Vector128 FloatingPointScaleScalar(Vector128 left, Vector128 right, byte control); // vsqrtsh public static Vector128 SqrtScalar(Vector128 value); // vsubsh public static Vector128 SubtractScalar(Vector128 left, Vector128 right); // vfmaddcsh public static Vector128 FusedComplexMultiplyAddScalar(Vector128 left, Vector128 right); // vfcmaddcsh public static Vector128 FusedComplexMultiplyAddConjugateScalar(Vector128 left, Vector128 right); // vfmulcsh public static Vector128 ComplexMultiplyScalar(Vector128 left, Vector128 right); // vfcmulcsh public static Vector128 ComplexMultiplyConjugateScalar(Vector128 left, Vector128 right); // vfmaddsubXXXsh public static Vector128 FusedMultiplyAddSubtractScalar(Vector128 a, Vector128 b, Vector128 c); // vfmsubaddXXXsh public static Vector128 FusedMultiplySubtractAddScalar(Vector128 a, Vector128 b, Vector128 c); // vfmaddXXXsh public static Vector128 FusedMultiplyAddScalar(Vector128 a, Vector128 b, Vector128 c); // vfmsubXXXsh public static Vector128 FusedMultiplySubtractScalar(Vector128 a, Vector128 b, Vector128 c); // vfnmaddXXXsh public static Vector128 FusedMultiplyAddNegatedScalar(Vector128 a, Vector128 b, Vector128 c); // vfnmsubXXXsh public static Vector128 FusedMultiplySubtractNegatedScalar(Vector128 a, Vector128 b, Vector128 c); public static Vector128 CompareScalar(Vector128 left, Vector128 right, FloatComparisonMode mode); public static Vector128 CompareGreaterThanScalar(Vector128 left, Vector128 right); public static Vector128 CompareGreaterThanOrEqualScalar(Vector128 left, Vector128 right); public static Vector128 CompareLessThanScalar(Vector128 left, Vector128 right); public static Vector128 CompareLessThanOrEqualScalar(Vector128 left, Vector128 right); public static Vector128 CompareNotEqualScalar(Vector128 left, Vector128 right); public 
static Vector128 CompareNotGreaterThanScalar(Vector128 left, Vector128 right); public static Vector128 CompareNotGreaterThanOrEqualScalar(Vector128 left, Vector128 right); public static Vector128 CompareNotLessThanScalar(Vector128 left, Vector128 right); public static Vector128 CompareNotLessThanOrEqualScalar(Vector128 left, Vector128 right); public static Vector128 CompareOrderedScalar(Vector128 left, Vector128 right); public static Vector128 CompareUnorderedScalar(Vector128 left, Vector128 right); // vfpclasssh public static Vector128 CheckFloatingPointClassScalar(Vector128 value, byte control); // vcvtsi2ss public static Vector128 ConvertToHalfScalar(int value); public static Vector128 ConvertToHalfScalar(long value); // vcvtusi2sh public static Vector128 ConvertToHalfScalar(uint value); public static Vector128 ConvertToHalfScalar(ulong value); // vcvtss2sh public static Vector128 ConvertToHalfScalar(Vector128 value); // vcvtsd2sh public static Vector128 ConvertToHalfScalar(Vector128 value); // vcvtsh2si public static int ConvertToInt32Scalar(Vector128 value); public static long ConvertToInt64Scalar(Vector128 value); // vcvtsh2usi public static uint ConvertToUInt32Scalar(Vector128 value); public static ulong ConvertToUInt64Scalar(Vector128 value); // vcvtsh2ss public static Vector128 ConvertToFloatScalar(Vector128 value); // vcvtsh2sd public static Vector128 ConvertToDoubleScalar(Vector128 value); class VL : Avx512F.VL { // vaddph public static Vector128 Add(Vector128 left, Vector128 right); // vdivph public static Vector128 Divide(Vector128 left, Vector128 right); // vgetexpph public static Vector128 GetExponent(Vector128 value); // vgetmantph public static Vector128 GetMantissa(Vector128 value, byte control); // vmaxph public static Vector128 Max(Vector128 left, Vector128 right); // vminph public static Vector128 Min(Vector128 value, Vector128 right); // vminph public static Vector128 Multiply(Vector128 left, Vector128 right); // vrcpph public static Vector128 
Reciprocal(Vector128 value); // vreduceph public static Half Reduce(Vector128 left, byte control); // vrndscaleph public static Vector128 RoundScale(Vector128 left, byte control); // vrsqrtph public static Vector128 ReciprocalSqrt(Vector128 value); // vscalefph public static Vector128 FloatingPointScale(Vector128 left, Vector128 right, byte control); // vsqrtph public static Vector128 Sqrt(Vector128 value); // vsubph public static Vector128 Subtract(Vector128 left, Vector128 right); // vfcmaddpch public static Vector128 FusedComplexMultiplyAdd(Vector128 left, Vector128 right); // vfcmaddpch public static Vector128 FusedComplexMultiplyAddConjugate(Vector128 left, Vector128 right); // vfmulpch public static Vector128 ComplexMultiply(Vector128 left, Vector128 right); // vfcmulpch public static Vector128 ComplexMultiplyConjugate(Vector128 left, Vector128 right); // vfmaddsubXXXph public static Vector128 FusedMultiplyAddSubtract(Vector128 a, Vector128 b, Vector128 c); // vfmsubaddXXXph public static Vector128 FusedMultiplySubtractAdd(Vector128 a, Vector128 b, Vector128 c); // vfmaddXXXph public static Vector128 FusedMultiplyAdd(Vector128 a, Vector128 b, Vector128 c); // vfmsubXXXph public static Vector128 FusedMultiplySubtract(Vector128 a, Vector128 b, Vector128 c); // vfnmaddXXXph public static Vector128 FusedMultiplyAddNegated(Vector128 a, Vector128 b, Vector128 c); // vfnmsubXXXph public static Vector128 FusedMultiplySubtractNegated(Vector128 a, Vector128 b, Vector128 c); // vcmpph public static Vector128 Compare(Vector128 left, Vector128 right, FloatComparisonMode mode); public static Vector128 CompareGreaterThan(Vector128 left, Vector128 right); public static Vector128 CompareGreaterThanOrEqual(Vector128 left, Vector128 right); public static Vector128 CompareLessThan(Vector128 left, Vector128 right); public static Vector128 CompareLessThanOrEqual(Vector128 left, Vector128 right); public static Vector128 CompareNotEqual(Vector128 left, Vector128 right); public 
static Vector128 CompareNotGreaterThan(Vector128 left, Vector128 right); public static Vector128 CompareNotGreaterThanOrEqual(Vector128 left, Vector128 right); public static Vector128 CompareNotLessThan(Vector128 left, Vector128 right); public static Vector128 CompareNotLessThanOrEqual(Vector128 left, Vector128 right); public static Vector128 CompareOrdered(Vector128 left, Vector128 right); public static Vector128 CompareUnordered(Vector128 left, Vector128 right); // vfpclassph public static Vector128 CheckFloatingPointClass(Vector128 value, byte control); // vcvtw2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtdq2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtqq2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtuw2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtudq2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtuqq2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtps2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtpd2ph public static Vector128 ConvertToVector128Half(Vector128 value); // vcvtph2w public static Vector128 ConvertToVector128Int16(Vector128 value); // vcvtph2dq public static Vector128 ConvertToVector128Int32(Vector128 value); // vcvtph2qq public static Vector128 ConvertToVector128Int64(Vector128 value); // vcvtph2uw public static Vector128 ConvertToVector128UInt16(Vector128 value); // vcvtph2udq public static Vector128 ConvertToVector128UInt32(Vector128 value); // vcvtph2uqq public static Vector128 ConvertToVector128UInt64(Vector128 value); // vcvtph2ps public static Vector128 ConvertToVector128Single(Vector128 value); // vcvtph2pd public static Vector128 ConvertToVector128Double(Vector128 value); // 256 // vaddph public static Vector256 Add(Vector256 left, Vector256 right); // vdivph public static Vector256 Divide(Vector256 left, Vector256 right); // vgetexpph public 
static Vector256 GetExponent(Vector256 value); // vgetmantph public static Vector256 GetMantissa(Vector256 value, byte control); // vmaxph public static Vector256 Max(Vector256 left, Vector256 right); // vminph public static Vector256 Min(Vector256 value, Vector256 right); // vminph public static Vector256 Multiply(Vector256 left, Vector256 right); // vrcpph public static Vector256 Reciprocal(Vector256 value); // vreduceph public static Half Reduce(Vector256 left, byte control); // vrndscaleph public static Vector256 RoundScale(Vector256 left, byte control); // vrsqrtph public static Vector256 ReciprocalSqrt(Vector256 value); // vscalefph public static Vector256 FloatingPointScale(Vector256 left, Vector256 right, byte control); // vsqrtph public static Vector256 Sqrt(Vector256 value); // vsubph public static Vector256 Subtract(Vector256 left, Vector256 right); // vfmaddpch public static Vector256 FusedComplexMultiplyAdd(Vector256 left, Vector256 right); // vfcaddlpch public static Vector256 FusedComplexMultiplyAddConjugate(Vector256 left, Vector256 right); // vfmulpch public static Vector256 ComplexMultiply(Vector256 left, Vector256 right); // vfcmulpch public static Vector256 ComplexMultiplyConjugate(Vector256 left, Vector256 right); // vfmaddsubXXXph public static Vector256 FusedMultiplyAddSubtract(Vector256 a, Vector256 b, Vector256 c); // vfmsubaddXXXph public static Vector256 FusedMultiplySubtractAdd(Vector256 a, Vector256 b, Vector256 c); // vfmaddXXXph public static Vector256 FusedMultiplyAdd(Vector256 a, Vector256 b, Vector256 c); // vfmsubXXXph public static Vector256 FusedMultiplySubtract(Vector256 a, Vector256 b, Vector256 c); // vfnmaddXXXph public static Vector256 FusedMultiplyAddNegated(Vector256 a, Vector256 b, Vector256 c); // vfnmsubXXXph public static Vector256 FusedMultiplySubtractNegated(Vector256 a, Vector256 b, Vector256 c); // vcmpph public static Vector256 Compare(Vector256 left, Vector256 right, FloatComparisonMode mode); public static 
Vector256 CompareGreaterThan(Vector256 left, Vector256 right); public static Vector256 CompareGreaterThanOrEqual(Vector256 left, Vector256 right); public static Vector256 CompareLessThan(Vector256 left, Vector256 right); public static Vector256 CompareLessThanOrEqual(Vector256 left, Vector256 right); public static Vector256 CompareNotEqual(Vector256 left, Vector256 right); public static Vector256 CompareNotGreaterThan(Vector256 left, Vector256 right); public static Vector256 CompareNotGreaterThanOrEqual(Vector256 left, Vector256 right); public static Vector256 CompareNotLessThan(Vector256 left, Vector256 right); public static Vector256 CompareNotLessThanOrEqual(Vector256 left, Vector256 right); public static Vector256 CompareOrdered(Vector256 left, Vector256 right); public static Vector256 CompareUnordered(Vector256 left, Vector256 right); // vfpclassph public static Vector258 CheckFloatingPointClass(Vector258 value, byte control); // vcvtw2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtdq2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtqq2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtuw2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtudq2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtuqq2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtps2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtpd2ph public static Vector256 ConvertToVector256Half(Vector256 value); // vcvtph2w public static Vector256 ConvertToVector256Int16(Vector256 value); // vcvtph2dq public static Vector256 ConvertToVector256Int32(Vector256 value); // vcvtph2qq public static Vector256 ConvertToVector256Int64(Vector256 value); // vcvtph2uw public static Vector256 ConvertToVector256UInt16(Vector256 value); // vcvtph2udq public static Vector256 ConvertToVector256UInt32(Vector256 value); // vcvtph2uqq public static 
Vector256 ConvertToVector256UInt64(Vector256 value); // vcvtph2ps public static Vector256 ConvertToVector256Single(Vector256 value); // vcvtph2pd public static Vector256 ConvertToVector256Double(Vector256 value); } } ``` ### API Usage N/A ### Alternative Designs N/A ### Risks N/A
Author: anthonycanino
Assignees: -
Labels: `area-System.Runtime.Intrinsics`, `api-ready-for-review`
Milestone: -
anthonycanino commented 5 months ago

@dotnet/avx512-contrib

MichalPetryka commented 5 months ago

Duplicate of #62416?

tannergooding commented 5 months ago

Duplicate of https://github.com/dotnet/runtime/issues/62416?

There is overlap, but having it explicitly separated is a bit better for API review, especially since we've got 3 separate total concerns to consider: x64, Arm64, and xplat

tannergooding commented 5 months ago

-- Fixed a couple typos of 258 to 256.

@anthonycanino, It looks like we can't directly use FloatRoundingMode (instead of byte) for F16C.ConvertToVector###Half because the constant values don't line up with the __MM_FROUND and related values used elsewhere, is that right? I wonder if it's worth taking it anyways and having the JIT do the fixup here, to make it more convenient for end users. It's already basically the same, just that FloatRoundingMode has and makes available the __MM_FROUND_NO_EXC bit and that's implicit to the consumer. For .NET there is no ability to control MXCSR.RC but we can support such an API by having a version that doesn't take the byte control, which then flows how Avx512F and other scenarios support it.

MichalPetryka commented 5 months ago

I wonder if it's worth taking it anyways and having the JIT do the fixup here, to make it more convenient for end users. It's already basically the same, just that FloatRoundingMode has and makes available the __MM_FROUND_NO_EXC bit and that's implicit to the consumer.

Since this is a ConstantExpected param, I think it's fine to take it here? Fixups aren't even needed in this case as the upper bits of the value are documented as Ignored by processor so the JIT could pass them as is.

tannergooding commented 5 months ago

Fixups aren't even needed in this case as the upper bits of the value are documented as Ignored by processor so the JIT could pass them as is.

A fixup would be still needed due to the special handling of bit 2.

MichalPetryka commented 5 months ago

A fixup would be still needed due to the special handling of bit 2.

Why not just let a user cast custom constants with it set to get the officially unsupported MXCSR.RC mode? This way an additional overload without a control isn't even needed.

tannergooding commented 5 months ago

Why not just let a user cast custom constants with it set to get the officially unsupported MXCSR.RC mode?

Because then that doesn't actually work with FloatRoundingMode and is inconsistent with the other APIs.

We should either expose this like we do for other rounding APIs:

or we should expose it as is and expect users to know the control bits off the top of their head:

tannergooding commented 5 months ago

@anthonycanino, Looks like we might also be missing a couple APIs:

And there are a couple APIs that need name fixups:

I've adjusted the top post to include these changes

tannergooding commented 5 months ago

I think then there might be a few convenience overloads missing which I haven't added and we should minimally discuss.

Most notably there are some concepts like And, AndNot, BlendVariable, BroadcastScalarToVector128/256/512, BroadcastVector128ToVector512, ExtractVector128/256, InsertVector128/256, LoadAlignedVector128/256/512, LoadAlignedVector128/256/512NonTemporal, LoadVector128/256/512, Or, PermuteVar8x16, PermuteVar8x16x2, PermuteVar16x16, PermuteVar16x16x2, PermuteVar32x16, PermuteVar32x16x2, ShuffleHigh, ShuffleLow, Store, StoreAligned, StoreAlignedNonTemporal, TernaryLogic, UnpackHigh, UnpackLow, and Xor

These APIs are notably bitwise or represent a general-purpose operation and historically have existed as explicit overloads for both floating-point and integer overloads. Since they exist for float/int/uint, double/long/ulong, and for short/ushort they should likely also exist for Half (noting this is a blanket statement, naturally something like PermuteVar8x16 only exists for short/ushort, but the 32-bit equivalent exists for float/int/uint and the 64-bit equivalent for double/long/ulong).

Historically there have been explicit floating-point vs integer variants (like andps vs pandd). Some of this notably changed with AVX512F. For example, no vandps existed for EVEX, you instead had pand change to vpandd. This was later "fixed" with AVX512DQ and an explicit evex encoding of vandps was added.

However, there isn't an explicit Half variant instruction for any of these instructions, so we'd just be using the 16-bit integer variant to make it more apparent/convenient for users.

anthonycanino commented 5 months ago

-- Fixed a couple typos of 258 to 256.

@anthonycanino, It looks like we can't directly use FloatRoundingMode (instead of byte) for F16C.ConvertToVector###Half because the constant values don't line up with the __MM_FROUND and related values used elsewhere, is that right? I wonder if it's worth taking it anyways and having the JIT do the fixup here, to make it more convenient for end users. It's already basically the same, just that FloatRoundingMode has and makes available the __MM_FROUND_NO_EXC bit and that's implicit to the consumer. For .NET there is no ability to control MXCSR.RC but we can support such an API by having a version that doesn't take the byte control, which then flows how Avx512F and other scenarios support it.

FYI, vrndscaleps has similar bit 2 functionality.

My thought on this is that it's confusing to have multiple rounding-mode setups. I think matching the other APIs is the most consistent option. Won't Add(Vector128<Half>, Vector128<Half>) also require an Add(Vector128<Half>, Vector128<Half>, FloatRoundingMode) overload anyway? (I realize I forgot to include it.)

anthonycanino commented 5 months ago

I think then there might be a few convenience overloads missing which I haven't added and we should minimally discuss.

I am fine with this. Given that all AVX512 extensions are packed up with AVX512 FP16 (and naturally on AVX10 onwards) we can safely rely on them to optimize/implement these methods.

tannergooding commented 5 months ago

Won't Add(Vector128, Vector128) also require a Add(Vector128, Vector128, FloatRoundingMode) anyways? (I realize I forgot to include).

Yes, same for a few of the other APIs.

anthonycanino commented 5 months ago

Ok, I'll make those adjustments.

So we agree on the use of FloatRoundingMode everywhere and let the JIT handle the fixup?

terrajobst commented 5 months ago

Video

namespace System.Runtime.Intrinsics.X86;

// Reviewed API shape for the F16C half <-> single conversion intrinsics.
public sealed class F16c : Avx2
{
    public static bool IsSupported { get; }

    // vcvtph2ps
    // Both overloads take a Vector128<Half>; only the result width differs.
    public static Vector128<float> ConvertToVector128Single(Vector128<Half> value);
    public static Vector256<float> ConvertToVector256Single(Vector128<Half> value);

    // vcvtps2ph
    // Both overloads return a Vector128<Half>, including the one taking a Vector256<float>.
    // NOTE(review): `control` is a raw byte here; the thread discusses whether FloatRoundingMode
    // could be used instead — confirm the final parameter type and the accepted values.
    public static Vector128<Half> ConvertToVector128Half(Vector128<float> value, byte control);
    public static Vector128<Half> ConvertToVector128Half(Vector256<float> value, byte control);
}

// API surface recorded for the AVX512 FP16 ISA, using Half as the vector element type.
// Layout: 512-bit packed operations, then *Scalar operations on Vector128<Half>, then the
// nested X64 (long/ulong scalar conversions) and VL (128-bit and 256-bit vector) classes.
public sealed class Avx512Fp16 : Avx512F
{
    // NOTE(review): presumably reports hardware support for AVX512 FP16 — confirm.
    public static bool IsSupported { get; }

    // ---- 512-bit packed arithmetic ----
    // Overloads taking FloatRoundingMode accept an explicit rounding mode; the others do not.

    // vaddph
    public static Vector512<Half> Add(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Add(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);
    // vdivph
    public static Vector512<Half> Divide(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Divide(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);
    // vgetexpph
    public static Vector512<Half> GetExponent(Vector512<Half> value);
    // vgetmantph
    public static Vector512<Half> GetMantissa(Vector512<Half> value, byte control);
    // vmaxph
    public static Vector512<Half> Max(Vector512<Half> left, Vector512<Half> right);
    // vminph
    // NOTE(review): first parameter is named `value` while Max and the other binary operations
    // use `left` — looks like a naming slip; confirm.
    public static Vector512<Half> Min(Vector512<Half> value, Vector512<Half> right);
    public static Vector512<Half> Multiply(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Multiply(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);
    public static Vector512<Half> Reciprocal(Vector512<Half> value);
    // NOTE(review): Reduce returns a scalar Half and names its single operand `left`, whereas the
    // neighbouring RoundScale/GetMantissa return vectors — confirm the intended return type.
    public static Half Reduce(Vector512<Half> left, byte control);
    public static Vector512<Half> RoundScale(Vector512<Half> left, byte control);
    // NOTE(review): ReciprocalSqrt has a FloatRoundingMode overload, but Reciprocal and
    // ReciprocalSqrtScalar do not — confirm whether this overload is intended.
    public static Vector512<Half> ReciprocalSqrt(Vector512<Half> value);
    public static Vector512<Half> ReciprocalSqrt(Vector512<Half> value, FloatRoundingMode mode);
    public static Vector512<Half> Scale(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Scale(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);
    public static Vector512<Half> Sqrt(Vector512<Half> value);
    public static Vector512<Half> Sqrt(Vector512<Half> value, FloatRoundingMode mode);
    public static Vector512<Half> Subtract(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Subtract(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);

    // ---- 512-bit complex and fused multiply operations ----
    // NOTE(review): FusedComplexMultiplyAdd has no FloatRoundingMode overload, unlike its Conjugate
    // sibling below and FusedComplexMultiplyAddScalar — confirm whether one is missing.
    public static Vector512<Half> FusedComplexMultiplyAdd(Vector512<Half> addend, Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> FusedComplexMultiplyAddConjugate(Vector512<Half> addend, Vector512<Half> left, Vector512<Half> rightConjugate);
    public static Vector512<Half> FusedComplexMultiplyAddConjugate(Vector512<Half> addend, Vector512<Half> left, Vector512<Half> rightConjugate, FloatRoundingMode mode);
    public static Vector512<Half> ComplexMultiply(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> ComplexMultiply(Vector512<Half> left, Vector512<Half> right, FloatRoundingMode mode);
    public static Vector512<Half> ComplexMultiplyConjugate(Vector512<Half> left, Vector512<Half> rightConjugate);
    public static Vector512<Half> ComplexMultiplyConjugate(Vector512<Half> left, Vector512<Half> rightConjugate, FloatRoundingMode mode);
    public static Vector512<Half> FusedMultiplyAddSubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplyAddSubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);
    public static Vector512<Half> FusedMultiplySubtractAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplySubtractAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);
    public static Vector512<Half> FusedMultiplyAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplyAdd(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);
    public static Vector512<Half> FusedMultiplySubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplySubtract(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);
    public static Vector512<Half> FusedMultiplyAddNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplyAddNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);
    public static Vector512<Half> FusedMultiplySubtractNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c);
    public static Vector512<Half> FusedMultiplySubtractNegated(Vector512<Half> a, Vector512<Half> b, Vector512<Half> c, FloatRoundingMode mode);

    // ---- 512-bit comparisons and classification ----
    public static Vector512<Half> Compare(Vector512<Half> left, Vector512<Half> right, FloatComparisonMode mode);
    public static Vector512<Half> CompareEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareGreaterThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareGreaterThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareLessThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareLessThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotGreaterThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotGreaterThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotLessThan(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareNotLessThanOrEqual(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareOrdered(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> CompareUnordered(Vector512<Half> left, Vector512<Half> right);
    public static Vector512<Half> Classify(Vector512<Half> value, byte control);

    // ---- Conversions to Half from 512-bit integer/float/double vectors ----
    // The result width shrinks with the source element size: 16-bit sources yield Vector512<Half>,
    // 32-bit sources Vector256<Half>, and 64-bit sources Vector128<Half>.
    public static Vector512<Half> ConvertToVector512Half(Vector512<short> value);
    public static Vector512<Half> ConvertToVector512Half(Vector512<short> value, FloatRoundingMode mode);
    public static Vector256<Half> ConvertToVector256Half(Vector512<int> value);
    public static Vector256<Half> ConvertToVector256Half(Vector512<int> value, FloatRoundingMode mode);
    public static Vector128<Half> ConvertToVector128Half(Vector512<long> value);
    public static Vector128<Half> ConvertToVector128Half(Vector512<long> value, FloatRoundingMode mode);
    public static Vector512<Half> ConvertToVector512Half(Vector512<ushort> value);
    public static Vector512<Half> ConvertToVector512Half(Vector512<ushort> value, FloatRoundingMode mode);
    public static Vector256<Half> ConvertToVector256Half(Vector512<uint> value);
    public static Vector256<Half> ConvertToVector256Half(Vector512<uint> value, FloatRoundingMode mode);
    public static Vector128<Half> ConvertToVector128Half(Vector512<ulong> value);
    public static Vector128<Half> ConvertToVector128Half(Vector512<ulong> value, FloatRoundingMode mode);
    public static Vector256<Half> ConvertToVector256Half(Vector512<float> value);
    public static Vector256<Half> ConvertToVector256Half(Vector512<float> value, FloatRoundingMode mode);
    public static Vector128<Half> ConvertToVector128Half(Vector512<double> value);
    public static Vector128<Half> ConvertToVector128Half(Vector512<double> value, FloatRoundingMode mode);

    // ---- Conversions from Half to 512-bit integer/float/double vectors ----
    // Each integer target has a default overload, a FloatRoundingMode overload, and a WithTruncation variant.
    public static Vector512<short> ConvertToVector512Int16(Vector512<Half> value);
    public static Vector512<short> ConvertToVector512Int16(Vector512<Half> value, FloatRoundingMode mode);
    public static Vector512<short> ConvertToVector512Int16WithTruncation(Vector512<Half> value);
    public static Vector512<int> ConvertToVector512Int32(Vector256<Half> value);
    public static Vector512<int> ConvertToVector512Int32(Vector256<Half> value, FloatRoundingMode mode);
    public static Vector512<int> ConvertToVector512Int32WithTruncation(Vector256<Half> value);
    public static Vector512<long> ConvertToVector512Int64(Vector128<Half> value);
    public static Vector512<long> ConvertToVector512Int64(Vector128<Half> value, FloatRoundingMode mode);
    public static Vector512<long> ConvertToVector512Int64WithTruncation(Vector128<Half> value);
    public static Vector512<ushort> ConvertToVector512UInt16(Vector512<Half> value);
    public static Vector512<ushort> ConvertToVector512UInt16(Vector512<Half> value, FloatRoundingMode mode);
    public static Vector512<ushort> ConvertToVector512UInt16WithTruncation(Vector512<Half> value);
    public static Vector512<uint> ConvertToVector512UInt32(Vector256<Half> value);
    public static Vector512<uint> ConvertToVector512UInt32(Vector256<Half> value, FloatRoundingMode mode);
    public static Vector512<uint> ConvertToVector512UInt32WithTruncation(Vector256<Half> value);
    public static Vector512<ulong> ConvertToVector512UInt64(Vector128<Half> value);
    public static Vector512<ulong> ConvertToVector512UInt64(Vector128<Half> value, FloatRoundingMode mode);
    public static Vector512<ulong> ConvertToVector512UInt64WithTruncation(Vector128<Half> value);
    // NOTE(review): the Single conversion has no FloatRoundingMode overload while the Double
    // conversion does — confirm this asymmetry is intended.
    public static Vector512<float> ConvertToVector512Single(Vector256<Half> value);
    public static Vector512<double> ConvertToVector512Double(Vector128<Half> value);
    public static Vector512<double> ConvertToVector512Double(Vector128<Half> value, FloatRoundingMode mode);

    // ---- Scalar-suffixed arithmetic on Vector128<Half> ----
    public static Vector128<Half> AddScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> AddScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);
    public static Vector128<Half> DivideScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> DivideScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);
    public static Vector128<Half> GetExponentScalar(Vector128<Half> value);
    public static Vector128<Half> GetMantissaScalar(Vector128<Half> value, byte control);
    public static Vector128<Half> MaxScalar(Vector128<Half> left, Vector128<Half> right);
    // NOTE(review): first parameter is named `value` while MaxScalar uses `left` — confirm.
    public static Vector128<Half> MinScalar(Vector128<Half> value, Vector128<Half> right);
    public static Vector128<Half> MultiplyScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> MultiplyScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);
    public static Vector128<Half> ReciprocalScalar(Vector128<Half> value);
    // NOTE(review): returns a scalar Half, unlike the other *Scalar operations here, which
    // return Vector128<Half> — confirm the intended return type.
    public static Half ReduceScalar(Vector128<Half> left, byte control);
    public static Vector128<Half> RoundScaleScalar(Vector128<Half> left, byte control);
    public static Vector128<Half> ReciprocalSqrtScalar(Vector128<Half> value);
    public static Vector128<Half> ScaleScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> ScaleScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);
    public static Vector128<Half> SqrtScalar(Vector128<Half> value);
    public static Vector128<Half> SqrtScalar(Vector128<Half> value, FloatRoundingMode mode);
    public static Vector128<Half> SubtractScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> SubtractScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);

    // ---- Scalar-suffixed complex and fused multiply operations ----
    public static Vector128<Half> FusedComplexMultiplyAddScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> FusedComplexMultiplyAddScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);
    public static Vector128<Half> FusedComplexMultiplyAddConjugateScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> rightConjugate);
    public static Vector128<Half> FusedComplexMultiplyAddConjugateScalar(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> rightConjugate, FloatRoundingMode mode);
    public static Vector128<Half> ComplexMultiplyScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> ComplexMultiplyScalar(Vector128<Half> left, Vector128<Half> right, FloatRoundingMode mode);
    public static Vector128<Half> ComplexMultiplyConjugateScalar(Vector128<Half> left, Vector128<Half> rightConjugate);
    public static Vector128<Half> ComplexMultiplyConjugateScalar(Vector128<Half> left, Vector128<Half> rightConjugate, FloatRoundingMode mode);
    public static Vector128<Half> FusedMultiplyAddSubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplyAddSubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);
    public static Vector128<Half> FusedMultiplySubtractAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplySubtractAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);
    public static Vector128<Half> FusedMultiplyAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplyAddScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);
    public static Vector128<Half> FusedMultiplySubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplySubtractScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);
    public static Vector128<Half> FusedMultiplyAddNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplyAddNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);
    public static Vector128<Half> FusedMultiplySubtractNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
    public static Vector128<Half> FusedMultiplySubtractNegatedScalar(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c, FloatRoundingMode mode);

    // ---- Scalar-suffixed comparisons returning a vector ----
    public static Vector128<Half> CompareScalar(Vector128<Half> left, Vector128<Half> right, FloatComparisonMode mode);
    public static Vector128<Half> CompareEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareGreaterThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareGreaterThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareLessThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareLessThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotGreaterThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotGreaterThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotLessThanScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareNotLessThanOrEqualScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareOrderedScalar(Vector128<Half> left, Vector128<Half> right);
    public static Vector128<Half> CompareUnorderedScalar(Vector128<Half> left, Vector128<Half> right);

    // ---- Scalar comparisons returning bool ----
    public static bool CompareScalarOrderedEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedGreaterThan(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedGreaterThanOrEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedLessThan(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedLessThanOrEqual(Vector128<Half> left, Vector128<Half> right);
    public static bool CompareScalarOrderedNotEqual(Vector128<Half> left, Vector128<Half> right);
    // NOTE(review): the Unordered variants below take Vector128<float>, whereas the Ordered
    // variants above take Vector128<Half> — looks like a copy/paste slip; confirm the element type.
    public static bool CompareScalarUnorderedEqual(Vector128<float> left, Vector128<float> right);
    public static bool CompareScalarUnorderedGreaterThan(Vector128<float> left, Vector128<float> right);
    public static bool CompareScalarUnorderedGreaterThanOrEqual(Vector128<float> left, Vector128<float> right);
    public static bool CompareScalarUnorderedLessThan(Vector128<float> left, Vector128<float> right);
    public static bool CompareScalarUnorderedLessThanOrEqual(Vector128<float> left, Vector128<float> right);
    public static bool CompareScalarUnorderedNotEqual(Vector128<float> left, Vector128<float> right);
    public static Vector128<Half> ClassifyScalar(Vector128<Half> value, byte control);

    // ---- Scalar conversions (int/uint/float/double <-> Half) ----
    // The ConvertScalarToVector128* overloads take an `upper` vector alongside the value to convert.
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, int value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, int value, FloatRoundingMode mode);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, uint value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, uint value, FloatRoundingMode mode);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<float> value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<float> value, FloatRoundingMode mode);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<double> value);
    public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, Vector128<double> value, FloatRoundingMode mode);
    public static int ConvertToInt32(Vector128<Half> value);
    public static int ConvertToInt32(Vector128<Half> value, FloatRoundingMode mode);
    public static int ConvertToInt32WithTruncation(Vector128<Half> value);
    public static uint ConvertToUInt32(Vector128<Half> value);
    public static uint ConvertToUInt32(Vector128<Half> value, FloatRoundingMode mode);
    public static uint ConvertToUInt32WithTruncation(Vector128<Half> value);
    public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<Half> value);
    public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, Vector128<Half> value, FloatRoundingMode mode);
    public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, Vector128<Half> value);
    public static Vector128<double> ConvertScalarToVector128Double(Vector128<double> upper, Vector128<Half> value, FloatRoundingMode mode);

    // Scalar conversions involving long/ulong.
    // NOTE(review): the int/uint counterparts in the enclosing class also have FloatRoundingMode
    // overloads; none are listed here — confirm whether that is intentional.
    public sealed class X64 : Avx512F.X64
    {
        public static bool IsSupported { get; }

        public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, long value);
        public static Vector128<Half> ConvertScalarToVector128Half(Vector128<Half> upper, ulong value);
        public static long ConvertToInt64(Vector128<Half> value);
        public static long ConvertToInt64WithTruncation(Vector128<Half> value);
        public static ulong ConvertToUInt64(Vector128<Half> value);
        public static ulong ConvertToUInt64WithTruncation(Vector128<Half> value);
    }

    // 128-bit and 256-bit counterparts of the 512-bit packed operations in the enclosing class.
    // No FloatRoundingMode overloads are listed in this class.
    public sealed class VL : Avx512F.VL
    {
        public static bool IsSupported { get; }

        // ---- 128-bit operations ----
        public static Vector128<Half> Add(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> Divide(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> GetExponent(Vector128<Half> value);
        public static Vector128<Half> GetMantissa(Vector128<Half> value, byte control);
        public static Vector128<Half> Max(Vector128<Half> left, Vector128<Half> right);
        // NOTE(review): first parameter is named `value` while Max uses `left` — confirm.
        public static Vector128<Half> Min(Vector128<Half> value, Vector128<Half> right);
        public static Vector128<Half> Multiply(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> Reciprocal(Vector128<Half> value);
        // NOTE(review): returns a scalar Half while RoundScale below returns a vector — confirm.
        public static Half Reduce(Vector128<Half> left, byte control);
        public static Vector128<Half> RoundScale(Vector128<Half> left, byte control);
        public static Vector128<Half> ReciprocalSqrt(Vector128<Half> value);
        // NOTE(review): takes `byte control`, whereas the 512-bit Scale overloads take no control
        // byte (only an optional FloatRoundingMode) — confirm this parameter is intended.
        public static Vector128<Half> Scale(Vector128<Half> left, Vector128<Half> right, byte control);
        public static Vector128<Half> Sqrt(Vector128<Half> value);
        public static Vector128<Half> Subtract(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> FusedComplexMultiplyAdd(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> FusedComplexMultiplyAddConjugate(Vector128<Half> addend, Vector128<Half> left, Vector128<Half> rightConjugate);
        public static Vector128<Half> ComplexMultiply(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> ComplexMultiplyConjugate(Vector128<Half> left, Vector128<Half> rightConjugate);
        public static Vector128<Half> FusedMultiplyAddSubtract(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
        public static Vector128<Half> FusedMultiplySubtractAdd(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
        public static Vector128<Half> FusedMultiplyAdd(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
        public static Vector128<Half> FusedMultiplySubtract(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
        public static Vector128<Half> FusedMultiplyAddNegated(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
        public static Vector128<Half> FusedMultiplySubtractNegated(Vector128<Half> a, Vector128<Half> b, Vector128<Half> c);
        public static Vector128<Half> Compare(Vector128<Half> left, Vector128<Half> right, FloatComparisonMode mode);
        public static Vector128<Half> CompareEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareGreaterThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareGreaterThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareLessThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareLessThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotGreaterThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotGreaterThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotLessThan(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareNotLessThanOrEqual(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareOrdered(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> CompareUnordered(Vector128<Half> left, Vector128<Half> right);
        public static Vector128<Half> Classify(Vector128<Half> value, byte control);
        // 128-bit conversions: every source and result here is a Vector128.
        public static Vector128<Half> ConvertToVector128Half(Vector128<short> value);
        public static Vector128<Half> ConvertToVector128Half(Vector128<int> value);
        public static Vector128<Half> ConvertToVector128Half(Vector128<long> value);
        public static Vector128<Half> ConvertToVector128Half(Vector128<ushort> value);
        public static Vector128<Half> ConvertToVector128Half(Vector128<uint> value);
        public static Vector128<Half> ConvertToVector128Half(Vector128<ulong> value);
        public static Vector128<Half> ConvertToVector128Half(Vector128<float> value);
        public static Vector128<Half> ConvertToVector128Half(Vector128<double> value);
        public static Vector128<short> ConvertToVector128Int16(Vector128<Half> value);
        public static Vector128<short> ConvertToVector128Int16WithTruncation(Vector128<Half> value);
        public static Vector128<int> ConvertToVector128Int32(Vector128<Half> value);
        public static Vector128<int> ConvertToVector128Int32WithTruncation(Vector128<Half> value);
        public static Vector128<long> ConvertToVector128Int64(Vector128<Half> value);
        public static Vector128<long> ConvertToVector128Int64WithTruncation(Vector128<Half> value);
        public static Vector128<ushort> ConvertToVector128UInt16(Vector128<Half> value);
        public static Vector128<ushort> ConvertToVector128UInt16WithTruncation(Vector128<Half> value);
        public static Vector128<uint> ConvertToVector128UInt32(Vector128<Half> value);
        public static Vector128<uint> ConvertToVector128UInt32WithTruncation(Vector128<Half> value);
        public static Vector128<ulong> ConvertToVector128UInt64(Vector128<Half> value);
        public static Vector128<ulong> ConvertToVector128UInt64WithTruncation(Vector128<Half> value);
        public static Vector128<float> ConvertToVector128Single(Vector128<Half> value);
        public static Vector128<double> ConvertToVector128Double(Vector128<Half> value);

        // ---- 256-bit operations ----
        public static Vector256<Half> Add(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> Divide(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> GetExponent(Vector256<Half> value);
        public static Vector256<Half> GetMantissa(Vector256<Half> value, byte control);
        public static Vector256<Half> Max(Vector256<Half> left, Vector256<Half> right);
        // NOTE(review): first parameter is named `value` while Max uses `left` — confirm.
        public static Vector256<Half> Min(Vector256<Half> value, Vector256<Half> right);
        public static Vector256<Half> Multiply(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> Reciprocal(Vector256<Half> value);
        // NOTE(review): returns a scalar Half while RoundScale below returns a vector — confirm.
        public static Half Reduce(Vector256<Half> left, byte control);
        public static Vector256<Half> RoundScale(Vector256<Half> left, byte control);
        public static Vector256<Half> ReciprocalSqrt(Vector256<Half> value);
        // NOTE(review): takes `byte control`, whereas the 512-bit Scale overloads take no control
        // byte (only an optional FloatRoundingMode) — confirm this parameter is intended.
        public static Vector256<Half> Scale(Vector256<Half> left, Vector256<Half> right, byte control);
        public static Vector256<Half> Sqrt(Vector256<Half> value);
        public static Vector256<Half> Subtract(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> FusedComplexMultiplyAdd(Vector256<Half> addend, Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> FusedComplexMultiplyAddConjugate(Vector256<Half> addend, Vector256<Half> left, Vector256<Half> rightConjugate);
        public static Vector256<Half> ComplexMultiply(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> ComplexMultiplyConjugate(Vector256<Half> left, Vector256<Half> rightConjugate);
        public static Vector256<Half> FusedMultiplyAddSubtract(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);
        public static Vector256<Half> FusedMultiplySubtractAdd(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);
        public static Vector256<Half> FusedMultiplyAdd(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);
        public static Vector256<Half> FusedMultiplySubtract(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);
        public static Vector256<Half> FusedMultiplyAddNegated(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);
        public static Vector256<Half> FusedMultiplySubtractNegated(Vector256<Half> a, Vector256<Half> b, Vector256<Half> c);
        public static Vector256<Half> Compare(Vector256<Half> left, Vector256<Half> right, FloatComparisonMode mode);
        public static Vector256<Half> CompareEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareGreaterThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareGreaterThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareLessThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareLessThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotGreaterThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotGreaterThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotLessThan(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareNotLessThanOrEqual(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareOrdered(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> CompareUnordered(Vector256<Half> left, Vector256<Half> right);
        public static Vector256<Half> Classify(Vector256<Half> value, byte control);
        // 256-bit conversions: 16-bit element types keep the Vector256 width on both sides, while
        // 32-/64-bit element types pair a Vector256 with a Vector128<Half>.
        public static Vector256<Half> ConvertToVector256Half(Vector256<short> value);
        public static Vector128<Half> ConvertToVector128Half(Vector256<int> value);
        public static Vector128<Half> ConvertToVector128Half(Vector256<long> value);
        public static Vector256<Half> ConvertToVector256Half(Vector256<ushort> value);
        public static Vector128<Half> ConvertToVector128Half(Vector256<uint> value);
        public static Vector128<Half> ConvertToVector128Half(Vector256<ulong> value);
        public static Vector128<Half> ConvertToVector128Half(Vector256<float> value);
        public static Vector128<Half> ConvertToVector128Half(Vector256<double> value);
        public static Vector256<short> ConvertToVector256Int16(Vector256<Half> value);
        public static Vector256<short> ConvertToVector256Int16WithTruncation(Vector256<Half> value);
        public static Vector256<int> ConvertToVector256Int32(Vector128<Half> value);
        public static Vector256<int> ConvertToVector256Int32WithTruncation(Vector128<Half> value);
        public static Vector256<long> ConvertToVector256Int64(Vector128<Half> value);
        public static Vector256<long> ConvertToVector256Int64WithTruncation(Vector128<Half> value);
        public static Vector256<ushort> ConvertToVector256UInt16(Vector256<Half> value);
        public static Vector256<ushort> ConvertToVector256UInt16WithTruncation(Vector256<Half> value);
        public static Vector256<uint> ConvertToVector256UInt32(Vector128<Half> value);
        public static Vector256<uint> ConvertToVector256UInt32WithTruncation(Vector128<Half> value);
        public static Vector256<ulong> ConvertToVector256UInt64(Vector128<Half> value);
        public static Vector256<ulong> ConvertToVector256UInt64WithTruncation(Vector128<Half> value);
        public static Vector256<float> ConvertToVector256Single(Vector128<Half> value);
        public static Vector256<double> ConvertToVector256Double(Vector128<Half> value);
    }
}
rickbrew commented 2 months ago

This will be quite useful for me! :) Fast conversion between float and Half is currently the only part of my codebase I can't write in C# (I have it written in C instead). I wrote up my reasons over here: https://github.com/dotnet/runtime/issues/62416#issuecomment-1791237703