This is basically the AVX-512 version of https://github.com/dotnet/runtime/issues/28868. The pointer overloads were missed for AVX-512 as well since they deviate from the C intrinsic surface.
API Proposal
// Pointer-based overloads of the AVX-512F widening integer conversions.
// Each overload is marked `unsafe` because its signature contains a pointer type,
// which C# only permits inside an unsafe context. The self-referential bodies follow
// the hardware-intrinsic pattern: the JIT is expected to replace the call with the
// instruction named in the summary rather than actually recursing.
public abstract class Avx512F : Avx2
{
    /// <summary>
    /// __m512i _mm512_cvtepi8_epi32 (__m128i a)
    /// VPMOVSXBD zmm1 {k1}{z}, xmm2/m128
    /// </summary>
    /// <param name="address">Address of the 16 source <see cref="sbyte"/> elements (the 128-bit memory operand).</param>
    public static unsafe Vector512<int> ConvertToVector512Int32(sbyte* address) => ConvertToVector512Int32(address);

    /// <summary>
    /// __m512i _mm512_cvtepu8_epi32 (__m128i a)
    /// VPMOVZXBD zmm1 {k1}{z}, xmm2/m128
    /// </summary>
    /// <param name="address">Address of the 16 source <see cref="byte"/> elements (the 128-bit memory operand).</param>
    public static unsafe Vector512<int> ConvertToVector512Int32(byte* address) => ConvertToVector512Int32(address);

    /// <summary>
    /// __m512i _mm512_cvtepi16_epi32 (__m256i a)
    /// VPMOVSXWD zmm1 {k1}{z}, ymm2/m256
    /// </summary>
    /// <param name="address">Address of the 16 source <see cref="short"/> elements (the 256-bit memory operand).</param>
    public static unsafe Vector512<int> ConvertToVector512Int32(short* address) => ConvertToVector512Int32(address);

    /// <summary>
    /// __m512i _mm512_cvtepu16_epi32 (__m256i a)
    /// VPMOVZXWD zmm1 {k1}{z}, ymm2/m256
    /// </summary>
    /// <param name="address">Address of the 16 source <see cref="ushort"/> elements (the 256-bit memory operand).</param>
    public static unsafe Vector512<int> ConvertToVector512Int32(ushort* address) => ConvertToVector512Int32(address);

    /// <summary>
    /// __m512i _mm512_cvtepi8_epi64 (__m128i a)
    /// VPMOVSXBQ zmm1 {k1}{z}, xmm2/m64
    /// </summary>
    /// <param name="address">Address of the 8 source <see cref="sbyte"/> elements (the 64-bit memory operand).</param>
    public static unsafe Vector512<long> ConvertToVector512Int64(sbyte* address) => ConvertToVector512Int64(address);

    /// <summary>
    /// __m512i _mm512_cvtepu8_epi64 (__m128i a)
    /// VPMOVZXBQ zmm1 {k1}{z}, xmm2/m64
    /// </summary>
    /// <param name="address">Address of the 8 source <see cref="byte"/> elements (the 64-bit memory operand).</param>
    public static unsafe Vector512<long> ConvertToVector512Int64(byte* address) => ConvertToVector512Int64(address);

    /// <summary>
    /// __m512i _mm512_cvtepi16_epi64 (__m128i a)
    /// VPMOVSXWQ zmm1 {k1}{z}, xmm2/m128
    /// </summary>
    /// <param name="address">Address of the 8 source <see cref="short"/> elements (the 128-bit memory operand).</param>
    public static unsafe Vector512<long> ConvertToVector512Int64(short* address) => ConvertToVector512Int64(address);

    /// <summary>
    /// __m512i _mm512_cvtepu16_epi64 (__m128i a)
    /// VPMOVZXWQ zmm1 {k1}{z}, xmm2/m128
    /// </summary>
    /// <param name="address">Address of the 8 source <see cref="ushort"/> elements (the 128-bit memory operand).</param>
    public static unsafe Vector512<long> ConvertToVector512Int64(ushort* address) => ConvertToVector512Int64(address);

    /// <summary>
    /// __m512i _mm512_cvtepi32_epi64 (__m256i a)
    /// VPMOVSXDQ zmm1 {k1}{z}, ymm2/m256
    /// </summary>
    /// <param name="address">Address of the 8 source <see cref="int"/> elements (the 256-bit memory operand).</param>
    public static unsafe Vector512<long> ConvertToVector512Int64(int* address) => ConvertToVector512Int64(address);

    /// <summary>
    /// __m512i _mm512_cvtepu32_epi64 (__m256i a)
    /// VPMOVZXDQ zmm1 {k1}{z}, ymm2/m256
    /// </summary>
    /// <param name="address">Address of the 8 source <see cref="uint"/> elements (the 256-bit memory operand).</param>
    public static unsafe Vector512<long> ConvertToVector512Int64(uint* address) => ConvertToVector512Int64(address);
}
// Pointer-based overloads of the AVX-512BW byte-to-word widening conversions.
// Each overload is marked `unsafe` because its signature contains a pointer type,
// which C# only permits inside an unsafe context. The self-referential bodies follow
// the hardware-intrinsic pattern: the JIT is expected to replace the call with the
// instruction named in the summary rather than actually recursing.
public abstract class Avx512BW : Avx512F
{
    /// <summary>
    /// __m512i _mm512_cvtepi8_epi16 (__m256i a)
    /// VPMOVSXBW zmm1 {k1}{z}, ymm2/m256
    /// </summary>
    /// <param name="address">Address of the 32 source <see cref="sbyte"/> elements (the 256-bit memory operand).</param>
    public static unsafe Vector512<short> ConvertToVector512Int16(sbyte* address) => ConvertToVector512Int16(address);

    /// <summary>
    /// __m512i _mm512_cvtepu8_epi16 (__m256i a)
    /// VPMOVZXBW zmm1 {k1}{z}, ymm2/m256
    /// </summary>
    /// <param name="address">Address of the 32 source <see cref="byte"/> elements (the 256-bit memory operand).</param>
    public static unsafe Vector512<short> ConvertToVector512Int16(byte* address) => ConvertToVector512Int16(address);
}
API Usage
// ptr's declaration is not shown; per the proposed overloads it may be an sbyte*, byte*, short*, or ushort*,
// and overload resolution picks the matching conversion.
var zmm0 = Avx512F.ConvertToVector512Int32(ptr);
Alternative Designs
vpmovsxbq and vpmovzxbq consume 64 bits, and as such are the only instructions that can't be handled by containment of a Vector128 or Vector256 load without an implied overread. Those could be exposed without the others if containment were handled properly.
Background and motivation
This is basically the AVX-512 version of https://github.com/dotnet/runtime/issues/28868. The pointer overloads were missed for AVX-512 as well since they deviate from the C intrinsic surface.
API Proposal
API Usage
Alternative Designs
`vpmovsxbq` and `vpmovzxbq` consume 64 bits, and as such are the only instructions that can't be handled by containment of a `Vector128` or `Vector256` load without an implied overread. Those could be exposed without the others if containment were handled properly.
Risks
No response