a74nh opened this issue 1 year ago
Tagging subscribers to this area: @dotnet/area-system-numerics. See info in area-owners.md if you want to be subscribed.

| | |
|---|---|
| Author: | a74nh |
| Assignees: | - |
| Labels: | `area-System.Numerics` |
| Milestone: | - |
/// Full API
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE Category: loads
{
/// ComputeByteAddresses : Compute vector addresses for 8-bit data
/// svuint32_t svadrb[_u32base]_[s32]offset(svuint32_t bases, svint32_t offsets) : "ADR Zresult.S, [Zbases.S, Zoffsets.S]"
public static unsafe Vector<uint> ComputeByteAddresses(Vector<uint> bases, Vector<int> offsets);
/// svuint32_t svadrb[_u32base]_[u32]offset(svuint32_t bases, svuint32_t offsets) : "ADR Zresult.S, [Zbases.S, Zoffsets.S]"
public static unsafe Vector<uint> ComputeByteAddresses(Vector<uint> bases, Vector<uint> offsets);
/// svuint64_t svadrb[_u64base]_[s64]offset(svuint64_t bases, svint64_t offsets) : "ADR Zresult.D, [Zbases.D, Zoffsets.D]"
public static unsafe Vector<ulong> ComputeByteAddresses(Vector<ulong> bases, Vector<long> offsets);
/// svuint64_t svadrb[_u64base]_[u64]offset(svuint64_t bases, svuint64_t offsets) : "ADR Zresult.D, [Zbases.D, Zoffsets.D]"
public static unsafe Vector<ulong> ComputeByteAddresses(Vector<ulong> bases, Vector<ulong> offsets);
/// ComputeInt16Addresses : Compute vector addresses for 16-bit data
/// svuint32_t svadrh[_u32base]_[s32]index(svuint32_t bases, svint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #1]"
public static unsafe Vector<uint> ComputeInt16Addresses(Vector<uint> bases, Vector<int> indices);
/// svuint32_t svadrh[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #1]"
public static unsafe Vector<uint> ComputeInt16Addresses(Vector<uint> bases, Vector<uint> indices);
/// svuint64_t svadrh[_u64base]_[s64]index(svuint64_t bases, svint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #1]"
public static unsafe Vector<ulong> ComputeInt16Addresses(Vector<ulong> bases, Vector<long> indices);
/// svuint64_t svadrh[_u64base]_[u64]index(svuint64_t bases, svuint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #1]"
public static unsafe Vector<ulong> ComputeInt16Addresses(Vector<ulong> bases, Vector<ulong> indices);
/// ComputeInt32Addresses : Compute vector addresses for 32-bit data
/// svuint32_t svadrw[_u32base]_[s32]index(svuint32_t bases, svint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #2]"
public static unsafe Vector<uint> ComputeInt32Addresses(Vector<uint> bases, Vector<int> indices);
/// svuint32_t svadrw[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #2]"
public static unsafe Vector<uint> ComputeInt32Addresses(Vector<uint> bases, Vector<uint> indices);
/// svuint64_t svadrw[_u64base]_[s64]index(svuint64_t bases, svint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #2]"
public static unsafe Vector<ulong> ComputeInt32Addresses(Vector<ulong> bases, Vector<long> indices);
/// svuint64_t svadrw[_u64base]_[u64]index(svuint64_t bases, svuint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #2]"
public static unsafe Vector<ulong> ComputeInt32Addresses(Vector<ulong> bases, Vector<ulong> indices);
/// ComputeInt64Addresses : Compute vector addresses for 64-bit data
/// svuint32_t svadrd[_u32base]_[s32]index(svuint32_t bases, svint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #3]"
public static unsafe Vector<uint> ComputeInt64Addresses(Vector<uint> bases, Vector<int> indices);
/// svuint32_t svadrd[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #3]"
public static unsafe Vector<uint> ComputeInt64Addresses(Vector<uint> bases, Vector<uint> indices);
/// svuint64_t svadrd[_u64base]_[s64]index(svuint64_t bases, svint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #3]"
public static unsafe Vector<ulong> ComputeInt64Addresses(Vector<ulong> bases, Vector<long> indices);
/// svuint64_t svadrd[_u64base]_[u64]index(svuint64_t bases, svuint64_t indices) : "ADR Zresult.D, [Zbases.D, Zindices.D, LSL #3]"
public static unsafe Vector<ulong> ComputeInt64Addresses(Vector<ulong> bases, Vector<ulong> indices);
/// LoadVector : Unextended load
/// svfloat32_t svld1[_f32](svbool_t pg, const float32_t *base) : "LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<float> LoadVector(Vector<float> mask, const float *base);
/// svfloat64_t svld1[_f64](svbool_t pg, const float64_t *base) : "LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<double> LoadVector(Vector<double> mask, const double *base);
/// svint8_t svld1[_s8](svbool_t pg, const int8_t *base) : "LD1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<sbyte> LoadVector(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svld1[_s16](svbool_t pg, const int16_t *base) : "LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVector(Vector<short> mask, const short *base);
/// svint32_t svld1[_s32](svbool_t pg, const int32_t *base) : "LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVector(Vector<int> mask, const int *base);
/// svint64_t svld1[_s64](svbool_t pg, const int64_t *base) : "LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVector(Vector<long> mask, const long *base);
/// svuint8_t svld1[_u8](svbool_t pg, const uint8_t *base) : "LD1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<byte> LoadVector(Vector<byte> mask, const byte *base);
/// svuint16_t svld1[_u16](svbool_t pg, const uint16_t *base) : "LD1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVector(Vector<ushort> mask, const ushort *base);
/// svuint32_t svld1[_u32](svbool_t pg, const uint32_t *base) : "LD1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVector(Vector<uint> mask, const uint *base);
/// svuint64_t svld1[_u64](svbool_t pg, const uint64_t *base) : "LD1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVector(Vector<ulong> mask, const ulong *base);
/// LoadVector128AndReplicateToVector : Load and replicate 128 bits of data
/// svfloat32_t svld1rq[_f32](svbool_t pg, const float32_t *base) : "LD1RQW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1RQW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1RQW Zresult.S, Pg/Z, [Xbase, #0]"
public static unsafe Vector<float> LoadVector128AndReplicateToVector(Vector<float> mask, const float *base);
/// svfloat64_t svld1rq[_f64](svbool_t pg, const float64_t *base) : "LD1RQD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1RQD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1RQD Zresult.D, Pg/Z, [Xbase, #0]"
public static unsafe Vector<double> LoadVector128AndReplicateToVector(Vector<double> mask, const double *base);
/// svint8_t svld1rq[_s8](svbool_t pg, const int8_t *base) : "LD1RQB Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1RQB Zresult.B, Pg/Z, [Xarray, #index]" or "LD1RQB Zresult.B, Pg/Z, [Xbase, #0]"
public static unsafe Vector<sbyte> LoadVector128AndReplicateToVector(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svld1rq[_s16](svbool_t pg, const int16_t *base) : "LD1RQH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1RQH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1RQH Zresult.H, Pg/Z, [Xbase, #0]"
public static unsafe Vector<short> LoadVector128AndReplicateToVector(Vector<short> mask, const short *base);
/// svint32_t svld1rq[_s32](svbool_t pg, const int32_t *base) : "LD1RQW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1RQW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1RQW Zresult.S, Pg/Z, [Xbase, #0]"
public static unsafe Vector<int> LoadVector128AndReplicateToVector(Vector<int> mask, const int *base);
/// svint64_t svld1rq[_s64](svbool_t pg, const int64_t *base) : "LD1RQD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1RQD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1RQD Zresult.D, Pg/Z, [Xbase, #0]"
public static unsafe Vector<long> LoadVector128AndReplicateToVector(Vector<long> mask, const long *base);
/// svuint8_t svld1rq[_u8](svbool_t pg, const uint8_t *base) : "LD1RQB Zresult.B, Pg/Z, [Xarray, Xindex]" or "LD1RQB Zresult.B, Pg/Z, [Xarray, #index]" or "LD1RQB Zresult.B, Pg/Z, [Xbase, #0]"
public static unsafe Vector<byte> LoadVector128AndReplicateToVector(Vector<byte> mask, const byte *base);
/// svuint16_t svld1rq[_u16](svbool_t pg, const uint16_t *base) : "LD1RQH Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1RQH Zresult.H, Pg/Z, [Xarray, #index * 2]" or "LD1RQH Zresult.H, Pg/Z, [Xbase, #0]"
public static unsafe Vector<ushort> LoadVector128AndReplicateToVector(Vector<ushort> mask, const ushort *base);
/// svuint32_t svld1rq[_u32](svbool_t pg, const uint32_t *base) : "LD1RQW Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1RQW Zresult.S, Pg/Z, [Xarray, #index * 4]" or "LD1RQW Zresult.S, Pg/Z, [Xbase, #0]"
public static unsafe Vector<uint> LoadVector128AndReplicateToVector(Vector<uint> mask, const uint *base);
/// svuint64_t svld1rq[_u64](svbool_t pg, const uint64_t *base) : "LD1RQD Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD1RQD Zresult.D, Pg/Z, [Xarray, #index * 8]" or "LD1RQD Zresult.D, Pg/Z, [Xbase, #0]"
public static unsafe Vector<ulong> LoadVector128AndReplicateToVector(Vector<ulong> mask, const ulong *base);
/// LoadVectorByteSignExtendNonFaultingToInt16 : Load 8-bit data and sign-extend, non-faulting
/// svint16_t svldnf1sb_s16(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteSignExtendNonFaultingToInt16(Vector<short> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToInt32 : Load 8-bit data and sign-extend, non-faulting
/// svint32_t svldnf1sb_s32(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteSignExtendNonFaultingToInt32(Vector<int> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToInt64 : Load 8-bit data and sign-extend, non-faulting
/// svint64_t svldnf1sb_s64(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteSignExtendNonFaultingToInt64(Vector<long> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToUInt16 : Load 8-bit data and sign-extend, non-faulting
/// svuint16_t svldnf1sb_u16(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteSignExtendNonFaultingToUInt16(Vector<ushort> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToUInt32 : Load 8-bit data and sign-extend, non-faulting
/// svuint32_t svldnf1sb_u32(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteSignExtendNonFaultingToUInt32(Vector<uint> mask, const sbyte *base);
/// LoadVectorByteSignExtendNonFaultingToUInt64 : Load 8-bit data and sign-extend, non-faulting
/// svuint64_t svldnf1sb_u64(svbool_t pg, const int8_t *base) : "LDNF1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteSignExtendNonFaultingToUInt64(Vector<ulong> mask, const sbyte *base);
/// LoadVectorByteSignExtendToInt16 : Load 8-bit data and sign-extend
/// svint16_t svld1sb_s16(svbool_t pg, const int8_t *base) : "LD1SB Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteSignExtendToInt16(Vector<short> mask, const sbyte *base);
/// LoadVectorByteSignExtendToInt32 : Load 8-bit data and sign-extend
/// svint32_t svld1sb_s32(svbool_t pg, const int8_t *base) : "LD1SB Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteSignExtendToInt32(Vector<int> mask, const sbyte *base);
/// LoadVectorByteSignExtendToInt64 : Load 8-bit data and sign-extend
/// svint64_t svld1sb_s64(svbool_t pg, const int8_t *base) : "LD1SB Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteSignExtendToInt64(Vector<long> mask, const sbyte *base);
/// LoadVectorByteSignExtendToUInt16 : Load 8-bit data and sign-extend
/// svuint16_t svld1sb_u16(svbool_t pg, const int8_t *base) : "LD1SB Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteSignExtendToUInt16(Vector<ushort> mask, const sbyte *base);
/// LoadVectorByteSignExtendToUInt32 : Load 8-bit data and sign-extend
/// svuint32_t svld1sb_u32(svbool_t pg, const int8_t *base) : "LD1SB Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteSignExtendToUInt32(Vector<uint> mask, const sbyte *base);
/// LoadVectorByteSignExtendToUInt64 : Load 8-bit data and sign-extend
/// svuint64_t svld1sb_u64(svbool_t pg, const int8_t *base) : "LD1SB Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1SB Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteSignExtendToUInt64(Vector<ulong> mask, const sbyte *base);
/// LoadVectorByteZeroExtendNonFaultingToInt16 : Load 8-bit data and zero-extend, non-faulting
/// svint16_t svldnf1ub_s16(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteZeroExtendNonFaultingToInt16(Vector<short> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToInt32 : Load 8-bit data and zero-extend, non-faulting
/// svint32_t svldnf1ub_s32(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteZeroExtendNonFaultingToInt32(Vector<int> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToInt64 : Load 8-bit data and zero-extend, non-faulting
/// svint64_t svldnf1ub_s64(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteZeroExtendNonFaultingToInt64(Vector<long> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToUInt16 : Load 8-bit data and zero-extend, non-faulting
/// svuint16_t svldnf1ub_u16(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteZeroExtendNonFaultingToUInt16(Vector<ushort> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToUInt32 : Load 8-bit data and zero-extend, non-faulting
/// svuint32_t svldnf1ub_u32(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteZeroExtendNonFaultingToUInt32(Vector<uint> mask, const byte *base);
/// LoadVectorByteZeroExtendNonFaultingToUInt64 : Load 8-bit data and zero-extend, non-faulting
/// svuint64_t svldnf1ub_u64(svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const byte *base);
/// LoadVectorByteZeroExtendToInt16 : Load 8-bit data and zero-extend
/// svint16_t svld1ub_s16(svbool_t pg, const uint8_t *base) : "LD1B Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(Vector<short> mask, const byte *base);
/// LoadVectorByteZeroExtendToInt32 : Load 8-bit data and zero-extend
/// svint32_t svld1ub_s32(svbool_t pg, const uint8_t *base) : "LD1B Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(Vector<int> mask, const byte *base);
/// LoadVectorByteZeroExtendToInt64 : Load 8-bit data and zero-extend
/// svint64_t svld1ub_s64(svbool_t pg, const uint8_t *base) : "LD1B Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(Vector<long> mask, const byte *base);
/// LoadVectorByteZeroExtendToUInt16 : Load 8-bit data and zero-extend
/// svuint16_t svld1ub_u16(svbool_t pg, const uint8_t *base) : "LD1B Zresult.H, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(Vector<ushort> mask, const byte *base);
/// LoadVectorByteZeroExtendToUInt32 : Load 8-bit data and zero-extend
/// svuint32_t svld1ub_u32(svbool_t pg, const uint8_t *base) : "LD1B Zresult.S, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(Vector<uint> mask, const byte *base);
/// LoadVectorByteZeroExtendToUInt64 : Load 8-bit data and zero-extend
/// svuint64_t svld1ub_u64(svbool_t pg, const uint8_t *base) : "LD1B Zresult.D, Pg/Z, [Xarray, Xindex]" or "LD1B Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(Vector<ulong> mask, const byte *base);
/// LoadVectorInt16SignExtendNonFaultingToInt32 : Load 16-bit data and sign-extend, non-faulting
/// svint32_t svldnf1sh_s32(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16SignExtendNonFaultingToInt32(Vector<int> mask, const short *base);
/// LoadVectorInt16SignExtendNonFaultingToInt64 : Load 16-bit data and sign-extend, non-faulting
/// svint64_t svldnf1sh_s64(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16SignExtendNonFaultingToInt64(Vector<long> mask, const short *base);
/// LoadVectorInt16SignExtendNonFaultingToUInt32 : Load 16-bit data and sign-extend, non-faulting
/// svuint32_t svldnf1sh_u32(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16SignExtendNonFaultingToUInt32(Vector<uint> mask, const short *base);
/// LoadVectorInt16SignExtendNonFaultingToUInt64 : Load 16-bit data and sign-extend, non-faulting
/// svuint64_t svldnf1sh_u64(svbool_t pg, const int16_t *base) : "LDNF1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16SignExtendNonFaultingToUInt64(Vector<ulong> mask, const short *base);
/// LoadVectorInt16SignExtendToInt32 : Load 16-bit data and sign-extend
/// svint32_t svld1sh_s32(svbool_t pg, const int16_t *base) : "LD1SH Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(Vector<int> mask, const short *base);
/// LoadVectorInt16SignExtendToInt64 : Load 16-bit data and sign-extend
/// svint64_t svld1sh_s64(svbool_t pg, const int16_t *base) : "LD1SH Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(Vector<long> mask, const short *base);
/// LoadVectorInt16SignExtendToUInt32 : Load 16-bit data and sign-extend
/// svuint32_t svld1sh_u32(svbool_t pg, const int16_t *base) : "LD1SH Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(Vector<uint> mask, const short *base);
/// LoadVectorInt16SignExtendToUInt64 : Load 16-bit data and sign-extend
/// svuint64_t svld1sh_u64(svbool_t pg, const int16_t *base) : "LD1SH Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1SH Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(Vector<ulong> mask, const short *base);
/// LoadVectorInt16ZeroExtendNonFaultingToInt32 : Load 16-bit data and zero-extend, non-faulting
/// svint32_t svldnf1uh_s32(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16ZeroExtendNonFaultingToInt32(Vector<int> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendNonFaultingToInt64 : Load 16-bit data and zero-extend, non-faulting
/// svint64_t svldnf1uh_s64(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16ZeroExtendNonFaultingToInt64(Vector<long> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendNonFaultingToUInt32 : Load 16-bit data and zero-extend, non-faulting
/// svuint32_t svldnf1uh_u32(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendNonFaultingToUInt32(Vector<uint> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendNonFaultingToUInt64 : Load 16-bit data and zero-extend, non-faulting
/// svuint64_t svldnf1uh_u64(svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToInt32 : Load 16-bit data and zero-extend
/// svint32_t svld1uh_s32(svbool_t pg, const uint16_t *base) : "LD1H Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(Vector<int> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToInt64 : Load 16-bit data and zero-extend
/// svint64_t svld1uh_s64(svbool_t pg, const uint16_t *base) : "LD1H Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(Vector<long> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToUInt32 : Load 16-bit data and zero-extend
/// svuint32_t svld1uh_u32(svbool_t pg, const uint16_t *base) : "LD1H Zresult.S, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(Vector<uint> mask, const ushort *base);
/// LoadVectorInt16ZeroExtendToUInt64 : Load 16-bit data and zero-extend
/// svuint64_t svld1uh_u64(svbool_t pg, const uint16_t *base) : "LD1H Zresult.D, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD1H Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(Vector<ulong> mask, const ushort *base);
/// LoadVectorInt32SignExtendNonFaultingToInt64 : Load 32-bit data and sign-extend, non-faulting
/// svint64_t svldnf1sw_s64(svbool_t pg, const int32_t *base) : "LDNF1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32SignExtendNonFaultingToInt64(Vector<long> mask, const int *base);
/// LoadVectorInt32SignExtendNonFaultingToUInt64 : Load 32-bit data and sign-extend, non-faulting
/// svuint64_t svldnf1sw_u64(svbool_t pg, const int32_t *base) : "LDNF1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32SignExtendNonFaultingToUInt64(Vector<ulong> mask, const int *base);
/// LoadVectorInt32SignExtendToInt64 : Load 32-bit data and sign-extend
/// svint64_t svld1sw_s64(svbool_t pg, const int32_t *base) : "LD1SW Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(Vector<long> mask, const int *base);
/// LoadVectorInt32SignExtendToUInt64 : Load 32-bit data and sign-extend
/// svuint64_t svld1sw_u64(svbool_t pg, const int32_t *base) : "LD1SW Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1SW Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(Vector<ulong> mask, const int *base);
/// LoadVectorInt32ZeroExtendNonFaultingToInt64 : Load 32-bit data and zero-extend, non-faulting
/// svint64_t svldnf1uw_s64(svbool_t pg, const uint32_t *base) : "LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32ZeroExtendNonFaultingToInt64(Vector<long> mask, const uint *base);
/// LoadVectorInt32ZeroExtendNonFaultingToUInt64 : Load 32-bit data and zero-extend, non-faulting
/// svuint64_t svldnf1uw_u64(svbool_t pg, const uint32_t *base) : "LDNF1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const uint *base);
/// LoadVectorInt32ZeroExtendToInt64 : Load 32-bit data and zero-extend
/// svint64_t svld1uw_s64(svbool_t pg, const uint32_t *base) : "LD1W Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(Vector<long> mask, const uint *base);
/// LoadVectorInt32ZeroExtendToUInt64 : Load 32-bit data and zero-extend
/// svuint64_t svld1uw_u64(svbool_t pg, const uint32_t *base) : "LD1W Zresult.D, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD1W Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(Vector<ulong> mask, const uint *base);
/// LoadVectorNonFaulting : Unextended load, non-faulting
/// svfloat32_t svldnf1[_f32](svbool_t pg, const float32_t *base) : "LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<float> LoadVectorNonFaulting(Vector<float> mask, const float *base);
/// svfloat64_t svldnf1[_f64](svbool_t pg, const float64_t *base) : "LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<double> LoadVectorNonFaulting(Vector<double> mask, const double *base);
/// svint8_t svldnf1[_s8](svbool_t pg, const int8_t *base) : "LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<sbyte> LoadVectorNonFaulting(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svldnf1[_s16](svbool_t pg, const int16_t *base) : "LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorNonFaulting(Vector<short> mask, const short *base);
/// svint32_t svldnf1[_s32](svbool_t pg, const int32_t *base) : "LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorNonFaulting(Vector<int> mask, const int *base);
/// svint64_t svldnf1[_s64](svbool_t pg, const int64_t *base) : "LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorNonFaulting(Vector<long> mask, const long *base);
/// svuint8_t svldnf1[_u8](svbool_t pg, const uint8_t *base) : "LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<byte> LoadVectorNonFaulting(Vector<byte> mask, const byte *base);
/// svuint16_t svldnf1[_u16](svbool_t pg, const uint16_t *base) : "LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorNonFaulting(Vector<ushort> mask, const ushort *base);
/// svuint32_t svldnf1[_u32](svbool_t pg, const uint32_t *base) : "LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorNonFaulting(Vector<uint> mask, const uint *base);
/// svuint64_t svldnf1[_u64](svbool_t pg, const uint64_t *base) : "LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorNonFaulting(Vector<ulong> mask, const ulong *base);
/// LoadVectorNonTemporal : Unextended load, non-temporal
/// svfloat32_t svldnt1[_f32](svbool_t pg, const float32_t *base) : "LDNT1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<float> LoadVectorNonTemporal(Vector<float> mask, const float *base);
/// svfloat64_t svldnt1[_f64](svbool_t pg, const float64_t *base) : "LDNT1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<double> LoadVectorNonTemporal(Vector<double> mask, const double *base);
/// svint8_t svldnt1[_s8](svbool_t pg, const int8_t *base) : "LDNT1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LDNT1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<sbyte> LoadVectorNonTemporal(Vector<sbyte> mask, const sbyte *base);
/// svint16_t svldnt1[_s16](svbool_t pg, const int16_t *base) : "LDNT1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LDNT1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<short> LoadVectorNonTemporal(Vector<short> mask, const short *base);
/// svint32_t svldnt1[_s32](svbool_t pg, const int32_t *base) : "LDNT1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<int> LoadVectorNonTemporal(Vector<int> mask, const int *base);
/// svint64_t svldnt1[_s64](svbool_t pg, const int64_t *base) : "LDNT1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<long> LoadVectorNonTemporal(Vector<long> mask, const long *base);
/// svuint8_t svldnt1[_u8](svbool_t pg, const uint8_t *base) : "LDNT1B Zresult.B, Pg/Z, [Xarray, Xindex]" or "LDNT1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<byte> LoadVectorNonTemporal(Vector<byte> mask, const byte *base);
/// svuint16_t svldnt1[_u16](svbool_t pg, const uint16_t *base) : "LDNT1H Zresult.H, Pg/Z, [Xarray, Xindex, LSL #1]" or "LDNT1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ushort> LoadVectorNonTemporal(Vector<ushort> mask, const ushort *base);
/// svuint32_t svldnt1[_u32](svbool_t pg, const uint32_t *base) : "LDNT1W Zresult.S, Pg/Z, [Xarray, Xindex, LSL #2]" or "LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<uint> LoadVectorNonTemporal(Vector<uint> mask, const uint *base);
/// svuint64_t svldnt1[_u64](svbool_t pg, const uint64_t *base) : "LDNT1D Zresult.D, Pg/Z, [Xarray, Xindex, LSL #3]" or "LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe Vector<ulong> LoadVectorNonTemporal(Vector<ulong> mask, const ulong *base);
/// LoadVectorx2 : Load two-element tuples into two vectors
/// svfloat32x2_t svld2[_f32](svbool_t pg, const float32_t *base) : "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<float>, Vector<float>) LoadVectorx2(Vector<float> mask, const float *base);
/// svfloat64x2_t svld2[_f64](svbool_t pg, const float64_t *base) : "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<double>, Vector<double>) LoadVectorx2(Vector<double> mask, const double *base);
/// svint8x2_t svld2[_s8](svbool_t pg, const int8_t *base) : "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xarray, Xindex]" or "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<sbyte>, Vector<sbyte>) LoadVectorx2(Vector<sbyte> mask, const sbyte *base);
/// svint16x2_t svld2[_s16](svbool_t pg, const int16_t *base) : "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<short>, Vector<short>) LoadVectorx2(Vector<short> mask, const short *base);
/// svint32x2_t svld2[_s32](svbool_t pg, const int32_t *base) : "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<int>, Vector<int>) LoadVectorx2(Vector<int> mask, const int *base);
/// svint64x2_t svld2[_s64](svbool_t pg, const int64_t *base) : "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<long>, Vector<long>) LoadVectorx2(Vector<long> mask, const long *base);
/// svuint8x2_t svld2[_u8](svbool_t pg, const uint8_t *base) : "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xarray, Xindex]" or "LD2B {Zresult0.B, Zresult1.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<byte>, Vector<byte>) LoadVectorx2(Vector<byte> mask, const byte *base);
/// svuint16x2_t svld2[_u16](svbool_t pg, const uint16_t *base) : "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD2H {Zresult0.H, Zresult1.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ushort>, Vector<ushort>) LoadVectorx2(Vector<ushort> mask, const ushort *base);
/// svuint32x2_t svld2[_u32](svbool_t pg, const uint32_t *base) : "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD2W {Zresult0.S, Zresult1.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<uint>, Vector<uint>) LoadVectorx2(Vector<uint> mask, const uint *base);
/// svuint64x2_t svld2[_u64](svbool_t pg, const uint64_t *base) : "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD2D {Zresult0.D, Zresult1.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ulong>, Vector<ulong>) LoadVectorx2(Vector<ulong> mask, const ulong *base);
/// LoadVectorx3 : Load three-element tuples into three vectors
/// svfloat32x3_t svld3[_f32](svbool_t pg, const float32_t *base) : "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<float>, Vector<float>, Vector<float>) LoadVectorx3(Vector<float> mask, const float *base);
/// svfloat64x3_t svld3[_f64](svbool_t pg, const float64_t *base) : "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<double>, Vector<double>, Vector<double>) LoadVectorx3(Vector<double> mask, const double *base);
/// svint8x3_t svld3[_s8](svbool_t pg, const int8_t *base) : "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xarray, Xindex]" or "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx3(Vector<sbyte> mask, const sbyte *base);
/// svint16x3_t svld3[_s16](svbool_t pg, const int16_t *base) : "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<short>, Vector<short>, Vector<short>) LoadVectorx3(Vector<short> mask, const short *base);
/// svint32x3_t svld3[_s32](svbool_t pg, const int32_t *base) : "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<int>, Vector<int>, Vector<int>) LoadVectorx3(Vector<int> mask, const int *base);
/// svint64x3_t svld3[_s64](svbool_t pg, const int64_t *base) : "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<long>, Vector<long>, Vector<long>) LoadVectorx3(Vector<long> mask, const long *base);
/// svuint8x3_t svld3[_u8](svbool_t pg, const uint8_t *base) : "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xarray, Xindex]" or "LD3B {Zresult0.B - Zresult2.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx3(Vector<byte> mask, const byte *base);
/// svuint16x3_t svld3[_u16](svbool_t pg, const uint16_t *base) : "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD3H {Zresult0.H - Zresult2.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx3(Vector<ushort> mask, const ushort *base);
/// svuint32x3_t svld3[_u32](svbool_t pg, const uint32_t *base) : "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD3W {Zresult0.S - Zresult2.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx3(Vector<uint> mask, const uint *base);
/// svuint64x3_t svld3[_u64](svbool_t pg, const uint64_t *base) : "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD3D {Zresult0.D - Zresult2.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx3(Vector<ulong> mask, const ulong *base);
/// LoadVectorx4 : Load four-element tuples into four vectors
/// svfloat32x4_t svld4[_f32](svbool_t pg, const float32_t *base) : "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<float>, Vector<float>, Vector<float>, Vector<float>) LoadVectorx4(Vector<float> mask, const float *base);
/// svfloat64x4_t svld4[_f64](svbool_t pg, const float64_t *base) : "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<double>, Vector<double>, Vector<double>, Vector<double>) LoadVectorx4(Vector<double> mask, const double *base);
/// svint8x4_t svld4[_s8](svbool_t pg, const int8_t *base) : "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xarray, Xindex]" or "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx4(Vector<sbyte> mask, const sbyte *base);
/// svint16x4_t svld4[_s16](svbool_t pg, const int16_t *base) : "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<short>, Vector<short>, Vector<short>, Vector<short>) LoadVectorx4(Vector<short> mask, const short *base);
/// svint32x4_t svld4[_s32](svbool_t pg, const int32_t *base) : "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<int>, Vector<int>, Vector<int>, Vector<int>) LoadVectorx4(Vector<int> mask, const int *base);
/// svint64x4_t svld4[_s64](svbool_t pg, const int64_t *base) : "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<long>, Vector<long>, Vector<long>, Vector<long>) LoadVectorx4(Vector<long> mask, const long *base);
/// svuint8x4_t svld4[_u8](svbool_t pg, const uint8_t *base) : "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xarray, Xindex]" or "LD4B {Zresult0.B - Zresult3.B}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx4(Vector<byte> mask, const byte *base);
/// svuint16x4_t svld4[_u16](svbool_t pg, const uint16_t *base) : "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xarray, Xindex, LSL #1]" or "LD4H {Zresult0.H - Zresult3.H}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx4(Vector<ushort> mask, const ushort *base);
/// svuint32x4_t svld4[_u32](svbool_t pg, const uint32_t *base) : "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xarray, Xindex, LSL #2]" or "LD4W {Zresult0.S - Zresult3.S}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx4(Vector<uint> mask, const uint *base);
/// svuint64x4_t svld4[_u64](svbool_t pg, const uint64_t *base) : "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xarray, Xindex, LSL #3]" or "LD4D {Zresult0.D - Zresult3.D}, Pg/Z, [Xbase, #0, MUL VL]"
public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx4(Vector<ulong> mask, const ulong *base);
/// PrefetchBytes : Prefetch bytes
/// void svprfb(svbool_t pg, const void *base, enum svprfop op) : "PRFB op, Pg, [Xarray, Xindex]" or "PRFB op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchBytes(Vector<byte> mask, const void *base, enum SvePrefetchType op);
/// PrefetchInt16 : Prefetch halfwords
/// void svprfh(svbool_t pg, const void *base, enum svprfop op) : "PRFH op, Pg, [Xarray, Xindex, LSL #1]" or "PRFH op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchInt16(Vector<ushort> mask, const void *base, enum SvePrefetchType op);
/// PrefetchInt32 : Prefetch words
/// void svprfw(svbool_t pg, const void *base, enum svprfop op) : "PRFW op, Pg, [Xarray, Xindex, LSL #2]" or "PRFW op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchInt32(Vector<uint> mask, const void *base, enum SvePrefetchType op);
/// PrefetchInt64 : Prefetch doublewords
/// void svprfd(svbool_t pg, const void *base, enum svprfop op) : "PRFD op, Pg, [Xarray, Xindex, LSL #3]" or "PRFD op, Pg, [Xbase, #0, MUL VL]"
public static unsafe void PrefetchInt64(Vector<ulong> mask, const void *base, enum SvePrefetchType op);
/// total method signatures: 138
/// total method names: 63
}
/// Optional Entries:
/// public static unsafe Vector<float> LoadVector(Vector<float> mask, const float *base, long vnum); // svld1_vnum[_f32]
/// public static unsafe Vector<double> LoadVector(Vector<double> mask, const double *base, long vnum); // svld1_vnum[_f64]
/// public static unsafe Vector<sbyte> LoadVector(Vector<sbyte> mask, const sbyte *base, long vnum); // svld1_vnum[_s8]
/// public static unsafe Vector<short> LoadVector(Vector<short> mask, const short *base, long vnum); // svld1_vnum[_s16]
/// public static unsafe Vector<int> LoadVector(Vector<int> mask, const int *base, long vnum); // svld1_vnum[_s32]
/// public static unsafe Vector<long> LoadVector(Vector<long> mask, const long *base, long vnum); // svld1_vnum[_s64]
/// public static unsafe Vector<byte> LoadVector(Vector<byte> mask, const byte *base, long vnum); // svld1_vnum[_u8]
/// public static unsafe Vector<ushort> LoadVector(Vector<ushort> mask, const ushort *base, long vnum); // svld1_vnum[_u16]
/// public static unsafe Vector<uint> LoadVector(Vector<uint> mask, const uint *base, long vnum); // svld1_vnum[_u32]
/// public static unsafe Vector<ulong> LoadVector(Vector<ulong> mask, const ulong *base, long vnum); // svld1_vnum[_u64]
/// public static unsafe Vector<short> LoadVectorByteSignExtendNonFaultingToInt16(Vector<short> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteSignExtendNonFaultingToInt32(Vector<int> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteSignExtendNonFaultingToInt64(Vector<long> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteSignExtendNonFaultingToUInt16(Vector<ushort> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteSignExtendNonFaultingToUInt32(Vector<uint> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteSignExtendNonFaultingToUInt64(Vector<ulong> mask, const sbyte *base, long vnum); // svldnf1sb_vnum_u64
/// public static unsafe Vector<short> LoadVectorByteSignExtendToInt16(Vector<short> mask, const sbyte *base, long vnum); // svld1sb_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteSignExtendToInt32(Vector<int> mask, const sbyte *base, long vnum); // svld1sb_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteSignExtendToInt64(Vector<long> mask, const sbyte *base, long vnum); // svld1sb_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteSignExtendToUInt16(Vector<ushort> mask, const sbyte *base, long vnum); // svld1sb_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteSignExtendToUInt32(Vector<uint> mask, const sbyte *base, long vnum); // svld1sb_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteSignExtendToUInt64(Vector<ulong> mask, const sbyte *base, long vnum); // svld1sb_vnum_u64
/// public static unsafe Vector<short> LoadVectorByteZeroExtendNonFaultingToInt16(Vector<short> mask, const byte *base, long vnum); // svldnf1ub_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteZeroExtendNonFaultingToInt32(Vector<int> mask, const byte *base, long vnum); // svldnf1ub_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteZeroExtendNonFaultingToInt64(Vector<long> mask, const byte *base, long vnum); // svldnf1ub_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteZeroExtendNonFaultingToUInt16(Vector<ushort> mask, const byte *base, long vnum); // svldnf1ub_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteZeroExtendNonFaultingToUInt32(Vector<uint> mask, const byte *base, long vnum); // svldnf1ub_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const byte *base, long vnum); // svldnf1ub_vnum_u64
/// public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(Vector<short> mask, const byte *base, long vnum); // svld1ub_vnum_s16
/// public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(Vector<int> mask, const byte *base, long vnum); // svld1ub_vnum_s32
/// public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(Vector<long> mask, const byte *base, long vnum); // svld1ub_vnum_s64
/// public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(Vector<ushort> mask, const byte *base, long vnum); // svld1ub_vnum_u16
/// public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(Vector<uint> mask, const byte *base, long vnum); // svld1ub_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(Vector<ulong> mask, const byte *base, long vnum); // svld1ub_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16SignExtendNonFaultingToInt32(Vector<int> mask, const short *base, long vnum); // svldnf1sh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16SignExtendNonFaultingToInt64(Vector<long> mask, const short *base, long vnum); // svldnf1sh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16SignExtendNonFaultingToUInt32(Vector<uint> mask, const short *base, long vnum); // svldnf1sh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16SignExtendNonFaultingToUInt64(Vector<ulong> mask, const short *base, long vnum); // svldnf1sh_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(Vector<int> mask, const short *base, long vnum); // svld1sh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(Vector<long> mask, const short *base, long vnum); // svld1sh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(Vector<uint> mask, const short *base, long vnum); // svld1sh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(Vector<ulong> mask, const short *base, long vnum); // svld1sh_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16ZeroExtendNonFaultingToInt32(Vector<int> mask, const ushort *base, long vnum); // svldnf1uh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16ZeroExtendNonFaultingToInt64(Vector<long> mask, const ushort *base, long vnum); // svldnf1uh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16ZeroExtendNonFaultingToUInt32(Vector<uint> mask, const ushort *base, long vnum); // svldnf1uh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const ushort *base, long vnum); // svldnf1uh_vnum_u64
/// public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(Vector<int> mask, const ushort *base, long vnum); // svld1uh_vnum_s32
/// public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(Vector<long> mask, const ushort *base, long vnum); // svld1uh_vnum_s64
/// public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(Vector<uint> mask, const ushort *base, long vnum); // svld1uh_vnum_u32
/// public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(Vector<ulong> mask, const ushort *base, long vnum); // svld1uh_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32SignExtendNonFaultingToInt64(Vector<long> mask, const int *base, long vnum); // svldnf1sw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32SignExtendNonFaultingToUInt64(Vector<ulong> mask, const int *base, long vnum); // svldnf1sw_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(Vector<long> mask, const int *base, long vnum); // svld1sw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(Vector<ulong> mask, const int *base, long vnum); // svld1sw_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32ZeroExtendNonFaultingToInt64(Vector<long> mask, const uint *base, long vnum); // svldnf1uw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendNonFaultingToUInt64(Vector<ulong> mask, const uint *base, long vnum); // svldnf1uw_vnum_u64
/// public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(Vector<long> mask, const uint *base, long vnum); // svld1uw_vnum_s64
/// public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(Vector<ulong> mask, const uint *base, long vnum); // svld1uw_vnum_u64
/// public static unsafe Vector<float> LoadVectorNonFaulting(Vector<float> mask, const float *base, long vnum); // svldnf1_vnum[_f32]
/// public static unsafe Vector<double> LoadVectorNonFaulting(Vector<double> mask, const double *base, long vnum); // svldnf1_vnum[_f64]
/// public static unsafe Vector<sbyte> LoadVectorNonFaulting(Vector<sbyte> mask, const sbyte *base, long vnum); // svldnf1_vnum[_s8]
/// public static unsafe Vector<short> LoadVectorNonFaulting(Vector<short> mask, const short *base, long vnum); // svldnf1_vnum[_s16]
/// public static unsafe Vector<int> LoadVectorNonFaulting(Vector<int> mask, const int *base, long vnum); // svldnf1_vnum[_s32]
/// public static unsafe Vector<long> LoadVectorNonFaulting(Vector<long> mask, const long *base, long vnum); // svldnf1_vnum[_s64]
/// public static unsafe Vector<byte> LoadVectorNonFaulting(Vector<byte> mask, const byte *base, long vnum); // svldnf1_vnum[_u8]
/// public static unsafe Vector<ushort> LoadVectorNonFaulting(Vector<ushort> mask, const ushort *base, long vnum); // svldnf1_vnum[_u16]
/// public static unsafe Vector<uint> LoadVectorNonFaulting(Vector<uint> mask, const uint *base, long vnum); // svldnf1_vnum[_u32]
/// public static unsafe Vector<ulong> LoadVectorNonFaulting(Vector<ulong> mask, const ulong *base, long vnum); // svldnf1_vnum[_u64]
/// public static unsafe Vector<float> LoadVectorNonTemporal(Vector<float> mask, const float *base, long vnum); // svldnt1_vnum[_f32]
/// public static unsafe Vector<double> LoadVectorNonTemporal(Vector<double> mask, const double *base, long vnum); // svldnt1_vnum[_f64]
/// public static unsafe Vector<sbyte> LoadVectorNonTemporal(Vector<sbyte> mask, const sbyte *base, long vnum); // svldnt1_vnum[_s8]
/// public static unsafe Vector<short> LoadVectorNonTemporal(Vector<short> mask, const short *base, long vnum); // svldnt1_vnum[_s16]
/// public static unsafe Vector<int> LoadVectorNonTemporal(Vector<int> mask, const int *base, long vnum); // svldnt1_vnum[_s32]
/// public static unsafe Vector<long> LoadVectorNonTemporal(Vector<long> mask, const long *base, long vnum); // svldnt1_vnum[_s64]
/// public static unsafe Vector<byte> LoadVectorNonTemporal(Vector<byte> mask, const byte *base, long vnum); // svldnt1_vnum[_u8]
/// public static unsafe Vector<ushort> LoadVectorNonTemporal(Vector<ushort> mask, const ushort *base, long vnum); // svldnt1_vnum[_u16]
/// public static unsafe Vector<uint> LoadVectorNonTemporal(Vector<uint> mask, const uint *base, long vnum); // svldnt1_vnum[_u32]
/// public static unsafe Vector<ulong> LoadVectorNonTemporal(Vector<ulong> mask, const ulong *base, long vnum); // svldnt1_vnum[_u64]
/// public static unsafe (Vector<float>, Vector<float>) LoadVectorx2(Vector<float> mask, const float *base, long vnum); // svld2_vnum[_f32]
/// public static unsafe (Vector<double>, Vector<double>) LoadVectorx2(Vector<double> mask, const double *base, long vnum); // svld2_vnum[_f64]
/// public static unsafe (Vector<sbyte>, Vector<sbyte>) LoadVectorx2(Vector<sbyte> mask, const sbyte *base, long vnum); // svld2_vnum[_s8]
/// public static unsafe (Vector<short>, Vector<short>) LoadVectorx2(Vector<short> mask, const short *base, long vnum); // svld2_vnum[_s16]
/// public static unsafe (Vector<int>, Vector<int>) LoadVectorx2(Vector<int> mask, const int *base, long vnum); // svld2_vnum[_s32]
/// public static unsafe (Vector<long>, Vector<long>) LoadVectorx2(Vector<long> mask, const long *base, long vnum); // svld2_vnum[_s64]
/// public static unsafe (Vector<byte>, Vector<byte>) LoadVectorx2(Vector<byte> mask, const byte *base, long vnum); // svld2_vnum[_u8]
/// public static unsafe (Vector<ushort>, Vector<ushort>) LoadVectorx2(Vector<ushort> mask, const ushort *base, long vnum); // svld2_vnum[_u16]
/// public static unsafe (Vector<uint>, Vector<uint>) LoadVectorx2(Vector<uint> mask, const uint *base, long vnum); // svld2_vnum[_u32]
/// public static unsafe (Vector<ulong>, Vector<ulong>) LoadVectorx2(Vector<ulong> mask, const ulong *base, long vnum); // svld2_vnum[_u64]
/// public static unsafe (Vector<float>, Vector<float>, Vector<float>) LoadVectorx3(Vector<float> mask, const float *base, long vnum); // svld3_vnum[_f32]
/// public static unsafe (Vector<double>, Vector<double>, Vector<double>) LoadVectorx3(Vector<double> mask, const double *base, long vnum); // svld3_vnum[_f64]
/// public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx3(Vector<sbyte> mask, const sbyte *base, long vnum); // svld3_vnum[_s8]
/// public static unsafe (Vector<short>, Vector<short>, Vector<short>) LoadVectorx3(Vector<short> mask, const short *base, long vnum); // svld3_vnum[_s16]
/// public static unsafe (Vector<int>, Vector<int>, Vector<int>) LoadVectorx3(Vector<int> mask, const int *base, long vnum); // svld3_vnum[_s32]
/// public static unsafe (Vector<long>, Vector<long>, Vector<long>) LoadVectorx3(Vector<long> mask, const long *base, long vnum); // svld3_vnum[_s64]
/// public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx3(Vector<byte> mask, const byte *base, long vnum); // svld3_vnum[_u8]
/// public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx3(Vector<ushort> mask, const ushort *base, long vnum); // svld3_vnum[_u16]
/// public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx3(Vector<uint> mask, const uint *base, long vnum); // svld3_vnum[_u32]
/// public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx3(Vector<ulong> mask, const ulong *base, long vnum); // svld3_vnum[_u64]
/// public static unsafe (Vector<float>, Vector<float>, Vector<float>, Vector<float>) LoadVectorx4(Vector<float> mask, const float *base, long vnum); // svld4_vnum[_f32]
/// public static unsafe (Vector<double>, Vector<double>, Vector<double>, Vector<double>) LoadVectorx4(Vector<double> mask, const double *base, long vnum); // svld4_vnum[_f64]
/// public static unsafe (Vector<sbyte>, Vector<sbyte>, Vector<sbyte>, Vector<sbyte>) LoadVectorx4(Vector<sbyte> mask, const sbyte *base, long vnum); // svld4_vnum[_s8]
/// public static unsafe (Vector<short>, Vector<short>, Vector<short>, Vector<short>) LoadVectorx4(Vector<short> mask, const short *base, long vnum); // svld4_vnum[_s16]
/// public static unsafe (Vector<int>, Vector<int>, Vector<int>, Vector<int>) LoadVectorx4(Vector<int> mask, const int *base, long vnum); // svld4_vnum[_s32]
/// public static unsafe (Vector<long>, Vector<long>, Vector<long>, Vector<long>) LoadVectorx4(Vector<long> mask, const long *base, long vnum); // svld4_vnum[_s64]
/// public static unsafe (Vector<byte>, Vector<byte>, Vector<byte>, Vector<byte>) LoadVectorx4(Vector<byte> mask, const byte *base, long vnum); // svld4_vnum[_u8]
/// public static unsafe (Vector<ushort>, Vector<ushort>, Vector<ushort>, Vector<ushort>) LoadVectorx4(Vector<ushort> mask, const ushort *base, long vnum); // svld4_vnum[_u16]
/// public static unsafe (Vector<uint>, Vector<uint>, Vector<uint>, Vector<uint>) LoadVectorx4(Vector<uint> mask, const uint *base, long vnum); // svld4_vnum[_u32]
/// public static unsafe (Vector<ulong>, Vector<ulong>, Vector<ulong>, Vector<ulong>) LoadVectorx4(Vector<ulong> mask, const ulong *base, long vnum); // svld4_vnum[_u64]
/// public static unsafe void PrefetchBytes(Vector<byte> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfb_vnum
/// public static unsafe void PrefetchInt16(Vector<ushort> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfh_vnum
/// public static unsafe void PrefetchInt32(Vector<uint> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfw_vnum
/// public static unsafe void PrefetchInt64(Vector<ulong> mask, const void *base, long vnum, enum SvePrefetchType op); // svprfd_vnum
/// Total Maybe: 112
/// Total ACLE covered across API: 250
This contributes to https://github.com/dotnet/runtime/issues/93095
It covers instructions in FEAT_SVE related to loads. Note that there are more load methods in the first faulting and gather loads fragments.
This list was auto generated from the C ACLE for SVE, and is in three parts:
1. The methods list reduced down to Vector versions. All possible variants of T are given above the method.
2. The complete list of all methods. The corresponding ACLE methods and SVE instructions are given above the method.
3. All rejected ACLE methods. These are methods we have agreed do not need including in C#.
Where possible, existing C# naming conventions have been matched.
Many of the C functions take predicate argument(s), with an svbool_t as the first argument. These are missing from the C# methods. It is expected that the JIT will create predicates where required, or combine the operation with uses of ConditionalSelect(). For more discussion see the comments in https://github.com/dotnet/runtime/issues/88140.
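As a rough illustration of that pattern (a hedged sketch only: `Sve.LoadVector` and `Sve.ConditionalSelect` are assumed to exist roughly as proposed here, and `MaskedLoadSingle` is a hypothetical helper, not part of the proposal):

```csharp
// Hedged sketch: a masked C ACLE load such as svld1[_f32](pg, ptr) could be expressed
// as an unpredicated load whose lanes are then selected against another vector.
static unsafe Vector<float> MaskedLoadSingle(Vector<float> mask, float* address, Vector<float> fallback)
{
    Vector<float> loaded = Sve.LoadVector(address);        // unpredicated load of a full vector
    return Sve.ConditionalSelect(mask, loaded, fallback);  // keep loaded lanes where the mask is set
}
```

Note this sketch assumes the whole vector is safely loadable; the faulting behaviour of masked-off lanes is discussed further down.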
I wonder if we can have a simpler name than `ComputeVectorAddresses8/16/32/64`. `Compute*Addresses` where `*` is `Byte`, `Int16`, `Int32`, etc. seems like it would work nicely.
`Load128ReplicateToVector` should probably be `LoadVector128AndReplicateToVector`, to match the `LoadAndReplicateToVector128` we already have in `AdvSimd`.
The `LoadVector16SignExtend` and related APIs need a name that differentiates by return type. The names here are tricky because you have a single API that is basically doing two things: 1. load part of a `Vector<short>`, then 2. `SignExtendWidening`. These probably need some thought into how we can express this semantic cleanly to the user.
The same general comment around `Doublewords` applies here as in the other issues.
> I wonder if we can have a simpler name than `ComputeVectorAddresses8/16/32/64`. `Compute*Addresses` where `*` is `Byte`, `Int16`, `Int32`, etc. seems like it would work nicely.

Done.
Just spotted there are multiple variants here:
The expected one: Return a vector in which element i contains bases[i] + offsets[i].
/// svuint32_t svadrb[_u32base]_[u32]offset(svuint32_t bases, svuint32_t offsets) : "ADR Zresult.S, [Zbases.S, Zoffsets.S]"
public static unsafe Vector<uint> ComputeSignedInt32Addresses(Vector<uint> bases, Vector<uint> offsets);
And three variants with shift: Return a vector in which element i contains bases[i] + indices[i] * N.
/// svuint32_t svadrd[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #3]"
/// svuint32_t svadrh[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #1]"
/// svuint32_t svadrw[_u32base]_[u32]index(svuint32_t bases, svuint32_t indices) : "ADR Zresult.S, [Zbases.S, Zindices.S, LSL #2]"
public static unsafe Vector<uint> ComputeSignedInt32Addresses(Vector<uint> bases, Vector<uint> indices);
Call these variants `ComputeSignedInt32AddressesShift1()`, `ComputeSignedInt32AddressesShift2()` and `ComputeSignedInt32AddressesShift3()`?
> `Load128ReplicateToVector` should probably be `LoadVector128AndReplicateToVector`, to match the `LoadAndReplicateToVector128` we already have in `AdvSimd`.

Done.
> The `LoadVector16SignExtend` and related APIs need a name that differentiates by return type. The names here are tricky because you have a single API that is basically doing two things: 1. load part of a `Vector<short>`, then 2. `SignExtendWidening`. These probably need some thought into how we can express this semantic cleanly to the user.

I've expanded these to:
`LoadVectorDouble`
`LoadVector8SignExtendNonFaultingToSignedInt32`
plus all the non faulting, non temporal and LoadVectorx variants.

I think I may have gone too far? Can `LoadVectorDouble()` just be `LoadVector()`?
> The same general comment around `Doublewords` applies here as in the other issues.

Done.
> Just spotted there are multiple variants here:

I think they are all pretty much the same, which is "baseAddr + index". Correspondingly, `svadrb` is `byte* baseAddr`, `svadrh` is `ushort* baseAddr`, `svadrw` is `uint* baseAddr`, and `svadrd` is `ulong* baseAddr`. The scaling of the index (by 1, which the manual refers to as just "offset"; or by 2/4/8) then works identically to how pointer arithmetic works in C/C++ and C#. The base addresses can be 32-bit or 64-bit values and the indices are always taken as elements which are zero or sign-extended from the lowest 32 bits.
While I think we could expose all possible permutations here, I think it'd be reasonable to scope it back to effectively:
ComputeByteAddresses(Vector<uint> bases, Vector<uint> indices); // Zero extend
ComputeByteAddresses(Vector<uint> bases, Vector<int> indices); // Sign extend
ComputeByteAddresses(Vector<ulong> bases, Vector<ulong> indices); // Zero extend
ComputeByteAddresses(Vector<ulong> bases, Vector<long> indices); // Sign extend
// Repeat for Int16, Int32, and Int64 Addresses
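As a scalar illustration of those semantics (a sketch only; `ComputeAddressesScalar` is a made-up helper, with `scale` standing in for the 1/2/4/8 element sizes behind `svadrb`/`svadrh`/`svadrw`/`svadrd`):

```csharp
// Illustrative scalar model of the Compute*Addresses semantics: element i of the
// result is bases[i] + indices[i] * scale, exactly like indexing a T* in C/C++/C#.
static ulong[] ComputeAddressesScalar(ulong[] bases, long[] indices, int scale)
{
    var result = new ulong[bases.Length];
    for (int i = 0; i < bases.Length; i++)
    {
        result[i] = bases[i] + (ulong)(indices[i] * scale);
    }
    return result;
}
```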
> I think I may have gone too far? Can LoadVectorDouble() just be LoadVector()?

Yes, because while we can't overload by return type, we can overload by parameters. So `Vector<double> LoadVector(double* address)` and `Vector<int> LoadVector(int* address)` are distinguishable, since the type of their first parameter differs.
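For example (a hypothetical caller, assuming the `LoadVector` overloads land as listed above):

```csharp
// Hypothetical usage: overload resolution is driven by the pointer parameter type.
static unsafe void LoadBoth(double* pd, int* pi)
{
    Vector<double> d = Sve.LoadVector(pd); // binds to LoadVector(double*)
    Vector<int>    i = Sve.LoadVector(pi); // binds to LoadVector(int*)
}
```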
> `LoadVector8SignExtendNonFaultingToSignedInt32`

We probably want `LoadVectorByte...`; using `Vector8` is liable to cause confusion with things like `Vector2/3/4` or `Vector64/128/256/512`. Whereas `VectorByte` follows the existing terminology for differentiating different `Vector<...>`.
All done.
Maybe the following:
public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(const short *base);
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(const short *base);
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(const ushort *base);
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(const ushort *base);
should be:
public static unsafe Vector<long> LoadVectorInt16ExtendToInt64(const short *base);
public static unsafe Vector<ulong> LoadVectorInt16ExtendToUInt64(const short *base);
public static unsafe Vector<long> LoadVectorUInt16ExtendToInt64(const ushort *base);
public static unsafe Vector<ulong> LoadVectorUInt16ExtendToUInt64(const ushort *base);
?
I have a slight preference towards the former; it makes clear which type of extension is being done. I've seen a lot of people get confused and think it zero or sign extends based on the destination type, when in practice it's the input type that determines which is done.
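A scalar C# analogue of that point (illustrative only, no SVE involved): the source element type, not the destination, decides whether the widening is a sign or zero extension.

```csharp
// The source type decides the extension, regardless of the destination's signedness.
static void ExtensionExample()
{
    sbyte signedSource   = -1;   // bit pattern 0xFF
    byte  unsignedSource = 0xFF; // same bit pattern

    long fromSigned   = signedSource;   // sign extended:  -1
    long fromUnsigned = unsignedSource; // zero extended: 255

    // Even when the destination is unsigned, a signed source still sign extends first.
    ulong uFromSigned   = unchecked((ulong)(long)signedSource); // 0xFFFFFFFFFFFFFFFF
    ulong uFromUnsigned = unsignedSource;                       // 255

    Console.WriteLine($"{fromSigned} {fromUnsigned} {uFromSigned:X} {uFromUnsigned}");
}
```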
> `LoadVector128` - these would be better with a name? `LoadVectorQuad`?

I don't think so. We don't have a name `Quad` today and if we did it might imply IEEE 754 `Binary128` (`Half`, `Single`, `Double`, `Quad` are the common names). `Vector128` makes it very clear what's happening without describing any other specifics, and matches the name used in AdvSimd for the `Vector64`/`Vector128` APIs.
Compute{Byte}Addresses are now Compute{8}BitAddresses
Renamed the 8-bit ComputeAddresses second parameters to `indices` to be consistent with the other sizes.
All of the Load methods taking a `T* base` now take a `T* address`, for consistency with other Load methods.
All of the methods without "NonFaulting" have had the masks removed
LoadVectorByteSignExtendNonFaultingToInt16 => LoadVectorSByteSignExtendNonFaultingToInt16, because they are operating on sbyte
LoadVectorSByteSignExtendNonFaultingToInt16 => LoadVectorSByteNonFaultingSignExtendToInt16 (move NonFaulting left)
LoadVector{Type}ZeroExtendNonFaulting{Etc} => NonFaultingZeroExtend
All optionals are cut, consistent with the others we reviewed today.
LoadVector x{2,3,4} are not approved, they need a review with more people present. (x => x? x => X? x => Bulk? x => "Load3Vectors"?)
Prefetch and SvePrefetchType are also not approved, delayed to the same further review as LoadVector x
namespace System.Runtime.Intrinsics.Arm;
/// VectorT Summary
public abstract class Sve : AdvSimd /// Feature: FEAT_SVE Category: loads
{
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> bases, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> bases, Vector<T> indices); // ADR
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector(T* address); // LD1W or LD1D or LD1B or LD1H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVector128AndReplicateToVector(Vector<T> mask, T* address); // LD1RQW or LD1RQD or LD1RQB or LD1RQH
public static unsafe Vector<short> LoadVectorSByteNonFaultingSignExtendToInt16(Vector<short> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<int> LoadVectorSByteNonFaultingSignExtendToInt32(Vector<int> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<long> LoadVectorSByteNonFaultingSignExtendToInt64(Vector<long> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<ushort> LoadVectorSByteNonFaultingSignExtendToUInt16(Vector<ushort> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<uint> LoadVectorSByteNonFaultingSignExtendToUInt32(Vector<uint> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<ulong> LoadVectorSByteNonFaultingSignExtendToUInt64(Vector<ulong> mask, sbyte* address); // LDNF1SB
public static unsafe Vector<short> LoadVectorSByteSignExtendToInt16(sbyte* address); // LD1SB
public static unsafe Vector<int> LoadVectorSByteSignExtendToInt32(sbyte* address); // LD1SB
public static unsafe Vector<long> LoadVectorSByteSignExtendToInt64(sbyte* address); // LD1SB
public static unsafe Vector<ushort> LoadVectorSByteSignExtendToUInt16(sbyte* address); // LD1SB
public static unsafe Vector<uint> LoadVectorSByteSignExtendToUInt32(sbyte* address); // LD1SB
public static unsafe Vector<ulong> LoadVectorSByteSignExtendToUInt64(sbyte* address); // LD1SB
public static unsafe Vector<short> LoadVectorByteNonFaultingZeroExtendToInt16(Vector<short> mask, byte* address); // LDNF1B
public static unsafe Vector<int> LoadVectorByteNonFaultingZeroExtendToInt32(Vector<int> mask, byte* address); // LDNF1B
public static unsafe Vector<long> LoadVectorByteNonFaultingZeroExtendToInt64(Vector<long> mask, byte* address); // LDNF1B
public static unsafe Vector<ushort> LoadVectorByteNonFaultingZeroExtendToUInt16(Vector<ushort> mask, byte* address); // LDNF1B
public static unsafe Vector<uint> LoadVectorByteNonFaultingZeroExtendToUInt32(Vector<uint> mask, byte* address); // LDNF1B
public static unsafe Vector<ulong> LoadVectorByteNonFaultingZeroExtendToUInt64(Vector<ulong> mask, byte* address); // LDNF1B
public static unsafe Vector<short> LoadVectorByteZeroExtendToInt16(byte* address); // LD1B
public static unsafe Vector<int> LoadVectorByteZeroExtendToInt32(byte* address); // LD1B
public static unsafe Vector<long> LoadVectorByteZeroExtendToInt64(byte* address); // LD1B
public static unsafe Vector<ushort> LoadVectorByteZeroExtendToUInt16(byte* address); // LD1B
public static unsafe Vector<uint> LoadVectorByteZeroExtendToUInt32(byte* address); // LD1B
public static unsafe Vector<ulong> LoadVectorByteZeroExtendToUInt64(byte* address); // LD1B
public static unsafe Vector<int> LoadVectorInt16NonFaultingSignExtendToInt32(Vector<int> mask, short* address); // LDNF1SH
public static unsafe Vector<long> LoadVectorInt16NonFaultingSignExtendToInt64(Vector<long> mask, short* address); // LDNF1SH
public static unsafe Vector<uint> LoadVectorInt16NonFaultingSignExtendToUInt32(Vector<uint> mask, short* address); // LDNF1SH
public static unsafe Vector<ulong> LoadVectorInt16NonFaultingSignExtendToUInt64(Vector<ulong> mask, short* address); // LDNF1SH
public static unsafe Vector<int> LoadVectorInt16SignExtendToInt32(short* address); // LD1SH
public static unsafe Vector<long> LoadVectorInt16SignExtendToInt64(short* address); // LD1SH
public static unsafe Vector<uint> LoadVectorInt16SignExtendToUInt32(short* address); // LD1SH
public static unsafe Vector<ulong> LoadVectorInt16SignExtendToUInt64(short* address); // LD1SH
public static unsafe Vector<int> LoadVectorInt16NonFaultingZeroExtendToInt32(Vector<int> mask, ushort* address); // LDNF1H
public static unsafe Vector<long> LoadVectorInt16NonFaultingZeroExtendToInt64(Vector<long> mask, ushort* address); // LDNF1H
public static unsafe Vector<uint> LoadVectorInt16NonFaultingZeroExtendToUInt32(Vector<uint> mask, ushort* address); // LDNF1H
public static unsafe Vector<ulong> LoadVectorInt16NonFaultingZeroExtendToUInt64(Vector<ulong> mask, ushort* address); // LDNF1H
public static unsafe Vector<int> LoadVectorInt16ZeroExtendToInt32(ushort* address); // LD1H
public static unsafe Vector<long> LoadVectorInt16ZeroExtendToInt64(ushort* address); // LD1H
public static unsafe Vector<uint> LoadVectorInt16ZeroExtendToUInt32(ushort* address); // LD1H
public static unsafe Vector<ulong> LoadVectorInt16ZeroExtendToUInt64(ushort* address); // LD1H
public static unsafe Vector<long> LoadVectorInt32NonFaultingSignExtendToInt64(Vector<long> mask, int* address); // LDNF1SW
public static unsafe Vector<ulong> LoadVectorInt32NonFaultingSignExtendToUInt64(Vector<ulong> mask, int* address); // LDNF1SW
public static unsafe Vector<long> LoadVectorInt32SignExtendToInt64(int* address); // LD1SW
public static unsafe Vector<ulong> LoadVectorInt32SignExtendToUInt64(int* address); // LD1SW
public static unsafe Vector<long> LoadVectorInt32NonFaultingZeroExtendToInt64(Vector<long> mask, uint* address); // LDNF1W
public static unsafe Vector<ulong> LoadVectorInt32NonFaultingZeroExtendToUInt64(Vector<ulong> mask, uint* address); // LDNF1W
public static unsafe Vector<long> LoadVectorInt32ZeroExtendToInt64(uint* address); // LD1W
public static unsafe Vector<ulong> LoadVectorInt32ZeroExtendToUInt64(uint* address); // LD1W
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonFaulting(Vector<T> mask, T* address); // LDNF1W or LDNF1D or LDNF1B or LDNF1H
/// T: float, double, sbyte, short, int, long, byte, ushort, uint, ulong
public static unsafe Vector<T> LoadVectorNonTemporal(T* address); // LDNT1W or LDNT1D or LDNT1B or LDNT1H
}
These 4 should be `LoadVectorSByteSignExtend...` to fit with the other Sign Extended changes.
public static unsafe Vector<long> LoadVectorByteSignExtendToInt64(sbyte* address); // LD1SB
public static unsafe Vector<ushort> LoadVectorByteSignExtendToUInt16(sbyte* address); // LD1SB
public static unsafe Vector<uint> LoadVectorByteSignExtendToUInt32(sbyte* address); // LD1SB
public static unsafe Vector<ulong> LoadVectorByteSignExtendToUInt64(sbyte* address); // LD1SB
Not sure why I missed inserting the four esses there, but I've fixed it in the approval post.
We didn't quite get Non Faulting correct in the review. It is better understood when taken into account with first faulting.
For all three instructions: Inactive elements will not cause a read from Device memory or signal a fault, and are set to zero in the destination vector.
The difference is in the active elements: what happens when the address of an element is invalid at the point it is loaded. This can be seen in the pseudo code for the instructions: potentially faulting loads use the function `Mem[]` and non faulting loads use `MemNF[]`.
All three instructions have the same latency and throughput.
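As a very rough element-wise model of the above (a hedged sketch: `wouldFault`/`readElement` stand in for the architectural `Mem[]`/`MemNF[]` accesses, and both the FFR bookkeeping and the exact contents of suppressed elements are deliberately omitted; check the Arm architecture reference for the precise behaviour):

```csharp
// Illustrative element-wise model of a predicated load; not the real instruction semantics.
static int[] ModelLoad(bool[] mask, Func<int, bool> wouldFault, Func<int, int> readElement, bool nonFaulting)
{
    var result = new int[mask.Length];
    for (int i = 0; i < mask.Length; i++)
    {
        if (!mask[i])
        {
            result[i] = 0; // inactive elements never read memory and are zeroed
        }
        else if (wouldFault(i))
        {
            if (nonFaulting)
                result[i] = 0; // fault suppressed (placeholder value; real FFR/contents behaviour omitted)
            else
                throw new AccessViolationException(); // a plain load takes the fault
        }
        else
        {
            result[i] = readElement(i); // valid active element loads normally
        }
    }
    return result;
}
```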
Thoughts for the API:
- Loads that can fault keep the `mask` parameter, to avoid faults on inactive elements.
- Non faulting loads can drop the `mask` as they will never fault.
- First faulting loads keep the `mask` as this changes which element can fault.

When discussing the gather loads fragment we decided that `Vector<T> bases` seemed better as `Vector<T> addresses`; so there's a partial update:
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute8BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute16BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute32BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
/// T: [uint, int], [ulong, long]
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> addresses, Vector<T2> indices); // ADR
/// T: uint, ulong
public static unsafe Vector<T> Compute64BitAddresses(Vector<T> addresses, Vector<T> indices); // ADR
Follow-on comment: The NonFaulting instructions should remove the mask parameter, and it should be inserted into the ones that do fault.