Closed wangyongxina closed 4 years ago
new function: // added by wangyongxin // Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64&expand=4961,5279 // FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm8)
#define _mm_slli_epi64(a, imm) \
({ \ __m128i ret; \ if ((imm) <= 0) {\ ret = a; \ } \ else if ((imm) > 63) { \ ret = _mm_setzero_si128(); \ } \ else { \ ret = vreinterpretq_m128i_s64(vshlq_n_s64(vreinterpretq_s64_m128i(a), (imm))); \ } \ ret; \ })
//added by wangyongxin //Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64&expand=4961,5279,5491 //FORCE_INLINE __m128i _mm_srli_epi64 (__m128i a, int imm8)
#define _mm_srli_epi64(a, imm) \
({ \ __m128i ret; \ if ((imm) <= 0) { \ ret = a; \ } \ else if ((imm)> 63) { \ ret = _mm_setzero_si128(); \ } \ else { \ ret = vreinterpretq_m128i_u64(vshrq_n_u64(vreinterpretq_u64_m128i(a), (imm))); \ } \ ret; \ })
//added by wangyongxin //Compare packed 32-bit integers in a and b for equality, and store the results in dst. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32&expand=4961,5279,5491,767 FORCE_INLINE __m128i _mm_cmpeq_epi32(m128i a, m128i b) { return vreinterpretq_m128i_u32(vceqq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); }
bug:
#define _mm_srli_epi16(a, imm) \
({ \ __m128i ret; \ if ((imm) <= 0) { \ ret = a; \ } \ else if ((imm)> 31) { \ ret = _mm_setzero_si128(); \ } \ else { \ ret = vreinterpretq_m128i_u16(vshrq_n_u16(vreinterpretq_u16_m128i(a), (imm))); \ } \ ret; \ }) according to intel document https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16&expand=5473 else if ((imm)> 31) { \ should be changed to else if ((imm)> 15) { \
function _mm_slli_epi16 also has this bug.
thanks
diff file SSE2NEON.zip
Implemented in DLTcollab/sse2neon.
new function: // added by wangyongxin // Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64&expand=4961,5279 // FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm8)
// Shift packed 64-bit integers in a left by imm8 while shifting in zeros,
// and store the results in dst.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64&expand=4961,5279
// Fix: the '#' of '#define' was stripped (markdown heading); restored here.
// NOTE(review): vshlq_n_s64 requires (imm) to be a compile-time constant in
// [0, 63]; the range guards keep the intrinsic call inside that window.
#define _mm_slli_epi64(a, imm)                                        \
    ({                                                                \
        __m128i ret;                                                  \
        if ((imm) <= 0) {                                             \
            ret = a;                                                  \
        } else if ((imm) > 63) {                                      \
            /* Intel semantics: count > 63 zeroes every lane. */      \
            ret = _mm_setzero_si128();                                \
        } else {                                                      \
            ret = vreinterpretq_m128i_s64(                            \
                vshlq_n_s64(vreinterpretq_s64_m128i(a), (imm)));      \
        }                                                             \
        ret;                                                          \
    })
//added by wangyongxin //Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64&expand=4961,5279,5491 //FORCE_INLINE __m128i _mm_srli_epi64 (__m128i a, int imm8)
// Shift packed 64-bit integers in a right by imm8 while shifting in zeros
// (logical shift), and store the results in dst.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64&expand=4961,5279,5491
// Fix: the '#' of '#define' was stripped (markdown heading); restored here.
// NOTE(review): vshrq_n_u64 requires (imm) to be a compile-time constant in
// [1, 64]; the range guards keep the intrinsic call inside that window.
#define _mm_srli_epi64(a, imm)                                        \
    ({                                                                \
        __m128i ret;                                                  \
        if ((imm) <= 0) {                                             \
            ret = a;                                                  \
        } else if ((imm) > 63) {                                      \
            /* Intel semantics: count > 63 zeroes every lane. */      \
            ret = _mm_setzero_si128();                                \
        } else {                                                      \
            ret = vreinterpretq_m128i_u64(                            \
                vshrq_n_u64(vreinterpretq_u64_m128i(a), (imm)));      \
        }                                                             \
        ret;                                                          \
    })
//added by wangyongxin //Compare packed 32-bit integers in a and b for equality, and store the results in dst. https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32&expand=4961,5279,5491,767 FORCE_INLINE __m128i _mm_cmpeq_epi32(m128i a, m128i b) { return vreinterpretq_m128i_u32(vceqq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); }
bug:
// Shift packed 16-bit integers in a right by imm8 while shifting in zeros
// (logical shift), and store the results in dst.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16&expand=5473
// BUG FIX (as reported): the original guard was ((imm) > 31), but per the
// Intel intrinsics guide a 16-bit lane is zeroed for any count > 15, so the
// correct bound is ((imm) > 15). With the old bound, counts 16..31 would
// reach vshrq_n_u16, whose immediate must be in [1, 16] — a compile error
// or wrong result instead of the required all-zeros output.
// Also restored the '#' of '#define' stripped by markdown rendering.
#define _mm_srli_epi16(a, imm)                                        \
    ({                                                                \
        __m128i ret;                                                  \
        if ((imm) <= 0) {                                             \
            ret = a;                                                  \
        } else if ((imm) > 15) {                                      \
            /* Intel semantics: count > 15 zeroes every 16-bit lane. */ \
            ret = _mm_setzero_si128();                                \
        } else {                                                      \
            ret = vreinterpretq_m128i_u16(                            \
                vshrq_n_u16(vreinterpretq_u16_m128i(a), (imm)));      \
        }                                                             \
        ret;                                                          \
    })
function _mm_slli_epi16 also has this bug.
thanks