Open GoogleCodeExporter opened 8 years ago
Write a function to extract just the 'A' channel from ARGB.

Here is a quick SSE2 version:

```c
// Specialized ARGB to A that just isolates the A (alpha) channel.
__declspec(naked)
void ARGBToARow_SSE2(const uint8* src_argb, uint8* dst_a, int pix) {
  __asm {
    mov        eax, [esp + 4]    // src_argb
    mov        edx, [esp + 8]    // dst_a
    mov        ecx, [esp + 12]   // pix

  wloop:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    lea        eax, [eax + 32]
    psrld      xmm0, 24  // Move alpha to bottom.
    psrld      xmm1, 24
    packssdw   xmm0, xmm1
    packuswb   xmm0, xmm1
    movq       qword ptr [edx], xmm0
    lea        edx, [edx + 8]
    sub        ecx, 8
    jg         wloop
    ret
  }
}
```

In this CL sobel switched to grey scale, but was previously extracting just the G channel from ARGB: https://webrtc-codereview.appspot.com/57479004/

It was based on Bayer code which used a channel selector. The calling code was:

```c
ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
```

```c
// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
LIBYUV_API
int ARGBToG(const uint8* src_argb, int src_stride_argb,
            uint8* dst_g, int dst_stride_g,
            int width, int height);
```

For reference, the existing G-channel row functions (Visual C, GCC, NEON 64-bit, NEON 32-bit, C, and the "Any" wrappers):

```c
// Specialized ARGB to Bayer that just isolates G channel.
__declspec(naked)
void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
                           uint32 selector, int pix) {
  __asm {
    mov        eax, [esp + 4]    // src_argb
    mov        edx, [esp + 8]    // dst_bayer
                                 // selector
    mov        ecx, [esp + 16]   // pix
    pcmpeqb    xmm5, xmm5        // generate mask 0x000000ff
    psrld      xmm5, 24

  wloop:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    lea        eax, [eax + 32]
    psrld      xmm0, 8  // Move green to bottom.
    psrld      xmm1, 8
    pand       xmm0, xmm5
    pand       xmm1, xmm5
    packssdw   xmm0, xmm1
    packuswb   xmm0, xmm1
    movq       qword ptr [edx], xmm0
    lea        edx, [edx + 8]
    sub        ecx, 8
    jg         wloop
    ret
  }
}
```

```c
#ifdef HAS_ARGBTOBAYERGGROW_SSE2
void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
                           uint32 selector, int pix) {
  asm volatile (
    "pcmpeqb   %%xmm5,%%xmm5                   \n"
    "psrld     $0x18,%%xmm5                    \n"
    LABELALIGN
  "1:                                          \n"
    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
    "lea       " MEMLEA(0x20,0) ",%0           \n"
    "psrld     $0x8,%%xmm0                     \n"
    "psrld     $0x8,%%xmm1                     \n"
    "pand      %%xmm5,%%xmm0                   \n"
    "pand      %%xmm5,%%xmm1                   \n"
    "packssdw  %%xmm1,%%xmm0                   \n"
    "packuswb  %%xmm1,%%xmm0                   \n"
    "movq      %%xmm0," MEMACCESS(1) "         \n"
    "lea       " MEMLEA(0x8,1) ",%1            \n"
    "sub       $0x8,%2                         \n"
    "jg        1b                              \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_bayer), // %1
    "+r"(pix)        // %2
  :
  : "memory", "cc"
    , "xmm0", "xmm1", "xmm5"
  );
}
#endif  // HAS_ARGBTOBAYERGGROW_SSE2
```

```c
// Select G channels from ARGB. e.g. GGGGGGGG
#ifdef HAS_ARGBTOBAYERGGROW_NEON
void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
                           uint32 /*selector*/, int pix) {
  asm volatile (
  "1:                                          \n"
    MEMACCESS(0)
    "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"  // load row 8 pixels
    "subs       %w2, %w2, #8                   \n"  // 8 processed per loop
    MEMACCESS(1)
    "st1        {v1.8b}, [%1], #8              \n"  // store 8 G's.
    "b.gt       1b                             \n"
  : "+r"(src_argb),   // %0
    "+r"(dst_bayer),  // %1
    "+r"(pix)         // %2
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
  );
}
#endif  // HAS_ARGBTOBAYERGGROW_NEON
```

```c
// Select G channels from ARGB. e.g. GGGGGGGG
void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
                           uint32 /*selector*/, int pix) {
  asm volatile (
  "1:                                          \n"
    MEMACCESS(0)
    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load row 8 pixels.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop
    MEMACCESS(1)
    "vst1.8     {d1}, [%1]!                    \n"  // store 8 G's.
    "bgt        1b                             \n"
  : "+r"(src_argb),   // %0
    "+r"(dst_bayer),  // %1
    "+r"(pix)         // %2
  :
  : "cc", "memory", "q0", "q1"  // Clobber List
  );
}
```

```c
// Select G channel from ARGB. e.g. GGGGGGGG
void ARGBToBayerGGRow_C(const uint8* src_argb,
                        uint8* dst_bayer, uint32 selector, int pix) {
  // Copy a row of G.
  int x;
  for (x = 0; x < pix - 1; x += 2) {
    dst_bayer[0] = src_argb[1];
    dst_bayer[1] = src_argb[5];
    src_argb += 8;
    dst_bayer += 2;
  }
  if (pix & 1) {
    dst_bayer[0] = src_argb[1];
  }
}
```

```c
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK)        \
    void NAMEANY(const uint8* src, uint8* dst, uint32 selector, int width) {   \
      int n = width & ~MASK;                                                   \
      if (n > 0) {                                                             \
        ARGBTORGB_SIMD(src, dst, selector, n);                                 \
      }                                                                        \
      ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK);      \
    }

#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
         4, 1, 7)
#endif
#if defined(HAS_ARGBTOBAYERGGROW_NEON)
BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
         4, 1, 7)
#endif

#undef BAYERANY
```
Original issue reported on code.google.com by fbarch...@google.com on 25 Feb 2016 at 12:04