sayrer / libyuv

Automatically exported from code.google.com/p/libyuv
BSD 3-Clause "New" or "Revised" License
0 stars 0 forks source link

ARGBToA #572

Open GoogleCodeExporter opened 8 years ago

GoogleCodeExporter commented 8 years ago
Write a function to extract just the 'A' channel from ARGB

Here is a quick SSE2 version:
// Extracts just the A (alpha) channel from a row of ARGB pixels.
// x86 MSVC inline-asm version. Each loop iteration loads 32 bytes from
// src_argb, shifts every 32-bit pixel right by 24 so its top byte (alpha)
// lands in the low byte, packs the results down to bytes and stores 8 bytes
// to dst_a. pix is reduced by 8 per iteration and is only tested after the
// store, so the loop body always runs at least once.
// NOTE(review): callers presumably pass pix as a positive multiple of 8 and
// handle any remainder elsewhere - confirm.
__declspec(naked) 
void ARGBToARow_SSE2(const uint8* src_argb, uint8* dst_a, int pix) {  
  __asm { 
    mov        eax, [esp + 4]    // src_argb  
    mov        edx, [esp + 8]    // dst_a
    mov        ecx, [esp + 12]   // pix 

  wloop:  
    movdqu     xmm0, [eax]  
    movdqu     xmm1, [eax + 16] 
    lea        eax, [eax + 32]  
    psrld      xmm0, 24  // Move alpha to bottom.  
    psrld      xmm1, 24  
    packssdw   xmm0, xmm1 // 8 dwords -> 8 words; values are 0..255, so no saturation.
    packuswb   xmm0, xmm1 // words -> bytes; the 8 alphas end up in the low 8 bytes.
    movq       qword ptr [edx], xmm0  // store only the low 8 bytes.
    lea        edx, [edx + 8] 
    sub        ecx, 8 
    jg         wloop  
    ret 
  } 
}

In this CL, Sobel switched to gray scale; previously it extracted just the
G channel from ARGB:
https://webrtc-codereview.appspot.com/57479004/

It was based on Bayer code, which used a channel selector.  The calling code was:
ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);

// Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB)
// Takes a source ARGB buffer and a destination G buffer, each with its own
// stride, plus the image width and height, and returns an int.
// NOTE(review): the stride units and the meaning of the return value are not
// visible in this excerpt - confirm against the implementation.
 147 LIBYUV_API
 148 int ARGBToG(const uint8* src_argb, int src_stride_argb,
 149             uint8* dst_g, int dst_stride_g,
 150             int width, int height);

// Specialized ARGB to Bayer that just isolates G channel.  
// x86 MSVC inline-asm version. Each loop iteration loads 32 bytes from
// src_argb, shifts every 32-bit pixel right by 8 and masks it with 0x000000ff
// so only the second byte (G) remains, packs the results down to bytes and
// stores 8 bytes to dst_bayer. The selector argument at [esp + 12] is never
// read. pix is reduced by 8 per iteration and is only tested after the store,
// so the loop body always runs at least once.
 5879 __declspec(naked) 
 5880 void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,   
 5881                            uint32 selector, int pix) {    
 5882   __asm { 
 5883     mov        eax, [esp + 4]    // src_argb  
 5884     mov        edx, [esp + 8]    // dst_bayer 
 5885                                  // selector (not loaded; unused here)  
 5886     mov        ecx, [esp + 16]   // pix   
 5887     pcmpeqb    xmm5, xmm5        // generate mask 0x000000ff  
 5888     psrld      xmm5, 24   
 5889   
 5890   wloop:  
 5891     movdqu     xmm0, [eax]    
 5892     movdqu     xmm1, [eax + 16]   
 5893     lea        eax, [eax + 32]    
 5894     psrld      xmm0, 8  // Move green to bottom.  
 5895     psrld      xmm1, 8    
 5896     pand       xmm0, xmm5 // keep only the low byte of each dword.
 5897     pand       xmm1, xmm5 
 5898     packssdw   xmm0, xmm1 // 8 dwords -> 8 words; values are 0..255 after the mask.
 5899     packuswb   xmm0, xmm1 // words -> bytes; the 8 G values end up in the low 8 bytes.
 5900     movq       qword ptr [edx], xmm0  // store only the low 8 bytes.
 5901     lea        edx, [edx + 8] 
 5902     sub        ecx, 8 
 5903     jg         wloop  
 5904     ret   
 5905   }   
 5906 }

// Isolates the G channel of a row of ARGB pixels (GCC-style extended asm).
// Each loop iteration loads 32 bytes via operand %0 (src_argb), shifts every
// 32-bit pixel right by 8 and masks it with 0x000000ff, packs the results
// down to bytes and stores 8 bytes via operand %1 (dst_bayer). Operand %2
// (pix) is reduced by 8 per iteration and tested after the store, so the
// loop body always runs at least once. selector is not an asm operand and is
// therefore unused.
// NOTE(review): MEMACCESS, MEMACCESS2, MEMLEA and LABELALIGN are macros
// defined outside this excerpt; their expansions are not shown here.
#ifdef HAS_ARGBTOBAYERGGROW_SSE2    
 5034 void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,   
 5035                            uint32 selector, int pix) {    
 5036   asm volatile (  
 5037     "pcmpeqb   %%xmm5,%%xmm5                   \n"    // xmm5 = all ones
 5038     "psrld     $0x18,%%xmm5                    \n"    // xmm5 = 0x000000ff per dword
 5039     LABELALIGN    
 5040   "1:                                          \n"    
 5041     "movdqu    " MEMACCESS(0) ",%%xmm0         \n"    
 5042     "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"    
 5043     "lea       " MEMLEA(0x20,0) ",%0           \n"    // advance src by 32 bytes
 5044     "psrld     $0x8,%%xmm0                     \n"    // move G to the low byte
 5045     "psrld     $0x8,%%xmm1                     \n"    
 5046     "pand      %%xmm5,%%xmm0                   \n"    // keep only the low byte
 5047     "pand      %%xmm5,%%xmm1                   \n"    
 5048     "packssdw  %%xmm1,%%xmm0                   \n"    // 8 dwords -> 8 words
 5049     "packuswb  %%xmm1,%%xmm0                   \n"    // words -> bytes; G in low 8 bytes
 5050     "movq      %%xmm0," MEMACCESS(1) "         \n"    // store the low 8 bytes
 5051     "lea       " MEMLEA(0x8,1) ",%1            \n"    // advance dst by 8 bytes
 5052     "sub       $0x8,%2                         \n"    
 5053     "jg        1b                              \n"    
 5054   : "+r"(src_argb),  // %0    
 5055     "+r"(dst_bayer), // %1    
 5056     "+r"(pix)        // %2    
 5057   :   
 5058   : "memory", "cc"    
 5059     , "xmm0", "xmm1", "xmm5"  
 5060   );  
 5061 } 
 5062 #endif  // HAS_ARGBTOBAYERGGROW_SSE2

1262 // Select G channels from ARGB.  e.g.  GGGGGGGG    
// AArch64 NEON version. Each loop iteration uses ld4 to load 32 bytes from
// src_argb de-interleaved into v0..v3 (one register per byte position of the
// pixel), then stores the 8 bytes of v1 (the second byte of each pixel, G)
// to dst_bayer. Both pointers are post-incremented by the instructions.
// pix is reduced by 8 per iteration and the branch follows the store, so the
// loop body always runs at least once. The selector parameter is unnamed
// and unused.
 1263 #ifdef HAS_ARGBTOBAYERGGROW_NEON  
 1264 void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,   
 1265                            uint32 /*selector*/, int pix) {    
 1266   asm volatile (  
 1267   "1:                                          \n"    
 1268     MEMACCESS(0)  
 1269     "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"  // load row 8 pixels    
 1270     "subs       %w2, %w2, #8                   \n"  // 8 processed per loop   
 1271     MEMACCESS(1)  
 1272     "st1        {v1.8b}, [%1], #8              \n"  // store 8 G's.   
 1273     "b.gt       1b                             \n"    
 1274   : "+r"(src_argb),   // %0   
 1275     "+r"(dst_bayer),  // %1   
 1276     "+r"(pix)         // %2   
 1277   :   
 1278   : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List   
 1279   );  
 1280 } 
 1281 #endif  // HAS_ARGBTOBAYERGGROW_NEON

1254 // Select G channels from ARGB.  e.g.  GGGGGGGG    
// 32-bit ARM NEON version. Each loop iteration uses vld4.8 to load 32 bytes
// from src_argb de-interleaved into d0..d3 (one register per byte position
// of the pixel), then stores the 8 bytes of d1 (the second byte of each
// pixel, G) to dst_bayer. Both pointers are written back ("!") by the
// instructions. pix is reduced by 8 per iteration and the branch follows the
// store, so the loop body always runs at least once. The selector parameter
// is unnamed and unused.
 1255 void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,   
 1256                            uint32 /*selector*/, int pix) {    
 1257   asm volatile (  
 1258   "1:                                          \n"    
 1259     MEMACCESS(0)  
 1260     "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load row 8 pixels. 
 1261     "subs       %2, %2, #8                     \n"  // 8 processed per loop   
 1262     MEMACCESS(1)  
 1263     "vst1.8     {d1}, [%1]!                    \n"  // store 8 G's.   
 1264     "bgt        1b                             \n"    
 1265   : "+r"(src_argb),   // %0   
 1266     "+r"(dst_bayer),  // %1   
 1267     "+r"(pix)         // %2   
 1268   :   
 1269   : "cc", "memory", "q0", "q1"  // Clobber List (q0/q1 overlap d0..d3)   
 1270   );  
 1271 }

89 // Select G channel from ARGB.  e.g.  GGGGGGGG   
// Portable C version. Copies byte 1 of every 4-byte source pixel to the
// destination, one output byte per pixel. The main loop handles two pixels
// per iteration; a final odd pixel is handled separately. The selector
// parameter is not used. Nothing is written when pix is 0.
 2090 void ARGBToBayerGGRow_C(const uint8* src_argb,    
 2091                         uint8* dst_bayer, uint32 selector, int pix) { 
 2092   // Copy a row of G. 
 2093   int x;  
 2094   for (x = 0; x < pix - 1; x += 2) {  // two pixels (8 source bytes) per pass
 2095     dst_bayer[0] = src_argb[1];   
 2096     dst_bayer[1] = src_argb[5];   
 2097     src_argb += 8;    
 2098     dst_bayer += 2;   
 2099   }   
 2100   if (pix & 1) {  // odd width: one pixel left over
 2101     dst_bayer[0] = src_argb[1];   
 2102   }   
 2103 }

 281 // ARGB to Bayer for any width: the SIMD row function converts the largest
// multiple of (MASK + 1) pixels, then the C row function converts the
// remaining (width & MASK) pixels. SBPP and BPP are the source and
// destination bytes per pixel, used to step both pointers past the part the
// SIMD function handled. The SIMD call is skipped when n is 0; the C function
// is always called, possibly with a width of 0. Both instantiations below use
// SBPP = 4, BPP = 1 and MASK = 7 (SIMD part is a multiple of 8 pixels).
 282 #define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK)        \   
 283     void NAMEANY(const uint8* src, uint8* dst, uint32 selector, int width) {   \   
 284       int n = width & ~MASK;                                                   \   
 285       if (n > 0) {                                                             \   
 286         ARGBTORGB_SIMD(src, dst, selector, n);                                 \   
 287       }                                                                        \   
 288       ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK);      \   
 289     }  
 290    
 291 #if defined(HAS_ARGBTOBAYERGGROW_SSE2) 
 292 BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C, 
 293          4, 1, 7)  
 294 #endif 
 295 #if defined(HAS_ARGBTOBAYERGGROW_NEON) 
 296 BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C, 
 297          4, 1, 7)  
 298 #endif 
 299    
 300 #undef BAYERANY

Original issue reported on code.google.com by fbarch...@google.com on 25 Feb 2016 at 12:04