; Reproducer: reinterpret the i64 argument as a 64-lane i1 mask and use it to
; compress the bytes of the vector argument, passing an all-zero vector as the
; third (passthru) operand of the intrinsic.
define dso_local <64 x i8> @compress(<64 x i8> %0, i64 %1) local_unnamed_addr {
Entry:
  ; One mask bit per byte lane of the input vector.
  %mask = bitcast i64 %1 to <64 x i1>
  ; The passthru operand is zeroinitializer.
  %compressed = tail call fastcc <64 x i8> @llvm.experimental.vector.compress.v64i8(<64 x i8> %0, <64 x i1> %mask, <64 x i8> zeroinitializer)
  ret <64 x i8> %compressed
}
; Declaration of the <64 x i8> overload of the vector-compress intrinsic.
; Operands as used by @compress: source vector, <64 x i1> mask, passthru vector.
; NOTE(review): attribute group #1 is referenced here but not defined in the
; visible snippet.
declare fastcc <64 x i8> @llvm.experimental.vector.compress.v64i8(<64 x i8>, <64 x i1>, <64 x i8>) #1
[Godbolt link](https://llvm.godbolt.org/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,selection:(endColumn:1,endLineNumber:9,positionColumn:1,positionLineNumber:9,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:'define+dso_local+%3C64+x+i8%3E+@compress(%3C64+x+i8%3E+%250,+i64+%251)+local_unnamed_addr+%7B%0AEntry:%0A++%252+%3D+bitcast+i64+%251+to+%3C64+x+i1%3E%0A++%253+%3D+tail+call+fastcc+%3C64+x+i8%3E+@llvm.experimental.vector.compress.v64i8(%3C64+x+i8%3E+%250,+%3C64+x+i1%3E+%252,+%3C64+x+i8%3E+zeroinitializer)%0A++ret+%3C64+x+i8%3E+%253%0A%7D%0A%0Adeclare+fastcc+%3C64+x+i8%3E+@llvm.experimental.vector.compress.v64i8(%3C64+x+i8%3E,+%3C64+x+i1%3E,+%3C64+x+i8%3E)+%231%0A'),l:'5',n:'1',o:'LLVM+IR+source+%231',t:'0')),k:49.45205479452055,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:irclangtrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,libs:!(),options:'-O3+-march%3Dznver5',overrides:!(),selection:(endColumn:12,endLineNumber:7,positionColumn:12,positionLineNumber:7,selectionStartColumn:12,selectionStartLineNumber:7,startColumn:12,startLineNumber:7),source:1),l:'5',n:'0',o:'+clang+(trunk)+(Editor+%231)',t:'0')),k:50.54794520547946,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
```llvm
; Reproducer: reinterpret the i64 argument as a 64-lane i1 mask and use it to
; compress the bytes of the vector argument, passing an all-zero vector as the
; third (passthru) operand of the intrinsic.
define dso_local <64 x i8> @compress(<64 x i8> %0, i64 %1) local_unnamed_addr {
Entry:
  ; One mask bit per byte lane of the input vector.
  %mask = bitcast i64 %1 to <64 x i1>
  ; The passthru operand is zeroinitializer.
  %compressed = tail call fastcc <64 x i8> @llvm.experimental.vector.compress.v64i8(<64 x i8> %0, <64 x i1> %mask, <64 x i8> zeroinitializer)
  ret <64 x i8> %compressed
}
; Declaration of the <64 x i8> overload of the vector-compress intrinsic.
; Operands as used by @compress: source vector, <64 x i1> mask, passthru vector.
; NOTE(review): attribute group #1 is referenced here but not defined in the
; visible snippet.
declare fastcc <64 x i8> @llvm.experimental.vector.compress.v64i8(<64 x i8>, <64 x i1>, <64 x i8>) #1
```
Compiled for Zen 5 (clang trunk, `-O3 -march=znver5`), we get:
```asm
# Code generated for @compress: vector argument in zmm0, i64 mask in rdi.
compress:
.Lcompress$local:
kmovq k1, rdi                   # move the 64-bit mask into opmask register k1
vpxor xmm1, xmm1, xmm1          # zero xmm1 (VEX encoding also clears the rest of zmm1)
vpcompressb zmm1 {k1}, zmm0     # merge-masked compress of zmm0 into the pre-zeroed zmm1
vmovdqa64 zmm0, zmm1            # copy the result into the return register zmm0
ret
```
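The `vpxor` is unnecessary. Because the passthru operand is `zeroinitializer`, we could just use the zero-masking `{z}` variant (e.g. `vpcompressb zmm0 {k1}{z}, zmm0`), which zeroes the unselected lanes itself and should also make the trailing `vmovdqa64` unnecessary.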
Godbolt link: see the full URL above (LLVM IR source, clang trunk, options `-O3 -march=znver5`).
Compiled for Zen 5, we get:
The `vpxor` is unnecessary. We could just use the `{z}` variant.