Open IntegratedQuantum opened 1 year ago
I guess .field_ptr should be generated in fieldAccess in this case.
working on it.
Fix reverted in f93a36c091a151ed02602d2f330f7206eb9f95a3
I've been running into this quite often on platforms where llvm isn't great at optimizing (powerpc).
The following snipped worked great as a test example to show this bug happening and not happening even though all examples index into the array.
const std = @import("std");
const E = enum(u8){
a,
b,
c,
d,
};
var array: std.enums.EnumArray(E, u32) = std.enums.EnumArray(E, u32).initFill(0);
noinline fn handle(id: u32) u32 {
// Causes odd stack pushing and popping with 4 or less enum members, but optimized out the memcpy
// LLVM is unable to optimize out the memcopy when the enum has more than 4 members.
return array.get(@enumFromInt(id));
// These both cause correct code generation, and do not generate a call to memcpy in llvm ir
//return array.values[id];
//return array.getPtrConst(@enumFromInt(id)).*;
}
export fn square(id: u32) u32 {
array.set(.a, 5);
array.set(.c, 8);
return handle(id);
}
I'm compiling this snipped with -O ReleaseSmall -target powerpc-freestanding-eabi
to see the issue easily.
I hope it is of some use.
A good explanation can be found here: https://github.com/ziglang/zig/issues/17580#issuecomment-1951467750
Summary: When an array is indexed, the expression on the left (the array, which is a value) is evaluated before (by design) the expression on the right (the function that returns the index). If there's any possibility that the function call modifies the array (which is the default assumption), we have no choice but to make a copy of the array before calling that function.
return array.get(@enumFromInt(id));
~I guess special cases (as in #13233) can be made for builtin functions like this.~ (Seems that's not the case as the index expression is the one at here and not the builtin one here)
For the general case, AFAIK possible solutions are: 1) Redefine the evaluation order of the index and the indexed. 2) Somehow optimize-out the copy...
Zig Version
0.11.0-dev.753+331861161
Steps to Reproduce and Observed Behavior
I thought this was covered by #12215, but now that it is fixed I still get terrible performance. Here is a simple benchmark:
Even in release-fast the performance is terrible:
[godbolt](https://godbolt.org/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:2,lang:zig,selection:(endColumn:31,endLineNumber:17,positionColumn:31,positionLineNumber:17,selectionStartColumn:31,selectionStartLineNumber:17,startColumn:31,startLineNumber:17),source:'const+std+%3D+@import(%22std%22)%3B%0A%0Aconst+len+%3D+32*32*32%3B%0A%0Afn+getIndex(i:+u16)+u16+%7B%0A%09return+i%3B%0A%7D%0A%0Apub+const+Chunk+%3D+struct+%7B%0A%09blocks:+%5Blen%5Du16+%3D+undefined,%0A%7D%3B%0A%0Apub+noinline+fn+regenerateMainMesh(chunk:+*Chunk)+u32+%7B%0A%09var+sum:+u32+%3D+0%3B%0A%09var+i:+u16+%3D+0%3B%0A%09while(i+%3C+len)+:+(i+%2B%3D+1)+%7B%0A%09%09sum+%2B%3D+chunk.blocks%5BgetIndex(i)%5D%3B+//+%E2%86%90+workaround:+(%26chunk.blocks)%5B...%5D%0A%09%7D%0A%09return+sum%3B%0A%7D%0A%0Apub+fn+main()+void+%7B%0A%09var+chunk:+Chunk+%3D+Chunk%7B%7D%3B%0A%09for(chunk.blocks)+%7C*block,+i%7C+%7B%0A%09%09block.*+%3D+@intCast(u16,+i)%3B%0A%09%7D%0A%09const+start+%3D+std.time.nanoTimestamp()%3B%0A%09const+sum+%3D+regenerateMainMesh(%26chunk)%3B%0A%09const+end+%3D+std.time.nanoTimestamp()%3B%0A%09std.log.err(%22Time:+%7B%7D+Sum:+%7B%7D%22,+.%7Bend+-+start,+sum%7D)%3B%0A%7D'),l:'5',n:'0',o:'Zig+source+%232',t:'0')),k:48.250460405156545,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:ztrunk,deviceViewOpen:'1',filters:(b:'0',binary:'1',commentOnly:'0',demangle:'0',directives:'0',execute:'0',intel:'0',libraryCode:'0',trim:'1'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,libs:!(),options:'-OReleaseFast',selection:(endColumn:9,endLineNumber:479,positionColumn:9,positionLineNumber:479,selectionStartColumn:9,selectionStartLineNumber:479,startColumn:9,startLineNumber:479),source:2),l:'5',n:'0',o:'+zig+trunk+(Editor+%232)',t:'0')),header:(),k:29.233691411637672,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:output,i:(compilerName:'zig+0.9.0',editorid:2,fontScale:14,fontUsePx:'0',j:1,wrap:'1'),l:'5',n:'0',o:'Output+of+zig+trunk+(Compiler+%231)',t:'0')),k:22.515848183205783,l:'4',n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4) reveals that, like in #12215, there is a
memcpy
when accessing the array.Expected Behavior
When applying the workaround
the performance is significantly better: