Open preames opened 9 years ago
I'm not sure if this is the same thing that I'm seeing with Clang. Here's my toy example:
// Non-final variant of the reproducer: read() makes an indirect call through
// a stored pointer-to-member-function that defaults to read_normal().
struct A
{
    // Invokes whichever member function m_func currently designates.
    bool read()
    {
        return (this->*m_func)();
    }

private:
    // Pointer to a non-static member function of A taking no arguments and
    // returning bool (the type of &A::read_normal).
    using Func = bool (A::*)();

    // The only target m_func is ever given in this example.
    bool read_normal()
    {
        return true;
    }

    Func m_func = &A::read_normal;
};
// Final variant of the reproducer: identical to the non-final case except
// that the class is declared `final`, so no type can derive from it.
class B final
{
public:
    // Invokes whichever member function m_func currently designates.
    bool read()
    {
        return (this->*m_func)();
    }

private:
    // Pointer to a non-static member function of B taking no arguments and
    // returning bool (the type of &B::read_normal).
    using Func = bool (B::*)();

    // The only target m_func is ever given in this example.
    bool read_normal()
    {
        return true;
    }

    Func m_func = &B::read_normal;
};
// Free-function entry point for the non-final case; forwards to A::read().
// The return type is deduced from that call.
auto do_stuff(A& a)
{
    const auto result = a.read();
    return result;
}
// Free-function entry point for the final case; forwards to B::read().
// The return type is deduced from that call.
auto do_stuff(B& a)
{
    const auto result = a.read();
    return result;
}
Clang 19.1.0 produces the following with `-O3`:
do_stuff(A&):
mov rax, qword ptr [rdi]
add rdi, qword ptr [rdi + 8]
test al, 1
je .LBB0_2
mov rcx, qword ptr [rdi]
mov rax, qword ptr [rcx + rax - 1]
.LBB0_2:
jmp rax
do_stuff(B&):
mov rax, qword ptr [rdi]
add rdi, qword ptr [rdi + 8]
test al, 1
je .LBB1_2
mov rcx, qword ptr [rdi]
mov rax, qword ptr [rcx + rax - 1]
.LBB1_2:
jmp rax
Whereas GCC 14.2.1 with `-O3` notices the `final` and omits the test:
do_stuff(A&):
mov rax, QWORD PTR [rdi]
add rdi, QWORD PTR [rdi+8]
test al, 1
je .L2
mov rdx, QWORD PTR [rdi]
mov rax, QWORD PTR [rdx-1+rax]
.L2:
jmp rax
do_stuff(B&):
mov rdx, QWORD PTR [rdi+8]
mov rax, QWORD PTR [rdi]
add rdx, rdi
mov rdi, rdx
jmp rax
I'm not 100% sure why there's a test in the first place but I'm assuming it's to detect multiple inheritance or something along those lines.
Extended Description
We appear to be failing to devirtualize a pointer-to-member call when we know that the type of the receiver is a final class.
(NOTE: This bug report is based on data collected on a ToT build in early March. I have not confirmed that this still applies to ToT today.)
I originally noted this when writing a toy interpreter. Here's a simplified bit of code pulled from that project:
struct Interpreter final {
  typedef void (Interpreter::*BytecodeFuncType)(void *);
  BytecodeFuncType BytecodeFuncs[NumBytecodes] = { ... };
  void dispatch_next(void *stack) {
    BytecodeFuncType NextFunc = BytecodeFuncs[bytecode[current_index]];
    current_index++;
    (this->*NextFunc)(stack);
  }
};
The important part is this line: (this->*NextFunc)(stack);
This is calling a pointer-to-member function where the receiver type is statically known. In particular, we know that "this" is a static type of Interpreter and that, because of the final annotation, there are no other possible dynamic types.
We emit LLVM IR that looks like this (after -O3):
  %13 = load { i64, i64 }* %arrayidx3.i, align 8, !tbaa !10
  %.fca.0.extract.i = extractvalue { i64, i64 } %13, 0
  %.fca.1.extract.i = extractvalue { i64, i64 } %13, 1
  %14 = bitcast %struct.Interpreter* %this to i8*
  %15 = getelementptr inbounds i8* %14, i64 %.fca.1.extract.i
  %this.adjusted.i = bitcast i8* %15 to %struct.Interpreter*
  %16 = and i64 %.fca.0.extract.i, 1
  %memptr.isvirtual.i = icmp eq i64 %16, 0
  br i1 %memptr.isvirtual.i, label %memptr.nonvirtual.i, label %memptr.virtual.i

memptr.virtual.i:                                 ; preds = %_ZNSt6vectorIlSaIlEE9push_backEOl.exit
  %17 = bitcast i8* %15 to i8**
  %vtable.i = load i8** %17, align 8, !tbaa !11
  %18 = add i64 %.fca.0.extract.i, -1
  %19 = getelementptr i8* %vtable.i, i64 %18
  %20 = bitcast i8* %19 to void (%struct.Interpreter*, %"class.std::vector"*)**
  %memptr.virtualfn.i = load void (%struct.Interpreter*, %"class.std::vector"*)** %20, align 8
  br label %_ZN11Interpreter13dispatch_nextERSt6vectorIlSaIlEE.exit

memptr.nonvirtual.i:                              ; preds = %_ZNSt6vectorIlSaIlEE9push_backEOl.exit
  %memptr.nonvirtualfn.i = inttoptr i64 %.fca.0.extract.i to void (%struct.Interpreter*, %"class.std::vector"*)*
  br label %_ZN11Interpreter13dispatch_nextERSt6vectorIlSaIlEE.exit

_ZN11Interpreter13dispatch_nextERSt6vectorIlSaIlEE.exit: ; preds = %memptr.virtual.i, %memptr.nonvirtual.i
  %21 = phi void (%struct.Interpreter*, %"class.std::vector"*)* [ %memptr.virtualfn.i, %memptr.virtual.i ], [ %memptr.nonvirtualfn.i, %memptr.nonvirtual.i ]
  tail call void %21(%struct.Interpreter* %this.adjusted.i, %"class.std::vector"* dereferenceable(24) %stack)
  ret void
The virtual dispatch path of this control flow is impossible. Similarly, all of the this adjustment code is unnecessary.