Clang version in use: 13.0.0 (installed from Gentoo's package manager), but the problem is not Gentoo-specific and I could reproduce with current trunk as well. (Godbolt: https://gcc.godbolt.org/z/boxqK5r6d)
Compiling the following C++ code with `clang++ -c -target i386-windows x.cpp` fails with an `error in backend: failed to perform tail call elimination on a call site marked musttail` error. When clang is compiled with assertions, the error message is `musttail call must precede a ret with an optional bitcast`.
What happens is that the x86-winehstate LLVM pass inserts instructions between the musttail call and the ret instructions, which makes the code invalid.
IR dump before and after the x86-winehstate pass
```
*** IR Dump Before Windows 32-bit x86 EH state insertion (x86-winehstate) ***
define dso_local void @"?h@?$Y@X@@SAXXZ"() #0 comdat align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
invoke x86_thiscallcc void @"?f@X@@QAEXXZ"()
to label %1 unwind label %2
1: ; preds = %0
musttail call void @"?g@@YAXXZ"()
ret void
2: ; preds = %0
%3 = cleanuppad within none []
cleanupret from %3 unwind to caller
}
*** IR Dump After Windows 32-bit x86 EH state insertion (x86-winehstate) ***
define dso_local void @"?h@?$Y@X@@SAXXZ"() #0 comdat align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
%1 = alloca %CXXExceptionRegistration, align 4
%2 = bitcast %CXXExceptionRegistration* %1 to i8*
call void @llvm.x86.seh.ehregnode(i8* %2)
%3 = call i8* @llvm.stacksave()
%4 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 0
store i8* %3, i8** %4, align 4
%5 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 2
store i32 -1, i32* %5, align 4
%6 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 1
%7 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %6, i32 0, i32 1
store i8* bitcast (i32 (i8*, i8*, i8*, i8*)* @"__ehhandler$?h@?$Y@X@@SAXXZ" to i8*), i8** %7, align 4
%8 = load %EHRegistrationNode*, %EHRegistrationNode* addrspace(257)* null, align 4
%9 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %6, i32 0, i32 0
store %EHRegistrationNode* %8, %EHRegistrationNode** %9, align 4
store %EHRegistrationNode* %6, %EHRegistrationNode* addrspace(257)* null, align 4
%10 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 2
store i32 0, i32* %10, align 4
invoke x86_thiscallcc void @"?f@X@@QAEXXZ"()
to label %11 unwind label %16
11: ; preds = %0
%12 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 2
store i32 -1, i32* %12, align 4
musttail call void @"?g@@YAXXZ"()
%13 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 1
%14 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %13, i32 0, i32 0
%15 = load %EHRegistrationNode*, %EHRegistrationNode** %14, align 4
store %EHRegistrationNode* %15, %EHRegistrationNode* addrspace(257)* null, align 4
ret void
16: ; preds = %0
%17 = cleanuppad within none []
cleanupret from %17 unwind to caller
}
```
Clang version in use: 13.0.0 (installed from Gentoo's package manager), but the problem is not Gentoo-specific and I could reproduce with current trunk as well. (Godbolt: https://gcc.godbolt.org/z/boxqK5r6d)
Compiling the following C++ code with `clang++ -c -target i386-windows x.cpp` fails with an `error in backend: failed to perform tail call elimination on a call site marked musttail` error. When clang is compiled with assertions, the error message is `musttail call must precede a ret with an optional bitcast`.

The reduced `bugpoint-reduced-simplified.bc` from running `bugpoint -run-llc x.bc` is uploaded here: https://web.tresorit.com/l/VVWGy#E9cy7Z9o6YakdQ_jmTsPPw

What happens is that the `x86-winehstate` LLVM pass inserts instructions between the `musttail call` and the `ret` instructions, which makes the code invalid.

IR dump before and after the x86-winehstate pass:
```
*** IR Dump Before Windows 32-bit x86 EH state insertion (x86-winehstate) ***
define dso_local void @"?h@?$Y@X@@SAXXZ"() #0 comdat align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
invoke x86_thiscallcc void @"?f@X@@QAEXXZ"()
to label %1 unwind label %2
1: ; preds = %0
musttail call void @"?g@@YAXXZ"()
ret void
2: ; preds = %0
%3 = cleanuppad within none []
cleanupret from %3 unwind to caller
}
*** IR Dump After Windows 32-bit x86 EH state insertion (x86-winehstate) ***
define dso_local void @"?h@?$Y@X@@SAXXZ"() #0 comdat align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
%1 = alloca %CXXExceptionRegistration, align 4
%2 = bitcast %CXXExceptionRegistration* %1 to i8*
call void @llvm.x86.seh.ehregnode(i8* %2)
%3 = call i8* @llvm.stacksave()
%4 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 0
store i8* %3, i8** %4, align 4
%5 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 2
store i32 -1, i32* %5, align 4
%6 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 1
%7 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %6, i32 0, i32 1
store i8* bitcast (i32 (i8*, i8*, i8*, i8*)* @"__ehhandler$?h@?$Y@X@@SAXXZ" to i8*), i8** %7, align 4
%8 = load %EHRegistrationNode*, %EHRegistrationNode* addrspace(257)* null, align 4
%9 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %6, i32 0, i32 0
store %EHRegistrationNode* %8, %EHRegistrationNode** %9, align 4
store %EHRegistrationNode* %6, %EHRegistrationNode* addrspace(257)* null, align 4
%10 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 2
store i32 0, i32* %10, align 4
invoke x86_thiscallcc void @"?f@X@@QAEXXZ"()
to label %11 unwind label %16
11: ; preds = %0
%12 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 2
store i32 -1, i32* %12, align 4
musttail call void @"?g@@YAXXZ"()
%13 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %1, i32 0, i32 1
%14 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %13, i32 0, i32 0
%15 = load %EHRegistrationNode*, %EHRegistrationNode** %14, align 4
store %EHRegistrationNode* %15, %EHRegistrationNode* addrspace(257)* null, align 4
ret void
16: ; preds = %0
%17 = cleanuppad within none []
cleanupret from %17 unwind to caller
}
```