llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
28.53k stars 11.79k forks source link

Itanium ABI DSO ctor dtor codegen control #79691

Open Pawday opened 8 months ago

Pawday commented 8 months ago

Issue

I have an example of code with static object initialization

struct Init_t {
    Init_t(int val);
    ~Init_t();
};

Init_t init_0(0);

Using the option -fno-register-global-dtors-with-atexit, I want the clang compiler not to emit any calls to __cxa_atexit or atexit.

I assume this option will disable those calls, because the compiler's help output says:

clang --help | grep "atexit"
  -fno-register-global-dtors-with-atexit
                          Don't use atexit to register global destructors
  -fno-register-global-dtors-with-cxa-atexit
                          Don't use __cxa_atexit to register global destructors
  -fno-use-cxa-atexit 
                         Alias for   -fno-register-global-dtors-with-cxa-atexit
  -fregister-global-dtors-with-atexit
                          Use atexit or __cxa_atexit to register global destructors

However the command

clang -S -O3 -fPIC -fno-register-global-dtors-with-atexit DSO_Life.cc

where DSO_Life.cc contains:

struct Init_t {
    Init_t(int val);
    ~Init_t();
};

Init_t init_0(0);

will generate x86_64 assembly code with a call to __cxa_atexit anyway:

_GLOBAL__sub_I_DSO_Life.cc:             # @_GLOBAL__sub_I_DSO_Life.cc
    .cfi_startproc
# %bb.0:
    pushq   %rbx
    .cfi_def_cfa_offset 16
    .cfi_offset %rbx, -16
    movq    init_0@GOTPCREL(%rip), %rbx
    movq    %rbx, %rdi
    xorl    %esi, %esi
    callq   _ZN6Init_tC1Ei@PLT
    movq    _ZN6Init_tD1Ev@GOTPCREL(%rip), %rdi
    leaq    __dso_handle(%rip), %rdx
    movq    %rbx, %rsi
    popq    %rbx
    .cfi_def_cfa_offset 8
    jmp __cxa_atexit@PLT                # TAILCALL

The LLVM IR also contains a call to __cxa_atexit:

clang -S -emit-llvm -O3 -fPIC -fno-register-global-dtors-with-atexit DSO_Life.cc
; Function Attrs: nofree nounwind
declare i32 @__cxa_atexit(ptr, ptr, ptr) local_unnamed_addr #2

; Function Attrs: uwtable
define internal void @_GLOBAL__sub_I_DSO_Life.cc() #3 section ".text.startup" {
  tail call void @_ZN6Init_tC1Ei(ptr noundef nonnull align 1 dereferenceable(1) @init_0, i32 noundef 0)
  %1 = tail call i32 @__cxa_atexit(ptr nonnull @_ZN6Init_tD1Ev, ptr nonnull @init_0, ptr nonnull @__dso_handle) #4
  ret void
}

Related behavior

The option -fno-use-cxa-atexit replaces the call to __cxa_atexit with a call to atexit:

clang -S -emit-llvm -O3 -fPIC -fno-use-cxa-atexit DSO_Life.cc

Note: DSO_Life.cc changed to

struct Init_t {
    Init_t(int val);
    ~Init_t();
};

Init_t init_0(0);
Init_t init_1(0);
Init_t init_2(0);
Init_t init_3(0);
Init_t init_4(0);
Init_t init_5(0);
Init_t init_6(0);
Init_t init_7(0);
; Function Attrs: uwtable
define internal void @_GLOBAL__sub_I_DSO_Life.cc() #4 section ".text.startup" {
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_0, i32 noundef 0)
  %1 = tail call i32 @atexit(ptr nonnull @__dtor_init_0) #3
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_1, i32 noundef 0)
  %2 = tail call i32 @atexit(ptr nonnull @__dtor_init_1) #3
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_2, i32 noundef 0)
  %3 = tail call i32 @atexit(ptr nonnull @__dtor_init_2) #3
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_3, i32 noundef 0)
  %4 = tail call i32 @atexit(ptr nonnull @__dtor_init_3) #3
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_4, i32 noundef 0)
  %5 = tail call i32 @atexit(ptr nonnull @__dtor_init_4) #3
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_5, i32 noundef 0)
  %6 = tail call i32 @atexit(ptr nonnull @__dtor_init_5) #3
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_6, i32 noundef 0)
  %7 = tail call i32 @atexit(ptr nonnull @__dtor_init_6) #3
  tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_7, i32 noundef 0)
  %8 = tail call i32 @atexit(ptr nonnull @__dtor_init_7) #3
  ret void
}

It will also emit a separate destructor wrapper for each global object, like this:

; Function Attrs: nounwind uwtable
define internal void @__dtor_init_0() #2 section ".text.startup" {
  tail call void @Init_t::~Init_t()(ptr @init_0)
  ret void
}

; Function Attrs: nounwind uwtable
define internal void @__dtor_init_1() #2 section ".text.startup" {
  tail call void @Init_t::~Init_t()(ptr @init_1)
  ret void
}

; Function Attrs: nounwind uwtable
define internal void @__dtor_init_2() #2 section ".text.startup" {
  tail call void @Init_t::~Init_t()(ptr @init_2)
  ret void
}

; Function Attrs: nounwind uwtable
define internal void @__dtor_init_3() #2 section ".text.startup" {
  tail call void @Init_t::~Init_t()(ptr @init_3)
  ret void
}

; Function Attrs: nounwind uwtable
define internal void @__dtor_init_4() #2 section ".text.startup" {
  tail call void @Init_t::~Init_t()(ptr @init_4)
  ret void
}

; Function Attrs: nounwind uwtable
define internal void @__dtor_init_5() #2 section ".text.startup" {
  tail call void @Init_t::~Init_t()(ptr @init_5)
  ret void
}

(This can be useful for reflection; I am going to use it if it's legal.)

Compiler Explorer

GCC behavior

Note: DSO_Life.cc changed to

struct Init_t {
    Init_t(int val);
    ~Init_t();
};

Init_t init_0(0);
Init_t init_1(1);

GCC 13.2.1 also has the option -fno-use-cxa-atexit

Command:

gcc -S -O3 -fPIC -fno-use-cxa-atexit DSO_Life.cc

Because GCC does not have the -fno-register-global-dtors-with-atexit option, it will produce construction and destruction symbols for all objects in the TU, without any calls to runtime registration procedures:

_GLOBAL__sub_I_DSO_Life.cc:
    subq    $8, %rsp
    movq    init_0@GOTPCREL(%rip), %rdi
    xorl    %esi, %esi
    call    Init_t::Init_t(int)@PLT ; ctor init_0
    movq    init_1@GOTPCREL(%rip), %rdi
    movl    $1, %esi
    addq    $8, %rsp
    jmp Init_t::Init_t(int)@PLT ; ctor init_1
_GLOBAL__sub_D_DSO_Life.cc:
    subq    $8, %rsp
    movq    init_1@GOTPCREL(%rip), %rdi
    call    Init_t::~Init_t()@PLT  ; dtor init_1
    movq    init_0@GOTPCREL(%rip), %rdi
    addq    $8, %rsp
    jmp Init_t::~Init_t()@PLT ; dtor init_0

Proposal

* Remove the mention of __cxa_atexit from the descriptions of the -fregister-global-dtors-with-atexit and -fno-register-global-dtors-with-atexit options, such that

clang --help | grep "atexit"

will produce

  -fno-register-global-dtors-with-atexit
                          Don't use atexit to register global destructors
  -fno-use-cxa-atexit     Don't use __cxa_atexit to register global destructors
  -fregister-global-dtors-with-atexit
                          Use atexit to register global destructors instead of __cxa_atexit

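* Add a -fno-register-global-dtors-with-cxa-atexit option as an alias for the existing -fno-use-cxa-atexit
* Add a -fno-use-atexit option as an alias for the existing -fno-register-global-dtors-with-atexit
* Make codegen emit separate procedures for constructing and destructing the global objects in the translation unit if both -fno-use-atexit and -fno-use-cxa-atexit are specified

Final output for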

clang --help | grep "atexit"
  -fno-register-global-dtors-with-atexit
                          Don't use atexit to register global destructors
  -fno-register-global-dtors-with-cxa-atexit
                          Don't use __cxa_atexit to register global destructors
  -fno-use-atexit 
                          Alias for -fno-register-global-dtors-with-atexit
  -fno-use-cxa-atexit 
                          Alias for -fno-register-global-dtors-with-cxa-atexit
  -fregister-global-dtors-with-atexit
                          Use atexit to register global destructors instead of __cxa_atexit

Thank You

llvmbot commented 8 months ago

@llvm/issue-subscribers-clang-codegen

Author: Pavel Korotkevich (Pawday)

# Issue I have an example of code with static object initialization ```c++ struct Init_t { Init_t(int val); ~Init_t(); }; Init_t init_0(0); ``` using option ``-fno-register-global-dtors-with-atexit`` i want the clang compiler to not emit any calls to **__cxa_atexit** or **atexit** I assume this option will disable the calls because compiler manual says: ```sh clang --help | grep "atexit" ``` ```txt -fno-register-global-dtors-with-atexit Don't use atexit to register global destructors -fno-register-global-dtors-with-cxa-atexit Don't use __cxa_atexit to register global destructors -fno-use-cxa-atexit Alias for -fno-register-global-dtors-with-cxa-atexit -fregister-global-dtors-with-atexit Use atexit or __cxa_atexit to register global destructors ``` However the command ```sh clang -S -O3 -fPIC -fno-register-global-dtors-with-atexit DSO_Life.cc ``` where DSO_Life.cc contains: ```c++ struct Init_t { Init_t(int val); ~Init_t(); }; Init_t init_0(0); ``` will generate x86_64 assembly code with call to **__cxa_atexit** anyway ```asm _GLOBAL__sub_I_DSO_Life.cc: # @_GLOBAL__sub_I_DSO_Life.cc .cfi_startproc # %bb.0: pushq %rbx .cfi_def_cfa_offset 16 .cfi_offset %rbx, -16 movq init_0@GOTPCREL(%rip), %rbx movq %rbx, %rdi xorl %esi, %esi callq _ZN6Init_tC1Ei@PLT movq _ZN6Init_tD1Ev@GOTPCREL(%rip), %rdi leaq __dso_handle(%rip), %rdx movq %rbx, %rsi popq %rbx .cfi_def_cfa_offset 8 jmp __cxa_atexit@PLT # TAILCALL ``` LLVM IR also contains call to **__cxa_atexit** ```sh clang -S -emit-llvm -O3 -fPIC -fno-register-global-dtors-with-atexit DSO_Life.cc ``` ```llvm ; Function Attrs: nofree nounwind declare i32 @__cxa_atexit(ptr, ptr, ptr) local_unnamed_addr #2 ; Function Attrs: uwtable define internal void @_GLOBAL__sub_I_DSO_Life.cc() #3 section ".text.startup" { tail call void @_ZN6Init_tC1Ei(ptr noundef nonnull align 1 dereferenceable(1) @init_0, i32 noundef 0) %1 = tail call i32 @__cxa_atexit(ptr nonnull @_ZN6Init_tD1Ev, ptr nonnull @init_0, ptr nonnull @__dso_handle) #4 ret void } 
``` # Related behavior option ``-fno-use-cxa-atexit`` will replace call to **__cxa_atexit** with call to atexit ```sh clang -S -emit-llvm -O3 -fPIC -fno-use-cxa-atexit DSO_Life.cc ``` Note: DSO_Life.cc changed to ```c++ struct Init_t { Init_t(int val); ~Init_t(); }; Init_t init_0(0); Init_t init_1(0); Init_t init_2(0); Init_t init_3(0); Init_t init_4(0); Init_t init_5(0); Init_t init_6(0); Init_t init_7(0); ``` ```llvm ; Function Attrs: uwtable define internal void @_GLOBAL__sub_I_DSO_Life.cc() #4 section ".text.startup" { tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_0, i32 noundef 0) %1 = tail call i32 @atexit(ptr nonnull @__dtor_init_0) #3 tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_1, i32 noundef 0) %2 = tail call i32 @atexit(ptr nonnull @__dtor_init_1) #3 tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_2, i32 noundef 0) %3 = tail call i32 @atexit(ptr nonnull @__dtor_init_2) #3 tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_3, i32 noundef 0) %4 = tail call i32 @atexit(ptr nonnull @__dtor_init_3) #3 tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_4, i32 noundef 0) %5 = tail call i32 @atexit(ptr nonnull @__dtor_init_4) #3 tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_5, i32 noundef 0) %6 = tail call i32 @atexit(ptr nonnull @__dtor_init_5) #3 tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_6, i32 noundef 0) %7 = tail call i32 @atexit(ptr nonnull @__dtor_init_6) #3 tail call void @Init_t::Init_t(int)(ptr noundef nonnull align 1 dereferenceable(1) @init_7, i32 noundef 0) %8 = tail call i32 @atexit(ptr nonnull @__dtor_init_7) #3 ret void } ``` It also will extract dtor for each global object like that: ```llvm ; Function Attrs: nounwind uwtable define internal void 
@__dtor_init_0() #2 section ".text.startup" { tail call void @Init_t::~Init_t()(ptr @init_0) ret void } ; Function Attrs: nounwind uwtable define internal void @__dtor_init_1() #2 section ".text.startup" { tail call void @Init_t::~Init_t()(ptr @init_1) ret void } ; Function Attrs: nounwind uwtable define internal void @__dtor_init_2() #2 section ".text.startup" { tail call void @Init_t::~Init_t()(ptr @init_2)# Proposal ; Function Attrs: nounwind uwtable define internal void @__dtor_init_3() #2 section ".text.startup" { tail call void @Init_t::~Init_t()(ptr @init_3) ret void } ; Function Attrs: nounwind uwtable define internal void @__dtor_init_4() #2 section ".text.startup" { tail call void @Init_t::~Init_t()(ptr @init_4) ret void } ; Function Attrs: nounwind uwtable define internal void @__dtor_init_5() #2 section ".text.startup" { tail call void @Init_t::~Init_t()(ptr @init_5) ret void } ``` (it can be usefull for reflection, i am gonna use that if it's legal) # [Compiler Explorer](https://godbolt.org/z/E6x6e7x9n) # GCC behavior Note: DSO_Life.cc changed to ```c++ struct Init_t { Init_t(int val); ~Init_t(); }; Init_t init_0(0); Init_t init_1(1); ``` GCC 13.2.1 also have option ``-fno-use-cxa-atexit`` Command: ```sh gcc -S -O3 -fPIC -fno-use-cxa-atexit DSO_Life.cc ``` Because GCC does not have -fno-register-global-dtors-with-atexit option it will produce construction and destruction symbols for all objects in the TU without the calls to runtime procedures ```asm _GLOBAL__sub_I_DSO_Life.cc: subq $8, %rsp movq init_0@GOTPCREL(%rip), %rdi xorl %esi, %esi call Init_t::Init_t(int)@PLT ; ctor init_0 movq init_1@GOTPCREL(%rip), %rdi movl $1, %esi addq $8, %rsp jmp Init_t::Init_t(int)@PLT ; ctor init_1 _GLOBAL__sub_D_DSO_Life.cc: subq $8, %rsp movq init_1@GOTPCREL(%rip), %rdi call Init_t::~Init_t()@PLT ; dtor init_1 movq init_0@GOTPCREL(%rip), %rdi addq $8, %rsp jmp Init_t::~Init_t()@PLT ; dtor init_0 ``` # Proposal * remove mention of **_cxa_atexit** for 
``-fregister-global-dtors-with-atexit`` and ``-fno-register-global-dtors-with-atexit`` options such that ```sh clang --help | grep "atexit" ``` will produce <pre> -fno-register-global-dtors-with-atexit Don't use atexit <del>or __cxa_atexit</del> to register global destructors -fno-use-cxa-atexit Don't use __cxa_atexit <del>for calling</del> <ins>to register global</ins> destructors -fregister-global-dtors-with-atexit Use atexit <del>or __cxa_atexit</del> to register global destructors <ins>instead of __cxa_atexit</ins> </pre> * add ``-fno-register-global-dtors-with-cxa-atexit`` option as alias for existing ``-fno-use-cxa-atexit`` * add ``-fno-use-atexit`` option as alias for existing ``-fno-register-global-dtors-with-atexit`` * make codegen emit separated procedures for construct and destruct global objects in the translation unit if options ``-fno-use-atexit`` and ``-fno-use-cxa-atexit`` are specified Final output for ```sh clang --help | grep "atexit" ``` <pre> -fno-register-global-dtors-with-atexit Don't use atexit to register global destructors -fno-register-global-dtors-with-cxa-atexit Don't use __cxa_atexit to register global destructors -fno-use-atexit Alias for -fno-register-global-dtors-with-atexit -fno-use-cxa-atexit Alias for -fno-register-global-dtors-with-cxa-atexit -fregister-global-dtors-with-atexit Use atexit to register global destructors instead of __cxa_atexit </pre> # Thank You