Open ElvinaYakubova opened 2 years ago
This is not an AArch64-exclusive issue. I see the same behavior on armv7 as well. Clearly the compiler mistakes the first mov
instruction for part of the prologue.
The same issue has been reproduced locally. Details are as follows.
Source code:
int main() {
int a = 2; // source line 2: the assignment whose `mov` the report says is attributed to the wrong line
}
Compile command:
clang -S -O0 -g --target=aarch64 ./test.c
Clang configuration:
Homebrew clang version 13.0.0
Target: x86_64-apple-darwin20.6.0
Thread model: posix
InstalledDir: /usr/local/opt/llvm/bin
Full assembly output:
.text
.file "test.c"
// main -- clang output for the reproducer `int main() { int a = 2; }`,
// built with `-S -O0 -g --target=aarch64`. The comments added below only
// annotate where each instruction falls relative to the `.loc` rows.
.globl main // -- Begin function main
.p2align 2
.type main,@function
main: // @main
.Lfunc_begin0:
.file 1 "/Users/***" "./test.c"
// Line-table row for test.c line 1; instructions up to the next `.loc`
// are attributed to this row.
.loc 1 1 0 // ./test.c:1:0
.cfi_startproc
// %bb.0:
// Reserve a 16-byte stack frame.
sub sp, sp, #16 // =16
.cfi_def_cfa_offset 16
// Loads the constant 2 that the `str` below writes to the stack. It is
// emitted BEFORE the `.loc ... prologue_end` row, so it is still covered by
// the line-1 row above -- this is the behaviour the report describes.
mov w8, #2
.Ltmp1:
// Row for test.c line 2, column 6, flagged as the end of the prologue.
.loc 1 2 6 prologue_end // ./test.c:2:6
// Store w8 to [sp, #12]; the DW_TAG_variable entry for `a` gives its
// DW_AT_location as bytes 145, 12 (presumably DW_OP_fbreg 12 -- confirm).
str w8, [sp, #12]
// w0 = 0 (presumably main's implicit return value -- w0 is the AAPCS64
// return register).
mov w0, wzr
// Row for test.c line 3, column 1.
.loc 1 3 1 // ./test.c:3:1
// Release the 16-byte stack frame and return.
add sp, sp, #16 // =16
ret
.Ltmp2:
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
// -- End function
.section .debug_abbrev,"",@progbits
.byte 1 // Abbreviation Code
.byte 17 // DW_TAG_compile_unit
.byte 1 // DW_CHILDREN_yes
.byte 37 // DW_AT_producer
.byte 14 // DW_FORM_strp
.byte 19 // DW_AT_language
.byte 5 // DW_FORM_data2
.byte 3 // DW_AT_name
.byte 14 // DW_FORM_strp
.byte 16 // DW_AT_stmt_list
.byte 23 // DW_FORM_sec_offset
.byte 27 // DW_AT_comp_dir
.byte 14 // DW_FORM_strp
.byte 17 // DW_AT_low_pc
.byte 1 // DW_FORM_addr
.byte 18 // DW_AT_high_pc
.byte 6 // DW_FORM_data4
.byte 0 // EOM(1)
.byte 0 // EOM(2)
.byte 2 // Abbreviation Code
.byte 46 // DW_TAG_subprogram
.byte 1 // DW_CHILDREN_yes
.byte 17 // DW_AT_low_pc
.byte 1 // DW_FORM_addr
.byte 18 // DW_AT_high_pc
.byte 6 // DW_FORM_data4
.byte 64 // DW_AT_frame_base
.byte 24 // DW_FORM_exprloc
.byte 3 // DW_AT_name
.byte 14 // DW_FORM_strp
.byte 58 // DW_AT_decl_file
.byte 11 // DW_FORM_data1
.byte 59 // DW_AT_decl_line
.byte 11 // DW_FORM_data1
.byte 73 // DW_AT_type
.byte 19 // DW_FORM_ref4
.byte 63 // DW_AT_external
.byte 25 // DW_FORM_flag_present
.byte 0 // EOM(1)
.byte 0 // EOM(2)
.byte 3 // Abbreviation Code
.byte 52 // DW_TAG_variable
.byte 0 // DW_CHILDREN_no
.byte 2 // DW_AT_location
.byte 24 // DW_FORM_exprloc
.byte 3 // DW_AT_name
.byte 14 // DW_FORM_strp
.byte 58 // DW_AT_decl_file
.byte 11 // DW_FORM_data1
.byte 59 // DW_AT_decl_line
.byte 11 // DW_FORM_data1
.byte 73 // DW_AT_type
.byte 19 // DW_FORM_ref4
.byte 0 // EOM(1)
.byte 0 // EOM(2)
.byte 4 // Abbreviation Code
.byte 36 // DW_TAG_base_type
.byte 0 // DW_CHILDREN_no
.byte 3 // DW_AT_name
.byte 14 // DW_FORM_strp
.byte 62 // DW_AT_encoding
.byte 11 // DW_FORM_data1
.byte 11 // DW_AT_byte_size
.byte 11 // DW_FORM_data1
.byte 0 // EOM(1)
.byte 0 // EOM(2)
.byte 0 // EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.word .Ldebug_info_end0-.Ldebug_info_start0 // Length of Unit
.Ldebug_info_start0:
.hword 4 // DWARF version number
.word .debug_abbrev // Offset Into Abbrev. Section
.byte 8 // Address Size (in bytes)
.byte 1 // Abbrev [1] 0xb:0x4f DW_TAG_compile_unit
.word .Linfo_string0 // DW_AT_producer
.hword 12 // DW_AT_language
.word .Linfo_string1 // DW_AT_name
.word .Lline_table_start0 // DW_AT_stmt_list
.word .Linfo_string2 // DW_AT_comp_dir
.xword .Lfunc_begin0 // DW_AT_low_pc
.word .Lfunc_end0-.Lfunc_begin0 // DW_AT_high_pc
.byte 2 // Abbrev [2] 0x2a:0x28 DW_TAG_subprogram
.xword .Lfunc_begin0 // DW_AT_low_pc
.word .Lfunc_end0-.Lfunc_begin0 // DW_AT_high_pc
.byte 1 // DW_AT_frame_base
.byte 111
.word .Linfo_string3 // DW_AT_name
.byte 1 // DW_AT_decl_file
.byte 1 // DW_AT_decl_line
.word 82 // DW_AT_type
// DW_AT_external
.byte 3 // Abbrev [3] 0x43:0xe DW_TAG_variable
.byte 2 // DW_AT_location
.byte 145
.byte 12
.word .Linfo_string5 // DW_AT_name
.byte 1 // DW_AT_decl_file
.byte 2 // DW_AT_decl_line
.word 82 // DW_AT_type
.byte 0 // End Of Children Mark
.byte 4 // Abbrev [4] 0x52:0x7 DW_TAG_base_type
.word .Linfo_string4 // DW_AT_name
.byte 5 // DW_AT_encoding
.byte 4 // DW_AT_byte_size
.byte 0 // End Of Children Mark
.Ldebug_info_end0:
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "Homebrew clang version 13.0.0" // string offset=0
.Linfo_string1:
.asciz "test.c" // string offset=30
.Linfo_string2:
.asciz "/User/***" // string offset=37
.Linfo_string3:
.asciz "main" // string offset=68
.Linfo_string4:
.asciz "int" // string offset=73
.Linfo_string5:
.asciz "a" // string offset=77
.ident "Homebrew clang version 13.0.0"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:
https://godbolt.org/z/36jshbhYh
compiled with clang produces such code:
While parsing a binary with debug info (compiled with clang), I noticed that for a variable assignment the mov instruction is attributed to the previous block, and the block for the assignment starts only at the str instruction on AArch64 (see the yellow block in the godbolt link). Likewise, if you set a breakpoint on the second line in gdb, it stops at the str, skipping the mov. GCC produces the correct result: the block starts with the mov instruction.