avr-llvm / llvm

[MERGED UPSTREAM] AVR backend for the LLVM compiler library
220 stars 21 forks source link

Code generation produces overly-large branches #209

Closed shepmaster closed 8 years ago

shepmaster commented 8 years ago

Compiling with `llc -march=avr < "$@" | llvm-mc -triple avr -mattr=avr6 -filetype=obj`, I'm getting the error `error: out of range branch target (expected a integer in the range -128 to 127)` for this code:

; Module header. The source file name indicates this module is bugpoint output.
source_filename = "bugpoint-output-79e0b94.bc"
; Little-endian layout with 16-bit pointers; every listed type uses 8-bit
; alignment and the only native integer width is 8 bits.
target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-i64:8:8-f32:8:8-f64:8:8-n8"
target triple = "avr-atmel-none"

; Named struct types used, directly or through other types, by the function
; signature, stack slots, and calls further down.
%"fmt::Formatter" = type { i32, i32, i8, %"option::Option<usize>", %"option::Option<usize>", { i8*, void (i8*)** }, %"slice::Iter<fmt::ArgumentV1>", { %"fmt::ArgumentV1"*, i16 } }
%"option::Option<usize>" = type { i8, [0 x i8], [2 x i8] }
%"slice::Iter<fmt::ArgumentV1>" = type { %"fmt::ArgumentV1"*, %"fmt::ArgumentV1"*, %"marker::PhantomData<&fmt::ArgumentV1>" }
%"fmt::ArgumentV1" = type { %"fmt::Void"*, i8 (%"fmt::Void"*, %"fmt::Formatter"*)* }
%"fmt::Void" = type {}
%"marker::PhantomData<&fmt::ArgumentV1>" = type {}
%"num::flt2dec::decoder::Decoded" = type { i64, i64, i64, i16, i8 }
%"num::flt2dec::Part" = type { i8, [0 x i8], [4 x i8] }
%"num::flt2dec::Formatted" = type { { i8*, i16 }, { %"num::flt2dec::Part"*, i16 } }
%"num::flt2dec::decoder::FullDecoded" = type { i8, [0 x i8], [27 x i8] }
%"option::Option<(usize, i16)>" = type { i8, [0 x i8], [4 x i8] }

; One-byte constant arrays defined outside this module; the function below
; selects between their addresses.
@byte_str28508 = external unnamed_addr constant [1 x i8], align 1
@byte_str28510 = external unnamed_addr constant [1 x i8], align 1

; External routines; the function below calls both, one after the other, from
; its "core::slice.exit.i" block.
; NOTE(review): attribute group #7 is referenced by these declarations and by
; the function definition, but its definition is not part of this excerpt.
declare void @core_num_flt2dec_strategy_dragon({ i16, i16 }* noalias nocapture sret dereferenceable(4), %"num::flt2dec::decoder::Decoded"* noalias nocapture readonly dereferenceable(27), i8* nonnull, i16, i16) unnamed_addr #7

declare void @core_num_flt2dec_strategy_grisu(%"option::Option<(usize, i16)>"* noalias nocapture sret dereferenceable(5), %"num::flt2dec::decoder::Decoded"* noalias nocapture readonly dereferenceable(27), i8* nonnull, i16, i16) unnamed_addr #7

; Reduced function body: most branch conditions and switch operands are undef,
; and most blocks terminate in unreachable. Only two paths reach the return
; block at the end of the function: one through next9.i and one through
; next31.i.
define internal fastcc i8 @_ZN4core3fmt23float_to_decimal_common17hd3db31e67f00e886E(%"fmt::Formatter"* nocapture dereferenceable(27), float %.val, i1 zeroext) unnamed_addr #7 {
entry-block:
  ; Stack slots: a FullDecoded value, a 1024-byte buffer, an array of 16 Part
  ; values, and a Formatted value.
  %full_decoded.i33 = alloca %"num::flt2dec::decoder::FullDecoded", align 8
  %buf = alloca [1024 x i8], align 1
  %parts = alloca [16 x %"num::flt2dec::Part"], align 8
  %formatted = alloca %"num::flt2dec::Formatted", align 8
  ; %2 is the address of the first byte of %buf; %3 is the address of field 0
  ; of element 1 of %parts.
  %2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %buf, i16 0, i16 0
  %3 = getelementptr inbounds [16 x %"num::flt2dec::Part"], [16 x %"num::flt2dec::Part"]* %parts, i16 0, i16 1, i32 0
  br i1 undef, label %match_case17, label %match_else16
; The blocks from match_else16 through next22.i only dispatch on undef values,
; and every path through them ends in unreachable.
match_else16:                                     ; preds = %entry-block
  br i1 undef, label %match_case14.i.i, label %match_case18.i.i
case_body6.i.i:                                   ; preds = %match_case18.i.i, %match_case14.i.i
  switch i8 undef, label %match_else.i [
    i8 0, label %next.i
    i8 1, label %next10.i
    i8 2, label %next22.i
    i8 3, label %match_case8.i
  ]
match_case14.i.i:                                 ; preds = %match_else16
  switch i8 undef, label %case_body6.i.i [
    i8 0, label %next.i
    i8 2, label %next22.i
  ]
match_case18.i.i:                                 ; preds = %match_else16
  switch i8 undef, label %case_body6.i.i [
    i8 0, label %next.i
    i8 2, label %match_case21.i.i
  ]
match_case21.i.i:                                 ; preds = %match_case18.i.i
  br label %next22.i
match_else.i:                                     ; preds = %case_body6.i.i
  unreachable
match_case8.i:                                    ; preds = %case_body6.i.i
  unreachable
next.i:                                           ; preds = %match_case18.i.i, %match_case14.i.i, %case_body6.i.i
  unreachable
next10.i:                                         ; preds = %case_body6.i.i
  unreachable
next22.i:                                         ; preds = %match_case21.i.i, %match_case14.i.i, %case_body6.i.i
  unreachable
match_case17:                                     ; preds = %entry-block
  ; %4 is loaded through an undef pointer; it later feeds the limit
  ; computation in "core::slice.exit.i" and the zero test in
  ; then-block-15146-.i.
  %4 = load i16, i16* undef, align 1
  %cond10.i.i.i38 = icmp eq i32 undef, 0
  %.20.i.i.i42 = zext i1 %cond10.i.i.i38 to i8
  %sret_slot.0.i.i.i44 = select i1 undef, i8 undef, i8 %.20.i.i.i42
  ; Address of field 2 (the [27 x i8] member) of %full_decoded.i33.
  %.sroa.6.1..sroa_idx.i59 = getelementptr inbounds %"num::flt2dec::decoder::FullDecoded", %"num::flt2dec::decoder::FullDecoded"* %full_decoded.i33, i16 0, i32 2
  br i1 undef, label %next.i89, label %case_body6.i.i71
case_body6.i.i71:                                 ; preds = %match_case17
  ; Picks the address of one of the two external byte strings; this pointer is
  ; stored on both paths that reach the return block.
  %.65.i.i70 = select i1 undef, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @byte_str28508, i16 0, i16 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @byte_str28510, i16 0, i16 0)
  ; The result of this add is not used anywhere else in the function.
  %sret_slot.0.i.i.i44.off = add i8 %sret_slot.0.i.i.i44, -3
  br i1 undef, label %match_case7.i, label %next9.i
match_case7.i:                                    ; preds = %case_body6.i.i71
  ; Reinterprets the [27 x i8] member as a Decoded pointer; %6 is 0 + 21.
  %5 = bitcast [27 x i8]* %.sroa.6.1..sroa_idx.i59 to %"num::flt2dec::decoder::Decoded"*
  %6 = add nuw nsw i16 0, 21
  br i1 undef, label %then-block-15091-.i, label %"core::slice.exit.i"
next.i89:                                         ; preds = %match_case17
  unreachable
next9.i:                                          ; preds = %case_body6.i.i71
  ; Stores the selected byte-string pointer through a null pointer, then
  ; jumps to the return block.
  store i8* %.65.i.i70, i8** null, align 8
  br label %_ZN4core3num7flt2dec18to_exact_fixed_str17hb6819e4e346dc0c8E.exit
then-block-15091-.i:                              ; preds = %match_case7.i
  unreachable
"core::slice.exit.i": ; preds = %match_case7.i
  ; %limit.0.i is (0 - %4) when %4 > -1, otherwise -32768.
  %7 = icmp sgt i16 %4, -1
  %8 = sub i16 0, %4
  %limit.0.i = select i1 %7, i16 %8, i16 -32768
  ; Both external routines receive the same Decoded pointer, buffer pointer,
  ; %6, and %limit.0.i; the sret argument is undef in both calls.
  call void @core_num_flt2dec_strategy_grisu(%"option::Option<(usize, i16)>"* noalias nocapture nonnull sret dereferenceable(5) undef, %"num::flt2dec::decoder::Decoded"* noalias nonnull readonly dereferenceable(27) %5, i8* nonnull %2, i16 %6, i16 %limit.0.i)
  call void @core_num_flt2dec_strategy_dragon({ i16, i16 }* noalias nocapture nonnull sret dereferenceable(4) undef, %"num::flt2dec::decoder::Decoded"* noalias nonnull readonly dereferenceable(27) %5, i8* nonnull %2, i16 %6, i16 %limit.0.i)
  br i1 undef, label %else-block44.i, label %then-block-15146-.i
then-block-15146-.i:                              ; preds = %"core::slice.exit.i"
  ; The only conditional branch in this function whose condition is not undef:
  ; %4 == 0 goes to next39.i (unreachable), anything else to next31.i.
  %9 = icmp eq i16 %4, 0
  br i1 %9, label %next39.i, label %next31.i
next31.i:                                         ; preds = %then-block-15146-.i
  ; Writes 0 to field 0 of %parts[1], then stores the selected byte-string
  ; pointer into the first field of the first member of %formatted.
  store i8 0, i8* %3, align 1
  %sign1.sroa.0.0..sroa_idx133.i = getelementptr inbounds %"num::flt2dec::Formatted", %"num::flt2dec::Formatted"* %formatted, i16 0, i32 0, i32 0
  store i8* %.65.i.i70, i8** %sign1.sroa.0.0..sroa_idx133.i, align 8
  br label %_ZN4core3num7flt2dec18to_exact_fixed_str17hb6819e4e346dc0c8E.exit
next39.i:                                         ; preds = %then-block-15146-.i
  unreachable
else-block44.i:                                   ; preds = %"core::slice.exit.i"
  unreachable
; Common return block; the returned i8 is undef.
_ZN4core3num7flt2dec18to_exact_fixed_str17hb6819e4e346dc0c8E.exit: ; preds = %next31.i, %next9.i
  ret i8 undef
}

Sorry for the size of the test case, but I think its size is integral to the problem.

dylanmckay commented 8 years ago

What's happening is that we are selecting the brne instruction for a branch to basic block LBB0_9. When assembled, this block just happens to be more than ~60 instructions away from the branch instruction, which is out of range of the 7-bit immediate of the brne instruction.

We should be doing the same thing that AVR-GCC does and promote the brne LBB0_9 to:

breq next
jmp LBB0_9
next:

From memory, I think this is called a landing pad.

shepmaster commented 8 years ago

> From memory, I think this is called a landing pad.

I think I would have used "trampoline". LLVM seems to use "landing pad" for exception handling, but it also has trampoline intrinsics, so that might not be the right phrase either. :-)

dylanmckay commented 8 years ago

I have fixed this in commit a46a0aa9320d4e1cdbe812c221ff0f0925634c1d.

Confirmed here:

jake.o:     file format elf32-avr

RELOCATION RECORDS FOR [.text]:
OFFSET   TYPE              VALUE 
00000038 R_AVR_13_PCREL    .text+0x0000010c
0000004a R_AVR_13_PCREL    .text+0x000000d4
0000004c R_AVR_LO8_LDI     byte_str28510
0000004e R_AVR_HI8_LDI     byte_str28510
00000054 R_AVR_7_PCREL     .text+0x000000d4
0000006c R_AVR_7_PCREL     .text+0x0000007c
0000007a R_AVR_13_PCREL    .text+0x0000008c
000000a2 R_AVR_CALL        core_num_flt2dec_strategy_grisu
000000b2 R_AVR_CALL        core_num_flt2dec_strategy_dragon
000000ba R_AVR_7_PCREL     .text+0x000000d4
000000c0 R_AVR_7_PCREL     .text+0x000000d4
000000d2 R_AVR_13_PCREL    .text+0x000000dc
00000112 R_AVR_13_PCREL    .text+0x0000003a
00000118 R_AVR_13_PCREL    .text+0x0000003a
0000011e R_AVR_13_PCREL    .text+0x0000003a
00000124 R_AVR_13_PCREL    .text+0x0000003a
00000128 R_AVR_13_PCREL    .text+0x0000003a