Quuxplusone / LLVMBugzillaTest

0 stars 0 forks source link

Optimize switch wirh icmp with and #47571

Open Quuxplusone opened 3 years ago

Quuxplusone commented 3 years ago
Bugzilla Link PR48602
Status NEW
Importance P enhancement
Reported by David Bolvansky (david.bolvansky@gmail.com)
Reported on 2020-12-26 01:37:24 -0800
Last modified on 2020-12-26 02:22:13 -0800
Version trunk
Hardware PC Linux
CC llvm-bugs@lists.llvm.org, nikita.ppv@gmail.com, spatel+llvm@rotateright.com
Fixed by commit(s)
Attachments
Blocks
Blocked by
See also
enum output_type
{
  type_pde,
  type_relocatable,
  type_pie,
  type_dll,
};

struct bfd_link_info
{
  enum output_type type : 2;
  unsigned int pad : 30;
};

#define bfd_link_pde(info)     ((info)->type == type_pde)
#define bfd_link_dll(info)     ((info)->type == type_dll)
#define bfd_link_relocatable(info) ((info)->type == type_relocatable)
#define bfd_link_pie(info)     ((info)->type == type_pie)
#define bfd_link_executable(info)  (bfd_link_pde (info) || bfd_link_pie (info))
#define bfd_link_pic(info)     (bfd_link_dll (info) || bfd_link_pie (info))

int result;

void test_pic (struct bfd_link_info *info)
{
  if (bfd_link_pic (info))
    result++;
}
void test_exe (struct bfd_link_info *info)
{
  if (bfd_link_executable (info))
    result++;
}

%struct.bfd_link_info = type { i32 }

@result = dso_local local_unnamed_addr global i32 0, align 4

define dso_local void @_Z8test_picP13bfd_link_info(%struct.bfd_link_info*
nocapture readonly %0) local_unnamed_addr #0 {
  %2 = getelementptr %struct.bfd_link_info, %struct.bfd_link_info* %0, i64 0, i32 0
  %3 = load i32, i32* %2, align 4
  %4 = and i32 %3, 2
  %5 = icmp eq i32 %4, 0
  br i1 %5, label %9, label %6

6:                                                ; preds = %1
  %7 = load i32, i32* @result, align 4, !tbaa !2
  %8 = add nsw i32 %7, 1
  store i32 %8, i32* @result, align 4, !tbaa !2
  br label %9

9:                                                ; preds = %1, %6
  ret void
}

define dso_local void @_Z8test_exeP13bfd_link_info(%struct.bfd_link_info*
nocapture readonly %0) local_unnamed_addr #0 {
  %2 = getelementptr %struct.bfd_link_info, %struct.bfd_link_info* %0, i64 0, i32 0
  %3 = load i32, i32* %2, align 4
  %4 = and i32 %3, 3
  switch i32 %4, label %8 [
    i32 0, label %5
    i32 2, label %5
  ]

5:                                                ; preds = %1, %1
  %6 = load i32, i32* @result, align 4, !tbaa !2
  %7 = add nsw i32 %6, 1
  store i32 %7, i32* @result, align 4, !tbaa !2
  br label %8

8:                                                ; preds = %1, %5
  ret void
}

It should be possible to optimize switch away.

Current LLVM codegen:
test_exe(bfd_link_info*):            # @test_exe(bfd_link_info*)
        mov     eax, dword ptr [rdi]
        or      eax, 2
        and     eax, 3
        cmp     eax, 2
        jne     .LBB1_2
        add     dword ptr [rip + result], 1
.LBB1_2:
        ret

GCC:
test_exe(bfd_link_info*):
        test    BYTE PTR [rdi], 1
        jne     .L10
        add     DWORD PTR result[rip], 1
.L10:
        ret
Quuxplusone commented 3 years ago

So the proposed optimization here is: If all switch destinations apart from the default are the same, and the number of (non-default) destinations is one less than the size of the switched over value range, then the switch can be converted into a comparison and conditional branch.

What I don't get is how that would bring us to the GCC codegen. It would eliminate the switch, but we'd still have to do something like (load)&3==1, while GCC seems to have dropped the 0x3 mask entirely.

Quuxplusone commented 3 years ago
https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=16842d34e7fceebcecc24910e9219a1581fffb32

gcc patch

Also this may help
https://gcc.gnu.org/legacy-ml/gcc-patches/2017-01/msg01936/pr67328-2.patch