Open regehr opened 9 years ago
Here's the LLVM that I get -- I don't see any obvious Souper stoppers in there.
; Function Attrs: nounwind uwtable
define void @check_result(i64 %x, i64 %y) #0 {
entry:
ret void
}
; Function Attrs: noreturn nounwind
declare void @__assert_fail(i8*, i8*, i32, i8*) #1
; Function Attrs: nounwind uwtable
define void @check_overflow(i64 %x, i64 %y) #0 {
entry:
%conv = trunc i64 %x to i16
%conv1 = trunc i64 %y to i16
%add.i = add i16 %conv1, %conv
%0 = and i64 %y, %x
%1 = trunc i64 %0 to i16
%2 = icmp slt i16 %1, 0
%cmp10.i = icmp sgt i16 %add.i, -1
%or.cond28.i = and i1 %2, %cmp10.i
br i1 %or.cond28.i, label %checked_add_2.exit, label %lor.rhs.i
lor.rhs.i: ; preds = %entry
%3 = or i64 %y, %x
%4 = trunc i64 %3 to i16
%5 = icmp sgt i16 %4, -1
%cmp20.i = icmp slt i16 %add.i, 0
%cmp20..i = and i1 %5, %cmp20.i
br label %checked_add_2.exit
checked_add_2.exit: ; preds = %entry, %lor.rhs.i
%6 = phi i1 [ true, %entry ], [ %cmp20..i, %lor.rhs.i ]
%shr28.i = lshr i16 %add.i, 15
%shr629.i = lshr i16 %conv, 15
%shr930.i = lshr i16 %conv1, 15
%tobool.i = icmp eq i16 %shr629.i, 0
br i1 %tobool.i, label %checked_add_3.exit, label %land.lhs.true.i
land.lhs.true.i: ; preds = %checked_add_2.exit
%tobool13.i = icmp eq i16 %shr930.i, 0
%tobool15.i = icmp ne i16 %shr28.i, 0
%or.cond.i = or i1 %tobool13.i, %tobool15.i
br i1 %or.cond.i, label %checked_add_3.exit, label %land.lhs.true.i.checked_add_3.exit_crit_edge
land.lhs.true.i.checked_add_3.exit_crit_edge: ; preds = %land.lhs.true.i
br i1 %6, label %cond.end, label %cond.false
checked_add_3.exit: ; preds = %checked_add_2.exit, %land.lhs.true.i
%7 = or i16 %shr930.i, %shr629.i
%8 = icmp eq i16 %7, 0
%tobool20.i = icmp ne i16 %shr28.i, 0
%tobool20..i = and i1 %8, %tobool20.i
%cmptmp = xor i1 %6, %tobool20..i
br i1 %cmptmp, label %cond.false, label %cond.end
cond.false: ; preds = %land.lhs.true.i.checked_add_3.exit_crit_edge, %checked_add_3.exit
tail call void @__assert_fail(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0), i32 34, i8* getelementptr inbounds ([32 x i8]* @__PRETTY_FUNCTION__.check_overflow, i64 0, i64 0)) #3
unreachable
cond.end: ; preds = %land.lhs.true.i.checked_add_3.exit_crit_edge, %checked_add_3.exit
ret void
}
Actually it is LLVM, not Souper, that proves check_result() is empty.
Here's the Souper in question:
%0 = block 3
%1:i64 = var ; x
%2:i16 = trunc %1 ; a
%3:i32 = sext %2 ; a promoted to int
%4:i1 = slt %3, 0:i32 ; a < 0
%5:i64 = var ; y
%6:i16 = trunc %5 ; b
%7:i32 = sext %6 ; b promoted to int
%8:i1 = slt %7, 0:i32 ; b < 0
%9:i1 = and %4, %8 ; a < 0 && b < 0
%10:i32 = zext %2 ; a promoted to int
%11:i32 = zext %6 ; b promoted to int
%12:i32 = addnsw %10, %11 ; a + b
%13:i16 = trunc %12 ; r
%14:i32 = sext %13 ; r promoted to int
%15:i1 = sle 0:i32, %14 ; 0 <= r
%16:i1 = and %9, %15 ; a < 0 && b < 0 && r >= 0
blockpc %0 1 %16 0:i1
blockpc %0 2 %16 0:i1
%17:i1 = sle 0:i32, %3 ; 0 <= a
%18:i1 = sle 0:i32, %7 ; 0 <= b
%19:i1 = and %17, %18 ; a >= 0 && b >= 0
blockpc %0 2 %19 1:i1
%20 = block 3
%21:i32 = ashr %10, 15:i32 ; a >> 15
%22:i16 = trunc %21 ; a >> 15
%23:i32 = zext %22 ; a >> 15 promoted to int
%24:i1 = ne 0:i32, %23 ; (a >> 15) != 0
%25:i32 = ashr %11, 15:i32 ; b >> 15
%26:i16 = trunc %25 ; b >> 15
%27:i32 = zext %26 ; b >> 15 promoted to int
%28:i1 = ne 0:i32, %27 ; (b >> 15) != 0
%29:i1 = and %24, %28 ; ((a >> 15) != 0 && (b >> 15) != 0
%30:i1 = xor 1:i1, %29 ; ((a >> 15) == 0 || (b >> 15) == 0
%31:i32 = zext %13 ; r promoted to int
%32:i32 = ashr %31, 15:i32 ; r >> 15
%33:i16 = trunc %32 ; r >> 15
%34:i1 = ne 0:i16, %33 ; (r >> 15) != 0
%35:i1 = or %30, %34 ; ((a >> 15) == 0 || (b >> 15) == 0 || (r >> 15) != 0
blockpc %20 1 %35 1:i1
blockpc %20 2 %35 1:i1
%36:i1 = ne 0:i16, %22 ; (a >> 15) != 0
%37:i1 = ne 0:i16, %26 ; (b >> 15) != 0
%38:i1 = or %36, %37 ; (a >> 15) != 0 || (b >> 15) != 0
blockpc %20 2 %38 0:i1
%39:i1 = slt %14, 0:i32 ; r < 0
%40:i1 = phi %0, 1:i1, 0:i1, %39
%41:i32 = zext %40
%42:i32 = zext %33
%43:i1 = ne 0:i32, %42
%44:i1 = phi %20, 1:i1, 0:i1, %43
%45:i32 = zext %44
%46:i1 = eq %41, %45
infer %46
And below is the LLVM. I believe this particular LLVM is correct since I tested it with all 2^32 inputs and the assertion never fails. Thus I think it's safe to say that Souper is either buggy or else somehow not up to the task of seeing the meaning of this code.
; ModuleID = 'add_souper_test_2.bc'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [9 x i8] c"o1 == o2\00", align 1
@.str1 = private unnamed_addr constant [18 x i8] c"add_souper_test.c\00", align 1
@__PRETTY_FUNCTION__.check_overflow = private unnamed_addr constant [32 x i8] c"void check_overflow(long, long)\00", align 1
; Function Attrs: nounwind uwtable
define void @check_overflow(i64 %x, i64 %y) #0 {
entry:
%conv = trunc i64 %x to i16
%conv1 = trunc i64 %y to i16
%conv.i = zext i16 %conv to i32
%conv1.i = zext i16 %conv1 to i32
%add.i = add nsw i32 %conv.i, %conv1.i
%conv2.i = trunc i32 %add.i to i16
%conv3.i = sext i16 %conv to i32
%cmp.i = icmp slt i32 %conv3.i, 0
%conv5.i = sext i16 %conv1 to i32
%cmp6.i = icmp slt i32 %conv5.i, 0
%or.cond.i = and i1 %cmp.i, %cmp6.i
%conv9.i = sext i16 %conv2.i to i32
%cmp10.i = icmp sge i32 %conv9.i, 0
%or.cond7.i = and i1 %or.cond.i, %cmp10.i
br i1 %or.cond7.i, label %checked_add_2.exit, label %lor.rhs.i
lor.rhs.i: ; preds = %entry
%conv12.i = sext i16 %conv to i32
%cmp13.i = icmp sge i32 %conv12.i, 0
%conv16.i = sext i16 %conv1 to i32
%cmp17.i = icmp sge i32 %conv16.i, 0
%or.cond8.i = and i1 %cmp13.i, %cmp17.i
br i1 %or.cond8.i, label %land.rhs.i, label %checked_add_2.exit
land.rhs.i: ; preds = %lor.rhs.i
%conv19.i = sext i16 %conv2.i to i32
%cmp20.i = icmp slt i32 %conv19.i, 0
br label %checked_add_2.exit
checked_add_2.exit: ; preds = %land.rhs.i, %lor.rhs.i, %entry
%0 = phi i1 [ true, %entry ], [ false, %lor.rhs.i ], [ %cmp20.i, %land.rhs.i ]
%lor.ext.i = zext i1 %0 to i32
%conv2 = trunc i64 %x to i16
%conv3 = trunc i64 %y to i16
%conv.i3 = zext i16 %conv2 to i32
%conv1.i4 = zext i16 %conv3 to i32
%add.i5 = add nsw i32 %conv.i3, %conv1.i4
%conv2.i6 = trunc i32 %add.i5 to i16
%conv3.i7 = zext i16 %conv2.i6 to i32
%shr.i = ashr i32 %conv3.i7, 15
%conv4.i = trunc i32 %shr.i to i16
%conv5.i8 = zext i16 %conv2 to i32
%shr6.i = ashr i32 %conv5.i8, 15
%conv7.i = trunc i32 %shr6.i to i16
%conv8.i = zext i16 %conv3 to i32
%shr9.i = ashr i32 %conv8.i, 15
%conv10.i = trunc i32 %shr9.i to i16
%conv11.i = zext i16 %conv7.i to i32
%tobool.i = icmp ne i32 %conv11.i, 0
%conv12.i9 = zext i16 %conv10.i to i32
%tobool13.i = icmp ne i32 %conv12.i9, 0
%or.cond.i10 = and i1 %tobool.i, %tobool13.i
%or.cond.not.i = xor i1 %or.cond.i10, true
%tobool15.i = icmp ne i16 %conv4.i, 0
%or.cond7.i11 = or i1 %or.cond.not.i, %tobool15.i
br i1 %or.cond7.i11, label %lor.rhs.i13, label %checked_add_3.exit
lor.rhs.i13: ; preds = %checked_add_2.exit
%tobool16.i = icmp ne i16 %conv7.i, 0
%tobool18.i = icmp ne i16 %conv10.i, 0
%or.cond8.i12 = or i1 %tobool16.i, %tobool18.i
br i1 %or.cond8.i12, label %checked_add_3.exit, label %land.rhs.i15
land.rhs.i15: ; preds = %lor.rhs.i13
%conv19.i14 = zext i16 %conv4.i to i32
%tobool20.i = icmp ne i32 %conv19.i14, 0
br label %checked_add_3.exit
checked_add_3.exit: ; preds = %land.rhs.i15, %lor.rhs.i13, %checked_add_2.exit
%1 = phi i1 [ true, %checked_add_2.exit ], [ false, %lor.rhs.i13 ], [ %tobool20.i, %land.rhs.i15 ]
%lor.ext.i16 = zext i1 %1 to i32
%cmp = icmp eq i32 %lor.ext.i, %lor.ext.i16
br i1 %cmp, label %cond.end, label %cond.false
cond.false: ; preds = %checked_add_3.exit
call void @__assert_fail(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([18 x i8]* @.str1, i32 0, i32 0), i32 27, i8* getelementptr inbounds ([32 x i8]* @__PRETTY_FUNCTION__.check_overflow, i32 0, i32 0)) #2
unreachable
cond.end: ; preds = %checked_add_3.exit
ret void
}
; Function Attrs: noreturn nounwind
declare void @__assert_fail(i8*, i8*, i32, i8*) #1
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { noreturn nounwind }
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.6.0 (trunk 223586)"}
My guess is somehow we're asking too much of blockpc here but I haven't looked into it at that level.
I'll have a look at the blockpc
code soon.
John, Thanks for the detailed example! I will investigate it.
What I'm not sure about is if the bug is in the blockpc harvester or the solving part.
If I code the overflow checks in branch-free fashion, Souper optimizes everything properly:
static int checked_add_2(int16_t a, int16_t b, int16_t *rp) {
int16_t r = (uint16_t)a + (uint16_t)b;
*rp = r;
// return (a < 0 && b < 0 && r >= 0) || (a >= 0 && b >= 0 && r < 0);
return (a < 0 & b < 0 & r >= 0) | (a >= 0 & b >= 0 & r < 0);
}
static int checked_add_3(int16_t a, int16_t b, int16_t *rp) {
const int BITS = CHAR_BIT * sizeof(int16_t);
uint16_t ur = (uint16_t)a + (uint16_t)b;
uint16_t sr = ur >> (BITS - 1);
uint16_t sa = (uint16_t)a >> (BITS - 1);
uint16_t sb = (uint16_t)b >> (BITS - 1);
*rp = ur;
// return (sa && sb && !sr) || (!sa && !sb && sr);
return (sa & sb & !sr) | (!sa & !sb & sr);
}
Here's a simpler example where Souper misses the obvious optimization. Something definitely is wrong here.
#include <assert.h>
int sign(int x) {
if (x < 0)
return -1;
if (x > 0)
return 1;
return 0;
}
int sign2(int x) {
int y = x >> 31;
unsigned z = 0 - (unsigned)x;
unsigned w = z >> 31;
return w | y;
}
void compare(int x) { assert(sign(x) == sign2(x)); }
#ifdef TEST
int main(void) {
unsigned x = 0;
do {
compare(x);
x++;
} while (x != 0);
return 0;
}
#endif
@regehr @chenyang78 This looks rather bad. sign
is optimized to
define i32 @sign(i32 %x) #0 {
entry:
%cmp = icmp slt i32 %x, 0
br i1 %cmp, label %return, label %if.end
if.end: ; preds = %entry
%cmp1 = icmp sgt i32 %x, 0
%. = zext i1 %cmp1 to i32
br label %return
return: ; preds = %if.end, %entry
%retval.0 = phi i32 [ -1, %entry ], [ %., %if.end ]
ret i32 %retval.0
}
Souper takes this and simplifies this to ret i32 0
. This seems related to issue #166. The conditions generated from the if.end
block are not sufficiently guarded to faithfully represent the execution which skips that block and immediately jumps to the return
block.
Thanks John and Jeroen, I will look at it more closely and try to fix #166 over the weekend.
@jeroenk In my test, souper didn't optimize the code that you referred. Assume I saved the bitcode in the file simple2.ll:
$ llvm-as -o simple2.bc simple2.ll
$ souper -keep-solver-inputs -dump-klee-exprs -z3-path=/home/chenyang/other_programs/z3/build/z3 simple2.bc
; Listing valid replacements.
; Using solver: Z3 + internal cache
Solver input saved to /tmp/input-eb9b23.smt2
Solver input saved to /tmp/input-efb132.smt2
Solver input saved to /tmp/input-7ac65c.smt2
Solver input saved to /tmp/input-2a4427.smt2
Probably I am missing something?
@chenyang78 try passing -souper-infer-iN
@chenyang78 I'm a bit puzzled, I'm no longer able to reproduce the problem either. I'll keep digging a bit more.
@chenyang78 I'm starting to suspect that my binaries were out-of-sync compared to the state of my source tree. Hence, my apologies for the spurious report.
Back to John's last example. This gets optimized to:
entry:
%cmp.i = icmp slt i32 %x, 0
br i1 %cmp.i, label %sign.exit, label %if.end.i
if.end.i: ; preds = %entry
%cmp1.i = icmp sgt i32 %x, 0
%..i = zext i1 %cmp1.i to i32
br label %sign.exit
sign.exit: ; preds = %if.end.i, %entry
%retval.0.i = phi i32 [ -1, %entry ], [ %..i, %if.end.i ]
%shr.i = ashr i32 %x, 31
%sub.i = sub i32 0, %x
%shr1.i = lshr i32 %sub.i, 31
%or.i = or i32 %shr1.i, %shr.i
%lnot = icmp eq i32 %retval.0.i, %or.i
...
The candidate souper generates for the eq
instruction at the bottom is:
%0 = block 2
%1:i32 = var
%2:i1 = slt %1, 0:i32
blockpc %0 1 %2 0:i1
%3:i1 = slt 0:i32, %1
%4:i32 = zext %3
%5:i32 = phi %0, 4294967295:i32, %4
%6:i32 = sub 0:i32, %1
%7:i32 = lshr %6, 31:i32
%8:i32 = ashr %1, 31:i32
%9:i32 = or %7, %8
%10:i1 = eq %5, %9
infer %10
In case we follow the code path corresponding to the 4294967295:i32
(= -1) alternative in the phi node, the value of %1
is not getting constrained to a negative value. This means that the solver is allowed to pick a positive value, set %5
equal to -1 and then notice that %9
is either 0 or 1. Thus, no optimization is found. If I manually add blockpc %0 0 %2 1:i1
to the above candidate then the optimization is found. Hence, it seems that we're generating too few blockpc's.
@jeroenk No problem at all. Thanks for digging into the issue!
The blockpc
that you manually added was not generated by Souper's blockpc harvesting algorithm, because the basicblock sign.exit
has multiple incoming branches. I am thinking that probably we shouldn't rule out the multiple-incoming-branches case. What do you think, Peter @pcc? Thanks.
@pcc might be busy doing other stuff?
Yang, are the modifications difficult? Clearly this needs to wait until #166 is sorted out. I keep running into additional test cases where this fix will matter.
Yes, it needs some modification. I think we should wait until #166 gets fixed.
Sounds good-- we'll back burner this one until the no-UB version of Souper is totally solid.
My impression is that real support for for LLVM's (very nasty) UB model is going to take some time and thought.
Yes. Thanks for running the test. Hopefully the fix to #166 would be good.
Could souper simpilfy all int sign variants to:
int sign(int x) {
return (x > 0) + (x >> 31);
}
Which usually optimal for all platforms?
This one assumes that we take Yang's fix to #154.
The program below has two addition functions that do overflow checking. They are exactly equivalent. Souper is able to prove that check_result() never throws an assertion but for some reason it does not prove that check_overflow() never throws an assertion. Can someone look into it?
Result is independent of solver choice and I'm compiling like this:
C code: