Open Quuxplusone opened 8 years ago
Attached test.ll
(3586 bytes, application/octet-stream): reduced test
The schedule also might be incorrect,
if (p_0_loaded_from_n <= 64)
{
for (int c0 = 0; c0 < p_0_loaded_from_n; c0 += 1)
Stmt_for_body(c0);
if (p_0_loaded_from_n <= 0) <====
Stmt_for_body(0); <====
}
else
{ /* original code */ }
I don't see any dead code here, sorry. For p_0_loaded_from_n > 0 The loop is executed p_0_loaded_from_n times. For p_0_loaded_from_n <= 0 the loop body is executed once as it was a do-while loop before. Note that this block was always executed before the loop backedge condition was evaluated.
I apologize, I misrepresented the problem. What I meant was the loop was being
removed, consider the output,
opt -polly-process-unprofitable -polly-code-generator=isl -polly-scops -polly-
codegen test.ll -S
define void @foo1() #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
%0 = load i32, i32* @n, align 4
%call = tail call i8* @foo(i32 %0)
store i8* %call, i8** bitcast (i16** @A to i8**), align 4
%1 = load i32, i32* @n, align 4
%cmp33 = icmp sgt i32 %1, 0
%2 = bitcast i8* %call to i16*
br i1 %cmp33, label %for.body.preheader, label %for.end20
for.body.preheader: ; preds = %entry.split
br label %polly.split_new_and_old
for.cond2.preheader: ; preds = %for.body
%cmp331 = icmp sgt i32 %1, 64
br i1 %cmp331, label %for.body5.preheader, label %for.end20.region_exiting
for.body5.preheader: ; preds = %for.cond2.preheader
br label %for.body5
polly.split_new_and_old: ; preds = %for.body.preheader
br label %polly.preload.begin
polly.preload.begin: ; preds =
%polly.split_new_and_old
br i1 true, label %polly.start, label %for.body
for.body: ; preds =
%polly.preload.begin, %for.body
%i.034 = phi i32 [ %inc, %for.body ], [ 0, %polly.preload.begin ]
%arrayidx = getelementptr inbounds i16, i16* %2, i32 %i.034
%3 = load i16, i16* %arrayidx, align 2
%add = add i16 %3, 10
store i16 %add, i16* %arrayidx, align 2
%inc = add nuw nsw i32 %i.034, 1
%cmp = icmp slt i32 %inc, %1
br i1 %cmp, label %for.body, label %for.cond2.preheader
for.body5: ; preds = %for.inc18,
%for.body5.preheader
%i.132 = phi i32 [ %inc19, %for.inc18 ], [ 64, %for.body5.preheader ]
%4 = load i32*, i32** @Y, align 4
%arrayidx6 = getelementptr inbounds i32, i32* %4, i32 %i.132
%5 = load i32, i32* %arrayidx6, align 4
%cmp7 = icmp sgt i32 %5, -1
br i1 %cmp7, label %land.lhs.true, label %for.inc18
land.lhs.true: ; preds = %for.body5
%6 = load i16*, i16** @A, align 4
%arrayidx9 = getelementptr inbounds i16, i16* %6, i32 %i.132
%7 = load i16, i16* %arrayidx9, align 2
%cmp11 = icmp eq i16 %7, 1
br i1 %cmp11, label %land.lhs.true13, label %for.inc18
land.lhs.true13: ; preds = %land.lhs.true
%call14 = tail call i32 @foo3(i32 %i.132)
%cmp15 = icmp eq i32 %call14, 10
br i1 %cmp15, label %if.then, label %for.inc18
if.then: ; preds = %land.lhs.true13
%8 = load i32*, i32** @C, align 4
%arrayidx17 = getelementptr inbounds i32, i32* %8, i32 %i.132
store i32 -2, i32* %arrayidx17, align 4
br label %for.inc18
for.inc18: ; preds = %if.then,
%land.lhs.true13, %land.lhs.true, %for.body5
%inc19 = add nuw nsw i32 %i.132, 1
%9 = load i32, i32* @n, align 4
%cmp3 = icmp slt i32 %inc19, %9
br i1 %cmp3, label %for.body5, label %for.end20.loopexit
for.end20.loopexit: ; preds = %for.inc18
br label %for.end20.region_exiting
for.end20.region_exiting: ; preds =
%for.cond2.preheader, %for.end20.loopexit
br label %polly.merge_new_and_old
polly.merge_new_and_old: ; preds = %polly.merge,
%for.end20.region_exiting
br label %for.end20
for.end20: ; preds =
%polly.merge_new_and_old, %entry.split
ret void
polly.start: ; preds = %polly.preload.begin
br label %polly.loop_if
polly.loop_exit: ; preds =
%polly.stmt.for.body, %polly.loop_if
br label %polly.cond
polly.cond: ; preds = %polly.loop_exit
br i1 true, label %polly.then, label %polly.else
polly.merge: ; preds = %polly.else,
%polly.stmt.for.body2
br label %polly.merge_new_and_old
polly.loop_if: ; preds = %polly.start
br i1 false, label %polly.loop_preheader, label %polly.loop_exit
polly.loop_header: ; preds =
%polly.stmt.for.body, %polly.loop_preheader
%polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.for.body ]
br label %polly.stmt.for.body
polly.stmt.for.body: ; preds = %polly.loop_header
%10 = trunc i64 %polly.indvar to i32
%11 = shl i32 %10, 1
%scevgep = getelementptr i8, i8* %call, i32 %11
%scevgep1 = bitcast i8* %scevgep to i16*
%_p_scalar_ = load i16, i16* %scevgep1, align 2, !alias.scope !0, !noalias !2
%p_add = add i16 %_p_scalar_, 10
store i16 %p_add, i16* %scevgep1, align 2, !alias.scope !0, !noalias !2
%12 = trunc i64 %polly.indvar to i32
%13 = add i32 %12, 1
%p_cmp = icmp slt i32 %13, %1
%polly.indvar_next = add nsw i64 %polly.indvar, 1
%polly.loop_cond = icmp slt i64 %polly.indvar, -1
br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
polly.loop_preheader: ; preds = %polly.loop_if
br label %polly.loop_header
polly.then: ; preds = %polly.cond
br label %polly.stmt.for.body2
polly.stmt.for.body2: ; preds = %polly.then
%scevgep3 = getelementptr i8, i8* %call, i32 0
%scevgep34 = bitcast i8* %scevgep3 to i16*
%_p_scalar_5 = load i16, i16* %scevgep34, align 2, !alias.scope !0, !noalias !2
%p_add6 = add i16 %_p_scalar_5, 10
store i16 %p_add6, i16* %scevgep34, align 2, !alias.scope !0, !noalias !2
%p_cmp7 = icmp slt i32 1, %1
br label %polly.merge
polly.else: ; preds = %polly.cond
br label %polly.merge
}
Consider when n > 0
entry->entry.split->
for.body.preheader->
polly.split_new_and_old->
polly.preload.begin-> (true)
polly.start->
polly.loop_if-> (false)
polly.loop_exit->
polly.cond-> (true)
polly.then->
polly.stmt.for.body2->
polly.merge->
polly.merge_new_and_old->
for.end20->end
In polly.stmt.for.body2, it is executed just once.
When the n < 0
entry->entry.split->
for.end20->end
It looks like the upperbound is not being materialized in the
IslNodebBuilder::preloadInvariantLoop.
Unreached loops get removed by simplifycfg,(In reply to comment #0) and we get
the below output.
> When run with -simplifycfg
>
>
> define void @foo1() #0 {
> entry:
> %0 = load i32, i32* @n, align 4
> %call = tail call i8* @foo(i32 %0)
> store i8* %call, i8** bitcast (i16** @A to i8**), align 4
> %1 = load i32, i32* @n, align 4
> %cmp33 = icmp sgt i32 %1, 0
> %2 = bitcast i8* %call to i16*
> br i1 %cmp33, label %polly.stmt.for.body2, label %for.end20
>
> for.end20: ; preds =
> %polly.stmt.for.body2, %entry
> ret void
>
> polly.stmt.for.body2: ; preds = %entry
> %scevgep3 = getelementptr i8, i8* %call, i32 0
> %scevgep34 = bitcast i8* %scevgep3 to i16*
> %_p_scalar_5 = load i16, i16* %scevgep34, align 2, !alias.scope !0,
> !noalias !2
> %p_add6 = add i16 %_p_scalar_5, 10
> store i16 %p_add6, i16* %scevgep34, align 2, !alias.scope !0, !noalias !2
> %p_cmp7 = icmp slt i32 1, %1
> br label %for.end20
> }
"false" as condition for the branch in the polly.loop_if block is definitively hinky. I will take a look at this!
Move bugs to Polly product.
test.ll
(3586 bytes, application/octet-stream)