Open xiangzhai opened 7 years ago
Hi ScaffCC developers,
-loop-simplify produces unroll-disabling metadata (!llvm.loop !2, i.e. "llvm.loop.unroll.disable"), for example:
-loop-simplify
$ cat cat_state.n044.ll ; ModuleID = 'cat_state.n045a.ll' source_filename = "cat_state.n04_merged.scaffold" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: noinline nounwind uwtable define void @catN(i16* %bit, i32 %n) local_unnamed_addr #0 { entry: %0 = load i16, i16* %bit, align 2 tail call void @llvm.H.i16(i16 %0) %cmp1 = icmp sgt i32 %n, 1 br i1 %cmp1, label %for.body.lr.ph, label %for.end for.body.lr.ph: ; preds = %entry %1 = add i32 %n, -1 %2 = add i32 %n, -2 %xtraiter = and i32 %1, 1 %3 = icmp ult i32 %2, 1 br i1 %3, label %for.cond.for.end_crit_edge.unr-lcssa, label %for.body.lr.ph.new for.body.lr.ph.new: ; preds = %for.body.lr.ph %unroll_iter = sub i32 %1, %xtraiter br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph.new %inc3 = phi i32 [ 1, %for.body.lr.ph.new ], [ %inc.1, %for.body ] %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub.1, %for.body ] %idxprom = sext i32 %inc3 to i64 %arrayidx1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom %4 = load i16, i16* %arrayidx1, align 2 %sub = add nsw i32 %inc3, -1 %idxprom2 = sext i32 %sub to i64 %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2 %5 = load i16, i16* %arrayidx3, align 2 tail call void @llvm.CNOT.i16.i16(i16 %4, i16 %5) %inc = add nsw i32 %inc3, 1 %niter.nsub = sub i32 %niter, 1 %idxprom.1 = sext i32 %inc to i64 %arrayidx1.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom.1 %6 = load i16, i16* %arrayidx1.1, align 2 %idxprom2.1 = sext i32 %inc3 to i64 %arrayidx3.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.1 %7 = load i16, i16* %arrayidx3.1, align 2 tail call void @llvm.CNOT.i16.i16(i16 %6, i16 %7) %inc.1 = add nsw i32 %inc, 1 %niter.nsub.1 = sub i32 %niter.nsub, 1 %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0 br i1 %niter.ncmp.1, label %for.body, label %for.cond.for.end_crit_edge.unr-lcssa for.cond.for.end_crit_edge.unr-lcssa: ; preds = %for.body, 
%for.body.lr.ph %inc3.unr = phi i32 [ 1, %for.body.lr.ph ], [ %inc.1, %for.body ] %lcmp.mod = icmp ne i32 %xtraiter, 0 br i1 %lcmp.mod, label %for.body.epil, label %for.end for.body.epil: ; preds = %for.cond.for.end_crit_edge.unr-lcssa %inc3.epil = phi i32 [ %inc3.unr, %for.cond.for.end_crit_edge.unr-lcssa ] %idxprom.epil = sext i32 %inc3.epil to i64 %arrayidx1.epil = getelementptr inbounds i16, i16* %bit, i64 %idxprom.epil %8 = load i16, i16* %arrayidx1.epil, align 2 %sub.epil = add nsw i32 %inc3.epil, -1 %idxprom2.epil = sext i32 %sub.epil to i64 %arrayidx3.epil = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.epil %9 = load i16, i16* %arrayidx3.epil, align 2 tail call void @llvm.CNOT.i16.i16(i16 %8, i16 %9) %inc.epil = add nsw i32 %inc3.epil, 1 %cmp.epil = icmp slt i32 %inc.epil, %n br label %for.end for.end: ; preds = %for.body.epil, %for.cond.for.end_crit_edge.unr-lcssa, %entry ret void } ; Function Attrs: nounwind declare void @llvm.H.i16(i16) #1 ; Function Attrs: nounwind declare void @llvm.CNOT.i16.i16(i16, i16) #1 ; Function Attrs: noinline nounwind uwtable define void @unCatN(i16* %bit, i32 %n) local_unnamed_addr #0 { entry: %storemerge1 = add nsw i32 %n, -1 %cmp2 = icmp sgt i32 %n, 1 br i1 %cmp2, label %for.body.peel, label %for.end for.body.peel: ; preds = %entry %idxprom.peel = sext i32 %storemerge1 to i64 %arrayidx.peel = getelementptr inbounds i16, i16* %bit, i64 %idxprom.peel %0 = load i16, i16* %arrayidx.peel, align 2 %sub1.peel = add nsw i32 %n, -2 %idxprom2.peel = sext i32 %sub1.peel to i64 %arrayidx3.peel = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.peel %1 = load i16, i16* %arrayidx3.peel, align 2 tail call void @llvm.CNOT.i16.i16(i16 %0, i16 %1) %storemerge.peel = add nsw i32 %storemerge1, -1 %cmp.peel = icmp sgt i32 %storemerge1, 1 br i1 %cmp.peel, label %for.body.lr.ph.peel.newph, label %for.end for.body.lr.ph.peel.newph: ; preds = %for.body.peel br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph.peel.newph 
%storemerge5 = phi i32 [ %storemerge.peel, %for.body.lr.ph.peel.newph ], [ %storemerge, %for.body ] %storemerge.in3 = phi i32 [ %storemerge1, %for.body.lr.ph.peel.newph ], [ %storemerge5, %for.body ] %idxprom = sext i32 %storemerge5 to i64 %arrayidx = getelementptr inbounds i16, i16* %bit, i64 %idxprom %2 = load i16, i16* %arrayidx, align 2 %sub1 = add nsw i32 %storemerge.in3, -2 %idxprom2 = sext i32 %sub1 to i64 %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2 %3 = load i16, i16* %arrayidx3, align 2 tail call void @llvm.CNOT.i16.i16(i16 %2, i16 %3) %storemerge = add nsw i32 %storemerge5, -1 %cmp = icmp sgt i32 %storemerge5, 1 br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2 for.end: ; preds = %for.body, %for.body.peel, %entry %.lcssa = phi i16* [ %bit, %entry ], [ %bit, %for.body.peel ], [ %bit, %for.body ] %4 = load i16, i16* %.lcssa, align 2 tail call void @llvm.H.i16(i16 %4) ret void } ; Function Attrs: noinline nounwind uwtable define i32 @main() local_unnamed_addr #0 { entry: %bits = alloca [4 x i16], align 2 %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %bits, i64 0, i64 0 call void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %arraydecay, i32 undef) ret i32 0 } define void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %bit, i32 %n) { entry.: %0 = load i16, i16* %bit, align 2 tail call void @llvm.H.i16(i16 %0) %arrayidx1. = getelementptr inbounds i16, i16* %bit, i64 1 %1 = load i16, i16* %arrayidx1., align 2 %2 = load i16, i16* %bit, align 2 tail call void @llvm.CNOT.i16.i16(i16 %1, i16 %2) %arrayidx1.1. = getelementptr inbounds i16, i16* %bit, i64 2 %3 = load i16, i16* %arrayidx1.1., align 2 %arrayidx3.1. = getelementptr inbounds i16, i16* %bit, i64 1 %4 = load i16, i16* %arrayidx3.1., align 2 tail call void @llvm.CNOT.i16.i16(i16 %3, i16 %4) %arrayidx1.epil. = getelementptr inbounds i16, i16* %bit, i64 3 %5 = load i16, i16* %arrayidx1.epil., align 2 %arrayidx3.epil. 
= getelementptr inbounds i16, i16* %bit, i64 2 %6 = load i16, i16* %arrayidx3.epil., align 2 tail call void @llvm.CNOT.i16.i16(i16 %5, i16 %6) ret void } attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang version 6.0.0 (git at github.com:llvm-mirror/clang.git 0aed123216ad4a38a9c2b16f1783895fd5cb1a04) (git at github.com:llvm-mirror/llvm.git d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"} !2 = distinct !{!2, !3} !3 = !{!"llvm.loop.unroll.disable"}
Loop unrolling then fails:
$(OPT) -S cat_state.n044.ll -mem2reg -loops -loop-simplify -loop-rotate -lcssa -loop-unroll -unroll-threshold=100000000 -sccp -simplifycfg -o cat_state.n045.ll
There are still for-loops in cat_state.n045.ll, although it is possible to work around this by using -internalize -globaldce to remove the dead functions unCatN and catN, which contain the for-loops :)
cat_state.n045.ll
-internalize -globaldce
unCatN
catN
Regards, Leslie Zhai
Fixed in https://github.com/ScaffCC/ScaffCC/commit/7cf9c3cef0901b83dfb41f07f9dea083fd52b15d#diff-4449e30adec5beea4c33c58954ce4d0d
Hi ScaffCC developers,
-loop-simplify
produces unroll-disabling metadata (!llvm.loop !2, i.e. "llvm.loop.unroll.disable"), for example: Loop unrolling then fails:
There are still for-loops in cat_state.n045.ll, although it is possible to work around this by using -internalize -globaldce to remove the dead functions unCatN and catN, which contain the for-loops :)
Regards, Leslie Zhai