Open Quuxplusone opened 10 years ago
PATCH
The SLP vectorizer cannot vectorize the following:

    target datalayout = "e-m:e-i64:64-i128:128-n8:16:32:64-S128"
    target triple = "aarch64--linux-gnu"

    ; Function Attrs: nounwind
    define void @func(i32* nocapture %dst, i32 %value, i32 %count) {
    entry:
      %shr = ashr i32 %count, 4
      %0 = sext i32 %shr to i64
      br label %loop_header

    loop_exit:                                        ; preds = %loop_header, %entry
      ret void

    loop_header:                                      ; preds = %loop_header, %entry
      %scevgep.phi = phi i32* [ %dst, %entry ], [ %scevgep.inc, %loop_header ]
      %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %loop_header ]
      %1 = shl i64 %indvar, 4
      store i32 %value, i32* %scevgep.phi, align 4
      %scevgep48.sum78 = or i64 %1, 1
      %scevgep49 = getelementptr i32* %dst, i64 %scevgep48.sum78
      store i32 %value, i32* %scevgep49, align 4
      %scevgep50.sum79 = or i64 %1, 2
      %scevgep51 = getelementptr i32* %dst, i64 %scevgep50.sum79
      store i32 %value, i32* %scevgep51, align 4
      %scevgep52.sum80 = or i64 %1, 3
      %scevgep53 = getelementptr i32* %dst, i64 %scevgep52.sum80
      store i32 %value, i32* %scevgep53, align 4
      %indvar_next = add nsw i64 %indvar, 1
      %exitcond = icmp eq i64 %indvar_next, %0
      %scevgep.inc = getelementptr i32* %scevgep.phi, i64 16
      br i1 %exitcond, label %loop_exit, label %loop_header
    }

when run with:

    opt -basicaa -slp-vectorizer -S < slp-scevaa.ll

Using SCEV to get the base pointer in SLP, together with SCEV-AA, enables vectorization:

    loop_header:                                      ; preds = %loop_header, %entry
      %scevgep.phi = phi i32* [ %dst, %entry ], [ %scevgep.inc, %loop_header ]
      %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %loop_header ]
      %5 = shl i64 %indvar, 4
      %scevgep48.sum78 = or i64 %5, 1
      %scevgep49 = getelementptr i32* %dst, i64 %scevgep48.sum78
      %scevgep50.sum79 = or i64 %5, 2
      %scevgep51 = getelementptr i32* %dst, i64 %scevgep50.sum79
      %scevgep52.sum80 = or i64 %5, 3
      %scevgep53 = getelementptr i32* %dst, i64 %scevgep52.sum80
      %6 = bitcast i32* %scevgep.phi to <4 x i32>*
      store <4 x i32> %4, <4 x i32>* %6, align 4 <=====
      %indvar_next = add nsw i64 %indvar, 1
      %exitcond = icmp eq i64 %indvar_next, %0
      %scevgep.inc = getelementptr i32* %scevgep.phi, i64 16
      br i1 %exitcond, label %loop_exit, label %loop_header
Attached PATCH (4191 bytes, text/plain): Proposed patch
PATCH
(4191 bytes, text/plain)