Quuxplusone / LLVMBugzillaTest

0 stars 0 forks source link

SLP not vectorizing without SCEV-AA #21232

Open Quuxplusone opened 10 years ago

Quuxplusone commented 10 years ago
Bugzilla Link PR21233
Status NEW
Importance P normal
Reported by Sanjin Sijaric (ssijaric@codeaurora.org)
Reported on 2014-10-09 19:55:46 -0700
Last modified on 2014-10-21 10:15:40 -0700
Version trunk
Hardware PC Linux
CC llvm-bugs@lists.llvm.org
Fixed by commit(s)
Attachments PATCH (4191 bytes, text/plain)
Blocks
Blocked by
See also
SLP cannot vectorize the following:

; Reproducer module. Per the datalayout/triple strings below it targets
; little-endian ("e") AArch64 Linux.
target datalayout = "e-m:e-i64:64-i128:128-n8:16:32:64-S128"
target triple = "aarch64--linux-gnu"

; @func writes %value into four consecutive i32 slots at the start of every
; 16-element stride of %dst, i.e. dst[16*i + 0 .. 16*i + 3] for iteration i.
; The four scalar stores in %loop_header are the candidates the report expects
; the SLP vectorizer to merge into a single <4 x i32> store.
;
; NOTE(review): the loop is entered unconditionally from %entry and the exit
; test runs after the body, so the body executes at least once regardless of
; the value of %0.
; Function Attrs: nounwind
define void @func(i32* nocapture %dst, i32 %value, i32 %count) {
entry:
  ; Iteration bound: %count arithmetically shifted right by 4, then
  ; sign-extended to i64 so it can be compared against the i64 %indvar.
  %shr = ashr i32 %count, 4
  %0 = sext i32 %shr to i64
  br label %loop_header

loop_exit:                                  ; preds = %loop_header
  ret void

loop_header:                                ; preds = %loop_header, %entry
  ; Running pointer: starts at %dst and advances by 16 i32 elements per
  ; iteration (see %scevgep.inc below), so it always equals %dst + 16*%indvar.
  %scevgep.phi = phi i32* [ %dst, %entry ], [ %scevgep.inc, %loop_header ]
  ; Iteration counter, starting at 0.
  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %loop_header ]
  ; %1 = 16 * %indvar. Its low four bits are zero, so each `or` with 1, 2, 3
  ; below yields the same value as an add of that constant.
  %1 = shl i64 %indvar, 4
  ; Slot 0 is addressed through the running pointer rather than via a GEP
  ; off %dst.
  store i32 %value, i32* %scevgep.phi, align 4
  ; Slots 1..3 are addressed as %dst + (%1 | k).
  ; NOTE(review): presumably this mix of addressing forms (phi pointer for
  ; slot 0, GEPs off %dst for slots 1..3) is what keeps SLP from recognising
  ; the four stores as consecutive without SCEV -- confirm against the patch.
  %scevgep48.sum78 = or i64 %1, 1
  %scevgep49 = getelementptr i32* %dst, i64 %scevgep48.sum78
  store i32 %value, i32* %scevgep49, align 4
  %scevgep50.sum79 = or i64 %1, 2
  %scevgep51 = getelementptr i32* %dst, i64 %scevgep50.sum79
  store i32 %value, i32* %scevgep51, align 4
  %scevgep52.sum80 = or i64 %1, 3
  %scevgep53 = getelementptr i32* %dst, i64 %scevgep52.sum80
  store i32 %value, i32* %scevgep53, align 4
  ; Advance the counter and leave the loop once it equals the bound %0.
  %indvar_next = add nsw i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar_next, %0
  ; Step the running pointer to the start of the next 16-element stride.
  %scevgep.inc = getelementptr i32* %scevgep.phi, i64 16
  br i1 %exitcond, label %loop_exit, label %loop_header
}

using

opt -basicaa -slp-vectorizer -S  < slp-scevaa.ll

Using SCEV to compute the base pointer in SLP, together with SCEV-AA,
enables vectorization:

loop_header:                                      ; preds = %loop_header, %entry
  %scevgep.phi = phi i32* [ %dst, %entry ], [ %scevgep.inc, %loop_header ]
  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %loop_header ]
  %5 = shl i64 %indvar, 4
  %scevgep48.sum78 = or i64 %5, 1
  %scevgep49 = getelementptr i32* %dst, i64 %scevgep48.sum78
  %scevgep50.sum79 = or i64 %5, 2
  %scevgep51 = getelementptr i32* %dst, i64 %scevgep50.sum79
  %scevgep52.sum80 = or i64 %5, 3
  %scevgep53 = getelementptr i32* %dst, i64 %scevgep52.sum80
  %6 = bitcast i32* %scevgep.phi to <4 x i32>*
  store <4 x i32> %4, <4 x i32>* %6, align 4  ; <===== vectorized store
  %indvar_next = add nsw i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar_next, %0
  %scevgep.inc = getelementptr i32* %scevgep.phi, i64 16
  br i1 %exitcond, label %loop_exit, label %loop_header
Quuxplusone commented 10 years ago

Attached PATCH (4191 bytes, text/plain): Proposed patch