EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
443 stars 62 forks source link

Atomic fadd of vector #1635

Open avik-pal opened 2 months ago

avik-pal commented 2 months ago
using LoopVectorization, Enzyme

# Minimal reproducer: fill `y` with abs2(sin(x[i])) for every index of `x`
# inside a LoopVectorization `@turbo` loop, then return the sum of `y`.
# The stack trace in the error log of this issue points at the `@turbo` line
# (`simple_lv_loop @ ./REPL[3]:3`).
function simple_lv_loop(x)
    # Output buffer created from `x` via `similar`; every index is written below.
    y = similar(x)
    @turbo for i ∈ eachindex(x)
        y[i] = abs2(sin(x[i]))
    end
    return sum(y)
end

# Random input vector of length 10.
x = rand(10)

# Plain (undifferentiated) call of the function.
simple_lv_loop(x)

# Reverse-mode gradient call; the stack trace in this issue's error log ends in
# this `gradient(::ReverseMode, ...)` invocation.
Enzyme.gradient(Reverse, simple_lv_loop, x)

Logs: https://gist.github.com/avik-pal/d1f2ab140db39bac9b8e146647618bb0

julia> versioninfo()
Julia Version 1.10.4
Commit 48d4fd48430 (2024-06-04 10:41 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 12 × AMD Ryzen 5 4600H with Radeon Graphics
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, znver2)
Threads: 12 default, 0 interactive, 6 GC (on 12 virtual cores)
Environment:
  JULIA_EDITOR = vim
avik-pal commented 2 months ago

Importantly, forward mode works fine:

julia> Enzyme.gradient(Forward, simple_lv_loop, x)
(0.5675444639736587, 0.9869097699817291, 0.8598827553126634, 0.7768292717208152, 0.998311282116486, 0.9545627660826727, 0.9856956308307638, 0.9383523981939309, 0.6211593850517739, 0.4285066677918463)
wsmoses commented 2 months ago

Should be fixed by https://github.com/EnzymeAD/Enzyme.jl/pull/1657; please reopen otherwise.

avik-pal commented 2 months ago

Getting a new error.

New Error ```julia ERROR: Enzyme execution failed. Enzyme compilation failed. Current scope: ; Function Attrs: mustprogress willreturn define "enzyme_type"="{[-1]:Float@double}" double @preprocess_julia_simple_lv_loop_3488({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="127514709197664" "enzymejl_parmtype_ref"="2" %0) local_unnamed_addr #11 !dbg !726 { top: %1 = call {}*** @julia.get_pgcstack() #13 %ptls_field255 = getelementptr inbounds {}**, {}*** %1, i64 2 %2 = bitcast {}*** %ptls_field255 to i64*** %ptls_load256257 = load i64**, i64*** %2, align 8, !tbaa !10 %3 = getelementptr inbounds i64*, i64** %ptls_load256257, i64 2 %safepoint = load i64*, i64** %3, align 8, !tbaa !14 fence syncscope("singlethread") seq_cst call void @julia.safepoint(i64* %safepoint) #13, !dbg !727 fence syncscope("singlethread") seq_cst %4 = addrspacecast {} addrspace(10)* %0 to {} addrspace(11)*, !dbg !728 %5 = addrspacecast {} addrspace(10)* %0 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !728 %arraylen_ptr = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %5, i64 0, i32 1, !dbg !728 %arraylen = load i64, i64 addrspace(11)* %arraylen_ptr, align 8, !dbg !728, !tbaa !23, !range !26, !alias.scope !27, !noalias !30 %6 = call noalias nonnull 
"enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" {} addrspace(10)* @ijl_alloc_array_1d({} addrspace(10)* noundef addrspacecast ({}* inttoptr (i64 127514709197664 to {}*) to {} addrspace(10)*), i64 %arraylen) #14, !dbg !731 %7 = addrspacecast {} addrspace(10)* %6 to {} addrspace(11)*, !dbg !732 %8 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %7) #15, !dbg !732 %9 = bitcast {}* %8 to i8**, !dbg !732 %arrayptr = load i8*, i8** %9, align 8, !dbg !732, !tbaa !54, !alias.scope !27, !noalias !30, !nonnull !9 %10 = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* noundef %4) #15, !dbg !732 %11 = bitcast {}* %10 to i8**, !dbg !732 %arrayptr5 = load i8*, i8** %11, align 8, !dbg !732, !tbaa !54, !alias.scope !27, !noalias !30, !nonnull !9 %12 = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* nofree nonnull %0, {} addrspace(10)* nonnull %6) #13, !dbg !737 %13 = add nuw i64 %arraylen, 3, !dbg !739 %14 = icmp ne i64 %arraylen, 0, !dbg !743 call void @llvm.assume(i1 noundef %14) #13, !dbg !746 %15 = and i64 %arraylen, 9223372036854775800, !dbg !747 %16 = icmp eq i64 %15, 0, !dbg !749 br i1 %16, label %L112, label %L19.lr.ph, !dbg !741 L19.lr.ph: ; preds = %top %ptr.0.i = bitcast i8* %arrayptr5 to double* %ptr.0.i173 = bitcast i8* %arrayptr to double* br label %L19, !dbg !741 L19: ; preds = %L19, %L19.lr.ph %iv = phi i64 [ %iv.next, %L19 ], [ 0, %L19.lr.ph ] %17 = shl nuw nsw i64 %iv, 3, !dbg !751 %iv.next = add nuw nsw i64 %iv, 1, !dbg !751 %ptr.1.i = getelementptr inbounds double, double* %ptr.0.i, i64 %17, !dbg !751 %ptr.1.i249 = bitcast double* %ptr.1.i to <4 x double>*, !dbg !757 %res.i250 = load <4 x double>, <4 x double>* %ptr.1.i249, align 8, !dbg !757, !alias.scope !108 %ptr.1.i246 = getelementptr inbounds double, double* %ptr.1.i, i64 4, !dbg !757 %ptr.2.i247 = bitcast double* %ptr.1.i246 to <4 x double>*, !dbg !757 %res.i248 = load <4 x double>, <4 x double>* %ptr.2.i247, align 8, !dbg !757, !alias.scope !108 %res.i245 = fmul nsz contract <4 x double> %res.i250, , !dbg !760 %res.i244 = fmul nsz contract <4 x double> %res.i248, , !dbg !766 %res.i243 = call fast <4 x double> @llvm.trunc.v4f64(<4 x double> %res.i245) #13, !dbg !769 %res.i242 = call fast <4 x double> @llvm.trunc.v4f64(<4 x double> %res.i244) #13, !dbg !774 %res.i241 = fmul nsz contract <4 x double> %res.i250, , !dbg !777 %res.i240 = fmul nsz contract <4 x double> %res.i248, , !dbg !783 %res.i239 = fmul nsz contract <4 x double> %res.i243, , !dbg !777 %res.i238 = fmul nsz contract <4 x double> %res.i242, , !dbg !783 %res.i237 = fsub nsz contract <4 x double> %res.i241, %res.i239, !dbg !786 %res.i236 = fsub nsz contract <4 x double> %res.i240, %res.i238, !dbg !791 %res.i235 = call fast <4 x double> @llvm.nearbyint.v4f64(<4 x double> 
%res.i237) #13, !dbg !794 %res.i234 = call fast <4 x double> @llvm.nearbyint.v4f64(<4 x double> %res.i236) #13, !dbg !799 %res.i233 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i243, <4 x double> noundef , <4 x double> %res.i250) #13, !dbg !802 %res.i232 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i242, <4 x double> noundef , <4 x double> %res.i248) #13, !dbg !802 %res.i231 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i235, <4 x double> noundef , <4 x double> %res.i233) #13, !dbg !809 %res.i230 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i234, <4 x double> noundef , <4 x double> %res.i232) #13, !dbg !809 %res.i229 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i243, <4 x double> noundef , <4 x double> %res.i231) #13, !dbg !816 %res.i228 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i242, <4 x double> noundef , <4 x double> %res.i230) #13, !dbg !816 %res.i227 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i235, <4 x double> noundef , <4 x double> %res.i229) #13, !dbg !823 %res.i226 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i234, <4 x double> noundef , <4 x double> %res.i228) #13, !dbg !823 %res.i225 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i243, <4 x double> noundef , <4 x double> %res.i227) #13, !dbg !830 %res.i224 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i242, <4 x double> noundef , <4 x double> %res.i226) #13, !dbg !830 %res.i223 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i235, <4 x double> noundef , <4 x double> %res.i225) #13, !dbg !837 %res.i222 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i234, <4 x double> noundef , <4 x double> %res.i224) #13, !dbg !837 %res.i221 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i243, <4 
x double> noundef , <4 x double> %res.i235) #13, !dbg !844 %res.i220 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i242, <4 x double> noundef , <4 x double> %res.i234) #13, !dbg !844 %res.i219 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i221, <4 x double> noundef , <4 x double> %res.i223) #13, !dbg !844 %res.i218 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i220, <4 x double> noundef , <4 x double> %res.i222) #13, !dbg !844 %res.i217 = fmul nsz contract <4 x double> %res.i219, %res.i219, !dbg !851 %res.i216 = fmul nsz contract <4 x double> %res.i218, %res.i218, !dbg !857 %res.i215 = fptosi <4 x double> %res.i235 to <4 x i64>, !dbg !860 %res.i214 = fptosi <4 x double> %res.i234 to <4 x i64>, !dbg !868 %res.i213 = and <4 x i64> %res.i215, , !dbg !871 %res.i212 = and <4 x i64> %res.i214, , !dbg !877 %m.i209.not = icmp eq <4 x i64> %res.i213, zeroinitializer, !dbg !880 %m.i206.not = icmp eq <4 x i64> %res.i212, zeroinitializer, !dbg !886 %res.i205 = fneg nsz contract <4 x double> %res.i219, !dbg !890 %res.i204 = fneg nsz contract <4 x double> %res.i218, !dbg !895 %res.i203 = select reassoc nsz arcp contract <4 x i1> %m.i209.not, <4 x double> %res.i219, <4 x double> %res.i205, !dbg !898 %res.i200 = select reassoc nsz arcp contract <4 x i1> %m.i206.not, <4 x double> %res.i218, <4 x double> %res.i204, !dbg !898 %res.i197 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i217, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !904 %res.i196 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i216, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !904 %res.i195 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i197, <4 x double> %res.i217, <4 x double> noundef ) #13, !dbg !914 %res.i194 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i196, <4 x double> %res.i216, <4 x double> noundef ) #13, !dbg !914 
%res.i193 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i195, <4 x double> %res.i217, <4 x double> noundef ) #13, !dbg !921 %res.i192 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i194, <4 x double> %res.i216, <4 x double> noundef ) #13, !dbg !921 %res.i191 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i193, <4 x double> %res.i217, <4 x double> noundef ) #13, !dbg !928 %res.i190 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i192, <4 x double> %res.i216, <4 x double> noundef ) #13, !dbg !928 %res.i189 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i191, <4 x double> %res.i217, <4 x double> noundef ) #13, !dbg !935 %res.i188 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i190, <4 x double> %res.i216, <4 x double> noundef ) #13, !dbg !935 %res.i187 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i189, <4 x double> %res.i217, <4 x double> noundef ) #13, !dbg !942 %res.i186 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i188, <4 x double> %res.i216, <4 x double> noundef ) #13, !dbg !942 %res.i185 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i187, <4 x double> %res.i217, <4 x double> noundef ) #13, !dbg !949 %res.i184 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i186, <4 x double> %res.i216, <4 x double> noundef ) #13, !dbg !949 %res.i183 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i185, <4 x double> %res.i217, <4 x double> noundef ) #13, !dbg !956 %res.i182 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i184, <4 x double> %res.i216, <4 x double> noundef ) #13, !dbg !956 %res.i181 = fmul nsz contract <4 x double> %res.i203, %res.i183, !dbg !963 %res.i180 = fmul nsz contract <4 x double> %res.i200, %res.i182, !dbg !969 %res.i179 = call nsz contract <4 x double> 
@llvm.fmuladd.v4f64(<4 x double> %res.i217, <4 x double> %res.i181, <4 x double> %res.i203) #13, !dbg !972 %res.i178 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i216, <4 x double> %res.i180, <4 x double> %res.i200) #13, !dbg !972 %res.i177 = fmul reassoc nsz arcp contract afn <4 x double> %res.i179, %res.i179, !dbg !978 %res.i176 = fmul reassoc nsz arcp contract afn <4 x double> %res.i178, %res.i178, !dbg !983 %ptr.1.i174 = getelementptr inbounds double, double* %ptr.0.i173, i64 %17, !dbg !986 %ptr.1.i172 = bitcast double* %ptr.1.i174 to <4 x double>*, !dbg !992 store <4 x double> %res.i177, <4 x double>* %ptr.1.i172, align 8, !dbg !992, !noalias !996 %ptr.1.i169 = getelementptr inbounds double, double* %ptr.1.i174, i64 4, !dbg !992 %ptr.2.i170 = bitcast double* %ptr.1.i169 to <4 x double>*, !dbg !992 store <4 x double> %res.i176, <4 x double>* %ptr.2.i170, align 8, !dbg !992, !noalias !996 %res.i167 = add nuw nsw i64 %17, 8, !dbg !999 %18 = icmp eq i64 %res.i167, %15, !dbg !749 br i1 %18, label %L112.loopexit, label %L19, !dbg !741 L112.loopexit: ; preds = %L19 br label %L112, !dbg !1002 L112: ; preds = %L112.loopexit, %top %.not = icmp ult i64 %15, %arraylen, !dbg !1002 br i1 %.not, label %L114, label %L278, !dbg !741 L114: ; preds = %L112 %19 = and i64 %13, 3, !dbg !1004 %ie.i = insertelement <4 x i64> undef, i64 %19, i64 0, !dbg !1012 %v.i = shufflevector <4 x i64> %ie.i, <4 x i64> undef, <4 x i32> zeroinitializer, !dbg !1012 %m.i163 = icmp uge <4 x i64> %v.i, , !dbg !1022 %res.i161 = add nsw i64 %arraylen, -5, !dbg !1024 %.not258 = icmp slt i64 %res.i161, %15, !dbg !1027 br i1 %.not258, label %L124, label %L185, !dbg !741 L124: ; preds = %L114 %res.i160 = shl nsw i64 %15, 3, !dbg !1029 %ptr.1.i155 = getelementptr inbounds i8, i8* %arrayptr5, i64 %res.i160, !dbg !1037 %ptr.2.i156 = bitcast i8* %ptr.1.i155 to <4 x double>*, !dbg !1037 %res.i159 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull %ptr.2.i156, i32 
noundef 8, <4 x i1> %m.i163, <4 x double> noundef zeroinitializer) #13, !dbg !1037, !alias.scope !108 %res.i154 = fmul nsz contract <4 x double> %res.i159, , !dbg !1040 %res.i153 = call fast <4 x double> @llvm.trunc.v4f64(<4 x double> %res.i154) #13, !dbg !1043 %res.i152 = fmul nsz contract <4 x double> %res.i159, , !dbg !1045 %res.i151 = fmul nsz contract <4 x double> %res.i153, , !dbg !1045 %res.i150 = fsub nsz contract <4 x double> %res.i152, %res.i151, !dbg !1048 %res.i149 = call fast <4 x double> @llvm.nearbyint.v4f64(<4 x double> %res.i150) #13, !dbg !1050 %res.i148 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i153, <4 x double> noundef , <4 x double> %res.i159) #13, !dbg !1052 %res.i147 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i149, <4 x double> noundef , <4 x double> %res.i148) #13, !dbg !1054 %res.i146 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i153, <4 x double> noundef , <4 x double> %res.i147) #13, !dbg !1056 %res.i145 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i149, <4 x double> noundef , <4 x double> %res.i146) #13, !dbg !1058 %res.i144 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i153, <4 x double> noundef , <4 x double> %res.i145) #13, !dbg !1060 %res.i143 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i149, <4 x double> noundef , <4 x double> %res.i144) #13, !dbg !1062 %res.i142 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i153, <4 x double> noundef , <4 x double> %res.i149) #13, !dbg !1064 %res.i141 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i142, <4 x double> noundef , <4 x double> %res.i143) #13, !dbg !1064 %res.i140 = fmul nsz contract <4 x double> %res.i141, %res.i141, !dbg !1066 %res.i139 = fptosi <4 x double> %res.i149 to <4 x i64>, !dbg !1068 %res.i138 = and <4 x i64> %res.i139, , !dbg !1070 %m.i135.not = icmp eq <4 x i64> 
%res.i138, zeroinitializer, !dbg !1073 %res.i134 = fneg nsz contract <4 x double> %res.i141, !dbg !1077 %res.i133 = select reassoc nsz arcp contract <4 x i1> %m.i135.not, <4 x double> %res.i141, <4 x double> %res.i134, !dbg !1079 %res.i130 = fmul reassoc nsz arcp contract afn <4 x double> %res.i140, %res.i140, !dbg !1082 %res.i129 = fmul reassoc nsz arcp contract afn <4 x double> %res.i130, %res.i130, !dbg !1087 %res.i128 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i140, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !1091 %res.i127 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i140, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !1091 %res.i126 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i130, <4 x double> %res.i128, <4 x double> %res.i127) #13, !dbg !1098 %res.i125 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i129, <4 x double> noundef , <4 x double> %res.i126) #13, !dbg !1101 %res.i124 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i140, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !1105 %res.i123 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i140, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !1105 %res.i122 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i130, <4 x double> %res.i124, <4 x double> %res.i123) #13, !dbg !1110 %res.i121 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i129, <4 x double> %res.i125, <4 x double> %res.i122) #13, !dbg !1113 %res.i120 = fmul nsz contract <4 x double> %res.i133, %res.i121, !dbg !1117 %res.i119 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i140, <4 x double> %res.i120, <4 x double> %res.i133) #13, !dbg !1119 %res.i118 = fmul reassoc nsz arcp contract afn <4 x double> %res.i119, %res.i119, !dbg !1121 %ptr.1.i113 = getelementptr inbounds i8, i8* %arrayptr, i64 
%res.i160, !dbg !1123 %ptr.2.i114 = bitcast i8* %ptr.1.i113 to <4 x double>*, !dbg !1123 call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %res.i118, <4 x double>* nonnull %ptr.2.i114, i32 noundef 8, <4 x i1> %m.i163) #13, !dbg !1123 br label %L278, !dbg !999 L185: ; preds = %L114 %ptr.0.i109 = bitcast i8* %arrayptr5 to double*, !dbg !1126 %ptr.1.i110 = getelementptr inbounds double, double* %ptr.0.i109, i64 %15, !dbg !1126 %ptr.1.i107 = bitcast double* %ptr.1.i110 to <4 x double>*, !dbg !1132 %res.i108 = load <4 x double>, <4 x double>* %ptr.1.i107, align 8, !dbg !1132, !alias.scope !108 %ptr.1.i101 = getelementptr inbounds double, double* %ptr.1.i110, i64 4, !dbg !1135 %ptr.2.i102 = bitcast double* %ptr.1.i101 to <4 x double>*, !dbg !1135 %res.i105 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull %ptr.2.i102, i32 noundef 8, <4 x i1> %m.i163, <4 x double> noundef zeroinitializer) #13, !dbg !1135, !alias.scope !108 %res.i99 = fmul nsz contract <4 x double> %res.i108, , !dbg !760 %res.i98 = fmul nsz contract <4 x double> %res.i105, , !dbg !766 %res.i97 = call fast <4 x double> @llvm.trunc.v4f64(<4 x double> %res.i99) #13, !dbg !769 %res.i96 = call fast <4 x double> @llvm.trunc.v4f64(<4 x double> %res.i98) #13, !dbg !774 %res.i95 = fmul nsz contract <4 x double> %res.i108, , !dbg !777 %res.i94 = fmul nsz contract <4 x double> %res.i105, , !dbg !783 %res.i93 = fmul nsz contract <4 x double> %res.i97, , !dbg !777 %res.i92 = fmul nsz contract <4 x double> %res.i96, , !dbg !783 %res.i91 = fsub nsz contract <4 x double> %res.i95, %res.i93, !dbg !786 %res.i90 = fsub nsz contract <4 x double> %res.i94, %res.i92, !dbg !791 %res.i89 = call fast <4 x double> @llvm.nearbyint.v4f64(<4 x double> %res.i91) #13, !dbg !794 %res.i88 = call fast <4 x double> @llvm.nearbyint.v4f64(<4 x double> %res.i90) #13, !dbg !799 %res.i87 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i97, <4 x double> noundef , <4 x double> %res.i108) #13, 
!dbg !802 %res.i86 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i96, <4 x double> noundef , <4 x double> %res.i105) #13, !dbg !802 %res.i85 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i89, <4 x double> noundef , <4 x double> %res.i87) #13, !dbg !809 %res.i84 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i88, <4 x double> noundef , <4 x double> %res.i86) #13, !dbg !809 %res.i83 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i97, <4 x double> noundef , <4 x double> %res.i85) #13, !dbg !816 %res.i82 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i96, <4 x double> noundef , <4 x double> %res.i84) #13, !dbg !816 %res.i81 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i89, <4 x double> noundef , <4 x double> %res.i83) #13, !dbg !823 %res.i80 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i88, <4 x double> noundef , <4 x double> %res.i82) #13, !dbg !823 %res.i79 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i97, <4 x double> noundef , <4 x double> %res.i81) #13, !dbg !830 %res.i78 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i96, <4 x double> noundef , <4 x double> %res.i80) #13, !dbg !830 %res.i77 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i89, <4 x double> noundef , <4 x double> %res.i79) #13, !dbg !837 %res.i76 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i88, <4 x double> noundef , <4 x double> %res.i78) #13, !dbg !837 %res.i75 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i97, <4 x double> noundef , <4 x double> %res.i89) #13, !dbg !844 %res.i74 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i96, <4 x double> noundef , <4 x double> %res.i88) #13, !dbg !844 %res.i73 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i75, <4 x 
double> noundef , <4 x double> %res.i77) #13, !dbg !844 %res.i72 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i74, <4 x double> noundef , <4 x double> %res.i76) #13, !dbg !844 %res.i71 = fmul nsz contract <4 x double> %res.i73, %res.i73, !dbg !851 %res.i70 = fmul nsz contract <4 x double> %res.i72, %res.i72, !dbg !857 %res.i69 = fptosi <4 x double> %res.i89 to <4 x i64>, !dbg !860 %res.i68 = fptosi <4 x double> %res.i88 to <4 x i64>, !dbg !868 %res.i67 = and <4 x i64> %res.i69, , !dbg !871 %res.i66 = and <4 x i64> %res.i68, , !dbg !877 %m.i63.not = icmp eq <4 x i64> %res.i67, zeroinitializer, !dbg !880 %m.i.not = icmp eq <4 x i64> %res.i66, zeroinitializer, !dbg !886 %res.i62 = fneg nsz contract <4 x double> %res.i73, !dbg !890 %res.i61 = fneg nsz contract <4 x double> %res.i72, !dbg !895 %res.i60 = select reassoc nsz arcp contract <4 x i1> %m.i63.not, <4 x double> %res.i73, <4 x double> %res.i62, !dbg !898 %res.i57 = select reassoc nsz arcp contract <4 x i1> %m.i.not, <4 x double> %res.i72, <4 x double> %res.i61, !dbg !898 %res.i54 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i71, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !904 %res.i53 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i70, <4 x double> noundef , <4 x double> noundef ) #13, !dbg !904 %res.i52 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i54, <4 x double> %res.i71, <4 x double> noundef ) #13, !dbg !914 %res.i51 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i53, <4 x double> %res.i70, <4 x double> noundef ) #13, !dbg !914 %res.i50 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i52, <4 x double> %res.i71, <4 x double> noundef ) #13, !dbg !921 %res.i49 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i51, <4 x double> %res.i70, <4 x double> noundef ) #13, !dbg !921 %res.i48 = call nsz contract <4 x double> 
@llvm.fmuladd.v4f64(<4 x double> %res.i50, <4 x double> %res.i71, <4 x double> noundef ) #13, !dbg !928 %res.i47 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i49, <4 x double> %res.i70, <4 x double> noundef ) #13, !dbg !928 %res.i46 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i48, <4 x double> %res.i71, <4 x double> noundef ) #13, !dbg !935 %res.i45 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i47, <4 x double> %res.i70, <4 x double> noundef ) #13, !dbg !935 %res.i44 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i46, <4 x double> %res.i71, <4 x double> noundef ) #13, !dbg !942 %res.i43 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i45, <4 x double> %res.i70, <4 x double> noundef ) #13, !dbg !942 %res.i42 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i44, <4 x double> %res.i71, <4 x double> noundef ) #13, !dbg !949 %res.i41 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i43, <4 x double> %res.i70, <4 x double> noundef ) #13, !dbg !949 %res.i40 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i42, <4 x double> %res.i71, <4 x double> noundef ) #13, !dbg !956 %res.i39 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i41, <4 x double> %res.i70, <4 x double> noundef ) #13, !dbg !956 %res.i38 = fmul nsz contract <4 x double> %res.i60, %res.i40, !dbg !963 %res.i37 = fmul nsz contract <4 x double> %res.i57, %res.i39, !dbg !969 %res.i36 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i71, <4 x double> %res.i38, <4 x double> %res.i60) #13, !dbg !972 %res.i35 = call nsz contract <4 x double> @llvm.fmuladd.v4f64(<4 x double> %res.i70, <4 x double> %res.i37, <4 x double> %res.i57) #13, !dbg !972 %res.i34 = fmul reassoc nsz arcp contract afn <4 x double> %res.i36, %res.i36, !dbg !978 %res.i33 = fmul reassoc nsz arcp contract afn <4 x 
double> %res.i35, %res.i35, !dbg !983 %ptr.0.i30 = bitcast i8* %arrayptr to double*, !dbg !1138 %ptr.1.i31 = getelementptr inbounds double, double* %ptr.0.i30, i64 %15, !dbg !1138 %ptr.1.i29 = bitcast double* %ptr.1.i31 to <4 x double>*, !dbg !1144 store <4 x double> %res.i34, <4 x double>* %ptr.1.i29, align 8, !dbg !1144, !noalias !996 %ptr.1.i26 = getelementptr inbounds double, double* %ptr.1.i31, i64 4, !dbg !1148 %ptr.2.i27 = bitcast double* %ptr.1.i26 to <4 x double>*, !dbg !1148 call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %res.i33, <4 x double>* nonnull %ptr.2.i27, i32 noundef 8, <4 x i1> %m.i163) #13, !dbg !1148 br label %L278, !dbg !999 L278: ; preds = %L185, %L124, %L112 call void @llvm.julia.gc_preserve_end(token %12) #13, !dbg !737 %20 = addrspacecast {} addrspace(10)* %6 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !1151 %arraylen_ptr6 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %20, i64 0, i32 1, !dbg !1151 %arraylen7 = load i64, i64 addrspace(11)* %arraylen_ptr6, align 8, !dbg !1151, !tbaa !23, !range !26, !alias.scope !27, !noalias !30 switch i64 %arraylen7, label %L291 [ i64 0, label %L321 i64 1, label %L289 ], !dbg !1165 L289: ; preds = %L278 %21 = addrspacecast {} addrspace(10)* %6 to double addrspace(13)* addrspace(11)*, !dbg !1166 %arrayptr11261 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %21, align 8, !dbg !1166, !tbaa !54, !alias.scope !1168, !noalias !30, !nonnull !9 %arrayref = load double, double addrspace(13)* %arrayptr11261, align 8, !dbg !1166, !tbaa !620, !alias.scope !623, !noalias !624 br label %L321, !dbg !1169 L291: ; preds = %L278 %22 = icmp ugt i64 %arraylen7, 15, !dbg !1170 br i1 %22, label %L307, label %L293, !dbg !1171 L293: ; preds = %L291 %23 = addrspacecast {} addrspace(10)* %6 to double addrspace(13)* addrspace(11)*, !dbg !1172 %arrayptr13262 = load double addrspace(13)*, double 
addrspace(13)* addrspace(11)* %23, align 8, !dbg !1172, !tbaa !54, !alias.scope !1168, !noalias !30, !nonnull !9 %arrayref14 = load double, double addrspace(13)* %arrayptr13262, align 8, !dbg !1172, !tbaa !620, !alias.scope !623, !noalias !624 %24 = getelementptr inbounds double, double addrspace(13)* %arrayptr13262, i64 1, !dbg !1174 %arrayref17 = load double, double addrspace(13)* %24, align 8, !dbg !1174, !tbaa !620, !alias.scope !623, !noalias !624 %25 = fadd double %arrayref14, %arrayref17, !dbg !1176 %.not263264 = icmp ugt i64 %arraylen7, 2, !dbg !1179 br i1 %.not263264, label %L302.preheader, label %L321, !dbg !1180 L302.preheader: ; preds = %L293 br label %L302, !dbg !1180 L302: ; preds = %L302.preheader, %L302 %iv1 = phi i64 [ 0, %L302.preheader ], [ %iv.next2, %L302 ] %value_phi18265 = phi double [ %29, %L302 ], [ %25, %L302.preheader ] %26 = add nuw nsw i64 %iv1, 2, !dbg !1181 %iv.next2 = add nuw nsw i64 %iv1, 1, !dbg !1181 %27 = add nuw nsw i64 %26, 1, !dbg !1181 %28 = getelementptr inbounds double, double addrspace(13)* %arrayptr13262, i64 %26, !dbg !1183 %arrayref23 = load double, double addrspace(13)* %28, align 8, !dbg !1183, !tbaa !620, !alias.scope !623, !noalias !624 %29 = fadd double %value_phi18265, %arrayref23, !dbg !1184 %exitcond.not = icmp eq i64 %27, %arraylen7, !dbg !1179 br i1 %exitcond.not, label %L321.loopexit, label %L302, !dbg !1180 L307: ; preds = %L291 %30 = call fastcc double @julia_mapreduce_impl_3742({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %6, i64 noundef signext 1, i64 signext %arraylen7) #13, !dbg !1187 br label %L321, !dbg !1188 L321.loopexit: ; preds = %L302 br label %L321, !dbg !1164 L321: ; preds = %L321.loopexit, %L307, %L293, %L289, %L278 %value_phi9 = phi double [ %arrayref, %L289 ], [ %30, %L307 ], [ 0.000000e+00, %L278 ], [ %25, %L293 ], [ %29, %L321.loopexit ] ret double %value_phi9, !dbg !1164 } Unimplemented masked atomic fadd for ptr: %"ptr.2.i156'ipc_unwrap" = 
bitcast i8* %"ptr.1.i155'ipg_unwrap" to <4 x double>*, !dbg !508 dif: %796 = load <4 x double>, <4 x double>* %"res.i159'de", align 32, !dbg !508 mask: %m.i163_unwrap = icmp uge <4 x i64> %"v.i'_unwrap", , !dbg !598 orig: %res.i159 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* nonnull %ptr.2.i156, i32 noundef 8, <4 x i1> %m.i163, <4 x double> noundef zeroinitializer) #13, !dbg !469, !alias.scope !108 Stacktrace: [1] macro expansion @ ~/.julia/packages/VectorizationBase/LqJbS/src/llvm_intrin/memory_addr.jl:997 [2] __vload @ ~/.julia/packages/VectorizationBase/LqJbS/src/llvm_intrin/memory_addr.jl:997 [3] _vload @ ~/.julia/packages/VectorizationBase/LqJbS/src/strided_pointers/stridedpointers.jl:105 [4] macro expansion @ ~/.julia/packages/LoopVectorization/tIJUA/src/reconstruct_loopset.jl:1107 [5] _turbo_! @ ~/.julia/packages/LoopVectorization/tIJUA/src/reconstruct_loopset.jl:1107 [6] macro expansion @ ~/.julia/packages/LoopVectorization/tIJUA/src/condense_loopset.jl:1179 [7] simple_lv_loop @ ./REPL[3]:3 Stacktrace: ⋮ internal @ Enzyme.Compiler, Unknown [7] macro expansion @ ~/.julia/packages/LoopVectorization/tIJUA/src/condense_loopset.jl:1179 [inlined] [8] simple_lv_loop @ ./REPL[3]:3 [inlined] [9] diffejulia_simple_lv_loop_3488wrap @ ./REPL[3]:0 ⋮ internal @ Unknown [15] gradient(rm::ReverseMode{false, FFIABI, false}, f::typeof(simple_lv_loop), x::Vector{Float64}) @ Enzyme ~/.julia/packages/Enzyme/YDcYf/src/Enzyme.jl:1027 [16] top-level scope @ REPL[6]:1 [17] top-level scope @ none:1 Use `err` to retrieve the full stack trace. ```
avik-pal commented 2 months ago

@wsmoses this needs to be reopened. I don't seem to have that option; I'm not sure why...