EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
422 stars 57 forks source link

Enzyme modifies entries of an array in an inactive struct #1569

Open jlk9 opened 3 days ago

jlk9 commented 3 days ago

This is a continuation of issue 485 in KernelAbstractions.jl, which now appears to be more of an Enzyme issue.

Given this kernel called on CPU and function wrapper, where the mesh of our model should be an inactive type:

# Setting meshes to inactive types:
# Registers an Enzyme rule that marks every subtype of HorzMesh as inactive.
# Per the report, commenting this rule out makes the unexpected change to
# mesh.Edges.dcEdge go away.
Enzyme.EnzymeRules.inactive_type(::Type{T} where T <:HorzMesh) = true

# KernelAbstractions kernel: scales GradEdge in place, dividing each entry
# GradEdge[k, iEdge] by dcEdge[iEdge].
#   dcEdge   - only read inside the kernel; passed with @Const
#   GradEdge - indexed as [k, iEdge]; overwritten in place
@kernel function GradientOnEdgeModified(@Const(dcEdge), GradEdge)
    # global indices over nEdges
    iEdge, k = @index(Global, NTuple)

    # In-place update of GradEdge; dcEdge appears only on the right-hand side.
    @inbounds GradEdge[k, iEdge] = GradEdge[k, iEdge] / dcEdge[iEdge]

    @synchronize()
end

# Runs GradientOnEdgeModified on `grad` (mutating it in place) and returns the
# sum of squares of the first nEdges entries of the result.
#   grad    - array whose second dimension is taken as nEdges
#   mesh    - HorzMesh; only mesh.Edges.dcEdge is read here
#   backend - KernelAbstractions backend (keyword, defaults to KA.CPU())
function gradient_normSq(grad, mesh::HorzMesh; backend=KA.CPU())

    nEdges = size(grad)[2]
    vert_levels = 1

    # Instantiate the kernel for the backend and launch it over (nEdges, vert_levels).
    kernel! = GradientOnEdgeModified(backend)
    kernel!(mesh.Edges.dcEdge, grad, workgroupsize=64, ndrange=(nEdges, vert_levels))

    KA.synchronize(backend)

    # Left disabled on purpose: the report notes that enabling this @show makes
    # the unexpected change to dcEdge disappear.
    #@show grad

    # Linear indexing into grad; accumulates the squares of entries 1..nEdges.
    normSq = 0.0
    for i = 1:nEdges
        normSq += grad[i]^2
    end

    return normSq
end

We stow away the pre-AD mesh with old_mesh = deepcopy(mesh). Then we call autodiff on a given set of gradient values and a HorzMesh object:

# Reverse-mode differentiation of gradient_normSq. Both arguments are wrapped
# in Duplicated, pairing each primal value with its shadow (d_gradNum, d_mesh).
d_normSq = autodiff(Enzyme.Reverse,
                    gradient_normSq,
                    Duplicated(gradNum, d_gradNum),
                    Duplicated(mesh, d_mesh))

and see that one of the arrays of the mesh is changed:

isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge) = false

mesh.Edges.dcEdge is used in our gradient kernel but not modified, so it should not be changed with a call to autodiff. But two seemingly unrelated changes to the code eliminate the bug:

  1. Adding the @show grad statement to our gradient norm function.
  2. Commenting out the inactive type rule for mesh objects.

This can be replicated with this code: https://github.com/jlk9/MPAS-Ocean.jl/tree/bug-reduce. I've tried reducing further but had difficulties with the bug disappearing. I'll update if further reductions or observations are done.

wsmoses commented 3 days ago

Just because something is inactive doesn't mean it's immutable. You could have other, unrelated computations that modify it in place; these aren't differentiated, and Enzyme would also run those same updates as in the original function.

wsmoses commented 3 days ago

Modifying your code to test this, I don't believe this is a bug per above:

using MOKA: HorzMesh, ReadHorzMesh
using KernelAbstractions
using Enzyme

Enzyme.API.printall!(true)

import Downloads
import KernelAbstractions as KA

backend = KA.CPU()
#=
"""
    HorzMesh

A struct, comprised of SoA, describing a 2-D TRiSK mesh
"""
struct HorzMesh{ET}
    Edges::ET
end

# these are line segments
@kwdef struct Edges{I,FV} 
    dcEdge::FV
    nEdges::I
end

edgeArray = KA.zeros(backend, Float64, 2306)
for i = 1:2306
    edgeArray[i] = i + rand()
end

edges = Edges(edgeArray, 2306)
mesh = HorzMesh(edges)
=#
# Setting meshes to inactive types:
# Marks every subtype of HorzMesh as inactive for Enzyme, here written with the
# Type{<:HorzMesh} shorthand.
Enzyme.EnzymeRules.inactive_type(::Type{<:HorzMesh}) = true

# KernelAbstractions kernel: scales GradEdge in place, dividing each entry
# GradEdge[k, iEdge] by dcEdge[iEdge].
#   dcEdge   - only read inside the kernel; passed with @Const
#   GradEdge - indexed as [k, iEdge]; overwritten in place
@kernel function GradientOnEdgeModified(@Const(dcEdge), GradEdge)
    # global indices over nEdges
    iEdge, k = @index(Global, NTuple)

    # In-place update of GradEdge; dcEdge appears only on the right-hand side.
    @inbounds GradEdge[k, iEdge] = GradEdge[k, iEdge] / dcEdge[iEdge]

    @synchronize()
end

# As a cleaner / easier to read test, let's create an outer function that measures the norm of the gradient computed by kernel:
#   grad    - array whose second dimension is taken as nEdges; mutated in place
#   mesh    - HorzMesh; only mesh.Edges.dcEdge is read here
#   backend - KernelAbstractions backend (keyword, defaults to KA.CPU())
# Returns the sum of squares of entries 1..nEdges of the scaled grad.
function gradient_normSq(grad, mesh::HorzMesh; backend=KA.CPU())

    nEdges = size(grad)[2]
    vert_levels = 1

    # New modified kernel:
    kernel! = GradientOnEdgeModified(backend)
    kernel!(mesh.Edges.dcEdge, grad, workgroupsize=64, ndrange=(nEdges, vert_levels))

    KA.synchronize(backend)

    # NOTE: this @show is active in this variant; the reporter's reply states
    # that having it enabled makes the change to dcEdge disappear.
    @show grad

    # Linear indexing into grad; accumulates the squares of entries 1..nEdges.
    normSq = 0.0
    for i = 1:nEdges
        normSq += grad[i]^2
    end

    return normSq
end

# Download the test mesh file to mesh_fn and read it with ReadHorzMesh on the
# selected backend.
mesh_url = "https://gist.github.com/mwarusz/f8caf260398dbe140d2102ec46a41268/raw/e3c29afbadc835797604369114321d93fd69886d/PlanarPeriodic48x48.nc"
mesh_fn  = "MokaMesh.nc"

Downloads.download(mesh_url, mesh_fn)

mesh = ReadHorzMesh(mesh_fn; backend=backend)

# Let's recreate all the variables:
# gradNum is a 1 x nEdges array filled with 1, 2, ..., nEdges.
gradNum = KA.zeros(backend, Float64, (1, mesh.Edges.nEdges))
for i = 1:mesh.Edges.nEdges
    gradNum[1,i] = gradNum[1,i] + i
end

# Shadow values for reverse mode: a zeroed array matching gradNum, and
# Enzyme.make_zero(mesh) for the mesh.
d_gradNum = KA.zeros(backend, Float64, (1, mesh.Edges.nEdges))
d_mesh    = Enzyme.make_zero(mesh)

# Baseline: snapshot the mesh, run the plain (non-differentiated) function, then
# compare mesh fields against the snapshot.
# Note that this call also divides gradNum in place, so the autodiff call
# further down starts from the already-scaled values.
old_mesh = deepcopy(mesh)
gradient_normSq(gradNum, mesh)

@show isequal(mesh.PrimaryCells, old_mesh.PrimaryCells)
@show isequal(mesh.DualCells, old_mesh.DualCells)

# Field-by-field comparison of mesh.Edges against the snapshot.
@show isequal(mesh.Edges.nEdges, old_mesh.Edges.nEdges)
@show isequal(mesh.Edges.xᵉ, old_mesh.Edges.xᵉ)
@show isequal(mesh.Edges.yᵉ, old_mesh.Edges.yᵉ)
@show isequal(mesh.Edges.zᵉ, old_mesh.Edges.zᵉ)
@show isequal(mesh.Edges.fᵉ, old_mesh.Edges.fᵉ)
@show isequal(mesh.Edges.nEdgesOnEdge, old_mesh.Edges.nEdgesOnEdge)
@show isequal(mesh.Edges.cellsOnEdge, old_mesh.Edges.cellsOnEdge)
@show isequal(mesh.Edges.verticesOnEdge, old_mesh.Edges.verticesOnEdge)
@show isequal(mesh.Edges.edgesOnEdge, old_mesh.Edges.edgesOnEdge)
@show isequal(mesh.Edges.weightsOnEdge, old_mesh.Edges.weightsOnEdge)
@show isequal(mesh.Edges.dvEdge, old_mesh.Edges.dvEdge)
@show isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge)
@show isequal(mesh.Edges.angleEdge, old_mesh.Edges.angleEdge)

@show old_mesh.Edges.dcEdge
@show mesh.Edges.dcEdge
# Rebind `mesh` to a fresh copy of the snapshot, then differentiate.
# d_mesh was created from the previous `mesh` object, before this rebinding.
mesh = deepcopy(old_mesh)
d_normSq = autodiff(Enzyme.Reverse,
                    gradient_normSq,
                    Duplicated(gradNum, d_gradNum),
                    Duplicated(mesh, d_mesh))

# Same comparisons as after the forward-only run, now after autodiff.
@show isequal(mesh.PrimaryCells, old_mesh.PrimaryCells)
@show isequal(mesh.DualCells, old_mesh.DualCells)

@show isequal(mesh.Edges.nEdges, old_mesh.Edges.nEdges)
@show isequal(mesh.Edges.xᵉ, old_mesh.Edges.xᵉ)
@show isequal(mesh.Edges.yᵉ, old_mesh.Edges.yᵉ)
@show isequal(mesh.Edges.zᵉ, old_mesh.Edges.zᵉ)
@show isequal(mesh.Edges.fᵉ, old_mesh.Edges.fᵉ)
@show isequal(mesh.Edges.nEdgesOnEdge, old_mesh.Edges.nEdgesOnEdge)
@show isequal(mesh.Edges.cellsOnEdge, old_mesh.Edges.cellsOnEdge)
@show isequal(mesh.Edges.verticesOnEdge, old_mesh.Edges.verticesOnEdge)
@show isequal(mesh.Edges.edgesOnEdge, old_mesh.Edges.edgesOnEdge)
@show isequal(mesh.Edges.weightsOnEdge, old_mesh.Edges.weightsOnEdge)
@show isequal(mesh.Edges.dvEdge, old_mesh.Edges.dvEdge)
@show isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge)
@show isequal(mesh.Edges.angleEdge, old_mesh.Edges.angleEdge)

@show old_mesh.Edges.dcEdge
@show mesh.Edges.dcEdge

Non-differentiated code

isequal(mesh.PrimaryCells, old_mesh.PrimaryCells) = false
isequal(mesh.DualCells, old_mesh.DualCells) = false
isequal(mesh.Edges.nEdges, old_mesh.Edges.nEdges) = true
isequal(mesh.Edges.xᵉ, old_mesh.Edges.xᵉ) = true
isequal(mesh.Edges.yᵉ, old_mesh.Edges.yᵉ) = true
isequal(mesh.Edges.zᵉ, old_mesh.Edges.zᵉ) = true
isequal(mesh.Edges.fᵉ, old_mesh.Edges.fᵉ) = true
isequal(mesh.Edges.nEdgesOnEdge, old_mesh.Edges.nEdgesOnEdge) = true
isequal(mesh.Edges.cellsOnEdge, old_mesh.Edges.cellsOnEdge) = true
isequal(mesh.Edges.verticesOnEdge, old_mesh.Edges.verticesOnEdge) = true
isequal(mesh.Edges.edgesOnEdge, old_mesh.Edges.edgesOnEdge) = true
isequal(mesh.Edges.weightsOnEdge, old_mesh.Edges.weightsOnEdge) = true
isequal(mesh.Edges.dvEdge, old_mesh.Edges.dvEdge) = true
isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge) = true
isequal(mesh.Edges.angleEdge, old_mesh.Edges.angleEdge) = true

Enzyme results

isequal(mesh.PrimaryCells, old_mesh.PrimaryCells) = false
isequal(mesh.DualCells, old_mesh.DualCells) = false
isequal(mesh.Edges.nEdges, old_mesh.Edges.nEdges) = true
isequal(mesh.Edges.xᵉ, old_mesh.Edges.xᵉ) = true
isequal(mesh.Edges.yᵉ, old_mesh.Edges.yᵉ) = true
isequal(mesh.Edges.zᵉ, old_mesh.Edges.zᵉ) = true
isequal(mesh.Edges.fᵉ, old_mesh.Edges.fᵉ) = true
isequal(mesh.Edges.nEdgesOnEdge, old_mesh.Edges.nEdgesOnEdge) = true
isequal(mesh.Edges.cellsOnEdge, old_mesh.Edges.cellsOnEdge) = true
isequal(mesh.Edges.verticesOnEdge, old_mesh.Edges.verticesOnEdge) = true
isequal(mesh.Edges.edgesOnEdge, old_mesh.Edges.edgesOnEdge) = true
isequal(mesh.Edges.weightsOnEdge, old_mesh.Edges.weightsOnEdge) = true
isequal(mesh.Edges.dvEdge, old_mesh.Edges.dvEdge) = true
isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge) = true
isequal(mesh.Edges.angleEdge, old_mesh.Edges.angleEdge) = true

These are the same (in essence Enzyme behaves as if it calls the original function).

jlk9 commented 3 days ago

Enzyme is definitely affecting mesh.Edges.dcEdge. The code block you posted includes the @show grad statement in gradient_normSq, which I already found makes the change to dcEdge disappear.

using MOKA: HorzMesh, ReadHorzMesh
using KernelAbstractions
using Enzyme

import Downloads
import KernelAbstractions as KA

backend = KA.CPU()

# Setting meshes to inactive types:
# Registers an Enzyme rule that marks every subtype of HorzMesh as inactive.
# This rule is one of the two conditions under which the reporter observes the
# change to dcEdge (the other being the disabled @show in gradient_normSq).
Enzyme.EnzymeRules.inactive_type(::Type{T} where T <:HorzMesh) = true

# KernelAbstractions kernel: scales GradEdge in place, dividing each entry
# GradEdge[k, iEdge] by dcEdge[iEdge].
#   dcEdge   - only read inside the kernel; passed with @Const
#   GradEdge - indexed as [k, iEdge]; overwritten in place
@kernel function GradientOnEdgeModified(@Const(dcEdge), GradEdge)
    # global indices over nEdges
    iEdge, k = @index(Global, NTuple)

    # In-place update of GradEdge; dcEdge appears only on the right-hand side.
    @inbounds GradEdge[k, iEdge] = GradEdge[k, iEdge] / dcEdge[iEdge]

    @synchronize()
end

# Runs GradientOnEdgeModified on `grad` (mutating it in place) and returns the
# sum of squares of the first nEdges entries of the result.
#   grad    - array whose second dimension is taken as nEdges
#   mesh    - HorzMesh; only mesh.Edges.dcEdge is read here
#   backend - KernelAbstractions backend (keyword, defaults to KA.CPU())
function gradient_normSq(grad, mesh::HorzMesh; backend=KA.CPU())

    nEdges = size(grad)[2]
    vert_levels = 1

    # New modified kernel:
    kernel! = GradientOnEdgeModified(backend)
    kernel!(mesh.Edges.dcEdge, grad, workgroupsize=64, ndrange=(nEdges, vert_levels))

    KA.synchronize(backend)

    # Kept disabled: with this @show enabled the change to dcEdge is not observed.
    #@show grad

    # Linear indexing into grad; accumulates the squares of entries 1..nEdges.
    normSq = 0.0
    for i = 1:nEdges
        normSq += grad[i]^2
    end

    return normSq
end

# Download the test mesh file to mesh_fn and read it with ReadHorzMesh on the
# selected backend.
mesh_url = "https://gist.github.com/mwarusz/f8caf260398dbe140d2102ec46a41268/raw/e3c29afbadc835797604369114321d93fd69886d/PlanarPeriodic48x48.nc"
mesh_fn  = "MokaMesh.nc"

Downloads.download(mesh_url, mesh_fn)

mesh = ReadHorzMesh(mesh_fn; backend=backend)

# Let's recreate all the variables:
# gradNum is a 1 x nEdges array filled with 1, 2, ..., nEdges.
gradNum = KA.zeros(backend, Float64, (1, mesh.Edges.nEdges))
for i = 1:mesh.Edges.nEdges
    gradNum[1,i] = gradNum[1,i] + i
end

# Shadow values for reverse mode.
d_gradNum = KA.zeros(backend, Float64, (1, mesh.Edges.nEdges))
d_mesh    = Enzyme.make_zero(mesh)

# Snapshot of the mesh taken before differentiation, used for the check below.
old_mesh = deepcopy(mesh)

d_normSq = autodiff(Enzyme.Reverse,
                    gradient_normSq,
                    Duplicated(gradNum, d_gradNum),
                    Duplicated(mesh, d_mesh))

# Forward-only alternative to the autodiff call above; the reporter swaps the
# two to compare (forward-only leaves dcEdge equal to the snapshot).
#gradient_normSq(gradNum, mesh)

# Reported to print `false` after autodiff and `true` after the forward-only run.
@show isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge)

Here's what happens when I run the above script (available in the link above) comparing mesh.Edges.dcEdge pre and post autodiff with (1) mesh specified as inactive and (2) no @show grad statement in gradient_normSq:

isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge) = false

When I comment out the call to autodiff and instead uncomment the call to just the forward function gradient_normSq (still without the @show statement and with mesh as an inactive type), this happens:

isequal(mesh.Edges.dcEdge, old_mesh.Edges.dcEdge) = true

In this circumstance Enzyme affects the values of dcEdge when the original function, on its own, does not.

jlk9 commented 3 days ago

Reduced the example in https://github.com/jlk9/MPAS-Ocean.jl/tree/bug-reduce further. Now it doesn't actually use the MOKA.jl package, instead using a minimal mesh struct as a fill-in (found in bugMesh.jl), and since the datafile MokaMesh is already downloaded we removed that portion too. The change of mesh.Edges.dcEdge values is still present when calling autodiff.

wsmoses commented 2 days ago

Okay, I see what's happening here: HorzMesh should likely not be marked inactive, since you are differentiating with respect to data within it. This is implicitly causing runtime-activity-style issues, but without throwing an error for them.

cc @vchuravy

wsmoses commented 2 days ago
using KernelAbstractions
using Enzyme
Enzyme.API.printall!(true)
import KernelAbstractions as KA

backend = KA.CPU()

# Minimal stand-in for the mesh: a single parametric field, dcEdge.
struct Edges{FV}
    dcEdge::FV
end

# Marks every Edges{...} type as inactive for Enzyme.
Enzyme.EnzymeRules.inactive_type(::Type{T}) where T <: Edges = true

# Reduced kernel: copies dcEdge[1] into GradEdge[1]. dcEdge is only read and is
# passed with @Const.
@kernel function GradientOnEdgeModified(@Const(dcEdge), GradEdge)
    @inbounds GradEdge[1] = dcEdge[1]
    @synchronize()
end

# Launches the reduced kernel with mesh.dcEdge and grad, then returns grad[1].
#   grad - array written by the kernel (entry 1 is overwritten)
#   mesh - object with a dcEdge field; only read here
function gradient_normSq(grad, mesh)
    # New modified kernel:
    kernel! = GradientOnEdgeModified(KA.CPU())
    kernel!(mesh.dcEdge, grad, ndrange=(1,1))

    # Synchronizes on the script-level global `backend`, whereas the kernel
    # above is instantiated with a fresh KA.CPU().
    KA.synchronize(backend)

    return @inbounds grad[1]
end

# One-element mesh used by the reduced reproducer.
mesh = Edges([3.14])

@show mesh

# Let's recreate all the variables:
gradNum = zeros(1)
d_gradNum = zeros(1)

# Forward run on deep copies, so gradNum and mesh themselves are not touched.
gradient_normSq(deepcopy(gradNum), deepcopy(mesh))

d_mesh    = Enzyme.make_zero(mesh)

# Snapshot of the mesh taken before differentiation.
old_mesh = deepcopy(mesh)

d_normSq = autodiff(Enzyme.Reverse,
                    gradient_normSq,
                    Duplicated(gradNum, d_gradNum),
                    Duplicated(mesh, d_mesh))

#gradient_normSq(gradNum, mesh)

# Compare against the snapshot. The output quoted right after this script shows
# dcEdge going from 3.14 to 4.140000000000001.
# NOTE(review): that is a +1.0 shift; presumably the adjoint ends up accumulated
# into the primal array because Edges is marked inactive — confirm.
@show isequal(mesh.dcEdge, old_mesh.dcEdge)
@show mesh.dcEdge
@show old_mesh.dcEdge
isequal(mesh.dcEdge, old_mesh.dcEdge) = false
mesh.dcEdge = [4.140000000000001]
old_mesh.dcEdge = [3.14]
wsmoses commented 2 days ago

and with no KA

using Enzyme
Enzyme.API.printall!(true)

# Minimal stand-in for the mesh: a single parametric field, dcEdge.
struct Edges{FV}
    dcEdge::FV
end

# Marks every Edges{...} type as inactive for Enzyme.
Enzyme.EnzymeRules.inactive_type(::Type{T}) where T <: Edges = true

# Plain-Julia replacement for the kernel: writes dcEdge[1] into GradEdge[1].
# @noinline asks the compiler not to inline this function into its caller.
@noinline function GradientOnEdgeModified(dcEdge, GradEdge)
    @inbounds GradEdge[1] = dcEdge[1]
end

# Copies mesh.dcEdge[1] into grad[1] via GradientOnEdgeModified and returns grad[1].
#   grad - array whose first entry is overwritten
#   mesh - object with a dcEdge field; only read here
function gradient_normSq(grad, mesh)
    GradientOnEdgeModified(mesh.dcEdge, grad)

    return @inbounds grad[1]
end

# One-element mesh used by the reproducer without KernelAbstractions.
mesh = Edges([3.14])

@show mesh

# Let's recreate all the variables:
gradNum = zeros(1)
d_gradNum = zeros(1)

# Forward run on deep copies, so gradNum and mesh themselves are not touched.
gradient_normSq(deepcopy(gradNum), deepcopy(mesh))

d_mesh    = Enzyme.make_zero(mesh)

# Snapshot of the mesh taken before differentiation.
old_mesh = deepcopy(mesh)

d_normSq = autodiff(Enzyme.Reverse,
                    gradient_normSq,
                    Duplicated(gradNum, d_gradNum),
                    Duplicated(mesh, d_mesh))

#gradient_normSq(gradNum, mesh)

# Compare the mesh against the snapshot taken before autodiff.
@show isequal(mesh.dcEdge, old_mesh.dcEdge)
@show mesh.dcEdge
@show old_mesh.dcEdge
mesh = Edges{Vector{Float64}}([3.14])
after simplification :
; Function Attrs: mustprogress nofree willreturn
define "enzyme_type"="{[-1]:Float@double}" "enzymejl_parmtype"="136531977971616" "enzymejl_parmtype_ref"="1" double @preprocess_julia_gradient_normSq_2121_inner.1({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %0, [1 x {} addrspace(10)*] "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136532126396496" "enzymejl_parmtype_ref"="1" %1) local_unnamed_addr #5 !dbg !53 {
entry:
  %.fca.0.extract = extractvalue [1 x {} addrspace(10)*] %1, 0, !dbg !54
  %2 = call {}*** @julia.get_pgcstack() #6
  %ptls_field.i4 = getelementptr inbounds {}**, {}*** %2, i64 2
  %3 = bitcast {}*** %ptls_field.i4 to i64***
  %ptls_load.i56 = load i64**, i64*** %3, align 8, !tbaa !10
  %4 = getelementptr inbounds i64*, i64** %ptls_load.i56, i64 2
  %safepoint.i = load i64*, i64** %4, align 8, !tbaa !14
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint.i) #6, !dbg !55
  fence syncscope("singlethread") seq_cst
  %5 = icmp ne {} addrspace(10)* %.fca.0.extract, null
  call void @llvm.assume(i1 noundef %5) #6
  call fastcc void @julia_GradientOnEdgeModified_2124({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %.fca.0.extract, {} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) %0) #7, !dbg !57
  %6 = addrspacecast {} addrspace(10)* %0 to double addrspace(13)* addrspace(11)*, !dbg !58
  %arrayptr.i7 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %6, align 16, !dbg !58, !tbaa !23, !alias.scope !60, !noalias !31, !nonnull !8
  %arrayref.i = load double, double addrspace(13)* %arrayptr.i7, align 8, !dbg !58, !tbaa !36, !alias.scope !39, !noalias !40
  ret double %arrayref.i, !dbg !54
}

after simplification :
; Function Attrs: mustprogress nofree noinline willreturn
define internal fastcc void @preprocess_julia_GradientOnEdgeModified_2124({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %0, {} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %1) unnamed_addr #6 !dbg !80 {
top:
  %2 = call {}*** @julia.get_pgcstack() #7
  %ptls_field5 = getelementptr inbounds {}**, {}*** %2, i64 2
  %3 = bitcast {}*** %ptls_field5 to i64***
  %ptls_load67 = load i64**, i64*** %3, align 8, !tbaa !10
  %4 = getelementptr inbounds i64*, i64** %ptls_load67, i64 2
  %safepoint = load i64*, i64** %4, align 8, !tbaa !14
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint) #7, !dbg !81
  fence syncscope("singlethread") seq_cst
  %5 = addrspacecast {} addrspace(10)* %0 to double addrspace(13)* addrspace(11)*, !dbg !82
  %arrayptr8 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %5, align 16, !dbg !82, !tbaa !23, !alias.scope !84, !noalias !31, !nonnull !8
  %arrayref = load double, double addrspace(13)* %arrayptr8, align 8, !dbg !82, !tbaa !36, !alias.scope !39, !noalias !40
  %6 = addrspacecast {} addrspace(10)* %1 to double addrspace(13)* addrspace(11)*, !dbg !87
  %arrayptr39 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %6, align 16, !dbg !87, !tbaa !23, !alias.scope !84, !noalias !31, !nonnull !8
  store double %arrayref, double addrspace(13)* %arrayptr39, align 8, !dbg !87, !tbaa !36, !alias.scope !39, !noalias !88
  ret void, !dbg !87
}

; Function Attrs: mustprogress nofree noinline willreturn
define internal fastcc void @augmented_julia_GradientOnEdgeModified_2124({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %0, {} addrspace(10)* nocapture nofree align 16 "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %"'", {} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, 
[-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %1, {} addrspace(10)* nocapture nofree align 16 "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %"'1") unnamed_addr #6 !dbg !89 {
top:
  %2 = call {}*** @julia.get_pgcstack() #7
  %ptls_field5 = getelementptr inbounds {}**, {}*** %2, i64 2
  %3 = bitcast {}*** %ptls_field5 to i64***
  %ptls_load67 = load i64**, i64*** %3, align 8, !tbaa !10, !alias.scope !90, !noalias !93
  %4 = getelementptr inbounds i64*, i64** %ptls_load67, i64 2
  %safepoint = load i64*, i64** %4, align 8, !tbaa !14, !alias.scope !95, !noalias !98
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint) #7, !dbg !100
  fence syncscope("singlethread") seq_cst
  %"'ipc2" = addrspacecast {} addrspace(10)* %"'" to double addrspace(13)* addrspace(11)*, !dbg !101
  %5 = addrspacecast {} addrspace(10)* %0 to double addrspace(13)* addrspace(11)*, !dbg !101
  %"arrayptr8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"'ipc2", align 16, !dbg !101, !tbaa !23, !alias.scope !103, !noalias !108, !nonnull !8
  %arrayptr8 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %5, align 16, !dbg !101, !tbaa !23, !alias.scope !110, !noalias !111, !nonnull !8
  %arrayref = load double, double addrspace(13)* %arrayptr8, align 8, !dbg !101, !tbaa !36, !alias.scope !112, !noalias !115
  %"'ipc" = addrspacecast {} addrspace(10)* %"'1" to double addrspace(13)* addrspace(11)*, !dbg !117
  %6 = addrspacecast {} addrspace(10)* %1 to double addrspace(13)* addrspace(11)*, !dbg !117
  %"arrayptr39'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"'ipc", align 16, !dbg !117, !tbaa !23, !alias.scope !118, !noalias !121, !nonnull !8
  %arrayptr39 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %6, align 16, !dbg !117, !tbaa !23, !alias.scope !123, !noalias !124, !nonnull !8
  store double %arrayref, double addrspace(13)* %arrayptr39, align 8, !dbg !117, !tbaa !36, !alias.scope !125, !noalias !128
  ret void, !dbg !117
}

; Function Attrs: mustprogress nofree noinline willreturn
define internal fastcc void @diffejulia_GradientOnEdgeModified_2124({} addrspace(10)* nocapture nofree readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %0, {} addrspace(10)* nocapture nofree align 16 "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %"'", {} addrspace(10)* nocapture nofree readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, 
[-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %1, {} addrspace(10)* nocapture nofree align 16 "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %"'1") unnamed_addr #6 !dbg !130 {
top:
  %"arrayref'de" = alloca double, align 8
  %2 = getelementptr double, double* %"arrayref'de", i64 0
  store double 0.000000e+00, double* %2, align 8
  %3 = call {}*** @julia.get_pgcstack() #7
  %"'ipc2" = addrspacecast {} addrspace(10)* %"'" to double addrspace(13)* addrspace(11)*, !dbg !131
  %"arrayptr8'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"'ipc2", align 16, !dbg !131, !tbaa !23, !alias.scope !133, !noalias !136, !nonnull !8
  %"'ipc" = addrspacecast {} addrspace(10)* %"'1" to double addrspace(13)* addrspace(11)*, !dbg !138
  %"arrayptr39'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"'ipc", align 16, !dbg !138, !tbaa !23, !alias.scope !139, !noalias !142, !nonnull !8
  br label %inverttop, !dbg !138

inverttop:                                        ; preds = %top
  %4 = load double, double addrspace(13)* %"arrayptr39'ipl", align 8, !dbg !138, !tbaa !36, !alias.scope !144, !noalias !147
  store double 0.000000e+00, double addrspace(13)* %"arrayptr39'ipl", align 8, !dbg !138, !tbaa !36, !alias.scope !144, !noalias !147
  %5 = load double, double* %"arrayref'de", align 8, !dbg !138
  %6 = fadd fast double %5, %4, !dbg !138
  store double %6, double* %"arrayref'de", align 8, !dbg !138
  %7 = load double, double* %"arrayref'de", align 8, !dbg !131
  store double 0.000000e+00, double* %"arrayref'de", align 8, !dbg !131
  %8 = load double, double addrspace(13)* %"arrayptr8'ipl", align 8, !dbg !131, !tbaa !36, !alias.scope !149, !noalias !152
  %9 = fadd fast double %8, %7, !dbg !131
  store double %9, double addrspace(13)* %"arrayptr8'ipl", align 8, !dbg !131, !tbaa !36, !alias.scope !149, !noalias !152
  fence syncscope("singlethread") seq_cst
  fence syncscope("singlethread") seq_cst
  ret void
}

; Function Attrs: mustprogress nofree willreturn
define internal "enzyme_type"="{[-1]:Float@double}" "enzymejl_parmtype"="136531977971616" "enzymejl_parmtype_ref"="1" void @diffejulia_gradient_normSq_2121_inner.1({} addrspace(10)* nocapture nofree noundef nonnull readonly align 16 dereferenceable(40) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %0, {} addrspace(10)* nocapture nofree align 16 "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136531905980448" "enzymejl_parmtype_ref"="2" %"'", [1 x {} addrspace(10)*] "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, 
[-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136532126396496" "enzymejl_parmtype_ref"="1" %1, [1 x {} addrspace(10)*] "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Integer, [-1,9]:Integer, [-1,10]:Integer, [-1,11]:Integer, [-1,12]:Integer, [-1,13]:Integer, [-1,14]:Integer, [-1,15]:Integer, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer, [-1,32]:Integer, [-1,33]:Integer, [-1,34]:Integer, [-1,35]:Integer, [-1,36]:Integer, [-1,37]:Integer, [-1,38]:Integer, [-1,39]:Integer}" "enzymejl_parmtype"="136532126396496" "enzymejl_parmtype_ref"="1" %"'1", double %differeturn) local_unnamed_addr #5 !dbg !63 {
entry:
  %"arrayref.i'de" = alloca double, align 8
  %2 = getelementptr double, double* %"arrayref.i'de", i64 0
  store double 0.000000e+00, double* %2, align 8
  %.fca.0.extract = extractvalue [1 x {} addrspace(10)*] %1, 0, !dbg !64
  %".fca.0.extract'ipev" = extractvalue [1 x {} addrspace(10)*] %"'1", 0
  %3 = call {}*** @julia.get_pgcstack() #7
  %ptls_field.i4 = getelementptr inbounds {}**, {}*** %3, i64 2
  %4 = bitcast {}*** %ptls_field.i4 to i64***
  %ptls_load.i56 = load i64**, i64*** %4, align 8, !tbaa !10, !alias.scope !65, !noalias !68
  %5 = getelementptr inbounds i64*, i64** %ptls_load.i56, i64 2
  %safepoint.i = load i64*, i64** %5, align 8, !tbaa !14, !alias.scope !70, !noalias !73
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint.i) #7, !dbg !75
  fence syncscope("singlethread") seq_cst
  %6 = icmp ne {} addrspace(10)* %.fca.0.extract, null
  call void @llvm.assume(i1 noundef %6) #7
  call fastcc void @augmented_julia_GradientOnEdgeModified_2124({} addrspace(10)* nocapture nofree readonly align 16 %.fca.0.extract, {} addrspace(10)* nocapture nofree align 16 %".fca.0.extract'ipev", {} addrspace(10)* nocapture nofree readonly align 16 %0, {} addrspace(10)* nocapture nofree align 16 %"'"), !dbg !77
  %"'ipc" = addrspacecast {} addrspace(10)* %"'" to double addrspace(13)* addrspace(11)*, !dbg !78
  %"arrayptr.i7'ipl" = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %"'ipc", align 16, !dbg !78, !tbaa !23, !alias.scope !80, !noalias !83, !nonnull !8
  br label %invertentry, !dbg !64

invertentry:                                      ; preds = %entry
  store double %differeturn, double* %"arrayref.i'de", align 8
  %7 = load double, double* %"arrayref.i'de", align 8, !dbg !78
  store double 0.000000e+00, double* %"arrayref.i'de", align 8, !dbg !78
  %8 = load double, double addrspace(13)* %"arrayptr.i7'ipl", align 8, !dbg !78, !tbaa !36, !alias.scope !85, !noalias !88
  %9 = fadd fast double %8, %7, !dbg !78
  store double %9, double addrspace(13)* %"arrayptr.i7'ipl", align 8, !dbg !78, !tbaa !36, !alias.scope !85, !noalias !88
  call fastcc void @diffejulia_GradientOnEdgeModified_2124({} addrspace(10)* nocapture nofree readonly align 16 %.fca.0.extract, {} addrspace(10)* nocapture nofree align 16 %".fca.0.extract'ipev", {} addrspace(10)* nocapture nofree readonly align 16 %0, {} addrspace(10)* nocapture nofree align 16 %"'"), !dbg !77
  fence syncscope("singlethread") seq_cst
  fence syncscope("singlethread") seq_cst
  ret void
}

isequal(mesh.dcEdge, old_mesh.dcEdge) = false
mesh.dcEdge = [4.140000000000001]
old_mesh.dcEdge = [3.14]
wsmoses commented 2 days ago

Yeah, the problem here isn't autodiff but `make_zero`: knowing the type is constant, it will not create the shadow for you. However, when you pass in a `Duplicated`, the shadow is presumed to exist.

I think I would argue this is an invalid use of inactive type

vchuravy commented 2 days ago

Yeah, I think the question is: can we provide an error for this? Also note that the original report used Const, so it was doubly inactive.

wsmoses commented 2 days ago

If it was const then that is a different error.