EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
439 stars 62 forks source link

segfault during reverse mode autodiff #1869

Open ExpandingMan opened 2 days ago

ExpandingMan commented 2 days ago
using Enzyme, SparseArrays, Statistics

"""
    bc0_test_function(ps)

Broadcast elements 26 through 30 of `ps` into a freshly allocated 5×1
`Float64` matrix and return that matrix's first entry.
"""
function bc0_test_function(ps)
    tail = @view ps[26:30]
    buf = Matrix{Float64}(undef, 5, 1)
    # Broadcast assignment fills the 5×1 buffer from the 5-element view.
    buf .= tail
    return buf[1]
end

"""
    bc1_bcs2(x, y)

Return `x` when `x` and `y` compare equal; otherwise raise an error through
`error(2)`. The function is marked `@noinline`.
"""
@noinline function bc1_bcs2(x, y)
    if x != y
        error(2)
    end
    return x
end

# Broadcast `x + x` into a scratch `Float32` array and return `x` itself.
# The contents written to `dest` are never read back; only `x` is returned.
@noinline function bc1_affine_normalize(x::AbstractArray)
    # `bc1_bcs2` returns its first argument when both arguments compare equal,
    # so this evaluates to `axes(x)`.
    _axes = bc1_bcs2(axes(x), axes(x))
    # Allocate a `Float32` array spanning those axes as the broadcast destination.
    dest = similar(Array{Float32}, _axes)
    # Build the lazy `x .+ x` broadcast by hand: create it, instantiate it, then
    # convert it to the `Broadcasted{Nothing}` form before copying.
    bc = convert(Broadcast.Broadcasted{Nothing}, Broadcast.instantiate(Base.broadcasted(+, x, x)))
    # Materialize the broadcast into `dest`.
    copyto!(dest, bc)
    return x
end

"""
    bc1_loss_function(x)

Return the element at index 1 of the array produced by `bc1_affine_normalize(x)`.
"""
function bc1_loss_function(x)
    normalized = bc1_affine_normalize(x)
    return normalized[1]
end

"""
    bc2_affine_normalize(::typeof(identity), x, xmean, xvar, scale, bias, epsilon)

Compute `x .* s .+ b` elementwise, where `s = scale ./ sqrt.(xvar .+ epsilon)`
and `b = bias .- xmean .* s`. All operations broadcast over their arguments.
"""
function bc2_affine_normalize(::typeof(identity), x::AbstractArray, xmean, xvar,
    scale::AbstractArray, bias::AbstractArray, epsilon::Real)
    # Per-element multiplier: scale divided by the square root of (variance + epsilon).
    multiplier = scale ./ sqrt.(xvar .+ epsilon)
    # Per-element offset that folds the mean into the bias term.
    offset = bias .- xmean .* multiplier
    return x .* multiplier .+ offset
end

# Reshape the inputs, compute the mean and variance of the reshaped `x` over
# dims (1, 2, 5), pass everything through `bc2_affine_normalize`, and return
# the sum of squares of the result.
function bc2_loss_function(x, scale, bias)
    # `reshape` needs matching element counts: `x` must hold 6*6*3*2*2 = 432
    # elements, and `scale` / `bias` must hold 1*1*3*2*1 = 6 elements each.
    x_ = reshape(x, 6, 6, 3, 2, 2)
    scale_ = reshape(scale, 1, 1, 3, 2, 1)
    bias_ = reshape(bias, 1, 1, 3, 2, 1)

    # Reduce over dims 1, 2 and 5; the reduced dims collapse to size 1, giving
    # results of size (1, 1, 3, 2, 1), the same shape as `scale_` and `bias_`.
    xmean = mean(x_, dims=(1, 2, 5))
    # `corrected=false` selects the uncorrected variance; the mean computed
    # above is supplied through `mean=` rather than being recomputed.
    xvar = var(x_, corrected=false, mean=xmean, dims=(1, 2, 5))

    # `1e-5` is passed as the `epsilon` argument of `bc2_affine_normalize`.
    return sum(abs2, bc2_affine_normalize(identity, x_, xmean, xvar, scale_, bias_, 1e-5))
end

# Random `Float32` inputs: `x` holds 6*6*6*2 = 432 elements and `sc` / `bi`
# hold 6 elements each, matching the reshapes inside `bc2_loss_function`.
x = rand(Float32, 6, 6, 6, 2)
sc = rand(Float32, 6)
bi = rand(Float32, 6)

# Differentiate `bc2_loss_function` in reverse mode with an `Active` return.
# Each input is wrapped in `Duplicated` alongside a zeroed companion array
# created by `Enzyme.make_zero`.
Enzyme.autodiff(Reverse, bc2_loss_function, Active, Duplicated(x, Enzyme.make_zero(x)),
    Duplicated(sc, Enzyme.make_zero(sc)), Duplicated(bi, Enzyme.make_zero(bi)))

This segfaults on the call to autodiff.

Here is the stack trace I get:

[878854] signal (11.1): Segmentation fault
in expression starting at REPL[6]:1
unknown function (ip: 0x714b629c5f09)
getindex at ./essentials.jl:14 [inlined]
getindex at ./multidimensional.jl:696 [inlined]
centralize_sumabs2! at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:284
#varm!#10 at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:307
varm! at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:302 [inlined]
_varm at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:339 [inlined]
#varm#11 at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:337 [inlined]
varm at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:337 [inlined]
_var at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:384 [inlined]
#var#15 at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:378 [inlined]
var at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:378 [inlined]
bc2_loss_function at /home/expandingman/src/scrap.jl:43 [inlined]
diffejulia_bc2_loss_function_1179wrap at /home/expandingman/src/scrap.jl:0
macro expansion at /home/expandingman/.julia/dev/Enzyme/src/compiler.jl:7061 [inlined]
enzyme_call at /home/expandingman/.julia/dev/Enzyme/src/compiler.jl:6664 [inlined]
CombinedAdjointThunk at /home/expandingman/.julia/dev/Enzyme/src/compiler.jl:6541 [inlined]
autodiff at /home/expandingman/.julia/dev/Enzyme/src/Enzyme.jl:316 [inlined]
autodiff at /home/expandingman/.julia/dev/Enzyme/src/Enzyme.jl:328
unknown function (ip: 0x714b48bbe1f6)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
do_call at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:126
eval_value at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:223
eval_stmt_value at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:174 [inlined]
eval_body at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:617
jl_interpret_toplevel_thunk at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/interpreter.c:775
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:934
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:877
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:877
jl_toplevel_eval_flex at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:877
ijl_toplevel_eval_in at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/toplevel.c:985
eval at ./boot.jl:385 [inlined]
eval_user_input at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:150
repl_backend_loop at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:246
#start_repl_backend#46 at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:231
start_repl_backend at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:228
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
#run_repl#59 at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:389
run_repl at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/usr/share/julia/stdlib/v1.10/REPL/src/REPL.jl:375
jfptr_run_repl_91805.1 at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
#1013 at ./client.jl:432
jfptr_YY.1013_82772.1 at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
jl_f__call_latest at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/builtins.c:812
#invokelatest#2 at ./essentials.jl:892 [inlined]
invokelatest at ./essentials.jl:889 [inlined]
run_main_repl at ./client.jl:416
exec_options at ./client.jl:333
_start at ./client.jl:552
jfptr__start_82798.1 at /home/expandingman/.julia/juliaup/julia-1.10.5+0.x64.linux.gnu/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:2895 [inlined]
ijl_apply_generic at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/gf.c:3077
jl_apply at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/julia.h:1982 [inlined]
true_main at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/jlapi.c:582
jl_repl_entrypoint at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/src/jlapi.c:731
main at /cache/build/builder-amdci4-4/julialang/julia-release-1-dot-10/cli/loader_exe.c:58
unknown function (ip: 0x714b62865e07)
__libc_start_main at /usr/lib/libc.so.6 (unknown line)
unknown function (ip: 0x4010b8)
Allocations: 36941489 (Pool: 36890747; Big: 50742); GC: 48
wsmoses commented 1 day ago

@ExpandingMan this doesn't segfault for me.

Is it deterministic?

What were the exact versions of everything (Julia, Enzyme, OS, etc.)?

ExpandingMan commented 10 hours ago

I have never been able to get this not to segfault.

System:
  Host: theia1 Kernel: 6.10.6-10-MANJARO arch: x86_64 bits: 64
  Desktop: Qtile v: 0.28.2.dev0+gf1ed49bc.d20240813 Distro: Manjaro Linux
CPU:
  Info: 16-core model: AMD Ryzen 9 7950X bits: 64 type: MT MCP cache:
    L2: 16 MiB
Memory:
  System RAM: total: 64 GiB available: 61.95 GiB used: 7.27 GiB (11.7%)
  Array-1: capacity: 128 GiB slots: 4 modules: 2 EC: None

Let me know if there's anything more specific you're looking for.

ExpandingMan commented 9 hours ago

I thought that maybe this was something very obscure with Revise or something, so I ran it with

julia --startup-file=no scrap2.jl

and it still segfaults the same way.