sdobber / FluxArchitectures.jl

Complex neural network examples for Flux.jl
MIT License
123 stars 15 forks source link

Error running LSTnet - Copy-Paste Code example #41

Closed mentics closed 2 years ago

mentics commented 2 years ago

Running Julia 1.8.0 on Windows 11, I copy-pasted the example code at the top of the examples page into a new Julia environment after adding FluxArchitectures and Plots packages. When it reached the line Flux.train!, it gave the following stack trace:

julia> Flux.train!(loss, Flux.params(model),Iterators.repeated((input, target), 20), ADAM(0.01), cb=cb)
ERROR: BoundsError: attempt to access Tuple{} at index [0]
Stacktrace:
  [1] getindex(t::Tuple, i::Int64)
    @ Base .\tuple.jl:29
  [2] last(a::Tuple{})
    @ Base .\abstractarray.jl:479
  [3] rrule(config::Zygote.ZygoteRuleConfig{Zygote.Context{true}}, ::typeof(foldl), op::Base.var"#57#58"{typeof(Flux.reset!)}, x::Tuple{}; init::Nothing)
    @ ChainRules C:\Users\joel\.julia\packages\ChainRules\fgVxV\src\rulesets\Base\mapreduce.jl:448
  [4] chain_rrule_kw(::Zygote.ZygoteRuleConfig{Zygote.Context{true}}, ::Function, ::NamedTuple{(:init,), Tuple{Nothing}}, ::Function, ::Function, ::Vararg{Any})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\chainrules.jl:230
  [5] macro expansion
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0 [inlined]
  [6] _pullback(::Zygote.Context{true}, ::Base.var"#foldl##kw", ::NamedTuple{(:init,), Tuple{Nothing}}, ::typeof(foldl), ::Base.var"#57#58"{typeof(Flux.reset!)}, ::Tuple{})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:9
  [7] _pullback
    @ .\tuple.jl:555 [inlined]
  [8] _pullback(::Zygote.Context{true}, ::typeof(foreach), ::typeof(Flux.reset!), ::Tuple{})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
  [9] _pullback
    @ C:\Users\joel\.julia\packages\Flux\EXOFx\src\layers\recurrent.jl:180 [inlined]
 [10] _pullback(ctx::Zygote.Context{true}, f::typeof(Flux.reset!), args::typeof(relu))
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [11] _pullback
    @ .\abstractarray.jl:2774 [inlined]
 [12] _pullback(::Zygote.Context{true}, ::typeof(foreach), ::typeof(Flux.reset!), ::NamedTuple{(:σ, :weight, :bias, :stride, :pad, :dilation, :groups), Tuple{typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Tuple{Int64, Int64}, NTuple{4, Int64}, Tuple{Int64, Int64}, Int64}})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [13] _pullback
    @ C:\Users\joel\.julia\packages\Flux\EXOFx\src\layers\recurrent.jl:180 [inlined]
 [14] _pullback
    @ .\tuple.jl:555 [inlined]
 [15] #rrule_via_ad#46
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\chainrules.jl:255 [inlined]
 [16] rrule_via_ad
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\chainrules.jl:243 [inlined]
 [17] #1678
    @ C:\Users\joel\.julia\packages\ChainRules\fgVxV\src\rulesets\Base\mapreduce.jl:444 [inlined]
 [18] BottomRF
    @ .\reduce.jl:81 [inlined]
 [19] #836
    @ .\accumulate.jl:291 [inlined]
 [20] afoldl(::Base.var"#836#837"{Base.BottomRF{ChainRules.var"#1678#1680"{Zygote.ZygoteRuleConfig{Zygote.Context{true}}, Base.var"#57#58"{typeof(Flux.reset!)}}}}, ::Tuple{Tuple{}, Tuple{Nothing, Nothing}}, ::Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, ::Function)
    @ Base .\operators.jl:548
 [21] accumulate(op::Function, xs::Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"}; init::Tuple{Nothing, Nothing})
    @ Base .\accumulate.jl:290
 [22] rrule(config::Zygote.ZygoteRuleConfig{Zygote.Context{true}}, ::typeof(foldl), op::Base.var"#57#58"{typeof(Flux.reset!)}, x::Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"}; init::Nothing)
    @ ChainRules C:\Users\joel\.julia\packages\ChainRules\fgVxV\src\rulesets\Base\mapreduce.jl:440
 [23] chain_rrule_kw
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\chainrules.jl:230 [inlined]
 [24] macro expansion
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0 [inlined]
 [25] _pullback(::Zygote.Context{true}, ::Base.var"#foldl##kw", ::NamedTuple{(:init,), Tuple{Nothing}}, ::typeof(foldl), ::Base.var"#57#58"{typeof(Flux.reset!)}, ::Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:9
 [26] _pullback
    @ .\tuple.jl:555 [inlined]
 [27] _pullback(::Zygote.Context{true}, ::typeof(foreach), ::typeof(Flux.reset!), ::Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [28] _pullback
    @ C:\Users\joel\.julia\packages\Flux\EXOFx\src\layers\recurrent.jl:180 [inlined]
 [29] _pullback(ctx::Zygote.Context{true}, f::typeof(Flux.reset!), args::Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [30] _pullback
    @ .\abstractarray.jl:2774 [inlined]
 [31] _pullback(::Zygote.Context{true}, ::typeof(foreach), ::typeof(Flux.reset!), ::NamedTuple{(:layers,), Tuple{Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"}}})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [32] _pullback
    @ C:\Users\joel\.julia\packages\Flux\EXOFx\src\layers\recurrent.jl:180 [inlined]
 [33] _pullback
    @ .\tuple.jl:555 [inlined]
 [34] #rrule_via_ad#46
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\chainrules.jl:255 [inlined]
 [35] rrule_via_ad
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\chainrules.jl:243 [inlined]
 [36] #1678
    @ C:\Users\joel\.julia\packages\ChainRules\fgVxV\src\rulesets\Base\mapreduce.jl:444 [inlined]
 [37] BottomRF
    @ .\reduce.jl:81 [inlined]
 [38] #836
    @ .\accumulate.jl:291 [inlined]
 [39] afoldl
    @ .\operators.jl:548 [inlined]
 [40] #accumulate#835
    @ .\accumulate.jl:290 [inlined]
 [41] #rrule#1677
    @ C:\Users\joel\.julia\packages\ChainRules\fgVxV\src\rulesets\Base\mapreduce.jl:440 [inlined]
 [42] chain_rrule_kw
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\chainrules.jl:230 [inlined]
 [43] macro expansion
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0 [inlined]
 [44] _pullback
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:9 [inlined]
 [45] _pullback
    @ .\tuple.jl:555 [inlined]
 [46] _pullback(::Zygote.Context{true}, ::typeof(foreach), ::typeof(Flux.reset!), ::Tuple{Chain{Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"}}, FluxArchitectures.Seq{Flux.Recur{FluxArchitectures.ReluGRUCell{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, FluxArchitectures.SeqSkip{FluxArchitectures.ReluGRUCell{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Int64}, Chain{Tuple{Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{FluxArchitectures.var"#62#64", Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [47] _pullback
    @ C:\Users\joel\.julia\packages\FluxArchitectures\9iPjE\src\LSTnet.jl:111 [inlined]
 [48] _pullback(ctx::Zygote.Context{true}, f::typeof(Flux.reset!), args::FluxArchitectures.LSTnetCell{Chain{Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FluxArchitectures.var"#61#63"}}, FluxArchitectures.Seq{Flux.Recur{FluxArchitectures.ReluGRUCell{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, FluxArchitectures.SeqSkip{FluxArchitectures.ReluGRUCell{CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Int64}, Chain{Tuple{Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, Chain{Tuple{FluxArchitectures.var"#62#64", Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [49] _pullback
    @ .\REPL[16]:2 [inlined]
 [50] _pullback(::Zygote.Context{true}, ::typeof(loss), ::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, ::CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [51] _apply
    @ .\boot.jl:816 [inlined]
 [52] adjoint
    @ C:\Users\joel\.julia\packages\Zygote\qGFGD\src\lib\lib.jl:203 [inlined]
 [53] _pullback
    @ C:\Users\joel\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:65 [inlined]
 [54] _pullback
    @ C:\Users\joel\.julia\packages\Flux\EXOFx\src\optimise\train.jl:120 [inlined]
 [55] _pullback(::Zygote.Context{true}, ::Flux.Optimise.var"#37#40"{typeof(loss), Tuple{CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface2.jl:0
 [56] pullback(f::Function, ps::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface.jl:373
 [57] gradient(f::Function, args::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}})
    @ Zygote C:\Users\joel\.julia\packages\Zygote\qGFGD\src\compiler\interface.jl:96
 [58] macro expansion
    @ C:\Users\joel\.julia\packages\Flux\EXOFx\src\optimise\train.jl:119 [inlined]
 [59] macro expansion
    @ C:\Users\joel\.julia\packages\ProgressLogging\6KXlp\src\ProgressLogging.jl:328 [inlined]
 [60] train!(loss::Function, ps::Zygote.Params{Zygote.Buffer{Any, Vector{Any}}}, data::Base.Iterators.Take{Base.Iterators.Repeated{Tuple{CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}}, opt::Adam; cb::var"#1#2")
    @ Flux.Optimise C:\Users\joel\.julia\packages\Flux\EXOFx\src\optimise\train.jl:117
 [61] top-level scope
    @ REPL[20]:1
sdobber commented 2 years ago

Thank you for reporting - I can recreate this on my own hardware. I'll have a look into this, but I will be out travelling until the end of September, so it might take some time until this is fixed. My personal working configuration on Julia 1.7 uses the following package versions, in case you want to pin them:

Status `~/Uni/Code/FluxArchitectures/Project.toml`
  [fbb218c0] BSON v0.3.5
⌃ [587475ba] Flux v0.13.3
⌅ [5cadff95] JuliennedArrays v0.3.0
  [189a3867] Reexport v1.2.2
⌃ [295af30f] Revise v3.3.3
⌃ [82cb661a] SliceMap v0.2.6
  [bd369af6] Tables v1.7.0
⌃ [e88e6eb3] Zygote v0.6.40
  [4af54fe1] LazyArtifacts
  [9a3f8284] Random
mentics commented 2 years ago

I think this is likely an issue in Zygote's compiler. The stack trace points at the call to Flux.reset! in the loss function. If I remove that, the error goes away. I'll close this issue and open one on Zygote.

mentics commented 2 years ago

I created a simple test case that didn't involve FluxArchitectures, so it's certainly not related to this project. Here's the ticket I created for Zygote: https://github.com/FluxML/Zygote.jl/issues/1297

sdobber commented 2 years ago

Thanks! I am currently also having some issues where my tests break, and I could isolate the cause to the upgrade from Zygote 0.6.43 to 0.6.44. Apparently there is something wrong with how structs are handled by Zygote, but it doesn't show up in all cases...