Closed mohscorpion closed 3 years ago
# Minimal reproduction: train a small Dense MLP on random Float32 data.
# X,y = generate_training_data(3,1,df,0);
X = rand(Float32, 19, 100)   # 19 features x 100 samples; first row dropped below
y = rand(Float32, 100, 6);
y = y';                      # lazy Adjoint (6 x 100); materialized below
model = Chain(Dense(18,18,relu),Dense(18,180,relu),Dense(180,18,relu),Dense(18,6))
# Slicing with a range already copies in Julia, so `deepcopy` is redundant for X2;
# `collect` materializes the lazy adjoint so the loader yields a plain Matrix{Float32}.
X2 = X[2:end, :];
y2 = collect(y);
train_loader = Flux.Data.DataLoader((X2, y2), batchsize=1, shuffle=true);
# Mean-squared-error between the model output and the target.
loss0(x, y) = Flux.mse(model(x), y)
opt = Flux.ADAM(1e-3)
ps = Flux.params(model)
nepochs = 1
for ind in 1:nepochs
    Flux.train!(loss0, ps, train_loader, opt)
    if ind % 1 == 0
        # Pick one random sample index (1-element vector, as in the original).
        ind2 = rand(1:size(X, 2), 1)
        # BUG FIX: the original printed `ind*100/epochs` but `epochs` is undefined —
        # the variable declared above is `nepochs`, so this line threw UndefVarError.
        println((ind * 100 / nepochs), ":->", ind2)
    end
end
definitely works. Notice I don't have the data generation and the print functions. Those should not matter here anyway. I would remove the `deepcopy` or use `collect` instead anyway.
Where exactly did you get the mutation error (what line in your code)? I don't see any obvious mutation for the code that would be part of the backwards pass.
i just run this line out of the for loop :
Flux.train!(loss0, ps,train_loader,opt)
# X,y = generate_training_data(3,1,df,0); X = rand(Float32, 19, 100) y = rand(Float32, 100, 6); y = y'; model = Chain(Dense(18,18,relu),Dense(18,180,relu),Dense(180,18,relu),Dense(18,6)) X2 = deepcopy(X[2:end,:]); y2 = deepcopy(y); train_loader = Flux.Data.DataLoader((X2,y2),batchsize=1,shuffle=true); loss0(x, y) = Flux.mse(model(x), y) opt = Flux.ADAM(1e-3) ps = Flux.params(model) nepochs = 1 for ind in 1:nepochs Flux.train!(loss0, ps,train_loader,opt) if ind%1 == 0 ind2 = rand(1:size(X)[2],1) println( (ind*100/epochs), ":->" , ind2 ) end end
definitely works. Notice I don't have the data generation and the print functions. Those should not matter here anyway. I would remove the `deepcopy` or use `collect` instead anyway.
i will try this one maybe there is something about arrays that are of type "Any"
i will try this one maybe there is something about arrays that are of type "Any"
That will do it. This is because the generic matmul that gets hit in Dense
uses a mutating operation when your array type is unspecified.
Your original code should work as long as X
and y
are not Array{Any}
(preferably Array{Float32}
or CuArray{Float32}
). You can fix it with the following:
# Convert both returned arrays to Float32 so Flux's Dense layers hit the fast BLAS
# matmul instead of the generic mutating fallback that Zygote cannot differentiate.
# NOTE(review): assumes generate_training_data returns a 2-tuple of arrays — confirm.
X, y = convert.(Array{Float32}, generate_training_data(3,1,df,0));
# rest of your code
As mentioned, you don't need to do the `deepcopy`.
julia> X = rand(Float32, 18, 100);
julia> y = rand(Float32, 6, 100);
julia> X2 = convert(Array{Any}, X);
julia> y2 = convert(Array{Any}, y);
julia> model = Chain(Dense(18,18,relu),Dense(18,180,relu),Dense(180,18,relu),Dense(18,6))
Chain(Dense(18, 18, relu), Dense(18, 180, relu), Dense(180, 18, relu), Dense(18, 6))
julia> loss0(x, y) = Flux.mse(model(x), y)
loss0 (generic function with 1 method)
julia> ps = Flux.params(model);
julia> gradient(() -> loss0(first(train_loader)...), ps)
ERROR: Mutating arrays is not supported
Stacktrace:
[1] error(::String) at ./error.jl:33
[2] (::Zygote.var"#364#365")(::Nothing) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/lib/array.jl:58
[3] (::Zygote.var"#2246#back#366"{Zygote.var"#364#365"})(::Nothing) at /Users/darsnack/.julia/packages/ZygoteRules/OjfTt/src/adjoint.jl:59
[4] _modify! at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/generic.jl:83 [inlined]
[5] (::typeof(∂(_modify!)))(::Nothing) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[6] _generic_matmatmul! at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:823 [inlined]
[7] (::typeof(∂(_generic_matmatmul!)))(::Array{Float32,2}) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[8] generic_matmatmul! at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:728 [inlined]
[9] (::typeof(∂(generic_matmatmul!)))(::Array{Float32,2}) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[10] mul! at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:235 [inlined]
[11] mul! at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:208 [inlined]
[12] * at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:153 [inlined]
[13] (::typeof(∂(*)))(::Array{Float32,2}) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[14] Dense at /Users/darsnack/.julia/packages/Flux/sY3yx/src/layers/basic.jl:123 [inlined]
[15] (::typeof(∂(λ)))(::Array{Float32,2}) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[16] applychain at /Users/darsnack/.julia/packages/Flux/sY3yx/src/layers/basic.jl:36 [inlined]
[17] (::typeof(∂(applychain)))(::Array{Float32,2}) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[18] Chain at /Users/darsnack/.julia/packages/Flux/sY3yx/src/layers/basic.jl:38 [inlined]
[19] (::typeof(∂(λ)))(::Array{Float32,2}) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[20] loss0 at ./REPL[18]:1 [inlined]
[21] (::typeof(∂(loss0)))(::Float32) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[22] #150 at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/lib/lib.jl:191 [inlined]
[23] #1694#back at /Users/darsnack/.julia/packages/ZygoteRules/OjfTt/src/adjoint.jl:59 [inlined]
[24] #3 at ./REPL[23]:1 [inlined]
[25] (::typeof(∂(#3)))(::Float32) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface2.jl:0
[26] (::Zygote.var"#54#55"{Zygote.Params,Zygote.Context,typeof(∂(#3))})(::Float32) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface.jl:172
[27] gradient(::Function, ::Zygote.Params) at /Users/darsnack/.julia/packages/Zygote/ggM8Z/src/compiler/interface.jl:49
[28] top-level scope at REPL[23]:1
Closing, but we should keep this in mind while addressing #613.
for the record the solution was this :
X = float.(X)
hi, i have a very simple model like this:
as you can see i tried to deepcopy to avoid probable adjoints but it seems the dataloader does it internally. any suggestion?