JuliaDiff / ReverseDiff.jl

Reverse Mode Automatic Differentiation for Julia

Incorrect zero gradient with no error thrown #171

Open bantin opened 3 years ago

bantin commented 3 years ago

I am trying to take gradients of a loss function that is defined via the functions below. I need the gradients of tensor_conv! with respect to W and H, which are not mutated inside the function:

using LinearAlgebra: mul!  # mul! is not exported from Base

# Accumulate the lagged convolution of W (K×N×L) with H (K×T) into est (N×T)
function tensor_conv!(est, W::AbstractArray, H::AbstractArray)
    K, N, L = size(W)
    T = size(H, 2)

    @. est = 0
    for lag = 0:(L-1)
        @views s_dot!(est[:, lag+1:T], W[:, :, lag+1]', H, lag, 1, 1)
    end

    return est
end

# Shifted matrix product: B .= β*B .+ α*Wl*H[:, shifted columns], following the 5-arg mul! convention
function s_dot!(B, Wl, H, lag, α, β)
    K, T = size(H)

    if lag < 0
        @views mul!(B, Wl, H[:, 1+lag:T], α, β) 
    else  # lag >= 0
        @views mul!(B, Wl, H[:, 1:T-lag], α, β)
    end

    return B
end

However, ReverseDiff always gives me a zero gradient when differentiating this function, and it does not throw an error. Can someone explain what might be going wrong?
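
For reference, here is roughly how I am computing the gradient. This is a minimal sketch with made-up sizes; the pre-allocated est buffer and the sum-of-squares loss are placeholders standing in for my actual setup:

using ReverseDiff

# Hypothetical sizes, just to make the sketch self-contained
K, N, L, T = 3, 4, 2, 10
W = rand(K, N, L)   # filters
H = rand(K, T)      # activations
est = zeros(N, T)   # pre-allocated output buffer, mutated by tensor_conv!

# Placeholder loss: sum of squares of the convolution output.
# Only W and H are differentiated; est is captured from the enclosing scope.
loss(W, H) = sum(abs2, tensor_conv!(est, W, H))

∇W, ∇H = ReverseDiff.gradient(loss, (W, H))

With my real code, both returned gradients come back as all zeros, and nothing errors.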