I am trying to take gradients of a loss function which is defined via the function below. I need gradients of tensor_conv! with respect to W and H, which are not mutated inside the function:
using LinearAlgebra  # for mul!

function tensor_conv!(est, W::AbstractArray, H::AbstractArray)
    K, N, L = size(W)
    T = size(H, 2)
    @. est = 0  # zero the output buffer in place
    for lag = 0:(L-1)
        # accumulate the contribution of the lag-th slice of W
        # into the columns of est starting at lag+1
        @views s_dot!(est[:, lag+1:T], W[:, :, lag+1]', H, lag, 1, 1)
    end
    return est
end
function s_dot!(B, Wl, H, lag, α, β)
    K, T = size(H)
    # 5-arg mul!: B = α * Wl * H_shifted + β * B, in place
    if lag < 0
        @views mul!(B, Wl, H[:, 1+lag:T], α, β)
    else # lag >= 0
        @views mul!(B, Wl, H[:, 1:T-lag], α, β)
    end
    return B
end
However, ReverseDiff always gives me a zero gradient when differentiating this function; it does not throw an error. Can someone explain what might be going wrong?