Open stephensrmmartin opened 6 years ago
Thanks for letting me know!
These all pass their tests though, so I'm not sure how to recreate the issue. Could you please post a reproducible example here so I know where to start?
# Reproducible example: a one-factor latent CFA written in greta, fitted to
# the first five columns of the `bfi` data from the psych package, then passed
# to greta::opt() with the gradient_descent() optimiser.
# NOTE(review): lavaan is attached but nothing below appears to call it --
# confirm before removing.
library(lavaan)
library(greta)
# Load the `bfi` dataset from the psych package.
data(package='psych','bfi')
# Keep the first five columns and drop any row containing a missing value.
ds.a <- bfi[,1:5]
ds.a <- ds.a[complete.cases(ds.a),]
# Reverse-score A1 (presumably the items are on a 1-6 scale -- verify).
ds.a$A1 <- 7 - ds.a$A1
#ds.a <- ds.a[1:100,]
# Standardise the columns, then wrap the result as a greta data array.
ds.a <- scale(ds.a)
ds.g <- as_data(ds.a)
# N = number of complete rows, J = number of columns (items).
N <- nrow(ds.a)
J <- ncol(ds.a)
# Latent CFA
# theta: N x 1 latent scores; nu: J x 1 intercepts; lambda: J x 1 loadings;
# resid: J x 1 residual terms. lambda and resid are truncated to [0, Inf).
theta <- normal(0,1,c(N,1))
nu <- normal(0,2,c(J,1))
lambda <- normal(0,1,c(J,1),truncation=c(0,Inf))
resid <- normal(0,2,c(J,1),truncation=c(0,Inf))
# Mean matrix: ones(N) %*% t(nu) repeats the intercepts on every row, and
# theta %*% t(lambda) adds the factor contribution (presumably N x J overall).
mu <- ones(N)%*%t(nu) + theta%*%t(lambda)
# Matrix passed as the second argument of multivariate_normal(): J x J zeros
# with `resid` written onto the diagonal.
# NOTE(review): the disabled univariate version below passes resid[j] as the
# second argument of normal(), whereas here resid is placed on the diagonal of
# Sigma -- confirm that the two are meant to be on the same scale.
Sigma <- zeros(J,J)
diag(Sigma) <- resid
distribution(ds.g) <- multivariate_normal(mu,Sigma)
# Disabled alternative: one univariate normal likelihood per column.
#distribution(ds.a[,1]) <- normal(mu[,1],resid[1])
#distribution(ds.a[,2]) <- normal(mu[,2],resid[2])
#distribution(ds.a[,3]) <- normal(mu[,3],resid[3])
#distribution(ds.a[,4]) <- normal(mu[,4],resid[4])
#distribution(ds.a[,5]) <- normal(mu[,5],resid[5])
# Build the model over all four parameter arrays and run the optimiser for at
# most 2000 iterations. This opt() call is the one under discussion in this
# thread.
gretaMod <- model(lambda,nu,resid,theta)
gretaOptOut <- greta::opt(gretaMod,optimiser=gradient_descent(),max_iterations = 2000)
I was using the above, I believe. My intuition is that some constraint makes TF spit out NaN, which is then saved into the object, but breaks the R loop. Why some optimisers do that, I'm not sure; perhaps some optimisers just don't get into those problematic regions.
OK great, yes I think you're right about the cause. I'll see if I can catch those on the TensorFlow side.
I get this error when running the current version of {greta}
# Rendered reprex of a one-factor latent CFA in greta; lines starting with
# `#>` are console output captured when the example was run.
# NOTE(review): lavaan is attached but nothing below appears to call it --
# confirm before removing.
library(lavaan)
#> This is lavaan 0.6-9
#> lavaan is FREE software! Please report any bugs.
library(greta)
#>
#> Attaching package: 'greta'
#> The following objects are masked from 'package:stats':
#>
#> binomial, cov2cor, poisson
#> The following objects are masked from 'package:base':
#>
#> %*%, apply, backsolve, beta, chol2inv, colMeans, colSums, diag,
#> eigen, forwardsolve, gamma, identity, rowMeans, rowSums, sweep,
#> tapply
# Load the `bfi` dataset from the psych package.
data(package='psych','bfi')
# Keep the first five columns and drop any row containing a missing value.
ds.a <- bfi[,1:5]
ds.a <- ds.a[complete.cases(ds.a),]
# Reverse-score A1 (presumably the items are on a 1-6 scale -- verify).
ds.a$A1 <- 7 - ds.a$A1
#ds.a <- ds.a[1:100,]
# Standardise the columns, then wrap the result as a greta data array.
ds.a <- scale(ds.a)
ds.g <- as_data(ds.a)
#> ℹ Initialising python and checking dependencies
#> ✓ Initialising python and checking dependencies
#>
# N = number of complete rows, J = number of columns (items).
N <- nrow(ds.a)
J <- ncol(ds.a)
# Latent CFA
# theta: N x 1 latent scores; nu: J x 1 intercepts; lambda: J x 1 loadings;
# resid: J x 1 residual terms. lambda and resid are truncated to [0, Inf).
theta <- normal(0,1,c(N,1))
nu <- normal(0,2,c(J,1))
lambda <- normal(0,1,c(J,1),truncation=c(0,Inf))
resid <- normal(0,2,c(J,1),truncation=c(0,Inf))
# Mean matrix: ones(N) %*% t(nu) repeats the intercepts on every row, and
# theta %*% t(lambda) adds the factor contribution (presumably N x J overall).
mu <- ones(N)%*%t(nu) + theta%*%t(lambda)
# Matrix passed as the second argument of multivariate_normal(): J x J zeros
# with `resid` written onto the diagonal.
# NOTE(review): the disabled univariate version below passes resid[j] as the
# second argument of normal(), whereas here resid is placed on the diagonal of
# Sigma -- confirm that the two are meant to be on the same scale.
Sigma <- zeros(J,J)
diag(Sigma) <- resid
distribution(ds.g) <- multivariate_normal(mu,Sigma)
# Disabled alternative: one univariate normal likelihood per column.
#distribution(ds.a[,1]) <- normal(mu[,1],resid[1])
#distribution(ds.a[,2]) <- normal(mu[,2],resid[2])
#distribution(ds.a[,3]) <- normal(mu[,3],resid[3])
#distribution(ds.a[,4]) <- normal(mu[,4],resid[4])
#distribution(ds.a[,5]) <- normal(mu[,5],resid[5])
# Build the model over all four parameter arrays and run the optimiser for at
# most 2000 iterations. The captured output that follows this call reports
# "InvalidArgumentError: Input matrix is not invertible."
gretaMod <- model(lambda,nu,resid,theta)
gretaOptOut <- greta::opt(gretaMod,optimiser=gradient_descent(),max_iterations = 2000)
#> Error in py_call_impl(callable, dots$args, dots$keywords): InvalidArgumentError: Input matrix is not invertible.
#> [[node MultivariateNormalTriL_1/log_prob/affine_linear_operator/inverse/LinearOperatorLowerTriangular/solve/LinearOperatorLowerTriangular/solve/MatrixTriangularSolve/MatrixTriangularSolve (defined at /tensorflow_probability/python/bijectors/affine_linear_operator.py:160) ]]
#>
#> Original stack trace for 'MultivariateNormalTriL_1/log_prob/affine_linear_operator/inverse/LinearOperatorLowerTriangular/solve/LinearOperatorLowerTriangular/solve/MatrixTriangularSolve/MatrixTriangularSolve':
#> File "/tensorflow_probability/python/distributions/distribution.py", line 866, in log_prob
#> return self._call_log_prob(value, name, **kwargs)
#> File "/tensorflow_probability/python/distributions/distribution.py", line 848, in _call_log_prob
#> return self._log_prob(value, **kwargs)
#> File "/tensorflow_probability/python/internal/distribution_util.py", line 2094, in _fn
#> return fn(*args, **kwargs)
#> File "/tensorflow_probability/python/distributions/mvn_linear_operator.py", line 210, in _log_prob
#> return super(MultivariateNormalLinearOperator, self)._log_prob(x)
#> File "/tensorflow_probability/python/distributions/transformed_distribution.py", line 401, in _log_prob
#> x = self.bijector.inverse(y, **bijector_kwargs)
#> File "/tensorflow_probability/python/bijectors/bijector.py", line 977, in inverse
#> return self._call_inverse(y, name, **kwargs)
#> File "/tensorflow_probability/python/bijectors/bijector.py", line 949, in _call_inverse
#> mapping = mapping.merge(x=self._inverse(y, **kwargs))
#> File "/tensorflow_probability/python/bijectors/affine_linear_operator.py", line 160, in _inverse
#> x = self.scale.solvevec(x, adjoint=self.adjoint)
#> File "/tensorflow/python/ops/linalg/linear_operator.py", line 866, in solvevec
#> return self._solvevec(rhs, adjoint=adjoint)
#> File "/tensorflow/python/ops/linalg/linear_operator.py", line 816, in _solvevec
#> solution_mat = self.solve(rhs_mat, adjoint=adjoint)
#> File "/tensorflow/python/ops/linalg/linear_operator.py", line 811, in solve
#> return self._solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
#> File "/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py", line 207, in _solve
#> self._tril, rhs, lower=True, adjoint=adjoint)
#> File "/tensorflow/python/ops/linalg/linear_operator_util.py", line 290, in matrix_triangular_solve_with_broadcast
#> adjoint=adjoint and still_need_to_transpose)
#> File "/tensorflow/python/ops/gen_linalg_ops.py", line 1878, in matrix_triangular_solve
#> adjoint=adjoint, name=name)
#> File "/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
#> op_def=op_def)
#> File "/tensorflow/python/util/deprecation.py", line 507, in new_func
#> return func(*args, **kwargs)
#> File "/tensorflow/python/framework/ops.py", line 3616, in create_op
#> op_def=op_def)
#> File "/tensorflow/python/framework/ops.py", line 2005, in __init__
#> self._traceback = tf_stack.extract_stack()
#>
#>
#> Detailed traceback:
#> File "/Users/njtierney/Library/r-miniconda/envs/greta-env/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 950, in run
#> run_metadata_ptr)
#> File "/Users/njtierney/Library/r-miniconda/envs/greta-env/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1173, in _run
#> feed_dict_tensor, options, run_metadata)
#> File "/Users/njtierney/Library/r-miniconda/envs/greta-env/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
#> run_metadata)
#> File "/Users/njtierney/Library/r-miniconda/envs/greta-env/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
#> raise type(e)(node_def, op, message)
Created on 2021-07-02 by the reprex package (v2.0.0)
Using either gradient_descent or momentum (possibly others too; these are the two I noticed failing) will cause an error:
This seems to be due to self$diff being NaN, which R counts as missing. Manually altering self$diff to be non-missing appears to correct the problem. This implies that self$diff is not initialized correctly.
However, when using momentum, self$diff is still set to NaN at some point during the loop, which causes the loop to fail.