Error when using an 2D SPDE component + function of point coordinates

Serra314 commented 4 years ago

There are two problems showed in this document: 1) if a define a function of the coordinates as model component, then, the coordinates of my data has to be called x and y 2) the second one is that when using a 2D SPDE random effect + covariate based on a function of the point coordinate it returns an error

the code simulate some points uniformly in (0,1), the covariate is the distance between a point and a line, it is used finding the distance from the line of the mesh points and projecting them to the new locations.

library(ggplot2)
library(INLA)
library(inlabru)
library(raster)
library(rgeos)

## SIMULATE POINTS

set.seed(1)
longitude <- runif(5)
latitude <- runif(5)

points.df <- data.frame(longitude = longitude,
                 latitude = latitude)

coordinates(points.df) <- ~ longitude + latitude

## CREATE A LINE (distance from this line will be our covariate)

line.x <- seq(0.1,0.9,length.out = 3)
line.y <- 1 - line.x + rnorm(3,sd=0.1)
line.xy <- cbind(line.x,line.y)
line.sp = sp::SpatialPoints(line.xy)
spl <- SpatialLines(list(Lines(Line(line.sp), ID="a")))

plot(points.df)
plot(spl, add = T)


## CREATE BOUNDARY

x.lim <- points.df@bbox[1,] + c(-0.2,0.2)
y.lim <- points.df@bbox[2,] + c(-0.2,0.2)

pl <- Polygon(cbind(c(x.lim[1], x.lim[2], x.lim[2], x.lim[1], x.lim[1]),
                    c(y.lim[1], y.lim[1], y.lim[2], y.lim[2], y.lim[1])), hole=T)

boundary <- SpatialPolygons(list(Polygons(list(pl), '0')))

## CREATE MESH 

mesh <- inla.mesh.2d(boundary = boundary, max.edge = c(0.1, 0.5), cutoff = 0.05)

ggplot() + gg(mesh) + gg(points.df)


## CREATE SPDE

spde2D <- inla.spde2.pcmatern(mesh,
                              # Pr(sigma > 1) = 0.01
                              prior.sigma=c(1, 0.01),
                              # Pr(range < 1) = 0.01
                              prior.range = c(1, 0.01))

### CREATE A RASTER with the same extent of the mesh, it will be used to compare the results 
## obtained using f(fun(x,y), model = 'linear') and f(raster, model = 'linear')

# extract points
mesh.points <- data.frame(mesh$loc[,1:2])
coordinates(mesh.points) <- ~ X1 + X2

# create raster
raster.grid <- raster(extent(mesh.points), res = 0.01)
raster.grid$fdist <- gDistance(spl, as(raster.grid,"SpatialPoints"), byid=TRUE)[,1]  

plot(raster.grid)


mesh.points$fdist <- gDistance(spl, mesh.points, byid = T)[,1]

# if we are interested in new locations
fdist.fun <- function(x, y, covariate_on_mesh = mesh.points$fdist){
  new.point <- data.frame(x = x, y = y)
  coordinates(new.point) = ~ x + y
  A <- inla.spde.make.A(mesh, new.point)
  covariate_on_new_locations <- A %*% covariate_on_mesh
  return(covariate_on_new_locations[,1])
}

points.df$fdist <- fdist.fun(points.df@coords[,1],points.df@coords[,2])

# check for correcteness
gDistance(spl, points.df, byid = T) - points.df$fdist
#>               a
#> 1  2.775558e-17
#> 2  0.000000e+00
#> 3 -4.267577e-04
#> 4 -5.551115e-17
#> 5 -1.110223e-16

## create components 

cmp <- coordinates ~ Intercept + fault_dist(map = fdist.fun(x, y), model = 'linear')  

## first error if I don't set coords name correctly in the data
init.tutorial()
#> Setting defaults for tutorial session.
ff <- lgcp(components = cmp,
           formula = coordinates ~ Intercept + fault_dist,
           data = points.df,
           samplers = boundary) 
#> Error in match.names(clabs, names(xi)): names do not match previous names

## solution
colnames(points.df@coords) <- c('x', 'y')
ff <- lgcp(components = cmp,
           formula = coordinates ~ Intercept + fault_dist,
           data = points.df,
           samplers = boundary) 
#> iinla() iteration 1 [ max: 10 ]. Done. 
#> iinla() iteration 2 [ max: 10 ]. Done. Max deviation from previous: 0.000296% of SD [stop if: <1%]
#> Convergence criterion met, stopping INLA iteration.

# comparison with raster
dist_cov <- as(raster.grid$fdist, 'SpatialGridDataFrame')

cmp.r <- coordinates ~ Intercept + fault_dist(map = dist_cov, model = 'linear')

ff.r <- lgcp(components = cmp,
           formula = coordinates ~ Intercept + fault_dist,
           data = points.df,
           samplers = boundary) 
#> iinla() iteration 1 [ max: 10 ]. Done. 
#> iinla() iteration 2 [ max: 10 ]. Done. Max deviation from previous: 0.000296% of SD [stop if: <1%]
#> Convergence criterion met, stopping INLA iteration.

# same
ff.r$summary.fixed
#>                mean       sd 0.025quant 0.5quant 0.975quant     mode
#> Intercept  1.336885 0.992210 -0.8111919 1.411330   3.080874 1.566017
#> fault_dist 3.870006 3.074892 -2.1951814 3.879546   9.876307 3.898815
#>                     kld
#> Intercept  3.629009e-06
#> fault_dist 9.278534e-07
ff$summary.fixed
#>                mean       sd 0.025quant 0.5quant 0.975quant     mode
#> Intercept  1.336885 0.992210 -0.8111919 1.411330   3.080874 1.566017
#> fault_dist 3.870006 3.074892 -2.1951814 3.879546   9.876307 3.898815
#>                     kld
#> Intercept  3.629009e-06
#> fault_dist 9.278534e-07

## second error when I add the spde, it makes no problem using the raster covariate

cmp <- coordinates ~ Intercept + field(map = coordinates, model = spde2D) +
  fault_dist(map = fdist.fun(x, y), model = 'linear')  

ff <- lgcp(components = cmp,
           formula = coordinates ~ Intercept + fault_dist,
           data = points.df,
           samplers = boundary) 
#> iinla() iteration 1 [ max: 10 ].
#> Warning in max(gp$random.spec[[r]]$group, na.rm = TRUE): no non-missing
#> arguments to max; returning -Inf
#> Warning in (function (formula, family = "gaussian", contrasts = NULL,
#> data, : NAs introduced by coercion to integer range
#> Error in if (nrep > 1 || ngroup > 1) {: missing value where TRUE/FALSE needed

^{Created on 2020-05-25 by the reprex package (v0.3.0)}

finnlindgren commented 3 years ago

The covariate errors should be fixed in the devel version (checked 2020-10-29) but you will need to specify the integration function domain with domain = list(coordinates = mesh)

ff <- lgcp(components = cmp,
           formula = coordinates ~ Intercept + fault_dist,
           data = points.df,
           samplers = boundary,
           domain = list(coordinates = mesh))

The code relating to names of coordinate dimensions still needs to be overhauled in inlabru.

finnlindgren commented 1 year ago

With sp being phased out in favour of sf that doesn't have configurable coordinate names, the coordinate naming issue will not be fixed for sp objects. One needs to use something like

coordinates(.data.)[,1]

and

st_coordinates(.data.)[,"X"]

to access individual coordinate values.

inlabru-org / inlabru

Error when using an 2D SPDE component + function of point coordinates #74