r-lib / isoband

isoband: An R package to generate contour lines and polygons.
https://isoband.r-lib.org
Other
130 stars 14 forks source link

invalid sf geometries #6

Closed rCarto closed 5 years ago

rCarto commented 5 years ago

May be related to #5 .

Here is a piece of code producing isobands as sf polygons from a raster.

library(isoband)
library(raster)
#> Le chargement a nécessité le package : sp
library(sf)
#> Linking to GEOS 3.5.1, GDAL 2.1.2, PROJ 4.9.3
library(lwgeom)
#> Linking to liblwgeom 2.5.0dev r16016, GEOS 3.5.1, PROJ 4.9.3
library(tanaka)
com <- st_read(system.file("gpkg/com.gpkg", package = "tanaka"), quiet = TRUE)
x <- raster(system.file("grd/elev.grd", package = "tanaka"))

# data preparation
ext <- extent(x)
nc <- ncol(x)
nr <- nrow(x)
xr <- xres(x)/2
yr <- yres(x)/2
lon <- seq(ext[1] + xr, ext[2] - xr, length.out = nc)
lat <- seq(ext[4] - yr , ext[3] + yr, length.out = nr)
m <- matrix(data = values(x), nrow = nr, byrow = TRUE)
lev_low = c(88, seq(100,400,20))
lev_high = c(lev_low[-1], 419)

# raster to sf
raw <- isobands(x = lon, y = lat, z = m,
                levels_low = lev_low, levels_high = lev_high)

bands <- iso_to_sfg(raw)

iso <- st_sf(id = 1:length(bands),
             min = lev_low,
             max = lev_high,
             geometry = st_sfc(bands),
             crs = st_crs(x))

st_is_valid(iso, reason = TRUE)
#>  [1] "Self-intersection[586296.663048331 6418692.43022084]"                   
#>  [2] "Too few points in geometry component[596959.063048331 6427147.63022084]"
#>  [3] "Too few points in geometry component[596959.063048331 6427147.63022084]"
#>  [4] "Valid Geometry"                                                         
#>  [5] "Valid Geometry"                                                         
#>  [6] "Valid Geometry"                                                         
#>  [7] "Valid Geometry"                                                         
#>  [8] "Ring Self-intersection[595381.463048331 6420101.63022084]"              
#>  [9] "Ring Self-intersection[595381.463048331 6420101.63022084]"              
#> [10] "Ring Self-intersection[599080.663048331 6421402.43022084]"              
#> [11] "Ring Self-intersection[599080.663048331 6421402.43022084]"              
#> [12] "Valid Geometry"                                                         
#> [13] "Valid Geometry"                                                         
#> [14] "Self-intersection[600603.863048331 6416903.83022084]"                   
#> [15] "Self-intersection[600603.863048331 6416903.83022084]"                   
#> [16] "Valid Geometry"                                                         
#> [17] "Valid Geometry"
st_intersection(iso,com) 
#> Error in CPL_geos_op2(op, st_geometry(x), st_geometry(y)): Evaluation error: TopologyException: Input geom 0 is invalid: Too few points in geometry component at or near point 596959.06304833107 6427147.6302208398 at 596959.06304833107 6427147.6302208398.

iso is composed of valid and invalid geometries. It is not possible to use geometric operators (e.g. st_intersection()) on these geometries.

I use lwgeom::st_make_valid() to clean the geometries. It produces a GEOMETRY column (polylines and polygons), hence the use of st_collection_extract().

iso <- lwgeom::st_make_valid(iso)
st_is_valid(iso, reason=TRUE)
#>  [1] "Valid Geometry" "Valid Geometry" "Valid Geometry" "Valid Geometry"
#>  [5] "Valid Geometry" "Valid Geometry" "Valid Geometry" "Valid Geometry"
#>  [9] "Valid Geometry" "Valid Geometry" "Valid Geometry" "Valid Geometry"
#> [13] "Valid Geometry" "Valid Geometry" "Valid Geometry" "Valid Geometry"
#> [17] "Valid Geometry"
class(st_geometry(iso))
#> [1] "sfc_GEOMETRY" "sfc"
iso <- st_collection_extract(iso, "POLYGON")
class(st_geometry(iso))
#> [1] "sfc_MULTIPOLYGON" "sfc"
st_agr(iso) <- "constant"
st_intersection(iso,st_union(st_geometry(com))) 
#> Simple feature collection with 14 features and 3 fields
#> geometry type:  GEOMETRY
#> dimension:      XY
#> bbox:           xmin: 586133.5 ymin: 6416579 xmax: 603215.1 ymax: 6431809
#> epsg (SRID):    NA
#> proj4string:    +proj=lcc +lat_1=44 +lat_2=49 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs
#> First 10 features:
#>    id min max                       geometry
#> 1   1  88 100 MULTIPOLYGON (((590431.1 64...
#> 2   2 100 120 MULTIPOLYGON (((587430.9 64...
#> 3   3 120 140 MULTIPOLYGON (((592947.4 64...
#> 4   4 140 160 MULTIPOLYGON (((589834 6419...
#> 5   5 160 180 MULTIPOLYGON (((590777.3 64...
#> 6   6 180 200 MULTIPOLYGON (((591232.8 64...
#> 7   7 200 220 MULTIPOLYGON (((591426.9 64...
#> 8   8 220 240 MULTIPOLYGON (((593124 6420...
#> 9   9 240 260 MULTIPOLYGON (((591433.4 64...
#> 10 10 260 280 MULTIPOLYGON (((595377.4 64...

sessionInfo()
#> R version 3.5.3 (2019-03-11)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Debian GNU/Linux 9 (stretch)
#> 
#> Matrix products: default
#> BLAS: /usr/lib/libblas/libblas.so.3.7.0
#> LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
#> 
#> locale:
#>  [1] LC_CTYPE=fr_FR.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=fr_FR.UTF-8    
#>  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=fr_FR.UTF-8   
#>  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] tanaka_0.1.0  lwgeom_0.1-6  sf_0.7-3      raster_2.8-19 sp_1.3-1     
#> [6] isoband_0.2.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.0       knitr_1.21       magrittr_1.5     units_0.6-2     
#>  [5] lattice_0.20-38  stringr_1.4.0    highr_0.7        tools_3.5.3     
#>  [9] rgdal_1.3-9      grid_3.5.3       xfun_0.5         e1071_1.7-0.1   
#> [13] DBI_1.0.0        htmltools_0.3.6  class_7.3-15     yaml_2.2.0      
#> [17] digest_0.6.18    codetools_0.2-16 evaluate_0.13    rmarkdown_1.11  
#> [21] stringi_1.3.1    compiler_3.5.3   classInt_0.3-1

Created on 2019-03-13 by the reprex package (v0.2.1)

clauswilke commented 5 years ago

Is this with the latest development code?

If yes, would you be able to dig deeper and find specific examples of the exact problems that these geometries have? E.g., “too few points”, how many points are there? Also, what do the self-intersections look like? The isoband algorithm shouldn’t produce any self-intersections, but it can produce degenerate polygons with zero height or width, and the validation algorithm may interpret those as not valid.

rCarto commented 5 years ago

Yes v0.2.0 from github master. Here is an example of self intersection:

library(raster)
#> Le chargement a nécessité le package : sp
library(sf)
#> Linking to GEOS 3.5.1, GDAL 2.1.2, PROJ 4.9.3
library(isoband)
library(lwgeom)
#> Linking to liblwgeom 2.5.0dev r16016, GEOS 3.5.1, PROJ 4.9.3
x <-  c(365, 363, 362, 
        361, 359, 357, 358, 359, 360, 360, 359, 357, 357, 357, 359, 
        360, 359, 357, 362, 359, 359, 360, 359, 357, 370, 364, 361, 
        360, 359, 356, 379, 370, 364, 361, 359, 357, 388, 379, 370, 
        364, 361, 359)
m <- matrix(data = x, nrow = 7, byrow = TRUE)
raw <- isobands(x = 1:6, y = 7:1, z = m,
                levels_low = c(350, 360, 380),
                levels_high = c(360,380,390))
bands <- st_sfc(iso_to_sfg(raw))
plot(bands, col = 2:4)

plot(bands[1], col = 2)

plot(bands[2], col = 3)

st_is_valid(bands, reason=TRUE)
#> [1] "Self-intersection[4 4]" "Self-intersection[4 4]"
#> [3] "Valid Geometry"

What I was expecting:


band_valid <- st_make_valid(bands)
band_valid_poly <- st_collection_extract(band_valid, "POLYGON")
plot(band_valid_poly, col = 2:4)

Created on 2019-03-13 by the reprex package (v0.2.1)

clauswilke commented 5 years ago

Great example, thanks! It looks to me like the problem occurs at the original isobanding stage, not the conversion to sf. I'll look into this to see whether there's a good reason the code fails to merge the two polygons or whether it's an oversight that can be fixed.

Do you also have a similar example for the Too few points in geometry component error?

clauswilke commented 5 years ago

The problem is that mathematically the isobanding algorithm is doing the right thing here. There's a vertical line of zero width that belongs to the green band, not the red one. I'll have to ponder whether this can be addressed within isoband. Maybe st_make_valid() is always going to be required.

library(isoband)

x <-  c(365, 363, 362, 
        361, 359, 357, 358, 359, 360, 360, 359, 357, 357, 357, 359, 
        360, 359, 357, 362, 359, 359, 360, 359, 357, 370, 364, 361, 
        360, 359, 356, 379, 370, 364, 361, 359, 357, 388, 379, 370, 
        364, 361, 359)
m <- matrix(data = x, nrow = 7, byrow = TRUE)
plot_iso(m, vlo = 360, vhi = 370, fill_band = 3)

Created on 2019-03-13 by the reprex package (v0.2.1)

rCarto commented 5 years ago

All right. I'm ok with the use of st_make_valid() anyway :-) .

Here is an example with Too few points in geometry component...:

library(sf)
#> Linking to GEOS 3.5.1, GDAL 2.1.2, PROJ 4.9.3
library(isoband)
library(lwgeom)
#> Linking to liblwgeom 2.5.0dev r16016, GEOS 3.5.1, PROJ 4.9.3
xx <- c(3237, 4000, 4000, 3752, 2989, 3313, 4043, 
        4026, 4009, 3993,4270, 5000, 5000, 4996, 4426, 4220, 
        4935, 4923, 4651, 3394, 3190, 3531, 2577, 
        1629, 674)
m <- matrix(data = xx, nrow = 5, byrow = TRUE)
raw <- isobands(x = 1:5, y = 5:1, z = m,
                levels_low = c( 674, 2000, 4000),
                levels_high = c(2000,4000,5000))
bands <- st_sfc(iso_to_sfg(raw))
plot(bands, col = 2:4)

st_is_valid(bands, reason=TRUE)
#> [1] "Valid Geometry"                           
#> [2] "Valid Geometry"                           
#> [3] "Too few points in geometry component[3 3]"

band_valid <- st_make_valid(bands)
band_valid_poly <- st_collection_extract(band_valid, "POLYGON")
plot(band_valid_poly, col = 2:4)

Created on 2019-03-14 by the reprex package (v0.2.1)

clauswilke commented 5 years ago

Thanks. Again the isobanding algorithm is technically correct, so that's comforting. There's a ridge of height zero that doesn't belong to the blue band. However, I thought I had filtered out those degenerate polygons in the conversion to sf, so I'll have to look more closely what is happening here.

library(isoband)

xx <- c(3237, 4000, 4000, 3752, 2989, 3313, 4043, 
        4026, 4009, 3993,4270, 5000, 5000, 4996, 4426, 4220, 
        4935, 4923, 4651, 3394, 3190, 3531, 2577, 
        1629, 674)
m <- matrix(data = xx, nrow = 5, byrow = TRUE)
plot_iso(m, vlo = 4000, vhi = 5000, fill_band = 4)

Created on 2019-03-14 by the reprex package (v0.2.1)

clauswilke commented 5 years ago

I've decided to close this by improving the documentation of iso_to_sfg(). There's no simple way to avoid these invalid geometries, and lwgeom::st_make_valid() is probably the best way to deal with them. So now the documentation states this.