aourednik / historical-basemaps

Collection of georeferenced boundaries of world countries and cultural regions for use in mapping historical data on a global or continental scale
GNU General Public License v3.0

fix geojson: #21

Closed · Fil closed this 2 years ago

Fil commented 2 years ago

closes https://github.com/aourednik/historical-basemaps/issues/18

the script I used:

function normalizeWindingOrder(geo) {
  switch (geo.type) {
    case "FeatureCollection":
      geo.features.forEach(normalizeWindingOrder);
      return geo;
    case "Feature":
      normalizeWindingOrder(geo.geometry);
      return geo;
    case "GeometryCollection":
      normalizeWindingOrder(geo.geometries);
      return geo;
    case "MultiPolygon":
      geo.coordinates.forEach(normalizePolygon);
      return geo;
    case "Polygon":
      normalizePolygon(geo.coordinates);
      return geo;
  }
  return geo;

  function normalizePolygon(coordinates) {
    // Clamp latitudes to [-90, 90]; out-of-range values break spherical math.
    coordinates.forEach((ring) =>
      ring.forEach((p) => {
        if (Math.abs(p[1]) > 90) p[1] = 90 * Math.sign(p[1]);
      })
    );
    // Fix winding order: an exterior ring (i = 0) should enclose less than a
    // hemisphere (area < 2π), a hole (i > 0) more; reverse the ring otherwise.
    coordinates.forEach(
      (ring, i) =>
        (d3.geoArea({ type: "Polygon", coordinates: [ring] }) >= 2 * Math.PI) ^
          !!i && ring.reverse()
    );
  }
}
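
A minimal usage sketch (not part of the PR), assuming d3 is loaded and `geojsonText` holds the raw file contents: normalize the collection in memory, then render it with d3.geoPath.

// Hypothetical usage: with the winding fixed, d3.geoPath no longer "explodes"
// the polygons when drawing them.
const collection = normalizeWindingOrder(JSON.parse(geojsonText));
const path = d3.geoPath(d3.geoEqualEarth());

d3.select("svg")
  .selectAll("path")
  .data(collection.features)
  .join("path")
  .attr("d", path)
  .attr("fill", "#eee")
  .attr("stroke", "#333");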
aourednik commented 2 years ago

Many thanks!

Fil commented 2 years ago

A demo of how to use these files with Observable and Plot: https://observablehq.com/@visionscarto/historical-boundaries
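
For reference, a minimal sketch of the same idea (not the linked notebook), assuming Observable Plot's geo mark; the raw URL, branch, and file name below are assumptions:

import * as Plot from "@observablehq/plot";

// Fetch one of the repository's geojson files (URL and file name assumed) and
// draw it with Plot's geo mark on an Equal Earth projection.
const world = await fetch(
  "https://raw.githubusercontent.com/aourednik/historical-basemaps/master/geojson/world_1000.geojson"
).then((r) => r.json());

document.body.append(
  Plot.plot({
    projection: "equal-earth",
    marks: [
      Plot.sphere(),
      Plot.geo(world, { fill: "#eee", stroke: "currentColor" })
    ]
  })
);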

aourednik commented 2 years ago

Hi @Fil, I'm having trouble keeping the geojson files in a D3-geo compatible state. I convert from gpkg (edited with QGIS) to geojson with the R code below. The result displays well on https://historicborders.app/ and in the GitHub preview, but on my d3-geo webapp many geometries explode in all projections. They also explode in your Observable example.

I understand the code snippet you provided above solves the issue. Could you tell me more about what normalizePolygon() does? If I manage to understand what's wrong with my geojson files, I could port the repair algorithm to R.

require(sf) 
require(stringr)
require(magrittr)
require(data.table)
require(jsonlite)
# recursively count the vertices of each geometry (lists, matrices, points)
nVerts = function(x) {
  out = if (is.list(x)) sapply(sapply(x, nVerts), sum) else {
    if (is.matrix(x))
      nrow(x)
    else {
      if (sf::st_is_empty(x)) 0 else 1
    }
  }
  unname(out)
}

basedir <- dirname(rstudioapi::getActiveDocumentContext()$path)
geodatas <- list.files(file.path(basedir,"gpkg"),pattern="world_.*gpkg$|places.gpkg$")
geodatasFull <- list.files(file.path(basedir,"gpkg"),pattern="world_.*gpkg$|places.gpkg$",full.names = TRUE)

# i <- 3
for (i in 1:length(geodatas)) {

    # Create working variables ----
    geofileNoFolder <- geodatas[i]
    geofilePure <- str_extract(geofileNoFolder,"[^\\.]*")
    print(paste("________________________________________",geofilePure))
    geofile_fullpath <- geodatasFull[i]
    geofile_mtime <- file.mtime(geofile_fullpath) 
    print(paste("geofile",geofile_mtime))
    geofile_wal_mtime <- file.mtime(paste0(dirname(geofile_fullpath),"/",geofilePure,".gpkg-wal"))
    print(paste("geofile_wal",geofile_wal_mtime))
    geodata <- st_read(geofile_fullpath, quiet=TRUE) # 2nd argument can specify the name of a layer;

    # check validity and repair if necessary
    geodata_year <- str_extract(geofilePure,"[^_]*$")  %>% str_remove(.,"bc") %>% as.numeric
    if(!is.na(str_match(geofilePure, "bc")[1])) {geodata_year <- geodata_year * -1}
    geodata$geometrytest <- sapply(geodata$geom,function(x){st_dimension(x)})   # if st_dimension != 2, it is not a multipolygon
    geodata$geometryempty <- sapply(geodata$geom,function(x){st_is_empty(x)})
    geodata <- geodata[!(is.na(geodata$geometrytest) & geodata$geometryempty),]
    geodata$geomclass <- sapply(geodata$geom,function(x){class(x)[2]}) 
    geodata$valid <- sapply(geodata$geom,function(x){st_is_valid(x)})
    geodata$valid_problem <- sapply(geodata$geom,function(x){st_is_valid(x,reason=T)})
    invalid_features <- data.table(id=which(!geodata$valid),problem=geodata[!geodata$valid,]$valid_problem)
    pointFile <- unique(geodata$geomclass) == "POINT"
    changeGeopackageFile <- FALSE

    # Clean file and repair geometries ----
    # https://r-spatial.org/r/2017/03/19/invalid.html
    if (nrow(invalid_features)>0) {
        print("invalid features:")
        print(invalid_features)
        directly_removable <- which(geodata$valid_problem %like% "Too few points")
        if (length(directly_removable)>0){
            print(paste("removing",length(directly_removable), "polygons with too few points"))
            geodata <- geodata[-directly_removable,]
            changeGeopackageFile <- TRUE
        }
    }
    geodata$valid <- NULL
    geodata$valid_problem <- NULL

    if (!pointFile & nrow(geodata[geodata$geomclass!="MULTIPOLYGON",]) > 0) {
        print("Repairing geometry !")
        changeGeopackageFile <- TRUE
        wrongrows_indices <- which(geodata$geomclass!="MULTIPOLYGON")
        for (j in wrongrows_indices){
            x <- geodata[j,]$geom
            if (nVerts(x)<3) {
                geodata <- geodata[-c(j),]
            } else {
                mypolygons <- st_collection_extract(x) # this converts to polygons, even if we provide linestrings
                geodata[j,]$geom <- st_multipolygon(mypolygons)%>%st_sfc
            }
        }
    }
    geodata$geometrytest <- NULL
    geodata$geometryempty <- NULL
    geodata$geomclass <- NULL

    if(!pointFile) {
        # Add missing columns ----
        if(!"SUBJECTO" %in% colnames(geodata)) {
                geodata$SUBJECTO <- geodata$NAME
                changeGeopackageFile <- TRUE
        }
        if(!"BORDERPRECISION" %in% colnames(geodata)) {
                geodata$BORDERPRECISION <- ifelse(geodata_year<1646,1,3)
                changeGeopackageFile <- TRUE
        }
        if(!"PARTOF" %in% colnames(geodata)) {
                geodata$PARTOF <- geodata$NAME
                changeGeopackageFile <- TRUE
        }
        if(!"ABBREVN" %in% colnames(geodata)) {
                geodata$ABBREVN <- geodata$NAME
                changeGeopackageFile <- TRUE
        }

        # Fill missing fields
        if (nrow(geodata[is.na(geodata$ABBREVN) & !is.na(geodata$NAME),]) + nrow(geodata[is.na(geodata$SUBJECTO) & !is.na(geodata$NAME),]) + nrow(geodata[is.na(geodata$PARTOF) & !is.na(geodata$NAME),]) > 0) {
            changeGeopackageFile <- TRUE
            geodata[is.na(geodata$ABBREVN),]$ABBREVN <- geodata[is.na(geodata$ABBREVN),]$NAME
            geodata[is.na(geodata$SUBJECTO),]$SUBJECTO <- geodata[is.na(geodata$SUBJECTO),]$NAME
            geodata[is.na(geodata$PARTOF),]$PARTOF <- geodata[is.na(geodata$PARTOF),]$NAME
        }

        if (nrow(geodata[is.na(geodata$BORDERPRECISION),])>0) {
            changeGeopackageFile <- TRUE
            geodata[is.na(geodata$BORDERPRECISION),]$BORDERPRECISION <- ifelse(geodata_year<1646,1,3) 
        }
    }

    # Write the repaired GeoPackage ----
    if (changeGeopackageFile) {
        # st_write(geodata,geofile_fullpath,append=FALSE,layer_options = "ENCODING=UTF-8") # SHP
        st_write(geodata,geofile_fullpath,append=FALSE)
    } 
    geojson <- file.path(basedir,"git","geojson",paste0(geofilePure,".geojson"))
    geojson_mtime <- ifelse(file.exists(geojson), file.mtime(geojson), geofile_mtime-1)
    print(paste("geojson",as.POSIXct(geojson_mtime, origin="1970-01-01")))
    # only write the file if the wal has been edited in the current QGIS session AND if the wal is more recent than the geojson file
    if (is.na(geofile_wal_mtime)) next
    if(geofile_wal_mtime > geofile_mtime + 10 & geofile_wal_mtime > geojson_mtime ) {
        if (file.exists(geojson)) {file.remove(geojson)}
        st_write(geodata,geojson)
    }            
}
Fil commented 2 years ago

The first part makes sure that latitude is never above +90 or below -90.

The second part makes sure that none of the polygons are "reversed". I compute the spherical area of each ring taken on its own; for the exterior ring, if that area is greater than 2π (more than a hemisphere) I reverse it. For interior rings (holes, which have position i > 0), the check is inverted: we want every hole to have an area greater than 2π, since a hole's own area is 4π minus the space that is "carved" by the hole.
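
To make the check concrete, a small sketch (assuming d3-geo is loaded as in the scripts above; the ring is arbitrary): a ring and its reverse enclose complementary parts of the sphere, so their areas sum to 4π and exactly one of the two orientations stays below 2π.

const ring = [[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]; // ~1°×1° square
const a = d3.geoArea({ type: "Polygon", coordinates: [ring] });
const b = d3.geoArea({ type: "Polygon", coordinates: [[...ring].reverse()] });

console.log(a + b, 4 * Math.PI);               // ≈ equal: the two regions tile the sphere
console.log(a < 2 * Math.PI, b < 2 * Math.PI); // one true, one false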

aourednik commented 2 years ago

Thanks @Fil

Turns out porting this to the R sf package is not immediately easy, but I managed to wrap your functions in a JS file executed via a system call from R. Posting this here to keep a trace:

Call from R

geojson <- file.path(path,to,geojson,file)
system(paste("/opt/homebrew/bin/node repair_geometries.js --file",geojson))

The JS script:

// yarn add d3
// yarn add d3-geo

let d3 = {};
runapp();

async function runapp(){
  const fs = require('fs');
  d3 = await Promise.all([
    import("d3"),
    import("d3-geo")
  ]).then(d => Object.assign({}, ...d));
  let currentMapFile = process.argv[3]; // path given after the --file flag (argv: node, script, --file, path)
  let data = fs.readFileSync(currentMapFile);
  data = JSON.parse(data);
  console.log(data);
  data.features = data.features.map(dat => normalizeWindingOrder(dat));
  fs.writeFileSync( currentMapFile, JSON.stringify(data));
}

function normalizeWindingOrder(geo) {
  switch (geo.type) {
    case "FeatureCollection":
      geo.features.forEach(normalizeWindingOrder);
      return geo;
    case "Feature":
      normalizeWindingOrder(geo.geometry);
      return geo;
    case "GeometryCollection":
      normalizeWindingOrder(geo.geometries);
      return geo;
    case "MultiPolygon":
      geo.coordinates.forEach(normalizePolygon);
      return geo;
    case "Polygon":
      normalizePolygon(geo.coordinates);
      return geo;
  }
  return geo;

  function normalizePolygon(coordinates) {
    coordinates.forEach((ring) =>
      ring.forEach((p) => {
        if (Math.abs(p[1]) > 90) p[1] = 90 * Math.sign(p[1]);
      })
    );
    coordinates.forEach(
      (ring, i) =>
        (d3.geoArea({ type: "Polygon", coordinates: [ring] }) >= 2 * Math.PI) ^
          !!i && ring.reverse()
    );
  }
}