hypertidy / silicate

A general form for complex data
https://hypertidy.github.io/silicate/
53 stars 4 forks source link

vec_group_id replace group ident #124

Open mdsumner opened 4 years ago

mdsumner commented 4 years ago

See vctrs::vec_group_id a drop-in replace for group_indices().

Discussed here: https://github.com/r-lib/vctrs/issues/332

The example below is part of ongoing discussion for changes here and in sfheaders/geometries: https://github.com/dcooley/geometries/issues/4

Maybe consider whether dplyr can be dropped (this is coming from sfheaders direction as well so can replace gibble but the joins will be tough).

new_SC0 <- function(vertex, object, index, crs = NA_character_, meta = NULL) {
  meta1 <- tibble::tibble(proj = crs, ctime = Sys.time())
  if (!is.null(meta)) {
    meta <- rbind(meta1, meta)
  }
  object[["topology_"]] <- index
  structure(list(object = object, vertex = vertex,
                 meta = meta), class = c("SC0", "sc"))
}

## build SC0 with sfheaders

sc0 <- function(x, ...) {
  df <- sfheaders::sf_to_df(x)
  crs <- crsmeta::crs_proj(x)

  x[[attr(x, "sf_column")]] <- NULL
  object <- tibble::as_tibble(x)
  object$object_ <- 1:nrow(object)

  ## deduplicate in xy
  df[["vertex_"]] <-  vctrs::vec_group_id(df[c("x", "y")])
  ## the vertex table, separated out (we need vertex_ to remap)
  v <- df[!duplicated(df[["vertex_"]]), c("x", "y", "vertex_")]
  ## now remap (can this be done better?) 
  ## (alt. is unjoin())
  df[["vertex_"]] <- match(df$vertex_, v$vertex_)

  ## cleanup
  v[["vertex_"]] <- NULL
  df[["x"]] <- NULL  ## not really necessary to remove but highlights the point
  df[["y"]] <- NULL  ## that these are now indexed in 'v'

  ## a global linestring_id 
  if ("multipolygon_id" %in% names(df)) {
    df[["path_"]] <- vctrs::vec_group_id(df[c("sfg_id", "polygon_id", "linestring_id")])

  } else {
    df[["path_"]] <- vctrs::vec_group_id(df, c("sfg_id", "linestring_id"))

  }
  featurelist <- split(df, df$sfg_id)
  feature_segments <- vector("list", length(featurelist))
  .path2seg <- function(x, pathid = NULL) {
    cbind(.vx0 = x[-length(x)], .vx1 = x[-1L], path_ = pathid)
  }

  for (i in seq_along(featurelist)) {
    segments <- lapply(split(featurelist[[i]][c("vertex_", "path_")], 
                             featurelist[[i]]$path_), 
                       function(lstring) .path2seg(lstring[["vertex_"]], 
                                                   pathid = lstring[["path_"]][1L]))
    feature_segments[[i]] <- tibble::as_tibble(do.call(rbind, segments))
  }
  names(v) <- c("x_", "y_")
  new_SC0(v, object, feature_segments, crs = crs)
}
library(silicate)
plot(sc0(inlandwaters))
rbenchmark::benchmark(sc0(inlandwaters), 
                      SC0(inlandwaters))
#1 sc0(inlandwaters)          100    5.76    1.000      5.67     0.05         NA        NA
#2 SC0(inlandwaters)          100    8.61    1.495      8.49     0.13         NA        NA