WikiWatershed / mmw-geoprocessing

A Spark Job Server job for Model My Watershed geoprocessing.
Apache License 2.0
6 stars 6 forks source link

Design Input and Output Formats for List-Taking Operation #82

Closed rajadain closed 6 years ago

rajadain commented 6 years ago

For #80, as discussed here.

rajadain commented 6 years ago

A MapShed job currently sends the following JSON requests:

"nlcd_soil": {
  "input": {
    "polygon": [
      "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    ],
    "polygonCRS": "LatLng",
    "rasters": [
      "nlcd-2011-30m-epsg5070-512-int8",
      "ssurgo-hydro-groups-30m-epsg5070-512-int8"
    ],
    "rasterCRS": "ConusAlbers",
    "operationType": "RasterGroupedCount",
    "zoom": 0
  }
}
"nlcd_streams": {
  "input": {
    "polygon": [
      "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    ],
    "polygonCRS": "LatLng",
    "vector": [
      "{\"type\":\"LineString\",\"coordinates\":[[-75.1436574074125,39.9180092592633],[-75.1437500000051,39.9180092592633],[-75.1438425925977,39.9181018518559],[-75.1439351851903,39.9181944444485],[-75.1439351851903,39.918287037041]]}",
      "{\"type\":\"LineString\",\"coordinates\":[[-75.1518981481533,39.918287037041],[-75.1518055555607,39.918287037041],[-75.1517129629681,39.918287037041]]}",
      "{\"type\":\"LineString\",\"coordinates\":[[-75.2225462963017,39.9106018518558],[-75.2224537037091,39.9105092592632],[-75.2223611111166,39.9105092592632],[-75.222268518524,39.9105092592632],[-75.2221759259314,39.9106018518558],[-75.2221759259314,39.9106944444484],[-75.2221759259314,39.910787037041]]}"
    ],
    "vectorCRS": "LatLng",
    "rasters": [
      "nlcd-2011-30m-epsg5070-512-int8"
    ],
    "rasterCRS": "ConusAlbers",
    "operationType": "RasterLinesJoin",
    "zoom": 0
  }
}
"gwn": {
  "input": {
    "polygon": [
      "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    ],
    "polygonCRS": "LatLng",
    "rasters": [
      "us-groundwater-nitrogen-30m-epsg5070-512"
    ],
    "rasterCRS": "ConusAlbers",
    "operationType": "RasterGroupedCount",
    "zoom": 0
  }
}
"avg_awc": {
  "input": {
    "polygon": [
      "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    ],
    "targetRaster": "us-ssurgo-aws100-30m-epsg5070-512",
    "rasters": [],
    "rasterCRS": "ConusAlbers",
    "polygonCRS": "LatLng",
    "operationType": "RasterGroupedAverage",
    "zoom": 0
  }
}
"nlcd_slope": {
  "input": {
    "polygon": [
      "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    ],
    "polygonCRS": "LatLng",
    "rasters": [
      "nlcd-2011-30m-epsg5070-512-int8",
      "us-percent-slope-30m-epsg5070-512"
    ],
    "rasterCRS": "ConusAlbers",
    "operationType": "RasterGroupedCount",
    "zoom": 0
  }
}
"slope": {
  "input": {
    "polygon": [
      "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    ],
    "polygonCRS": "LatLng",
    "rasters": [],
    "targetRaster": "us-percent-slope-30m-epsg5070-512",
    "rasterCRS": "ConusAlbers",
    "operationType": "RasterGroupedAverage",
    "zoom": 0
  }
}
"nlcd_kfactor": {
  "input": {
    "polygon": [
      "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    ],
    "polygonCRS": "LatLng",
    "rasters": [
      "nlcd-2011-30m-epsg5070-512-int8"
    ],
    "targetRaster": "us-ssugro-kfactor-30m-epsg5070-512",
    "rasterCRS": "ConusAlbers",
    "operationType": "RasterGroupedAverage",
    "zoom": 0
  }
}
rajadain commented 6 years ago

In the new format, we'll have a principal shape (the containing parent HUC) and a number of component shapes (child HUCs contained within the parent). We can represent this either as a single list, where the first item is the Principal Shape and all subsequent items are the Component Shapes, or as a separate key for the Principal Shape alongside a separate list of Component Shapes. I prefer the latter, since it is clearer.

rajadain commented 6 years ago

This may also be a good time to implement this:

For the sake of clarity, however, it would be good to change that parameter to be a single MultiLineString object rather than an array which conventionally has a single item in it.

from https://github.com/WikiWatershed/model-my-watershed/pull/2506

rajadain commented 6 years ago

An initial pass looks like this:

"multi_operation": {
  "parentShape": "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}",
  "childShapes": [
    "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}",
    "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}",
    "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
  ],
  "streamLines": "{\"type\":\"MultiLineString\",\"coordinates\":[[[-75.1436574074125,39.9180092592633],[-75.1437500000051,39.9180092592633],[-75.1438425925977,39.9181018518559],[-75.1439351851903,39.9181944444485],[-75.1439351851903,39.918287037041]]]}",
  "operations": [
    {
      "type": "RasterGroupedCount",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8",
        "ssurgo-hydro-groups-30m-epsg5070-512-int8"
      ]
    },
    {
      "type": "RasterLinesJoin",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8"
      ]
    },
    {
      "type": "RasterGroupedCount",
      "rasters": [
        "us-groundwater-nitrogen-30m-epsg5070-512"
      ]
    },
    {
      "type": "RasterGroupedAverage",
      "targetRaster": "us-ssurgo-aws100-30m-epsg5070-512",
      "rasters": []
    },
    {
      "type": "RasterGroupedCount",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8",
        "us-percent-slope-30m-epsg5070-512"
      ]
    },
    {
      "type": "RasterGroupedAverage",
      "targetRaster": "us-percent-slope-30m-epsg5070-512",
      "rasters": []
    },
    {
      "type": "RasterGroupedAverage",
      "targetRaster": "us-ssugro-kfactor-30m-epsg5070-512",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8"
      ]
    }
  ]
}
rajadain commented 6 years ago

It may be a good idea to include the HUC ids in the shapes so they can be included in the responses, rather than relying on the order. We should do the same for the operations.

rajadain commented 6 years ago

Here's a second pass, with ids added to shapes and names to operations:

{
  "parentShape": {
    "id": "huc8_5421",
    "shape": "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
  },
  "childShapes": [
    {
      "id": "huc12_120",
      "shape": "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    },
    {
      "id": "huc12_121",
      "shape": "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    },
    {
      "id": "huc12_122",
      "shape": "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    },
    {
      "id": "huc12_123",
      "shape": "{\"type\":\"MultiPolygon\",\"coordinates\":[[[[-75.1626205444336,39.95580659996906],[-75.25531768798828,39.94514735903112],[-75.22785186767578,39.89446035777916],[-75.1461410522461,39.88761144548104],[-75.09309768676758,39.91078961774283],[-75.09464263916016,39.93817189499188],[-75.12039184570312,39.94435771955196],[-75.1626205444336,39.95580659996906]]]]}"
    }
  ],
  "streamLines": "{\"type\":\"MultiLineString\",\"coordinates\":[[[-75.1436574074125,39.9180092592633],[-75.1437500000051,39.9180092592633],[-75.1438425925977,39.9181018518559],[-75.1439351851903,39.9181944444485],[-75.1439351851903,39.918287037041]]]}",
  "operations": [
    {
      "name": "nlcd_soils",
      "type": "RasterGroupedCount",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8",
        "ssurgo-hydro-groups-30m-epsg5070-512-int8"
      ]
    },
    {
      "name": "nlcd_streams",
      "type": "RasterLinesJoin",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8"
      ]
    },
    {
      "name": "gwn",
      "type": "RasterGroupedCount",
      "rasters": [
        "us-groundwater-nitrogen-30m-epsg5070-512"
      ]
    },
    {
      "name": "avg_awc",
      "type": "RasterGroupedAverage",
      "targetRaster": "us-ssurgo-aws100-30m-epsg5070-512",
      "rasters": []
    },
    {
      "name": "nlcd_slope",
      "type": "RasterGroupedCount",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8",
        "us-percent-slope-30m-epsg5070-512"
      ]
    },
    {
      "name": "slope",
      "type": "RasterGroupedAverage",
      "targetRaster": "us-percent-slope-30m-epsg5070-512",
      "rasters": []
    },
    {
      "name": "nlcd_kfactor",
      "type": "RasterGroupedAverage",
      "targetRaster": "us-ssugro-kfactor-30m-epsg5070-512",
      "rasters": [
        "nlcd-2011-30m-epsg5070-512-int8"
      ]
    }
  ]
}
rajadain commented 6 years ago

The parentShape can be included in this design or not. Reasons for including it:

Reasons for not including it:

I don't have a good intuition for what is more performant, so will err on the side of explicitness and include it. I don't imagine the performance penalty to be severe either way.

rajadain commented 6 years ago

Given the above input in https://github.com/WikiWatershed/mmw-geoprocessing/issues/82#issuecomment-371924840, the output can look like:

{
  "huc12_120": {
    "nlcd_soils": {
      "List(11, -2147483648, -2147483648)": 1,
      "List(11, -2147483648, 38)": 13,
      "List(11, -2147483648, 41)": 18,
      "List(11, 1, 38)": 2,
      "List(11, 4, 38)": 5
    },
    "nlcd_streams": {
      "List(24)": 25494,
      "List(31)": 101,
      "List(41)": 13336,
      "List(42)": 58,
      "List(43)": 841,
      "List(52)": 1562
    },
    "gwn": {
      "List(0)": 3200
    },
    "avg_awc": {
      "List(0)": 9.937211446569115
    },
    "nlcd_slope": {
      "List(52)": 1562,
      "List(71)": 597,
      "List(81)": 1282,
      "List(82)": 2708,
      "List(90)": 24158,
      "List(95)": 3332
    },
    "slope": {
      "List(0)": 9.937211446569115
    },
    "nlcd_kfactor": {
      "List(11)": 2282,
      "List(21)": 34415,
      "List(22)": 45118,
      "List(23)": 39960,
      "List(24)": 25494,
      "List(31)": 101
    }
  },
  "huc12_121": {
    "nlcd_soils": {
      "List(11, -2147483648, -2147483648)": 1,
      "List(11, -2147483648, 38)": 13,
      "List(11, -2147483648, 41)": 18,
      "List(11, 1, 38)": 2,
      "List(11, 4, 38)": 5
    },
    "nlcd_streams": {
      "List(24)": 25494,
      "List(31)": 101,
      "List(41)": 13336,
      "List(42)": 58,
      "List(43)": 841,
      "List(52)": 1562
    },
    "gwn": {
      "List(0)": 3200
    },
    "avg_awc": {
      "List(0)": 9.937211446569115
    },
    "nlcd_slope": {
      "List(52)": 1562,
      "List(71)": 597,
      "List(81)": 1282,
      "List(82)": 2708,
      "List(90)": 24158,
      "List(95)": 3332
    },
    "slope": {
      "List(0)": 9.937211446569115
    },
    "nlcd_kfactor": {
      "List(11)": 2282,
      "List(21)": 34415,
      "List(22)": 45118,
      "List(23)": 39960,
      "List(24)": 25494,
      "List(31)": 101
    }
  },
  "huc12_122": {
    "nlcd_soils": {
      "List(11, -2147483648, -2147483648)": 1,
      "List(11, -2147483648, 38)": 13,
      "List(11, -2147483648, 41)": 18,
      "List(11, 1, 38)": 2,
      "List(11, 4, 38)": 5
    },
    "nlcd_streams": {
      "List(24)": 25494,
      "List(31)": 101,
      "List(41)": 13336,
      "List(42)": 58,
      "List(43)": 841,
      "List(52)": 1562
    },
    "gwn": {
      "List(0)": 3200
    },
    "avg_awc": {
      "List(0)": 9.937211446569115
    },
    "nlcd_slope": {
      "List(52)": 1562,
      "List(71)": 597,
      "List(81)": 1282,
      "List(82)": 2708,
      "List(90)": 24158,
      "List(95)": 3332
    },
    "slope": {
      "List(0)": 9.937211446569115
    },
    "nlcd_kfactor": {
      "List(11)": 2282,
      "List(21)": 34415,
      "List(22)": 45118,
      "List(23)": 39960,
      "List(24)": 25494,
      "List(31)": 101
    }
  },
  "huc12_123": {
    "nlcd_soils": {
      "List(11, -2147483648, -2147483648)": 1,
      "List(11, -2147483648, 38)": 13,
      "List(11, -2147483648, 41)": 18,
      "List(11, 1, 38)": 2,
      "List(11, 4, 38)": 5
    },
    "nlcd_streams": {
      "List(24)": 25494,
      "List(31)": 101,
      "List(41)": 13336,
      "List(42)": 58,
      "List(43)": 841,
      "List(52)": 1562
    },
    "gwn": {
      "List(0)": 3200
    },
    "avg_awc": {
      "List(0)": 9.937211446569115
    },
    "nlcd_slope": {
      "List(52)": 1562,
      "List(71)": 597,
      "List(81)": 1282,
      "List(82)": 2708,
      "List(90)": 24158,
      "List(95)": 3332
    },
    "slope": {
      "List(0)": 9.937211446569115
    },
    "nlcd_kfactor": {
      "List(11)": 2282,
      "List(21)": 34415,
      "List(22)": 45118,
      "List(23)": 39960,
      "List(24)": 25494,
      "List(31)": 101
    }
  }
}

Since MapShed will run on each shape, we group the results by shape first then operation, rather than the other way around.