mmomtchev / node-gdal-async

Node.js bindings for GDAL (Geospatial Data Abstraction Library) with full async support
https://mmomtchev.github.io/node-gdal-async/
Apache License 2.0
129 stars 26 forks source link

Unsupported operation `layers.features.set` #77

Closed kochis closed 1 year ago

kochis commented 1 year ago

I have the following script where I am converting a GeoTIFF to GeoJSON via polygonizeAsync. I'm trying to add an additional property class to the output feature properties, but am getting an unsupported operation error.

Any guidance on what I might be doing wrong here would be greatly appreciated.

script

const path = require('path');
const gdal = require('gdal-async');

/**
 * Polygonizes band 1 of a raster into a 'polygons' vector layer and then tries
 * to tag every generated feature with a `class` string taken from the file name.
 *
 * The output dataset is created with the GeoJSONSeq driver at
 * `./output/geojson/<basename>.geojson`.
 *
 * @param {string} inputFile - path to the input `.tif` raster; the class value
 *   is the second `-`-separated segment of its base name.
 * @returns {Promise<void>} nothing is returned; both datasets are closed at the end.
 * @throws {Error} "Unsupported operation", raised by `layer.features.set` on
 *   the line marked below.
 */
const polygonize = async (inputFile) => {
  const filename = path.basename(inputFile, '.tif');
  const outputFile = `./output/geojson/${filename}.geojson`;
  // second '-'-separated segment of the base name (undefined if there is no '-')
  const classValue = filename.split('-')[1];

  // open the input raster file
  const inputDataset = await gdal.openAsync(inputFile);
  const inputBand = inputDataset.bands.get(1);

  // create the output vector file (GeoJSONSeq)
  const geojsonDriver = gdal.drivers.get('GeoJSONSeq');
  const outputDataset = await geojsonDriver.createAsync(outputFile, 0, 0, 0, gdal.GDT_Unknown);

  // create the output vector layer
  const srs = inputDataset.srs;
  const layer = outputDataset.layers.create('polygons', srs, gdal.wkbPolygon);
  layer.fields.add(new gdal.FieldDefn('class', gdal.OFTString));

  // polygonize the raster to create features
  await gdal.polygonizeAsync({
    src: inputBand,
    dst: layer,
    // NOTE(review): presumably the index of the field that receives the pixel
    // value; field 0 is 'class' here, so that value may end up in 'class' — confirm
    pixValField: 0,
    connectedness: 8,
  });

  // update generated features with "class" property
  layer.features.forEach((feature) => {
    feature.fields.set('class', classValue);
    // Writing a modified feature back is a random-access write. In GDAL the
    // GeoJSONSeq driver implements only reading and creation, so this call is
    // rejected with "Unsupported operation".
    layer.features.set(feature); // <-- ERROR HAPPENS HERE
  });

  // close the datasets
  // (there is no try/finally, so these are not reached when the loop above throws)
  inputDataset.close();
  outputDataset.close();
}

module.exports = polygonize;

output

Error: Unsupported operation
    at /Users/craig/Projects/gdal-test/src/polygonize.js:33:20
    at LayerFeatures.gdal.LayerFeatures.forEach (/Users/craig/Projects/gdal-test/node_modules/gdal-async/lib/iterators.js:96:11)
    at polygonize (/Users/craig/Projects/gdal-test/src/polygonize.js:31:18)
mmomtchev commented 1 year ago

In GDAL, GeoJSONSeq implements only reading and creation. Random-access writing, even if possible in theory, would defeat the point of this format which is to be able to read it incrementally even if it can't fit in RAM.

Create a GeoJSON and then, as a final operation, transform it to GeoJSONSeq. GeoJSON supports random-access writing.

kochis commented 1 year ago

That makes sense, thanks!

kochis commented 1 year ago

Me again!

I made some changes to the script: this time I use a Memory dataset to create the vector features, then iterate over the features to add the desired `class` property before saving out to GeoJSONSeq.

It creates the file, but it only includes the DN property, which seems to be getting set from the polygonize function. Any idea why the `class` field isn't being persisted in the output?

Script

const path = require('path');
const gdal = require('gdal-async');

/**
 * Polygonizes band 1 of a raster into an in-memory 'polygons' layer, sets a
 * `class` value on each feature, then copies the layer into a GeoJSONSeq
 * dataset at `./output/geojson/<basename>.geojson`.
 *
 * As written, the `class` value does not reach the output and the layer copy
 * is not awaited; see the NOTE comments inside the body.
 *
 * @param {string} inputFile - path to the input `.tif` raster; the class value
 *   is the second `-`-separated segment of its base name.
 * @returns {Promise<string>} the path of the output file.
 */
const polygonize = async (inputFile) => {
  const filename = path.basename(inputFile, '.tif');
  const outputFile = `./output/geojson/${filename}.geojson`;

  // open the input raster file
  const inputDataset = await gdal.openAsync(inputFile);
  const inputBand = inputDataset.bands.get(1);

  // create temporary memory driver to write the files
  const driver = gdal.drivers.get('Memory');
  const tempDataset = driver.create(`${filename}.tmp`); // filename is required?

  // create the output vector layer
  const srs = inputDataset.srs;
  const layer = tempDataset.layers.create('polygons', srs, gdal.wkbPolygon);
  // 'DN' is added first, so it is field 0 and 'class' is field 1
  layer.fields.add(new gdal.FieldDefn('DN', gdal.OFTInteger));
  layer.fields.add(new gdal.FieldDefn('class', gdal.OFTString));

  // polygonize the raster to create features
  await gdal.polygonizeAsync({
    src: inputBand,
    dst: layer,
    // presumably the index of the field that receives the pixel value
    // (field 0 is 'DN' here) — confirm against the polygonize documentation
    pixValField: 0,
    connectedness: 8,
  });

  // set the "class" property on each feature
  const classValue = filename.split('-')[1];
  // NOTE: a feature obtained from a layer is a copy, not a reference. The
  // change below is made on that copy and never written back, which is why
  // `class` is missing from the output; `layer.features.set(feature)` is
  // needed to persist it.
  layer.features.forEach((feature) => {
    feature.fields.set('class', classValue);
  });

  // create a new GeoJSONSeq dataset
  const geojsonDriver = gdal.drivers.get('GeoJSONSeq');
  const geojsonDataset = geojsonDriver.create(outputFile, 0, 0, 0, gdal.GDT_Unknown);

  // copy the layers from the in-memory dataset to the GeoJSONSeq dataset
  // NOTE: the result of copyAsync is not awaited, so the copy has not had a
  // chance to finish when the synchronous flush() below runs. This mixes async
  // and sync calls on the same dataset.
  // (the callback parameter `layer` shadows the outer `layer` constant)
  tempDataset.layers.forEach((layer) => {
    geojsonDataset.layers.copyAsync(layer, layer.name, []);
  });

  // flush the cache to ensure data is written to disk
  geojsonDataset.flush();

  // close the datasets
  inputDataset.close();
  tempDataset.close();
  geojsonDataset.close();

  return outputFile;
}

module.exports = polygonize;

output

Output is missing the class field

{
  ...,
  "properties": {
    "DN": 10
  },
}
mmomtchev commented 1 year ago

Change

// Before: `feature` is a copy of the layer's feature, so the field is set on
// the copy only and the layer itself is left unchanged.
layer.features.forEach((feature) => {
  feature.fields.set('class', classValue);
});

to

// After: modify the copy, then write it back so the layer keeps the change.
layer.features.forEach((feature) => {
  feature.fields.set('class', classValue);
  // `feature` is a copy, not a reference — it must be set back on the layer
  layer.features.set(feature);
});

When you get a feature from a layer, you get a copy, not a reference. You must set it back. This is GDAL's API.

Also, I would suggest that you exercise extreme caution when mixing sync and async calls - especially on the same Dataset - that is why I added a warning. In your case the loop with the `copyAsync` does not have a chance to finish before you move on to the `flush` step. This is the reason for the console warning. It works by chance, because `copyAsync` locks that Dataset and `flush` synchronously waits for this lock, triggering the warning. Go either full sync or full async unless you have a very good reason. This is why nearly every method has an Async counterpart.

kochis commented 1 year ago

Nice, that seems to be working now 🙌

On the async note, I updated the code to the following, but am still getting the synchronous warnings (it's unclear to me where).

  ...

  // set the "class" property on each feature
  const classValue = filename.split('-')[1];

  const updates = [];
  layer.features.forEach((feature) => {
    feature.fields.set('class', classValue);
    updates.push(layer.features.setAsync(feature));
  });
  await Promise.all(updates);

  // create a new GeoJSONSeq dataset
  const geojsonDriver = gdal.drivers.get('GeoJSONSeq');
  const geojsonDataset = geojsonDriver.create(outputFile, 0, 0, 0, gdal.GDT_Unknown);

  // copy the layers from the in-memory dataset to the GeoJSONSeq dataset
  const copies = [];
  tempDataset.layers.forEach((layer) => {
    copies.push(geojsonDataset.layers.copyAsync(layer, layer.name, []));
  });
  await Promise.all(copies);

  // flush the cache to ensure data is written to disk
  await geojsonDataset.flushAsync();

  // close the datasets
  inputDataset.close();
  tempDataset.close();
  geojsonDataset.close();

  return outputFile;
}

module.exports = polygonize;
mmomtchev commented 1 year ago
// Replaces the `layer.features.forEach(...)` loop with `for await` iteration.
// NOTE(review): presumably the forEach iteration was the remaining synchronous
// call behind the warning — confirm against the console output.
const updates = [];
for await (const feature of layer.features) {
  feature.fields.set('class', classValue);
  // queue the async write-back of the modified feature
  updates.push(layer.features.setAsync(feature));
};
// wait until every queued write-back has settled
await Promise.all(updates);