TysonAndre opened this issue 2 years ago
I have a working implementation of the deduplication, though I'd want to see whether geobuf would accept it:
```js
/**
 * @author Tyson Andre
 */

/**
 * @param {array} value
 * @returns {Boolean} whether this is an array in which every element is a number (true for the empty array).
 */
function isNumericArray(value) {
    for (var i = 0; i < value.length; i++) {
        if (typeof value[i] !== 'number') {
            return false;
        }
    }
    return true;
}

/**
 * Compress data returned by geobuf's decode function.
 * Objects are modified in place.
 *
 * This is useful in cases where the polygons will be used for a long time.
 * By default, arrays are allocated with extra capacity that won't be used.
 * (The empty array currently starts with a capacity of 16 elements,
 * which is inefficient for decoded points of length 2.)
 *
 * There is an option to deduplicate identical points, which may be useful
 * for collections of polygons sharing points, as well as for calling
 * compress multiple times with different objects.
 *
 * @param {any} value the value to compress.
 * @param {Map} [cache] by default, a new cache is created for each external call to compress.
 *     Must support get/has/set.
 * @param {null|Map} [numericArrayCache] if non-null, this will be used to deduplicate
 *     numeric arrays of any length, including empty arrays.
 *     This deduplication may be unsafe if callers would modify the arrays.
 * @return {any} value with all fields compressed.
 */
function compress(value, cache = new Map(), numericArrayCache = null) {
    if (cache.has(value)) {
        return cache.get(value);
    }
    if (Array.isArray(value)) {
        // By default, v8 allocates an array with a capacity of 16 elements.
        // This wastes memory for small arrays such as Points of length 2.
        //
        // slice() is used because it is available in older JS versions
        // and experimentally appears to reduce the capacity used.
        var result = value.slice();
        if (numericArrayCache && isNumericArray(result)) {
            var cacheKey = JSON.stringify(result);
            var cachedEntry = numericArrayCache.get(cacheKey);
            if (cachedEntry) {
                cache.set(value, cachedEntry);
                return cachedEntry;
            }
            // Reuse array instances such as [], [1.5, 1.5].
            numericArrayCache.set(cacheKey, result);
            cache.set(value, result);
            // Nothing left to compress.
            return result;
        }
        // Store this in the cache immediately to guard against infinite
        // recursion on invalid (cyclic) inputs.
        cache.set(value, result);
        for (var i = 0; i < result.length; i++) {
            result[i] = compress(result[i], cache, numericArrayCache);
        }
        return result;
    } else if (value && typeof value === 'object') {
        // Compress the fields of the object in place.
        // Set this in the cache immediately to prevent infinite recursion on invalid data.
        cache.set(value, value);
        var entries = Object.entries(value);
        for (var i = 0; i < entries.length; i++) {
            var entry = entries[i];
            var field = entry[1];
            var compressedValue = compress(field, cache, numericArrayCache);
            if (field !== compressedValue) {
                // Replace the object field for this key with the compressed version.
                value[entry[0]] = compressedValue;
            }
        }
    } else if (typeof value === 'string') {
        // Deduplicate strings.
        cache.set(value, value);
    }
    return value;
}

module.exports = compress;
```
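For example, a shared `numericArrayCache` deduplicates identical points across separate calls. A minimal sketch, assuming the module above is saved as `./compress`:

```js
var compress = require('./compress');

var numericArrayCache = new Map();
// The per-call `cache` is fresh each time, but the numeric-array cache is
// shared, so equal points collapse to one array instance across both calls.
var a = compress([[1.5, 1.5], [2.5, 2.5]], new Map(), numericArrayCache);
var b = compress({point: [1.5, 1.5]}, new Map(), numericArrayCache);
console.log(a[0] === b.point); // true
```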
```js
var val = require('.');
var compress = require('./compress'); // the module above

var startMem = process.memoryUsage.rss();
var m = new Map();
var originalSet = m.set;
// 1. No override:                               1.280 GB (1.8 seconds)
// 2. Defaults for cache (no numericArrayCache): 0.708 GB
// 3. Adding the second Map (numericArrayCache): 0.435 GB (6.7 seconds)
m.set = function(key, value) {
    originalSet.call(m, key, compress(value, new Map(), new Map()));
};
val.setCache({
    //store: m,
    preload: true,
});
var endMem = process.memoryUsage.rss();
console.log({startMem, endMem, diff: endMem - startMem});
```
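The capacity overhead can also be observed in isolation, without geobuf. A rough sketch (numbers vary by Node/V8 version; run with `node --expose-gc` so the `global.gc()` calls take effect):

```js
function measureRss(makeArray) {
    if (global.gc) global.gc();
    var before = process.memoryUsage.rss();
    var arrays = [];
    for (var i = 0; i < 1e6; i++) {
        arrays.push(makeArray(i));
    }
    if (global.gc) global.gc();
    // `arrays` is still reachable here, so the delta reflects live allocations.
    return process.memoryUsage.rss() - before;
}

// Arrays grown by push() keep slack capacity; slice() copies are right-sized.
console.log('pushed:', measureRss(function (i) { var a = []; a.push(i, i + 0.5); return a; }));
console.log('sliced:', measureRss(function (i) { var a = []; a.push(i, i + 0.5); return a.slice(); }));
```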
Arrays in Node.js need to be able to add elements quickly without resizing frequently, so they have both a size and a capacity. geobuf creates `'Polygon'` objects with `readLinePart`, and those arrays are created with excess capacity (16) that is never freed. Replacing `coords.push(p)` with `coords.push(p.slice())` in node_modules/geobuf/decode.js reduced the memory used to load the entire quad tree from 1,282,134,016 bytes to 528,089,088 bytes for me (1.28 GB to 0.53 GB); the latter does not carry the excess capacity.
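For concreteness, the tested change as a diff against geobuf's decode.js (from memory; the surrounding context may differ between geobuf versions):

```diff
-        coords.push(p);
+        coords.push(p.slice()); // exact-length copy, drops the push-growth slack
```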
From babel/issues/6233: note that `new Array(size)` would be worse for performance, because JS would need more internal array representations to handle arrays with mixes of types.

Solution: add a post-processing step replacing arrays with slices after decoding in `loadFeature`, or fix it in geobuf upstream (see the sketch below).
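A minimal sketch of the post-processing variant, assuming the compress module above is saved as `./compress` (`loadFeature` here stands in for the application-side loader):

```js
var fs = require('fs');
var Pbf = require('pbf');
var geobuf = require('geobuf');
var compress = require('./compress');

function loadFeature(path) {
    var feature = geobuf.decode(new Pbf(fs.readFileSync(path)));
    // Post-processing: every array is replaced by a right-sized slice, and the
    // second Map deduplicates identical numeric arrays (shared points).
    return compress(feature, new Map(), new Map());
}
```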