evansiroky / node-geo-tz

A node.js module to find the timezone based on gps coordinates
MIT License
395 stars 45 forks source link

Performance issue: Points from geobuf polygons use more array capacity than needed, wasting memory. #131

Open TysonAndre opened 2 years ago

TysonAndre commented 2 years ago

Arrays in nodejs need to be able to quickly add elements without resizing frequently, so they have both a size and a capacity.

geobuf will create 'Polygon' objects with readLinePart, and those arrays will be created with excess capacity (16 elements) that is never freed.

Replacing coords.push(p) with coords.push(p.slice()) in node_modules/geobuf/decode.js reduced the memory use of loading the entire quad tree from 1,282,134,016 bytes to 528,089,088 bytes for me (1.28 GB to 0.53 GB) — the latter has no excess capacity.

From babel/issues/6233

In V8, an empty array gets a buffer of 16 elements. This gives it a little bit of room to grow without needing reallocation. Once you add a 17th element, the buffer expands by 50%. This formula continues after that every time reallocation is needed.

Note that new Array(size) would be worse for performance, because V8 falls back to less efficient internal element representations for arrays that are created with holes or that hold a mix of element types.


Solution: Add a post-processing step replacing arrays with slices after decoding in loadFeature, or fix it in geobuf upstream

TysonAndre commented 2 years ago

I have a working implementation for the deduplication, though I would want to see if geobuf would accept that

/**
 * @author Tyson Andre
 */
/**
 * Check whether a value is an array whose elements are all numbers.
 *
 * @param {array} value the array to inspect
 * @returns {Boolean} true when every element is a number
 *                    (vacuously true for the empty array).
 */
function isNumericArray(value) {
    // for...of visits holes in sparse arrays as undefined, matching an
    // index-based scan: a hole is not a number, so it rejects the array.
    for (const element of value) {
        if (typeof element !== 'number') {
            return false;
        }
    }
    return true;
}

/**
 * Compress data returned by geobuf's decode function.
 * Objects are modified in place; arrays are replaced by compact copies.
 *
 * This is useful in cases where the polygons will be used for a long time:
 * by default, freshly-grown arrays carry extra capacity that is never used
 * (an empty array currently starts with room for 16 elements, which is
 * wasteful for decoded points of length 2). `slice()` produces a copy
 * without that excess capacity.
 *
 * Optionally, identical numeric arrays can be deduplicated, which helps for
 * collections of polygons sharing points, and for calling compress multiple
 * times with different objects against a shared cache.
 *
 * @param {any} value the value to compress.
 * @param {Map} [cache] identity cache mapping already-seen values to their
 *              compressed form; a fresh Map is created for external calls.
 *              Must support get/has/set.
 * @param {null|Map} [numericArrayCache] if non-null, used to deduplicate
 *                   numeric arrays of any length, including empty arrays.
 *                   Unsafe if callers later mutate the shared arrays.
 * @return {any} value with all fields compressed.
 */
function compress(value, cache = new Map(), numericArrayCache = null) {
    const memoized = cache.has(value);
    if (memoized) {
        return cache.get(value);
    }

    if (Array.isArray(value)) {
        // Copy first: slice() yields an array sized exactly to its contents.
        const copy = value.slice();

        let flatNumeric = false;
        if (numericArrayCache) {
            flatNumeric = true;
            for (const element of copy) {
                if (typeof element !== 'number') {
                    flatNumeric = false;
                    break;
                }
            }
        }

        if (flatNumeric) {
            const key = JSON.stringify(copy);
            const shared = numericArrayCache.get(key);
            if (shared) {
                // An equal array was seen before — reuse that instance.
                cache.set(value, shared);
                return shared;
            }
            // First occurrence of this point, e.g. [] or [1.5, 1.5]:
            // register it so later equal arrays collapse onto it.
            numericArrayCache.set(key, copy);
            cache.set(value, copy);
            // A purely numeric array has nothing left to compress.
            return copy;
        }

        // Record the mapping before recursing so cyclic or otherwise
        // self-referencing inputs terminate instead of recursing forever.
        cache.set(value, copy);
        for (let index = 0; index < copy.length; index++) {
            copy[index] = compress(copy[index], cache, numericArrayCache);
        }
        return copy;
    }

    if (value && typeof value === 'object') {
        // Plain objects are compressed field-by-field, in place.
        // Memoize immediately to guard against cycles.
        cache.set(value, value);
        for (const [key, field] of Object.entries(value)) {
            const compressed = compress(field, cache, numericArrayCache);
            if (compressed !== field) {
                // Swap in the compact/deduplicated replacement.
                value[key] = compressed;
            }
        }
        return value;
    }

    if (typeof value === 'string') {
        // Remember strings so repeated occurrences hit the cache fast path.
        cache.set(value, value);
    }
    return value;
}
// Export the compressor for reuse by other modules.
module.exports = compress;

// --- Ad-hoc benchmark: measures the RSS impact of compressing cache entries
// while node-geo-tz preloads its data. Not part of the module's API.
var val = require('.');

// RSS before preloading anything.
var startMem = process.memoryUsage.rss();
var m = new Map();
var originalSet = m.set;
// Observed RSS after preload under three configurations:
// 1. No override:                                1.280 GB (1.8 seconds)
// 2. Defaults for cache (no numericArrayCache):  0.708 GB
// 3. Adding the second Map (numericArrayCache):  0.435 GB (6.7 seconds)
m.set = function(key, value) {
  // Compress every entry before storing it; fresh caches per entry, plus a
  // numericArrayCache so equal points are deduplicated within the entry.
  originalSet.call(m, key, compress(value, new Map(), new Map()));
};
val.setCache({
  // NOTE(review): `store` is commented out, so the compressing Map above is
  // currently unused — uncomment it to reproduce the numbers listed above.
  //store: m,
  preload: true,
});

// RSS after preload; diff approximates the memory retained by the cache.
var endMem = process.memoryUsage.rss();
console.log({startMem, endMem, diff: endMem - startMem});