adraffy / ens-normalize.js

ENSIP-15 in JS
https://adraffy.github.io/ens-normalize.js/test/resolver.html
MIT License
63 stars 17 forks source link

Memory leak when calling decode_arithmetic() #29

Open christopherwxyz opened 1 month ago

christopherwxyz commented 1 month ago

Currently using @farcaster/shuttle and experiencing a gnarly memory leak. I loaded up a heapdump and narrowed it down to a dynamically growing array or possibly a few variants.

Screenshot 2024-06-10 at 4 28 48 PM

export function decode_arithmetic(bytes) {
    let pos = 0;
    function u16() { return (bytes[pos++] << 8) | bytes[pos++]; }

    // decode the frequency table
    let symbol_count = u16();
    let total = 1;
    let acc = [0, 1]; // first symbol has frequency 1
    for (let i = 1; i < symbol_count; i++) {
        acc.push(total += u16()); // Potential memory growth issue
    }

    // skip the sized-payload that the last 3 symbols index into
    let skip = u16();
    let pos_payload = pos;
    pos += skip;

    let read_width = 0;
    let read_buffer = 0; 
    function read_bit() {
        if (read_width == 0) {
            read_buffer = (read_buffer << 8) | bytes[pos++]; // Potential out-of-bounds read
            read_width = 8;
        }
        return (read_buffer >> --read_width) & 1;
    }

    const N = 31;
    const FULL = 2**N;
    const HALF = FULL >>> 1;
    const QRTR = HALF >> 1;
    const MASK = FULL - 1;

    // fill register
    let register = 0;
    for (let i = 0; i < N; i++) register = (register << 1) | read_bit();

    let symbols = []; // Potential memory growth issue
    let low = 0;
    let range = FULL; // treat like a float
    while (true) {
        let value = Math.floor((((register - low + 1) * total) - 1) / range);
        let start = 0;
        let end = symbol_count;
        while (end - start > 1) { // binary search loop
            let mid = (start + end) >>> 1;
            if (value < acc[mid]) {
                end = mid;
            } else {
                start = mid;
            }
        }
        if (start == 0) break; // first symbol is end mark
        symbols.push(start); // Potential memory growth issue
        let a = low + Math.floor(range * acc[start]   / total);
        let b = low + Math.floor(range * acc[start+1] / total) - 1
        while (((a ^ b) & HALF) == 0) {
            register = (register << 1) & MASK | read_bit();
            a = (a << 1) & MASK;
            b = (b << 1) & MASK | 1;
        }
        while (a & ~b & QRTR) {
            register = (register & HALF) | ((register << 1) & (MASK >>> 1)) | read_bit();
            a = (a << 1) ^ HALF;
            b = ((b ^ HALF) << 1) | HALF | 1;
        }
        low = a;
        range = 1 + b - a;
    }
    let offset = symbol_count - 4;
    return symbols.map(x => { // Potential out-of-bounds access
        switch (x - offset) {
            case 3: return offset + 0x10100 + ((bytes[pos_payload++] << 16) | (bytes[pos_payload++] << 8) | bytes[pos_payload++]);
            case 2: return offset + 0x100 + ((bytes[pos_payload++] << 8) | bytes[pos_payload++]);
            case 1: return offset + bytes[pos_payload++];
            default: return x - 1;
        }
    });
}

Steps to reproduce:

  1. Run https://github.com/christopherwxyz/enterprise-shuttle in a local environment (happy to share this on a call since setup is non-trivial).
  2. Observe heapdump after 5 minutes of run.
  3. Log out to Chrome devtools.
adraffy commented 1 month ago

That function shouldn't run more than once per blob, of which there are 2.

Let me see if I can run this.

adraffy commented 1 month ago

There's not enough information here for me to run this, can you provide steps?

christopherwxyz commented 1 month ago
  1. Not sure if it's minimally reproducible. It's being called from enterprise-shuttle -> @farcaster/hub-nodejs -> @farcaster/core -> viem -> ens-normalizer
  2. You'll need to run a shuttle instance and let it expand the dynamic array out to a size large enough to prevent garbage collection.
  3. I ran this in the dockerfile container locally, then heapdump to a local file to analyze.
adraffy commented 1 month ago

I temporarily added an deinit() function and called init(); deinit(); in a loop while observing process.memoryUsage() and see no leak. IMO, it very unlikely that there is a leak at this level, as I'm only building dag-like structures with Set, Map, and primitives. I also took special care to avoid large spreads to avoid JS engine limitations.

However, this library does have a non-zero setup cost to decompress the full spec and build the appropriate structures. Is your project (or some part of it) using a short-lived worker?

Can you provide me a zip or the JS bundle of whatever code is actually being executed? Possibly the inline base64 blob is getting mangled during your build process? decode_arithmetic() will certainty do something silly if the incoming data is non-sense (eg. zip-bomb equivalent expansion)