kleisauke / wasm-vips

libvips for the browser and Node.js, compiled to WebAssembly with Emscripten.
https://kleisauke.github.io/wasm-vips/
MIT License
519 stars 26 forks source link

writeToBuffer increases RSS size till OOMKilled #52

Closed Akash-Panigrahi closed 1 year ago

Akash-Panigrahi commented 1 year ago

I am trying to perform certain operations on a multi-page tiff image using the sharp package. Except for trim, flip and rotate, all needed operations are successfully performed by sharp.

For flip, sharp flips the whole image, which also reverses the page order. For trim and rotate, it simply throws an error. This is a known issue: https://github.com/lovell/sharp/issues/1580.

Using the vips.Image.arrayjoin function, I was able to get the desired output, but over time it increases my RSS memory size drastically, eventually killing the process. The heap is not increasing, as the GC works promptly. The memory jump happens at the writeToBuffer function call.

Is there a different way of getting the same results without the RSS size jump?

Attaching the code and the input image. Thank you.


Code to create the image (handle-multi-image.mjs)
import { readFileSync } from "node:fs";
import Vips from "wasm-vips";
import sharp from "sharp";

/**
 * Print a label followed by the current process memory usage.
 *
 * Every figure returned by `process.memoryUsage()` is converted from bytes
 * to whole mebibytes (rounded) and printed as indented JSON.
 *
 * @param {*} msg - Label written ahead of the memory report.
 */
function log(msg) {
    const BYTES_PER_MIB = 1024 * 1024;

    const usageInMib = Object.fromEntries(
        Object.entries(process.memoryUsage()).map(([field, bytes]) => [
            field,
            Math.round(bytes / BYTES_PER_MIB),
        ])
    );
    const report = JSON.stringify(usageInMib, null, 4);

    console.log(msg, `\n${report}\n`);
}

/**
 * Apply a sharp operation to every page of a multi-page image separately and
 * stitch the processed pages back into one multi-page image.
 *
 * Each page is extracted and processed with sharp, the resulting page buffers
 * are stacked vertically with wasm-vips (`arrayjoin` with `across: 1`) and the
 * result is written out with `page_height` set to the height of the first
 * processed page.
 *
 * A fresh wasm-vips instance is created and shut down on every call.
 *
 * @param {Buffer} buffer - Encoded multi-page source image.
 * @param {string} op - Name of the sharp method to call on each page
 *     (for example "flip", "rotate" or "trim").
 * @param {...*} params - Arguments forwarded to the sharp method.
 * @returns {Promise<Buffer>} The re-encoded multi-page image.
 * @throws Rethrows the first per-page sharp failure, in page order.
 */
async function createMultiPageImage(buffer, op, ...params) {
    // sharp only reports `pages` for multi-page capable inputs; treat a
    // missing value as a single page instead of silently producing no pages.
    const { pages: pageCount = 1 } = await sharp(buffer, { pages: -1 }).metadata();

    const buffers = [];
    for (let i = 0; i < pageCount; i++) {
        buffers.push(sharp(buffer, { pages: 1, page: i })[op](...params).toBuffer());
    }

    // Wait for every page to settle before surfacing the first failure.
    const results = await Promise.allSettled(buffers);
    const pages = [];

    for (const { status, value, reason } of results) {
        if (status !== "fulfilled") {
            throw reason;
        }
        pages.push(value);
    }

    const vips = await Vips();
    const vipsPages = [];
    let vipsImage;

    try {
        vips.concurrency(1);
        vips.Cache.max(0);

        for (const page of pages) {
            vipsPages.push(vips.Image.newFromBuffer(page, "", { access: vips.Access.sequential }));
        }
        vipsImage = vips.Image.arrayjoin(vipsPages, { across: 1 });

        const { height, format } = await sharp(pages[0]).metadata(); // height changes can occur

        const vipsBuffer = vipsImage.writeToBuffer("." + format, { page_height: height }).buffer;

        return Buffer.from(vipsBuffer);
    } finally {
        // Release the wasm-side objects even when joining or encoding throws.
        for (const vipsPage of vipsPages) {
            vipsPage.delete();
        }
        if (vipsImage !== undefined) {
            vipsImage.delete();
        }
        vips.shutdown();
    }
}

// Processing pipeline: each step receives the previous step's output buffer.
// The label is what gets printed next to the memory report for that step.
const steps = [
    ["flop", (input) => sharp(input, { pages: -1 }).flop().toBuffer()],
    ["negate", (input) => sharp(input, { pages: -1 }).negate().toBuffer()],
    ["flip", (input) => createMultiPageImage(input, "flip")],
    ["rotate", (input) => createMultiPageImage(input, "rotate", 30)],
    ["trim", (input) => createMultiPageImage(input, "trim")],
];

let buffer = readFileSync("./multi_page_images/multipage.tiff");

// Run the steps strictly one after another, logging memory usage after each.
for (const [label, run] of steps) {
    buffer = await run(buffer);
    log(label);
}

// Terminate explicitly once every step has been logged.
process.exit();
Output
flop 
{
    "rss": 62,
    "heapTotal": 12,
    "heapUsed": 7,
    "external": 1,
    "arrayBuffers": 0
}

negate 
{
    "rss": 65,
    "heapTotal": 12,
    "heapUsed": 7,
    "external": 1,
    "arrayBuffers": 0
}

flip 
{
    "rss": 314,
    "heapTotal": 32,
    "heapUsed": 17,
    "external": 8,
    "arrayBuffers": 7
}

rotate 
{
    "rss": 447,
    "heapTotal": 36,
    "heapUsed": 25,
    "external": 18,
    "arrayBuffers": 17
}

trim 
{
    "rss": 500,
    "heapTotal": 58,
    "heapUsed": 21,
    "external": 24,
    "arrayBuffers": 24
}

Image file multipage.tiff

kleisauke commented 1 year ago

It looks like you initialize wasm-vips on every createMultiPageImage() call. I would initialize wasm-vips only once and reuse its instance. For example:

Details

```diff
--- a/handle-multi-image.mjs
+++ b/handle-multi-image.mjs
@@ -12,6 +12,10 @@ function log(msg) {
     console.log(msg, "\n" + JSON.stringify(obj, null, 4) + "\n");
 }
 
+const vips = await Vips();
+vips.concurrency(1);
+vips.Cache.max(0);
+
 async function createMultiPageImage(buffer, op, ...params) {
     const buffers = [];
     const { pages: pageCount } = await sharp(buffer, { pages: -1 }).metadata();
@@ -31,10 +35,6 @@ async function createMultiPageImage(buffer, op, ...params) {
         }
     }
 
-    const vips = await Vips();
-    vips.concurrency(1);
-    vips.Cache.max(0);
-
     const vipsPages = pages.map((page) => vips.Image.newFromBuffer(page, "", { access: vips.Access.sequential }));
     const vipsImage = vips.Image.arrayjoin(vipsPages, { across: 1 });
 
@@ -44,7 +44,6 @@ async function createMultiPageImage(buffer, op, ...params) {
 
     vipsPages.map((vipsPage) => vipsPage.delete());
     vipsImage.delete();
-    vips.shutdown();
 
     return Buffer.from(vipsBuffer);
 }
```

With that, I see:

flop 
{
    "rss": 123,
    "heapTotal": 31,
    "heapUsed": 13,
    "external": 7,
    "arrayBuffers": 5
}

negate 
{
    "rss": 125,
    "heapTotal": 31,
    "heapUsed": 13,
    "external": 7,
    "arrayBuffers": 5
}

flip 
{
    "rss": 242,
    "heapTotal": 31,
    "heapUsed": 14,
    "external": 9,
    "arrayBuffers": 7
}

rotate 
{
    "rss": 263,
    "heapTotal": 31,
    "heapUsed": 14,
    "external": 14,
    "arrayBuffers": 11
}

trim 
{
    "rss": 275,
    "heapTotal": 31,
    "heapUsed": 14,
    "external": 19,
    "arrayBuffers": 16
}

Which is probably reasonable for image processing in WebAssembly.

Akash-Panigrahi commented 1 year ago

This definitely looks promising. I'm currently busy with work, but I will definitely check this out and get back to you. Thank you for checking the issue. 🙏🏾

Akash-Panigrahi commented 1 year ago

I just implemented the above logic and it is working flawlessly. Apart from the initial memory jump, all subsequent vips calls stay within limits. Thank you @kleisauke for the fix and also for the amazing package. 🙇🏾‍♂️