kleisauke / wasm-vips

libvips for the browser and Node.js, compiled to WebAssembly with Emscripten.
https://kleisauke.github.io/wasm-vips/
MIT License
463 stars 25 forks source link

writeToBuffer increases RSS size till OOMKilled #52

Closed Akash-Panigrahi closed 10 months ago

Akash-Panigrahi commented 10 months ago

I am trying to perform certain operations on a multi-page tiff image using the sharp package. Except for trim, flip and rotate, all needed operations are successfully performed by sharp.

For flip, sharp flips the whole image and its order too. For trim and rotate, it simply throws an error. This is a known issue, https://github.com/lovell/sharp/issues/1580.

Using the vips.Image.arrayjoin function, I was able to get the desired output, but overtime it increases my rss memory size drastically, eventually killing the process. The heap is not increasing as the GC works promptly. The memory jump happens at the writeToBuffer function call.

Is there any different way of getting the same results, without the rss size jump?

Attaching the code and the input image. Thank you.


Code to create the image (handle-multi-image.mjs)
import { readFileSync } from "node:fs";
import Vips from "wasm-vips";
import sharp from "sharp";

function log(msg) {
    const obj = {};

    for (const [key, value] of Object.entries(process.memoryUsage())) {
        obj[key] = Math.round(value / (1024 * 1024))
    }

    console.log(msg, "\n" + JSON.stringify(obj, null, 4) + "\n");
}

async function createMultiPageImage(buffer, op, ...params) {
    const buffers = [];
    const { pages: pageCount } = await sharp(buffer, { pages: -1 }).metadata();

    for (let i = 0; i < pageCount; i++) {
        buffers.push(sharp(buffer, { pages: 1, page: i })[op](...params).toBuffer());
    }

    const results = await Promise.allSettled(buffers);
    const pages = [];

    for (const { status, value, reason } of results) {
        if (status === "fulfilled") {
            pages.push(value);
        } else {
            throw reason;
        }
    }

    const vips = await Vips();
    vips.concurrency(1);
    vips.Cache.max(0);

    const vipsPages = pages.map((page) => vips.Image.newFromBuffer(page, "", { access: vips.Access.sequential }));
    const vipsImage = vips.Image.arrayjoin(vipsPages, { across: 1 });

    const { height, format } = await sharp(pages[0]).metadata(); // height changes can occur

    const vipsBuffer = vipsImage.writeToBuffer("." + format, { page_height: height }).buffer;

    vipsPages.map((vipsPage) => vipsPage.delete());
    vipsImage.delete();
    vips.shutdown();

    return Buffer.from(vipsBuffer);
}

let buffer = readFileSync("./multi_page_images/multipage.tiff");

buffer = await sharp(buffer, { pages: -1 }).flop().toBuffer();
log("flop");

buffer = await sharp(buffer, { pages: -1 }).negate().toBuffer();
log("negate");

buffer = await createMultiPageImage(buffer, "flip");
log("flip");

buffer = await createMultiPageImage(buffer, "rotate", 30);
log("rotate");

buffer = await createMultiPageImage(buffer, "trim");
log("trim");

process.exit();
Output
flop 
{
    "rss": 62,
    "heapTotal": 12,
    "heapUsed": 7,
    "external": 1,
    "arrayBuffers": 0
}

negate 
{
    "rss": 65,
    "heapTotal": 12,
    "heapUsed": 7,
    "external": 1,
    "arrayBuffers": 0
}

flip 
{
    "rss": 314,
    "heapTotal": 32,
    "heapUsed": 17,
    "external": 8,
    "arrayBuffers": 7
}

rotate 
{
    "rss": 447,
    "heapTotal": 36,
    "heapUsed": 25,
    "external": 18,
    "arrayBuffers": 17
}

trim 
{
    "rss": 500,
    "heapTotal": 58,
    "heapUsed": 21,
    "external": 24,
    "arrayBuffers": 24
}

Image file multipage.tiff

kleisauke commented 10 months ago

It looks like you initialize wasm-vips on every createMultiPageImage() call. I would initialize wasm-vips only once and reuse its instance. For example:

Details ```diff --- a/handle-multi-image.mjs +++ b/handle-multi-image.mjs @@ -12,6 +12,10 @@ function log(msg) { console.log(msg, "\n" + JSON.stringify(obj, null, 4) + "\n"); } +const vips = await Vips(); +vips.concurrency(1); +vips.Cache.max(0); + async function createMultiPageImage(buffer, op, ...params) { const buffers = []; const { pages: pageCount } = await sharp(buffer, { pages: -1 }).metadata(); @@ -31,10 +35,6 @@ async function createMultiPageImage(buffer, op, ...params) { } } - const vips = await Vips(); - vips.concurrency(1); - vips.Cache.max(0); - const vipsPages = pages.map((page) => vips.Image.newFromBuffer(page, "", { access: vips.Access.sequential })); const vipsImage = vips.Image.arrayjoin(vipsPages, { across: 1 }); @@ -44,7 +44,6 @@ async function createMultiPageImage(buffer, op, ...params) { vipsPages.map((vipsPage) => vipsPage.delete()); vipsImage.delete(); - vips.shutdown(); return Buffer.from(vipsBuffer); } ```

With that, I see:

flop 
{
    "rss": 123,
    "heapTotal": 31,
    "heapUsed": 13,
    "external": 7,
    "arrayBuffers": 5
}

negate 
{
    "rss": 125,
    "heapTotal": 31,
    "heapUsed": 13,
    "external": 7,
    "arrayBuffers": 5
}

flip 
{
    "rss": 242,
    "heapTotal": 31,
    "heapUsed": 14,
    "external": 9,
    "arrayBuffers": 7
}

rotate 
{
    "rss": 263,
    "heapTotal": 31,
    "heapUsed": 14,
    "external": 14,
    "arrayBuffers": 11
}

trim 
{
    "rss": 275,
    "heapTotal": 31,
    "heapUsed": 14,
    "external": 19,
    "arrayBuffers": 16
}

Which is probably reasonable for image processing in WebAssembly.

Akash-Panigrahi commented 10 months ago

This definitely looks promising. Currently busy with work, but will definitely check this out and revert back. Thank you for checking the issue. 🙏🏾

Akash-Panigrahi commented 10 months ago

Just implemented the above logic and working flawlessly. Apart from the initial memory jump, all other subsequent vips calls are within constraint. Thank you @kleisauke for the fix and also for the amazing package. 🙇🏾‍♂️