PipedreamHQ / pipedream

Connect APIs, remarkably fast. Free for developers.
https://pipedream.com
Other
8.82k stars 5.27k forks source link

Strange async behavior #12676

Closed denisbetsi closed 2 months ago

denisbetsi commented 3 months ago

Describe the bug Custom Node unzipping is unstable. There are no errors; however, once in a while the unzipping action produces fewer files than the zip contains. Simply re-running the custom node produces more files from the identical zip file. I assume it's related to async fs.promises, and that execution of the await terminates prematurely.

To Reproduce Steps to reproduce the behavior: N/A : It occurs randomly, I have yet to reproduce this reliably by following a set number of steps.

Expected behavior Unzip action completing as intended

Additional context Custom unzipping Node.js code:

import { axios } from '@pipedream/platform';
import path from 'path';
import unzip from 'node-unzip-2';
import fs from 'fs';

/**
 * Fetches a file's bytes by staging it at `/tmp/<basename of file.path>`.
 *
 * The primary path downloads from the URL returned by `file.toUrl()`. If the
 * download or the write to disk fails, the file is re-opened through
 * `$.files.open(file.path)` and copied to the same temp path with `toFile()`.
 * The temp file is removed before the function settles, on success or failure.
 *
 * @param {object} file - File handle exposing `path` and `toUrl()`.
 * @param {object} $ - Step context; passed to `axios` and used for `$.files.open()`.
 * @returns {Promise<Buffer>} Contents read back from the temp file.
 * @throws Rethrows errors from `toUrl()`, the fallback copy, or the final read.
 */
async function getFileContent(file, $) {
    const tmpFilePath = `/tmp/${path.basename(file.path)}`;

    try {
        const fileUrl = await file.toUrl();

        try {
            const response = await axios($, {
                url: fileUrl,
                method: 'GET',
                responseType: 'arraybuffer'
            });
            await fs.promises.writeFile(tmpFilePath, response);
        } catch (error) {
            // Surface why the primary download failed instead of discarding it,
            // so a broken fallback does not hide the root cause.
            const reason = error instanceof Error ? error.message : String(error);
            console.log(`Error fetching file, attempting to refresh: ${file.path} (${reason})`);
            const refreshedFile = await $.files.open(file.path);
            await refreshedFile.toFile(tmpFilePath);
        }

        return await fs.promises.readFile(tmpFilePath);
    } catch (error) {
        console.error(`Error getting file content for ${file.path}:`, error);
        throw error;
    } finally {
        // `force: true` makes a missing temp file (both fetch paths failed before
        // anything was written) a no-op instead of a misleading ENOENT log line.
        try {
            await fs.promises.rm(tmpFilePath, { force: true });
        } catch (cleanupError) {
            console.error(`Error deleting temporary file ${tmpFilePath}:`, cleanupError);
        }
    }
}

/**
 * Recursively lists every non-directory entry beneath `dir`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Absolute paths, in the order `readdir` reports
 *   entries, with each subdirectory's contents expanded in place.
 */
async function getAllFiles(dir) {
    const entryNames = await fs.promises.readdir(dir);

    // Resolves one entry to either its own path or the list of paths below it.
    const expandEntry = async (entryName) => {
        const fullPath = path.resolve(dir, entryName);
        const info = await fs.promises.stat(fullPath);
        if (info.isDirectory()) {
            return getAllFiles(fullPath);
        }
        return fullPath;
    };

    const nested = await Promise.all(entryNames.map(expandEntry));
    // Each element is a path or an already-flat array, so one level is enough.
    return nested.flat();
}

/**
 * Downloads a zip from the File Store, extracts it, and uploads the largest
 * `.svg` it contains to `matterport/<folderName>/<folderName>-original.svg`.
 *
 * `folderName` is the zip's base name without its extension.
 *
 * @param {object} zipFile - File handle for the archive; `zipFile.path` is read here
 *   and the handle is passed on to `getFileContent`.
 * @param {object} $ - Step context; used for `$.files.open()` and passed to `getFileContent`.
 * @returns {Promise<{unzippedFile: *, folderName: string}>} The value returned by
 *   `fromFile()` for the stored SVG, plus the derived folder name.
 * @throws {Error} 'No SVG file found in the zip archive' when no non-empty `.svg`
 *   was extracted (`unzipWithRetry` matches on this exact message); also rethrows
 *   download, extraction, and upload errors.
 */
async function unzipFile(zipFile, $) {
    console.log('Starting unzipFile function');
    const folderName = path.basename(zipFile.path, path.extname(zipFile.path));
    const outputFileName = `${folderName}-original.svg`;
    const outputFilePath = path.join('matterport', folderName, outputFileName);

    // A fresh directory per call: a fixed shared path let files left behind by a
    // failed attempt (or another run on the same worker) leak into this scan.
    const tmpDir = await fs.promises.mkdtemp('/tmp/matterport_unzip-');
    console.log(`Temporary directory: ${tmpDir}`);
    console.log(`Folder name: ${folderName}`);

    try {
        // Write zip file to tmp directory
        const tmpZipPath = path.join(tmpDir, path.basename(zipFile.path));

        console.log('Downloading zip file content', zipFile);
        // `$` must be forwarded: getFileContent needs it for both the HTTP
        // request and the File Store fallback.
        const response = await getFileContent(zipFile, $);

        await fs.promises.writeFile(tmpZipPath, response);
        console.log('Zip file written to temporary directory');

        // Unzip the file
        console.log('Starting to unzip the file');
        await new Promise((resolve, reject) => {
            const fail = (error) => {
                console.error('Error during unzip:', error);
                reject(error);
            };

            // NOTE(review): completion relies on the extractor's 'close' event
            // firing only after every entry is flushed to disk. If files still go
            // missing intermittently, verify that guarantee for this library.
            fs.createReadStream(tmpZipPath)
                // pipe() does not forward source errors; without this handler a
                // read failure would leave the promise pending forever.
                .on('error', fail)
                .pipe(unzip.Extract({ path: tmpDir }))
                .on('close', () => {
                    console.log('Unzip process completed');
                    resolve();
                })
                .on('error', fail);
        });

        // Find the largest SVG file
        console.log('Finding the largest SVG file');
        const files = await getAllFiles(tmpDir);
        console.log('Files', files);
        let largestSvgFile = null;
        let largestSize = 0;

        for (const file of files) {
            if (path.extname(file).toLowerCase() !== '.svg') {
                continue;
            }
            const stats = await fs.promises.stat(file);
            // Strictly greater than 0 on the first hit, so empty SVGs never win.
            if (stats.size > largestSize) {
                largestSvgFile = file;
                largestSize = stats.size;
            }
        }

        if (!largestSvgFile) {
            throw new Error('No SVG file found in the zip archive');
        }

        console.log(`Largest SVG file found: ${largestSvgFile}`);
        const storeFile = await $.files.open(outputFilePath).fromFile(largestSvgFile);
        console.log(`File stored in File Store: ${outputFilePath}`);

        return { unzippedFile: storeFile, folderName };
    } finally {
        // Clean up on every exit path, not just success. Best-effort: a cleanup
        // failure is logged rather than masking the real result or error.
        console.log('Cleaning up temporary directory');
        try {
            await fs.promises.rm(tmpDir, { recursive: true, force: true });
            console.log('Temporary directory removed');
        } catch (cleanupError) {
            console.error(`Error removing temporary directory ${tmpDir}:`, cleanupError);
        }
    }
}

/**
 * Runs `unzipFile`, retrying only when it fails with the "no SVG found" error.
 *
 * @param {object} zipFile - Passed through to `unzipFile`.
 * @param {object} $ - Passed through to `unzipFile`.
 * @param {number} [retries=3] - Maximum number of attempts.
 * @param {number} [delay=1000] - Milliseconds to wait between attempts.
 * @returns {Promise<*>} Whatever `unzipFile` resolves with; `undefined` if
 *   `retries` allows no attempt at all.
 * @throws Rethrows any other error immediately, and the "no SVG found" error
 *   once the final attempt has failed.
 */
async function unzipWithRetry(zipFile, $, retries = 3, delay = 1000) {
    const retryableMessage = 'No SVG file found in the zip archive';
    const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));

    let attempt = 0;
    while (attempt < retries) {
        try {
            return await unzipFile(zipFile, $);
        } catch (error) {
            // Give up on unrelated failures, or when no attempts remain.
            if (error.message !== retryableMessage || attempt >= retries - 1) {
                throw error;
            }
            console.log(`Attempt ${attempt + 1} failed. Retrying after ${delay}ms...`);
            await pause(delay);
        }
        attempt += 1;
    }
}

export default defineComponent({
    /**
     * Step entry point: unzips the archive produced by the `download_floor_plan`
     * step and exports the stored SVG handle and its folder name.
     */
    async run({ steps, $ }) {
        const { zipFile } = steps.download_floor_plan;
        const result = await unzipWithRetry(zipFile, $);
        const { unzippedFile, folderName } = result;

        $.export('unzippedFile', unzippedFile);
        $.export('folderName', folderName);
    }
});
vunguyenhung commented 3 months ago

Hi @denisbetsi,

From your description, my understanding is that you suspect a problem with fs.promises, and that it is affecting your unzipping use case. As for the unzipping use case itself, Pipedream engineers are not responsible for your custom code and cannot identify the issues it may have.

Could you provide minimal reproduction steps for the fs.promises issue you suspect?

dylburger commented 2 months ago

Without more information, I'm going to close this issue. Feel free to reopen and add more details, if you'd like.