mafintosh / tar-stream

tar-stream is a streaming tar parser and generator.
MIT License
400 stars 93 forks source link

File corrupted when combining extract with gzip #138

Closed loretoparisi closed 1 year ago

loretoparisi commented 2 years ago

I have to extract a .tar.gz archive. My solution was to pipe a tar-stream extract to gunzip in this way:

    const fs = require('fs');
    const os = require('os');
    const zlib = require('zlib');
    const tar = require('tar-stream');
    const { promisify } = require('util');
    const writeFile = promisify(fs.writeFile);

    const extract = tar.extract();
    const gunzip = zlib.createGunzip();

    const { Duplex } = require('stream'); // Native Node Module 

    function bufferToStream(myBuffer) {
        let tmp = new Duplex();
        tmp.push(myBuffer);
        tmp.push(null);
        return tmp;
    }

    var chunks = [];
    extract.on('entry', function (header, stream, next) {
        if(header.name==='./podcastindex_feeds.db')
            stream.on('data', function (chunk) {
                chunks.push(chunk);
            });
        stream.on('end', function () {
            next();
        });
        stream.resume();
    });
    extract.on('finish', async function () {
        if (chunks && chunks.length) {
            console.log(chunks.length)
            const myReadableStream = bufferToStream(Buffer.from(chunks));
            myReadableStream
                .pipe(fs.createWriteStream(destPath))
                .on('close', async function () {
                    consoleLogger.info("wrote %s", destPath);
                })
                .on('error', (error) => {
                    consoleLogger.warn("gunzip error:%@", error.toString());
                })
        }
    })
        .on('error', (error) => {
            consoleLogger.warn("gunzip error:%@", error.toString());
        })

    fs.createReadStream(tmpPath)
        .pipe(gunzip)
        .pipe(extract)

but the resulting file size is less than 200KB, while the source file was 900MB (3GB when extracted).

hzaun commented 2 years ago

I'm facing a similar issue, except that my file is twice the size it's supposed to be.

hzaun commented 2 years ago

I was originally referring this to extract my file.

Looking at your code again, I realized I was using a string to compile the data. I switched to using an array, and it worked for me:

    const extract = tar.extract()
    const buffer = []

    extract.on("entry", function(header, stream, cb) {
        stream.on("data", function(chunk) {
            if (header.type == "file" &&
                (!fileToExtract || getFileName(header.name) == fileToExtract)
            ) {
                buffer.push(chunk)
            }
        })

        stream.on("end", function() {
            cb()
        })

        stream.on("error", function() {
            cb()
        })
        stream.resume()
    })

    extract.on("finish", function() {
        const bufferedData = Buffer.concat(buffer)
        fs.writeFile(localFilePath, bufferedData, () => undefined)
    })

    return await fs.createReadStream(tarFilePath)
        .pipe(zlib.createGunzip())
        .pipe(extract)

My file is only around 6MB though.