Open jeroen opened 6 months ago
Here is what I puzzled together. Does this seem right? Is there a more efficient way to do this:
const fs = require('fs')
const tar = require('tar-stream')
const gunzip = require('gunzip-maybe');
/**
 * Build an index of the entries in a tar archive (optionally gzip-compressed).
 *
 * The file at `path` is streamed through `gunzip-maybe` into a `tar-stream`
 * extractor. For every entry one record is collected; entry bodies are
 * drained without being stored.
 *
 * @param {string} path - Path of the archive to read.
 * @returns {Promise<Array<{name: string, offset: number, size: number}>>}
 *   Resolves with one record per entry, in archive order, once the extractor
 *   emits 'finish'. Rejects on the first error raised by the file stream, the
 *   decompressor, the extractor, or an entry stream.
 */
function tar_index(path){
  const input = fs.createReadStream(path);
  const output = [];
  return new Promise(function(resolve, reject) {
    // `.pipe()` does not forward errors downstream, so every stage of the
    // pipeline needs its own 'error' handler. Without this a missing or
    // unreadable file raises an unhandled 'error' event instead of rejecting.
    input.on('error', reject);
    const decompress = gunzip();
    decompress.on('error', reject);
    const extract = tar.extract({allowUnknownFormat: true});

    function process_entry(header, stream, next) {
      // NOTE(review): `_buffer.shifted` is an undocumented tar-stream internal.
      // It is presumably the number of bytes of the (uncompressed) tar stream
      // consumed so far, i.e. where this entry's data begins -- confirm against
      // the installed tar-stream version before relying on it.
      const offset = extract._buffer.shifted;
      output.push({
        name: header.name,
        offset: offset,
        size: header.size
      });
      stream.on('end', function () {
        next(); // ready for the next entry
      });
      stream.on('error', reject);
      // The entry body is not needed; resume() drains it so 'end' fires.
      stream.resume();
    }

    function finish_stream(){
      resolve(output);
    }

    extract
      .on('entry', process_entry)
      .on('finish', finish_stream)
      .on('error', reject);
    input.pipe(decompress).pipe(extract);
  }).finally(function(){
    // Release the file descriptor whether indexing succeeded or failed.
    input.destroy();
  });
}
// Example usage: print the index, or report the failure and exit non-zero
// instead of leaving the promise rejection unhandled.
tar_index('myfile.tar.gz')
  .then(console.log)
  .catch(function (err) {
    console.error(err);
    process.exitCode = 1;
  });
Should be easy to add yea. Feel free to PR that
I would like to generate an index of a tar file with the start and end offset of each file in the tarball, such that I can mmap or extract a single file later on. Is this possible with tar-stream?
The documentation of headers only mentions the size of each file, but I would also need the offset within the tar.
From hacking it looks like the global property
extract._buffer.shifted
contains what I need, but this is mostly a guess. It would be nice if the header
callback could include the offset property for each entry.