nathanpeck / exiftool

A Node.js wrapper around exiftool, providing metadata extraction from numerous audio, video, document, and binary filetypes
MIT License
81 stars 26 forks source link

How do I get metadata for video file greater than 10GB? #12

Closed vaibhavmule closed 8 years ago

vaibhavmule commented 8 years ago
RangeError: File size is greater than possible Buffer: 0x7fffffff bytes
    at FSReqWrap.readFileAfterStat [as oncomplete] (fs.js:351:11)
nathanpeck commented 8 years ago

For such files I'd recommend using a custom implementation based on extracting metadata from just the first or final bytes of the file as described here: http://stackoverflow.com/questions/17491080/extracting-metadata-from-incomplete-video-files

Piping a full 10 GB stream through your node process is really, really bad.

vaibhavmule commented 8 years ago

@nathanpeck does this code make sense? https://github.com/Mindgreppers/OHHDL/blob/hard-disk-sync/backend/app/hard-disk/sync.js

nathanpeck commented 8 years ago

_.forEach() is synchronous, so if you have a lot of files in the directory (say 1000), then you will end up opening 1000 filestreams, and reading the first MB of all those files in parallel and your node process will probably die a horrible death.

You will want to use async.eachLimit() to limit the number of parallel file read ops to 5-10 or so at any one time.

mastersilv3r commented 8 years ago

I'm reading the first few bytes of every file as suggested, and it works. Thanks!

The buffer handling, as given in the code gist by @vaibhavmule, needed to be changed.

// Walk the directory tree, then read only the first ~2 MB of each file
// (enough for exiftool to locate the metadata) instead of buffering a
// whole multi-GB file into memory.
walk('/home/master/Desktop/ghoo')
  .then(function(files) {
    // Cap concurrency at 2 so we never hold more than a couple of open
    // read streams at once.
    // NOTE(review): returning a promise from the iteratee requires
    // async v3+; older async versions expect a callback — verify.
    return async.eachLimit(files, 2, function(file) {

      // Skip hidden files (dotfiles).
      if (file.name.charAt(0) === '.') {
        return
      }
      var deferred = Q.defer()

      var chunks = []
      var rstream = fs.createReadStream(file.root + '/' + file.name, {
        encoding: null,    // raw bytes — exiftool needs a Buffer
        start: 0,
        end: 2048 * 1024   // inclusive end offset: first ~2 MB of the file
      })
      rstream
        .on('error', function(err) {
          // Without this handler an unreadable file emits an unhandled
          // 'error' event (crashing the process) and the deferred never
          // settles, permanently stalling async.eachLimit's slot.
          deferred.reject(err)
        })
        .on('data', function(chunk) {
          chunks.push(chunk)
        })
        .on('end', function() { // done
          exif.metadata(Buffer.concat(chunks), function(err, metadata) {
            if (err) {
              console.log(err, 'err')
              deferred.reject(err)
            }
            else {
              // Log before resolving so the metadata is reported before
              // the limiter moves on to the next file.
              debug(metadata, 'metadata')
              deferred.resolve()
            }
          })
        })
      return deferred.promise
    })
  }).catch(function(err) {
    debug(err, 'err')
  })
vadimshvetsov commented 5 years ago

A simple function if you want to use it with await later:

/**
 * Read the leading bytes of a file via a stream, without ever loading
 * the whole file into memory (safe for multi-GB videos).
 *
 * @param {string} location - File path for reading.
 * @param {number} [end=2097152] - Inclusive end byte offset; the stream
 *   reads bytes 0..end, i.e. up to end + 1 bytes (or the whole file if
 *   it is smaller). Defaults to the original hard-coded ~2 MB window.
 * @returns {Promise<Buffer>} Resolves with a Buffer of the first file
 *   chunks, or rejects with the stream's read error.
 */
const getFilePart = (location, end = 2048 * 1024) => new Promise((resolve, reject) => {
  const chunks = [];
  fs.createReadStream(location, {
    encoding: null, // keep raw bytes — callers expect a Buffer
    start: 0,
    end,
  })
    .on('data', chunk => { chunks.push(chunk); })
    .on('end', () => { resolve(Buffer.concat(chunks)); })
    .on('error', reject);
});
// Later in the app
(async function() {
  try {
    // `file` is a Buffer of the first ~2 MB — hand it to exiftool here.
    const file = await getFilePart('any/path/to/video.mov');
  } catch (err) {
    // Without this catch, a read failure becomes an unhandled promise
    // rejection (a crash on modern Node).
    console.error(err);
  }
})();