aheckmann / gridfs-stream

Easily stream files to and from MongoDB
MIT License
615 stars 120 forks source link

Piping readstream to express response results in corrupt file #76

Closed devonsams closed 9 years ago

devonsams commented 9 years ago

Here are a few recent StackOverflow questions that cover this issue: http://stackoverflow.com/questions/30108313/encoding-issue-with-gridfs-stream-multiparty-middleware

http://stackoverflow.com/questions/28324914/angular-fullstack-generator-can-not-send-iamge-stored-in-mongodb/30111236#30111236

http://stackoverflow.com/questions/30109041/node-js-file-upload-express-4-mongodb-gridfs-gridfs-stream (mine)

I verifiied that the file is posted correctly by exporting a copy from the db to disk with this command:

    mongofiles --db [dbname] get [filename]

Edit:

Here's all of my code. The issue is that the read code results in a corrupt file (or incorrectly encoded):

'use strict';

var mongoose = require('mongoose');
var _ = require('lodash');
var multiparty = require('multiparty');
var Grid = require('gridfs-stream');
Grid.mongo = mongoose.mongo;
var gfs = new Grid(mongoose.connection.db);
var Busboy = require('busboy');
var pathModule = require('path');
var fs = require('fs');

exports.create = function(req, res) {
    var busboy = new Busboy({ headers: req.headers });
    var fileId = new mongoose.Types.ObjectId();

    busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
        var writeStream = gfs.createWriteStream({
            _id: fileId,
            filename: filename,
            mode: 'w',
            content_type: mimetype,
            metadata: {
                uploadedBy: req.user._id,
                encoding: encoding,
            }
        });

        file.pipe(writeStream);
    });

    busboy.on('finish', function() {
        return res.status(200).send({
            message: fileId.toString()
        });
    });

    req.pipe(busboy);
};

exports.read = function(req, res) {
    gfs.findOne({ _id: req.params.id }, function (err, file) {
        if (err) return res.status(400).send(err);
        if (!file) return res.status(404).send('');

        res.set('Content-Type', file.contentType);
        res.set('Content-Disposition', 'attachment; filename="' + file.filename + '"');

        var readstream = gfs.createReadStream({
            _id: file._id
        });

        readstream.on("error", function(err) {
            console.log("Got error while processing stream " + err.message);
            res.end();
        });

        readstream.pipe(res);
    });
}

What should I do differently?

devonsams commented 9 years ago

Could this issue be because I'm streaming the file directly into the DB from the request? I have tried multiparty and busboy and the problem is the same with both libraries. Perhaps these libraries change the encoding somehow? But then why does this command export the file just fine?

mongofiles --db [dbname] get [filename]
Reggino commented 9 years ago

Could you add a test to this project and create a pull request? I'ld be happy to look into it.

devonsams commented 9 years ago

I just tried first writing the file to disk, then streaming from disk to gridfs, and it worked....

exports.create = function(req, res) {
    var busboy = new Busboy({ headers: req.headers });
    var fileId = new mongoose.Types.ObjectId();

    busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
        var tmpPath = '/tmp/' + filename;
        var fsWriteStream = fs.createWriteStream(tmpPath);

        busboy.on('finish', function() {

            var fsReadStream = fs.createReadStream(tmpPath);

            var writeStream = gfs.createWriteStream({
                _id: fileId,
                filename: filename,
                mode: 'w',
                content_type: mimetype,
                metadata: {
                    uploadedBy: req.user._id,
                    encoding: encoding,
                }
            });

            fsReadStream.on('end', function() {
                fs.unlink(tmpPath, function (err) {
                    if (err) throw err;
                    res.status(200).send({
                        message: fileId.toString()
                    }); 
                });
            });

            fsReadStream.pipe(writeStream);

        });

        file.pipe(fsWriteStream);
    });

    req.pipe(busboy);
};

But this isn't ideal.

So, it seems like something is going haywire trying to stream the file into gridfs directly from the busboy/multiparty file streams.

devonsams commented 9 years ago

Here's a simple implementation that I copied from another developer and modified. This is working for me: (I'm still trying to figure out why it won't work in my original express app. Something seems to be interfering)

https://gist.github.com/pos1tron/094ac862c9d116096572

var Busboy = require('busboy'); // 0.2.9
var express = require('express'); // 4.12.3
var mongo = require('mongodb'); // 2.0.31
var Grid = require('gridfs-stream'); // 1.1.1"
var app = express();
var server = app.listen(9002);

var db = new mongo.Db('test', new mongo.Server('127.0.0.1', 27017));
var gfs;
db.open(function(err, db) {
  if (err) throw err;
  gfs = Grid(db, mongo);
});

app.post('/file', function(req, res) {
  var busboy = new Busboy({ headers : req.headers });
  var fileId = new mongo.ObjectId();

  busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
    console.log('got file', filename, mimetype, encoding);
    var writeStream = gfs.createWriteStream({
      _id: fileId,
      filename: filename,
      mode: 'w',
      content_type: mimetype,
    });
    file.pipe(writeStream);
  }).on('finish', function() {
    // show a link to the uploaded file
    res.writeHead(200, {'content-type': 'text/html'});
    res.end('<a href="/file/' + fileId.toString() + '">download file</a>');
  });

  req.pipe(busboy);
});

app.get('/', function(req, res) {
  // show a file upload form
  res.writeHead(200, {'content-type': 'text/html'});
  res.end(
    '<form action="/file" enctype="multipart/form-data" method="post">'+
    '<input type="file" name="file"><br>'+
    '<input type="submit" value="Upload">'+
    '</form>'
  );
});

app.get('/file/:id', function(req, res) {
  gfs.findOne({ _id: req.params.id }, function (err, file) {
    if (err) return res.status(400).send(err);
    if (!file) return res.status(404).send('');

    res.set('Content-Type', file.contentType);
    res.set('Content-Disposition', 'attachment; filename="' + file.filename + '"');

    var readstream = gfs.createReadStream({
      _id: file._id
    });

    readstream.on("error", function(err) {
      console.log("Got error while processing stream " + err.message);
      res.end();
    });

    readstream.pipe(res);
  });
});
wdawson4 commented 9 years ago

My comment on the issue created on busboy.

I had the same problem but I managed to debug the issue. I narrowed it down to where i was confident that the problem was a piece of express middleware modified the request. I disabled my middleware one by one until i found the unlikely culprit: connect-livereload

I commented out app.use(require('connect-livereload')()); and the problem went away. I believe it was injecting the livereload script into the response (a binary image file).

devonsams commented 9 years ago

Thanks for closing the loop here! After closing this, I later came to the same realization about connect-livereload.