strongloop / strong-oracle

Deprecated: Node.js Driver for Oracle databases (Use https://github.com/oracle/node-oracledb instead)
Other
45 stars 18 forks source link

Possible Memory Leak In Reader? #40

Closed kathan closed 8 years ago

kathan commented 9 years ago

I've been testing the code below and I've noticed a persistent memory leak. Basically, this script queries a table with ~2 million rows and streams them to a CSV file. It's been using ~800 MB of memory and GC is not cleaning it up. There is another issue where my streaming logic doesn't always write out every row but I'll deal with that on my own.

var settings = {},
    db = require('strong-oracle')(settings),
    config = {
        hostname: 'server.name',
        database: 'db',
        port: 1521,
        username: 'user',
        password: 'pw'
    },
    path = '/path/to/out/file/out.csv',
    Fs = require('fs'),
    stream = Fs.createWriteStream(path, 'w'),
    start = Date.now(),
    reader,
    row_count = 0;

db.connect(config, function(err, conn){
    if(err){
        return console.log(err);
    }else{
        stream.on('error', function(err){
            //If there is an error writing to the file, finish the operation.
            return done(err);
        });
        var sql = "SELECT * FROM large_table";
        conn.setPrefetchRowCount(1000);
        reader = conn.reader(sql, []);
        return doRead(done);
    }
});

function done(err){
    if(err){
        var result = err;
    }else{
        var end = Date.now(),
            result = (end-start).toString();
    }

    console.log(result);
    return res.send(result);
}

function doRead(cb) {
    reader.nextRows(function(err, rows) {
        if (err){
            return cb(err);
        }else{
            if (rows.length > 0) {
                var row_str = '';
                for(var r in rows){
                    var row = rows[r],
                        col_num = row.length,
                        col_count = 0;

                    if(row_count > 0){
                        //This is not the first row. Start with line feed
                        row_str += '\n';
                    }else{
                        //If this is the first row, build header row
                        for(var c in row){
                            col_count > 0 ? row_str += ',' : '';
                            row_str += '"'+c+'"';
                            col_count++;
                        }
                        row_str += '\n';
                        col_count = 0;
                    }

                    for(var c in row){
                        //Build row 
                        col_count > 0 ? row_str += ',' : '';
                        row_str += '"'+row[c]+'"';
                        col_count++;
                    }
                    row_count++;
                }

                //Write to file
                if(stream.write(row_str)){
                    return doRead(cb);
                }else{
                    console.log('pausing...');
                    stream.once('drain', function(){
                        console.log('restarting...');
                        return doRead(cb);
                    });
                }
            } else {
                //No more rows
                if(stream.end()){
                    return cb();
                }else{
                    stream.once('finish', function(){
                        console.log('finished writing');
                        return cb();
                    });
                }
            }
        }
    });
}

I was able to remedy the memory leak by adding returns to the nextRow callbacks in the oracle.js file. In addition, my script runs nearly 400% faster.

Before:

Reader.prototype.nextRow = function (cb) {
  var self = this;
  if (!self._handle || self._error || (self._rows && self._rows.length > 0)) {
    process.nextTick(function () {
      cb(self._error, self._rows && self._rows.shift());
    });
  } else {
    // nextRows willl use the prefetch row count as window size
    self._handle.nextRows(function (err, result) {
      self._error = err || self._error;
      self._rows = result;
      if (err || result.length === 0) {
        self._handle = null;
      }
      cb(self._error, self._rows && self._rows.shift());
    });
  }
};

After:

Reader.prototype.nextRow = function (cb) {
  var self = this;
  if (!self._handle || self._error || (self._rows && self._rows.length > 0)) {
    return process.nextTick(function () {
      return cb(self._error, self._rows && self._rows.shift());
    });
  } else {
    // nextRows willl use the prefetch row count as window size
    self._handle.nextRows(function (err, result) {
      self._error = err || self._error;
      self._rows = result;
      if (err || result.length === 0) {
        self._handle = null;
      }
      return cb(self._error, self._rows && self._rows.shift());
    });
  }
};
raymondfeng commented 9 years ago

Cool, would you like to submit a pull request?