I've been testing the code below and I've noticed a persistent memory leak. Basically, this script queries a table with ~2 million rows and streams them to a CSV file. It's been using ~800 MB of memory and GC is not cleaning it up. There is another issue where my streaming logic doesn't always write out every row but I'll deal with that on my own.
// --- Script setup: DB driver, connection config, output stream, run state. ---
var settings = {},
    db = require('strong-oracle')(settings),
    config = {
      hostname: 'server.name',
      database: 'db',
      port: 1521,
      username: 'user',
      password: 'pw'
    },
    path = '/path/to/out/file/out.csv',
    Fs = require('fs'),
    // BUG FIX: createWriteStream's second argument is an options object (or an
    // *encoding* string). The bare string 'w' was being interpreted as an
    // encoding, not an open flag; the flag belongs in { flags: 'w' }.
    stream = Fs.createWriteStream(path, { flags: 'w' }),
    start = Date.now(),   // wall-clock start; done() reports elapsed ms
    reader,               // assigned once the DB connection is established
    row_count = 0;        // rows written so far (0 => CSV header not yet emitted)
// Open the connection, wire up the output stream, and kick off the read loop.
db.connect(config, function (err, conn) {
  if (err) {
    return console.log(err);
  }
  // If the file becomes unwritable mid-run, finish the operation with the error.
  stream.on('error', function (streamErr) {
    return done(streamErr);
  });
  // Fetch rows from Oracle in batches of 1000 to bound per-call memory.
  conn.setPrefetchRowCount(1000);
  reader = conn.reader("SELECT * FROM large_table", []);
  return doRead(done);
});
/**
 * Final callback for the export: logs either the error or the elapsed
 * time (in milliseconds, as a string) since the script started.
 *
 * @param {Error|string} [err] - error that aborted the run, if any.
 * @returns {*} the logged result (the error, or the elapsed-ms string).
 */
function done(err){
  var result; // declared up front instead of inside a branch (var hoists anyway)
  if(err){
    result = err;
  }else{
    // Success: report total wall-clock time for the export.
    result = (Date.now() - start).toString();
  }
  console.log(result);
  // BUG FIX: the original ended with `return res.send(result)`, but `res` is
  // never defined in this standalone script (an Express leftover), so every
  // completion threw a ReferenceError. Return the result instead.
  return result;
}
/**
 * Pulls the next batch of rows from the Oracle reader, appends them to the
 * CSV stream (emitting a quoted header line before the very first row), and
 * recurses until the reader is exhausted. Respects write-stream backpressure
 * by waiting for 'drain' when write() returns false.
 *
 * Uses module-level state: `reader`, `stream`, `row_count`.
 *
 * @param {function(Error=)} cb - invoked once on completion or first error.
 */
function doRead(cb) {
  reader.nextRows(function(err, rows) {
    if (err) {
      return cb(err);
    }
    if (rows.length === 0) {
      // No more rows. BUG FIX: the original did `if (stream.end()) cb();` —
      // but end()'s return value is not a "flushed" flag (modern Node returns
      // the stream itself, which is truthy), so cb could fire before the file
      // was fully written, dropping buffered rows. Always wait for 'finish'.
      stream.once('finish', function(){
        console.log('finished writing');
        return cb();
      });
      stream.end();
      return;
    }
    var row_str = '';
    // Index loop instead of for-in: rows is an array, not a plain object.
    for (var i = 0; i < rows.length; i++) {
      var row = rows[i];
      if (row_count > 0) {
        // Not the first row overall: separate from the previous line.
        row_str += '\n';
      } else {
        // First row overall: emit a header line built from the column names.
        var header_cols = 0;
        for (var name in row) {
          if (header_cols > 0) { row_str += ','; }
          row_str += '"' + csvEscape(name) + '"';
          header_cols++;
        }
        row_str += '\n';
      }
      var col_count = 0;
      for (var col in row) {
        if (col_count > 0) { row_str += ','; }
        row_str += '"' + csvEscape(row[col]) + '"';
        col_count++;
      }
      row_count++;
    }
    if (stream.write(row_str)) {
      return doRead(cb);
    }
    // Backpressure: buffer is full, resume once the stream drains.
    console.log('pausing...');
    stream.once('drain', function(){
      console.log('restarting...');
      return doRead(cb);
    });
  });
}

/**
 * Makes a value safe to embed between CSV double quotes.
 * BUG FIX: the original emitted raw values, so any value containing `"`
 * produced malformed CSV; RFC 4180 requires doubling embedded quotes.
 *
 * @param {*} value - cell value; coerced to string like the original `+` did.
 * @returns {string}
 */
function csvEscape(value) {
  return String(value).replace(/"/g, '""');
}
I was able to remedy the memory leak by adding returns to the nextRow callbacks in the oracle.js file. In addition, my script runs nearly 400% faster.
I've been testing the code below and I've noticed a persistent memory leak. Basically, this script queries a table with ~2 million rows and streams them to a CSV file. It's been using ~800 MB of memory and GC is not cleaning it up. There is another issue where my streaming logic doesn't always write out every row but I'll deal with that on my own.
I was able to remedy the memory leak by adding returns to the `nextRow` callbacks in the oracle.js file. In addition, my script runs nearly 400% faster.

Before:
After: