dominictarr / JSONStream

rawStream.pipe(JSONStream.parse()).pipe(streamOfObjects)
Other
1.92k stars 165 forks source link

Performance much worse than a simple implementation #147

Closed brandonros closed 6 years ago

brandonros commented 6 years ago
function writeMethodOne() {
  // Benchmark: serialize one million records into a single JSON array by
  // hand, writing raw strings to an fs stream with a 64 KiB high-water mark.
  // NOTE: `record` and `fs` come from the enclosing script's scope.
  var stream = fs.createWriteStream(`/tmp/data.json`, {highWaterMark: Math.pow(2,16)});

  console.time('loop');

  stream.write('[\n');

  for (var i = 0; i < 1000000; ++i) {
    if (i % 1000 === 0) {
      console.log(new Date(), i);
    }

    // Write the separator BEFORE every record except the first. The original
    // appended ',\n' after every record, which left a trailing comma before
    // the closing ']' and produced invalid JSON.
    if (i > 0) {
      stream.write(',\n');
    }

    stream.write(JSON.stringify(record));
  }

  stream.write('\n]\n');

  console.timeEnd('loop');

  console.log('Flushing stream...');

  console.time('flushingStream');

  // end() flushes whatever is still buffered; 'finish' fires once all
  // writes have been handed off, so flushingStream measures drain time.
  stream.end();

  stream.on('finish', function() {
    console.timeEnd('flushingStream');
  });
}

function writeMethodTwo() {
  // Benchmark: serialize one million records through JSONStream.stringify(),
  // piped into a file stream with a 64 KiB high-water mark.
  // NOTE: `record`, `fs`, and `JSONStream` come from the enclosing script.
  const jsonStream = JSONStream.stringify();
  const fileStream = fs.createWriteStream(`/tmp/data.json`, {highWaterMark: Math.pow(2,16)});

  jsonStream.pipe(fileStream);

  console.time('loop');

  let i = 0;
  while (i < 1000000) {
    // Progress heartbeat every thousand records.
    if (i % 1000 === 0) {
      console.log(new Date(), i);
    }

    jsonStream.write(record);
    i += 1;
  }

  console.timeEnd('loop');

  console.log('Flushing stream...');

  console.time('flushingStream');

  jsonStream.end();

  // 'finish' fires once the stringify stream has flushed everything it
  // buffered during the synchronous write loop.
  jsonStream.on('finish', () => {
    console.timeEnd('flushingStream');
  });
}

function readMethodOne() {
  // Benchmark: read /tmp/data.json line by line via readline and run
  // JSON.parse on each individual line.
  console.time('read');

  const rl = require('readline').createInterface({
    input: require('fs').createReadStream('/tmp/data.json', {highWaterMark: Math.pow(2,16)})
  });

  const parsed = [];
  let count = 0;

  rl.on('line', (line) => {
    parsed.push(JSON.parse(line));

    count += 1;

    // Progress heartbeat every thousand lines.
    if (count % 1000 === 0) {
      console.log(new Date(), count);
    }
  });

  // 'close' fires after the underlying file stream is exhausted.
  rl.on('close', () => {
    console.timeEnd('read');

    console.log(parsed.length);
  });
}

function readMethodTwo() {
  // Benchmark: read /tmp/data.json through JSONStream.parse('*'),
  // collecting every array element the parser emits.
  // NOTE: `fs` and `JSONStream` come from the enclosing script's scope.
  console.time('read');

  var transformStream = JSONStream.parse('*');
  var inputStream = fs.createReadStream('/tmp/data.json', {highWaterMark: Math.pow(2,16)});

  inputStream.pipe(transformStream);

  var results = [];

  var index = 0;

  transformStream.on('data', function(data) {
    results.push(data);

    index += 1;

    // Progress heartbeat every thousand parsed elements.
    if (index % 1000 === 0) {
      console.log(new Date(), index);
    }
  });

  // Listen on the PARSER's 'end', not the raw file stream's: the file
  // stream ends as soon as its bytes are consumed, before JSONStream has
  // finished parsing its buffered input, so the original timed out early
  // and could report a partial results.length.
  transformStream.on('end', function() {
    console.timeEnd('read');

    console.log(results.length);
  });
}

Results — readMethodOne: read: 38932.477ms; readMethodTwo: read: 78337.645ms

Results — writeMethodOne: loop: 12520.245ms, flushingStream: 3026.069ms; writeMethodTwo: loop: 13056.963ms, flushingStream: over 20 minutes

node --max-old-space-size=8192 stream.js

dominictarr commented 6 years ago

Yes, line-delimited JSON is much faster than one large valid JSON object, because you get to use the highly optimized JSON.parse. However, sometimes you do not control the input and need to stream-parse a very large JSON object — that is what this module is for.