Open masihyeganeh opened 9 years ago
Hi! I modified your code to invoke gc() before measuring memory usage, and the "leak" went away. I think it only looked like a leak because V8's garbage collector hadn't kicked in yet.
It's possible to use cheerio instead of jsdom and I will work on it this week.
Here is the code:
var read = require('./src/readability');
/**
 * Runs one readability pass over a fixed article URL.
 *
 * Calls `read` with a hard-coded URL. On success it logs a marker line and
 * calls `article.close()`. On failure it reports the error and terminates the
 * process with a non-zero exit status.
 */
function useNodeReadability() {
  read('http://farsnews.com/newstext.php?nn=13930926000105', function(error, article, meta) {
    if (error) {
      // Print the error itself so the cause of the failure is visible.
      console.error('Fetch Error', error);
      // Exit with status 1: a failed fetch must not look like a successful run.
      process.exit(1);
      return;
    }
    console.log('Readability work done here');
    // NOTE(review): presumably releases the parsed document held by `article` — confirm against node-readability.
    article.close();
  });
}
// Both timers fire at the same one-second cadence.
var TICK_MS = 1000;

// Request a garbage collection first, then print the memory figures, so the
// reading is taken right after a collection.
// `gc` is only defined when node is started with --expose-gc.
function reportMemoryAfterGc() {
  gc();
  console.log(process.memoryUsage());
}

setInterval(reportMemoryAfterGc, TICK_MS);
// Start a new readability run on every tick as well.
setInterval(useNodeReadability, TICK_MS);
Here is the result:
node-readablity git:master ❯ node --expose-gc memory.js
{ rss: 95911936, heapTotal: 74054656, heapUsed: 32973616 }
{ rss: 92536832, heapTotal: 70958848, heapUsed: 32188488 }
{ rss: 92807168, heapTotal: 70958848, heapUsed: 31538008 }
{ rss: 91865088, heapTotal: 70958848, heapUsed: 29624024 }
{ rss: 90894336, heapTotal: 68894976, heapUsed: 29717464 }
{ rss: 90894336, heapTotal: 68894976, heapUsed: 29694872 }
{ rss: 90906624, heapTotal: 68894976, heapUsed: 30046536 }
{ rss: 90390528, heapTotal: 68894976, heapUsed: 30022824 }
{ rss: 90906624, heapTotal: 68894976, heapUsed: 30026312 }
{ rss: 90906624, heapTotal: 68894976, heapUsed: 29951448 }
{ rss: 90316800, heapTotal: 68894976, heapUsed: 29947728 }
{ rss: 90943488, heapTotal: 68894976, heapUsed: 30016232 }
{ rss: 90177536, heapTotal: 68894976, heapUsed: 29731560 }
{ rss: 90943488, heapTotal: 68894976, heapUsed: 29731504 }
{ rss: 90943488, heapTotal: 68894976, heapUsed: 29858584 }
{ rss: 90251264, heapTotal: 68894976, heapUsed: 29854184 }
Readability work done here
{ rss: 100114432, heapTotal: 77114368, heapUsed: 36983144 }
{ rss: 97644544, heapTotal: 74042624, heapUsed: 36669056 }
{ rss: 97677312, heapTotal: 74042624, heapUsed: 32258792 }
{ rss: 94117888, heapTotal: 70946816, heapUsed: 31517080 }
{ rss: 93282304, heapTotal: 69926912, heapUsed: 30693072 }
{ rss: 92876800, heapTotal: 69926912, heapUsed: 30666992 }
{ rss: 93290496, heapTotal: 69926912, heapUsed: 30291792 }
Readability work done here
{ rss: 99975168, heapTotal: 76094464, heapUsed: 36653544 }
{ rss: 98095104, heapTotal: 74042624, heapUsed: 36299872 }
{ rss: 98455552, heapTotal: 74042624, heapUsed: 32254072 }
{ rss: 94498816, heapTotal: 69926912, heapUsed: 31272872 }
{ rss: 94265344, heapTotal: 69926912, heapUsed: 30434600 }
{ rss: 93818880, heapTotal: 69926912, heapUsed: 30645680 }
Readability work done here
{ rss: 101142528, heapTotal: 77114368, heapUsed: 36876512 }
{ rss: 97959936, heapTotal: 74042624, heapUsed: 32215192 }
{ rss: 95494144, heapTotal: 70946816, heapUsed: 32491976 }
{ rss: 95969280, heapTotal: 70946816, heapUsed: 31518840 }
{ rss: 94175232, heapTotal: 69926912, heapUsed: 30679464 }
{ rss: 93573120, heapTotal: 69926912, heapUsed: 30605224 }
{ rss: 92794880, heapTotal: 69926912, heapUsed: 30211272 }
{ rss: 93577216, heapTotal: 69926912, heapUsed: 30192224 }
{ rss: 93601792, heapTotal: 69926912, heapUsed: 30377776 }
{ rss: 92954624, heapTotal: 69926912, heapUsed: 30370184 }
{ rss: 93626368, heapTotal: 69926912, heapUsed: 30396936 }
{ rss: 93626368, heapTotal: 69926912, heapUsed: 30677936 }
{ rss: 93626368, heapTotal: 69926912, heapUsed: 30690752 }
{ rss: 93626368, heapTotal: 69926912, heapUsed: 30720816 }
{ rss: 93630464, heapTotal: 69926912, heapUsed: 30839056 }
{ rss: 93634560, heapTotal: 69926912, heapUsed: 30857640 }
{ rss: 93634560, heapTotal: 69926912, heapUsed: 30857880 }
{ rss: 93634560, heapTotal: 69926912, heapUsed: 30450040 }
{ rss: 92991488, heapTotal: 69926912, heapUsed: 30497912 }
{ rss: 93376512, heapTotal: 68894976, heapUsed: 30771344 }
{ rss: 93380608, heapTotal: 68894976, heapUsed: 30836104 }
Readability work done here
{ rss: 101642240, heapTotal: 77114368, heapUsed: 37034992 }
{ rss: 98238464, heapTotal: 74042624, heapUsed: 32222168 }
{ rss: 95629312, heapTotal: 70946816, heapUsed: 32405536 }
{ rss: 96325632, heapTotal: 70946816, heapUsed: 31658664 }
{ rss: 94560256, heapTotal: 69926912, heapUsed: 30827824 }
{ rss: 93474816, heapTotal: 68894976, heapUsed: 30798544 }
{ rss: 93499392, heapTotal: 68894976, heapUsed: 30772608 }
{ rss: 93114368, heapTotal: 69926912, heapUsed: 30766664 }
{ rss: 93523968, heapTotal: 69926912, heapUsed: 30770896 }
{ rss: 93552640, heapTotal: 69926912, heapUsed: 30859096 }
{ rss: 93179904, heapTotal: 70958848, heapUsed: 30852856 }
{ rss: 93601792, heapTotal: 70958848, heapUsed: 30834992 }
{ rss: 93646848, heapTotal: 71990784, heapUsed: 30940136 }
{ rss: 93659136, heapTotal: 71990784, heapUsed: 30947448 }
{ rss: 93671424, heapTotal: 71990784, heapUsed: 30956032 }
{ rss: 93675520, heapTotal: 71990784, heapUsed: 30556752 }
{ rss: 93233152, heapTotal: 73022720, heapUsed: 30700192 }
{ rss: 93749248, heapTotal: 73022720, heapUsed: 30608880 }
{ rss: 93216768, heapTotal: 74054656, heapUsed: 30612440 }
{ rss: 93777920, heapTotal: 74054656, heapUsed: 30428480 }
{ rss: 93380608, heapTotal: 75086592, heapUsed: 30683944 }
{ rss: 93925376, heapTotal: 75086592, heapUsed: 30912720 }
{ rss: 93925376, heapTotal: 75086592, heapUsed: 30913408 }
{ rss: 93949952, heapTotal: 75086592, heapUsed: 30960192 }
{ rss: 93982720, heapTotal: 75086592, heapUsed: 30995944 }
{ rss: 94027776, heapTotal: 75086592, heapUsed: 31090280 }
{ rss: 94126080, heapTotal: 75086592, heapUsed: 31040504 }
{ rss: 93790208, heapTotal: 76118528, heapUsed: 31029536 }
Readability work done here
{ rss: 102293504, heapTotal: 80210176, heapUsed: 37298736 }
Readability work done here
Readability work done here
{ rss: 110424064, heapTotal: 84325888, heapUsed: 39052136 }
Readability work done here
{ rss: 109940736, heapTotal: 84325888, heapUsed: 44442608 }
{ rss: 107823104, heapTotal: 81254144, heapUsed: 39483320 }
{ rss: 101552128, heapTotal: 75074560, heapUsed: 34321984 }
{ rss: 99885056, heapTotal: 74054656, heapUsed: 31560392 }
{ rss: 96325632, heapTotal: 71990784, heapUsed: 30611856 }
{ rss: 96911360, heapTotal: 71990784, heapUsed: 30764144 }
{ rss: 96403456, heapTotal: 73022720, heapUsed: 30677512 }
Memory usage
Readability or jsdom uses a huge amount of RAM (10+ MB) to parse a small web page (500 KB) and never frees the memory it used. This prevents us from using node-readability for our web scraper.
I'm not sure whether this is caused by jsdom. If it is, would there be an easy way to replace jsdom with cheerio? It would be great if there were a config option for that.
My environment
How to reproduce problem
Expected result
Memory should be freed after each execution
Actual result
Heap and RSS memory keep increasing
My results:
thanks