matthewmueller / x-ray

The next web scraper. See through the <html> noise.
MIT License
5.86k stars 350 forks source link

ERR_STREAM_WRITE_AFTER_END when using async/await #366

Open alexmuch opened 4 years ago

alexmuch commented 4 years ago

When I try to use async and await with x-ray, I encounter this error ERR_STREAM_WRITE_AFTER_END. The 2nd time I use the x variable is when this program fails, not sure why. Perhaps xray just wasn't built with async in mind?

var Xray = require('x-ray');
var x = Xray();

parseReddit('https://old.reddit.com/r/anime/comments/g7w9ac/kaguyasama_wa_kokurasetai_tensaitachi_no_renai/'); // is currently releasing

function parseReddit(redditUrl) {
  x(redditUrl, {
    anilistLink: '#siteTable .md ul:nth-of-type(2) li:nth-of-type(2) a@href'
  })
  (async function(err, obj) {
    console.log(obj);

    // if the anime is currently releasing, anilist has a slightly different structure
    let isReleasing = await isAnimeCurrentlyReleasing(obj);
    console.log('isReleasing: ' + isReleasing)
    if ( isReleasing ){
      scrapeCurrentAnime(obj); // fails here
    }
    else {
      scrapeOldAnime(obj);
    }
  })
}

async function isAnimeCurrentlyReleasing(animeJson){
  var isReleasing = false;
  await x(animeJson.anilistLink, {
    airingStatus: '.data-set:nth-of-type(5) .value'
  })
  (function(err, obj) {
    if (obj.airingStatus === 'Releasing') isReleasing = true;
  })

  return isReleasing;
}

function scrapeCurrentAnime(animeJson){
  x(animeJson.anilistLink, { 
    studio:  '.data-set:nth-of-type(11) .value'
  })
  (function(err, obj) {
    console.log(obj);
  })
}

function scrapeOldAnime(animeJson){
  x(animeJson.anilistLink, { 
    studio:  '.data-set:nth-of-type(10) .value',
  })
  (function(err, obj) {
    console.log(obj);
  })
}

And here's my code output

{ anilistLink: 'https://anilist.co/anime/112641' }
isReleasing: true
events.js:287
      throw er; // Unhandled 'error' event
      ^

Error [ERR_STREAM_WRITE_AFTER_END]: write after end
    at writeAfterEnd (_stream_writable.js:266:14)
    at Writable.write (_stream_writable.js:315:5)
    at Writable.end (_stream_writable.js:585:10)
    at _stream_object (c:\Users\Orange\node_modules\x-ray\lib\stream.js:46:16)
    at next (c:\Users\Orange\node_modules\x-ray\index.js:145:11)
    at c:\Users\Orange\node_modules\x-ray\index.js:259:7
    at c:\Users\Orange\node_modules\x-ray\lib\walk.js:56:12
    at c:\Users\Orange\node_modules\batch\index.js:161:14
    at processTicksAndRejections (internal/process/task_queues.js:79:11)
Emitted 'error' event on Writable instance at:
    at errorOrDestroy (internal/streams/destroy.js:108:12)
    at writeAfterEnd (_stream_writable.js:268:3)
    at Writable.write (_stream_writable.js:315:5)
    [... lines matching original stack trace ...]
    at processTicksAndRejections (internal/process/task_queues.js:79:11) {
  code: 'ERR_STREAM_WRITE_AFTER_END'
}
alexmuch commented 4 years ago

I wound up using puppeteer instead. Has great async/await support although it's not specifically designed to be a webscraper. Still works great however.