regular / unbzip2-stream

streaming unbzip2 implementation in pure JavaScript for node and browsers

ERROR: Unhandled stream error in pipe #28

Open malikalimoekhamedov opened 5 years ago

malikalimoekhamedov commented 5 years ago

Hello friends,

I'm getting an error similar to the one in issue #7. However, this time the error occurs in Stream.end. According to @sfriesel, this might be related to a truncated input stream. The problem is that the error is thrown at a random moment every time I launch the script, so I can't really provide any test files for you to reproduce the issue. I'm using unbzip2-stream v6.9.0.

Here's what I do:

axios(axiosConfig)
    .then((response) => {
        if (response.status === 200) {
            response.data
                .pipe(bz2())
                .pipe(tarExtract)
                .on('error', (err) => {
                    console.error(err);
                    process.exit(1);
                });
        }
    })
    .catch((err) => {
        console.log(err);
    });
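
One thing worth noting about the snippet above: Node's .pipe() does not forward errors between streams, so an 'error' handler attached to the last stage never sees failures emitted by response.data or bz2(). A minimal sketch of the same pipeline using stream.pipeline (Node 10+), which wires error handling to every stage; pulling tarExtract from tar-stream is an assumption here, not something stated in the thread:

const { pipeline } = require('stream');
const bz2 = require('unbzip2-stream');
const tarExtract = require('tar-stream').extract(); // assumption: any writable tar extractor works here

// axios and axiosConfig as in the snippet above
axios(axiosConfig)
    .then((response) => {
        // pipeline() destroys every stream on failure and reports the
        // first error to the callback, so nothing is left
        // "unhandled in pipe".
        pipeline(response.data, bz2(), tarExtract, (err) => {
            if (err) {
                console.error('pipeline failed:', err);
                process.exit(1);
            }
        });
    })
    .catch((err) => {
        console.log(err);
    });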

And here's the traceback I'm getting:

internal/streams/legacy.js:59
      throw er; // Unhandled stream error in pipe.
      ^

TypeError: Cannot read property '0' of undefined
    at f (.../node_modules/unbzip2-stream/lib/bit_iterator.js:30:34)
    at Object.bzip2.decompress (.../node_modules/unbzip2-stream/lib/bzip2.js:278:13)
    at decompressBlock (.../node_modules/unbzip2-stream/index.js:30:29)
    at decompressAndQueue (.../node_modules/unbzip2-stream/index.js:47:20)
    at Stream.end (.../node_modules/unbzip2-stream/index.js:82:17)
    at _end (.../node_modules/through/index.js:65:9)
    at Stream.stream.end (.../node_modules/through/index.js:74:5)

I'm trying to download a number of large .tbz files (tar with bzip2 compression) from a remote Apache server over HTTPS with axios. I'm hitting this error at random, sometimes sooner, sometimes later. Some files are decompressed correctly, but then I hit this wall and everything stops. It mainly happens with the larger files.

Can anyone cast some light on this issue?

ivan-kuzma-scx commented 5 years ago

Hello, I'm facing this issue too, under the same circumstances.

kostyantin2216 commented 4 years ago

Hey, I'm having the same issue :/

Christilut commented 4 years ago

Same problem here, with a large file of about 30 GB. It only started happening recently; it ran fine daily for almost a year.

TypeError: Cannot read property '0' of undefined
  File "/app/node_modules/unbzip2-stream/lib/bit_iterator.js", line 30, col 34, in f
    result |= ((bytes[byte] & (BITMASK[n] << (8 - n - bit))) >> (8 - n - bit));
  File "/app/node_modules/unbzip2-stream/lib/bzip2.js", line 283, col 28, in Object.bzip2.decompress
    j = (j << 1) | bits(1);
  File "/app/node_modules/unbzip2-stream/index.js", line 30, col 29, in decompressBlock
    streamCRC = bz2.decompress(bitReader, f, buf, bufsize, streamCRC);
  File "/app/node_modules/unbzip2-stream/index.js", line 47, col 20, in decompressAndQueue
    return decompressBlock(function(d) {
  File "/app/node_modules/unbzip2-stream/index.js", line 82, col 17, in Stream.end
    decompressAndQueue(this);
  File "/app/node_modules/through/index.js", line 65, col 9, in _end
    end.call(stream)
  File "/app/node_modules/through/index.js", line 74, col 5, in Stream.stream.end
    _end() // will emit or queue
  File "/app/node_modules/duplexify/index.js", line 30, col 6, in end
    ws.end()
  File "/app/node_modules/duplexify/index.js", line 216, col 5, in null.<anonymous>
    end(self._forwardEnd && self._writable, function() {
  File "/app/node_modules/duplexify/index.js", line 12, col 8, in onuncork
    else fn()
  File "/app/node_modules/duplexify/index.js", line 215, col 3, in Pumpify.Duplexify._finish
    onuncork(this, function() {
  File "/app/node_modules/duplexify/index.js", line 205, col 42, in Pumpify.Duplexify._write
    if (data === SIGNAL_FLUSH) return this._finish(cb)
  File "/app/node_modules/readable-stream/lib/_stream_writable.js", line 338, col 64, in doWrite
    if (writev) stream._writev(chunk, state.onwrite);else stream._write(chunk, encoding, state.onwrite);
  File "/app/node_modules/readable-stream/lib/_stream_writable.js", line 327, col 5, in writeOrBuffer
    doWrite(stream, state, false, len, chunk, encoding, cb);
  File "/app/node_modules/readable-stream/lib/_stream_writable.js", line 264, col 11, in Pumpify.Writable.write
    ret = writeOrBuffer(this, state, isBuf, chunk, encoding, cb);
  File "/app/node_modules/duplexify/index.js", line 230, col 41, in Pumpify.Duplexify.end
    if (!this._writableState.ending) this.write(SIGNAL_FLUSH)
  File "/app/node_modules/duplexify/index.js", line 226, col 47, in Pumpify.Duplexify.end
    if (typeof data === 'function') return this.end(null, null, data)
  File "/app/node_modules/duplexify/index.js", line 29, col 36, in end
    if (ws._writableState) return ws.end(fn)
  File "/app/node_modules/duplexify/index.js", line 216, col 5, in null.<anonymous>
    end(self._forwardEnd && self._writable, function() {
  File "/app/node_modules/duplexify/index.js", line 12, col 8, in onuncork
    else fn()
  File "/app/node_modules/duplexify/index.js", line 215, col 3, in Duplexify._finish
    onuncork(this, function() {
  File "/app/node_modules/duplexify/index.js", line 205, col 42, in Duplexify._write
    if (data === SIGNAL_FLUSH) return this._finish(cb)
  File "/app/node_modules/readable-stream/lib/_stream_writable.js", line 338, col 64, in doWrite
    if (writev) stream._writev(chunk, state.onwrite);else stream._write(chunk, encoding, state.onwrite);
  File "/app/node_modules/readable-stream/lib/_stream_writable.js", line 327, col 5, in writeOrBuffer
    doWrite(stream, state, false, len, chunk, encoding, cb);
  File "/app/node_modules/readable-stream/lib/_stream_writable.js", line 264, col 11, in Duplexify.Writable.write
    ret = writeOrBuffer(this, state, isBuf, chunk, encoding, cb);
  File "/app/node_modules/duplexify/index.js", line 230, col 41, in Duplexify.end
    if (!this._writableState.ending) this.write(SIGNAL_FLUSH)
  File "_stream_readable.js", line 595, col 10, in IncomingMessage.onend
  File "events.js", line 313, col 30, in Object.onceWrapper
  File "events.js", line 111, col 20, in emitNone
  File "events.js", line 208, col 7, in IncomingMessage.emit
  File "_stream_readable.js", line 1064, col 12, in endReadableNT
  File "internal/process/next_tick.js", line 139, col 11, in _combinedTickCallback
  File "internal/process/next_tick.js", line 219, col 9, in process._tickDomainCallback
sfriesel commented 4 years ago

@Christilut do you still have access to the file, and are you sure it is valid? As mentioned in #7, this exception can occur either because the input stream does not end in a valid bzip2 footer, causing the implementation to correctly expect more data, or because there is a bug causing the implementation to incorrectly expect more data. We need reproducible test data to move this forward.
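
A quick way to rule out a non-bzip2 payload for a given file is to check the magic bytes before anything else. This is only a sketch (the file name download.tbz is a placeholder), and it validates the header rather than the bit-aligned footer mentioned above, but it catches the common case where the payload is not bzip2 at all, e.g. an HTML error page saved by mistake:

const fs = require('fs');

// Read the first four bytes of the downloaded file.
const fd = fs.openSync('download.tbz', 'r');
const head = Buffer.alloc(4);
fs.readSync(fd, head, 0, 4, 0);
fs.closeSync(fd);

// A bzip2 stream begins with the magic "BZh" followed by a
// block-size digit '1'..'9'.
const looksLikeBzip2 =
    head.toString('ascii', 0, 3) === 'BZh' &&
    head[3] >= 0x31 && head[3] <= 0x39;

console.log(looksLikeBzip2 ? 'bzip2 header OK' : 'not a bzip2 header');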

Christilut commented 4 years ago

Actually, I think this was probably #7, since I'm using unbzip2-stream through bzip2-maybe, which used version 1.0.9, right before #7 was fixed. I'll run it again with the latest version over the next few days. If it breaks again, I'll report back.

sfriesel commented 4 years ago

Your stack trace says the error occurs during Stream.end, but testing with the latest version may still be a good idea :)

Christilut commented 4 years ago

I'll let you know when I find out more. It's a very slow-running process that takes about 12 hours per run, so hopefully I'll know more in a few days :)

Christilut commented 4 years ago

Same thing happens with the latest version. But it doesn't happen locally, only on Heroku. Maybe they changed something that can cause this? It's been running well for almost a year without issues on Heroku.

It's actually this project that runs on Heroku. It downloads a 30GB zip file and streams it into a line reader. Maybe the external FTP server causes this? But I'd expect a different kind of error then.

Any ideas?

regular commented 4 years ago

@sfriesel For a rewrite, I'd probably take the C sources, compile them to WebAssembly, and add a Node streaming interface.

regular commented 4 years ago

@Christilut since it happens at seemingly random positions in the stream, maybe you are hitting some IP traffic quota that Heroku enforces? Do you have error handling on the HTTP/FTP side of things, so you would detect when the input stream is aborted? Do you know the expected file size in advance (for example from a Content-Length header)? Can you take decompression out of the pipeline and check whether you actually download the number of bytes you expect (see the sketch below)? Is there any alternative to downloading a 30GB file? Does the server support byte ranges, so you could use a library that resumes a download after it fails?
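
For the byte-counting question, a minimal sketch, assuming axiosConfig sets responseType: 'stream' as in the original snippet: it takes decompression out of the pipeline entirely and compares what actually arrives against the Content-Length header:

axios(axiosConfig)
    .then((response) => {
        const expected = Number(response.headers['content-length']); // NaN if the header is absent
        let received = 0;

        response.data.on('data', (chunk) => { received += chunk.length; });
        response.data.on('error', (err) => {
            console.error(`transfer aborted after ${received} bytes:`, err);
        });
        response.data.on('end', () => {
            console.log(`received ${received} of ${expected} bytes`);
        });
    });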

Christilut commented 4 years ago

I'm running it on my own server right now without any restrictions so hopefully that will tell me more soon :)

Christilut commented 4 years ago

Well, it runs fine on a real server, so it must be a Heroku thing. Maybe they hang up long-running requests now or something. I don't know what they recently changed.

jrauschenbusch commented 4 years ago

I also ran into this issue with a Node.js app running inside a Kubernetes cluster. As the root cause, I identified a lot of "socket hang up" errors traced by my instrumentation suite. Increasing the socket timeout of the Axios client (and therefore of the underlying HTTP client) fixed the problem. Locally, everything worked fine with a low timeout configuration; possibly it has something to do with the higher latency of the k8s network routing.

jrauschenbusch commented 3 years ago

I was able to reproduce the problem with an Axios client while downloading a valid bzip2-compressed text file consisting of a large number of lines.

TL;DR: In my case the configured HTTP timeout value was too low.

The HTTP timeout was set to 1000 ms, which was enough to start the stream but too short for the whole streaming/decompression step. That is, the timeout caused the stream to end prematurely, which in turn led to the known error.
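
For reference, a sketch of the kind of Axios configuration involved; the URL and values are illustrative, not the poster's exact settings:

const axiosConfig = {
    url: 'https://example.com/big-file.tbz', // placeholder URL
    method: 'get',
    responseType: 'stream',
    // A 1000 ms timeout is enough to receive the response headers but,
    // as described above, can abort a long streamed download partway
    // through, truncating the bzip2 input. Raise it generously, or use
    // 0 (axios's default) to disable the timeout entirely.
    timeout: 0,
};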

Maybe the error message could be made more human-readable (e.g. "Invalid end of bzip2 stream"), but imho this ticket could be closed.