Closed rgaudin closed 1 month ago
Here's the log of a run for https://www.imagepond.net/davidmark that crashed, apparently while trying to decompress something using Brotli.
I notice there are several ERR_CONNECTION_REFUSED errors in this very short log.
ERR_CONNECTION_REFUSED
[zimit::2024-09-10 08:26:24,462] INFO:Running browsertrix-crawler crawl: crawl --failOnFailedSeed --waitUntil load --depth -1 --timeout 90 --behaviors autoplay,autofetch,siteSpecific --behaviorTimeout 90 --sizeLimit 4294967296 --diskUtilization 90 --timeLimit 7200 --url https://www.imagepond.net/davidmark --userAgentSuffix zimit.kiwix.org+ contact+zimfarm@kiwix.org --mobileDevice Pixel 2 --cwd /output/.tmputm27tp6 --statsFilename /output/crawl.json {"timestamp":"2024-09-10T08:26:26.837Z","logLevel":"info","context":"general","message":"Browsertrix-Crawler 1.3.0-beta.1 (with warcio.js 2.3.1)","details":{}} {"timestamp":"2024-09-10T08:26:26.841Z","logLevel":"info","context":"general","message":"Seeds","details":[{"url":"https://www.imagepond.net/davidmark","scopeType":"prefix","include":["/^https?:\\/\\/www\\.imagepond\\.net\\//"],"exclude":[],"allowHash":false,"depth":-1,"sitemap":null,"auth":null,"_authEncoded":null,"maxExtraHops":0,"maxDepth":1000000}]} {"timestamp":"2024-09-10T08:26:26.841Z","logLevel":"info","context":"general","message":"Behavior Options","details":{"message":"{\"autoplay\":true,\"autofetch\":true,\"siteSpecific\":true,\"log\":\"__bx_log\",\"startEarly\":true}"}} {"timestamp":"2024-09-10T08:26:28.370Z","logLevel":"info","context":"worker","message":"Creating 1 workers","details":{}} {"timestamp":"2024-09-10T08:26:28.373Z","logLevel":"info","context":"worker","message":"Worker starting","details":{"workerid":0}} {"timestamp":"2024-09-10T08:26:29.671Z","logLevel":"info","context":"worker","message":"Starting page","details":{"workerid":0,"page":"https://www.imagepond.net/davidmark"}} {"timestamp":"2024-09-10T08:26:29.676Z","logLevel":"info","context":"crawlStatus","message":"Crawl 
statistics","details":{"crawled":0,"total":1,"pending":1,"failed":0,"limit":{"max":0,"hit":false},"pendingPages":["{\"seedId\":0,\"started\":\"2024-09-10T08:26:28.379Z\",\"extraHops\":0,\"url\":\"https:\\/\\/www.imagepond.net\\/davidmark\",\"added\":\"2024-09-10T08:26:27.029Z\",\"depth\":0}"]}} {"timestamp":"2024-09-10T08:26:30.224Z","logLevel":"info","context":"general","message":"Awaiting page load","details":{"page":"https://www.imagepond.net/davidmark","workerid":0}} {"timestamp":"2024-09-10T08:26:31.047Z","logLevel":"warn","context":"recorder","message":"Request failed","details":{"url":"https://www.googletagmanager.com/gtag/js?id=G-CWJKBWX8WJ","errorText":"net::ERR_CONNECTION_REFUSED","page":"https://www.imagepond.net/davidmark","workerid":0}} {"timestamp":"2024-09-10T08:26:34.051Z","logLevel":"info","context":"behavior","message":"Running behaviors","details":{"frames":1,"frameUrls":["https://www.imagepond.net/davidmark"],"page":"https://www.imagepond.net/davidmark","workerid":0}} {"timestamp":"2024-09-10T08:26:34.052Z","logLevel":"info","context":"behavior","message":"Run Script Started","details":{"frameUrl":"https://www.imagepond.net/davidmark","page":"https://www.imagepond.net/davidmark","workerid":0}} {"timestamp":"2024-09-10T08:26:36.511Z","logLevel":"warn","context":"recorder","message":"Request failed","details":{"url":"https://www.facebook.com/share.php?u=__url__","errorText":"net::ERR_CONNECTION_REFUSED","page":"https://www.imagepond.net/davidmark","workerid":0}} {"timestamp":"2024-09-10T08:26:38.891Z","logLevel":"info","context":"behavior","message":"Run Script Finished","details":{"frameUrl":"https://www.imagepond.net/davidmark","page":"https://www.imagepond.net/davidmark","workerid":0}} {"timestamp":"2024-09-10T08:26:38.892Z","logLevel":"info","context":"behavior","message":"Behaviors finished","details":{"finished":1,"page":"https://www.imagepond.net/davidmark","workerid":0}} 
{"timestamp":"2024-09-10T08:26:39.941Z","logLevel":"info","context":"pageStatus","message":"Page Finished","details":{"loadState":4,"page":"https://www.imagepond.net/davidmark","workerid":0}} {"timestamp":"2024-09-10T08:26:39.983Z","logLevel":"info","context":"worker","message":"Starting page","details":{"workerid":0,"page":"https://www.imagepond.net/"}} {"timestamp":"2024-09-10T08:26:39.985Z","logLevel":"info","context":"crawlStatus","message":"Crawl statistics","details":{"crawled":1,"total":79,"pending":1,"failed":0,"limit":{"max":0,"hit":false},"pendingPages":["{\"seedId\":0,\"started\":\"2024-09-10T08:26:39.981Z\",\"extraHops\":0,\"url\":\"https:\\/\\/www.imagepond.net\\/\",\"added\":\"2024-09-10T08:26:33.736Z\",\"depth\":1}"]}} {"timestamp":"2024-09-10T08:26:40.160Z","logLevel":"info","context":"general","message":"Awaiting page load","details":{"page":"https://www.imagepond.net/","workerid":0}} {"timestamp":"2024-09-10T08:26:41.748Z","logLevel":"info","context":"behavior","message":"Running behaviors","details":{"frames":1,"frameUrls":["https://www.imagepond.net/"],"page":"https://www.imagepond.net/","workerid":0}} {"timestamp":"2024-09-10T08:26:41.748Z","logLevel":"info","context":"behavior","message":"Run Script Started","details":{"frameUrl":"https://www.imagepond.net/","page":"https://www.imagepond.net/","workerid":0}} {"timestamp":"2024-09-10T08:26:41.795Z","logLevel":"warn","context":"recorder","message":"Request failed","details":{"url":"https://www.facebook.com/share.php?u=__url__","errorText":"net::ERR_CONNECTION_REFUSED","page":"https://www.imagepond.net/","workerid":0}} {"timestamp":"2024-09-10T08:26:42.642Z","logLevel":"info","context":"behavior","message":"Run Script Finished","details":{"frameUrl":"https://www.imagepond.net/","page":"https://www.imagepond.net/","workerid":0}} {"timestamp":"2024-09-10T08:26:42.643Z","logLevel":"info","context":"behavior","message":"Behaviors 
finished","details":{"finished":1,"page":"https://www.imagepond.net/","workerid":0}} {"timestamp":"2024-09-10T08:26:43.647Z","logLevel":"info","context":"pageStatus","message":"Page Finished","details":{"loadState":4,"page":"https://www.imagepond.net/","workerid":0}} {"timestamp":"2024-09-10T08:26:43.666Z","logLevel":"info","context":"worker","message":"Starting page","details":{"workerid":0,"page":"https://www.imagepond.net/upload"}} {"timestamp":"2024-09-10T08:26:43.668Z","logLevel":"info","context":"crawlStatus","message":"Crawl statistics","details":{"crawled":2,"total":118,"pending":1,"failed":0,"limit":{"max":0,"hit":false},"pendingPages":["{\"seedId\":0,\"started\":\"2024-09-10T08:26:43.665Z\",\"extraHops\":0,\"url\":\"https:\\/\\/www.imagepond.net\\/upload\",\"added\":\"2024-09-10T08:26:33.738Z\",\"depth\":1}"]}} node:events:497 throw er; // Unhandled 'error' event ^ Error: unexpected end of file at genericNodeError (node:internal/errors:984:15) at wrappedFn (node:internal/errors:538:14) at BrotliDecoder.zlibOnError [as onerror] (node:zlib:191:17) Emitted 'error' event on BrotliDecompress instance at: at emitErrorNT (node:internal/streams/destroy:169:8) at emitErrorCloseNT (node:internal/streams/destroy:128:3) at process.processTicksAndRejections (node:internal/process/task_queues:82:21) { errno: -5, code: 'Z_BUF_ERROR' } Node.js v20.17.0
There is a workaround via #688, but I think the underlying issue may be a Node.js bug reported in https://github.com/nodejs/undici/issues/3616
Here's the log of a run for https://www.imagepond.net/davidmark that crashed, apparently while trying to decompress something using Brotli.
I notice there are several `ERR_CONNECTION_REFUSED` errors in this very short log.