Amazon crawler - this configuration will extract items for the keywords that you specify in the input, and it will automatically extract all pages for each given keyword. You can specify multiple keywords in the input for a single run.
I have a project that used your crawler in the past. We decided to stop it a few months ago and cancel our Apify subscription. Recently we restarted the project and tried to run your crawler in Apify without a subscription, but I keep getting an error.
2020-05-16T05:34:52.631Z ACTOR: Creating Docker container.
2020-05-16T05:34:57.465Z ACTOR: Starting Docker container.
2020-05-16T05:34:58.769Z
2020-05-16T05:34:58.769Z > amazon@1.0.0 start /usr/src/app
2020-05-16T05:34:58.770Z > node ./src/main.js
2020-05-16T05:34:58.771Z
2020-05-16T05:35:00.372Z INFO: System info {"apifyVersion":"0.19.1","apifyClientVersion":"0.5.26","osType":"Linux","nodeVersion":"v12.16.1"}
2020-05-16T05:35:00.373Z WARNING: You are using an outdated version (0.19.1) of Apify SDK. We recommend you to update to the latest version (0.20.4).
2020-05-16T05:35:00.374Z Read more about Apify SDK versioning at: https://help.apify.com/en/articles/3184510-updates-and-versioning-of-apify-sdk
2020-05-16T05:35:00.491Z INFO: Going to enqueue 1 requests from input.
2020-05-16T05:35:00.492Z https://www.amazon.com/dp/B07P6Y8L3F
2020-05-16T05:35:02.277Z INFO: AutoscaledPool state {"currentConcurrency":0,"desiredConcurrency":2,"systemStatus":{"isSystemIdle":true,"memInfo":{"isOverloaded":false,"limitRatio":0.2,"actualRatio":null},"eventLoopInfo":{"isOverloaded":false,"limitRatio":0.4,"actualRatio":null},"cpuInfo":{"isOverloaded":false,"limitRatio":0.4,"actualRatio":null},"clientInfo":{"isOverloaded":false,"limitRatio":0.3,"actualRatio":null}}}
2020-05-16T05:35:02.392Z ERROR: BasicCrawler: handleRequestFunction failed, reclaiming failed request back to the list or queue {"url":"https://www.amazon.com/dp/B07P6Y8L3F","retryCount":1,"id":"CxCejBi58nyfOEt"}
2020-05-16T05:35:02.393Z Error: Request for https://www.amazon.com/dp/B07P6Y8L3F aborted due to abortFunction
2020-05-16T05:35:02.393Z at DuplexWrapper.<anonymous> (/usr/src/app/node_modules/@apify/http-request/src/index.js:167:25)
2020-05-16T05:35:02.394Z at DuplexWrapper.emit (events.js:311:20)
2020-05-16T05:35:02.394Z at EventEmitter.<anonymous> (/usr/src/app/node_modules/got/source/as-stream.js:60:9)
2020-05-16T05:35:02.395Z at EventEmitter.emit (events.js:311:20)
2020-05-16T05:35:02.395Z at module.exports (/usr/src/app/node_modules/got/source/get-response.js:22:10)
2020-05-16T05:35:02.396Z at ClientRequest.handleResponse (/usr/src/app/node_modules/got/source/request-as-event-emitter.js:155:5)
2020-05-16T05:35:02.396Z at Object.onceWrapper (events.js:418:26)
2020-05-16T05:35:02.397Z at ClientRequest.emit (events.js:323:22)
2020-05-16T05:35:02.397Z at ClientRequest.origin.emit (/usr/src/app/node_modules/@szmarczak/http-timer/source/index.js:37:11)
2020-05-16T05:35:02.398Z at HTTPParser.parserOnIncomingClient [as onIncoming] (_http_client.js:603:27)
2020-05-16T05:35:02.398Z at HTTPParser.parserOnHeadersComplete (_http_common.js:119:17)
2020-05-16T05:35:02.398Z at Socket.socketOnData (_http_client.js:476:22)
2020-05-16T05:35:02.399Z at Socket.emit (events.js:311:20)
2020-05-16T05:35:02.399Z at Socket.Readable.read (_stream_readable.js:512:10)
2020-05-16T05:35:02.400Z at Socket.read (net.js:618:39)
2020-05-16T05:35:02.401Z at flow (_stream_readable.js:989:34)
2020-05-16T05:35:02.401Z at resume_ (_stream_readable.js:970:3)
2020-05-16T05:35:02.402Z at processTicksAndRejections (internal/process/task_queues.js:84:21)
2020-05-16T05:35:05.483Z ERROR: BasicCrawler: handleRequestFunction failed, reclaiming failed request back to the list or queue {"url":"https://www.amazon.com/dp/B07P6Y8L3F","retryCount":2,"id":"CxCejBi58nyfOEt"}
2020-05-16T05:35:05.484Z Error: Request for https://www.amazon.com/dp/B07P6Y8L3F aborted due to abortFunction
2020-05-16T05:35:05.484Z at DuplexWrapper.<anonymous> (/usr/src/app/node_modules/@apify/http-request/src/index.js:167:25)
2020-05-16T05:35:05.485Z at DuplexWrapper.emit (events.js:311:20)
2020-05-16T05:35:05.485Z at EventEmitter.<anonymous> (/usr/src/app/node_modules/got/source/as-stream.js:60:9)
2020-05-16T05:35:05.486Z at EventEmitter.emit (events.js:311:20)
2020-05-16T05:35:05.494Z at module.exports (/usr/src/app/node_modules/got/source/get-response.js:22:10)
2020-05-16T05:35:05.494Z at ClientRequest.handleResponse (/usr/src/app/node_modules/got/source/request-as-event-emitter.js:155:5)
2020-05-16T05:35:05.495Z at Object.onceWrapper (events.js:418:26)
2020-05-16T05:35:05.495Z at ClientRequest.emit (events.js:323:22)
2020-05-16T05:35:05.496Z at ClientRequest.origin.emit (/usr/src/app/node_modules/@szmarczak/http-timer/source/index.js:37:11)
2020-05-16T05:35:05.496Z at HTTPParser.parserOnIncomingClient [as onIncoming] (_http_client.js:603:27)
2020-05-16T05:35:05.496Z at HTTPParser.parserOnHeadersComplete (_http_common.js:119:17)
2020-05-16T05:35:05.497Z at Socket.socketOnData (_http_client.js:476:22)
2020-05-16T05:35:05.501Z at Socket.emit (events.js:311:20)
2020-05-16T05:35:05.501Z at Socket.Readable.read (_stream_readable.js:512:10)
2020-05-16T05:35:05.502Z at Socket.read (net.js:618:39)
2020-05-16T05:35:05.502Z at flow (_stream_readable.js:989:34)
2020-05-16T05:35:05.503Z at resume_ (_stream_readable.js:970:3)
2020-05-16T05:35:05.504Z at processTicksAndRejections (internal/process/task_queues.js:84:21)
2020-05-16T05:35:08.654Z ERROR: BasicCrawler: handleRequestFunction failed, reclaiming failed request back to the list or queue {"url":"https://www.amazon.com/dp/B07P6Y8L3F","retryCount":3,"id":"CxCejBi58nyfOEt"}
2020-05-16T05:35:08.656Z Error: Request for https://www.amazon.com/dp/B07P6Y8L3F aborted due to abortFunction
2020-05-16T05:35:08.657Z at DuplexWrapper.<anonymous> (/usr/src/app/node_modules/@apify/http-request/src/index.js:167:25)
2020-05-16T05:35:08.658Z at DuplexWrapper.emit (events.js:311:20)
2020-05-16T05:35:08.659Z at EventEmitter.<anonymous> (/usr/src/app/node_modules/got/source/as-stream.js:60:9)
2020-05-16T05:35:08.660Z at EventEmitter.emit (events.js:311:20)
2020-05-16T05:35:08.660Z at module.exports (/usr/src/app/node_modules/got/source/get-response.js:22:10)
2020-05-16T05:35:08.661Z at ClientRequest.handleResponse (/usr/src/app/node_modules/got/source/request-as-event-emitter.js:155:5)
2020-05-16T05:35:08.662Z at Object.onceWrapper (events.js:418:26)
2020-05-16T05:35:08.663Z at ClientRequest.emit (events.js:323:22)
2020-05-16T05:35:08.663Z at ClientRequest.origin.emit (/usr/src/app/node_modules/@szmarczak/http-timer/source/index.js:37:11)
2020-05-16T05:35:08.664Z at HTTPParser.parserOnIncomingClient [as onIncoming] (_http_client.js:603:27)
2020-05-16T05:35:08.664Z at HTTPParser.parserOnHeadersComplete (_http_common.js:119:17)
2020-05-16T05:35:08.666Z at Socket.socketOnData (_http_client.js:476:22)
2020-05-16T05:35:08.666Z at Socket.emit (events.js:311:20)
2020-05-16T05:35:08.667Z at Socket.Readable.read (_stream_readable.js:512:10)
2020-05-16T05:35:08.667Z at Socket.read (net.js:618:39)
2020-05-16T05:35:08.668Z at flow (_stream_readable.js:989:34)
2020-05-16T05:35:08.668Z at resume_ (_stream_readable.js:970:3)
2020-05-16T05:35:08.669Z at processTicksAndRejections (internal/process/task_queues.js:84:21)
2020-05-16T05:35:11.922Z INFO: Request https://www.amazon.com/dp/B07P6Y8L3F failed 4 times
2020-05-16T05:35:11.924Z ERROR: BasicCrawler: runTaskFunction error handler threw an exception. This places the crawler and its underlying storages into an unknown state and crawling will be terminated. This may have happened due to an internal error of Apify's API or due to a misconfigured crawler. If you are sure that there is no error in your code, selecting "Restart on error" in the actor's settingswill make sure that the run continues where it left off, if programmed to handle restarts correctly.
2020-05-16T05:35:11.924Z ReferenceError: $ is not defined
2020-05-16T05:35:11.926Z at BasicCrawler.handleFailedRequestFunction (/usr/src/app/src/main.js:161:46)
2020-05-16T05:35:11.926Z at BasicCrawler._requestFunctionErrorHandler (/usr/src/app/node_modules/apify/build/crawlers/basic_crawler.js:475:17)
2020-05-16T05:35:11.927Z at processTicksAndRejections (internal/process/task_queues.js:97:5)
2020-05-16T05:35:11.928Z at async BasicCrawler._runTaskFunction (/usr/src/app/node_modules/apify/build/crawlers/basic_crawler.js:413:9)
2020-05-16T05:35:11.928Z at async AutoscaledPool._maybeRunTask (/usr/src/app/node_modules/apify/build/autoscaling/autoscaled_pool.js:463:7)
2020-05-16T05:35:11.929Z ERROR: AutoscaledPool: runTaskFunction failed.
2020-05-16T05:35:11.930Z ReferenceError: $ is not defined
2020-05-16T05:35:11.931Z at BasicCrawler.handleFailedRequestFunction (/usr/src/app/src/main.js:161:46)
2020-05-16T05:35:11.931Z at BasicCrawler._requestFunctionErrorHandler (/usr/src/app/node_modules/apify/build/crawlers/basic_crawler.js:475:17)
2020-05-16T05:35:11.932Z at processTicksAndRejections (internal/process/task_queues.js:97:5)
2020-05-16T05:35:11.933Z at async BasicCrawler._runTaskFunction (/usr/src/app/node_modules/apify/build/crawlers/basic_crawler.js:413:9)
2020-05-16T05:35:11.947Z at async AutoscaledPool._maybeRunTask (/usr/src/app/node_modules/apify/build/autoscaling/autoscaled_pool.js:463:7)
2020-05-16T05:35:11.948Z INFO: Crawler final request statistics: {"avgDurationMillis":null,"perMinute":0,"finished":0,"failed":1,"retryHistogram":[null,null,null,1]}
2020-05-16T05:35:11.949Z ERROR: The function passed to Apify.main() threw an exception:
2020-05-16T05:35:11.950Z ReferenceError: $ is not defined
2020-05-16T05:35:11.950Z at BasicCrawler.handleFailedRequestFunction (/usr/src/app/src/main.js:161:46)
2020-05-16T05:35:11.951Z at BasicCrawler._requestFunctionErrorHandler (/usr/src/app/node_modules/apify/build/crawlers/basic_crawler.js:475:17)
2020-05-16T05:35:11.952Z at processTicksAndRejections (internal/process/task_queues.js:97:5)
2020-05-16T05:35:11.952Z at async BasicCrawler._runTaskFunction (/usr/src/app/node_modules/apify/build/crawlers/basic_crawler.js:413:9)
2020-05-16T05:35:11.953Z at async AutoscaledPool._maybeRunTask (/usr/src/app/node_modules/apify/build/autoscaling/autoscaled_pool.js:463:7)
2020-05-16T05:35:11.953Z npm ERR! code ELIFECYCLE
2020-05-16T05:35:11.954Z npm ERR! errno 91
2020-05-16T05:35:11.955Z npm ERR! amazon@1.0.0 start: `node ./src/main.js`
2020-05-16T05:35:11.955Z npm ERR! Exit status 91
2020-05-16T05:35:11.956Z npm ERR!
2020-05-16T05:35:11.956Z npm ERR! Failed at the amazon@1.0.0 start script.
2020-05-16T05:35:11.957Z npm ERR! This is probably not a problem with npm. There is likely additional logging output above.
2020-05-16T05:35:11.957Z
2020-05-16T05:35:11.958Z npm ERR! A complete log of this run can be found in:
2020-05-16T05:35:11.959Z npm ERR! /root/.npm/_logs/2020-05-16T05_35_11_942Z-debug.log
Could you give us some guidance on this issue? Is it something to do with the crawler, or with our Apify subscription?
Hi @VaclavRut,
I have a project that used your crawler in the past. We decided to stop it a few months ago and cancel our Apify subscription. Recently we restarted the project and tried to run your crawler in Apify without a subscription, but I keep getting an error.
Input
Output
Could you give us some guidance on this issue? Is it something to do with the crawler, or with our Apify subscription?