Just got a ton of these errors on an Amazon bestseller crawl. I believe what is happening is that we unexpectedly received a bunch of empty 200 responses, and the validator below crashes when it evaluates `text[-0]` (which is the same as `text[0]`) while `text = ''`.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/scrapy/utils/signal.py", line 30, in send_catch_log
response = robustApply(receiver, signal=signal, sender=sender, *arguments, **named)
File "/usr/local/lib/python3.10/dist-packages/pydispatch/robustapply.py", line 55, in robustApply
return receiver(*arguments, **named)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/extension.py", line 70, in log_response_middleware
self.response_stats(request=request, response=response)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/core/core.py", line 55, in response_stats
self.request_response_middleware.process(request_response_object, response)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/normalizer/middleware.py", line 33, in process
self.validate_response_data(request_response_object, response)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/normalizer/middleware.py", line 163, in validate_response_data
ResponseValidator.validate(request_response_object, response, domain_tests=domain_tests, generic_tests=self._generic_validators)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 15, in validate
if ResponseValidator.run_validation_test(request_response_object, response, test.get('validation_tests', [])) is False:
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 45, in run_validation_test
if ResponseValidator.string_check(ResponseValidator.get_response_text(request_response_object, response), test.get('text_check', ''), test.get('comparison_type'), text_slice=test.get('text_slice')):
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 102, in string_check
text = ResponseValidator.string_slice(text, text_slice)
File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 116, in string_slice
return text[-text_slice.get('slice_lower_threshold', 0)]
IndexError: string index out of range
Just got a ton of these errors on an amazon bestseller crawl. I believe what is happening is that we unexpectedly got a bunch of empty 200 responses and the below validator crashes trying to test `text[-0]` when `text = ''`.
2023-09-19 12:01:13 [scrapy.utils.signal] ERROR: Error caught on signal handler: <bound method ScrapeOpsMonitor.log_response_middleware of <scrapeops_scrapy.extension.ScrapeOpsMonitor object at 0x7eff870fcb50>>
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1660, in _inlineCallbacks
result = current_context.run(gen.send, result)
StopIteration: <200 https://www.amazon.com/acp/p13n-zg-list-grid-desktop/p13n-zg-list-grid-desktop-04a0353d-de90-433c-baae-b0489d0167eb-1693478974679/nextPage>
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/usr/local/lib/python3.10/dist-packages/scrapy/utils/signal.py", line 30, in send_catch_log response = robustApply(receiver, signal=signal, sender=sender, *arguments, *named) File "/usr/local/lib/python3.10/dist-packages/pydispatch/robustapply.py", line 55, in robustApply return receiver(arguments, **named) File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/extension.py", line 70, in log_response_middleware self.response_stats(request=request, response=response) File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/core/core.py", line 55, in response_stats self.request_response_middleware.process(request_response_object, response) File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/normalizer/middleware.py", line 33, in process self.validate_response_data(request_response_object, response) File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/normalizer/middleware.py", line 163, in validate_response_data ResponseValidator.validate(request_response_object, response, domain_tests=domain_tests, generic_tests=self._generic_validators) File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 15, in validate if ResponseValidator.run_validation_test(request_response_object, response, test.get('validation_tests', [])) is False: File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 45, in run_validation_test if ResponseValidator.string_check(ResponseValidator.get_response_text(request_response_object, response), test.get('text_check', ''), test.get('comparison_type'), text_slice=test.get('text_slice')): File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 102, in string_check text = ResponseValidator.string_slice(text, text_slice) File "/usr/local/lib/python3.10/dist-packages/scrapeops_scrapy/validators/response_validator.py", line 116, in 
string_slice return text[-text_slice.get('slice_lower_threshold', 0)] IndexError: string index out of range