Closed fmfnjnf closed 1 year ago
Скорее всего, дело не в скрипте, а в Авито. Возможно, он банит по IP или другим признакам, потому что селекторы у него не менялись. В следующей версии постараюсь сделать упор на обход данных блокировок.
Спасибо
Здравствуйте, появилась ошибка с обращением к странице. An error has been caught in function '__paginator', process 'MainProcess' (250780), thread 'MainThread' (140391046656000): Traceback (most recent call last):
File "/root/parser/parser_avito-master/parser_cls.py", line 208, in
File "/root/parser/parser_avito-master/parser_cls.py", line 160, in parse self.paginator() -> <main__.AvitoParse object at 0x7faf54df6fe0>
File "/root/parser/parser_avito-master/parser_cls.py", line 74, in paginator self.parse_page() -> <main.AvitoParse object at 0x7faf54df6fe0>
File "/root/parser/parser_avito-master/parser_cls.py", line 97, in parse_page titles = self.driver.find_elements(*LocatorAvito.TITLES) | | | | -> ('css selector', "[data-marker='item']") | | | -> <class 'locator.LocatorAvito'> | | -> <function WebDriver.find_elements at 0x7faf5560a710> | -> <undetected_chromedriver.Chrome (session="c481b457004252340f1a928edffba412")> -> <main__.AvitoParse object at 0x7faf54df6fe0>
File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webdriver.py", line 861, in find_elements
return self.execute(Command.FIND_ELEMENTS, {"using": by, "value": value})["value"] or []
| | | | | -> "[data-marker='item']"
| | | | -> 'css selector'
| | | -> 'findElements'
| | -> <class 'selenium.webdriver.remote.command.Command'>
| -> <function WebDriver.execute at 0x7faf55609480>
-> <undetected_chromedriver.Chrome (session="c481b457004252340f1a928edffba412")>
File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webdriver.py", line 440, in execute
self.error_handler.check_response(response)
| | | -> {'status': 500, 'value': '{"value":{"error":"unknown error","message":"unknown error: session deleted because of page crash\...
| | -> <function ErrorHandler.check_response at 0x7faf557ec820>
| -> <selenium.webdriver.remote.errorhandler.ErrorHandler object at 0x7faf54c2eb90>
-> <undetected_chromedriver.Chrome (session="c481b457004252340f1a928edffba412")>
File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/errorhandler.py", line 245, in check_response
raise exception_class(message, screen, stacktrace)
| | | -> ['#0 0x55a69a6d4133
selenium.common.exceptions.WebDriverException: Message: unknown error: session deleted because of page crash from unknown error: cannot determine loading status from tab crashed (Session info: chrome=113.0.5672.92) Stacktrace:
Это всё то же самое. Меня интересует дальнейшая работа скрипта: после данной ошибки он продолжил свою работу (сразу же или на следующей итерации)?
Да, продолжил. В таком случае можно ли убрать логи, чтобы ошибки не выводились?
Можете закомментировать или удалить в файле parser_cls.py строку 221: logger.error(error)
Спасибо большое за помощь
Баг в версии 1.05 исправлен
Скрипт продолжил работу, но ошибка никуда не исчезла. 2023-05-08 17:38:59.209 | ERROR | main:paginator:74 - An error has been caught in function 'paginator', process 'MainProcess' (142117), thread 'MainT hread' (140551600046080): Traceback (most recent call last):
File "/root/parser/parser_avito-master/parser_cls.py", line 208, in
).parse()
File "/root/parser/parser_avito-master/parser_cls.py", line 160, in parse self.paginator() └ <main__.AvitoParse object at 0x7fd4b6a4f5b0>
File "/root/parser/parser_avito-master/parser_cls.py", line 100, in __parse_pa ge description = title.find_element(LocatorAvito.DESCRIPTIONS).text │ │ │ └ ('css selector', "[class=' item-description']") │ │ └ <class 'locator.LocatorAvito'> │ └ <function WebElement.find_element at 0x7fd4b71a1090> └ <undetected_chromedriver.webelement.WebElement (session="8e4 780bde71e07501846c8dafd2d46cb", element="9484AD4459801A6B5423AC83...
File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webele ment.py", line 426, in find_element return self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": valu e})["value"] │ │ │ │ │ └ "[ class='item-description']" │ │ │ │ └ 'css selector' │ │ │ └ 'findChildElement' │ │ └ <class 'selenium.webdriver.remote.command.Command'> │ └ <function WebElement._execute at 0x7fd4b71a1000> └ <undetected_chromedriver.webelement.WebElement (session="8e4780bde7 1e07501846c8dafd2d46cb", element="9484AD4459801A6B5423AC83... File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webele ment.py", line 404, in _execute return self._parent.execute(command, params) │ │ │ │ └ {'using': 'css selector', 'value': "[ class='item-description']", 'id': '9484AD4459801A6B5423AC835E1F9AD5_element_477 '} │ │ │ └ 'findChildElement' │ │ └ <function WebDriver.execute at 0x7fd4b71b1510> │ └ <undetected_chromedriver.Chrome (session="8e4780bde71e07501846 c8dafd2d46cb")> └ <undetected_chromedriver.webelement.WebElement (session="8e4780bde7 1e07501846c8dafd2d46cb", element="9484AD4459801A6B5423AC83... File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webdri ver.py", line 440, in execute self.error_handler.check_response(response) │ │ │ └ {'status': 404, 'value': '{"value":{"err or":"no such element","message":"no such element: Unable to locate element: {\" metho... │ │ └ <function ErrorHandler.check_response at 0x7fd4b73948b0 > │ └ <selenium.webdriver.remote.errorhandler.ErrorHandler object at 0x7fd4 b6a4fbe0> └ <undetected_chromedriver.Chrome (session="8e4780bde71e07501846c8dafd2d46cb ")> File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/errorh andler.py", line 245, in check_response raise exception_class(message, screen, stacktrace) │ │ │ └ ['#0 0x562e04be8133', '#1 0x562e0491c966 ', '#2 0x562e049590dc ', '#3 0x562e04959211 ', ...
│ │ └ None
│ └ 'no such element: Unable to locate element: {"method ":"css selector","selector":"[class*=\'item-description\']"}\n (Session ...
└ <class 'selenium.common.exceptions.NoSuchElementException'>
selenium.common.exceptions.NoSuchElementException: Message: no such element: Una ble to locate element: {"method":"css selector","selector":"[class*='item-descri ption']"} (Session info: chrome=113.0.5672.63) Stacktrace:
0 0x562e04be8133
1 0x562e0491c966
2 0x562e049590dc
3 0x562e04959211
4 0x562e0494ee26
5 0x562e04978bfd
6 0x562e0494ed13
7 0x562e04978d9e
8 0x562e049911c7
9 0x562e049789a3
10 0x562e0494d46a
11 0x562e0494e55e
12 0x562e04ba7cae
13 0x562e04bab8fe
14 0x562e04bb4f20
15 0x562e04bac923
16 0x562e04b7fc0e
17 0x562e04bcfb08
18 0x562e04bcfc97
19 0x562e04be0113
20 0x7f4da3007b43
--- Logging error --- Traceback (most recent call last): File "/usr/local/lib/python3.10/dist-packages/loguru/_logger.py", line 1251, i n catch_wrapper return function(*args, *kwargs) File "/root/parser/parser_avito-master/parser_cls.py", line 100, in __parse_pa ge description = title.find_element(LocatorAvito.DESCRIPTIONS).text File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webele ment.py", line 426, in find_element return self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": valu e})["value"] File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webele ment.py", line 404, in _execute return self._parent.execute(command, params) File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/webdri ver.py", line 440, in execute self.error_handler.check_response(response) File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/errorh andler.py", line 245, in check_response raise exception_class(message, screen, stacktrace) selenium.common.exceptions.NoSuchElementException: Message: no such element: Una ble to locate element: {"method":"css selector","selector":"[class*='item-descri ption']"} (Session info: chrome=113.0.5672.63) Stacktrace:
0 0x562e04be8133
1 0x562e0491c966
2 0x562e049590dc
3 0x562e04959211
4 0x562e0494ee26
5 0x562e04978bfd
6 0x562e0494ed13
7 0x562e04978d9e
8 0x562e049911c7
9 0x562e049789a3
10 0x562e0494d46a
11 0x562e0494e55e
12 0x562e04ba7cae
13 0x562e04bab8fe
14 0x562e04bb4f20
15 0x562e04bac923
16 0x562e04b7fc0e
17 0x562e04bcfb08
18 0x562e04bcfc97
19 0x562e04be0113
20 0x7f4da3007b43
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/usr/local/lib/python3.10/dist-packages/notifiers/logging.py", line 50, in emit self.provider.notify(raise_on_errors=True, data) File "/usr/local/lib/python3.10/dist-packages/notifiers/core.py", line 303, in notify data = self._process_data(kwargs) File "/usr/local/lib/python3.10/dist-packages/notifiers/core.py", line 235, in _process_data self._validate_data(data) File "/usr/local/lib/python3.10/dist-packages/notifiers/core.py", line 208, in _validate_data raise BadArguments(validation_error=msg, provider=self.name, data=data) notifiers.exceptions.BadArguments: Error with sent data: 'An error has been caug ht in function \'paginator\', process \'MainProcess\' (142117), thread \'MainT hread\' (140551600046080):\nTraceback (most recent call last):\n\n File "/root/ parser/parser_avito-master/parser_cls.py", line 208, in\n ).parse()\ n\n File "/root/parser/parser_avito-master/parser_cls.py", line 160, in parse\n self. paginator()\n -> <main.AvitoParse object at 0x7fd4b6a4f5b0>\n\ n> File "/root/parser/parser_avito-master/parser_cls.py", line 74, in paginato r\n self.parse_page()\n -> <main.AvitoParse object at 0x7fd4b6a4f5b0 >\n\n File "/root/parser/parser_avito-master/parser_cls.py", line 100, in par se_page\n description = title.find_element(LocatorAvito.DESCRIPTIONS).text\n | | | -> (\'css selector\', "[class =\'item-description\']")\n | | -> <class \'loc ator.LocatorAvito\'>\n | -> <function WebElement.find_eleme nt at 0x7fd4b71a1090>\n -> <undetected_chromedriver.webelement. 
WebElement (session="8e4780bde71e07501846c8dafd2d46cb", element="9484AD4459801A6 B5423AC83...\n\n File "/usr/local/lib/python3.10/dist-packages/selenium/webdriv er/remote/webelement.py", line 426, in find_element\n return self._execute(Co mmand.FIND_CHILD_ELEMENT, {"using": by, "value": value})["value"]\n | | | | | -> "[class=\'ite m-description\']"\n | | | | -> \'css selector\'\n | | | -> \'findChildElement\'\ n | | -> <class \'selenium.webdriver.remote.command.Command\ '>\n | -> <function WebElement._execute at 0x7fd4b71a1000>\n -> <undetected_chromedriver.webelement.WebElement (session="8e4780bde71e0750 1846c8dafd2d46cb", element="9484AD4459801A6B5423AC83...\n File "/usr/local/lib/ python3.10/dist-packages/selenium/webdriver/remote/webelement.py", line 404, in _execute\n return self._parent.execute(command, params)\n | | | | -> {\'using\': \'css selector\', \'value\': "[class=\'item -description\']", \'id\': \'9484AD4459801A6B5423AC835E1F9AD5_element477\'}\n | | | -> \'findChildElement\'\n | | -> <function WebDriver.execute at 0x7fd4b71b1510>\n | -> <undetected chromedriver.Chrome (session="8e4780bde71e07501846c8dafd2d46cb")>\n -> <undetected_chromedriver.webelement.WebElement (session="8e4780bde71e07501846c8 dafd2d46cb", element="9484AD4459801A6B5423AC83...\n File "/usr/local/lib/python 3.10/dist-packages/selenium/webdriver/remote/webdriver.py", line 440, in execute \n self.error_handler.check_response(response)\n | | | -> {\'status\': 404, \'value\': \'{"value":{"error":"no such element"," message":"no such element: Unable to locate element: {\\"metho...\n | | -> <function ErrorHandler.check_response at 0x7fd4b73948b0>\n | -> <selenium.webdriver.remote.errorhandler.ErrorHandler object at 0x7fd4b6a4fb e0>\n -> <undetected_chromedriver.Chrome (session="8e4780bde71e07501846c8dafd 2d46cb")>\n File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/re mote/errorhandler.py", line 245, in check_response\n raise exception_class(me ssage, screen, stacktrace)\n 
| | | -> [\'#0 0x562e04be8133 \', \'#1 0x562e0491c966 \', \'#2 0x562e049590dc \', \'#3 0x562e04959211 \', ...\n | | -> None\n | -> \'no such element: Unable to locate element: {"method":"css selector","selector":"[class=\\'item-description\\'] "}\n (Session ...\n -> <class \'selenium.common.exceptions.NoSuchElem entException\'>\n\nselenium.common.exceptions.NoSuchElementException: Message: n o such element: Unable to locate element: {"method":"css selector","selector":"[ class=\'item-description\']"}\n (Session info: chrome=113.0.5672.63)\nStacktra ce:\n#0 0x562e04be8133 \n#1 0x562e0491c966 \n#2 0x562e049590dc \n#3 0x562e04959211 \n#4 0x562e0494ee26 \n#5 0x562e0 4978bfd \n#6 0x562e0494ed13 \n#7 0x562e04978d9e \n#8 0x562e049911c7 \n#9 0x562e049789a3 \n#10 0x562e0494d46a \n#11 0x562e0494e55e \n#12 0x562e04ba7cae \n#13 0x562e04bab 8fe \n#14 0x562e04bb4f20 \n#15 0x562e04bac923 \n#16 0 x562e04b7fc0e \n#17 0x562e04bcfb08 \n#18 0x562e04bcfc97 \n#19 0x562e04be0113 \n#20 0x7f4da3007b43 \n\n\n' is too lo ng
Call stack:
File "/root/parser/parser_avito-master/parser_cls.py", line 208, in
).parse()
File "/root/parser/parser_avito-master/parser_cls.py", line 160, in parse
self. paginator()
File "/root/parser/parser_avito-master/parser_cls.py", line 74, in paginator
self.parse_page()
File "/usr/local/lib/python3.10/dist-packages/loguru/_logger.py", line 1250, i n catch_wrapper
with catcher:
File "/usr/local/lib/python3.10/dist-packages/loguru/_logger.py", line 1217, i n exit
logger._log(level, from_decorator, catch_options, message, (), {})
File "/usr/local/lib/python3.10/dist-packages/loguru/_logger.py", line 2002, i n _log
handler.emit(log_record, level_id, from_decorator, raw, colored_message)
File "/usr/local/lib/python3.10/dist-packages/loguru/_handler.py", line 197, i n emit
self._sink.write(str_record)
File "/usr/local/lib/python3.10/dist-packages/loguru/_simple_sinks.py", line 5 0, in write
self._handler.handle(record)
File "/usr/lib/python3.10/logging/init.py", line 968, in handle
self.emit(record)
File "/usr/local/lib/python3.10/dist-packages/notifiers/logging.py", line 52, in emit
self.handleError(record)
Message: 'An error has been caught in function \'paginator\', process \'MainPr ocess\' (142117), thread \'MainThread\' (140551600046080):\nTraceback (most rece nt call last):\n\n File "/root/parser/parser_avito-master/parser_cls.py", line 208, in \n ).parse()\n\n File "/root/parser/parser_avito-master/pars er_cls.py", line 160, in parse\n self. paginator()\n -> <main.AvitoPa rse object at 0x7fd4b6a4f5b0>\n\n> File "/root/parser/parser_avito-master/parser _cls.py", line 74, in paginator\n self.parse_page()\n -> <main.Avi toParse object at 0x7fd4b6a4f5b0>\n\n File "/root/parser/parser_avito-master/pa rser_cls.py", line 100, in __parse_page\n description = title.find_element(L ocatorAvito.DESCRIPTIONS).text\n | | | -> (\'css selector\', "[class=\'item-description\']")\n | | -> <class \'locator.LocatorAvito\'>\n | -> <function WebElement.find_element at 0x7fd4b71a1090>\n -> <und etected_chromedriver.webelement.WebElement (session="8e4780bde71e07501846c8dafd2 d46cb", element="9484AD4459801A6B5423AC83...\n\n File "/usr/local/lib/python3.1 0/dist-packages/selenium/webdriver/remote/webelement.py", line 426, in find_elem ent\n return self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": value})["value"]\n | | | | | -> "[class=\'item-description\']"\n | | | | -> \'css selector\'\n | | | -> \'findChildElement\'\n | | -> <class \'selenium.w ebdriver.remote.command.Command\'>\n | -> <function WebElement._exe cute at 0x7fd4b71a1000>\n -> <undetected_chromedriver.webelement.WebEl ement (session="8e4780bde71e07501846c8dafd2d46cb", element="9484AD4459801A6B5423 AC83...\n File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remo te/webelement.py", line 404, in _execute\n return self._parent.execute(comman d, params)\n | | | | -> {\'using\': \'css select or\', \'value\': "[class=\'item-description\']", \'id\': \'9484AD4459801A6B5423 AC835E1F9AD5_element_477\'}\n | | | -> \'findChildEleme nt\'\n | | -> <function WebDriver.execute at 0x7fd4b71b1510>\ n | -> 
<undetected_chromedriver.Chrome (session="8e4780bde71e075018 46c8dafd2d46cb")>\n -> <undetected_chromedriver.webelement.WebElement (session="8e4780bde71e07501846c8dafd2d46cb", element="9484AD4459801A6B5423AC83.. .\n File "/usr/local/lib/python3.10/dist-packages/selenium/webdriver/remote/web driver.py", line 440, in execute\n self.error_handler.check_response(response )\n | | | -> {\'status\': 404, \'value\': \'{"val ue":{"error":"no such element","message":"no such element: Unable to locate elem ent: {\\"metho...\n | | -> <function ErrorHandler.check_resp onse at 0x7fd4b73948b0>\n | -> <selenium.webdriver.remote.errorhandler.Err orHandler object at 0x7fd4b6a4fbe0>\n -> <undetected_chromedriver.Chrome (ses sion="8e4780bde71e07501846c8dafd2d46cb")>\n File "/usr/local/lib/python3.10/dis t-packages/selenium/webdriver/remote/errorhandler.py", line 245, in check_respon se\n raise exception_class(message, screen, stacktrace)\n | | | -> [\'#0 0x562e04be8133 \', \'#1 0x562e0491c966 \', \'#2 0x562e049590dc \', \'#3 0x562e04959211 \', . ..\n | | -> None\n | -> \'n o such element: Unable to locate element: {"method":"css selector","selector":"[ class=\\'item-description\\']"}\n (Session ...\n -> <class \'selen ium.common.exceptions.NoSuchElementException\'>\n\nselenium.common.exceptions.No SuchElementException: Message: no such element: Unable to locate element: {"meth od":"css selector","selector":"[class=\'item-description\']"}\n (Session info: chrome=113.0.5672.63)\nStacktrace:\n#0 0x562e04be8133 \n#1 0x562e0491c 966 \n#2 0x562e049590dc \n#3 0x562e04959211 \n#4 0x56 2e0494ee26 \n#5 0x562e04978bfd \n#6 0x562e0494ed13 \n #7 0x562e04978d9e \n#8 0x562e049911c7 \n#9 0x562e049789a3 \n#10 0x562e0494d46a \n#11 0x562e0494e55e \n#12 0x562e04b a7cae \n#13 0x562e04bab8fe \n#14 0x562e04bb4f20 \n#15 0x562e04bac923 \n#16 0x562e04b7fc0e \n#17 0x562e04bcfb08 \n#18 0x562e04bcfc97 \n#19 0x562e04be0113 \n#20 0x7f4da30 07b43 \n\n'
Arguments: ()