bellingcat / cisticola

Coordinates scrapers and interfaces with database
17 stars · 1 fork · source link

Bitchute transformer fails on certain (all?) channel info #60

Status: Closed (closed by loganwilliams 2 years ago)

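loganwilliams commented 2 years ago

Summary of the log below: `BitchuteTransformer.transform_info` (cisticola/transformer/bitchute.py, line 52) passes `raw['created']` to `dateutil.parser.parse`, but for this channel the value is the relative string '1 year, 5 months ago', which is not an absolute date, so `dateutil` raises `ParserError` and the whole `transform-info` run aborts.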
2022-06-10 12:18:19.498 | ERROR    | cisticola.transformer.base:transform_all_untransformed_info:264 - An error has been caught in function 'transform_all_untransformed_info', process 'MainProcess' (2896011), thread 'MainThread' (139804020750144):
Traceback (most recent call last):

  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
    │              └ Namespace(command='transform-info', gsheet=None, media=False)
    └ <function transform_info at 0x7f26661ca940>

  File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
    │          └ <function ETLController.transform_all_untransformed_info at 0x7f2666d5a700>
    └ <cisticola.transformer.base.ETLController object at 0x7f26a8a15640>

> File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
    │    │              └ [RawChannelInfo(scraper='VkontakteScraper 0.0.1', platform='VK', channel=44, raw_data='{"username": "public199284310", "name"...
    │    └ <function ETLController.transform_info at 0x7f2666d5a3a0>
    └ <cisticola.transformer.base.ETLController object at 0x7f26a8a15640>

  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
    │           │              │                   │    │                     │                └ <sqlalchemy.orm.session.Session object at 0x7f2666048130>
    │           │              │                   │    │                     └ <sqlalchemy.orm.session.Session object at 0x7f2666048130>
    │           │              │                   │    └ <function ETLController.insert_or_select at 0x7f2666d59940>
    │           │              │                   └ <cisticola.transformer.base.ETLController object at 0x7f26a8a15640>
    │           │              └ RawChannelInfo(scraper='BitchuteScraper 0.0.1', platform='Bitchute', channel=283, raw_data='{"description": "Qlobal-Change \\...
    │           └ <function BitchuteTransformer.transform_info at 0x7f2666d5ab80>
    └ <cisticola.transformer.bitchute.BitchuteTransformer object at 0x7f26660d9c70>

  File "/root/cisticola/cisticola/transformer/bitchute.py", line 52, in transform_info
    date_created=dateutil.parser.parse(raw['created']),
                 │        │      │     └ {'description': 'Qlobal-Change è una piattaforma che unisce le notizie alternativi.\nEsplora i nostri NFT: https://opensea.io...
                 │        │      └ <function parse at 0x7f26a0611790>
                 │        └ <module 'dateutil.parser' from '/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser...
                 └ <module 'dateutil' from '/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/__init__.py'>

  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser/_parser.py", line 1368, in parse
    return DEFAULTPARSER.parse(timestr, **kwargs)
           │             │     │          └ {}
           │             │     └ '1 year, 5 months ago'
           │             └ <function parser.parse at 0x7f26a062cb80>
           └ <dateutil.parser._parser.parser object at 0x7f26a062b1f0>
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser/_parser.py", line 643, in parse
    raise ParserError("Unknown string format: %s", timestr)
          │                                        └ '1 year, 5 months ago'
          └ <class 'dateutil.parser._parser.ParserError'>

dateutil.parser._parser.ParserError: Unknown string format: 1 year, 5 months ago
2022-06-10 12:18:19.620 | ERROR    | __main__:transform_info:93 - An error has been caught in function 'transform_info', process 'MainProcess' (2896011), thread 'MainThread' (139804020750144):
Traceback (most recent call last):

  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
    │              └ Namespace(command='transform-info', gsheet=None, media=False)
    └ <function transform_info at 0x7f26661ca940>

> File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
    │          └ <function ETLController.transform_all_untransformed_info at 0x7f2666d5a700>
    └ <cisticola.transformer.base.ETLController object at 0x7f26a8a15640>

  File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
    │    │              └ [RawChannelInfo(scraper='VkontakteScraper 0.0.1', platform='VK', channel=44, raw_data='{"username": "public199284310", "name"...
    │    └ <function ETLController.transform_info at 0x7f2666d5a3a0>
    └ <cisticola.transformer.base.ETLController object at 0x7f26a8a15640>

  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
    │           │              │                   │    │                     │                └ <sqlalchemy.orm.session.Session object at 0x7f2666048130>
    │           │              │                   │    │                     └ <sqlalchemy.orm.session.Session object at 0x7f2666048130>
    │           │              │                   │    └ <function ETLController.insert_or_select at 0x7f2666d59940>
    │           │              │                   └ <cisticola.transformer.base.ETLController object at 0x7f26a8a15640>
    │           │              └ RawChannelInfo(scraper='BitchuteScraper 0.0.1', platform='Bitchute', channel=283, raw_data='{"description": "Qlobal-Change \\...
    │           └ <function BitchuteTransformer.transform_info at 0x7f2666d5ab80>
    └ <cisticola.transformer.bitchute.BitchuteTransformer object at 0x7f26660d9c70>

  File "/root/cisticola/cisticola/transformer/bitchute.py", line 52, in transform_info
    date_created=dateutil.parser.parse(raw['created']),
                 │        │      │     └ {'description': 'Qlobal-Change è una piattaforma che unisce le notizie alternativi.\nEsplora i nostri NFT: https://opensea.io...
                 │        │      └ <function parse at 0x7f26a0611790>
                 │        └ <module 'dateutil.parser' from '/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser...
                 └ <module 'dateutil' from '/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/__init__.py'>

  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser/_parser.py", line 1368, in parse
    return DEFAULTPARSER.parse(timestr, **kwargs)
           │             │     │          └ {}
           │             │     └ '1 year, 5 months ago'
           │             └ <function parser.parse at 0x7f26a062cb80>
           └ <dateutil.parser._parser.parser object at 0x7f26a062b1f0>
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser/_parser.py", line 643, in parse
    raise ParserError("Unknown string format: %s", timestr)
          │                                        └ '1 year, 5 months ago'
          └ <class 'dateutil.parser._parser.ParserError'>

dateutil.parser._parser.ParserError: Unknown string format: 1 year, 5 months ago
Traceback (most recent call last):
  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
  File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/loguru/_logger.py", line 1220, in catch_wrapper
    return function(*args, **kwargs)
  File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/loguru/_logger.py", line 1220, in catch_wrapper
    return function(*args, **kwargs)
  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
  File "/root/cisticola/cisticola/transformer/bitchute.py", line 52, in transform_info
    date_created=dateutil.parser.parse(raw['created']),
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser/_parser.py", line 1368, in parse
    return DEFAULTPARSER.parse(timestr, **kwargs)
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/dateutil/parser/_parser.py", line 643, in parse
    raise ParserError("Unknown string format: %s", timestr)
dateutil.parser._parser.ParserError: Unknown string format: 1 year, 5 months ago