bellingcat / cisticola

Coordinates scrapers and interfaces with database
15 stars 0 forks source link

Rumble transformer fails on certain (all?) channel info #61

Status: Closed — loganwilliams closed this issue 1 year ago

loganwilliams commented 2 years ago
2022-06-10 12:20:22.055 | ERROR    | cisticola.transformer.base:transform_all_untransformed_info:264 - An error has been caught in function 'transform_all_untransformed_info', process 'MainProcess' (2896027), thread 'MainThread' (139652955944768):
Traceback (most recent call last):

  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
    │              └ Namespace(command='transform-info', gsheet=None, media=False)
    └ <function transform_info at 0x7f0339f2b940>

  File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
    │          └ <function ETLController.transform_all_untransformed_info at 0x7f033aab9700>
    └ <cisticola.transformer.base.ETLController object at 0x7f0339ef67c0>

> File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
    │    │              └ [RawChannelInfo(scraper='VkontakteScraper 0.0.1', platform='VK', channel=44, raw_data='{"username": "public199284310", "name"...
    │    └ <function ETLController.transform_info at 0x7f033aab93a0>
    └ <cisticola.transformer.base.ETLController object at 0x7f0339ef67c0>

  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
    │           │              │                   │    │                     │                └ <sqlalchemy.orm.session.Session object at 0x7f0339e60550>
    │           │              │                   │    │                     └ <sqlalchemy.orm.session.Session object at 0x7f0339e60550>
    │           │              │                   │    └ <function ETLController.insert_or_select at 0x7f033aab8940>
    │           │              │                   └ <cisticola.transformer.base.ETLController object at 0x7f0339ef67c0>
    │           │              └ RawChannelInfo(scraper='RumbleScraper 0.0.1', platform='Rumble', channel=53, raw_data='{"name": "QApel", "verified": false, "...
    │           └ <function RumbleTransformer.transform_info at 0x7f0339fe45e0>
    └ <cisticola.transformer.rumble.RumbleTransformer object at 0x7f0339e3c550>

  File "/root/cisticola/cisticola/transformer/rumble.py", line 28, in transform_info
    platform_id=raw['id'],
                └ {'name': 'QApel', 'verified': False, 'thumbnail': 'https://sp.rmbl.ws/z8/r/b/r/b/rbrba.baa-QApel-qjunun.png', 'cover': 'https...

KeyError: 'id'
2022-06-10 12:20:22.179 | ERROR    | __main__:transform_info:93 - An error has been caught in function 'transform_info', process 'MainProcess' (2896027), thread 'MainThread' (139652955944768):
Traceback (most recent call last):

  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
    │              └ Namespace(command='transform-info', gsheet=None, media=False)
    └ <function transform_info at 0x7f0339f2b940>

> File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
    │          └ <function ETLController.transform_all_untransformed_info at 0x7f033aab9700>
    └ <cisticola.transformer.base.ETLController object at 0x7f0339ef67c0>

  File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
    │    │              └ [RawChannelInfo(scraper='VkontakteScraper 0.0.1', platform='VK', channel=44, raw_data='{"username": "public199284310", "name"...
    │    └ <function ETLController.transform_info at 0x7f033aab93a0>
    └ <cisticola.transformer.base.ETLController object at 0x7f0339ef67c0>

  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
    │           │              │                   │    │                     │                └ <sqlalchemy.orm.session.Session object at 0x7f0339e60550>
    │           │              │                   │    │                     └ <sqlalchemy.orm.session.Session object at 0x7f0339e60550>
    │           │              │                   │    └ <function ETLController.insert_or_select at 0x7f033aab8940>
    │           │              │                   └ <cisticola.transformer.base.ETLController object at 0x7f0339ef67c0>
    │           │              └ RawChannelInfo(scraper='RumbleScraper 0.0.1', platform='Rumble', channel=53, raw_data='{"name": "QApel", "verified": false, "...
    │           └ <function RumbleTransformer.transform_info at 0x7f0339fe45e0>
    └ <cisticola.transformer.rumble.RumbleTransformer object at 0x7f0339e3c550>

  File "/root/cisticola/cisticola/transformer/rumble.py", line 28, in transform_info
    platform_id=raw['id'],
                └ {'name': 'QApel', 'verified': False, 'thumbnail': 'https://sp.rmbl.ws/z8/r/b/r/b/rbrba.baa-QApel-qjunun.png', 'cover': 'https...

KeyError: 'id'
Traceback (most recent call last):
  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
  File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/loguru/_logger.py", line 1220, in catch_wrapper
    return function(*args, **kwargs)
  File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/loguru/_logger.py", line 1220, in catch_wrapper
    return function(*args, **kwargs)
  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
  File "/root/cisticola/cisticola/transformer/rumble.py", line 28, in transform_info
    platform_id=raw['id'],
KeyError: 'id'