bellingcat / cisticola

Coordinates scrapers and interfaces with database
15 stars 0 forks source link

Gettr transformer fails on certain (all?) channel info #59

Status: Closed (loganwilliams closed this issue 1 year ago)

loganwilliams commented 2 years ago
2022-06-10 12:16:48.119 | ERROR    | cisticola.transformer.base:transform_all_untransformed_info:264 - An error has been caught in function 'transform_all_untransformed_info', process 'MainProcess' (2895963), thread 'MainThread' (139697882900288):
Traceback (most recent call last):

  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
    │              └ Namespace(command='transform-info', gsheet=None, media=False)
    └ <function transform_info at 0x7f0dafcda940>

  File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
    │          └ <function ETLController.transform_all_untransformed_info at 0x7f0db086c700>
    └ <cisticola.transformer.base.ETLController object at 0x7f0df2524850>

> File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
    │    │              └ [RawChannelInfo(scraper='VkontakteScraper 0.0.1', platform='VK', channel=44, raw_data='{"username": "public199284310", "name"...
    │    └ <function ETLController.transform_info at 0x7f0db086c3a0>
    └ <cisticola.transformer.base.ETLController object at 0x7f0df2524850>

  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
    │           │              │                   │    │                     │                └ <sqlalchemy.orm.session.Session object at 0x7f0dafb464c0>
    │           │              │                   │    │                     └ <sqlalchemy.orm.session.Session object at 0x7f0dafb464c0>
    │           │              │                   │    └ <function ETLController.insert_or_select at 0x7f0db0869940>
    │           │              │                   └ <cisticola.transformer.base.ETLController object at 0x7f0df2524850>
    │           │              └ RawChannelInfo(scraper='GettrScraper 0.0.1', platform='Gettr', channel=100, raw_data='{"udate": "1649677532157", "_t": "uinf"...
    │           └ <function GettrTransformer.transform_info at 0x7f0dafd94790>
    └ <cisticola.transformer.gettr.GettrTransformer object at 0x7f0dafbef250>

  File "/root/cisticola/cisticola/transformer/gettr.py", line 35, in transform_info
    description_url=raw['website'],
                    └ {'udate': '1649677532157', '_t': 'uinf', '_id': 'magali_robin', 'nickname': 'magali_robin', 'username': 'magali_robin', 'ouse...

KeyError: 'website'
2022-06-10 12:16:48.238 | ERROR    | __main__:transform_info:93 - An error has been caught in function 'transform_info', process 'MainProcess' (2895963), thread 'MainThread' (139697882900288):
Traceback (most recent call last):

  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
    │              └ Namespace(command='transform-info', gsheet=None, media=False)
    └ <function transform_info at 0x7f0dafcda940>

> File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
    │          └ <function ETLController.transform_all_untransformed_info at 0x7f0db086c700>
    └ <cisticola.transformer.base.ETLController object at 0x7f0df2524850>

  File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
    │    │              └ [RawChannelInfo(scraper='VkontakteScraper 0.0.1', platform='VK', channel=44, raw_data='{"username": "public199284310", "name"...
    │    └ <function ETLController.transform_info at 0x7f0db086c3a0>
    └ <cisticola.transformer.base.ETLController object at 0x7f0df2524850>

  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
    │           │              │                   │    │                     │                └ <sqlalchemy.orm.session.Session object at 0x7f0dafb464c0>
    │           │              │                   │    │                     └ <sqlalchemy.orm.session.Session object at 0x7f0dafb464c0>
    │           │              │                   │    └ <function ETLController.insert_or_select at 0x7f0db0869940>
    │           │              │                   └ <cisticola.transformer.base.ETLController object at 0x7f0df2524850>
    │           │              └ RawChannelInfo(scraper='GettrScraper 0.0.1', platform='Gettr', channel=100, raw_data='{"udate": "1649677532157", "_t": "uinf"...
    │           └ <function GettrTransformer.transform_info at 0x7f0dafd94790>
    └ <cisticola.transformer.gettr.GettrTransformer object at 0x7f0dafbef250>

  File "/root/cisticola/cisticola/transformer/gettr.py", line 35, in transform_info
    description_url=raw['website'],
                    └ {'udate': '1649677532157', '_t': 'uinf', '_id': 'magali_robin', 'nickname': 'magali_robin', 'username': 'magali_robin', 'ouse...

KeyError: 'website'
Traceback (most recent call last):
  File "/root/cisticola/app.py", line 138, in <module>
    transform_info(args)
  File "/root/cisticola/app.py", line 93, in transform_info
    controller.transform_all_untransformed_info()
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/loguru/_logger.py", line 1220, in catch_wrapper
    return function(*args, **kwargs)
  File "/root/cisticola/cisticola/transformer/base.py", line 264, in transform_all_untransformed_info
    self.transform_info(batch)
  File "/root/.local/share/virtualenvs/cisticola-w9lVstN7/lib/python3.9/site-packages/loguru/_logger.py", line 1220, in catch_wrapper
    return function(*args, **kwargs)
  File "/root/cisticola/cisticola/transformer/base.py", line 229, in transform_info
    transformer.transform_info(result, lambda obj: self.insert_or_select(obj, session, False), session)
  File "/root/cisticola/cisticola/transformer/gettr.py", line 35, in transform_info
    description_url=raw['website'],
KeyError: 'website'