Closed Munger closed 4 years ago
In [1]: from selectorlib import Extractor
In [2]: import requests
In [3]: yaml_schema = """
...: product:
...: css: li.g-item-sortable
...: multiple: true
...: children:
...: data_id:
...: css: ""
...: type: Attribute
...: attribute: data-id
...: data_reposition_action_params:
...: css: ""
...: type: Attribute
...: attribute: data-reposition-action-params
...: """
In [4]: res = requests.get('https://www.amazon.co.uk/hz/wishlist/ls/1IVZSOPV0TPMB')
In [5]: extractor = Extractor.from_yaml_string(yaml_schema)
In [6]: extractor.extract(res.text)
Out[6]:
{'product': [{'data_id': '1IVZSOPV0TPMB',
'data_reposition_action_params': '{"itemExternalId":"ASIN:B00DN43PQ6|A1F83G8C2ARO7P","listType":"wishlist","sid":"000-0000000-0000000"}'},
{'data_id': '1IVZSOPV0TPMB',
'data_reposition_action_params': '{"itemExternalId":"ASIN:B00H859M36|A1F83G8C2ARO7P","listType":"wishlist","sid":"000-0000000-0000000"}'},
{'data_id': '1IVZSOPV0TPMB',
'data_reposition_action_params': '{"itemExternalId":"ASIN:B00062RLZO|A1F83G8C2ARO7P","listType":"wishlist","sid":"000-0000000-0000000"}'},
{'data_id': '1IVZSOPV0TPMB',
'data_reposition_action_params': '{"itemExternalId":"ASIN:B005RUPID2|A1F83G8C2ARO7P","listType":"wishlist","sid":"000-0000000-0000000"}'}]}
Thank you! It was the `css: ""` that I had missed. I couldn't figure out how to reference the same node.
I'm trying to parse an Amazon wishlist, for example https://www.amazon.co.uk/hz/wishlist/ls/1IVZSOPV0TPMB
The selector for each wishlist item is `li.g-item-sortable`, but some key attributes are stored on the `li` element itself, for example `data-itemid` and `data-reposition-action-params`. How can I pull these out as named attributes?