spendright / msd

Merge SpendRight scraper data
Apache License 2.0
0 stars 1 forks source link

Hard-coded extra categories #13

Open coyotemarin opened 8 years ago

coyotemarin commented 8 years ago

Here are some extra categories I added for companies/brands for an earlier version of SpendRight. Most of these are still useful:

# Hand-maintained corrections: company name -> set of extra categories.
# These should eventually be pushed back into the "companies" scraper.
COMPANY_TO_EXTRA_CATS = {
    "1800Flowers": {"Flowers and Plants", "Gift Baskets"},
    # just some lotions and shaves
    "Abercrombie & Fitch": {"Personal Care"},
    "Alter Eco": {"Beans and Grains"},
    "Amazon.com": {"Electronics"},
    "Armor Holdings": {"Electronics", "Security"},
    "Blauer Manufacturing Company": {"Electronics", "Security"},
    # Prison supplies; category list taken from https://www.bobbarker.com
    "Bob Barker": {
        "Bedding",
        "Clothing",
        "Food Service",
        "Furnishings",
        "Laundry",
        "Medical Supplies",
        "Personal Care",
        "Recreation",
        "Security",
    },
    # not just shoes!
    "BZT Fashion": {"Apparel"},
    "Carter's": {"Children's Equipment"},
    # banana swag!
    "Chiquita": {"Apparel"},
    "Garan": {"Baby"},
    "General Electric": {
        "Appliances",
        "Electronics",
        "Tools and Home Improvement",
    },
    "Henri Bendel": {"Beauty"},
    "Hitachi": {"Tools and Home Improvement"},
    "Mothercare": {"Toys and Children's Equipment"},
    "Office Depot": {"Electronics", "Office Products"},
    # Covers the AVENT brand; delete once that brand has been added.
    "Philips": {"Baby"},
}

# Extra categories per brand, keyed by a (company, brand) tuple.
#
# Caution: a brand that currently inherits its categories from its company
# stops inheriting as soon as brand(s) are added here, so the brand list
# you supply has to be comprehensive.
BRAND_TO_EXTRA_CATS = {
    ("ABF", "Ovomaltine"): {"Food and Drink"},
    # company name will be just "ABF" soon
    ("ABF plc.", "Ovomaltine"): {"Food and Drink"},
    ("Ahold USA", "Giant"): {"Grocery"},
    ("Burger King", "Burger King"): {"Food Services", "Toys"},
    ("Clorox", "Kingsford"): {"Grills and Outdoor Cooking"},
    ("Clorox", "Hidden Valley"): {"Food and Drink"},
    ("Clorox", "Soy Vay"): {"Food and Drink"},
    ("Clorox", "Kitchen Bouquet"): {"Food and Drink"},
    ("Coca-Cola", "Coca-Cola"): {
        "Home Decor",
        # branded glasses
        "Home Solutions",
        "Soda",
    },
    ("Coca-Cola", "Diet Coke"): {
        "Home Decor",
        "Home Solutions",
        "Soda",
    },
    ("Disney", "Disney"): {
        "Apparel",
        "Baby",
        # branded egg poachers, waffle irons?!!
        "Home Solutions",
        "Media",
        "Toys",
    },
    ("Herm\xe8s", "Herm\xe8s"): {"Beauty"},
    ("Google", "Android"): {"Electronics", "Internet", "Software"},
    ("Hanover Direct", "The Company Store"): {
        "Bath",
        "Bedding",
        "Home Decor",
    },
    ("Hardy Life", "Ed Hardy"): {"Beauty"},
    ("Hershey's", "Mauna Loa"): {"Nuts"},
    ("Hitachi", "Hitachi"): {"Electronics", "Tools"},
    # almost everything, apparently
    ("HSN", "Ballard Designs"): {
        "Garden",
        "Home Solutions",
        "Office Supplies",
    },
    # almost everything, apparently
    ("HSN", "Grandin Road"): {
        "Electronics",
        "Fashion",
        "Garden",
        "Home Solutions",
    },
    ("HSN", "Improvements"): {
        "Bedding and Bath",
        "Furnishings",
        "Home Decor",
        "Home Solutions",
        "Kitchen",
        "Laundry",
        "Pet Care",
        "Storage and Organization",
    },
    ("J. Choo", "Jimmy Choo"): {"Beauty"},
    ("Kroger", "Scott's"): {"Grocery"},
    ("Kroger", "Smith's"): {"Grocery"},
    ("Newell Rubbermaid", "Dymo"): {"Electronics", "Office Products"},
    ("Pepsi", "Pepsi"): {"Personal Care"},
    # the AVENT brand doesn't exist yet
    ("Philips", "AVENT"): {"Baby"},

    # TODO: delete once upstream problems are fixed
    # (company should be "Hershey's")
    ("The Hershey Company", "Mauna Loa"): {"Nuts"},
    ("Time Warner", "Sports Illustrated"): {"Sports"},
    ("Whirlpool", "Kitchen Aid"): {"Kitchen"},
}