redpanda-ai / Meerkat

Used for the Meerkat project
Other
1 star 1 fork source link

Agg Index Search #952

Open yodjie opened 8 years ago

yodjie commented 8 years ago

Input: agg_input.json

 {'Agg_Name': ['Starbucks US'],
 'CNN': {'category': 'Restaurants', 'label': 'Starbucks', 'threshold': '0.60'},
 'RNN_merchant_name': 'STARBUCKS',
 'agg_search': {'address': '1363 Hillcrest Rd',
                'city': 'Mobile',
                'list_name': 'Starbucks US',
                'phone_number': '(251) 776-1161',
                'state': 'AL',
                'store_number': '10537-96355',
                'zip_code': '36695-3925'},
 'amount': 10,
 'city': 'Mobile',
 'container': 'card',
 'country': 'US',
 'date': '2016-10-14',
 'description': 'STARBUCKS #10537 MOBILE  Mobile       AL',
 'ledger_entry': 'debit',
 'merchant_score': '1.0',
 'phone_number': '',
 'postal_code': '36695',
 'state': 'AL',
 'store_number': '#10537',
 'transaction_id': 17776}

Query:

{'_source': ['list_name',
             'address',
             'city',
             'state',
             'zip_code',
             'phone_number',
             'store_number'],
 'query': {'bool': {'must': [{'term': {'list_name': 'Starbucks US'}},
                             {'match': {'city': 'Mobile'}},
                             {'match': {'state': 'AL'}},
                             {'match': {'zip_code': {'boost': 2,
                                                     'fuzziness': 'AUTO',
                                                     'query': '36695'}}}],
                    'should': [{'match': {'store_number': {'boost': 3,
                                                           'fuzziness': 'AUTO',
                                                           'query': '#10537'}}}]}},
 'size': 10000}
yodjie commented 8 years ago
  {
    "CNN_Name": "Abercrombie & Fitch",
    "Agg_Name": "Abercrombie and Fitch"
  },
  {
    "CNN_Name": "Abercrombie & Fitch",
    "Agg_Name": "Abercrombie Kids"
  },

Query:

{'_source': ['list_name',
             'address',
             'city',
             'state',
             'zip_code',
             'phone_number',
             'store_number'],
 'query': {'bool': {'must': [{'bool': {'minimum_should_match': 1,
                                       'should': [{'term': {'list_name': 'Abercrombie '
                                                                         'and '
                                                                         'Fitch'}},
                                                  {'term': {'list_name': 'Abercrombie '
                                                                         'Kids'}}]}},
                             {'match': {'state': 'CA'}}],
                    'should': [{'match': {'city': 'San Jose'}}]}},
 'size': 1000}
diwu001 commented 8 years ago
            {
                "Agg_Name": [
                    "Jamba Juice"
                ],
                "CNN": {
                    "category": "Restaurants",
                    "label": "Jamba Juice",
                    "threshold": "0.80"
                },
                "agg_search": {
                    "address": "173 West County Mall Drive",
                    "city": "St. Louis",
                    "list_name": "Jamba Juice",
                    "phone_number": "(314) 966-1066",
                    "state": "MO",
                    "zip_code": "63131"
                },
                "amount": 10,
                "city": "SAINT LOUIS",
                "container": "card",
                "country": "US",
                "date": "2016-10-14",
                "description": "JAMBA JUICE 1353",
                "ledger_entry": "debit",
                "merchant_score": "1.0",
                "phone_number": "",
                "postal_code": "63131",
                "state": "MO",
                "transaction_id": 79811
            }
diwu001 commented 8 years ago
            {
                "Agg_Name": [
                    "Dollar Tree"
                ],
                "CNN": {
                    "category": "Electronics/General Merchandise",
                    "label": "Dollar Tree",
                    "threshold": "0.80"
                },
                "agg_search": {
                    "address": "7421 West Colonial Drive",
                    "city": "Orlando",
                    "list_name": "Dollar Tree",
                    "phone_number": "(407) 296-2075",
                    "state": "FL",
                    "store_number": "167",
                    "zip_code": "32818"
                },
                "amount": 10,
                "city": "DAYTONA BEACH",
                "container": "card",
                "country": "US",
                "date": "2016-10-14",
                "description": "DOLRTREE 223  00002238",
                "ledger_entry": "debit",
                "merchant_score": "1.0",
                "phone_number": "",
                "postal_code": "32118",
                "state": "FL",
                "transaction_id": 30960
            }
yodjie commented 8 years ago

Result:

{'Agg_Name': ["Dick's Sporting Goods"],
 'CNN': {'category': 'Personal/Family',
         'label': "Dick's Sporting Goods",
         'threshold': '0.80'},
 'RNN_merchant_name': 'DICKS SPORTING',
 'agg_search': {'address': '501 Monroeville Mall',
                'city': 'Monroeville',
                'list_name': "Dick's Sporting Goods",
                'phone_number': '(412) 843-0024',
                'state': 'PA',
                'store_number': '1107',
                'zip_code': '15146'},
 'amount': 10,
 'city': 'MONROEVILLE',
 'container': 'card',
 'country': 'US',
 'date': '2016-10-14',
 'description': 'DICKS SPORTING GOODS1107 MONROEVILLE  PA',
 'ledger_entry': 'debit',
 'merchant_score': '1.0',
 'phone_number': '',
 'postal_code': '15146',
 'state': 'PA',
 'store_number': 'GOODS1107',
 'transaction_id': 64773}

Query:

{'_source': ['list_name',
             'address',
             'city',
             'state',
             'zip_code',
             'phone_number',
             'store_number'],
 'query': {'bool': {'must': [{'term': {'list_name': "Dick's Sporting "
                                                    'Goods'}},
                             {'match': {'city': 'MONROEVILLE'}},
                             {'match': {'state': 'PA'}},
                             {'match': {'zip_code': {'boost': 2,
                                                     'fuzziness': 'AUTO',
                                                     'query': '15146'}}}],
                    'should': [{'match': {'store_number': {'boost': 3,
                                                           'fuzziness': 'AUTO',
                                                           'query': 'GOODS1107'}}}]}},
 'size': 10000}
yodjie commented 8 years ago

Changed the query to handle multiple store numbers:

{'_source': ['list_name',
             'address',
             'city',
             'state',
             'zip_code',
             'phone_number',
             'store_number',
             'latitude',
             'longitude',
             'source_url'],
 'query': {'bool': {'must': [{'term': {'list_name': 'Target'}},
                             {'match': {'city': 'Peoria'}},
                             {'match': {'state': 'AZ'}},
                             {'match': {'zip_code': {'boost': 2,
                                                     'fuzziness': 'AUTO',
                                                     'query': '85382'}}}],
                    'should': [{'bool': {'minimum_should_match': 1,
                                         'should': [{'match': {'store_number': {'boost': 3,
                                                                                'fuzziness': 'AUTO',
                                                                                'query': '00008250'}}},
                                                    {'match': {'store_number': {'boost': 3,
                                                                                'fuzziness': 'AUTO',
                                                                                'query': '00003219'}}},
                                                    {'match': {'store_number': {'boost': 3,
                                                                                'fuzziness': 'AUTO',
                                                                                'query': '00001234'}}}]}},
                               {'match': {'phone_number': {'boost': 2,
                                                           'fuzziness': 'AUTO',
                                                           'query': '888-888-8888'}}}]}},
 'size': 10000}
yodjie commented 8 years ago

`zip_code` stays in the `should` query if a store number exists; otherwise it stays in the `must` query with zero fuzziness.

{'_source': ['list_name',
             'address',
             'city',
             'state',
             'zip_code',
             'phone_number',
             'store_number',
             'latitude',
             'longitude',
             'source_url'],
 'query': {'bool': {'must': [{'term': {'list_name': 'CVS'}},
                             {'match': {'city': 'St. Louis'}},
                             {'match': {'state': 'MO'}}],
                    'should': [{'match': {'store_number': {'boost': 3,
                                                           'fuzziness': 'AUTO',
                                                           'query': '#02376'}}},
                               {'match': {'zip_code': {'boost': 2,
                                                       'fuzziness': 'AUTO',
                                                       'query': '63108'}}}]}},
 'size': 10000}
{'_source': ['list_name',
             'address',
             'city',
             'state',
             'zip_code',
             'phone_number',
             'store_number',
             'latitude',
             'longitude',
             'source_url'],
 'query': {'bool': {'must': [{'term': {'list_name': 'CVS'}},
                             {'match': {'city': 'St. Louis'}},
                             {'match': {'state': 'MO'}},
                             {'match': {'zip_code': {'boost': 2,
                                                     'fuzziness': 0,
                                                     'query': '63108'}}}],
                    'should': []}},
 'size': 10000}