ckan / ckanapi

A command line interface and Python module for accessing the CKAN Action API
Other
176 stars 74 forks source link

Fail to filter by harvest_source_id #143

Closed avdata99 closed 4 years ago

avdata99 commented 4 years ago

This URL works for filtering packages by `harvest_source_id`: https://catalog.data.gov/api/action/package_search?rows=2&q=harvest_source_id:50104281-92a3-4534-9d38-141bc82276c5

But this fails:

from ckanapi import RemoteCKAN
# Reproduction script: for each harvest source ID, run a package_search
# filtered on that ID and verify that every returned package actually
# carries the same ID in its extras.
remote_ckan = RemoteCKAN('https://catalog.data.gov', user_agent='My User agent v 0.1')

# test several IDs
harvest_source_ids = [
    'de90314a-7c7d-4aff-bd84-87b134bba13d',  # Treasury JSON
    '50104281-92a3-4534-9d38-141bc82276c5',  # NYC JSON
    'afb32af7-87ba-4f27-ae5c-f0d4d0e039dc',  # CFPB JSON
]
for harvest_source_id in harvest_source_ids:
    print('***************************************************************')
    print(f'Filter harvest source id: {harvest_source_id}')
    print('***************************************************************')
    query = f'harvest_source_id:{harvest_source_id}'
    results = remote_ckan.action.package_search(q=query)

    packages = 0
    errors = []
    oks = 0
    for result in results['results']:
        packages += 1
        found = False
        # Tolerate packages without an 'extras' key instead of aborting the
        # whole run with a KeyError; they are reported as failures below.
        for extra in result.get('extras', []):
            if extra['key'] == 'harvest_source_id':
                found = True
                this_harvest_source_id = extra['value']
                if this_harvest_source_id == harvest_source_id:
                    oks += 1
                else:
                    errors.append(f'Bad Harves source ID {harvest_source_id} != {this_harvest_source_id}')

        # A package with no harvest_source_id extra at all cannot match the
        # filter, so count it as a failure rather than silently skipping it.
        if not found:
            package_name = result.get('name', '<unknown>')
            errors.append(
                f'Package {package_name} has no harvest_source_id extra '
                f'(expected {harvest_source_id})'
            )

    print(f'{packages} packages returned')
    print(f'Harvest source id OK: {oks}')
    print(f'Harvest source id fails: {len(errors)}')

    for error in errors:
        print(f'ERROR: {error}')