Closed andylolz closed 6 years ago
Thanks @andylolz - I looked into upgrading the version of ckanapi
yesterday but found more substantive modifications to the crawler would also be required since the structure of the ckanapi responses has changed between the current and latest version.
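For example, ckanapi.action.package_show
returns a differently structured response for the same dataset API call: version 1.5 returns the full HTTP API response (the help, result and success keys), whereas version 4.1 returns only the contents of the result
part, as the two sessions below show: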
ckanapi==4.1
Dales-MacBook-Pro:temp dalepotter$ pip list
[...]
ckanapi (4.1)
[...]
Dales-MacBook-Pro:temp dalepotter$ python
Python 2.7.14 (default, Mar 22 2018, 16:30:24)
[GCC 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import ckanapi
>>> from pprint import pprint
>>> CKAN_API = 'https://iatiregistry.org'
>>> registry = ckanapi.RemoteCKAN(CKAN_API)
>>> name = 'buildafrica-org'
>>> ds_reg = registry.action.package_show(id=name)
>>> pprint(ds_reg)
{u'author': None,
u'author_email': u'kathryn@build-africa.org.uk',
u'creator_user_id': u'4d2f34e7-48f4-4eb1-ab8b-ec65aebf50ef',
u'extras': [{u'key': u'country', u'value': u''},
{u'key': u'data_updated', u'value': u'2013-09-26 17:00:54'},
{u'key': u'filetype', u'value': u'organisation'},
{u'key': u'iati_version', u'value': u'1.02'},
{u'key': u'language', u'value': u'en'},
{u'key': u'publisher_source_type', u'value': u'primary_source'},
{u'key': u'publisher_organization_type', u'value': u'21'},
{u'key': u'publisher_country', u'value': u'GB'},
{u'key': u'publisher_iati_id', u'value': u'GB-CHC-298316'},
{u'key': u'publisher_source_type', u'value': u'primary_source'},
{u'key': u'publisher_organization_type', u'value': u'21'},
{u'key': u'publisher_country', u'value': u'GB'},
{u'key': u'publisher_iati_id', u'value': u'GB-CHC-298316'}],
u'groups': [],
u'id': u'd973a953-1873-4464-a2d7-2c32f5d556f7',
u'isopen': True,
u'license_id': u'cc-by',
u'license_title': u'Creative Commons Attribution',
u'license_url': u'http://www.opendefinition.org/licenses/cc-by',
u'maintainer': None,
u'maintainer_email': None,
u'metadata_created': u'2013-09-26T17:01:02.301443',
u'metadata_modified': u'2018-01-31T03:24:54.062571',
u'name': u'buildafrica-org',
u'notes': None,
u'num_resources': 1,
u'num_tags': 0,
u'organization': {u'approval_status': u'approved',
u'created': u'2012-06-11T12:34:29.507254',
u'description': u'',
u'id': u'e23a5afe-ef7c-4476-bc9e-c9bd651d5ba4',
u'image_url': None,
u'is_organization': True,
u'name': u'buildafrica',
u'revision_id': u'47e931e5-bb70-46c4-89cc-64d6770d78f7',
u'state': u'active',
u'title': u'Build Africa',
u'type': u'organization'},
u'owner_org': u'e23a5afe-ef7c-4476-bc9e-c9bd651d5ba4',
u'private': False,
u'relationships_as_object': [],
u'relationships_as_subject': [],
u'resources': [{u'cache_last_updated': None,
u'cache_url': None,
u'description': u'',
u'format': u'iati-xml',
u'hash': u'4bf952440c5b681ff53865d98010491b84d2a29a',
u'id': u'3440eb91-6313-4630-acde-9d6cc1382e73',
u'last_modified': None,
u'mimetype': u'text/html',
u'mimetype_inner': None,
u'name': None,
u'package_id': u'd973a953-1873-4464-a2d7-2c32f5d556f7',
u'position': 0,
u'resource_type': None,
u'revision_id': u'40a295d0-e7d9-4e5e-83b3-19f89e2d7392',
u'size': 368,
u'state': u'active',
u'url': u'http://www.aidstream.org/files/xml/buildafrica-org.xml',
u'url_type': None}],
u'revision_id': u'418d5917-19d3-4b29-bc3f-56969e400959',
u'state': u'active',
u'tags': [],
u'title': u'Organisation File buildafrica-org',
u'type': u'dataset',
u'url': None,
u'version': None}
ckanapi==1.5
(pyenv2)Dales-MacBook-Pro:ckanapi-1.5 dalepotter$ pip list
ckanapi (1.5)
pip (6.0.8)
setuptools (12.0.5)
(pyenv2)Dales-MacBook-Pro:ckanapi-1.5 dalepotter$ python
Python 2.7.10 (default, Jul 14 2015, 19:46:27)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.39)] on darwin
>>> import ckanapi
>>> from pprint import pprint
>>> CKAN_API = 'https://iatiregistry.org'
>>> registry = ckanapi.RemoteCKAN(CKAN_API)
>>> name = 'buildafrica-org'
>>> ds_reg = registry.action.package_show(id=name)
>>> pprint(ds_reg)
{u'help': u'https://iatiregistry.org/api/3/action/help_show?name=package_show',
u'result': {u'author': None,
u'author_email': u'kathryn@build-africa.org.uk',
u'creator_user_id': u'4d2f34e7-48f4-4eb1-ab8b-ec65aebf50ef',
u'extras': [{u'key': u'country', u'value': u''},
{u'key': u'data_updated',
u'value': u'2013-09-26 17:00:54'},
{u'key': u'filetype', u'value': u'organisation'},
{u'key': u'iati_version', u'value': u'1.02'},
{u'key': u'language', u'value': u'en'},
{u'key': u'publisher_source_type',
u'value': u'primary_source'},
{u'key': u'publisher_organization_type',
u'value': u'21'},
{u'key': u'publisher_country', u'value': u'GB'},
{u'key': u'publisher_iati_id',
u'value': u'GB-CHC-298316'},
{u'key': u'publisher_source_type',
u'value': u'primary_source'},
{u'key': u'publisher_organization_type',
u'value': u'21'},
{u'key': u'publisher_country', u'value': u'GB'},
{u'key': u'publisher_iati_id',
u'value': u'GB-CHC-298316'}],
u'groups': [],
u'id': u'd973a953-1873-4464-a2d7-2c32f5d556f7',
u'isopen': True,
u'license_id': u'cc-by',
u'license_title': u'Creative Commons Attribution',
u'license_url': u'http://www.opendefinition.org/licenses/cc-by',
u'maintainer': None,
u'maintainer_email': None,
u'metadata_created': u'2013-09-26T17:01:02.301443',
u'metadata_modified': u'2018-01-31T03:24:54.062571',
u'name': u'buildafrica-org',
u'notes': None,
u'num_resources': 1,
u'num_tags': 0,
u'organization': {u'approval_status': u'approved',
u'created': u'2012-06-11T12:34:29.507254',
u'description': u'',
u'id': u'e23a5afe-ef7c-4476-bc9e-c9bd651d5ba4',
u'image_url': None,
u'is_organization': True,
u'name': u'buildafrica',
u'revision_id': u'47e931e5-bb70-46c4-89cc-64d6770d78f7',
u'state': u'active',
u'title': u'Build Africa',
u'type': u'organization'},
u'owner_org': u'e23a5afe-ef7c-4476-bc9e-c9bd651d5ba4',
u'private': False,
u'relationships_as_object': [],
u'relationships_as_subject': [],
u'resources': [{u'cache_last_updated': None,
u'cache_url': None,
u'description': u'',
u'format': u'iati-xml',
u'hash': u'4bf952440c5b681ff53865d98010491b84d2a29a',
u'id': u'3440eb91-6313-4630-acde-9d6cc1382e73',
u'last_modified': None,
u'mimetype': u'text/html',
u'mimetype_inner': None,
u'name': None,
u'package_id': u'd973a953-1873-4464-a2d7-2c32f5d556f7',
u'position': 0,
u'resource_type': None,
u'revision_id': u'40a295d0-e7d9-4e5e-83b3-19f89e2d7392',
u'size': 368,
u'state': u'active',
u'url': u'http://www.aidstream.org/files/xml/buildafrica-org.xml',
u'url_type': None}],
u'revision_id': u'418d5917-19d3-4b29-bc3f-56969e400959',
u'state': u'active',
u'tags': [],
u'title': u'Organisation File buildafrica-org',
u'type': u'dataset',
u'url': None,
u'version': None},
u'success': True}
This has implications for code such as https://github.com/IATI/IATI-Datastore/blob/master/iati_datastore/iatilib/crawler.py#L128-L154 which expects the full response. There appear to be other places in crawler.py
where modifications would be needed.
The two obvious options for #307 were to 1) make these modifications or 2) ditch use of ckanapi
and make API calls using requests
instead. Both options would have needed some sort of test for the problem (probably involving mocking a response - not sure how that would have worked with continued use of ckanapi).
As it happened, updating the URL resolved the problem so I didn't go down those routes. However, for this PR to be accepted, a choice of either option 1 or option 2 (or something else?!) would be needed.
Good catch @dalepotter, yes! I did this one quite quickly, and I’m afraid I didn’t test or check it thoroughly.
I’ve now fixed the tests to mock ckanapi 4.1 responses (b16fe52), and made the necessary updates (04ff096). (So, option 1.)
Refs https://github.com/IATI/IATI-Datastore/pull/307#issuecomment-380086152.