Closed dpriskorn closed 3 years ago
Is there any reason to have the FunctionsEngine in core? WBI would probably get cleaner and faster if the auxiliary functions were moved to, say, wbi_functions.
Here is code that works with requests but NOT with WBI:
# Minimal reproduction: the same query parameters are sent to the Wikidata
# API twice -- once directly with requests, once through WBI's
# FunctionsEngine.mediawiki_api_call -- and both results are pretty-printed.
import requests
from wikibaseintegrator import wbi_core
from pprint import pprint
engine = wbi_core.FunctionsEngine()
# Query parameters shared by both calls below.
params = {
'action': 'query',
'list': 'search',
'format': 'json',
'utf8': '1',
# All scientific articles without any description
'srsearch': "haswbstatement:P31=Q13442814 -hasdescription:*",
}
pprint(params)
# 1) Direct call with requests: the parameters are passed with params=
#    and the host is "www.wikidata.org".
data = requests.get(
url="https://www.wikidata.org/w/api.php",
params=params
)
pprint(data.json())
# 2) Same parameters through WBI: here they are passed with data= and the
#    host is "wikidata.org" (no "www."). This is the call the traceback
#    below marks as raising JSONDecodeError.
data = engine.mediawiki_api_call(
"GET",
mediawiki_api_url="https://wikidata.org/w/api.php",
data=params
)
pprint(data)
Result in ipython:
{'action': 'query',
'format': 'json',
'list': 'search',
'srsearch': 'haswbstatement:P31=Q13442814 -hasdescription:*',
'utf8': '1'}
{'batchcomplete': '',
'continue': {'continue': '-||', 'sroffset': 10},
'query': {'search': [{'ns': 0,
'pageid': 68611026,
'size': 10983,
'snippet': 'The Parkes-MIT-NRAO (PMN) surveys. III. '
'Source catalog for the tropical survey '
'(-29deg < Delta < -9.5deg)',
'timestamp': '2019-09-28T16:16:29Z',
'title': 'Q68943262',
'wordcount': 17},
{'ns': 0,
'pageid': 68610872,
'size': 10985,
'snippet': 'The Parkes-MIT-NRAO (PMN) Surveys. VI. '
'Source Catalog for the Equatorial Survey '
'(-9.5 deg < Dec. < +10.0 deg)',
'timestamp': '2019-09-28T16:13:51Z',
'title': 'Q68943107',
'wordcount': 20},
{'ns': 0,
'pageid': 68610923,
'size': 10983,
'snippet': 'The Parkes-MIT-NRAO (PMN) Surveys: II. '
'Source catalog for the Southern Survey '
'(-87deg.5 < delta < -37deg)',
'timestamp': '2019-09-28T16:14:40Z',
'title': 'Q68943158',
'wordcount': 17},
{'ns': 0,
'pageid': 66070093,
'size': 5125,
'snippet': 'Coordinates for objects requiring NED '
'measured positions',
'timestamp': '2019-09-23T05:46:38Z',
'title': 'Q66469849',
'wordcount': 7},
{'ns': 0,
'pageid': 92956759,
'size': 23680,
'snippet': 'DNA vaccines',
'timestamp': '2020-07-05T05:30:36Z',
'title': 'Q93896322',
'wordcount': 2},
{'ns': 0,
'pageid': 66215541,
'size': 5289,
'snippet': 'La mesure des vitesses radiales au prisme '
'objectif. XII. 5eme liste de vitesses '
'radiales determinees au prisme objectif a '
'vision directe',
'timestamp': '2019-09-24T15:54:45Z',
'title': 'Q66616967',
'wordcount': 21},
{'ns': 0,
'pageid': 66215563,
'size': 6349,
'snippet': 'La mesure des vitesses radiales au prisme '
'objectif. XIII. 6eme liste de vitesses '
'radiales determinees au prisme objectif a '
'vision directe',
'timestamp': '2019-09-24T15:54:54Z',
'title': 'Q66616989',
'wordcount': 21},
{'ns': 0,
'pageid': 66215842,
'size': 7292,
'snippet': 'Radial velocities of southern B stars '
'determined at the Radcliffe Observatory',
'timestamp': '2019-09-22T06:00:53Z',
'title': 'Q66617268',
'wordcount': 11},
{'ns': 0,
'pageid': 92992247,
'size': 20824,
'snippet': 'Chronic obstructive pulmonary disease',
'timestamp': '2020-07-11T07:43:46Z',
'title': 'Q93936786',
'wordcount': 4},
{'ns': 0,
'pageid': 66215642,
'size': 5122,
'snippet': "Etude cinematique et photometrique d'une "
'region de Cas',
'timestamp': '2019-09-21T21:44:25Z',
'title': 'Q66617068',
'wordcount': 8}],
'searchinfo': {'totalhits': 192457}},
'warnings': {'search': {'*': 'The search timed out, only partial results are '
'available.'}}}
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-2-6126b53cc0a0> in <module>
18 )
19 pprint(data.json())
---> 20 data = engine.mediawiki_api_call(
21 "GET",
22 mediawiki_api_url="https://wikidata.org/w/api.php",
/usr/lib/python3.9/site-packages/wikibaseintegrator/wbi_core.py in mediawiki_api_call(method, mediawiki_api_url, session, max_retries, retry_after, **kwargs)
963
964 response.raise_for_status()
--> 965 json_data = response.json()
966 """
967 Mediawiki api response has code = 200 even if there are errors.
/usr/lib/python3.9/site-packages/requests/models.py in json(self, **kwargs)
898 # used.
899 pass
--> 900 return complexjson.loads(self.text, **kwargs)
901
902 @property
/usr/lib/python3.9/site-packages/simplejson/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, use_decimal, **kw)
523 parse_constant is None and object_pairs_hook is None
524 and not use_decimal and not kw):
--> 525 return _default_decoder.decode(s)
526 if cls is None:
527 cls = JSONDecoder
/usr/lib/python3.9/site-packages/simplejson/decoder.py in decode(self, s, _w, _PY3)
368 if _PY3 and isinstance(s, bytes):
369 s = str(s, self.encoding)
--> 370 obj, end = self.raw_decode(s)
371 end = _w(s, end).end()
372 if end != len(s):
/usr/lib/python3.9/site-packages/simplejson/decoder.py in raw_decode(self, s, idx, _w, _PY3)
398 elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
399 idx += 3
--> 400 return self.scan_once(s, idx=_w(s, idx).end())
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The issue with FunctionsEngine.mediawiki_api_call_helper
comes from the format of the response.
FunctionsEngine.mediawiki_api_call
only accepts json as the format, so you need to add 'format': 'json'
to the params.
Like this:
# Example from the reply: call the MediaWiki API through
# FunctionsEngine.mediawiki_api_call_helper with a query dict that
# explicitly requests JSON output.
from wikibaseintegrator import wbi_core
query = {
'action': 'query',
'prop': 'revisions',
'titles': 'Q2',
'rvlimit': 10,
# The reply above says mediawiki_api_call only accepts json as the format,
# so it is set explicitly here.
'format': 'json'
}
# NOTE(review): allow_anonymous=True presumably lets the helper run without
# a login session -- confirm against the WBI documentation.
print(wbi_core.FunctionsEngine.mediawiki_api_call_helper(query, allow_anonymous=True))
It's already fixed in PR #129
If there is no format, WBI forces it to json. If there is a format other than 'json', WBI raises an exception.