LeMyst / WikibaseIntegrator

A Python module to manipulate data on a Wikibase instance (like Wikidata) through the MediaWiki Wikibase API and the Wikibase SPARQL endpoint.
MIT License
67 stars 14 forks source link

mediawiki_api_call_helper example results in an error #136

Closed dpriskorn closed 3 years ago

dpriskorn commented 3 years ago
from wikibaseintegrator import wbi_core

query = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'Q2',
    'rvlimit': 10
}

print(wbi_core.FunctionsEngine.mediawiki_api_call_helper(query, allow_anonymous=True))
->
Traceback (most recent call last):
  File "/home/egil/src/python/descriptionbot/test.py", line 10, in <module>
    print(wbi_core.FunctionsEngine.mediawiki_api_call_helper(query, allow_anonymous=True))
  File "/usr/lib/python3.9/site-packages/wikibaseintegrator/wbi_core.py", line 1130, in mediawiki_api_call_helper
    return FunctionsEngine.mediawiki_api_call('POST', mediawiki_api_url, login_session, data=data, headers=headers, max_retries=max_retries,
  File "/usr/lib/python3.9/site-packages/wikibaseintegrator/wbi_core.py", line 965, in mediawiki_api_call
    json_data = response.json()
  File "/usr/lib/python3.9/site-packages/requests/models.py", line 900, in json
    return complexjson.loads(self.text, **kwargs)
  File "/usr/lib/python3.9/site-packages/simplejson/__init__.py", line 525, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python3.9/site-packages/simplejson/decoder.py", line 370, in decode
    obj, end = self.raw_decode(s)
  File "/usr/lib/python3.9/site-packages/simplejson/decoder.py", line 400, in raw_decode
    return self.scan_once(s, idx=_w(s, idx).end())
simplejson.errors.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
dpriskorn commented 3 years ago

Is there any reason to have the FunctionsEngine in core? WBI would probably get cleaner and faster if the auxiliary functions were moved to, say, wbi_functions.

dpriskorn commented 3 years ago

Here is code that works with requests but NOT with WBI:

import requests
from wikibaseintegrator import wbi_core
from pprint import pprint

engine = wbi_core.FunctionsEngine()
    params = {
        'action': 'query',
        'list': 'search',
        'format': 'json',
        'utf8': '1',
        # All scientific articles without any description
        'srsearch': "haswbstatement:P31=Q13442814 -hasdescription:*",
    }
    pprint(params)
    data = requests.get(
        url="https://www.wikidata.org/w/api.php",
        params=params
    )
    pprint(data.json())
    data = engine.mediawiki_api_call(
        "GET",
        mediawiki_api_url="https://wikidata.org/w/api.php",
        data=params
    )
    pprint(data)

Result in ipython:

{'action': 'query',
 'format': 'json',
 'list': 'search',
 'srsearch': 'haswbstatement:P31=Q13442814 -hasdescription:*',
 'utf8': '1'}
{'batchcomplete': '',
 'continue': {'continue': '-||', 'sroffset': 10},
 'query': {'search': [{'ns': 0,
                       'pageid': 68611026,
                       'size': 10983,
                       'snippet': 'The Parkes-MIT-NRAO (PMN) surveys. III. '
                                  'Source catalog for the tropical survey '
                                  '(-29deg &lt; Delta &lt; -9.5deg)',
                       'timestamp': '2019-09-28T16:16:29Z',
                       'title': 'Q68943262',
                       'wordcount': 17},
                      {'ns': 0,
                       'pageid': 68610872,
                       'size': 10985,
                       'snippet': 'The Parkes-MIT-NRAO (PMN) Surveys. VI. '
                                  'Source Catalog for the Equatorial Survey '
                                  '(-9.5 deg &lt; Dec. &lt; +10.0 deg)',
                       'timestamp': '2019-09-28T16:13:51Z',
                       'title': 'Q68943107',
                       'wordcount': 20},
                      {'ns': 0,
                       'pageid': 68610923,
                       'size': 10983,
                       'snippet': 'The Parkes-MIT-NRAO (PMN) Surveys: II. '
                                  'Source catalog for the Southern Survey '
                                  '(-87deg.5 &lt; delta &lt; -37deg)',
                       'timestamp': '2019-09-28T16:14:40Z',
                       'title': 'Q68943158',
                       'wordcount': 17},
                      {'ns': 0,
                       'pageid': 66070093,
                       'size': 5125,
                       'snippet': 'Coordinates for objects requiring NED '
                                  'measured positions',
                       'timestamp': '2019-09-23T05:46:38Z',
                       'title': 'Q66469849',
                       'wordcount': 7},
                      {'ns': 0,
                       'pageid': 92956759,
                       'size': 23680,
                       'snippet': 'DNA vaccines',
                       'timestamp': '2020-07-05T05:30:36Z',
                       'title': 'Q93896322',
                       'wordcount': 2},
                      {'ns': 0,
                       'pageid': 66215541,
                       'size': 5289,
                       'snippet': 'La mesure des vitesses radiales au prisme '
                                  'objectif. XII. 5eme liste de vitesses '
                                  'radiales determinees au prisme objectif a '
                                  'vision directe',
                       'timestamp': '2019-09-24T15:54:45Z',
                       'title': 'Q66616967',
                       'wordcount': 21},
                      {'ns': 0,
                       'pageid': 66215563,
                       'size': 6349,
                       'snippet': 'La mesure des vitesses radiales au prisme '
                                  'objectif. XIII. 6eme liste de vitesses '
                                  'radiales determinees au prisme objectif a '
                                  'vision directe',
                       'timestamp': '2019-09-24T15:54:54Z',
                       'title': 'Q66616989',
                       'wordcount': 21},
                      {'ns': 0,
                       'pageid': 66215842,
                       'size': 7292,
                       'snippet': 'Radial velocities of southern B stars '
                                  'determined at the Radcliffe Observatory',
                       'timestamp': '2019-09-22T06:00:53Z',
                       'title': 'Q66617268',
                       'wordcount': 11},
                      {'ns': 0,
                       'pageid': 92992247,
                       'size': 20824,
                       'snippet': 'Chronic obstructive pulmonary disease',
                       'timestamp': '2020-07-11T07:43:46Z',
                       'title': 'Q93936786',
                       'wordcount': 4},
                      {'ns': 0,
                       'pageid': 66215642,
                       'size': 5122,
                       'snippet': "Etude cinematique et photometrique d'une "
                                  'region de Cas',
                       'timestamp': '2019-09-21T21:44:25Z',
                       'title': 'Q66617068',
                       'wordcount': 8}],
           'searchinfo': {'totalhits': 192457}},
 'warnings': {'search': {'*': 'The search timed out, only partial results are '
                              'available.'}}}
---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
<ipython-input-2-6126b53cc0a0> in <module>
     18     )
     19 pprint(data.json())
---> 20 data = engine.mediawiki_api_call(
     21         "GET",
     22         mediawiki_api_url="https://wikidata.org/w/api.php",

/usr/lib/python3.9/site-packages/wikibaseintegrator/wbi_core.py in mediawiki_api_call(method, mediawiki_api_url, session, max_retries, retry_after, **kwargs)
    963 
    964             response.raise_for_status()
--> 965             json_data = response.json()
    966             """
    967             Mediawiki api response has code = 200 even if there are errors.

/usr/lib/python3.9/site-packages/requests/models.py in json(self, **kwargs)
    898                     # used.
    899                     pass
--> 900         return complexjson.loads(self.text, **kwargs)
    901 
    902     @property

/usr/lib/python3.9/site-packages/simplejson/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, use_decimal, **kw)
    523             parse_constant is None and object_pairs_hook is None
    524             and not use_decimal and not kw):
--> 525         return _default_decoder.decode(s)
    526     if cls is None:
    527         cls = JSONDecoder

/usr/lib/python3.9/site-packages/simplejson/decoder.py in decode(self, s, _w, _PY3)
    368         if _PY3 and isinstance(s, bytes):
    369             s = str(s, self.encoding)
--> 370         obj, end = self.raw_decode(s)
    371         end = _w(s, end).end()
    372         if end != len(s):

/usr/lib/python3.9/site-packages/simplejson/decoder.py in raw_decode(self, s, idx, _w, _PY3)
    398             elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
    399                 idx += 3
--> 400         return self.scan_once(s, idx=_w(s, idx).end())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)
LeMyst commented 3 years ago

The issue with FunctionsEngine.mediawiki_api_call_helper comes from the format of the response. FunctionsEngine.mediawiki_api_call only accepts JSON as the response format, so you need to add 'format': 'json' to the params.

Like this:

from wikibaseintegrator import wbi_core

query = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'Q2',
    'rvlimit': 10,
    'format': 'json'
}

print(wbi_core.FunctionsEngine.mediawiki_api_call_helper(query, allow_anonymous=True))

It's already fixed in PR #129

If there is no format, WBI forces it to 'json'. If there is a format other than 'json', WBI raises an exception.