kevin1024 / vcrpy

Automatically mock your HTTP interactions to simplify and speed up testing
MIT License
2.71k stars 388 forks source link

HTTPX Responses are not supported for JSON serializer #651

Open YoniMelki opened 2 years ago

YoniMelki commented 2 years ago

Hi! First of all, I wanted to thank you for this package and also for the new httpx support!

I think that we should support a JSON serializer for recorded responses. Here is some code with YAML first:

import unittest

import httpx
from vcr import VCR
from vcr.record_mode import RecordMode

def scrub_malware_bazaar_sample_response(response: dict):
    """Overwrite the ``"content"`` entry of a recorded response dict.

    The ``response`` dict is mutated in place and the same object is
    returned.
    """
    # NOTE: the new value is an ``httpx.Response`` instance rather than a
    # plain string body; per the traceback in this report, this is the object
    # that ``json.dumps`` rejects ("Object of type Response is not JSON
    # serializable").
    response.update({"content": httpx.Response(status_code=200)})
    return response

# VCR configuration used by the test in this reproduction.
vcr = VCR(
    # Works with "yaml"; the report says switching to the JSON serializer
    # raises the TypeError shown in the traceback.
    serializer="yaml",
    record_mode=RecordMode.ONCE,
    path_transformer=VCR.ensure_suffix(".yaml"),
    # Hook that replaces the response's "content" entry before recording.
    before_record_response=scrub_malware_bazaar_sample_response,
    cassette_library_dir="fixtures/cassettes",
)

class TestPuller(unittest.IsolatedAsyncioTestCase):
    """Reproduction case: one ``httpx.post`` call made under a VCR cassette."""
    # The cassette directory for this test is a sub-directory of the shared
    # ``cassette_library_dir`` configured on ``vcr``.
    @vcr.use_cassette(
        cassette_library_dir=vcr.cassette_library_dir + "/malware_bazaar_samples",
        # presumably keeps the API key out of the recorded cassette --
        # confirm against the vcrpy ``filter_headers`` documentation
        filter_headers=["API-KEY"],
    )
    def test_puller_malware_bazaar_sample_when_request_succeed(self):
        # Plain (non-async) test method; it only issues the request and
        # prints the result, without asserting anything about the response.
        response = httpx.post(
            url="https://mb-api.abuse.ch/api/v1/",
            headers={"API-KEY": "Some Key"},
            data={"query": "get_recent", "selector": "time"},
        )
        print(response)

When running the test above, everything works fine and the "content" key is indeed updated with an httpx object. Here is how it looks:

Screen Shot 2022-07-13 at 9 49 59

However, when changing the serializer to JSON, we get an exception with the following traceback:

cassette_dict = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}

    def serialize(cassette_dict):
        error_message = (
            "Does this HTTP interaction contain binary data? "
            "If so, use a different serializer (like the yaml serializer) "
            "for this request?"
        )

        try:
>           return json.dumps(cassette_dict, indent=4) + "\n"

../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/serializers/jsonserializer.py:19: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

obj = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
skipkeys = False, ensure_ascii = True, check_circular = True, allow_nan = True
cls = <class 'json.encoder.JSONEncoder'>, indent = 4, separators = None
default = None

    def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
            allow_nan=True, cls=None, indent=None, separators=None,
            default=None, sort_keys=False, **kw):
        """Serialize ``obj`` to a JSON formatted ``str``.

        If ``skipkeys`` is true then ``dict`` keys that are not basic types
        (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
        instead of raising a ``TypeError``.

        If ``ensure_ascii`` is false, then the return value can contain non-ASCII
        characters if they appear in strings contained in ``obj``. Otherwise, all
        such characters are escaped in JSON strings.

        If ``check_circular`` is false, then the circular reference check
        for container types will be skipped and a circular reference will
        result in an ``RecursionError`` (or worse).

        If ``allow_nan`` is false, then it will be a ``ValueError`` to
        serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
        strict compliance of the JSON specification, instead of using the
        JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

        If ``indent`` is a non-negative integer, then JSON array elements and
        object members will be pretty-printed with that indent level. An indent
        level of 0 will only insert newlines. ``None`` is the most compact
        representation.

        If specified, ``separators`` should be an ``(item_separator, key_separator)``
        tuple.  The default is ``(', ', ': ')`` if *indent* is ``None`` and
        ``(',', ': ')`` otherwise.  To get the most compact JSON representation,
        you should specify ``(',', ':')`` to eliminate whitespace.

        ``default(obj)`` is a function that should return a serializable version
        of obj or raise TypeError. The default simply raises TypeError.

        If *sort_keys* is true (default: ``False``), then the output of
        dictionaries will be sorted by key.

        To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
        ``.default()`` method to serialize additional types), specify it with
        the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.

        """
        # cached encoder
        if (not skipkeys and ensure_ascii and
            check_circular and allow_nan and
            cls is None and indent is None and separators is None and
            default is None and not sort_keys and not kw):
            return _default_encoder.encode(obj)
        if cls is None:
            cls = JSONEncoder
>       return cls(
            skipkeys=skipkeys, ensure_ascii=ensure_ascii,
            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
            separators=separators, default=default, sort_keys=sort_keys,
            **kw).encode(obj)

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py:234: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <json.encoder.JSONEncoder object at 0x11034b940>
o = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
>           chunks = list(chunks)

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:201: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

o = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
_current_indent_level = 0

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
>           yield from _iterencode_dict(o, _current_indent_level)

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:431: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

dct = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
_current_indent_level = 1

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
>               yield from chunks

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:405: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

lst = [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encoding': ['gzip, defl...lf'; object-src 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}]
_current_indent_level = 2

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
>               yield from chunks

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:325: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

dct = {'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encoding': ['gzip, defla...elf'; object-src 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}
_current_indent_level = 3

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
>               yield from chunks

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:405: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

dct = {'content': <Response [200 OK]>, 'headers': {'Cache-Control': ['max-age=2592000'], 'Connection': ['Keep-Alive'], 'Cont...self'; object-src 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}
_current_indent_level = 4

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
>               yield from chunks

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:405: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

o = <Response [200 OK]>, _current_indent_level = 4

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
>           o = _default(o)

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:438: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <json.encoder.JSONEncoder object at 0x11034b940>, o = <Response [200 OK]>

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
>       raise TypeError(f'Object of type {o.__class__.__name__} '
                        f'is not JSON serializable')
E       TypeError: Object of type Response is not JSON serializable

/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:179: TypeError

During handling of the above exception, another exception occurred:
/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/unittest/async_case.py:64: in _callTestMethod
    self._callMaybeAsync(method)
/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/unittest/async_case.py:83: in _callMaybeAsync
    ret = func(*args, **kwargs)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:100: in __call__
    return type(self)(self.cls, args_getter)._execute_function(function, args, kwargs)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:114: in _execute_function
    return self._handle_function(fn=handle_function)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:138: in _handle_function
    return fn(cassette)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:91: in __exit__
    next(self.__finish, None)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:69: in _patch_generator
    cassette._save()
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:331: in _save
    self._persister.save_cassette(self._path, self._as_dict(), serializer=self._serializer)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/persisters/filesystem.py:20: in save_cassette
    data = serialize(cassette_dict, serializer)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/serialize.py:58: in serialize
    return serializer.serialize(data)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

cassette_dict = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}

    def serialize(cassette_dict):
        error_message = (
            "Does this HTTP interaction contain binary data? "
            "If so, use a different serializer (like the yaml serializer) "
            "for this request?"
        )

        try:
            return json.dumps(cassette_dict, indent=4) + "\n"
        except UnicodeDecodeError as original:  # py2
            raise UnicodeDecodeError(
                original.encoding,
                b"Error serializing cassette to JSON",
                original.start,
                original.end,
                original.args[-1] + error_message,
            )
        except TypeError:  # py3
>           raise TypeError(error_message)
E           TypeError: Does this HTTP interaction contain binary data? If so, use a different serializer (like the yaml serializer) for this request?

../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/serializers/jsonserializer.py:29: TypeError

I think that we should support binary data for JSON responses as well.

Thank you!

boechat107 commented 2 years ago

@YoniMelki, why are you replacing the response content with an httpx.Response object? Should this function just erase the content of the original response?

def scrub_malware_bazaar_sample_response(response: dict):
    # Quoted from the original report: replaces the "content" entry with an
    # ``httpx.Response`` instance and returns the same (mutated) dict.
    response.update({"content": httpx.Response(status_code=200)})
    return response

At the point of this update, the original httpx.Response was already serialized and the "content" was supposed to contain a UTF-8 string. Your code says you have a response whose content contains a nested response object. We would never get a real HTTP response like that.

YoniMelki commented 1 year ago

@boechat107 So what would you suggest doing? I still want all the fields of the response.