getThumbnail does not handle all encodings in TileOutputMimeTypes

banesullivan commented 2 years ago

Run the following test and note that it fails for some encodings listed as supported:

Note that this is causing failures for django-large-image: https://github.com/girder/django-large-image/runs/8082224710?check_suite_focus=true

import pytest
import large_image
from large_image.constants import TileOutputMimeTypes

@pytest.fixture
def src():
    """Open the raster used to reproduce the issue with ``large_image.open``.

    The path is hard-coded to a file on the reporter's machine; substitute a
    local raster to run this elsewhere.
    """
    return large_image.open('/Users/bane/Desktop/rasters/converted.tif')

@pytest.mark.parametrize('encoding', TileOutputMimeTypes.keys())
def test_thumbnail(src, encoding):
    """Request a thumbnail in ``encoding`` and check that data comes back.

    Parametrized over every key of ``TileOutputMimeTypes``, so each listed
    encoding is exercised once against the ``src`` fixture.
    """
    # mime_type is unpacked but not checked; only the thumbnail data is asserted.
    thumb_data, mime_type = src.getThumbnail(encoding=encoding)
    assert thumb_data

================================================= test session starts ==================================================
platform darwin -- Python 3.9.13, pytest-7.1.2, pluggy-1.0.0 -- /Users/bane/anaconda3/envs/dli-dev/bin/python
cachedir: .pytest_cache
rootdir: /Users/bane/Software/ResonantGeoData
plugins: anyio-3.6.1, factoryboy-2.5.0, mock-3.7.0, django-s3-file-field-0.3.0, Faker-13.13.0, django-4.5.2, cov-3.0.0
collected 21 items

test_thumbnail.py::test_thumbnail[JPEG] PASSED                                                                   [  4%]
test_thumbnail.py::test_thumbnail[PNG] PASSED                                                                    [  9%]
test_thumbnail.py::test_thumbnail[TIFF] PASSED                                                                   [ 14%]
test_thumbnail.py::test_thumbnail[TILED] PASSED                                                                  [ 19%]
test_thumbnail.py::test_thumbnail[JFIF] PASSED                                                                   [ 23%]
test_thumbnail.py::test_thumbnail[BMP] PASSED                                                                    [ 28%]
test_thumbnail.py::test_thumbnail[DIB] PASSED                                                                    [ 33%]
test_thumbnail.py::test_thumbnail[PCX] PASSED                                                                    [ 38%]
test_thumbnail.py::test_thumbnail[EPS] PASSED                                                                    [ 42%]
test_thumbnail.py::test_thumbnail[GIF] PASSED                                                                    [ 47%]
test_thumbnail.py::test_thumbnail[JPEG2000] PASSED                                                               [ 52%]
test_thumbnail.py::test_thumbnail[ICNS] PASSED                                                                   [ 57%]
test_thumbnail.py::test_thumbnail[ICO] PASSED                                                                    [ 61%]
test_thumbnail.py::test_thumbnail[MPO] PASSED                                                                    [ 66%]
test_thumbnail.py::test_thumbnail[PALM] FAILED                                                                   [ 71%]
test_thumbnail.py::test_thumbnail[PDF] PASSED                                                                    [ 76%]
test_thumbnail.py::test_thumbnail[PPM] PASSED                                                                    [ 80%]
test_thumbnail.py::test_thumbnail[SGI] PASSED                                                                    [ 85%]
test_thumbnail.py::test_thumbnail[TGA] PASSED                                                                    [ 90%]
test_thumbnail.py::test_thumbnail[WEBP] PASSED                                                                   [ 95%]
test_thumbnail.py::test_thumbnail[XBM] FAILED                                                                    [100%]

======================================================= FAILURES =======================================================
_________________________________________________ test_thumbnail[PALM] _________________________________________________

src = GDALFileTileSource ('/Users/bane/Desktop/rasters/converted.tif', 'JPEG', 95, 0, 'raw', False, '__STYLESTART__', None, '__STYLEEND__'),None,None,None,None
encoding = 'PALM'

    @pytest.mark.parametrize('encoding', TileOutputMimeTypes.keys())
    def test_thumbnail(src, encoding):
>       thumb_data, mime_type = src.getThumbnail(encoding=encoding)

test_thumbnail.py:13:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../kw/large_image/large_image/cache_util/cache.py:90: in wrapper
    v = func(self, *args, **kwargs)
../kw/large_image/sources/gdal/large_image_source_gdal/__init__.py:1022: in getThumbnail
    return super().getThumbnail(width, height, **kwargs)
../kw/large_image/large_image/cache_util/cache.py:90: in wrapper
    v = func(self, *args, **kwargs)
../kw/large_image/large_image/tilesource/base.py:1619: in getThumbnail
    return self.getRegion(**params)
../kw/large_image/sources/gdal/large_image_source_gdal/__init__.py:1181: in getRegion
    return super().getRegion(format, **kwargs)
../kw/large_image/large_image/tilesource/base.py:1789: in getRegion
    return _encodeImage(image, format=format, **kwargs)
../kw/large_image/large_image/tilesource/utilities.py:144: in _encodeImage
    imageData = _encodeImageBinary(
../kw/large_image/large_image/tilesource/utilities.py:102: in _encodeImageBinary
    image.save(output, encoding, **params)
../../anaconda3/envs/dli-dev/lib/python3.9/site-packages/PIL/Image.py:2300: in save
    save_handler(self, fp, filename)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

im = <PIL.Image.Image image mode=RGB size=256x256 at 0x111E61580>, fp = <_io.BytesIO object at 0x1121615e0>
filename = ''

    def _save(im, fp, filename):

        if im.mode == "P":

            # we assume this is a color Palm image with the standard colormap,
            # unless the "info" dict has a "custom-colormap" field

            rawmode = "P"
            bpp = 8
            version = 1

        elif im.mode == "L":
            if im.encoderinfo.get("bpp") in (1, 2, 4):
                # this is 8-bit grayscale, so we shift it to get the high-order bits,
                # and invert it because
                # Palm does greyscale from white (0) to black (1)
                bpp = im.encoderinfo["bpp"]
                im = im.point(
                    lambda x, shift=8 - bpp, maxval=(1 << bpp) - 1: maxval - (x >> shift)
                )
            elif im.info.get("bpp") in (1, 2, 4):
                # here we assume that even though the inherent mode is 8-bit grayscale,
                # only the lower bpp bits are significant.
                # We invert them to match the Palm.
                bpp = im.info["bpp"]
                im = im.point(lambda x, maxval=(1 << bpp) - 1: maxval - (x & maxval))
            else:
                raise OSError(f"cannot write mode {im.mode} as Palm")

            # we ignore the palette here
            im.mode = "P"
            rawmode = "P;" + str(bpp)
            version = 1

        elif im.mode == "1":

            # monochrome -- write it inverted, as is the Palm standard
            rawmode = "1;I"
            bpp = 1
            version = 0

        else:

>           raise OSError(f"cannot write mode {im.mode} as Palm")
E           OSError: cannot write mode RGB as Palm

../../anaconda3/envs/dli-dev/lib/python3.9/site-packages/PIL/PalmImagePlugin.py:157: OSError
_________________________________________________ test_thumbnail[XBM] __________________________________________________

src = GDALFileTileSource ('/Users/bane/Desktop/rasters/converted.tif', 'JPEG', 95, 0, 'raw', False, '__STYLESTART__', None, '__STYLEEND__'),None,None,None,None
encoding = 'XBM'

    @pytest.mark.parametrize('encoding', TileOutputMimeTypes.keys())
    def test_thumbnail(src, encoding):
>       thumb_data, mime_type = src.getThumbnail(encoding=encoding)

test_thumbnail.py:13:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../kw/large_image/large_image/cache_util/cache.py:90: in wrapper
    v = func(self, *args, **kwargs)
../kw/large_image/sources/gdal/large_image_source_gdal/__init__.py:1022: in getThumbnail
    return super().getThumbnail(width, height, **kwargs)
../kw/large_image/large_image/cache_util/cache.py:90: in wrapper
    v = func(self, *args, **kwargs)
../kw/large_image/large_image/tilesource/base.py:1619: in getThumbnail
    return self.getRegion(**params)
../kw/large_image/sources/gdal/large_image_source_gdal/__init__.py:1181: in getRegion
    return super().getRegion(format, **kwargs)
../kw/large_image/large_image/tilesource/base.py:1789: in getRegion
    return _encodeImage(image, format=format, **kwargs)
../kw/large_image/large_image/tilesource/utilities.py:144: in _encodeImage
    imageData = _encodeImageBinary(
../kw/large_image/large_image/tilesource/utilities.py:102: in _encodeImageBinary
    image.save(output, encoding, **params)
../../anaconda3/envs/dli-dev/lib/python3.9/site-packages/PIL/Image.py:2300: in save
    save_handler(self, fp, filename)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

im = <PIL.Image.Image image mode=RGB size=256x256 at 0x1123BAFA0>, fp = <_io.BytesIO object at 0x111b0b680>
filename = ''

    def _save(im, fp, filename):

        if im.mode != "1":
>           raise OSError(f"cannot write mode {im.mode} as XBM")
E           OSError: cannot write mode RGB as XBM

../../anaconda3/envs/dli-dev/lib/python3.9/site-packages/PIL/XbmImagePlugin.py:73: OSError
=============================================== short test summary info ================================================
FAILED test_thumbnail.py::test_thumbnail[PALM] - OSError: cannot write mode RGB as Palm
FAILED test_thumbnail.py::test_thumbnail[XBM] - OSError: cannot write mode RGB as XBM
============================================= 2 failed, 19 passed in 1.85s =============================================

banesullivan commented 2 years ago

django-large-image is failing for these formats:

FAILED example/core/tests/test_data.py::test_thumbnail[pcx] - ValueError: Can...
FAILED example/core/tests/test_data.py::test_thumbnail[eps] - ValueError: ima...
FAILED example/core/tests/test_data.py::test_thumbnail[mpo] - OSError: cannot...
FAILED example/core/tests/test_data.py::test_thumbnail[palm] - OSError: canno...
FAILED example/core/tests/test_data.py::test_thumbnail[pdf] - ValueError: can...
FAILED example/core/tests/test_data.py::test_thumbnail[xbm] - OSError: cannot...

manthey commented 2 years ago

Curious. Some formats require certain image modes (e.g., based on the error above, palm MUST be black and white, not greyscale or color). I wonder if there is a way to either coerce the image mode to satisfy the format or determine when they are incompatible and not list them.

banesullivan commented 2 years ago

> I wonder if there is a way to either coerce the image mode to satisfy the format

This would be preferred IMO

banesullivan commented 2 years ago

At the very least, I would expect opening the image with the same encoding to work. E.g.:

src = large_image.open('/Users/bane/Desktop/rasters/converted.tif', encoding='PALM')
thumb_data, mime_type = src.getThumbnail(encoding='PALM')

_________________________________________________ test_thumbnail[PALM] _________________________________________________

encoding = 'PALM'

    @pytest.mark.parametrize('encoding', TileOutputMimeTypes.keys())
    def test_thumbnail(encoding):
        src = large_image.open('/Users/bane/Desktop/rasters/converted.tif', encoding=encoding)
>       thumb_data, mime_type = src.getThumbnail(encoding=encoding)

test_thumbnail.py:14:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../kw/large_image/large_image/cache_util/cache.py:90: in wrapper
    v = func(self, *args, **kwargs)
../kw/large_image/sources/gdal/large_image_source_gdal/__init__.py:1022: in getThumbnail
    return super().getThumbnail(width, height, **kwargs)
../kw/large_image/large_image/cache_util/cache.py:90: in wrapper
    v = func(self, *args, **kwargs)
../kw/large_image/large_image/tilesource/base.py:1619: in getThumbnail
    return self.getRegion(**params)
../kw/large_image/sources/gdal/large_image_source_gdal/__init__.py:1181: in getRegion
    return super().getRegion(format, **kwargs)
../kw/large_image/large_image/tilesource/base.py:1789: in getRegion
    return _encodeImage(image, format=format, **kwargs)
../kw/large_image/large_image/tilesource/utilities.py:144: in _encodeImage
    imageData = _encodeImageBinary(
../kw/large_image/large_image/tilesource/utilities.py:102: in _encodeImageBinary
    image.save(output, encoding, **params)
../../anaconda3/envs/dli-dev/lib/python3.9/site-packages/PIL/Image.py:2300: in save
    save_handler(self, fp, filename)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

im = <PIL.Image.Image image mode=RGB size=256x256 at 0x1971F0880>, fp = <_io.BytesIO object at 0x1974d8a90>
filename = ''

    def _save(im, fp, filename):

        if im.mode == "P":

            # we assume this is a color Palm image with the standard colormap,
            # unless the "info" dict has a "custom-colormap" field

            rawmode = "P"
            bpp = 8
            version = 1

        elif im.mode == "L":
            if im.encoderinfo.get("bpp") in (1, 2, 4):
                # this is 8-bit grayscale, so we shift it to get the high-order bits,
                # and invert it because
                # Palm does greyscale from white (0) to black (1)
                bpp = im.encoderinfo["bpp"]
                im = im.point(
                    lambda x, shift=8 - bpp, maxval=(1 << bpp) - 1: maxval - (x >> shift)
                )
            elif im.info.get("bpp") in (1, 2, 4):
                # here we assume that even though the inherent mode is 8-bit grayscale,
                # only the lower bpp bits are significant.
                # We invert them to match the Palm.
                bpp = im.info["bpp"]
                im = im.point(lambda x, maxval=(1 << bpp) - 1: maxval - (x & maxval))
            else:
                raise OSError(f"cannot write mode {im.mode} as Palm")

            # we ignore the palette here
            im.mode = "P"
            rawmode = "P;" + str(bpp)
            version = 1

        elif im.mode == "1":

            # monochrome -- write it inverted, as is the Palm standard
            rawmode = "1;I"
            bpp = 1
            version = 0

        else:

>           raise OSError(f"cannot write mode {im.mode} as Palm")
E           OSError: cannot write mode RGB as Palm

../../anaconda3/envs/dli-dev/lib/python3.9/site-packages/PIL/PalmImagePlugin.py:157: OSError

manthey commented 2 years ago

So we can test which modes/formats work and be more intelligent about how we handle them, but at a cost of ~90ms startup time on my test machine:

def test():
    """Probe which image modes can be saved for each key of ``PIL.Image.MIME``.

    For every format key and each of the modes RGBA, RGB, L and 1, a 32x32
    image is saved to an in-memory buffer. Pairs that save without raising
    are collected per format. The elapsed wall-clock time in seconds and the
    resulting ``{format: [modes]}`` mapping are printed.
    """
    started = time.time()
    candidate_modes = ['RGBA', 'RGB', 'L', '1']
    allowed = {}
    for fmt in PIL.Image.MIME:
        for mode in candidate_modes:
            image = PIL.Image.new(mode, (32, 32))
            buffer = io.BytesIO()
            try:
                image.save(buffer, fmt)
            except Exception:
                # A failed save only means this mode/format pair is unusable;
                # skip it and keep probing the remaining combinations.
                continue
            allowed.setdefault(fmt, []).append(image.mode)
    print(time.time() - started)
    print(allowed)

banesullivan commented 2 years ago

Could we implement a SafeTileOutputMimeTypes value that computes that lazily?

manthey commented 2 years ago

See https://github.com/girder/large_image/pull/951 for a fix. This has a blacklist for a few formats -- the icon formats output fixed sizes, so they don't work as general output formats. And, some formats aren't readable with PIL (at least not from a byte string), so they are harder to test.

This will gracefully fall back to lower color modes for formats that require it.

girder / large_image

getThumbnail does not handle all encodings in TileOutputMimeTypes #950