pyxem / kikuchipy

Toolbox for analysis of electron backscatter diffraction (EBSD) patterns
https://kikuchipy.org
GNU General Public License v3.0
79 stars 30 forks source link

Test suite failure in custom Dask chunking functions #565

Closed ericpre closed 1 year ago

ericpre commented 1 year ago

From https://github.com/hyperspy/hyperspy-extensions-list/actions/runs/3162982508/jobs/5150109391 - most likely related to a dask update.

=================================== FAILURES ===================================
__________________________ TestDask.test_chunk_bytes ___________________________

self = <kikuchipy.signals.util.tests.test_dask.TestDask object at 0x7fd56fbd1030>

    def test_chunk_bytes(self):
        s = LazyEBSD(da.zeros((32, 32, 256, 256), dtype=np.uint16))
        chunks = get_chunking(s, chunk_bytes=15e6)
>       assert chunks == ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,))
E       assert ((10, 10, 10,...256,), (256,)) == ((8, 8, 8, 8)...256,), (256,))
E         At index 0 diff: (10, 10, 10, 2) != (8, 8, 8, 8)
E         Full diff:
E         - ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,))
E         ?   ^  ^  ^  ^    ^  ^  ^  ^
E         + ((10, 10, 10, 2), (10, 10, 10, 2), (256,), (256,))
E         ?   ^^  ^^  ^^  ^    ^^  ^^  ^^  ^

/usr/share/miniconda3/lib/python3.10/site-packages/kikuchipy/signals/util/tests/test_dask.py:44: AssertionError
_______________________ TestDask.test_get_chunking_dtype _______________________

self = <kikuchipy.signals.util.tests.test_dask.TestDask object at 0x7fd56fbd2a40>

    def test_get_chunking_dtype(self):
        s = LazyEBSD(da.zeros((32, 32, 256, 256), dtype=np.uint8))
        chunks0 = get_chunking(s, dtype=np.float32)
        chunks1 = get_chunking(s)
>       assert chunks0 == ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,))
E       assert ((10, 10, 10,...256,), (256,)) == ((8, 8, 8, 8)...256,), (256,))
E         At index 0 diff: (10, 10, 10, 2) != (8, 8, 8, 8)
E         Full diff:
E         - ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,))
E         ?   ^  ^  ^  ^    ^  ^  ^  ^
E         + ((10, 10, 10, 2), (10, 10, 10, 2), (256,), (256,))
E         ?   ^^  ^^  ^^  ^    ^^  ^^  ^^  ^

/usr/share/miniconda3/lib/python3.10/site-packages/kikuchipy/signals/util/tests/test_dask.py:50: AssertionError
___ TestDask.test_get_chunking_no_signal[shape0-2-2-uint16-desired_chunks0] ____

self = <kikuchipy.signals.util.tests.test_dask.TestDask object at 0x7fd56fbd0a90>
shape = (32, 32, 256, 256), nav_dim = 2, sig_dim = 2
dtype = <class 'numpy.uint16'>
desired_chunks = ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,))

    @pytest.mark.parametrize(
        "shape, nav_dim, sig_dim, dtype, desired_chunks",
        [
            (
                (32, 32, 256, 256),
                2,
                2,
                np.uint16,
                ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,)),
            ),
            ((32, 32, 256, 256), 2, 2, np.uint8, ((16, 16), (16, 16), (256,), (256,))),
        ],
    )
    def test_get_chunking_no_signal(
        self, shape, nav_dim, sig_dim, dtype, desired_chunks
    ):
        chunks = get_chunking(
            data_shape=shape, nav_dim=nav_dim, sig_dim=sig_dim, dtype=dtype
        )
>       assert chunks == desired_chunks
E       assert ((15, 15, 2),...256,), (256,)) == ((8, 8, 8, 8)...256,), (256,))
E         At index 0 diff: (15, 15, 2) != (8, 8, 8, 8)
E         Full diff:
E         - ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,))
E         + ((15, 15, 2), (15, 15, 2), (256,), (256,))

/usr/share/miniconda3/lib/python3.10/site-packages/kikuchipy/signals/util/tests/test_dask.py:72: AssertionError
____ TestDask.test_get_chunking_no_signal[shape1-2-2-uint8-desired_chunks1] ____

self = <kikuchipy.signals.util.tests.test_dask.TestDask object at 0x7fd56fbd0190>
shape = (32, 32, 256, 256), nav_dim = 2, sig_dim = 2
dtype = <class 'numpy.uint8'>
desired_chunks = ((16, 16), (16, 16), (256,), (256,))

    @pytest.mark.parametrize(
        "shape, nav_dim, sig_dim, dtype, desired_chunks",
        [
            (
                (32, 32, 256, 256),
                2,
                2,
                np.uint16,
                ((8, 8, 8, 8), (8, 8, 8, 8), (256,), (256,)),
            ),
            ((32, 32, 256, 256), 2, 2, np.uint8, ((16, 16), (16, 16), (256,), (256,))),
        ],
    )
    def test_get_chunking_no_signal(
        self, shape, nav_dim, sig_dim, dtype, desired_chunks
    ):
        chunks = get_chunking(
            data_shape=shape, nav_dim=nav_dim, sig_dim=sig_dim, dtype=dtype
        )
>       assert chunks == desired_chunks
E       assert ((21, 11), (2...256,), (256,)) == ((16, 16), (1...256,), (256,))
E         At index 0 diff: (21, 11) != (16, 16)
E         Full diff:
E         - ((16, 16), (16, 16), (256,), (256,))
E         ?    -   ^     -   ^
E         + ((21, 11), (21, 11), (256,), (256,))
E         ?   +    ^    +    ^

/usr/share/miniconda3/lib/python3.10/site-packages/kikuchipy/signals/util/tests/test_dask.py:72: AssertionError
_____________________ TestDask.test_chunk_bytes_indirectly _____________________

self = <kikuchipy.signals.util.tests.test_dask.TestDask object at 0x7fd5706404c0>

    def test_chunk_bytes_indirectly(self):
        s = EBSD(np.zeros((10, 10, 8, 8)))
        array_out0 = get_dask_array(s)
        array_out1 = get_dask_array(s, chunk_bytes="25KiB")
        array_out2 = get_dask_array(s, chunk_bytes=25e3)
        assert array_out0.chunksize != array_out1.chunksize
>       assert array_out1.chunksize == array_out2.chunksize
E       assert (7, 7, 8, 8) == (6, 6, 8, 8)
E         At index 0 diff: 7 != 6
E         Full diff:
E         - (6, 6, 8, 8)
E         ?  ^  ^
E         + (7, 7, 8, 8)
E         ?  ^  ^

/usr/share/miniconda3/lib/python3.10/site-packages/kikuchipy/signals/util/tests/test_dask.py:90: AssertionError
hakonanes commented 1 year ago

Thank you for reporting this, @ericpre.

I cannot tell without looking into our chunking functions, but if the new behavior is a problem, we'll fix it and release a patch 0.6.2 within the next few days. If it's not a problem, we'll update the tests and release this as part of 0.7.0, which I believe we'll release within 2-3 weeks.

hakonanes commented 1 year ago

I don't see any change in performance following the slightly different chunks returned from dask.array.core.normalize_chunks() (used in the mentioned chunking functions), so I've updated the tests in #567, which should close this.