OSOceanAcoustics / echopype

Enabling interoperability and scalability in ocean sonar data analysis
https://echopype.readthedocs.io/
Apache License 2.0

remove_background_noise Failure #1332

Open ctuguinay opened 2 weeks ago

ctuguinay commented 2 weeks ago

Found in PR #1331: background noise removal fails during test_raw_to_mvbs in the test file echopype/tests/utils/test_processinglevels_integration.py. Not quite sure what the problem is yet, but I'll get to it later since it is out of the scope of #1331.
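For a quicker look, roughly this call sequence (pulled from the test below; the local data paths are assumptions about the test_data layout) triggers the error:

import echopype as ep

# Rough reproduction sketch; paths are assumptions based on the test fixtures below.
raw_path = "echopype/test_data/azfp/17082117.01A"
xml_path = "echopype/test_data/azfp/17041823.XML"

ed = ep.open_raw(raw_path, xml_path=xml_path, sonar_model="AZFP")
ds_Sv = ep.calibrate.compute_Sv(
    echodata=ed,
    env_params={
        "temperature": ed["Environment"]["temperature"].values.mean(),
        "salinity": 27.9,
        "pressure": 59,
    },
)

# Fails with: TypeError: Cannot compare dtypes int64 and datetime64[ns]
ep.clean.remove_background_noise(ds_Sv, ping_num=10, range_sample_num=20)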

______________________________________ test_raw_to_mvbs[AZFP-AZFP-raw_and_xml_paths1-extras1] ______________________________________

sonar_model = 'AZFP', path_model = 'AZFP', raw_and_xml_paths = ('17082117.01A', '17041823.XML')
extras = {'latitude': 45.0, 'longitude': -60.0, 'pressure': 59, 'salinity': 27.9}
test_path = {'AD2CP': PosixPath('/home/exouser/echopype/echopype/test_data/ad2cp'), 'AZFP': PosixPath('/home/exouser/echopype/echo...me/exouser/echopype/echopype/test_data/ea640'), 'ECS': PosixPath('/home/exouser/echopype/echopype/test_data/ecs'), ...}

    @pytest.mark.test
    @pytest.mark.parametrize(
        ["sonar_model", "path_model", "raw_and_xml_paths", "extras"],
        [
            pytest.param(
                "EK60",
                "EK60",
                ("Winter2017-D20170115-T150122.raw", None),
                {},
                # marks=pytest.mark.skipif(sys.platform == "win32", reason="Test data not available on windows tests"),
            ),
            pytest.param(
                "AZFP",
                "AZFP",
                ("17082117.01A", "17041823.XML"),
                {"longitude": -60.0, "latitude": 45.0, "salinity": 27.9, "pressure": 59},
                # marks=pytest.mark.skipif(sys.platform == "win32", reason="Test data not available on windows tests"),
            ),
        ],
    )
    def test_raw_to_mvbs(
            sonar_model,
            path_model,
            raw_and_xml_paths,
            extras,
            test_path
    ):
        # Prepare the Sv dataset
        raw_path = test_path[path_model] / raw_and_xml_paths[0]
        if raw_and_xml_paths[1]:
            xml_path = test_path[path_model] / raw_and_xml_paths[1]
        else:
            xml_path = None

        def _presence_test(test_ds, processing_level):
            assert "processing_level" in test_ds.attrs
            assert "processing_level_url" in test_ds.attrs
            assert test_ds.attrs["processing_level"] == processing_level

        def _absence_test(test_ds):
            assert "processing_level" not in test_ds.attrs
            assert "processing_level_url" not in test_ds.attrs

        # ---- Convert raw file and update_platform
        def _var_presence_notnan_test(name):
            if name in ed['Platform'].data_vars and not ed["Platform"][name].isnull().all():
                return True
            else:
                return False

        ed = ep.open_raw(raw_path, xml_path=xml_path, sonar_model=sonar_model)
        if _var_presence_notnan_test("longitude") and _var_presence_notnan_test("latitude"):
            _presence_test(ed["Top-level"], "Level 1A")
        elif "longitude" in extras and "latitude" in extras:
            _absence_test(ed["Top-level"])
            point_ds = xr.Dataset(
                {
                    "latitude": (["time"], np.array([float(extras["latitude"])])),
                    "longitude": (["time"], np.array([float(extras["longitude"])])),
                },
                coords={
                    "time": (["time"], np.array([ed["Sonar/Beam_group1"]["ping_time"].values.min()]))
                },
            )
            ed.update_platform(point_ds, variable_mappings={"latitude": "latitude", "longitude": "longitude"})
            _presence_test(ed["Top-level"], "Level 1A")
        else:
            _absence_test(ed["Top-level"])
            raise RuntimeError(
                "Platform latitude and longitude are not present and cannot be added "
                "using update_platform based on test raw file and included parameters."
            )

        # ---- Calibrate and add_latlon
        env_params = None
        if sonar_model == "AZFP":
            # AZFP data require external salinity and pressure
            env_params = {
                "temperature": ed["Environment"]["temperature"].values.mean(),
                "salinity": extras["salinity"],
                "pressure": extras["pressure"],
            }

        ds = ep.calibrate.compute_Sv(echodata=ed, env_params=env_params)
        _absence_test(ds)

        Sv_ds = ep.consolidate.add_location(ds=ds, echodata=ed)
        assert "longitude" in Sv_ds.data_vars and "latitude" in Sv_ds.data_vars
        _presence_test(Sv_ds, "Level 2A")

        # ---- Noise removal
>       denoised_ds = ep.clean.remove_background_noise(Sv_ds, ping_num=10, range_sample_num=20)

echopype/tests/utils/test_processinglevels_integration.py:103: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
echopype/utils/prov.py:237: in inner
    dataobj = func(*args, **kwargs)
echopype/clean/api.py:358: in remove_background_noise
    Sv_noise = estimate_background_noise(ds_Sv, ping_num, range_sample_num, noise_max=noise_max)
echopype/clean/api.py:310: in estimate_background_noise
    noise.reindex({"ping_time": power_cal["ping_time"]}, method="ffill")
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/util/deprecation_helpers.py:115: in inner
    return func(*args, **kwargs)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/dataarray.py:2176: in reindex
    return alignment.reindex(
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:999: in reindex
    aligner.align()
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:582: in align
    self.reindex_all()
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:557: in reindex_all
    self.results = tuple(
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:558: in <genexpr>
    self._reindex_one(obj, matching_indexes)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:544: in _reindex_one
    dim_pos_indexers = self._get_dim_pos_indexers(matching_indexes)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/alignment.py:510: in _get_dim_pos_indexers
    indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs)
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/indexes.py:823: in reindex_like
    return {self.dim: get_indexer_nd(self.index, other.index, method, tolerance)}
../miniforge3/envs/echopype/lib/python3.9/site-packages/xarray/core/indexes.py:561: in get_indexer_nd
    flat_indexer = index.get_indexer(flat_labels, method=method, tolerance=tolerance)
../miniforge3/envs/echopype/lib/python3.9/site-packages/pandas/core/indexes/base.py:3893: in get_indexer
    return self._get_indexer_non_comparable(target, method=method, unique=True)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = Index([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190, 200, 210, 220, 230],
      dtype='int64', name='ping_time')
target = DatetimeIndex(['2017-08-21 17:05:37', '2017-08-21 17:05:40',
               '2017-08-21 17:05:43', '2017-08-21 17:05:4...            '2017-08-21 17:53:31', '2017-08-21 17:53:34'],
              dtype='datetime64[ns]', length=240, freq=None)
method = 'pad', unique = True

    @final
    def _get_indexer_non_comparable(
        self, target: Index, method, unique: bool = True
    ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        Called from get_indexer or get_indexer_non_unique when the target
        is of a non-comparable dtype.

        For get_indexer lookups with method=None, get_indexer is an _equality_
        check, so non-comparable dtypes mean we will always have no matches.

        For get_indexer lookups with a method, get_indexer is an _inequality_
        check, so non-comparable dtypes mean we will always raise TypeError.

        Parameters
        ----------
        target : Index
        method : str or None
        unique : bool, default True
            * True if called from get_indexer.
            * False if called from get_indexer_non_unique.

        Raises
        ------
        TypeError
            If doing an inequality check, i.e. method is not None.
        """
        if method is not None:
            other_dtype = _unpack_nested_dtype(target)
>           raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}")
E           TypeError: Cannot compare dtypes int64 and datetime64[ns]

../miniforge3/envs/echopype/lib/python3.9/site-packages/pandas/core/indexes/base.py:6301: TypeError
ctuguinay commented 4 days ago

OK, so the problem happens here, in estimate_background_noise (echopype/clean/api.py):

spreading_loss = 20 * np.log10(ds_Sv["echo_range"].where(ds_Sv["echo_range"] >= 1, other=1))
absorption_loss = 2 * ds_Sv["sound_absorption"] * ds_Sv["echo_range"]

# Compute power binned averages
power_cal = _log2lin(ds_Sv["Sv"] - spreading_loss - absorption_loss)
power_cal_binned_avg = 10 * np.log10(
    power_cal.coarsen(
        ping_time=ping_num,
        range_sample=range_sample_num,
        boundary="pad",
    ).mean()
)

# Compute noise
noise = power_cal_binned_avg.min(dim="range_sample", skipna=True)

# Align noise `ping_time` to the first index of each coarsened `ping_time` bin
noise = noise.assign_coords(ping_time=ping_num * np.arange(len(noise["ping_time"])))

# Limit max noise level
noise = noise.where(noise < noise_max, noise_max) if noise_max is not None else noise

# Upsample noise to original ping time dimension
Sv_noise = (
    noise.reindex({"ping_time": power_cal["ping_time"]}, method="ffill")
    + spreading_loss
    + absorption_loss
)

When printing out the ping_time index for noise after the reassignment of coordinates, it is a plain integer index (0, 10, 20, ..., 230), while the original ping_time index for power_cal is a datetime64[ns] index of all 240 ping timestamps (2017-08-21 17:05:37 through 17:53:34), as shown in the traceback above.

That being the case, the reindex errors out, since a comparison between integers and datetime objects is being done within that function.
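One possible direction (just a sketch, not a vetted patch) is to keep ping_time as datetimes by assigning the first timestamp of each coarsened bin instead of integer positions, so both indexes in the reindex are datetime64[ns]. Here power_cal, noise, ping_num, spreading_loss, and absorption_loss refer to the same objects as in the estimate_background_noise code quoted above:

# Sketch only: align noise to the first ping_time of each coarsened bin,
# keeping the coordinate as datetime64 rather than integer positions.
first_ping_per_bin = power_cal["ping_time"].values[::ping_num]
noise = noise.assign_coords(ping_time=first_ping_per_bin[: noise.sizes["ping_time"]])

# Both indexes are now datetime64[ns], so the forward-fill reindex can compare them
Sv_noise = (
    noise.reindex({"ping_time": power_cal["ping_time"]}, method="ffill")
    + spreading_loss
    + absorption_loss
)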

The reason the existing remove_background_noise unit test passes is that the ping_time index of the mock Sv DataArray consists of integers rather than datetime objects:

def test_remove_background_noise():
    """Test remove_background_noise on toy data"""

    # Parameters for fake data
    nchan, npings, nrange_samples = 1, 10, 100
    chan = np.arange(nchan).astype(str)
    ping_index = np.arange(npings)
    range_sample = np.arange(nrange_samples)
    data = np.ones(nrange_samples)

    # Insert noise points
    np.put(data, 30, -30)
    np.put(data, 60, -30)
    # Add more pings
    data = np.array([data] * npings)
    # Make DataArray
    Sv = xr.DataArray(
        [data],
        coords=[
            ('channel', chan),
            ('ping_time', ping_index),
            ('range_sample', range_sample),
        ],
    )
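If the mock instead used a datetime64 ping_time like real data, the same TypeError should surface in the unit test. A sketch of that variant (the timestamps are arbitrary; the rest of the test setup would stay the same):

import numpy as np
import pandas as pd
import xarray as xr

nchan, npings, nrange_samples = 1, 10, 100
chan = np.arange(nchan).astype(str)
# Datetime ping_time instead of integer positions
ping_time = pd.date_range("2017-08-21 17:05:37", periods=npings, freq="3s")
range_sample = np.arange(nrange_samples)

Sv = xr.DataArray(
    np.ones((nchan, npings, nrange_samples)),
    coords=[
        ("channel", chan),
        ("ping_time", ping_time),
        ("range_sample", range_sample),
    ],
)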