audeering / audinterface

Generic interfaces for signal processing
https://audeering.github.io/audinterface/
Other
4 stars 0 forks source link

audinterface.Segment fails when using *.process_signal as process_func #117

Open hagenw opened 1 year ago

hagenw commented 1 year ago

The documentation of audinterface.Segment states that any callable that returns a pandas.MultiIndex with the levels start and end can be used as the processing function.

This means it should work with the following VAD:

import audb
import audeer
import audiofile
import auvad

db = audb.load('emodb', version='1.4.1')
vad = auvad.Vad()
signal, sampling_rate = audiofile.read(audeer.path(db.root, db.files[0]))

Then we get:

>>> vad.process_signal(signal, sampling_rate)
MultiIndex([('0 days 00:00:00.120000', '0 days 00:00:01.760000')],
           names=['start', 'end'])

But when trying to use this with audinterface.Segment

import audinterface

interface = audinterface.Segment(process_func=vad.process_signal)

we get:

>>> interface.process_index(db.files[:1], root=db.root)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)                                                                                                                    
Cell In[5], line 1                                                                             
----> 1 interface.process_index(db.files[:1], root=db.root)        

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/audinterface/core/segment.py:400, in Segment.process_index(self, index, root)
    397 if index.empty:
    398     return index
--> 400 return self.process_files(
    401     index.get_level_values('file'),
    402     starts=index.get_level_values('start'),
    403     ends=index.get_level_values('end'),
    404     root=root,
    405 )

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/audinterface/core/segment.py:330, in Segment.process_files(self, files, starts, ends, root)
    328 for (file, start, _), index in y.items():
    329     files.extend([file] * len(index))
--> 330     starts.extend(index.levels[0] + start)
    331     ends.extend(index.levels[1] + start)
    333 return audformat.segmented_index(files, starts, ends)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/ops/common.py:81, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
     77             return NotImplemented
     79 other = item_from_zerodim(other)
---> 81 return method(self, other)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/arraylike.py:186, in OpsMixin.__add__(self, other)
     98 @unpack_zerodim_and_defer("__add__")
     99 def __add__(self, other):
    100     """
    101     Get Addition of DataFrame and other, column-wise.
    102 
   (...)
    184     moose     3.0     NaN
    185     """
--> 186     return self._arith_method(other, operator.add)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/indexes/base.py:6814, in Index._arith_method(self, other, op)
   6804 if (
   6805     isinstance(other, Index)
   6806     and is_object_dtype(other.dtype)
   (...)
   6810     # a chance to implement ops before we unwrap them.
   6811     # See https://github.com/pandas-dev/pandas/issues/31109
   6812     return NotImplemented
-> 6814 return super()._arith_method(other, op)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/base.py:1348, in IndexOpsMixin._arith_method(self, other, op)
   1345 rvalues = ensure_wrapped_if_datetimelike(rvalues)
   1347 with np.errstate(all="ignore"):
-> 1348     result = ops.arithmetic_op(lvalues, rvalues, op)
   1350 return self._construct_result(result, name=res_name)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/ops/array_ops.py:224, in arithmetic_op(left, right, op)
    211 # NB: We assume that extract_array and ensure_wrapped_if_datetimelike
    212 #  have already been called on `left` and `right`,
    213 #  and `maybe_prepare_scalar_for_op` has already been called on `right`
    214 # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy
    215 # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390)
    217 if (
    218     should_extension_dispatch(left, right)
    219     or isinstance(right, (Timedelta, BaseOffset, Timestamp))
   (...)
    222     # Timedelta/Timestamp and other custom scalars are included in the check
    223     # because numexpr will fail on it, see GH#31457
--> 224     res_values = op(left, right)
    225 else:
    226     # TODO we should handle EAs consistently and move this check before the if/else
    227     # (https://github.com/pandas-dev/pandas/issues/41165)
    228     _bool_arith_check(op, left, right)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/ops/common.py:81, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
     77             return NotImplemented
     79 other = item_from_zerodim(other)
---> 81 return method(self, other)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/arraylike.py:186, in OpsMixin.__add__(self, other)
     98 @unpack_zerodim_and_defer("__add__")
     99 def __add__(self, other):
    100     """
    101     Get Addition of DataFrame and other, column-wise.
    102 
   (...)
    184     moose     3.0     NaN
    185     """
--> 186     return self._arith_method(other, operator.add)

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/core/arrays/string_.py:541, in StringArray._cmp_method(self, other, op)
    539     result = np.empty_like(self._ndarray, dtype="object")
    540     result[mask] = libmissing.NA
--> 541     result[valid] = op(self._ndarray[valid], other)
    542     return StringArray(result)
    543 else:
    544     # logical

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyx:778, in pandas._libs.tslibs.timedeltas._binary_op_method_timedeltalike.f()

File /data/hwierstorf/.envs/projectsmile-bison-agent-tone-11.0.0/lib/python3.8/site-packages/pandas/_libs/tslibs/timedeltas.pyx:1903, in pandas._libs.tslibs.timedeltas.Timedelta.lambda4()

TypeError: unsupported operand type(s) for +: 'Timedelta' and 'str'
hagenw commented 1 year ago

The issue can be worked around by passing vad instead of vad.process_signal as process_func. But since vad.process_signal also returns the desired MultiIndex, it is not obvious why it should not work.