mitre / menelaus

Online and batch-based concept and data drift detection algorithms to monitor and maintain ML performance.
https://menelaus.readthedocs.io/en/latest/
Apache License 2.0
66 stars · 7 forks · source link

Label Dirichlet Injector Issue #147

Status: Closed (951378644 closed this issue 1 year ago)

951378644 commented 1 year ago

Here is my code (`df_o` is a pandas DataFrame):

injector =  LabelDirichletInjector()
drift_start = 8000
drift_end = 12000
col = 'rain'
df = df_o.copy()
df = injector(df,drift_start,drift_end,col,{0:4,1:1})

Here is the error I got:

TypeError                                 Traceback (most recent call last)
File ~\Python\Lib\site-packages\pandas\core\indexes\base.py:3652, in Index.get_loc(self, key)
   3651 try:
-> 3652     return self._engine.get_loc(casted_key)
   3653 except KeyError as err:

File ~\Python\Lib\site-packages\pandas\_libs\index.pyx:147, in pandas._libs.index.IndexEngine.get_loc()

File ~\Python\Lib\site-packages\pandas\_libs\index.pyx:153, in pandas._libs.index.IndexEngine.get_loc()

TypeError: '(slice(None, None, None), 8)' is an invalid key

During handling of the above exception, another exception occurred:

InvalidIndexError                         Traceback (most recent call last)
Cell In[114], line 6
      4 col = 'rain'
      5 df = df_o.copy()
----> 6 df = injector(df,drift_start,drift_end,col,{0:4,1:1})

File ~\Downloads\menelaus-dev\menelaus\injection\label_manipulation.py:233, in LabelDirichletInjector.__call__(self, data, from_index, to_index, target_col, alpha)
    231 # use class_probability_shift with fully-specified distribution
    232 label_prob_injector = LabelProbabilityInjector()
--> 233 return label_prob_injector(
    234     data,
    235     from_index=from_index,
    236     to_index=to_index,
    237     target_col=target_col,
    238     class_probabilities=self._dirichlet_probabilities,
    239 )

File ~\Downloads\menelaus-dev\menelaus\injection\label_manipulation.py:161, in LabelProbabilityInjector.__call__(self, data, from_index, to_index, target_col, class_probabilities)
    159 # locate each class in window
    160 for cls in all_classes:
--> 161     cls_idx = np.where(data[:, target_col] == cls)[0]
    162     cls_idx = cls_idx[(cls_idx < to_index) & (cls_idx >= from_index)]
    164     # each member has p_class / class_size chance, represented as bool to avoid div/0

File ~\Python\Lib\site-packages\pandas\core\frame.py:3761, in DataFrame.__getitem__(self, key)
   3759 if self.columns.nlevels > 1:
   3760     return self._getitem_multilevel(key)
-> 3761 indexer = self.columns.get_loc(key)
   3762 if is_integer(indexer):
   3763     indexer = [indexer]

File ~\Python\Lib\site-packages\pandas\core\indexes\base.py:3659, in Index.get_loc(self, key)
   3654     raise KeyError(key) from err
   3655 except TypeError:
   3656     # If we have a listlike key, _check_indexing_error will raise
   3657     #  InvalidIndexError. Otherwise we fall through and re-raise
   3658     #  the TypeError.
-> 3659     self._check_indexing_error(key)
   3660     raise

File ~\Python\Lib\site-packages\pandas\core\indexes\base.py:5736, in Index._check_indexing_error(self, key)
   5732 def _check_indexing_error(self, key):
   5733     if not is_scalar(key):
   5734         # if key is not a scalar, directly raise an error (the code below
   5735         # would convert to numpy arrays and raise later any way) - GH29926
-> 5736         raise InvalidIndexError(key)

InvalidIndexError: (slice(None, None, None), 8)