05_case_retrieval does not run

hapion1 commented 2 years ago

Hi, this is hopefully the last issue :)

Like the title says, notebook 05 does not run further than (my) ln [11] - for context please refer to the screenshot:

The complete output of this line is the following:

tensor([[-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -0.8637],
        [-1.0000, -1.0000, -1.0000,  ...,     nan,     nan,     nan],
        [-1.0000, -1.0000, -1.0000,  ...,     nan,     nan,     nan],
        ...,
        [-1.0000, -1.0000, -1.0000,  ...,     nan,     nan,     nan],
        [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -0.9264],
        [-1.0000, -1.0000, -1.0000,  ..., -1.0000, -1.0000, -0.9699]])

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_8568/1161536451.py in <module>
----> 1 xb,yb=dls.one_batch()

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastai-2.2.7-py3.7.egg/fastai/data/load.py in one_batch(self)
    146     def one_batch(self):
    147         if self.n is not None and len(self)==0: raise ValueError(f'This DataLoader does not contain any batches')
--> 148         with self.fake_l.no_multiproc(): res = first(self)
    149         if hasattr(self, 'it'): delattr(self, 'it')
    150         return res

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/basics.py in first(x, f, negate, **kwargs)
    545     x = iter(x)
    546     if f: x = filter_ex(x, f=f, negate=negate, gen=True, **kwargs)
--> 547     return next(x, None)
    548 
    549 # Cell

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastai-2.2.7-py3.7.egg/fastai/data/load.py in __iter__(self)
    109         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
    110             if self.device is not None: b = to_device(b, self.device)
--> 111             yield self.after_batch(b)
    112         self.after_iter()
    113         if hasattr(self, 'it'): del(self.it)

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in __call__(self, o)
    196         self.fs.append(t)
    197 
--> 198     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    199     def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
    200     def __getitem__(self,i): return self.fs[i]

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    148     for f in tfms:
    149         if not is_enc: f = f.decode
--> 150         x = f(x, **kwargs)
    151     return x
    152 

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in __call__(self, x, **kwargs)
    111     "A transform that always take tuples as items"
    112     _retain = True
--> 113     def __call__(self, x, **kwargs): return self._call1(x, '__call__', **kwargs)
    114     def decode(self, x, **kwargs):   return self._call1(x, 'decode', **kwargs)
    115     def _call1(self, x, name, **kwargs):

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in _call1(self, x, name, **kwargs)
    115     def _call1(self, x, name, **kwargs):
    116         if not _is_tuple(x): return getattr(super(), name)(x, **kwargs)
--> 117         y = getattr(super(), name)(list(x), **kwargs)
    118         if not self._retain: return y
    119         if is_listy(y) and not isinstance(y, tuple): y = tuple(y)

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in __call__(self, x, **kwargs)
     71     @property
     72     def name(self): return getattr(self, '_name', _get_name(self))
---> 73     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     74     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     75     def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
     81     def _call(self, fn, x, split_idx=None, **kwargs):
     82         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 83         return self._do_call(getattr(self, fn), x, **kwargs)
     84 
     85     def _do_call(self, f, x, **kwargs):

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     87             if f is None: return x
     88             ret = f.returns(x) if hasattr(f,'returns') else None
---> 89             return retain_type(f(x, **kwargs), x, ret)
     90         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     91         return retain_type(res, x)

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/dispatch.py in __call__(self, *args, **kwargs)
    116         elif self.inst is not None: f = MethodType(f, self.inst)
    117         elif self.owner is not None: f = MethodType(f, self.owner)
--> 118         return f(*args, **kwargs)
    119 
    120     def __get__(self, inst, owner):

~/Documents/MPPN/mppn/mppn/mppn.py in encodes(self, e)
    358         self.transformer=GramianAngularField(image_size=gs,sample_range=None, method="s", overlapping=True)
    359 
--> 360     def encodes(self,e): return _gaf_loop(e,self.transformer)
    361 
    362 

~/Documents/MPPN/mppn/mppn/mppn.py in _gaf_loop(e, transformer)
    335         inp=inp*2-1
    336         x=torch.stack(
--> 337             tuple(_gaf_attr(inp[:,i],transformer) for i in range(inp.shape[1]))
    338         ).transpose(0,1)
    339         x=x[:,:,None].expand(-1,-1,3,-1,-1)

~/Documents/MPPN/mppn/mppn/mppn.py in <genexpr>(.0)
    335         inp=inp*2-1
    336         x=torch.stack(
--> 337             tuple(_gaf_attr(inp[:,i],transformer) for i in range(inp.shape[1]))
    338         ).transpose(0,1)
    339         x=x[:,:,None].expand(-1,-1,3,-1,-1)

~/Documents/MPPN/mppn/mppn/mppn.py in _gaf_attr(x, transformer)
    346     except ValueError as e:
    347         print(x)
--> 348         raise e
    349     x=tensor(x).cuda()
    350     x = x * 255

~/Documents/MPPN/mppn/mppn/mppn.py in _gaf_attr(x, transformer)
    343     try:
    344 
--> 345         x = transformer.transform(x)
    346     except ValueError as e:
    347         print(x)

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pyts-0.11.0-py3.7.egg/pyts/image/gaf.py in transform(self, X)
    113 
    114         """
--> 115         X = check_array(X)
    116         n_samples, n_timestamps = X.shape
    117         image_size = self._check_params(n_timestamps)

~/anaconda3/envs/mppn/lib/python3.7/site-packages/scikit_learn-1.0-py3.7-linux-x86_64.egg/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    790 
    791         if force_all_finite:
--> 792             _assert_all_finite(array, allow_nan=force_all_finite == "allow-nan")
    793 
    794     if ensure_min_samples > 0:

~/anaconda3/envs/mppn/lib/python3.7/site-packages/scikit_learn-1.0-py3.7-linux-x86_64.egg/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
    114             raise ValueError(
    115                 msg_err.format(
--> 116                     type_err, msg_dtype if msg_dtype is not None else X.dtype
    117                 )
    118             )

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

ppfeiff commented 2 years ago

Have you verified that there are no "NaN" values or similar in your input batch? Which dataset are you using?

hapion1 commented 2 years ago

No, I did not verify that there are no NaN values etc. in the input, but I am using the preselected Mobis set.

When I choose another set like BPIC 12 or anything else from the EventLog class, the following error occurs:


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3079             try:
-> 3080                 return self._engine.get_loc(casted_key)
   3081             except KeyError as err:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index_class_helper.pxi in pandas._libs.index.Int64Engine._maybe_get_bool_indexer()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'type'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
/tmp/ipykernel_7540/287089386.py in <module>
      2         cat_names=['activity','type','resource'],
      3         cont_names=['cost'],date_names=['timestamp'],
----> 4         splits=splits, y_names=['activity','resource','timestamp_Relative_elapsed_minmax']
      5        )

~/Documents/MPPN2/mppn/mppn/preprocessing.py in __init__(self, df, procs, cat_names, cont_names, date_names, y_names, splits, ycat_names, ycont_names, inplace, do_setup)
     80         self.procs = Pipeline(procs)
     81         self.splits=splits
---> 82         if do_setup: self.setup()
     83 
     84 

~/Documents/MPPN2/mppn/mppn/preprocessing.py in setup(self)
     91             self.ycat_names,self.ycont_names=(L([i for i in L(y_names) if i in self.cat_names]),
     92                                                 L([i for i in L(y_names) if i not in self.cat_names]))
---> 93     def setup(self): self.procs.setup(self)
     94     def subset(self, i): return self.new(self.loc[self.splits[i]]) if self.splits else self
     95     def __len__(self): return len(np.unique(self.items.index))

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in setup(self, items, train_setup)
    190         tfms = self.fs[:]
    191         self.fs.clear()
--> 192         for t in tfms: self.add(t,items, train_setup)
    193 
    194     def add(self,t, items=None, train_setup=False):

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in add(self, t, items, train_setup)
    193 
    194     def add(self,t, items=None, train_setup=False):
--> 195         t.setup(items, train_setup)
    196         self.fs.append(t)
    197 

~/Documents/MPPN2/mppn/mppn/preprocessing.py in setup(self, items, train_setup)
    133     "Base class to write a non-lazy tabular processor for dataframes"
    134     def setup(self, items=None, train_setup=False): #TODO: properly deal with train_setup
--> 135         super().setup(getattr(items,'train',items), train_setup=False)
    136         #super().setup(items, train_setup=False)
    137 

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/transform.py in setup(self, items, train_setup)
     77     def setup(self, items=None, train_setup=False):
     78         train_setup = train_setup if self.train_setup is None else self.train_setup
---> 79         return self.setups(getattr(items, 'train', items) if train_setup else items)
     80 
     81     def _call(self, fn, x, split_idx=None, **kwargs):

~/anaconda3/envs/mppn/lib/python3.7/site-packages/fastcore-1.3.19-py3.7.egg/fastcore/dispatch.py in __call__(self, *args, **kwargs)
    116         elif self.inst is not None: f = MethodType(f, self.inst)
    117         elif self.owner is not None: f = MethodType(f, self.owner)
--> 118         return f(*args, **kwargs)
    119 
    120     def __get__(self, inst, owner):

~/Documents/MPPN2/mppn/mppn/preprocessing.py in setups(self, to)
    153     order = 2
    154     def setups(self, to):
--> 155         store_attr(classes={n:CategoryMap(to.items.loc[:,n], add_na=True) for n in to.cat_names}, but='to')
    156     def encodes(self, to):
    157         to.transform(to.cat_names, partial(_apply_cats, self.classes, 1))

~/Documents/MPPN2/mppn/mppn/preprocessing.py in <dictcomp>(.0)
    153     order = 2
    154     def setups(self, to):
--> 155         store_attr(classes={n:CategoryMap(to.items.loc[:,n], add_na=True) for n in to.cat_names}, but='to')
    156     def encodes(self, to):
    157         to.transform(to.cat_names, partial(_apply_cats, self.classes, 1))

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/indexing.py in __getitem__(self, key)
    887                     # AttributeError for IntervalTree get_value
    888                     return self.obj._get_value(*key, takeable=self._takeable)
--> 889             return self._getitem_tuple(key)
    890         else:
    891             # we by definition only have the 0th axis

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/indexing.py in _getitem_tuple(self, tup)
   1058     def _getitem_tuple(self, tup: Tuple):
   1059         with suppress(IndexingError):
-> 1060             return self._getitem_lowerdim(tup)
   1061 
   1062         # no multi-index, so validate all of the indexers

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/indexing.py in _getitem_lowerdim(self, tup)
    805                 # We don't need to check for tuples here because those are
    806                 #  caught by the _is_nested_tuple_indexer check above.
--> 807                 section = self._getitem_axis(key, axis=i)
    808 
    809                 # We should never have a scalar section here, because

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1122         # fall thru to straight lookup
   1123         self._validate_key(key, axis)
-> 1124         return self._get_label(key, axis=axis)
   1125 
   1126     def _get_slice_axis(self, slice_obj: slice, axis: int):

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/indexing.py in _get_label(self, label, axis)
   1071     def _get_label(self, label, axis: int):
   1072         # GH#5667 this will fail if the label is not present in the axis.
-> 1073         return self.obj.xs(label, axis=axis)
   1074 
   1075     def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
   3722         if axis == 1:
   3723             if drop_level:
-> 3724                 return self[key]
   3725             index = self.columns
   3726         else:

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/frame.py in __getitem__(self, key)
   3022             if self.columns.nlevels > 1:
   3023                 return self._getitem_multilevel(key)
-> 3024             indexer = self.columns.get_loc(key)
   3025             if is_integer(indexer):
   3026                 indexer = [indexer]

~/anaconda3/envs/mppn/lib/python3.7/site-packages/pandas-1.2.3-py3.7-linux-x86_64.egg/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3080                 return self._engine.get_loc(casted_key)
   3081             except KeyError as err:
-> 3082                 raise KeyError(key) from err
   3083 
   3084         if tolerance is not None:

KeyError: 'type'

ppfeiff commented 2 years ago

No matter which dataset you use, you need to make sure that there are no "NaN" (or similar) values in it. The transformations will not be able to scale them properly and will raise the mentioned error. I know that "cost" in MobIS has a lot of NaN values, as the value does not change in each event. You can either set "cost" to some other numeric value wherever it is "NaN", e.g. using this code, assuming data is a pandas dataframe:

data["cost"] = self.data["cost"].str.replace(",", ".")
data["cost"] = pd.to_numeric(self.data["cost"], downcast="signed", errors="coerce")
data["cost"].fillna(0, inplace=True)
data["cost"] = self.data["cost"].apply(lambda x: int(round(x, 0)))

or you remove "cost" from the cont_names of PPObj.

When using other datasets than MobIS you need to modify the cat_names, cont_names and date_names in PPObj. They need to match columns of the dataset you choose. cat for categorical attributes, cont for numerical and date for temporal.

joLahann / mppn

05_case_retrieval does not run #3