I am working with asset price data and I would like to perform a log transform as part of a preprocessing step to ensure that the model never predicts negative prices. I tried what I thought was the obvious thing to do.
X = darts.TimeSeries.from_series(asset["price"])
preprocessor = dataprocessing.transformers.InvertibleMapper(fn=np.log, inverse_fn=np.exp)
Z = preprocessor.transform(X)
model = models.ARIMA()
model.fit(Z)
So far so good. I then make my predictions. I am using the model for probabilistic forecasting, so I am going to generate N forecasts.
predictions = model.predict(T, num_samples=N)
I then need to apply the inverse transform on my predictions to get back to the original price forecasts.
preprocessor.inverse_transform(predictions)
However, this last call generates an error.
[2021-12-09 18:02:35,340] ERROR | main_logger | AssertionError: The pd_dataframe() method can only return DataFrames of deterministic time series, and this series is not deterministic (it contains several samples). Consider calling quantile_df() instead.
[2021-12-09 18:02:35,340] ERROR | main_logger | AssertionError: The pd_dataframe() method can only return DataFrames of deterministic time series, and this series is not deterministic (it contains several samples). Consider calling quantile_df() instead.
[2021-12-09 18:02:35,340] ERROR | main_logger | AssertionError: The pd_dataframe() method can only return DataFrames of deterministic time series, and this series is not deterministic (it contains several samples). Consider calling quantile_df() instead.
[2021-12-09 18:02:35,340] ERROR | main_logger | AssertionError: The pd_dataframe() method can only return DataFrames of deterministic time series, and this series is not deterministic (it contains several samples). Consider calling quantile_df() instead.
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
/var/folders/69/7vb352bd765cb9jwg51q78n40000gp/T/ipykernel_13515/4062002826.py in <module>
1 val.plot()
----> 2 preprocessor.inverse_transform(predictions)
3 #plt.yscale("log")
~/Research/crypto-research/env/lib/python3.9/site-packages/darts/dataprocessing/transformers/mappers.py in inverse_transform(self, series, *args, **kwargs)
115 *args, **kwargs) -> Union[TimeSeries, List[TimeSeries]]:
116 # adding the inverse_fn param
--> 117 return super().inverse_transform(series, inverse_fn=self._inverse_fn, *args, **kwargs)
~/Research/crypto-research/env/lib/python3.9/site-packages/darts/dataprocessing/transformers/invertible_data_transformer.py in inverse_transform(self, series, *args, **kwargs)
150 total=len(data))
151
--> 152 transformed_data = _parallel_apply(input_iterator, self.__class__.ts_inverse_transform,
153 self._n_jobs, args, kwargs)
154
~/Research/crypto-research/env/lib/python3.9/site-packages/darts/utils/utils.py in _parallel_apply(iterator, fn, n_jobs, fn_args, fn_kwargs)
234 """
235
--> 236 returned_data = Parallel(n_jobs=n_jobs)(delayed(fn)(*sample, *fn_args, **fn_kwargs)
237 for sample in iterator)
238 return returned_data
~/Research/crypto-research/env/lib/python3.9/site-packages/joblib/parallel.py in __call__(self, iterable)
1041 # remaining jobs.
1042 self._iterating = False
-> 1043 if self.dispatch_one_batch(iterator):
1044 self._iterating = self._original_iterator is not None
1045
~/Research/crypto-research/env/lib/python3.9/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
859 return False
860 else:
--> 861 self._dispatch(tasks)
862 return True
863
~/Research/crypto-research/env/lib/python3.9/site-packages/joblib/parallel.py in _dispatch(self, batch)
777 with self._lock:
778 job_idx = len(self._jobs)
--> 779 job = self._backend.apply_async(batch, callback=cb)
780 # A job can complete so quickly than its callback is
781 # called before we get here, causing self._jobs to
~/Research/crypto-research/env/lib/python3.9/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~/Research/crypto-research/env/lib/python3.9/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~/Research/crypto-research/env/lib/python3.9/site-packages/joblib/parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/Research/crypto-research/env/lib/python3.9/site-packages/joblib/parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~/Research/crypto-research/env/lib/python3.9/site-packages/darts/dataprocessing/transformers/mappers.py in ts_inverse_transform(series, inverse_fn)
103 inverse_fn: Union[Callable[[np.number], np.number],
104 Callable[[pd.Timestamp, np.number], np.number]]) -> TimeSeries:
--> 105 return series.map(inverse_fn)
106
107 def transform(self,
~/Research/crypto-research/env/lib/python3.9/site-packages/darts/timeseries.py in map(self, fn)
1820
1821 if num_args == 1: # simple map function f(x)
-> 1822 df = self.pd_dataframe().applymap(fn)
1823 elif num_args == 2: # map function uses timestamp f(timestamp, x)
1824 def apply_fn_wrapper(row):
~/Research/crypto-research/env/lib/python3.9/site-packages/darts/timeseries.py in pd_dataframe(self, copy)
740 """
741 if not self.is_deterministic:
--> 742 raise_log(AssertionError('The pd_dataframe() method can only return DataFrames of deterministic '
743 'time series, and this series is not deterministic (it contains several samples). '
744 'Consider calling quantile_df() instead.'))
~/Research/crypto-research/env/lib/python3.9/site-packages/darts/logging.py in raise_log(exception, logger)
99 logger.error(exception_type + ": " + message)
100
--> 101 raise exception
102
103
AssertionError: The pd_dataframe() method can only return DataFrames of deterministic time series, and this series is not deterministic (it contains several samples). Consider calling quantile_df() instead.
Conceptually this seems like it should work fine: I am asking to apply the inverse_fn to each of the N predictions and return the result. I assume it is a simple user error on my part, but I can't find anything in the docs to solve my issue.
Hi @davidrpugh. Unfortunately this is a bug with TimeSeries.map() on the current version of Darts, which does not (yet) work on stochastic series. We will fix this one asap in an upcoming version.
Thanks for the awesome library!
I am working with asset price data and I would like to perform a log transform as part of a preprocessing step to ensure that the model never predicts negative prices. I tried what I thought was the obvious thing to do.
So far so good. I then make my predictions. I am using the model for probabilistic forecasting, so I am going to generate N forecasts. I then need to apply the inverse transform on my predictions to get back to the original price forecasts.
However, this last call generates an error.
Conceptually this seems like it should work fine: I am asking to apply the inverse_fn to each of the N predictions and return the result. I assume it is a simple user error on my part, but I can't find anything in the docs to solve my issue.