alkaline-ml / pmdarima

A statistical library designed to fill the void in Python's time series analysis capabilities, including the equivalent of R's auto.arima function.
https://www.alkaline-ml.com/pmdarima
MIT License
1.58k stars 234 forks source link

MaybeEncodingError while following example notebook #25

Closed Sprinting closed 6 years ago

Sprinting commented 6 years ago

Description

Example: MaybeEncodingError raised when following example notebook

Steps/Code to Reproduce


# coding: utf-8

# Reproduction script for the MaybeEncodingError reported in this issue.
# It is a notebook exported to a flat script: each `# In[n]:` marker is one
# notebook cell, executed top to bottom. The cells preceding In[7] are not
# implicated by the traceback; only the final random-search cell (In[7]) is.

# In[1]:

import numpy as np
import pyramid

# Record the installed versions so the failure can be matched to an environment.
print('numpy version: %r' % np.__version__)
print('pyramid version: %r' % pyramid.__version__)

# In[2]:

# this is a dataset from R
# Laid out one row per year with one column per month (see the header
# comment inside the array); the final row is partial and stops at Aug.
wineind = np.array([
    # Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct    Nov    Dec
    15136, 16733, 20016, 17708, 18019, 19227, 22893, 23739, 21133, 22591, 26786, 29740, 
    15028, 17977, 20008, 21354, 19498, 22125, 25817, 28779, 20960, 22254, 27392, 29945, 
    16933, 17892, 20533, 23569, 22417, 22084, 26580, 27454, 24081, 23451, 28991, 31386, 
    16896, 20045, 23471, 21747, 25621, 23859, 25500, 30998, 24475, 23145, 29701, 34365, 
    17556, 22077, 25702, 22214, 26886, 23191, 27831, 35406, 23195, 25110, 30009, 36242, 
    18450, 21845, 26488, 22394, 28057, 25451, 24872, 33424, 24052, 28449, 33533, 37351, 
    19969, 21701, 26249, 24493, 24603, 26485, 30723, 34569, 26689, 26157, 32064, 38870, 
    21337, 19419, 23166, 28286, 24570, 24001, 33151, 24878, 26804, 28967, 33311, 40226, 
    20504, 23060, 23562, 27562, 23940, 24584, 34303, 25517, 23494, 29095, 32903, 34379, 
    16991, 21109, 23740, 25552, 21752, 20294, 29009, 25500, 24166, 26960, 31222, 38641, 
    14672, 17543, 25453, 32683, 22449, 22316, 27595, 25451, 25421, 25288, 32568, 35110, 
    16052, 22146, 21198, 19543, 22084, 23816, 29961, 26773, 26635, 26972, 30207, 38687, 
    16974, 21697, 24179, 23757, 25013, 24019, 30345, 24488, 25156, 25650, 30923, 37240, 
    17466, 19463, 24352, 26805, 25236, 24735, 29356, 31234, 22724, 28496, 32857, 37198, 
    13652, 22784, 23565, 26323, 23779, 27549, 29660, 23356]
).astype(np.float64)

# In[3]:

from pyramid.arima import ARIMA

# Fit a single ARIMA with an explicit seasonal order.
fit = ARIMA(order=(1, 1, 1), seasonal_order=(0, 1, 1, 12)).fit(y=wineind)

# In[4]:

# Same `order`, but with seasonal_order=None. Note that this rebinds `fit`,
# so the model fitted in the previous cell is no longer referenced.
fit = ARIMA(order=(1, 1, 1), seasonal_order=None).fit(y=wineind)

# In[5]:

# fitting a stepwise model:
from pyramid.arima import auto_arima

# Stepwise search (stepwise=True, no n_jobs argument). The traceback in this
# report does not point at this call; it points at the In[7] call below.
stepwise_fit = auto_arima(wineind, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
                          start_P=0, seasonal=True, d=1, D=1, trace=True,
                          error_action='ignore',  # don't want to know if an order does not work
                          suppress_warnings=True,  # don't want convergence warnings
                          stepwise=True)  # set to stepwise

stepwise_fit.summary()

# In[7]:

# Random (non-stepwise) search with n_jobs=-1. This is the call that raises
# MaybeEncodingError: per the traceback below, the fits are dispatched through
# joblib's Parallel, and sending a fitted ARIMA result back fails with
# "can't pickle statsmodels.tsa.statespace._statespace.dStatespace objects".
rs_fit = auto_arima(wineind, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
                    start_P=0, seasonal=True, n_jobs=-1, d=1, D=1, trace=True,
                    error_action='ignore',  # don't want to know if an order does not work
                    suppress_warnings=True,  # don't want convergence warnings
                    stepwise=False, random=True, random_state=42,  # we can fit a random search (not exhaustive)
                    n_fits=25)

"""
---------------------------------------------------------------------------
MaybeEncodingError                        Traceback (most recent call last)
<ipython-input-7-a8c602c556a4> in <module>()
      4                     suppress_warnings=True,  # don't want convergence warnings
      5                     stepwise=False, random=True, random_state=42,  # we can fit a random search (not exhaustive)
----> 6                     n_fits=25)
      7 
      8 rs_fit.summary()

~\Anaconda3\envs\major_project\lib\site-packages\pyramid\arima\auto.py in auto_arima(y, exogenous, start_p, d, start_q, max_p, max_d, max_q, start_P, D, start_Q, max_P, max_D, max_Q, max_order, m, seasonal, stationary, information_criterion, alpha, test, seasonal_test, stepwise, n_jobs, start_params, trend, method, transparams, solver, maxiter, disp, callback, offset_test_args, seasonal_test_args, suppress_warnings, error_action, trace, random, random_state, n_fits, return_valid_fits, out_of_sample_size, scoring, scoring_args, **fit_args)
    574                                 out_of_sample_size=out_of_sample_size,
    575                                 scoring=scoring, scoring_args=scoring_args)
--> 576             for order, seasonal_order in gen)
    577 
    578     # otherwise, we're fitting the stepwise algorithm...

~\Anaconda3\envs\major_project\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time

~\Anaconda3\envs\major_project\lib\site-packages\sklearn\externals\joblib\parallel.py in retrieve(self)
    697             try:
    698                 if getattr(self._backend, 'supports_timeout', False):
--> 699                     self._output.extend(job.get(timeout=self.timeout))
    700                 else:
    701                     self._output.extend(job.get())

~\Anaconda3\envs\major_project\lib\multiprocessing\pool.py in get(self, timeout)
    642             return self._value
    643         else:
--> 644             raise self._value
    645 
    646     def _set(self, i, obj):

MaybeEncodingError: Error sending result: '[ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(2, 1, 1),
   out_of_sample_size=0, scoring='mse', scoring_args={},
   seasonal_order=(0, 1, 2, 12), solver='lbfgs', start_params=None,
   suppress_warnings=True, transparams=True, trend='c')]'. Reason: 'TypeError("can't pickle statsmodels.tsa.statespace._statespace.dStatespace objects",)'

"""

-->

Expected Results

The same results as shown in the example notebook: https://www.github.com/tgsmith61591/pyramid/blob/master/examples/quick_start_example.ipynb

Versions

Windows-10-10.0.16299-SP0
Python 3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 10:22:32) [MSC v.1900 64 bit (AMD64)]
Pyramid 0.6.5
NumPy 1.14.0
SciPy 1.0.0
Scikit-Learn 0.19.1
Statsmodels 0.8.0
tgsmith61591 commented 6 years ago

Great find, thanks for reporting the issue! I'll take a look and try to reproduce on my end.

tgsmith61591 commented 6 years ago

I'm on a Unix machine, so it's not a 1:1 comparison, but here's how I'm trying to replicate it:

$ conda create -n pmd_issue25 python=3.6 numpy=1.14 \
  scipy=1.0.0 scikit-learn=0.19.1 statsmodels=0.8.0
$ source activate pmd_issue25
$ (pmd_issue25) pip install pyramid-arima==0.6.5

And I am indeed seeing the same issues:

>>> rs_fit = auto_arima(wineind, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
...                     start_P=0, seasonal=True, n_jobs=-1, d=1, D=1, trace=True,
...                     error_action='ignore',  # don't want to know if an order does not work
...                     suppress_warnings=True,  # don't want convergence warnings
...                     stepwise=False, random=True, random_state=42,  # we can fit a random search (not exhaustive)
...                     n_fits=25)
Fit ARIMA: order=(3, 1, 2) seasonal_order=(1, 1, 1, 12); AIC=nan, BIC=nan, Fit time=nan seconds
Fit ARIMA: order=(1, 1, 3) seasonal_order=(0, 1, 1, 12); AIC=3068.842, BIC=3091.036, Fit time=1.769 seconds
Fit ARIMA: order=(3, 1, 3) seasonal_order=(0, 1, 1, 12); AIC=3072.626, BIC=3101.160, Fit time=2.591 seconds
Fit ARIMA: order=(2, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=3068.503, BIC=3090.696, Fit time=4.930 seconds
Traceback (most recent call last):
  File "<stdin>", line 6, in <module>
  File "//anaconda/envs/pmd_issue25/lib/python3.6/site-packages/pyramid/arima/auto.py", line 576, in auto_arima
    for order, seasonal_order in gen)
  File "//anaconda/envs/pmd_issue25/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 789, in __call__
    self.retrieve()
  File "//anaconda/envs/pmd_issue25/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 699, in retrieve
    self._output.extend(job.get(timeout=self.timeout))
  File "//anaconda/envs/pmd_issue25/lib/python3.6/multiprocessing/pool.py", line 644, in get
    raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '[ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(2, 1, 1),
   out_of_sample_size=0, scoring='mse', scoring_args={},
   seasonal_order=(0, 1, 2, 12), solver='lbfgs', start_params=None,
   suppress_warnings=True, transparams=True, trend='c')]'. Reason: 'TypeError("can't pickle statsmodels.tsa.statespace._statespace.dStatespace objects",)'

This is strange because parallel fits are tested in the unit tests, so not sure whether this is a versioning thing or what... I'll dig into it, but my immediate advice would be to use stepwise=True for the time being.

Sprinting commented 6 years ago

@tgsmith61591 thank you for looking into this - I'm using stepwise=True since I needed it to be somewhat fast, but reported it anyways since I came across it while following the example,

Edit: Thank you

tgsmith61591 commented 6 years ago

Seems related to this statsmodels issue. You have two options:

$ conda create -n pmd_issue25_py35 python=3.5 numpy=1.14 \
  scipy=1.0.0 scikit-learn=0.19.1 statsmodels=0.8.0
$ source activate pmd_issue25_py35
$ (pmd_issue25_py35) pip install pyramid-arima==0.6.5

With Python 3.5 (and otherwise identical package versions), the same call works:

>>> rs_fit = auto_arima(wineind, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
...                     start_P=0, seasonal=True, n_jobs=-1, d=1, D=1, trace=True,
...                     error_action='ignore',  # don't want to know if an order does not work
...                     suppress_warnings=True,  # don't want convergence warnings
...                     stepwise=False, random=True, random_state=42,  # we can fit a random search (not exhaustive)
...                     n_fits=25)
Fit ARIMA: order=(3, 1, 2) seasonal_order=(1, 1, 1, 12); AIC=nan, BIC=nan, Fit time=nan seconds
Fit ARIMA: order=(1, 1, 3) seasonal_order=(0, 1, 1, 12); AIC=3068.842, BIC=3091.036, Fit time=1.668 seconds
Fit ARIMA: order=(3, 1, 3) seasonal_order=(0, 1, 1, 12); AIC=3072.626, BIC=3101.160, Fit time=2.268 seconds
Fit ARIMA: order=(2, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=3068.503, BIC=3090.696, Fit time=4.990 seconds
Fit ARIMA: order=(2, 1, 3) seasonal_order=(1, 1, 1, 12); AIC=3071.522, BIC=3100.057, Fit time=3.874 seconds
Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 1, 12); AIC=3068.086, BIC=3090.280, Fit time=1.013 seconds
Fit ARIMA: order=(1, 1, 2) seasonal_order=(0, 1, 1, 12); AIC=3066.771, BIC=3085.794, Fit time=1.282 seconds
Fit ARIMA: order=(2, 1, 2) seasonal_order=(1, 1, 1, 12); AIC=3070.025, BIC=3095.389, Fit time=1.888 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=3068.757, BIC=3090.951, Fit time=10.621 seconds
Fit ARIMA: order=(3, 1, 3) seasonal_order=(0, 1, 2, 12); AIC=3073.580, BIC=3105.285, Fit time=13.863 seconds
Fit ARIMA: order=(3, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=nan, BIC=nan, Fit time=nan seconds
Fit ARIMA: order=(1, 1, 3) seasonal_order=(0, 1, 2, 12); AIC=3069.854, BIC=3095.218, Fit time=15.224 seconds
Fit ARIMA: order=(3, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=3070.436, BIC=3095.800, Fit time=6.790 seconds
Fit ARIMA: order=(2, 1, 2) seasonal_order=(0, 1, 1, 12); AIC=3068.701, BIC=3090.895, Fit time=1.036 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 2, 12); AIC=3070.103, BIC=3095.467, Fit time=20.696 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(2, 1, 1, 12); AIC=3070.302, BIC=3092.495, Fit time=14.282 seconds
Fit ARIMA: order=(3, 1, 3) seasonal_order=(1, 1, 1, 12); AIC=3073.552, BIC=3105.257, Fit time=4.957 seconds
Fit ARIMA: order=(2, 1, 3) seasonal_order=(0, 1, 1, 12); AIC=3070.642, BIC=3096.006, Fit time=2.583 seconds
Fit ARIMA: order=(1, 1, 3) seasonal_order=(2, 1, 2, 12); AIC=3072.019, BIC=3103.723, Fit time=30.945 seconds
Fit ARIMA: order=(1, 1, 2) seasonal_order=(1, 1, 2, 12); AIC=3069.717, BIC=3095.081, Fit time=12.080 seconds
Fit ARIMA: order=(3, 1, 3) seasonal_order=(2, 1, 1, 12); AIC=3075.533, BIC=3110.408, Fit time=33.650 seconds
Fit ARIMA: order=(1, 1, 3) seasonal_order=(1, 1, 2, 12); AIC=3071.758, BIC=3100.293, Fit time=13.260 seconds
Fit ARIMA: order=(2, 1, 3) seasonal_order=(2, 1, 1, 12); AIC=3073.538, BIC=3105.242, Fit time=28.746 seconds
Fit ARIMA: order=(2, 1, 3) seasonal_order=(1, 1, 2, 12); AIC=3073.553, BIC=3105.258, Fit time=23.821 seconds
Fit ARIMA: order=(1, 1, 3) seasonal_order=(2, 1, 1, 12); AIC=3072.048, BIC=3100.582, Fit time=6.794 seconds
Total fit time: 38.469 seconds