pandas-dev / pandas

Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
https://pandas.pydata.org
BSD 3-Clause "New" or "Revised" License
43.8k stars 17.98k forks source link

Intersection of non-overlapping DatetimeIndexes with tz set fails #7880

Closed alorenzo175 closed 10 years ago

alorenzo175 commented 10 years ago

When taking the intersection of two DatetimeIndexes that have timezone information and don't overlap, I get an IndexError. I don't have any problems if they don't have any timezone information or if they do overlap.

import pandas as pd

a = pd.date_range('2014-07-07 12', freq='3min', periods=20,)
b = pd.date_range('2014-07-07 13', freq='3min', periods=20,)
print a.intersection(b)

c = a.tz_localize('MST')
d = b.tz_localize('MST')
print c.intersection(d)
<class 'pandas.tseries.index.DatetimeIndex'>
Length: 0, Freq: None, Timezone: None
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-17-b073ce8aa1f2> in <module>()
      7 c = a.tz_localize('MST')
      8 d = b.tz_localize('MST')
----> 9 print c.intersection(d)

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/pandas/tseries/index.pyc in intersection(self, other)
   1131             if isinstance(result, DatetimeIndex):
   1132                 if result.freq is None:
-> 1133                     result.offset = to_offset(result.inferred_freq)
   1134             return result
   1135 

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/pandas/lib.so in pandas.lib.cache_readonly.__get__ (pandas/lib.c:36380)()

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/pandas/tseries/index.pyc in inferred_freq(self)
   1415     def inferred_freq(self):
   1416         try:
-> 1417             return infer_freq(self)
   1418         except ValueError:
   1419             return None

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/pandas/tseries/frequencies.pyc in infer_freq(index, warn)
    657 
    658     index = pd.DatetimeIndex(index)
--> 659     inferer = _FrequencyInferer(index, warn=warn)
    660     return inferer.get_freq()
    661 

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/pandas/tseries/frequencies.pyc in __init__(self, index, warn)
    697 
    698         if index.tz is not None:
--> 699             self.values = _tz_convert_with_transitions(self.values,'UTC',index.tz)
    700 
    701         self.warn = warn

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/pandas/tseries/frequencies.pyc in _tz_convert_with_transitions(values, to_tz, from_tz)
    685             return tslib.tz_convert(values,to_tz,from_tz)
    686 
--> 687     return np.vectorize(f)(values)
    688 
    689 class _FrequencyInferer(object):

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/numpy/lib/function_base.pyc in __call__(self, *args, **kwargs)
   1571             vargs.extend([kwargs[_n] for _n in names])
   1572 
-> 1573         return self._vectorize_call(func=func, args=vargs)
   1574 
   1575     def _get_ufunc_and_otypes(self, func, args):

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _vectorize_call(self, func, args)
   1631             _res = func()
   1632         else:
-> 1633             ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)
   1634 
   1635             # Convert args to object arrays first

/home/supas/forecasting_python/python_env_051214/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _get_ufunc_and_otypes(self, func, args)
   1594             # Assumes that ufunc first evaluates the 0th elements in the input
   1595             # arrays (the input values are not checked to ensure this)
-> 1596             inputs = [asarray(_a).flat[0] for _a in args]
   1597             outputs = func(*inputs)
   1598 

IndexError: index 0 is out of bounds for axis 0 with size 0

My system info is:


INSTALLED VERSIONS
------------------
commit: None
python: 2.7.6.final.0
python-bits: 64
OS: Linux
OS-release: 2.6.32-431.17.1.el6.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8

pandas: 0.14.1
nose: 1.3.3
Cython: 0.20.1
numpy: 1.8.1
scipy: 0.14.0
statsmodels: 0.5.0
IPython: 2.0.0
sphinx: 1.2.2
patsy: 0.2.1
scikits.timeseries: None
dateutil: 2.2
pytz: 2014.2
bottleneck: 0.8.0
tables: 3.1.1
numexpr: 2.4
matplotlib: 1.3.1
openpyxl: 1.8.6
xlrd: None
xlwt: None
xlsxwriter: None
lxml: 3.3.5
bs4: 4.3.2
html5lib: None
httplib2: None
apiclient: None
rpy2: None
sqlalchemy: 0.9.4
pymysql: None
psycopg2: None
jreback commented 10 years ago

yep, we'll call that a bug

sinhrks commented 10 years ago

#7798 will change this logic to use tslib.tz_convert, but it still fails because that function doesn't handle empty arrays.

Can I include the fix in #7798?

jreback commented 10 years ago

sure