AI4Finance-Foundation / FinRL-Meta

FinRL-Meta: Dynamic datasets and market environments for FinRL.
https://ai4finance.org
MIT License
1.26k stars 585 forks source link

YahooFinance dataprocessor clean_data() fails #189

Open eyast opened 2 years ago

eyast commented 2 years ago

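Calling `clean_data()` on the Yahoo Finance data processor raises a `ValueError` when running `tests/test_finrl_loads.py`. Traceback below.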

./tests/test_finrl_loads.py::test_yahoo_data_processor[ticker_input1-6300] Failed: [undefined]ValueError: Parameter start received with timezone defined as 'UTC' although a Date must be timezone naive.

time_interval = '1d', start_date = '2021-01-01', end_date = '2021-10-31'
ticker_input = ['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', ...]
expected_df_size = 6300
tech_indicator_list = ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', ...]

@pytest.mark.parametrize(
    "ticker_input, expected_df_size",
    [(SINGLE_TICKER, 210), (DOW_30_TICKER, 6300)],
)
def test_yahoo_data_processor(
    time_interval: str,
    start_date: str,
    end_date: str,
    ticker_input: List[str],
    expected_df_size: int,
    tech_indicator_list: List[str]
) -> None:
    """
    Tests the Yahoo Downloader and the returned data shape
    """
    assert isinstance(start_date, str)
    assert isinstance(end_date, str)
    data_source = "yahoofinance"
    dp = DataProcessor(data_source, start_date, end_date, time_interval)
    dp.download_data(ticker_input)
    assert isinstance(dp.dataframe, pd.DataFrame)
    assert dp.dataframe.shape == (
        expected_df_size,
        9,
    ) or dp.dataframe.shape == (expected_df_size - 1, 9)
  dp.clean_data()

tests\test_finrl_loads.py:58:


meta\data_processor.py:91: in clean_data
    self.processor.clean_data()
meta\data_processors\yahoofinance.py:87: in clean_data
    trading_days = self.get_trading_days(start=self.start_date, end=self.end_date)
meta\data_processors\yahoofinance.py:181: in get_trading_days
    df = nyse.sessions_in_range(
C:\Users\eyast\Miniconda3\envs\finrlmeta\lib\site-packages\exchange_calendars\exchange_calendar.py:2170: in sessions_in_range
    slc = self._get_sessions_slice(start, end, _parse)
C:\Users\eyast\Miniconda3\envs\finrlmeta\lib\site-packages\exchange_calendars\exchange_calendar.py:2147: in _get_sessions_slice
    start, end = self._parse_start_end_dates(start, end, _parse)
C:\Users\eyast\Miniconda3\envs\finrlmeta\lib\site-packages\exchange_calendars\exchange_calendar.py:2143: in _parse_start_end_dates
    return parse_date(start, "start", self), parse_date(end, "end", self)


date = Timestamp('2021-01-01 00:00:00+0000', tz='UTC'), param_name = 'start'
calendar = <exchange_calendars.exchange_calendar_xnys.XNYSExchangeCalendar object at 0x00000197448042E0>
raise_oob = True

def parse_date(
    date: Date,
    param_name: str = "date",
    calendar: ExchangeCalendar | None = None,
    raise_oob: bool = True,
) -> pd.Timestamp:
    """Parse input intended to represent a date.

     Parameters
     ----------
     date
         Input to be parsed as date. Must be valid input to pd.Timestamp
         and have a time component of 00:00.

     param_name
         Name of a parameter that was to receive a date.

    calendar
        ExchangeCalendar against which to evalute out-of-bounds dates.
        Only requried if `raise_oob` True.

    raise_oob : default: True
        True to raise DateOutOfBounds if `date` is earlier than the
        first session or later than the last session of `calendar`. NB if
        True (default) then `calendar` must be passed.

    Returns
     -------
     pd.Timestamp
         pd.Timestamp (timezone naive with time component of 00:00).

     Raises
     ------
     Errors as `parse_timestamp` and additionally:

     ValueError
         If `date` time component is not 00:00.
         If `date` is timezone aware.

    exchange_calendars.errors.DateOutOfBounds
        If `raise_oob` True and `date` parses to a valid timestamp although
        timestamp is before `calendar`'s first session or after
        `calendar`'s last session.
    """
    # side "left" to get it through 'second' handling. Has undesirable effect of
    # allowing `date` to be defined with a second (or more accurate) compoment
    # if it falls within the minute that follows midnight.
    ts = parse_timestamp(date, param_name, raise_oob=False, side="left", utc=False)

    if ts.tz is not None:
      raise ValueError(

            f"Parameter {param_name} received with timezone defined as '{ts.tz.zone}'"
            f" although a Date must be timezone naive."
        )
E       ValueError: Parameter start received with timezone defined as 'UTC' although a Date must be timezone naive.

C:\Users\eyast\Miniconda3\envs\finrlmeta\lib\site-packages\exchange_calendars\calendar_helpers.py:378: ValueError