SforAiDl / genrl

A PyTorch reinforcement learning library for generalizable and reproducible algorithm implementations with an aim to improve accessibility in RL
https://genrl.readthedocs.io
MIT License

Adding a new Data Bandit using the Titanic Data #301

Closed TMorville closed 4 years ago

TMorville commented 4 years ago

I am getting an error that might be related to #300, or possibly to my custom implementation.

To reproduce:

1) Create a Kaggle account and download the data from https://www.kaggle.com/c/titanic
2) Run this code to create a genrl-compatible data set:

import pandas as pd
from sklearn.preprocessing import LabelEncoder


def _format_titatic():

    gender_submission = pd.read_csv('gender_submission.csv')
    test = pd.read_csv('test.csv')
    train = pd.read_csv('train.csv')

    # note: only the object (string) columns are NaN-filled here;
    # float columns such as Age keep their NaNs
    train_str = train.select_dtypes(include='object').fillna('0')
    train_float = train.select_dtypes(include='float64')
    train_int = train.select_dtypes(include='int64')

    le = LabelEncoder()

    train_str_enc = train_str.apply(le.fit_transform)

    train_enc = pd.concat([train_str_enc, train_float, train_int], axis=1)

    # column 0 holds the label (Survived shifted to 1/2); the remaining columns are the context
    _df = pd.DataFrame()

    _df[0] = train_enc.Survived + 1

    for i, c in enumerate(list(train_enc.drop('Survived', axis=1))):
        _df[i + 1] = train_enc[c]

    return _df
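
As a quick sanity check on the formatted frame (891 rows, one label column plus 11 context features):

    _df = _format_titatic()
    print(_df.shape)        # should be (891, 12)
    print(_df[0].unique())  # the two labels, Survived shifted to 1 and 2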

Now run the custom bandit code with the small change from #300 (it is still unconfirmed whether that is actually an error):

import torch

from typing import Tuple
from genrl.utils.data_bandits.base import DataBasedBandit

class TitanicDataBandit(DataBasedBandit):

    def __init__(self, **kwargs):
        super(TitanicDataBandit, self).__init__(**kwargs)

        self._df = _format_titatic()
        self.n_actions = len(self._df[0].unique())
        self.context_dim = self._df.shape[1] - 1
        self.len = len(self._df)

        print(self.n_actions, self.context_dim, self.len)

    def reset(self) -> torch.Tensor:
        self._reset()
        self.df = self._df.sample(frac=1).reset_index(drop=True)
        return self._get_context()

    def _compute_reward(self, action: int) -> Tuple[int, int]:
        label = self._df.iloc[self.idx, 0]
        r = int(label == (action + 1))
        return r, 1

    def _get_context(self) -> torch.Tensor:
        return torch.tensor(
            self._df.iloc[self.idx, 1:].values,
            device=self.device,
            dtype=torch.float,
        )

bandit = TitanicDataBandit()
context = bandit.reset()

from genrl.agents import NeuralLinearPosteriorAgent

agent = NeuralLinearPosteriorAgent(bandit)
context = bandit.reset()

action = agent.select_action(context)
new_context, reward = bandit.step(action)

from genrl.trainers import DCBTrainer

trainer = DCBTrainer(agent, bandit)
trainer.train(timesteps=5000, batch_size=32)

yields a shape error:

Started at 31-08-20 15:40:43
Training NeuralLinearPosteriorAgent on TitanicDataBandit for 5000 timesteps
timestep                  regret/regret             reward/reward             regret/cumulative_regret  reward/cumulative_reward  regret/regret_moving_avg  reward/reward_moving_avg  
100                       0                         1                         45                        55                        0.45                      0.55                      
200                       0                         1                         89                        111                       0.445                     0.555                     
300                       1                         0                         136                       164                       0.452                     0.548                     
400                       1                         0                         178                       222                       0.444                     0.556                     
500                       1                         0                         226                       274                       0.464                     0.536                     

Encounterred exception during training!
size mismatch, [2 x 12], [51] at ../aten/src/TH/generic/THTensorMath.cpp:292

Training completed in 1 seconds
Final Regret Moving Average: 0.46 | Final Reward Moving Average: 0.54
Traceback (most recent call last):
  File "/usr/local/anaconda3/envs/rl/lib/python3.6/site-packages/genrl-0.0.1-py3.6.egg/genrl/trainers/bandit.py", line 185, in train
    action = self.agent.select_action(context)
  File "/usr/local/anaconda3/envs/rl/lib/python3.6/site-packages/genrl-0.0.1-py3.6.egg/genrl/agents/bandits/contextual/neural_linpos.py", line 153, in select_action
    values = torch.mv(beta, torch.cat([latent_context.squeeze(0), torch.ones(1)]))
RuntimeError: size mismatch, [2 x 12], [51] at ../aten/src/TH/generic/THTensorMath.cpp:292
{'regrets': [0,

I can't seem to figure out why this fails after 500 time steps.

EDIT:

It seems that the shape of beta changes from [2, 51] to [2, 12] at timestep 500, so it no longer matches the 51-dimensional vector passed to torch.mv. For some reason this part of NeuralLinearPosteriorAgent.select_action fails:

            beta = (
                torch.tensor(
                    np.stack(
                        [
                            np.random.multivariate_normal(
                                self.mu[i], var[i] * self.cov[i]
                            )
                            for i in range(self.n_actions)
                        ]
                    )
                )
                .to(self.device)
                .to(torch.float)
            )

and the resulting exception triggers the fallback calculation of beta:

        except np.linalg.LinAlgError as e:  # noqa F841

            print("Linalg error.")

            beta = (
                (
                    torch.stack(
                        [
                            torch.distributions.MultivariateNormal(
                                torch.zeros(self.context_dim + 1),
                                torch.eye(self.context_dim + 1),
                            ).sample()
                            for i in range(self.n_actions)
                        ]
                    )
                )
                .to(self.device)
                .to(torch.float)
            )
TMorville commented 4 years ago

There was an error in the except block.

Replacing self.context_dim with self.latent_dim yields the correct dimensions and allows training.
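
For reference, the except block would then look something like this (only the dimensions change, so that beta lines up with the 51-dimensional [latent_context, 1] vector from the traceback):

        except np.linalg.LinAlgError as e:  # noqa F841

            print("Linalg error.")

            # sample beta with latent_dim + 1 columns instead of context_dim + 1
            beta = (
                (
                    torch.stack(
                        [
                            torch.distributions.MultivariateNormal(
                                torch.zeros(self.latent_dim + 1),
                                torch.eye(self.latent_dim + 1),
                            ).sample()
                            for i in range(self.n_actions)
                        ]
                    )
                )
                .to(self.device)
                .to(torch.float)
            )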

Can the authors confirm that this is correct? If yes, I can open a PR.

threewisemonkeys-as commented 4 years ago

I was able to get the following error after following the reproduction steps:

2 11 891

Started at 31-08-20 14:44:10
Training NeuralLinearPosteriorAgent on TitanicDataBandit for 5000 timesteps
timestep                  regret/regret             reward/reward             regret/cumulative_regret  reward/cumulative_reward  regret/regret_moving_avg  reward/reward_moving_avg  
100                       1                         0                         51                        49                        0.51                      0.49                      
200                       1                         0                         94                        106                       0.47                      0.53                      
300                       0                         1                         140                       160                       0.448                     0.552                     
400                       1                         0                         195                       205                       0.492                     0.508                     
500                       0                         1                         229                       271                       0.44                      0.56                      

Encounterred exception during training!
array must not contain infs or NaNs

Training completed in 1 seconds
Final Regret Moving Average: 0.444 | Final Reward Moving Average: 0.556
Traceback (most recent call last):
  File "/content/genrl/genrl/trainers/bandit.py", line 185, in train
    action = self.agent.select_action(context)
  File "/content/genrl/genrl/agents/bandits/contextual/neural_linpos.py", line 128, in select_action
    for i in range(self.n_actions)
  File "/content/genrl/genrl/agents/bandits/contextual/neural_linpos.py", line 128, in <listcomp>
    for i in range(self.n_actions)
  File "mtrand.pyx", line 4082, in numpy.random.mtrand.RandomState.multivariate_normal
  File "/usr/local/lib/python3.6/dist-packages/scipy/linalg/decomp_svd.py", line 109, in svd
    a1 = _asarray_validated(a, check_finite=check_finite)
  File "/usr/local/lib/python3.6/dist-packages/scipy/_lib/_util.py", line 246, in _asarray_validated
    a = toarray(a)
  File "/usr/local/lib/python3.6/dist-packages/numpy/lib/function_base.py", line 499, in asarray_chkfinite
    "array must not contain infs or NaNs")
ValueError: array must not contain infs or NaNs

This was resolved by removing NaNs from the dataframe with self._df = self._df.fillna(0).
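
In the bandit's __init__ above, that amounts to something like this (only the fillna line is new; the NaNs most likely come from the float columns such as Age, which _format_titatic leaves unfilled):

    def __init__(self, **kwargs):
        super(TitanicDataBandit, self).__init__(**kwargs)

        self._df = _format_titatic()
        # replace remaining NaNs (e.g. in Age) before the agent ever sees them
        self._df = self._df.fillna(0)
        self.n_actions = len(self._df[0].unique())
        self.context_dim = self._df.shape[1] - 1
        self.len = len(self._df)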

The reason the error shows up after 500 steps is that by default, the DCBTrainer waits 500 steps before starting to update the agent's parameters.

threewisemonkeys-as commented 4 years ago

> There was an error in the except block.
>
> Replacing self.context_dim with self.latent_dim yields the correct dimensions and allows training.
>
> Can the authors confirm that this is correct? If yes, I can open a PR.

This might also be a legitimate issue. Looking into it

threewisemonkeys-as commented 4 years ago

@TMorville could you check if removing NaN values from the dataframe works for you?

TMorville commented 4 years ago

I don't get the NaN error when running the code 🤔

1) If I keep NaN values in the data and use latent_dim, it also works and leads to a cumulative reward of 1843.

2) If I remove NaN values from the data and use context_dim, it also works and leads to a cumulative reward of 1539.

threewisemonkeys-as commented 4 years ago

Yep, you are right, it looks like it should be latent_dim instead of context_dim.

Not sure why the except block was being executed in your case since it wasn't in mine. Maybe different numpy versions classify different things as LinAlgError. 🤔

Either way, thanks for raising the issue! Feel free to open a PR.

threewisemonkeys-as commented 4 years ago

> 1) If I keep NaN values in the data and use latent_dim, it also works and leads to a cumulative reward of 1843.

Is this after running for 5000 steps? I am consistently getting >3000 cumulative reward.

TMorville commented 4 years ago

I played around with it for a bit and found that:

My genrl is installed from source with version 0.0.1.