When running the following code from the main branch:
# Reproduction script: loads the car-choice CSV and wraps it with
# EasyDatasetWrapper, then reads the resulting `choice_dataset` attribute.
import warnings
# Suppress all Python warnings emitted after this point.
warnings.filterwarnings("ignore")
import random
from time import time
import numpy as np
import pandas as pd
import torch
import torch_choice
from torch_choice import run
from tqdm import tqdm
from torch_choice.data import ChoiceDataset, JointDataset, utils, load_mode_canada_dataset, load_house_cooling_dataset_v1
from torch_choice.model import ConditionalLogitModel, NestedLogitModel
# Seed the Python, NumPy and PyTorch random number generators with the same
# value, and ask PyTorch to use deterministic algorithms, for reproducibility.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.use_deterministic_algorithms(True)
# Path is relative to the current working directory.
car_choice = pd.read_csv("./tutorials/public_datasets/car_choice.csv")
# Preview the first rows (only displayed when run as the last expression of a notebook cell).
car_choice.head()
# NOTE(review): this variable is never read below; the wrapper call passes its
# own ['gender', 'income'] literal instead.
user_observable_columns=["gender", "income"]
from torch_choice.utils.easy_data_wrapper import EasyDatasetWrapper
# Build the wrapper from columns of the main data frame. Per the traceback in
# this report, the constructor is where the pandas `pivot` TypeError is raised.
data_wrapper_from_columns = EasyDatasetWrapper(
main_data=car_choice,
purchase_record_column='record_id',
choice_column='purchase',
item_name_column='car',
user_index_column='consumer_id',
session_index_column='session_id',
user_observable_columns=['gender', 'income'],
item_observable_columns=['speed'],
session_observable_columns=['discount'],
itemsession_observable_columns=['price'])
data_wrapper_from_columns.summary()
# Retrieve the dataset object constructed by the wrapper.
dataset = data_wrapper_from_columns.choice_dataset
# ChoiceDataset(label=[], item_index=[885], provided_num_items=[], user_index=[885], session_index=[885], item_availability=[885, 4], item_speed=[4, 1], user_gender=[885, 1], user_income=[885, 1], session_discount=[885, 1], itemsession_price=[885, 4, 1], device=cpu)
Depending on the pandas version, one may encounter the following pandas error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[1], line 27
25 user_observable_columns=["gender", "income"]
26 from torch_choice.utils.easy_data_wrapper import EasyDatasetWrapper
---> 27 data_wrapper_from_columns = EasyDatasetWrapper(
28 main_data=car_choice,
29 purchase_record_column='record_id',
30 choice_column='purchase',
31 item_name_column='car',
32 user_index_column='consumer_id',
33 session_index_column='session_id',
34 user_observable_columns=['gender', 'income'],
35 item_observable_columns=['speed'],
36 session_observable_columns=['discount'],
37 itemsession_observable_columns=['price'])
39 data_wrapper_from_columns.summary()
40 dataset = data_wrapper_from_columns.choice_dataset
File ~/Development/torch-choice/torch_choice/utils/easy_data_wrapper.py:142, in EasyDatasetWrapper.__init__(self, main_data, purchase_record_column, item_name_column, choice_column, user_index_column, session_index_column, user_observable_data, item_observable_data, useritem_observable_data, session_observable_data, price_observable_data, itemsession_observable_data, useritemsession_observable_data, user_observable_columns, item_observable_columns, useritem_observable_columns, session_observable_columns, price_observable_columns, itemsession_observable_columns, useritemsession_observable_columns, device)
135 self.derive_observable_from_main_data(item_observable_columns,
136 user_observable_columns,
137 session_observable_columns,
138 price_observable_columns)
140 self.observable_data_to_observable_tensors()
--> 142 self.create_choice_dataset()
143 print('Finished Creating Choice Dataset.')
File ~/Development/torch-choice/torch_choice/utils/easy_data_wrapper.py:303, in EasyDatasetWrapper.create_choice_dataset(self)
301 if len(np.unique(choice_set_size)) > 1:
302 print(f'Note: choice sets of different sizes found in different purchase records: {rep}')
--> 303 self.item_availability = self.get_item_availability_tensor()
304 else:
305 # None means all items are available.
306 self.item_availability = None
File ~/Development/torch-choice/torch_choice/utils/easy_data_wrapper.py:349, in EasyDatasetWrapper.get_item_availability_tensor(self)
347 if self.session_index_column is None:
348 raise ValueError(f'Item availability cannot be constructed without session index column.')
--> 349 A = self.main_data.pivot(self.session_index_column, self.item_name_column, self.choice_column)
350 return torch.BoolTensor(~np.isnan(A.values))
TypeError: pivot() takes 1 positional argument but 4 were given
Cause of the Issue
After the pandas 2.0.0 upgrade, the signature of the `pivot` method changed (see this issue): positional arguments to `pivot` are no longer accepted. Previously we could simply write `df.pivot("A", "B", "C")`, but now we must pass keyword arguments: `df.pivot(index="A", columns="B", values="C")`.
Description of the Issue
When running the following code from the main branch:
Depending on the pandas version, one may encounter the following pandas error:
Cause of the Issue
After the pandas 2.0.0 upgrade, the signature of the `pivot` method changed (see this issue): positional arguments to `pivot` are no longer accepted. Previously we could simply write `df.pivot("A", "B", "C")`, but now we must pass keyword arguments: `df.pivot(index="A", columns="B", values="C")`.