Closed lauracrln closed 3 years ago
Can you please paste the full call stack, I cant see where the error is happening
do you mean like this?
fields = ['customer_id', 'vendor_id', 'vendor_rating']
ratings = pd.read_csv(root_path + "train_200_800_final.csv", usecols= fields)
ratings['timestamp'] = '00:00:00'
from sklearn import preprocessing
label_encoder = preprocessing.LabelEncoder()
ratings['customer_id']= label_encoder.fit_transform(ratings['customer_id'])
df = pd.DataFrame(data=ratings)
df.to_csv('ratings_rest.csv',index=False)
restaurant_data = pd.read_csv(root_path + "train_200_800_final.csv")
restaurant_data = pd.DataFrame(data=restaurant_data)
from sklearn import preprocessing
label_encoder = preprocessing.LabelEncoder()
restaurant_data['customer_id']= label_encoder.fit_transform(restaurant_data['customer_id'])
import torch
tensor_data = torch.tensor(restaurant_data.values)
tensor_data.shape
rest_list = dict([(k,torch.tensor(tensor_data[i]).float()) for k, i in zip(restaurant_data.index, range(tensor_data.shape[0]))])
import pickle
pickle.dump(rest_list,open('rest_dataset.pkl' ,'wb'))
import numpy as np
import datetime
import random
import time
def string_time_to_unix(s):
return int(time.mktime(datetime.datetime.strptime(s, "%H:%M:%S").timetuple()))
def prepare_my_dataset(args_mut, kwargs):
frame_size = kwargs.get('frame_size')
key_to_id = args_mut.base.key_to_id
df = args_mut.df
df['vendor_rating'] = df['vendor_rating'].apply(lambda i: 2 * (i - 2.5))
df['timestamp'] = df['timestamp'].apply(string_time_to_unix)
df['vendor_id'] = df['vendor_id'].apply(key_to_id.get)
customer = df[['customer_id', 'vendor_id']].groupby(['customer_id']).size()
customer = customer[customer > frame_size].sort_values(ascending=False).index
ratings = df.sort_values(by='timestamp').set_index('customer_id').drop('timestamp', axis=1).groupby('customer_id')
cust_dict ={}
def app(x):
customer_id = x.index[0]
cust_dict [int(customer_id)] = {}
cust_dict[int(customer_id)]['items'] = x['vendor_id'].values
cust_dict[int(customer_id)]['ratings']= x['vendor_rating'].values
ratings.apply(app)
print(customer)
args_mut.cust_dict = cust_dict
args_mut.customer = customer
return args_mut,kwargs
frame_size = 10
batch_size = 25
dirs = recnn.data.env.DataPath(
base="/content/",
embeddings="rest_dataset.pkl",
ratings='ratings_rest.csv',
cache="cached_frame_env.pkl", # cache will generate after you run
use_cache=True
)
env = recnn.data.env.FrameEnv(dirs, frame_size, batch_size,prepare_dataset= prepare_my_dataset)
No the error message shows the functions called, where the exception happened Paste the call stack
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-166-65892292bdab> in <module>()
10 )
11
---> 12 env = recnn.data.env.FrameEnv(dirs, frame_size, batch_size,prepare_dataset= prepare_my_dataset)
4 frames
/usr/local/lib/python3.6/dist-packages/recnn/data/env.py in __init__(self, path, frame_size, batch_size, num_workers, *args, **kwargs)
216 kwargs["frame_size"] = frame_size
217 super(FrameEnv, self).__init__(
--> 218 path, min_seq_size=frame_size + 1, *args, **kwargs
219 )
220
/usr/local/lib/python3.6/dist-packages/recnn/data/env.py in __init__(self, path, prepare_dataset, embed_batch, **kwargs)
135 self.load_env(path.cache)
136 else:
--> 137 self.process_env(path)
138 if path.use_cache:
139 self.save_env(path.cache)
/usr/local/lib/python3.6/dist-packages/recnn/data/env.py in process_env(self, path, **kwargs)
175 user_dict = process_args_mut.user_dict
176
--> 177 train_users, test_users = train_test_split(users, test_size=test_size)
178 train_users = utils.sort_users_itemwise(user_dict, train_users)[2:]
179 test_users = utils.sort_users_itemwise(user_dict, test_users)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_split.py in train_test_split(*arrays, **options)
2127 arrays = indexable(*arrays)
2128
-> 2129 n_samples = _num_samples(arrays[0])
2130 n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size,
2131 default_test_size=0.25)
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in _num_samples(x)
189 x = np.asarray(x)
190 else:
--> 191 raise TypeError(message)
192
193 if hasattr(x, 'shape') and x.shape is not None:
TypeError: Expected sequence or array-like, got <class 'NoneType'>
i already found the problem, im so sorry to bother you, turns out i used wrong name variable on prepare dataset
thank you so muchhh !
No Problem!
helo, im using my own dataset
i keep getting error
Everytime i tried
env = recnn.data.env.FrameEnv(dirs, frame_size, batch_size,prepare_dataset= prepare_my_dataset)
this is my prepare dataset function:
this is how my customer looks like :
Int64Index([199, 62, 72, 71, 70, 69, 68, 67, 66, 65, ... 135, 134, 133, 132, 131, 130, 129, 128, 127, 0], dtype='int64', name='customer_id', length=200)
and this is how a little of my cust_dict looks like :
`{0: {'items': array([221, 225, 237, 250, 259, 265, 271, 274, 288, 289, 294, 295, 298, 299, 300, 304, 356, 386, 391, 398, 401, 419, 459, 537, 547, 573, 575, 216, 577, 207, 201, 90, 92, 104, 105, 106, 110, 113, 115, 134, 145, 148, 149, 154, 157, 159, 160, 161, 176, 180, 188, 189, 191, 192, 193, 195, 197, 199, 203, 86, 85, 83, 84, 4, 13, 20, 23, 28, 33, 43, 44, 55, 66, 67, 75, 76, 78, 79, 81, 82]), 'ratings': array([3.4, 3.4, 4.2, 4. , 3.6, 3.6, 4. , 2.4, 4.2, 4. , 3.8, 4.4, 4.4, 3.4, 3.8, 3. , 3.4, 4. , 3.4, 3.4, 4. , 3.4, 3.4, 3.8, 3.8, 4.2, 3.2, 4.4, 4. , 3.2, 3. , 3.8, 4.2, 4. , 4. , 4. , 4.2, 4.4, 4.6,
can you help me?