Status: Closed (Shankaroe closed this issue 2 years ago)
Code for profiling:
import time
def test_data_generators():
    """Time two batch fetches from a ``RawDataGenerator`` and check shapes.

    The generator is built from the selected scores, passing only
    ``batch_size=2``. Each of the first two batches is expected to have
    ``X`` of shape ``(2, 501, 18)`` and ``y`` of shape ``(2, 1)``. The
    elapsed time for both fetches is printed for profiling.
    """
    scores_df = read_scores()
    scores_df = MultipleScoreSelector().transform(scores_df)
    test_generator = RawDataGenerator(scores_df, batch_size=2)

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted mid-measurement.
    start_time = time.perf_counter()
    for i in range(2):
        # Subscripting dispatches to __getitem__, same as the explicit call.
        X, y = test_generator[i]
        assert X.shape == (2, 501, 18)
        assert y.shape == (2, 1)
    print('time: ', time.perf_counter() - start_time)
def test_data_generators_scaler():
    """Time two batch fetches with a scaler and cutoff, and check shapes.

    The generator is built with ``batch_size=2``, a ``StandardScaler`` and
    ``cutoff=50``. Each of the first two batches is expected to have ``X``
    of shape ``(2, 451, 18)`` and ``y`` of shape ``(2, 1)``. The elapsed
    time for both fetches is printed for profiling.
    """
    scores_df = read_scores()
    scores_df = MultipleScoreSelector().transform(scores_df)
    test_generator = RawDataGenerator(
        scores_df,
        batch_size=2,
        scaler=StandardScaler(),
        cutoff=50,
    )

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted mid-measurement.
    start_time = time.perf_counter()
    for i in range(2):
        # Subscripting dispatches to __getitem__, same as the explicit call.
        X, y = test_generator[i]
        # NOTE(review): 451 looks like 501 minus the cutoff of 50 -- confirm
        # against RawDataGenerator.
        assert X.shape == (2, 451, 18)
        assert y.shape == (2, 1)
    print('time: ', time.perf_counter() - start_time)
def test_data_generators_slow():
    """Time two batch fetches with scaler, cutoff and likelihood; check shapes.

    The generator is built with ``batch_size=2``, a ``StandardScaler``,
    ``cutoff=50`` and ``likelihood=0.8``. Each of the first two batches is
    expected to have ``X`` of shape ``(2, 451, 18)`` and ``y`` of shape
    ``(2, 1)``. The elapsed time for both fetches is printed for profiling.

    NOTE(review): going by the function name, this is presumably the slow
    configuration being profiled -- confirm which option causes it.
    """
    scores_df = read_scores()
    scores_df = MultipleScoreSelector().transform(scores_df)
    test_generator = RawDataGenerator(
        scores_df,
        batch_size=2,
        scaler=StandardScaler(),
        cutoff=50,
        likelihood=0.8,
    )

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted mid-measurement.
    start_time = time.perf_counter()
    for i in range(2):
        # Subscripting dispatches to __getitem__, same as the explicit call.
        X, y = test_generator[i]
        assert X.shape == (2, 451, 18)
        assert y.shape == (2, 1)
    print('time: ', time.perf_counter() - start_time)
Fixed with #25
A fix might be possible with caching.