RehabAUmc / modys-video

Apache License 2.0
3 stars 0 forks source link

Raw Data generator very slow with interpolation #26

Closed Shankaroe closed 2 years ago

Shankaroe commented 2 years ago

A fix might be possible with caching.

svenvanderburg commented 2 years ago

Code for profiling:

import time
def test_data_generators():
    """Time the first two batches of a ``RawDataGenerator`` with default options.

    Builds the generator from the selected scores with ``batch_size=2``,
    fetches batches 0 and 1, asserts their shapes, and prints the elapsed
    time in seconds.
    """
    scores_df = read_scores()
    scores_df = MultipleScoreSelector().transform(scores_df)
    test_generator = RawDataGenerator(scores_df, batch_size=2)
    # perf_counter() is monotonic and high resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-measurement.
    start_time = time.perf_counter()
    for i in range(2):
        X, y = test_generator[i]
        assert X.shape == (2, 501, 18)
        assert y.shape == (2, 1)
    print('time: ',  time.perf_counter() - start_time)

def test_data_generators_scaler():
    """Time the first two batches of a ``RawDataGenerator`` with scaler and cutoff.

    Same measurement as the default-options case, but the generator is built
    with ``scaler=StandardScaler()`` and ``cutoff=50``. The asserted batch
    shape is ``(2, 451, 18)``. Prints the elapsed time in seconds.
    """
    scores_df = read_scores()
    scores_df = MultipleScoreSelector().transform(scores_df)
    test_generator = RawDataGenerator(scores_df, batch_size=2, scaler=StandardScaler(),
                                      cutoff=50)
    # perf_counter() is monotonic and high resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-measurement.
    start_time = time.perf_counter()
    for i in range(2):
        X, y = test_generator[i]
        assert X.shape == (2, 451, 18)
        assert y.shape == (2, 1)
    print('time: ',  time.perf_counter() - start_time)

def test_data_generators_slow():
    """Time the first two batches of a ``RawDataGenerator`` with ``likelihood`` set.

    The generator is built with ``scaler=StandardScaler()``, ``cutoff=50`` and
    ``likelihood=0.8``. Fetches batches 0 and 1, asserts the ``(2, 451, 18)``
    and ``(2, 1)`` shapes, and prints the elapsed time in seconds.

    NOTE(review): presumably ``likelihood`` is what triggers the slow
    interpolation path this profiling targets -- confirm against
    ``RawDataGenerator``.
    """
    scores_df = read_scores()
    scores_df = MultipleScoreSelector().transform(scores_df)
    test_generator = RawDataGenerator(scores_df, batch_size=2, scaler=StandardScaler(),
                                      cutoff=50, likelihood=0.8)
    # perf_counter() is monotonic and high resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-measurement.
    start_time = time.perf_counter()
    for i in range(2):
        X, y = test_generator[i]
        assert X.shape == (2, 451, 18)
        assert y.shape == (2, 1)
    print('time: ',  time.perf_counter() - start_time)
svenvanderburg commented 2 years ago

Fixed with #25