HelmchenLabSoftware / Cascade

Calibrated inference of spiking from calcium ΔF/F data using deep networks
GNU General Public License v3.0
123 stars 34 forks source link

vectorize preprocessing #14

Closed bryanlimy closed 3 years ago

bryanlimy commented 3 years ago

replace for loops in calculate_noise_levels() and preprocess_traces() with numpy ops to speed up the process.

Wrote a mini comparison script

import pickle
import numpy as np
from time import time

def vectorize_calculate_noise_levels(dF_traces, frame_rate):
    noise_levels = np.nanmedian(np.abs(np.diff(dF_traces, axis=-1)), axis=-1) / np.sqrt(frame_rate)
    return noise_levels * 100

def calculate_noise_levels(dF_traces, frame_rate):
    nb_neurons = dF_traces.shape[0]
    noise_levels = np.zeros( nb_neurons )
    for neuron in range(nb_neurons):
        noise_levels[neuron] = np.nanmedian( np.abs(np.diff(dF_traces[neuron,:])))/np.sqrt(frame_rate)
    return noise_levels * 100

def vectorize_preprocess_traces(dF_traces, before_frac, window_size):
    start = int(before_frac * window_size)
    end = dF_traces.shape[1] - start
    window_indexes = (np.expand_dims(np.arange(window_size), 0) + np.expand_dims(np.arange(dF_traces.shape[1] - window_size), 0).T)
    X = np.full(shape=(dF_traces.shape[0], dF_traces.shape[1], window_size), fill_value=np.nan)
    X[:, start:end, :] = dF_traces[:, window_indexes]
    return X

def preprocess_traces(dF_traces, before_frac, window_size):
    before = int( before_frac * window_size )
    nb_neurons = dF_traces.shape[0]
    nb_timepoints = dF_traces.shape[1]
    X = np.zeros( (nb_neurons,nb_timepoints,window_size) ) * np.nan
    for neuron in range(nb_neurons):
        for timepoint in range(nb_timepoints-window_size):
            X[neuron,timepoint+before,:] = dF_traces[neuron, timepoint:(timepoint+window_size)]
    return X

def main():
    num_trials = 100

    nb_neurons = 1005
    time_points = 260
    frame_rate = 7.5
    traces = np.random.rand(nb_neurons, time_points)

    o_start = time()
    o_noise_levels = [calculate_noise_levels(traces, frame_rate) for _ in range(num_trials)]
    o_end = time()
    o_elapse = (o_end - o_start) / num_trials

    v_start = time()
    v_noise_levels = [vectorize_calculate_noise_levels(traces, frame_rate) for _ in range(num_trials)]
    v_end = time()
    v_elapse = (v_end - v_start) / num_trials

    print('calculate_noise_levels()')
    print(f'\tresults equal: {np.array_equal(v_noise_levels[0], o_noise_levels[0], equal_nan=True)}')
    print(f'\toriginal avg. elapse: {o_elapse:.04f}s')
    print(f'\tvectorized avg. elapse: {v_elapse:.04f}s')

    before_frac = 0.5
    window_size = 64

    o_start = time()
    o_traces = [preprocess_traces(traces, before_frac, window_size) for _ in range(num_trials)]
    o_end = time()
    o_elapse = (o_end - o_start) / num_trials

    v_start = time()
    v_traces = [vectorize_preprocess_traces(traces, before_frac, window_size) for _ in range(num_trials)]
    v_end = time()
    v_elapse = (v_end - v_start) / num_trials

    print('\npreprocess_traces()')
    print(f'\tresults equal: {np.array_equal(v_traces[0], o_traces[0], equal_nan=True)}')
    print(f'\toriginal avg. elapse: {o_elapse:.04f}s')
    print(f'\tvectorized avg. elapse: {v_elapse:.04f}s')

if __name__ == '__main__':
    main()

Output

calculate_noise_levels()
    results equal: True
    original avg. elapse: 0.0934s
    vectorized avg. elapse: 0.0223s

preprocess_traces()
    results equal: True
    original avg. elapse: 0.3440s
    vectorized avg. elapse: 0.3249s