% Estimate and whiten the noise, assuming channel/time separability. This makes the
% L2-norm portion of the CBP objective into a sum of squares, simplifying the
% computation and improving computational efficiency.
% - find "noise zones"
% - compute noise auto-correlation function from these zones
% - whiten each channel in time with the inverse matrix sqrt of the auto-correlation
% - whiten across channels with inverse matrix sqrt of the covariance matrix.
%%@ first autocorrelate to get variance for each channel independently,
%%then rather than x-corr everything, take correlation of chan 1 and chan 2
%%and then do blah blah blah
% params.whitening includes:
% - noise_threshold: noise zones must have cross-channel L2-norm less than this.
% - min_zone_len: noise zones must have duration of at least this many samples.
% - num_acf_lags: number of samples over which auto-correlation is estimated.
data_pp = WhitenNoise(filtdata, params);
%%@ You don't care if you "whiten" the signal because you're reconstructing
%%@ on the spot
% Initialize spike waveforms, using clustering:
% - collect data segments with L2-norm larger than params.clustering.spike_threshold
% - align peaks of waveforms within these segments
% - Perform PCA on these segments, select a subspace containing desired percent of variance
% - Perform K-means clustering in this subspace
% params.clustering includes:
% - num_waveforms : number of cells to be recovered
% - spike_threshold : threshold used to pick spike-containing data segments (in stdevs)
% - percent_variance : used to determine number of principal components to use for clustering
if (params.general.plot_diagnostics)
VisualizeClustering(XProj, assignments, X, data_pp.nchan, ...
params.plotting.first_fig_num+3, ...
params.clustering.spike_threshold);
end
% For later comparisons, also compute spike times corresponding to the segments
% assigned to each cluster:
spike_times_cl = GetSpikeTimesFromAssignments(segment_centers_cl, assignments);
%%@ seems like mystery operation
% Diagnostics for waveform initialization:
% Fig 5 shows the data segments projected onto the first two principal components,
% and the identified clusters. Fig 6 shows the waveforms associated with each
% cluster. The visualization function also prints out a table of distances between
% waveforms, and each of their distances to the origin (i.e., their norm). All
% distances are relative to the noise amplitude. These numbers provide some
% indication of how likely it is that waveforms could be confused with each other, or
% with background noise.
% At this point, waveforms of all potential cells should be identified (NOTE:
% spike identification errors are irrelevant - only the WAVEFORMS matter). If
% not, may need to adjust params.clustering.num_waveforms and re-run the clustering
% to identify more/fewer cells. May also wish to adjust the
% params.general.waveform_len, increasing it if the waveforms (Fig 5) are being
% chopped off, or shortening it if there is a substantial boundary region of silence.
% If you do this, you should go back and re-run starting from the whitening step,
% since the waveform_len affects the identification of noise regions.
closeIfOpen(params.plotting.first_fig_num+[1,3]); % close Fourier and clustering figures
% To speed up computation, partition data into "snippets", which will be processed
% independently. Snippets have duration between min/max_snippet_len and are separated
% by "noise zones" in which the RMS of the waveforms does not surpass "threshold" for
% at least "min_separation_len" consecutive samples. Choose a conservative (low)
% threshold to avoid dropping spikes!
Split into sections
%% ---------------------------------------------------------------------------------- % Preprocessing Step 2: Estimate noise covariance and whiten
% Estimate and whiten the noise, assuming channel/time separability. This makes the % L2-norm portion of the CBP objective into a sum of squares, simplifying the % computation and improving computational efficiency. % - find "noise zones" % - compute noise auto-correlation function from these zones % - whiten each channel in time with the inverse matrix sqrt of the auto-correlation % - whiten across channels with inverse matrix sqrt of the covariance matrix. %%@ first autocorrelate to get variance for each channel independently, %%then rather than x-corr everything, take correlation of chan 1 and chan 2 %%and then do blah blah blah % params.whitening includes: % - noise_threshold: noise zones must have cross-channel L2-norm less than this. % - min_zone_len: noise zones must have duration of at least this many samples. % - num_acf_lags: number of samples over which auto-correlation is estimated.
data_pp = WhitenNoise(filtdata, params); %%@ You don't care if you "whiten" the signal because you're reconstructing %%@ on the spot
%% ---------------------------------------------------------------------------------- % Preprocessing Step 3: Estimate initial spike waveforms
% Initialize spike waveforms, using clustering: % - collect data segments with L2-norm larger than params.clustering.spike_threshold % - align peaks of waveforms within these segments % - Perform PCA on these segments, select a subspace containing desired percent of variance % - Perform K-means clustering in this subspace % params.clustering includes: % - num_waveforms : number of cells to be recovered % - spike_threshold : threshold used to pick spike-containing data segments (in stdevs) % - percent_variance : used to determine number of principal components to use for clustering
[centroids, assignments, X, XProj, PCs, segment_centers_cl] = ... EstimateInitialWaveforms(data_pp, params);
init_waveforms = waveformMat2Cell(centroids, params.general.waveform_len, ... data_pp.nchan, params.clustering.num_waveforms);
%%@Eero is not a fan of this ^^
if (params.general.plot_diagnostics) VisualizeClustering(XProj, assignments, X, data_pp.nchan, ... params.plotting.first_fig_num+3, ... params.clustering.spike_threshold); end
% For later comparisons, also compute spike times corresponding to the segments % assigned to each cluster: spike_times_cl = GetSpikeTimesFromAssignments(segment_centers_cl, assignments); %%@ seems like mystery operation
% Diagnostics for waveform initialization: % Fig 5 shows the data segments projected onto the first two principal components, % and the identified clusters. Fig 6 shows the waveforms associated with each % cluster. The visualization function also prints out a table of distances between % waveforms, and each of their distances to the origin (i.e., their norm). All % distances are relative to the noise amplitude. These numbers provide some % indication of how likely it is that waveforms could be confused with each other, or % with background noise.
% At this point, waveforms of all potential cells should be identified (NOTE: % spike identification errors are irrelevant - only the WAVEFORMS matter). If % not, may need to adjust params.clustering.num_waveforms and re-run the clustering % to identify more/fewer cells. May also wish to adjust the % params.general.waveform_len, increasing it if the waveforms (Fig 5) are being % chopped off, or shortening it if there is a substantial boundary region of silence. % If you do this, you should go back and re-run starting from the whitening step, % since the waveform_len affects the identification of noise regions.
%% ----------------------------------------------------------------------------------- % CBP preprocessing
closeIfOpen(params.plotting.first_fig_num+[1,3]); % close Fourier and clustering figures
% To speed up computation, partition data into "snippets", which will be processed % independently. Snippets have duration between min/max_snippet_len and are separated % by "noise zones" in which the RMS of the waveforms does not surpass "threshold" for % at least "min_separation_len" consecutive samples. Choose a conservative (low) % threshold to avoid dropping spikes!
[snippets, breaks, snippet_lens, snippet_centers, snippet_idx] = ... PartitionSignal(data_pp.data, params.partition);
%%@Should be inside the black box ^^