HERA-Team / hera_pspec

HERA power spectrum estimation code and data formats
http://hera-pspec.readthedocs.io/en/latest/
BSD 3-Clause "New" or "Revised" License

Improve Support for Redundantly Averaged Data in Noise Calculations #384

Open jsdillon opened 1 year ago

jsdillon commented 1 year ago

I'm trying to predict thermal noise from autocorrelations using PSpecData.pspec().

I'm doing the following:

import hera_pspec as hp

# Set up cosmology and beam, then build a PSpecData object from the same dataset twice
cosmo = hp.conversions.Cosmo_Conversions()
pspecbeam = hp.pspecbeam.PSpecBeamUV(uvb_ps, cosmo=cosmo)
ds = hp.PSpecData(dsets=[hd2, hd2], beam=pspecbeam)

# Estimate power spectra with analytic covariances built from the autocorrelations
uvp = ds.pspec([ANTPAIR], [ANTPAIR], dsets=(0, 1),
               pols=[('pI', 'pI'), ('pQ', 'pQ')],
               spw_ranges=[(bs.start, bs.stop) for bs in band_slices],
               taper="bh",
               store_cov=True,
               cov_model='autos',
              )

This gives the following error traceback:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In [49], line 1
----> 1 uvp = ds.pspec([ANTPAIR], [ANTPAIR], dsets=(0, 1), 
      2                pols=[('pI', 'pI'), ('pQ', 'pQ')], 
      3                spw_ranges=[(bs.start, bs.stop) for bs in band_slices],
      4                taper="bh",
      5                store_cov=True,
      6                cov_model='autos',
      7               )

File /lustre/aoc/projects/hera/jsdillon/mambaforge/envs/python3/lib/python3.9/site-packages/hera_pspec/pspecdata.py:3345, in PSpecData.pspec(self, bls1, bls2, dsets, pols, n_dlys, input_data_weight, norm, taper, sampling, little_h, spw_ranges, symmetric_taper, baseline_tol, store_cov, store_cov_diag, return_q, store_window, exact_windows, ftbeam, verbose, filter_extensions, exact_norm, history, r_params, cov_model, known_cov, allow_fft)
   3342 if store_cov or store_cov_diag:
   3343     if nper_chunk == 1: logger.info(" Building q_hat covariance...")
   3344     cov_q_real, cov_q_imag, cov_real, cov_imag \
-> 3345         = self.get_analytic_covariance(key1, key2, Mv,
   3346                                        exact_norm=exact_norm,
   3347                                        pol=pol,
   3348                                        model=cov_model,
   3349                                        known_cov=known_cov, )
   3351     if self.primary_beam != None:
   3352         cov_real = cov_real * (scalar)**2.

File /lustre/aoc/projects/hera/jsdillon/mambaforge/envs/python3/lib/python3.9/site-packages/hera_pspec/pspecdata.py:1897, in PSpecData.get_analytic_covariance(self, key1, key2, M, exact_norm, pol, model, known_cov)
   1893 check_uniform_input = False
   1894 if model != 'foreground_dependent':
   1895 # When model is 'foreground_dependent', since we are processing the outer products of visibilities from different times,
   1896 # we are expected to have time-dependent inputs, thus check_uniform_input is always set to be False here.
-> 1897     C11_first = self.C_model(key1, model=model, known_cov=known_cov, time_index=0)
   1898     C11_last = self.C_model(key1, model=model, known_cov=known_cov, time_index=self.dsets[0].Ntimes-1)
   1899     if np.isclose(C11_first, C11_last).all():

File /lustre/aoc/projects/hera/jsdillon/mambaforge/envs/python3/lib/python3.9/site-packages/hera_pspec/pspecdata.py:673, in PSpecData.C_model(self, key, model, time_index, known_cov, include_extension)
    671 elif model == 'autos':
    672     spw_range = self.get_spw(include_extension=include_extension)
--> 673     self.set_C({Ckey: np.diag(utils.variance_from_auto_correlations(self.dsets[dset], bl, spw_range, time_index))})
    674 else:
    675     raise ValueError("didn't recognize Ckey {}".format(Ckey))

File /lustre/aoc/projects/hera/jsdillon/mambaforge/envs/python3/lib/python3.9/site-packages/hera_pspec/utils.py:117, in variance_from_auto_correlations(uvd, bl, spw_range, time_index)
    115 spw = slice(spw_range[0], spw_range[1])
    116 x_bl1 = uvd.get_data(bl1)[time_index, spw]
--> 117 x_bl2 = uvd.get_data(bl2)[time_index, spw]
    118 nsample_bl = uvd.get_nsamples(bl)[time_index, spw]
    119 nsample_bl = np.where(nsample_bl>0, nsample_bl, np.median(uvd.nsample_array[:, spw, :]))

File /lustre/aoc/projects/hera/jsdillon/mambaforge/envs/python3/lib/python3.9/site-packages/pyuvdata/uvdata/uvdata.py:4148, in UVData.get_data(self, key1, key2, key3, squeeze, force_copy)
   4146 if len(key) > 3:
   4147     raise ValueError("no more than 3 key values can be passed")
-> 4148 ind1, ind2, indp = self._key2inds(key)
   4149 out = self._smart_slicing(
   4150     self.data_array, ind1, ind2, indp, squeeze=squeeze, force_copy=force_copy
   4151 )
   4152 return out

File /lustre/aoc/projects/hera/jsdillon/mambaforge/envs/python3/lib/python3.9/site-packages/pyuvdata/uvdata/uvdata.py:3796, in UVData._key2inds(self, key)
   3794 blt_ind2 = self.antpair2ind(key[1], key[0])
   3795 if len(blt_ind1) + len(blt_ind2) == 0:
-> 3796     raise KeyError(
   3797         "Antenna pair {pair} not found in "
   3798         "data".format(pair=(key[0], key[1]))
   3799     )
   3800 if type(key[2]) is str:
   3801     # pol is str
   3802     if len(blt_ind1) > 0:

KeyError: 'Antenna pair (4, 4) not found in data'

I also get the same error when I use hp.utils.uvp_noise_error().

Basically, what's happening is that my data contains (0, 0) as its only autocorrelation, but the baseline I'm interested in is (0, 4). When the data is redundantly averaged, there's usually only one autocorrelation per polarization, and there's no guarantee that either antenna in the key for a given baseline is the antenna used to key that autocorrelation. I'd like some elegant way to handle this, where the code knows to go look for the single auto that's in the data when the data is redundantly averaged.
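Something like the following fallback is what I have in mind for utils.variance_from_auto_correlations. This is just a sketch: find_redundant_auto is a hypothetical helper name, not existing hera_pspec or pyuvdata API, and the "single auto" heuristic is an assumption about how redundantly averaged files are keyed.

def find_redundant_auto(uvd, ant):
    """Return the antenna pair of an autocorrelation usable for antenna `ant`.

    Hypothetical helper: prefer (ant, ant) if it is in the data; otherwise, if
    only a single autocorrelation baseline is present (the redundantly
    averaged case), fall back to that one.
    """
    autos = [ap for ap in uvd.get_antpairs() if ap[0] == ap[1]]
    if (ant, ant) in autos:
        return (ant, ant)
    if len(autos) == 1:
        # Redundantly averaged data: the single auto stands in for every antenna.
        return autos[0]
    raise KeyError(f"No autocorrelation found to stand in for antenna {ant}")

# Usage sketch with the data from this issue, where hd2 only contains the (0, 0) auto:
bl = (0, 4)
x_bl1 = hd2.get_data(find_redundant_auto(hd2, bl[0]))  # uses (0, 0)
x_bl2 = hd2.get_data(find_redundant_auto(hd2, bl[1]))  # falls back to (0, 0) instead of raising KeyError on (4, 4)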