ValueError: low >= high when adding Lyman-alpha forest to BOSS/DR9 QLF

moustakas commented 6 years ago

from astropy.cosmology import Planck13
from simqso.sqgrids import *
from simqso import sqbase
from simqso.sqmodels import BOSS_DR9_PLEpivot,get_BossDr9_model_vars
wave = sqbase.fixed_R_dispersion(1000,20e4,1000)
nqso = 10
np.random.seed(12345)
zin = 2.0 + np.random.rand(nqso)
kcorr = sqbase.ContinuumKCorr('DECam-r',1450,effWaveBand='SDSS-r')
qsos = generateQlfPoints(BOSS_DR9_PLEpivot(cosmo=Planck13),
                         (17,22),(2.0,3.0),
                         kcorr=kcorr,zin=zin,
                         qlfseed=12345,gridseed=67890)
sedVars = get_BossDr9_model_vars(qsos,wave,0,noforest=False)
qsos.addVars(sedVars)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-e6c2e1a93eb5> in <module>()
     14                          qlfseed=12345,gridseed=67890)
     15 sedVars = get_BossDr9_model_vars(qsos,wave,0,noforest=False)
---> 16 qsos.addVars(sedVars)

~/repos/simqso/simqso/sqgrids.py in addVars(self, newVars, noVals)
    950         '''
    951         for var in newVars:
--> 952             self.addVar(var,noVals=noVals)
    953     def addData(self,data):
    954         self.data = hstack([self.data,data])

~/repos/simqso/simqso/sqgrids.py in addVar(self, var, noVals)
    942         self.varNames.append(var.name)
    943         if not noVals:
--> 944             vals = var(self.nObj)
    945             if vals is not None:
    946                 self.data[var.name] = vals

~/repos/simqso/simqso/sqgrids.py in __call__(self, n, **kwargs)
    366         if self.seed:
    367             np.random.seed(self.seed)
--> 368         vals = self.sampler(n,**kwargs)
    369         if vals is not None:
    370             vals = np.array(vals).astype(self.dtype)

~/repos/simqso/simqso/sqgrids.py in __call__(self, n, **kwargs)
     52         pass
     53     def __call__(self,n,**kwargs):
---> 54         return self.sample(n,**kwargs)
     55     def __str__(self):
     56         s = str((self.low,self.high))

~/repos/simqso/simqso/sqgrids.py in sample(self, n, **kwargs)
    109         super(RandomSubSampler,self).__init__(0,n)
    110     def sample(self,n,**kwargs):
--> 111         return np.random.randint(self.low,self.high,n)
    112 
    113 class ConstSampler(Sampler):

mtrand.pyx in mtrand.RandomState.randint()

ValueError: low >= high

imcgreer commented 6 years ago

Sorry for the lack of documentation for get_BossDr9_model_vars(). The third argument is the number of independent sightlines to generate for the forest, and I set it to 0 in the example for noforest=True. If you set it to something >0 it should work. That said, I will make some changes so that the default behavior for 0 is to generate one sightline per quasar, which I think was something I intended anyway.

moustakas commented 6 years ago

Thanks @imcgreer. The snippet of code above now works (after leaving off the nSightLines input). However, the way I'm calling essentially the same code (but this time within desisim.templates) is failing with the error

[snip]
~/repos/simqso/simqso/hiforest.py in __init__(self, wave, forestModel, numSightLines, **kwargs)
    342         dloglam = np.diff(logwave)
--> 344         if not np.allclose(dloglam,dloglam[0]):
    345             raise ValueError("Must have constant dloglam")
    346         specR = dloglam[0]**-1

ValueError: Must have constant dloglam

It looks like get_BossDr9_model_vars is altering the input wavelength array, so that when I call it twice it is no longer the same as what sqbase.fixed_R_dispersion generated.

Any thoughts?

moustakas commented 6 years ago

Apologies that I can't easily make a MWE -- I can't seem to reproduce this issue using the relevant bits of your notebook but somehow the wavelength array is changing internally. Here's my while loop if it's helpful (which is necessary because I generate spectra and then apply DESI target selection and so I need to iterate until I have the desired number of spectra) -- https://github.com/desihub/desisim/blob/simqso/py/desisim/templates.py#L2222-L2242

On the first iteration everything is fine but on the second iteration the Must have constant dloglam error is raised (even though the check of the input wavelength array passes!).

imcgreer commented 6 years ago

after updating to ed3cf614c283756d103e17f8c397634bb85b5328, try this at the end of your snippet:

from simqso.hiforest import IGMTransmissionGrid
sedVars = get_BossDr9_model_vars(qsos, iterwave, noforest=True)
igmGrid = IGMTransmissionGrid(iterwave,sqmodels.forestModels['Worseck&Prochaska2011'],
  len(qsos.z),zmax=qsos.z.max(),voigtcache=False)
hiVar = HIAbsorptionVar(igmGrid,subsample=False)
qsos.addVar(hiVar)

There is an instance of global memory in the form of a lookup table of Voigt profiles. This turns it off. I'm curious if that's the problem.

moustakas commented 6 years ago

Still getting the "Must have constant dloglam" error on the second iteration. Here's the relevant bit of code (not committed yet) --

from simqso.sqgrids import generateQlfPoints, ConstSampler, HIAbsorptionVar
from simqso.sqmodels import get_BossDr9_model_vars, forestModels
from simqso.sqrun import buildSpectraBulk
from simqso.hiforest import IGMTransmissionGrid

[snip]

sedVars = get_BossDr9_model_vars(qsos, iterwave, noforest=True)

# Add hot dust.
self.sublimdust.set_associated_var(sedVars[0])
self.hotdust.set_associated_var(sedVars[0])
qsos.addVars(sedVars+[self.sublimdust, self.hotdust])

# Add the Lyman-alpha forest (optionally).
if lyaforest:
    igmGrid = IGMTransmissionGrid(iterwave, forestModels['Worseck&Prochaska2011'],
                                                          len(qsos.z), 
                                                          zmax=qsos.z.max(), 
                                                          voigtcache=False)
    hiVar = HIAbsorptionVar(igmGrid, subsample=False)
    qsos.addVar(hiVar)

imcgreer commented 6 years ago

This might be related to #14. Trying to reproduce. One workaround is that the simulations are rather fast, so you could just hugely overproduce the initial list so that you end up with enough after applying the selection. But it's worth tracking down the bug anyway.

imcgreer commented 6 years ago

I wasn't able to reproduce the error by trying to replicate the loop functionality in a standalone script. I got desisim up and running and was able to reproduce it immediately, on the second iteration. I couldn't see how simqso could be changing the wavelength array, then I noticed that self.basewave was being used by other functions. So I made this patch:

diff --git a/py/desisim/templates.py b/py/desisim/templates.py
index 4c48e51..f6e43fb 100755
--- a/py/desisim/templates.py
+++ b/py/desisim/templates.py
@@ -2046,6 +2046,7 @@ class SIMQSO():

         self.basewave = fixed_R_dispersion(500.0, 20e4, 1000)
         self.cosmo = cosmology.core.FlatLambdaCDM(70.0, 0.3)
+        self.basewave2 = self.basewave.copy()

         self.lambda_lylimit = 911.76
         self.lambda_lyalpha = 1215.67
@@ -2232,8 +2233,8 @@ class SIMQSO():
             # emission-line template, and an iron emission-line template.
             print(itercount)

-            iterwave = self.basewave.copy()
-            if np.count_nonzero(self.basewave - iterwave) > 0:
+            iterwave = self.basewave2.copy()
+            if np.count_nonzero(self.basewave2 - iterwave) > 0:
                 raise ValueError('Wavelength array changed!')

             logwave = np.log(iterwave)

This isolates the "basewave" used by simqso to its own array. With this patch the code runs to completion without tripping the logwave error. It looks like the memory bug is not in simqso.

imcgreer commented 6 years ago

I think I've identified the spot:

/home/ian/soft/anaconda/lib/python2.7/site-packages/desisim-0.22.0.dev1184-py2.7.egg/desisim/templates.pyc in make_templates(self, nmodel, zrange, rmagrange, seed, redshift, input_meta, maxiter, lyaforest, nocolorcuts, return_qsometa, verbose)
   2267             foo = self.basewave.copy()
   2268             maggies = self.decamwise.get_ab_maggies(flux, self.basewave, mask_invalid=True)
-> 2269             assert np.all(self.basewave==foo)
   2270 
   2271             if self.normfilter in self.decamwise.names:

AssertionError:

imcgreer commented 6 years ago

Yup, with this small change I can make the simqso crash go away:

diff --git a/py/desisim/templates.py b/py/desisim/templates.py
index 4c48e51..a89d2fe 100755
--- a/py/desisim/templates.py
+++ b/py/desisim/templates.py
@@ -2263,7 +2263,7 @@ class SIMQSO():

             # Synthesize photometry to determine which models will pass the
             # color-cuts.
-            maggies = self.decamwise.get_ab_maggies(flux, self.basewave, mask_invalid=True)
+            maggies = self.decamwise.get_ab_maggies(flux, self.basewave.copy(), mask_invalid=True)

             if self.normfilter in self.decamwise.names:
                 normmaggies = np.array(maggies[self.normfilter])

moustakas commented 6 years ago

Thanks, @imcgreer

imcgreer / simqso

ValueError: low >= high when adding Lyman-alpha forest to BOSS/DR9 QLF #20