slaclab / pysmurf

Other
2 stars 9 forks source link

Estimate Phase Delay Fails waiting for PVs to update #673

Open jlashner opened 2 years ago

jlashner commented 2 years ago

Describe the bug

The estimate_phase_delay function currently fails about a quarter of the time because it takes too long for PV's to update. There doesn't seem to be anything wrong with the backend, so we are usually able to re-run the function and get it to work the second or third attempt. However this makes it difficult to write functions that automate the setup procedure, since when running estimate phase delay for all 8 bands there is a very good chance that one of the bands will fail.

Here is the full traceback for the error:

ExceptionTraceback (most recent call last)
<ipython-input-24-81cd45b6b2a5> in <module>
----> 1 S.estimate_phase_delay(band,make_plot=True,save_plot=True,show_plot=False)
/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:
/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in estimate_phase_delay(self, band, nsamp, make_plot, show_plot, save_plot, save_data, n_scan, timestamp, uc_att, dc_att, freq_min, freq_max)
    304         freq_cable, resp_cable = self.full_band_resp(
    305             band, nsamp=nsamp, make_plot=make_plot,
--> 306             save_data=save_data, n_scan=n_scan)
    307 
    308         idx_cable = np.where( (freq_cable > freq_min) & (freq_cable < freq_max) )
/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:
/usr/local/src/pysmurf/python/pysmurf/client/tune/smurf_tune.py in full_band_resp(self, band, n_scan, nsamp, make_plot, save_plot, show_plot, save_data, timestamp, save_raw_data, correct_att, swap, hw_trigger, write_log, return_plot_path, check_if_adc_is_saturated)
    643             #If ADC is saturated, throws an exception.
    644             if check_if_adc_is_saturated:
--> 645                 adc_is_saturated = self.check_adc_saturation(band)
    646                 if adc_is_saturated:
    647                     raise ValueError('Playing the noise file saturates the '+
/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:
/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in check_adc_saturation(self, band)
   1522         """
   1523         adc = self.read_adc_data(band, data_length=2**12, make_plot=False,
-> 1524                   save_data=False, show_plot=False, save_plot=False)
   1525         adc_max   = int(np.max((adc.real.max(), adc.imag.max())))
   1526         adc_min   = int(np.min((adc.real.min(), adc.imag.min())))
/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:
/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in read_adc_data(self, band, data_length, hw_trigger, make_plot, save_data, timestamp, show_plot, save_plot, plot_ylimits)
   1608 
   1609         res = self.read_stream_data_daq(data_length, bay=bay,
-> 1610             hw_trigger=hw_trigger)
   1611         dat = res[1] + 1.j * res[0]
   1612 
/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:
/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in read_stream_data_daq(self, data_length, bay, hw_trigger, write_log)
   1497 
   1498         time.sleep(.1)
-> 1499         sg.wait()
   1500 
   1501         vals = sg.get_values()
/usr/local/src/pysmurf/python/pysmurf/client/command/sync_group.py in wait(self, epics_poll)
     67                 time.sleep(.001)
     68             if time.time() - t0 > self.timeout:
---> 69                 raise Exception('Timeout waiting for PVs to update.')
Exception: Timeout waiting for PVs to update.

To Reproduce

Try running estimate phase delay for 8 bands in a row.

agustiner commented 2 years ago

After following the call stack, the simple fix appears to be increasing the timeout from 30 seconds. The call is fairly low-level, so changing it might negatively impact other parts of pysmurf. Also, 30 seconds is already quite long, which might indicate some underlying issue. Looking further into it.

https://github.com/slaclab/pysmurf/blob/e12b783a829129936840010d0cf8ee5a9defb95d/python/pysmurf/client/command/sync_group.py#L26

jlashner commented 2 years ago

Curious to see if this fixes it. Have you been able to recreate this problem on a SLAC system?

jlashner commented 1 year ago

Currently having this issue in the most recent version of pysmurf.


ExceptionTraceback (most recent call last)
<ipython-input-21-155fd56fe9a0> in <module>
      1 uxm_setup.uxm_setup(
----> 2     S, cfg, bands=bands, skip_estimate_attens=True, skip_setup_amps=True,
      3 )
      4 

/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:

/sodetlib/sodetlib/operations/uxm_setup.py in uxm_setup(S, cfg, bands, show_plots, update_cfg, skip_estimate_attens, skip_phase_delay, skip_setup_amps)
    475         sdl.set_session_data(S, 'timestamps', summary['timestamps'])
    476         success, summary['setup_phase_delay'] = setup_phase_delay(
--> 477             S, cfg, bands, update_cfg=update_cfg)
    478         if not success:
    479             S.log("UXM Setup failed on setup phase delay step")

/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:

/sodetlib/sodetlib/operations/uxm_setup.py in setup_phase_delay(S, cfg, bands, update_cfg, modify_attens)
    217         #S.set_att_dc(b, 30)
    218         summary['bands'].append(int(b))
--> 219         band_delay_us, _ = S.estimate_phase_delay(b, make_plot=True, show_plot=False)
    220         band_delay_us = float(band_delay_us)
    221         summary['band_delay_us'].append(band_delay_us)

/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:

/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in estimate_phase_delay(self, band, nsamp, make_plot, show_plot, save_plot, save_data, n_scan, timestamp, uc_att, dc_att, freq_min, freq_max)
    290         freq_cable, resp_cable = self.full_band_resp(
    291             band, nsamp=nsamp, make_plot=make_plot,
--> 292             save_data=save_data, n_scan=n_scan)
    293 
    294         idx_cable = np.where( (freq_cable > freq_min) & (freq_cable < freq_max) )

/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:

/usr/local/src/pysmurf/python/pysmurf/client/tune/smurf_tune.py in full_band_resp(self, band, n_scan, nsamp, make_plot, save_plot, show_plot, save_data, timestamp, save_raw_data, correct_att, swap, hw_trigger, write_log, return_plot_path, check_if_adc_is_saturated)
    643             #If ADC is saturated, throws an exception.
    644             if check_if_adc_is_saturated:
--> 645                 adc_is_saturated = self.check_adc_saturation(band)
    646                 if adc_is_saturated:
    647                     raise ValueError('Playing the noise file saturates the '+

/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:

/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in check_adc_saturation(self, band)
   1487         """
   1488         adc = self.read_adc_data(band, data_length=2**12, make_plot=False,
-> 1489                   save_data=False, show_plot=False, save_plot=False)
   1490         adc_max   = int(np.max((adc.real.max(), adc.imag.max())))
   1491         adc_min   = int(np.min((adc.real.min(), adc.imag.min())))

/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:

/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in read_adc_data(self, band, data_length, hw_trigger, make_plot, save_data, timestamp, show_plot, save_plot, plot_ylimits)
   1573 
   1574         res = self.read_stream_data_daq(data_length, bay=bay,
-> 1575             hw_trigger=hw_trigger)
   1576         dat = res[1] + 1.j * res[0]
   1577 

/usr/local/src/pysmurf/python/pysmurf/client/util/pub.py in wrapper(S, pub_action, *args, **kwargs)
     48                     S.pub._action_ts = S.get_timestamp()
     49 
---> 50                 rv = func(S, *args, **kwargs)
     51 
     52             finally:

/usr/local/src/pysmurf/python/pysmurf/client/util/smurf_util.py in read_stream_data_daq(self, data_length, bay, hw_trigger, write_log)
   1462 
   1463         time.sleep(.1)
-> 1464         sg.wait()
   1465 
   1466         vals = sg.get_values()

/usr/local/src/pysmurf/python/pysmurf/client/command/sync_group.py in wait(self, epics_poll)
     67                 time.sleep(.001)
     68             if time.time() - t0 > self.timeout:
---> 69                 raise Exception('Timeout waiting for PVs to update.')

Exception: Timeout waiting for PVs to update.
swh76 commented 1 year ago

@jlashner trying to fix this. Running

counter=0 
while True: 
   for band in range(8): 
      print(f'Band{band} counter={counter}') 
      S.estimate_phase_delay(band,show_plot=False) 
   counter+=1

on a production carrier with two production LB AMCs and so far this has run 20+ times on all 8 bands without crashing. When you run estimate_phase_delay, do you give it any other non-default arguments? And when you're running it, what are the conditions? E.g. do you run it on multiple slots at the same time? I am running this loop on only one carrier, right after running setup.