askap-vast / vast-tools

A collection of tools that are useful for the VAST project and for exploration of results from the VAST Pipeline.
https://vast-survey.org/vast-tools/
MIT License
8 stars 0 forks source link

_validate_files missing selavy catalogues #457

Closed ddobie closed 1 year ago

ddobie commented 1 year ago
from vasttools.query import Query
my_query = Query(source_names=['PSR J0952-0607'], epochs='all-vast')
my_query.find_fields()

my_query.find_sources()

The `my_query.find_sources()` call throws the following error:

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipykernel_95/2349753045.py in <module>
----> 1 my_query.find_sources()

/opt/conda/lib/python3.9/site-packages/vasttools/query.py in find_sources(self)
   1176         self.logger.debug("Getting components...")
   1177         results = (
-> 1178             dd.from_pandas(self.sources_df, self.ncpu)
   1179             .groupby('selavy')
   1180             .apply(

/opt/conda/lib/python3.9/site-packages/dask/base.py in compute(self, **kwargs)
    286         dask.base.compute
    287         """
--> 288         (result,) = compute(self, traverse=False, **kwargs)
    289         return result
    290 

/opt/conda/lib/python3.9/site-packages/dask/base.py in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    569         postcomputes.append(x.__dask_postcompute__())
    570 
--> 571     results = schedule(dsk, keys, **kwargs)
    572     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    573 

/opt/conda/lib/python3.9/site-packages/dask/multiprocessing.py in get(dsk, keys, num_workers, func_loads, func_dumps, optimize_graph, pool, chunksize, **kwargs)
    217     try:
    218         # Run
--> 219         result = get_async(
    220             pool.submit,
    221             pool._max_workers,

/opt/conda/lib/python3.9/site-packages/dask/local.py in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    505                             _execute_task(task, data)  # Re-execute locally
    506                         else:
--> 507                             raise_exception(exc, tb)
    508                     res, worker_id = loads(res_info)
    509                     state["cache"][key] = res

/opt/conda/lib/python3.9/site-packages/dask/local.py in reraise(exc, tb)
    313     if exc.__traceback__ is not tb:
    314         raise exc.with_traceback(tb)
--> 315     raise exc
    316 
    317 

/opt/conda/lib/python3.9/site-packages/dask/local.py in execute_task()
    218     try:
    219         task, data = loads(task_info)
--> 220         result = _execute_task(task, data)
    221         id = get_id()
    222         result = dumps((result, id))

/opt/conda/lib/python3.9/site-packages/dask/core.py in _execute_task()
    117         # temporaries by their reference count and can execute certain
    118         # operations in-place.
--> 119         return func(*(_execute_task(a, cache) for a in args))
    120     elif not ishashable(arg):
    121         return arg

/opt/conda/lib/python3.9/site-packages/dask/optimization.py in __call__()
    967         if not len(args) == len(self.inkeys):
    968             raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 969         return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
    970 
    971     def __reduce__(self):

/opt/conda/lib/python3.9/site-packages/dask/core.py in get()
    147     for key in toposort(dsk):
    148         task = dsk[key]
--> 149         result = _execute_task(task, cache)
    150         cache[key] = result
    151     result = _execute_task(out, cache)

/opt/conda/lib/python3.9/site-packages/dask/core.py in _execute_task()
    117         # temporaries by their reference count and can execute certain
    118         # operations in-place.
--> 119         return func(*(_execute_task(a, cache) for a in args))
    120     elif not ishashable(arg):
    121         return arg

/opt/conda/lib/python3.9/site-packages/dask/utils.py in apply()
     35 def apply(func, args, kwargs=None):
     36     if kwargs:
---> 37         return func(*args, **kwargs)
     38     else:
     39         return func(*args)

/opt/conda/lib/python3.9/site-packages/dask/dataframe/core.py in apply_and_enforce()
   6066     func = kwargs.pop("_func")
   6067     meta = kwargs.pop("_meta")
-> 6068     df = func(*args, **kwargs)
   6069     if is_dataframe_like(df) or is_series_like(df) or is_index_like(df):
   6070         if not len(df):

/opt/conda/lib/python3.9/site-packages/dask/dataframe/groupby.py in _groupby_slice_apply()
    168     if key:
    169         g = g[key]
--> 170     return g.apply(func, *args, **kwargs)
    171 
    172 

/opt/conda/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in apply()
   1565                     old_msg, FutureWarning, new_msg
   1566                 ) if is_np_func else nullcontext():
-> 1567                     result = self._python_apply_general(f, self._selected_obj)
   1568             except TypeError:
   1569                 # gh-20949

/opt/conda/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in _python_apply_general()
   1627             data after applying f
   1628         """
-> 1629         values, mutated = self.grouper.apply(f, data, self.axis)
   1630         if not_indexed_same is None:
   1631             not_indexed_same = mutated or self.mutated

/opt/conda/lib/python3.9/site-packages/pandas/core/groupby/ops.py in apply()
    837             # group might be modified
    838             group_axes = group.axes
--> 839             res = f(group)
    840             if not mutated and not _is_indexed_like(res, group_axes, axis):
    841                 mutated = True

/opt/conda/lib/python3.9/site-packages/vasttools/query.py in _get_components()
   1655         master = pd.DataFrame()
   1656 
-> 1657         selavy_df = read_selavy(selavy_file)
   1658 
   1659         if self.settings['stokes'] != "I":

/opt/conda/lib/python3.9/site-packages/vasttools/utils.py in read_selavy()
    330 
    331     if selavy_path.endswith(".xml") or selavy_path.endswith(".vot"):
--> 332         df = Table.read(
    333             selavy_path, format="votable", use_names_over_ids=True
    334         ).to_pandas()

/opt/conda/lib/python3.9/site-packages/astropy/table/connect.py in __call__()
     59         descriptions = kwargs.pop('descriptions', None)
     60 
---> 61         out = registry.read(cls, *args, **kwargs)
     62 
     63         # For some readers (e.g., ascii.ecsv), the returned `out` class is not

/opt/conda/lib/python3.9/site-packages/astropy/io/registry.py in read()
    525 
    526         reader = get_reader(format, cls)
--> 527         data = reader(*args, **kwargs)
    528 
    529         if not isinstance(data, cls):

/opt/conda/lib/python3.9/site-packages/astropy/io/votable/connect.py in read_table_votable()
     84     """
     85     if not isinstance(input, (VOTableFile, VOTable)):
---> 86         input = parse(input, table_id=table_id, verify=verify, **kwargs)
     87 
     88     # Parse all table objects

/opt/conda/lib/python3.9/site-packages/astropy/utils/decorators.py in wrapper()
    534                     warnings.warn(message, warning_type, stacklevel=2)
    535 
--> 536             return function(*args, **kwargs)
    537 
    538         return wrapper

/opt/conda/lib/python3.9/site-packages/astropy/io/votable/table.py in parse()
    161         config['filename'] = source
    162 
--> 163     with iterparser.get_xml_iterator(
    164             source,
    165             _debug_python_based_parser=_debug_python_based_parser) as iterator:

/opt/conda/lib/python3.9/contextlib.py in __enter__()
    115         del self.args, self.kwds, self.func
    116         try:
--> 117             return next(self.gen)
    118         except StopIteration:
    119             raise RuntimeError("generator didn't yield") from None

/opt/conda/lib/python3.9/site-packages/astropy/utils/xml/iterparser.py in get_xml_iterator()
    158               event.
    159     """
--> 160     with _convert_to_fd_or_read_function(source) as fd:
    161         if _debug_python_based_parser:
    162             context = _slow_iterparse(fd)

/opt/conda/lib/python3.9/contextlib.py in __enter__()
    115         del self.args, self.kwds, self.func
    116         try:
--> 117             return next(self.gen)
    118         except StopIteration:
    119             raise RuntimeError("generator didn't yield") from None

/opt/conda/lib/python3.9/site-packages/astropy/utils/xml/iterparser.py in _convert_to_fd_or_read_function()
     61         return
     62 
---> 63     with data.get_readable_fileobj(fd, encoding='binary') as new_fd:
     64         if sys.platform.startswith('win'):
     65             yield new_fd.read

/opt/conda/lib/python3.9/contextlib.py in __enter__()
    115         del self.args, self.kwds, self.func
    116         try:
--> 117             return next(self.gen)
    118         except StopIteration:
    119             raise RuntimeError("generator didn't yield") from None

/opt/conda/lib/python3.9/site-packages/astropy/utils/data.py in get_readable_fileobj()
    260                 timeout=remote_timeout, sources=sources,
    261                 http_headers=http_headers)
--> 262         fileobj = io.FileIO(name_or_obj, 'r')
    263         if is_url and not cache:
    264             delete_fds.append(fileobj)

FileNotFoundError: [Errno 2] No such file or directory: '/data/vast-survey/VAST/release/EPOCH13/COMBINED/STOKESI_SELAVY/selavy-VAST_0943-06A.EPOCH13.I.conv.components.xml'

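The missing file is known not to exist (see https://github.com/askap-vast/vast-project/wiki/Pilot-Survey-Status-&-Data#epoch-13), but its absence should have been caught by `_validate_files()` rather than surfacing as a `FileNotFoundError` when `find_sources()` tries to read the selavy catalogue.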

ddobie commented 1 year ago

The issue was with the version of vast-tools installed on JupyterHub, not with vast-tools itself. Closing.