Describe the bug
When running in parallel, the following error occurs: TypeError: Expected Node, got SerializableObject. The error was observed when running the Orchestrated dace:cpu Acoustics test in the pyFV3 GitHub workflow.
Failure output:
FAILED tests/savepoint/test_translate.py::test_parallel_savepoint[DynCore-rank=5-call=0] - TypeError: Expected Node, got SerializableObject (<dace.serialize.SerializableObject object at 0x7efbed4f0690>)
================================================== 1 failed, 1 deselected, 232 warnings in 29.09s ===================================================
FAILED
===================================================================== FAILURES ======================================================================
__________________________________________________ test_parallel_savepoint[DynCore-rank=3-call=0] ___________________________________________________
self = <gt4py.cartesian.backend.dace_backend.SDFGManager object at 0x7fbf8d36a710>
def _frozen_sdfg(self, *, origin: Dict[str, Tuple[int, ...]], domain: Tuple[int, ...]):
frozen_hash = shash(origin, domain)
# check if same sdfg already cached on disk
path = self.builder.module_path
basename = os.path.splitext(path)[0]
path = basename + "_" + str(frozen_hash) + ".sdfg"
if path not in self._loaded_sdfgs:
try:
> sdfg = dace.SDFG.from_file(path)
../.conda/envs/pyftest/lib/python3.11/site-packages/gt4py/cartesian/backend/dace_backend.py:398:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename = '/home/Frank.Malatino/pyfv3_fork/.gt_cache_FV3_A/py311_1013/dacecpu/pyFV3/stencils/c_sw/zero_delpc_ptc/m_zero_delpc_ptc__dacecpu_a004db179b_a38a10fd19fc8a4ef448d512c7b50a6ed8c336c75b461b0e75f5279eabc1c222.sdfg'
@staticmethod
def from_file(filename: str) -> 'SDFG':
""" Constructs an SDFG from a file.
:param filename: File name to load SDFG from.
:return: An SDFG.
"""
# Try compressed first. If fails, try uncompressed
try:
with gzip.open(filename, 'rb') as fp:
return SDFG._from_file(fp)
except OSError:
pass
> with open(filename, "rb") as fp:
E FileNotFoundError: [Errno 2] No such file or directory: '/home/Frank.Malatino/pyfv3_fork/.gt_cache_FV3_A/py311_1013/dacecpu/pyFV3/stencils/c_sw/zero_delpc_ptc/m_zero_delpc_ptc__dacecpu_a004db179b_a38a10fd19fc8a4ef448d512c7b50a6ed8c336c75b461b0e75f5279eabc1c222.sdfg'
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/sdfg/sdfg.py:1589: FileNotFoundError
During handling of the above exception, another exception occurred:
case = SavepointCase(savepoint_name='DynCore', data_dir='./test_data/8.1.3/c12_6ranks_standard/dycore', rank=3, i_call=0, tes...te_dyncore.TranslateDynCore object at 0x7fbf90fc7050>, grid=<ndsl.stencils.testing.grid.Grid object at 0x7fbf90ebfe50>)
backend = 'dace:cpu', print_failures = False, failure_stride = 1
subtests = SubTests(ihook=<_pytest.config.compat.PathAwareHookProxy object at 0x7fbfd7ae0f90>, suspend_capture_ctx=<bound method ..._capture_fixture=None>>, request=<SubRequest 'subtests' for <Function test_parallel_savepoint[DynCore-rank=3-call=0]>>)
caplog = <_pytest.logging.LogCaptureFixture object at 0x7fbf8fe57bd0>
threshold_overrides = {'A2B_Ord4': [{'backend': 'dace:cpu', 'max_error': '1e-13'}], 'CS_Profile_2d': [{'backend': 'gt:gpu', 'max_error': 2.5...'cuda', 'max_error': '3e-8', 'near_zero': 1.5e-14}], 'Del2Cubed': [{'backend': 'dace:cpu', 'max_error': '8e-12'}], ...}
grid = 'file', xy_indices = True
@pytest.mark.parallel
@pytest.mark.skipif(
MPI is None or MPI.COMM_WORLD.Get_size() == 1,
reason="Not running in parallel with mpi",
)
def test_parallel_savepoint(
case: SavepointCase,
backend,
print_failures,
failure_stride,
subtests,
caplog,
threshold_overrides,
grid,
xy_indices=True,
):
if MPI.COMM_WORLD.Get_size() % 6 != 0:
layout = (
int(MPI.COMM_WORLD.Get_size() ** 0.5),
int(MPI.COMM_WORLD.Get_size() ** 0.5),
)
communicator = get_tile_communicator(MPI.COMM_WORLD, layout)
else:
layout = (
int((MPI.COMM_WORLD.Get_size() // 6) ** 0.5),
int((MPI.COMM_WORLD.Get_size() // 6) ** 0.5),
)
communicator = get_communicator(MPI.COMM_WORLD, layout)
if case.testobj is None:
pytest.xfail(
f"no translate object available for savepoint {case.savepoint_name}"
)
stencil_config = StencilConfig(
compilation_config=CompilationConfig(backend=backend),
dace_config=DaceConfig(
communicator=communicator,
backend=backend,
),
)
# Increase minimum error threshold for GPU
if stencil_config.is_gpu_backend:
case.testobj.max_error = max(case.testobj.max_error, GPU_MAX_ERR)
case.testobj.near_zero = max(case.testobj.near_zero, GPU_NEAR_ZERO)
if threshold_overrides is not None:
process_override(
threshold_overrides, case.testobj, case.savepoint_name, backend
)
if case.testobj.skip_test:
return
if (grid == "compute") and not case.testobj.compute_grid_option:
pytest.xfail(f"Grid compute option not used for test {case.savepoint_name}")
input_data = dataset_to_dict(case.ds_in)
# run python version of functionality
> output = case.testobj.compute_parallel(input_data, communicator)
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/stencils/testing/test_translate.py:396:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pyFV3/testing/translate_dyncore.py:177: in compute_parallel
acoustic_dynamics(state, timestep=inputs["mdt"], n_map=state.n_map)
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/dsl/dace/orchestration.py:498: in __call__
return wrapped(*arg, **kwarg)
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/dsl/dace/orchestration.py:399: in __call__
sdfg = _parse_sdfg(
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/dsl/dace/orchestration.py:295: in _parse_sdfg
sdfg = daceprog.to_sdfg(
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/parser.py:276: in to_sdfg
sdfg = self._parse(args, kwargs, simplify=simplify, save=save, validate=validate)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/parser.py:492: in _parse
sdfg, cached = self._generate_pdp(args, kwargs, simplify=simplify)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/parser.py:900: in _generate_pdp
sdfg = newast.parse_dace_program(self.name,
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:238: in parse_dace_program
sdfg, _, _, _ = visitor.parse_program(preprocessed_ast.preprocessed_ast.body[0])
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:1230: in parse_program
self.visit_TopLevel(stmt)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/astutils.py:489: in visit_TopLevel
return self.visit(node)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:1206: in visit
return super().visit(node)
../.conda/envs/pyftest/lib/python3.11/ast.py:418: in visit
return visitor(node)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:2327: in visit_For
laststate, first_loop_state, last_loop_state, _ = self._recursive_visit(node.body,
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:2192: in _recursive_visit
self.visit_TopLevel(stmt)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/astutils.py:487: in visit_TopLevel
return visitor(node)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:4625: in visit_TopLevelExpr
self.visit_Call(node.value)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:4451: in visit_Call
return self._parse_sdfg_call(funcname, func, node)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:3769: in _parse_sdfg_call
raise ex
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:3746: in _parse_sdfg_call
sdfg = fcopy.__sdfg__(*fargs, **fkwargs)
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/dsl/dace/orchestration.py:501: in __sdfg__
return wrapped.__sdfg__(*args, **kwargs)
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/dsl/dace/orchestration.py:414: in __sdfg__
return _parse_sdfg(self.daceprog, self.lazy_method.config, *args, **kwargs)
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/dsl/dace/orchestration.py:295: in _parse_sdfg
sdfg = daceprog.to_sdfg(
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/parser.py:276: in to_sdfg
sdfg = self._parse(args, kwargs, simplify=simplify, save=save, validate=validate)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/parser.py:492: in _parse
sdfg, cached = self._generate_pdp(args, kwargs, simplify=simplify)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/parser.py:900: in _generate_pdp
sdfg = newast.parse_dace_program(self.name,
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:238: in parse_dace_program
sdfg, _, _, _ = visitor.parse_program(preprocessed_ast.preprocessed_ast.body[0])
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:1230: in parse_program
self.visit_TopLevel(stmt)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/astutils.py:487: in visit_TopLevel
return visitor(node)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:4625: in visit_TopLevelExpr
self.visit_Call(node.value)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:4451: in visit_Call
return self._parse_sdfg_call(funcname, func, node)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:3769: in _parse_sdfg_call
raise ex
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/frontend/python/newast.py:3746: in _parse_sdfg_call
sdfg = fcopy.__sdfg__(*fargs, **fkwargs)
../.conda/envs/pyftest/lib/python3.11/site-packages/ndsl/dsl/stencil.py:506: in __sdfg__
return self.stencil_object.__sdfg__(
../.conda/envs/pyftest/lib/python3.11/site-packages/gt4py/cartesian/backend/dace_lazy_stencil.py:66: in __sdfg__
sdfg = sdfg_manager.frozen_sdfg(origin=norm_kwargs["origin"], domain=norm_kwargs["domain"])
../.conda/envs/pyftest/lib/python3.11/site-packages/gt4py/cartesian/backend/dace_backend.py:415: in frozen_sdfg
return copy.deepcopy(self._frozen_sdfg(origin=origin, domain=domain))
../.conda/envs/pyftest/lib/python3.11/site-packages/gt4py/cartesian/backend/dace_backend.py:401: in _frozen_sdfg
inner_sdfg = self.unexpanded_sdfg()
../.conda/envs/pyftest/lib/python3.11/site-packages/gt4py/cartesian/backend/dace_backend.py:379: in unexpanded_sdfg
return copy.deepcopy(self._unexpanded_sdfg())
../.conda/envs/pyftest/lib/python3.11/site-packages/gt4py/cartesian/backend/dace_backend.py:358: in _unexpanded_sdfg
sdfg = dace.SDFG.from_file(path)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/sdfg/sdfg.py:1590: in from_file
return SDFG._from_file(fp)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/sdfg/sdfg.py:1568: in _from_file
sdfg = SDFG.from_json(sdfg_json)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/sdfg/sdfg.py:608: in from_json
state = SDFGState.from_json(n, context=nci)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/sdfg/state.py:1380: in from_json
ret.add_node(nret)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = SDFGState (state_0), node = <dace.serialize.SerializableObject object at 0x7fbf8d117210>
def add_node(self, node):
if not isinstance(node, nd.Node):
> raise TypeError("Expected Node, got " + type(node).__name__ + " (" + str(node) + ")")
E TypeError: Expected Node, got SerializableObject (<dace.serialize.SerializableObject object at 0x7fbf8d117210>)
../.conda/envs/pyftest/lib/python3.11/site-packages/dace/sdfg/state.py:1280: TypeError
Describe the bug
When running in parallel, the following error occurs: TypeError: Expected Node, got SerializableObject. The error was observed when running the Orchestrated dace:cpu Acoustics test in the pyFV3 GitHub workflow.
Failure output:
To Reproduce
Expected behavior
System Environment
Describe the system environment, including:
Red Hat Enterprise Linux 8.10 (Ootpa)
dace:cpu
openmpi-ucx/5.0.0
netcdf/4.9.2