kokkos / pykokkos

Performance portable parallel programming in Python.
102 stars 19 forks source link

MAINT, BUG, DOC: various issues with functor workunit flexibility #178

Open tylerjereddy opened 1 year ago

tylerjereddy commented 1 year ago

While working on gh-177, I spent quite some time dealing with weird/unhelpful error messages and limitations that arise for workunits defined inside functors but not for standalone workunits. For example:

import pykokkos as pk

@pk.functor
class Workload_gh_173:
    def __init__(self, s: int):
        self.size: pk.int = s

    @pk.workunit
    def store_result(self, i: int, result: pk.View1D[pk.int64]):
        result[i] = self.size

def test_gh_173():
    w = Workload_gh_173(900)
    result = pk.View([1], dtype=pk.int64)
    pk.parallel_for(1, w.store_result, result=result)

This fails when run with `python runtests.py -t tests/test_regressions.py`, producing a long traceback that isn't particularly helpful in identifying the problem:

``` _____________________________________________________________________________________________________________________________________________________________________________________ test_gh_173 ______________________________________________________________________________________________________________________________________________________________________________________ self = , source = (['class Workload_gh_173:\n', ' def __init__(self, s: int):\n', ' self.size: pk.int = s\n', '\n', '\n', ' @pk.workunit\n', ...], 4) def translate_workunits(self, source: Tuple[List[str], int]) -> Tuple[Dict[cppast.DeclRefExpr, Tuple[str, cppast.MethodDecl]], bool]: """ Translate the workunits :param source: the python source code of the workload :returns: a tuple of a dictionary mapping from workload name to a tuple of operation name and source, and a boolean indicating whether the workunit has a call to pk.rand() """ node_visitor = WorkunitVisitor( {}, source, self.pk_members.views, self.pk_members.pk_workunits, self.pk_members.fields, self.pk_members.pk_functions, self.pk_members.classtype_methods, self.pk_import, debug=True) workunits: Dict[cppast.DeclRefExpr, Tuple[str, cppast.MethodDecl]] = {} has_rand_call: bool = False for n, w in self.pk_members.pk_workunits.items(): try: > workunits[n] = node_visitor.visit(w) pykokkos/core/translators/static.py:215: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = , node = def visit(self, node): """Visit a node.""" method = 'visit_' + node.__class__.__name__ visitor = getattr(self, method, self.generic_visit) > return visitor(node) /usr/lib/python3.10/ast.py:410: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = , node = def visit_FunctionDef(self, node: ast.FunctionDef) -> Union[str, Tuple[str, cppast.MethodDecl]]: if self.is_nested_call(node): params: List[cppast.ParmVarDecl] = [a for a in self.visit(node.args)] body = cppast.CompoundStmt([self.visit(b) for b in node.body]) workunit = cppast.LambdaExpr("[&]", params, body) self.nested_work_units[node.name] = workunit return "" else: operation: Optional[str] = self.get_operation_type(node) if operation is None: > self.error(node.args, "Incorrect types in workunit definition") pykokkos/core/visitors/workunit_visitor.py:36: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = , node = , message = 'Incorrect types in workunit definition' def error(self, node, message): > visitors_util.error(self.src, self.debug, node, message) pykokkos/core/visitors/pykokkos_visitor.py:677: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ src = (['class Workload_gh_173:\n', ' def __init__(self, s: int):\n', ' self.size: pk.int = s\n', '\n', '\n', ' 
@pk.workunit\n', ...], 4), debug = True, node = , message = 'Incorrect types in workunit definition' def error(src, debug: bool, node, message) -> None: if hasattr(node, "lineno"): print(f"\n\033[31m\033[01mError on line {node.lineno} \033[0m: {message}") else: print(f"\n\033[31m\033[01mError\033[0m: {message}") if debug: print("DEBUG AST:") pretty_print(node) if hasattr(node, "lineno"): print(src[0][node.lineno - src[1] - 1], end="") err_len = node.end_col_offset - node.col_offset if node.end_col_offset else 1 print(" " * node.col_offset + "^" * err_len) > sys.exit("PyKokkos: Translation failed") E SystemExit: PyKokkos: Translation failed pykokkos/core/visitors/visitors_util.py:141: SystemExit During handling of the above exception, another exception occurred: def test_gh_173(): w = Workload_gh_173(900) result = pk.View([1], dtype=pk.int64) > pk.parallel_for(1, w.store_result, result=result) tests/test_regressions.py:18: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ pykokkos/interface/parallel_dispatch.py:171: in parallel_for func, args = runtime_singleton.runtime.run_workunit( pykokkos/core/runtime.py:97: in run_workunit members: Optional[PyKokkosMembers] = self.precompile_workunit(workunit,policy.space) pykokkos/core/runtime.py:68: in precompile_workunit members: Optional[PyKokkosMembers] = self.compiler.compile_object(module_setup, space, km.is_uvm_enabled()) pykokkos/core/compiler.py:149: in compile_object self.compile_entity(module_setup.main, module_setup, entity, parser.get_classtypes(), space, force_uvm, members) pykokkos/core/compiler.py:191: in compile_entity functor, bindings, cast = translator.translate(entity, classtypes) 
pykokkos/core/translators/static.py:77: in translate workunits, has_rand_call = self.translate_workunits(source) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = , source = (['class Workload_gh_173:\n', ' def __init__(self, s: int):\n', ' self.size: pk.int = s\n', '\n', '\n', ' @pk.workunit\n', ...], 4) def translate_workunits(self, source: Tuple[List[str], int]) -> Tuple[Dict[cppast.DeclRefExpr, Tuple[str, cppast.MethodDecl]], bool]: """ Translate the workunits :param source: the python source code of the workload :returns: a tuple of a dictionary mapping from workload name to a tuple of operation name and source, and a boolean indicating whether the workunit has a call to pk.rand() """ node_visitor = WorkunitVisitor( {}, source, self.pk_members.views, self.pk_members.pk_workunits, self.pk_members.fields, self.pk_members.pk_functions, self.pk_members.classtype_methods, self.pk_import, debug=True) workunits: Dict[cppast.DeclRefExpr, Tuple[str, cppast.MethodDecl]] = {} has_rand_call: bool = False for n, w in self.pk_members.pk_workunits.items(): try: workunits[n] = node_visitor.visit(w) has_rand_call = has_rand_call or node_visitor.has_rand_call if node_visitor.has_rand_call: workunit: cppast.MethodDecl = workunits[n][1] self.add_rand_pool_state(workunit) node_visitor.has_rand_call = False except: print(f"Translation of {w} failed") > sys.exit(1) E SystemExit: 1 pykokkos/core/translators/static.py:223: SystemExit --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Captured stdout call 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Error: Incorrect types in workunit definition DEBUG AST: arguments( posonlyargs=[], args=[ arg(arg='self'), arg( arg='i', annotation=Name(id='int', ctx=Load())), arg( arg='result', annotation=Subscript( value=Attribute( value=Name(id='pk', ctx=Load()), attr='View1D', ctx=Load()), slice=Attribute( value=Name(id='pk', ctx=Load()), attr='int64', ctx=Load()), ctx=Load()))], kwonlyargs=[], kw_defaults=[], defaults=[]) Translation of failed =============================================================================================================================================================================== short test summary info ================================================================================================================================================================================ FAILED tests/test_regressions.py::test_gh_173 - SystemExit: 1 ```

Conversely (and confusingly), if I move the workunit out of the functor, the same kind of construction works just fine:

import pykokkos as pk

@pk.workunit
def store_result(i: int, result: pk.View1D[pk.int64]):
    result[i] = 0

def test_gh_173():
    result = pk.View([1], dtype=pk.int64)
    pk.parallel_for(1, store_result, result=result)

I'm inclined to say this is a bug because the error message is just not clear enough to explain why this is allowed in one case and not the other.

To make matters worse, I get a different error message with this kind of construct:

import pykokkos as pk

@pk.functor
class Workload_gh_173:
    def __init__(self, s: int):
        self.size: pk.int = s

    @pk.workunit
    def store_result(self, i: int, j: int):
        printf("i:", i)

def test_gh_173():
    w = Workload_gh_173(900)
    pk.parallel_for(1, w.store_result, j=3)

The error message is basically nonsense to me:

``` def test_gh_173(): w = Workload_gh_173(900) > pk.parallel_for(1, w.store_result, j=3) tests/test_regressions.py:17: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ pykokkos/interface/parallel_dispatch.py:171: in parallel_for func, args = runtime_singleton.runtime.run_workunit( pykokkos/core/runtime.py:97: in run_workunit members: Optional[PyKokkosMembers] = self.precompile_workunit(workunit,policy.space) pykokkos/core/runtime.py:68: in precompile_workunit members: Optional[PyKokkosMembers] = self.compiler.compile_object(module_setup, space, km.is_uvm_enabled()) pykokkos/core/compiler.py:149: in compile_object self.compile_entity(module_setup.main, module_setup, entity, parser.get_classtypes(), space, force_uvm, members) pykokkos/core/compiler.py:197: in compile_entity cpp_setup.compile(output_dir, functor, cast, bindings, space, force_uvm, self.get_compiler()) pykokkos/core/cpp_setup.py:67: in compile self.invoke_script(output_dir, space, enable_uvm, compiler) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = , output_dir = PosixPath('pk_cpp/home/tyler/github_projects/pykokkos/runtests/test_regressions_Workload_gh_173/OpenMP'), space = , enable_uvm = False, compiler = 'g++' def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: bool, compiler: str) -> None: """ Invoke the compilation script 
:param output_dir: the base directory :param space: the execution space of the workload :param enable_uvm: whether to enable CudaUVMSpace :param compiler: what compiler to use """ view_space: str = "Kokkos::HostSpace" if space is ExecutionSpace.Cuda: if enable_uvm: view_space = "Kokkos::CudaUVMSpace" if space is ExecutionSpace.HIP: if enable_uvm: view_space = "Kokkos::Experimental::HIPManagedSpace" space_value: str if space.value == "HIP": space_value = "Experimental::HIP" else: space_value = space.value view_layout: str = str(get_default_layout(get_default_memory_space(space))) view_layout = view_layout.split(".")[-1] view_layout = f"Kokkos::{view_layout}" precision: str = km.get_default_precision().__name__.split(".")[-1] lib_path: Path include_path: Path compiler_path: Path lib_path, include_path, compiler_path = self.get_kokkos_paths(space, compiler) compute_capability: str = self.get_cuda_compute_capability(compiler) lib_suffix: str = self.get_kokkos_lib_suffix(space) command: List[str] = [f"./{self.script}", compiler, # What compiler to use self.module_file, # Compilation target space_value, # Execution space view_space, # Argument views memory space view_layout, # Argument views memory layout precision, # Default real precision str(lib_path), # Path to Kokkos install lib/ directory str(include_path), # Path to Kokkos install include/ directory compute_capability, # Device compute capability lib_suffix, # The libkokkos* suffix identifying the gpu str(compiler_path)] # The path to the compiler to use compile_result = subprocess.run(command, cwd=output_dir, capture_output=True, check=False) if compile_result.returncode != 0: print(compile_result.stderr.decode("utf-8")) print(f"C++ compilation in {output_dir} failed") > sys.exit(1) E SystemExit: 1 pykokkos/core/cpp_setup.py:257: SystemExit 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Captured stdout call --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- In file included from /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/Kokkos_OpenMP.hpp:236, from /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/decl/Kokkos_Declare_OPENMP.hpp:49, from /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/KokkosCore_Config_DeclareBackend.hpp:47, from /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/Kokkos_Core.hpp:57, from ./bindings.cpp:3: /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/OpenMP/Kokkos_OpenMP_Parallel.hpp: In instantiation of ‘static std::enable_if_t<((! std::is_void::work_tag>::value) && std::is_same::work_tag>::value)> Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::exec_work(const FunctorType&, Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::Member) [with Enable = pk_functor_Workload_gh_173::store_result; FunctorType = pk_functor_Workload_gh_173; Traits = {Kokkos::OpenMP, pk_functor_Workload_gh_173::store_result}; std::enable_if_t<((! 
std::is_void::work_tag>::value) && std::is_same::work_tag>::value)> = void; typename Kokkos::RangePolicy::work_tag = pk_functor_Workload_gh_173::store_result; Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::Member = long unsigned int]’: /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/OpenMP/Kokkos_OpenMP_Parallel.hpp:91:16: required from ‘static void Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::exec_range(const FunctorType&, Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::Member, Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::Member) [with FunctorType = pk_functor_Workload_gh_173; Traits = {Kokkos::OpenMP, pk_functor_Workload_gh_173::store_result}; Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::Member = long unsigned int]’ /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/OpenMP/Kokkos_OpenMP_Parallel.hpp:138:17: required from ‘void Kokkos::Impl::ParallelFor, Kokkos::OpenMP>::execute() const [with FunctorType = pk_functor_Workload_gh_173; Traits = {Kokkos::OpenMP, pk_functor_Workload_gh_173::store_result}]’ /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/Kokkos_Parallel.hpp:176:18: required from ‘void Kokkos::parallel_for(const string&, const ExecPolicy&, const FunctorType&) [with ExecPolicy = Kokkos::RangePolicy::store_result>; FunctorType = pk_functor_Workload_gh_173; Enable = void; std::string = std::__cxx11::basic_string]’ ./bindings.cpp:12:313: required from here /home/tyler/python_310_pykokkos_work/lib/python3.10/site-packages/pykokkos_base-0.0.7-py3.10-linux-x86_64.egg/include/kokkos/OpenMP/Kokkos_OpenMP_Parallel.hpp:106:12: error: no match for call to ‘(const pk_functor_Workload_gh_173) (Kokkos::Impl::ParallelFor, Kokkos::RangePolicy::store_result>, Kokkos::OpenMP>::WorkTag, const Member&)’ 106 | functor(WorkTag{}, iwork); | 
~~~~~~~^~~~~~~~~~~~~~~~~~ In file included from ./bindings.cpp:9: ../functor.hpp:5:301: note: candidate: ‘void pk_functor_Workload_gh_173::operator()(const pk_functor_Workload_gh_173::store_result&, int32_t, int32_t) const [with ExecSpace = Kokkos::OpenMP; int32_t = int]’ 5 | template struct pk_functor_Workload_gh_173{struct store_result{};int32_t size; pk_functor_Workload_gh_173(int32_t size, int32_t pk_randpool_num_states, int32_t pk_randpool_seed){ this->size= size; }; pk_functor_Workload_gh_173(int32_t size){ this->size= size; };KOKKOS_FUNCTION void operator()(const store_result& , int32_t i, int32_t j)const{ printf("i:",i); };}; | ^~~~~~~~ ../functor.hpp:5:301: note: candidate expects 3 arguments, 2 provided /home/tyler/.linuxbrew/opt/binutils/bin/ld: cannot find ./bindings.cpp.o: No such file or directory collect2: error: ld returned 1 exit status C++ compilation in pk_cpp/home/tyler/github_projects/pykokkos/runtests/test_regressions_Workload_gh_173/OpenMP failed =============================================================================================================================================================================== short test summary info ================================================================================================================================================================================ FAILED tests/test_regressions.py::test_gh_173 - SystemExit: 1 ```

And if I extract the workunit from the functor, things work "ok" again, so I find this arrangement of behaviors really hard to reason about:

import pykokkos as pk

@pk.workunit
def store_result(i: int, j: int):
    printf("i:", i)

def test_gh_173():
    pk.parallel_for(1, store_result, j=3)
JBludau commented 1 year ago

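From the error messages, it looks like the second, unused argument (`j`) is not passed correctly when the workunit is inside a functor: the generated `operator()` expects three arguments (the work tag, `i`, and `j`), but Kokkos calls it with only two ("candidate expects 3 arguments, 2 provided") ...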