For GPU memory, we can use Cupy_Ref, and for CPU memory we can use pybind11::array_t to pass memory between the Python and C++ sides. alpaka allows us to write generic code and execute it on different accelerators just by changing a template parameter.
Your task is to develop a generic Mem_Ref class, which exchanges CPU or GPU memory (selected by a template parameter) between the Python and C++ sides.
Here is some pseudo code for the task:
tags.hpp
// Tag type naming the CPU backend; used as a template argument (e.g. Mem_Ref<CPU>, Algo<CPU>).
// Only declared here, never defined.
struct CPU;
// Tag type naming the CUDA GPU backend; used as a template argument (e.g. Algo<CUDAGPU>).
// Only declared here, never defined.
struct CUDAGPU;
mem_ref.hpp
#include "tags.hpp"
typename<typename TAcc>
class Mem_Ref;
typename<>
class Mem_Ref<CPU>{
using type = pybind11::array_t<float, pybind11::array::c_style>;
// ...
};
typename<>
class Mem_Ref<GPUCUDA>{
using type = Cupy_Ref;
// ...
};
algo.hpp
#include "tags.hpp"
#include "mem_ref.hpp"
/// Generic interface of the algorithm; the device-specific behaviour lives in
/// the explicit specializations for each tag.
///
/// NOTE(review): `Ref` is not declared anywhere in the visible pseudo code;
/// presumably it stands for the memory reference provided by mem_ref.hpp
/// (Mem_Ref) -- confirm before implementing.
///
/// @tparam TDevice accelerator tag from tags.hpp
template<typename TDevice>
class Algo {
public:
    /// Reports which device variant is in use.
    void whoami();

    /// Returns a reference to the input memory.
    Ref<TDevice> get_input_memory();

    /// Processes `size` elements of `input` and writes the result to `output`.
    void compute(Ref<TDevice> input, Ref<TDevice> output, int size);

    /// Returns a reference to the output memory.
    /// (Name matches the specializations and the Python usage.)
    Ref<TDevice> get_output_memory();
};
/// CPU variant of the algorithm.
///
/// NOTE(review): the two getters are still pseudo-code placeholders
/// (`return ;` in a non-void function) and must be implemented before this
/// compiles.
template<>
class Algo<CPU> {
public:
    using TAcc = CPU;

    // Raw buffers backing the input and output memory.
    // Initialised to nullptr so they are never read while indeterminate.
    float * input = nullptr;
    float * output = nullptr;

    /// Prints which variant is running.
    void whoami(){
        std::cout << "I'm the CPU version\n";
    }

    /// Returns the input memory to the caller.
    Ref<TAcc> get_input_memory(){
        return ; // input as numpy_array or similar
    }

    /// Writes twice the value of each of the first `size` input elements to
    /// the corresponding output element.
    ///
    /// The parameters shadow the members of the same name; inside this
    /// function `input` and `output` refer to the arguments.
    /// NOTE(review): assumes Ref<TAcc> supports operator[] -- confirm.
    void compute(Ref<TAcc> input, Ref<TAcc> output, int size){
        for(int i = 0; i < size; ++i){
            output[i] = 2 * input[i];
        }
    }

    /// Returns the output memory to the caller.
    Ref<TAcc> get_output_memory(){
        return; // output as numpy_array or similar
    }
};
/// CUDA GPU variant of the algorithm.
///
/// NOTE(review): the getters and compute() are still pseudo-code placeholders
/// and must be implemented before this compiles.
template<>
class Algo<CUDAGPU> {
public:
    // Public, as in Algo<CPU>, so both variants expose the same members.
    using TAcc = CUDAGPU;

    /// Prints which variant is running.
    void whoami(){
        std::cout << "I'm the CUDA GPU version\n";
    }

    /// Returns the input memory to the caller.
    Ref<TAcc> get_input_memory(){
        return ; // input as cupy_ref
    }

    /// Processes `size` elements of `input` into `output` on the GPU.
    void compute(Ref<TAcc> input, Ref<TAcc> output, int size){
        // execute cuda kernel
    }

    /// Returns the output memory to the caller.
    Ref<TAcc> get_output_memory(){
        return; // output as cupy_ref
    }
};
# algo can easily be replaced by
# algo = binding.AlgoCPU
algo = binding.AlgoCUDA
# Number of elements to process; shared by the fill loop and compute().
size = 10
input = algo.get_input_memory()
for i in range(size):
    input[i] = data[i]
# compute() takes the element count as its third argument on the C++ side.
algo.compute(algo.get_input_memory(), algo.get_output_memory(), size)
print(algo.get_output_memory())
For GPU memory, we can use
Cupy_Ref
and for CPU memory we can use pybind11::array_t
to pass memory between the Python and C++ sides. alpaka allows us to write generic code and execute it on different accelerators just by changing a template parameter. Your task is to develop a generic
Mem_Ref
class, which exchanges CPU or GPU memory (selected by a template parameter) between the Python and C++ sides. Here is some pseudo code for the task:
tags.hpp
mem_ref.hpp
algo.hpp
binding.cu
main.py