Closed ariasanovsky closed 1 year ago
template dump:
// $PROJECT/target/spindle/map/U/src/union.rs
/// Storage cell that overlays the input type `X` and the output type `Y` of the
/// mapped function.
///
/// The lifting code reads an `X` and writes a `Y` through pointers obtained by
/// casting `*mut U`, so both fields must start at the union's own address.
/// `#[repr(C)]` guarantees that; the default representation does not promise
/// that union fields sit at offset 0.
#[repr(C)]
pub union U {
    _0: X,
    _1: Y,
}
and
// $PROJECT/target/spindle/map/U/src/foo.rs
/// The function being lifted over `U`: takes an `X` and returns a `Y`.
/// The body is elided in this template dump.
pub fn foo(x: X) -> Y { /* blah */ }
use super::union::U;
unsafe fn foo(u: &mut U) {
let input_ref = &*(u as *mut _ as *mut _);
let output = foo(*input_ref);
let output_ptr: *mut _ = u as *mut _ as _;
*output_ptr = output;
}
// let's actually make this a method
impl U {
    /// Applies the free function `foo` to this cell in place: reads the storage
    /// as the argument type of `foo` and overwrites it with the returned value.
    ///
    /// The pointee types of both casts are inferred from the signature of `foo`.
    ///
    /// # Safety
    ///
    /// `self` must currently hold a valid value of the argument type of `foo`.
    pub unsafe fn foo(&mut self) {
        let base: *mut Self = self;
        let output = foo(*(base as *const _));
        *(base as *mut _) = output;
    }
}
and have a kernel
// $PROJECT/target/spindle/map/U/src/lib.rs
#![no_std]
#![feature(abi_ptx)]
#![feature(stdsimd)]
#![feature(core_intrinsics)]
mod foo;
mod union;
use foo::*;
use union::*;
use core::arch::nvptx::*;
/// Panic handler for this `#![no_std]` crate.
///
/// Ignores the panic information and never returns.
#[panic_handler]
fn my_panic(_: &core::panic::PanicInfo) -> ! {
// Spin forever; nothing is reported.
loop {}
}
/// PTX kernel entry point that runs the lifted map on the `U` elements behind
/// `slice`.
///
/// Each thread starts at its global index (`thread_id + block_id * block_dim`)
/// and advances by the total number of launched threads
/// (`block_dim * grid_dim`) until the index reaches `size`.
///
/// # Safety
///
/// The body forms a `&mut U` to `slice.offset(i)` for every visited index `i`
/// below `size`, so `slice` must be valid for reads and writes of at least
/// `size` elements, and each element must satisfy the safety contract of the
/// method called on it.
#[no_mangle]
pub unsafe extern "ptx-kernel" fn foo_kernel(slice: *mut U, size: i32) {
    let thread_id: i32 = _thread_idx_x();
    let block_id: i32 = _block_idx_x();
    let block_dim: i32 = _block_dim_x();
    let grid_dim: i32 = _grid_dim_x();
    let n_threads: i32 = block_dim * grid_dim;
    let thread_index: i32 = thread_id + block_id * block_dim;
    let mut i: i32 = thread_index;
    while i < size {
        let u: &mut U = &mut *slice.offset(i as isize);
        // NOTE(review): `device` is not defined in the snippets shown with this
        // template (the method in the `impl U` listing is named `foo`); confirm
        // which name the generated crate uses.
        u.device();
        // A wrapping add would turn the index negative on overflow, which still
        // satisfies `i < size` and would offset `slice` backwards. Any index
        // past `i32::MAX` is necessarily out of range, so stop instead.
        i = match i.checked_add(n_threads) {
            Some(next) => next,
            None => break,
        };
    }
}
See godbolt example.
/// Concrete instance of the template union for the `i32 -> f64` example.
///
/// `#[repr(C)]` is what lets the lifting functions in this example reach either
/// field by casting a pointer to the union itself.
#[repr(C)]
pub union U {
// Same type as the argument of `foo`.
_0: i32,
// Same type as the return value of `foo`.
_1: f64,
}
/// Converts an `i32` to the `f64` with the same numeric value.
///
/// Every `i32` is exactly representable as an `f64`, so the conversion is
/// lossless.
pub fn foo(x: i32) -> f64 {
    f64::from(x)
}
impl U {
    /// Applies `foo` to this cell in place: reads the storage as the argument
    /// type of `foo` and overwrites it with the returned value.
    ///
    /// The pointee types of both casts are inferred from the signature of `foo`.
    ///
    /// # Safety
    ///
    /// `self` must currently hold an initialised value of the argument type of
    /// `foo`.
    pub unsafe fn foo_method(&mut self) {
        let base: *mut Self = self;
        let output = foo(*(base as *const _));
        *(base as *mut _) = output;
    }
}
/// Free-function form of the lift: reads the storage of `u` as the argument
/// type of `foo` and overwrites it with the value `foo` returns.
///
/// No field type is spelled out; both casts are inferred from the signature of
/// `foo`.
///
/// # Safety
///
/// `u` must currently hold an initialised value of the argument type of `foo`.
pub unsafe fn lift_foo(u: &mut U) {
    let base: *mut U = u;
    let output = foo(*(base as *const _));
    *(base as *mut _) = output;
}
/// Same lift as `lift_foo`, with every intermediate type written out: the
/// storage of `u` is read as an `i32`, converted with `foo`, and overwritten
/// with the resulting `f64`.
///
/// # Safety
///
/// `u` must currently hold an initialised `i32`.
pub unsafe fn lift_foo_verbose(u: &mut U) {
    let base: *mut U = u;
    let input: i32 = *(base as *const i32);
    let output_ptr = base as *mut f64;
    // let experiment: &mut f64 = &mut *output_ptr;
    *output_ptr = foo(input);
}
emits
example::foo:
cvtsi2sd xmm0, edi
ret
example::U::foo_method:
cvtsi2sd xmm0, dword ptr [rdi]
movsd qword ptr [rdi], xmm0
ret
example::lift_foo:
cvtsi2sd xmm0, dword ptr [rdi]
movsd qword ptr [rdi], xmm0
ret
example::lift_foo_verbose:
cvtsi2sd xmm0, dword ptr [rdi]
movsd qword ptr [rdi], xmm0
ret
Improved path: `$PROJECT/target/spindle/map/U/src/lib.rs` has `kernel_foo`, `kernel_bar`, etc. together.
`#[spindle::map]` (`&` and `&mut`)
Tentative syntax/goal (cf. `examples/02-i32-to-f64-map.rs`)