ariasanovsky / spindle

12 stars 1 forks source link

needs support for maps #1

Closed ariasanovsky closed 1 year ago

ariasanovsky commented 1 year ago

Tentative syntax/goal (cf. examples/02-i32-to-f64-map.rs):

// `#[spindle::map]` marks a plain function from one element type to another;
// in `main` below it is then called as a method on a `DevSpindle` (`x.foo()`).
#[spindle::map]
fn foo(x: X) -> Y { /* blah */ }
// Second map: its input type `Y` is `foo`'s output type, so the two can chain.
#[spindle::map]
fn bar(y: Y) -> Z { /* blah */ }
// NOTE(review): presumably declares `U` as the union over `X`, `Y`, `Z` that
// backs the spindles -- confirm against the macro's definition.
spindle::spin!(U, X, Y, Z);

// Sketch of the intended call-site API: convert a `Vec<X>` into a spindle,
// apply the maps as methods, bring the result back to the host, and borrow it.
fn main() {
    let x: Vec<X> = /* blah */;
    // Fallible conversion of the host vector into a `DevSpindle<U, X>`.
    let x: DevSpindle<U, X> = unsafe { x.try_into() }.unwrap();
    // Each map is a fallible method; the intended result type is given in the
    // inline comment after the binding name.
    let y /* DevSpindle<U, Y> */ = unsafe { x.foo() }.unwrap();
    let z /* DevSpindle<U, Z> */ = unsafe { y.bar() }.unwrap();
    let z /* HostSpindle<U, Z> */ = unsafe { z.try_to_host() }.unwrap();
    // Borrow the elements out of the host spindle.
    let z: Vec<&Z> = z.iter().collect();
    // or
    let x: Vec<X> = /* blah */;
    // The same pipeline written as one chained expression.
    // NOTE(review): if `try_into` here is `std::convert::TryInto::try_into`,
    // the method takes no generic arguments, so `try_into::<..>()` would be
    // rejected; `DevSpindle::<U, _>::try_from(x)` is one alternative -- confirm.
    let z: Vec<&Z> = unsafe {
        x.try_into::<DevSpindle<U, _>>().unwrap()
        .foo().unwrap()
        .bar().unwrap()
        .try_to_host().unwrap()
        .iter().collect()
    };
}
ariasanovsky commented 1 year ago

template dump:

// $PROJECT/target/spindle/map/U/src/union.rs
/// Storage shared by the types in a map chain: each element holds either an
/// `X` (the map's input) or a `Y` (the map's output) in the same memory.
///
/// `#[repr(C)]` is required because the lifted map reinterprets a pointer to
/// `U` as a pointer to one of its fields. Only the C representation guarantees
/// that every field starts at offset 0; the default representation does not.
#[repr(C)]
pub union U {
    _0: X,
    _1: Y,
}

and

// $PROJECT/target/spindle/map/U/src/foo.rs
// The user's map function as it appears in the generated file.
pub fn foo(x: X) -> Y { /* blah */ }
use super::union::U;
// Lifts `foo` to act in place on a `U`: reads the union's storage as `foo`'s
// input, calls `foo`, and writes the result back over the same storage.
// NOTE(review): this free function shares the name `foo` with the map function
// above in the same module, which would be a duplicate definition; the method
// form that follows avoids the clash.
unsafe fn foo(u: &mut U) {
    // Pointee type is left as `_`; it is fixed by the call to `foo` below.
    let input_ref = &*(u as *mut _ as *mut _);
    let output = foo(*input_ref);
    // Pointee type is fixed by the assignment of `output` below.
    let output_ptr: *mut _ = u as *mut _ as _;
    *output_ptr = output;
}
// let's actually make this a method
impl U {
    /// Applies the free function `foo` to this union in place: the storage is
    /// read as `foo`'s input type and then overwritten with `foo`'s output.
    ///
    /// # Safety
    /// The storage is reinterpreted through raw-pointer casts, so it must
    /// currently hold a valid value of `foo`'s input type.
    pub unsafe fn foo(&mut self) {
        // One raw pointer to the storage; each cast's pointee type is inferred
        // from how the dereferenced place is used.
        let this: *mut Self = self;
        let mapped = foo(*(this as *mut _));
        *(this as *mut _) = mapped;
    }
}

and have a kernel

// $PROJECT/target/spindle/map/U/src/lib.rs
#![no_std]
#![feature(abi_ptx)]
#![feature(stdsimd)]
#![feature(core_intrinsics)]

mod foo;
mod union;
use foo::*;
use union::*;
use core::arch::nvptx::*;

// A `#![no_std]` crate has to supply its own panic handler.
// This one ignores the panic information and never returns: it spins forever.
#[panic_handler]
fn my_panic(_: &core::panic::PanicInfo) -> ! {
    loop {}
}

/// Kernel entry point: applies the lifted map `U::foo` in place to each of the
/// `size` elements starting at `slice`.
///
/// Every thread starts at its global x-index (`thread_id + block_id * block_dim`)
/// and advances by the total thread count (`block_dim * grid_dim`) until it
/// passes `size`.
///
/// # Safety
/// `slice` must point to at least `size` writable `U` values, each currently
/// holding a valid input for `foo`.
#[no_mangle]
pub unsafe extern "ptx-kernel" fn foo_kernel(slice: *mut U, size: i32) {
    let thread_id: i32 = _thread_idx_x();
    let block_id: i32 = _block_idx_x();
    let block_dim: i32 = _block_dim_x();
    let grid_dim: i32 = _grid_dim_x();

    let n_threads: i32 = block_dim * grid_dim;
    let thread_index: i32 = thread_id + block_id * block_dim;

    let mut i: i32 = thread_index;
    while i < size {
        let u: &mut U = &mut *slice.offset(i as isize);
        // The lifted map is the `foo` method from `impl U`; the template
        // defines no `device` method on `U`.
        u.foo();
        // A wrapping add could turn `i` negative near `i32::MAX`, which would
        // still satisfy `i < size` and index before `slice`. Stop instead.
        match i.checked_add(n_threads) {
            Some(next) => i = next,
            None => break,
        }
    }
}
ariasanovsky commented 1 year ago

See godbolt example.

/// Concrete instance of the generated union for the `i32 -> f64` map below.
/// `#[repr(C)]` puts both fields at offset 0, which the pointer casts in the
/// lifted functions rely on.
#[repr(C)]
pub union U {
    _0: i32,
    _1: f64,
}

/// Converts an `i32` into the `f64` with the same numeric value.
/// The conversion is lossless: every `i32` is exactly representable as an `f64`.
pub fn foo(x: i32) -> f64 {
    f64::from(x)
}

impl U {
    /// Method form of the lifted map: reads the union's storage as `foo`'s
    /// input, applies `foo`, and writes the result back over the same storage.
    /// All pointee types are written as `_` and inferred from `foo`'s signature.
    pub unsafe fn foo_method(&mut self) {
        // Becomes `&i32`: fixed by passing `*input_ref` to `foo` on the next line.
        let input_ref = &*(self as *mut _ as *mut _);
        let output = foo(*input_ref);
        // Becomes `*mut f64`: fixed by assigning `output` through it below.
        let output_ptr: *mut _ = self as *mut _ as _;
        *output_ptr = output;
    }
}

/// Free-function form of the lifted map, with the same body as the method form
/// but taking the union as an explicit `&mut U` argument.
pub unsafe fn lift_foo(u: &mut U) {
    // Becomes `&i32`: fixed by passing `*input_ref` to `foo` on the next line.
    let input_ref = &*(u as *mut _ as *mut _);
    let output = foo(*input_ref);
    // Becomes `*mut f64`: fixed by assigning `output` through it below.
    let output_ptr: *mut _ = u as *mut _ as _;
    *output_ptr = output;
}

/// `lift_foo` with the binding types written out (`&i32`, `f64`, `*mut f64`)
/// instead of inferred. The assembly listing that follows shows the same
/// instructions for this and the two inferred forms.
pub unsafe fn lift_foo_verbose(u: &mut U) {
    // Read the union's storage as the `i32` input.
    let input_ref: &i32 = &*(u as *mut _ as *mut _);
    let output: f64 = foo(*input_ref);
    // Write the `f64` result over the same storage.
    let output_ptr: *mut f64 = u as *mut _ as _;
    // let experiment: &mut f64 = &mut *output_ptr;
    *output_ptr = output;
}

emits

example::foo:
        cvtsi2sd        xmm0, edi
        ret

example::U::foo_method:
        cvtsi2sd        xmm0, dword ptr [rdi]
        movsd   qword ptr [rdi], xmm0
        ret

example::lift_foo:
        cvtsi2sd        xmm0, dword ptr [rdi]
        movsd   qword ptr [rdi], xmm0
        ret

example::lift_foo_verbose:
        cvtsi2sd        xmm0, dword ptr [rdi]
        movsd   qword ptr [rdi], xmm0
        ret
ariasanovsky commented 1 year ago

Improved path: $PROJECT/target/spindle/map/U/src/lib.rs now contains kernel_foo, kernel_bar, etc. together.

ariasanovsky commented 1 year ago

Main changes merged in.