Closed bheisler closed 6 years ago
Unfortunately, this is a Rust issue. I can reproduce it with raw rustc:
rustc --target nvptx64-nvidia-cuda test.rs -O --emit=asm
// Minimal reproducer: a `no_core` library crate whose only exported function
// calls the `nvptx_syncthreads` platform intrinsic.
//
// Unstable features needed to define lang items by hand, to build without
// `core`, and to declare platform intrinsics.
#![feature(lang_items)]
#![feature(no_core, platform_intrinsics)]
#![crate_type = "lib"]
#![no_core]
// With `#![no_core]` nothing from `core` is available, so the lang-item
// traits are declared here as empty stubs.
#[lang = "copy"]
trait Copy {}
#[lang = "freeze"]
trait Freeze {}
#[lang = "sized"]
trait Sized {}
// Declaration of the intrinsic under test; it takes no arguments and returns
// nothing.
extern "platform-intrinsic" {
pub fn nvptx_syncthreads();
}
// `#[no_mangle]` keeps the symbol name `foo` so it is easy to find in the
// emitted assembly. The function is `unsafe` and does nothing except invoke
// the intrinsic.
#[no_mangle]
pub unsafe fn foo() {
nvptx_syncthreads();
}
emits
//
// Generated by LLVM NVPTX Back-End
//
.version 3.2
.target sm_20
.address_size 64
// .globl foo
.extern .func llvm.cuda.syncthreads
()
;
.visible .func foo()
{
{ // callseq 0, 0
.reg .b32 temp_param_reg;
call.uni
llvm.cuda.syncthreads,
(
);
} // callseq 0
ret;
}
Could you please reopen it in the Rust repo?
If I reference the
syncthreads
intrinsic, the linker produces an invalid PTX file. Relevant PTX output: It looks as though it's trying to call
llvm.cuda.syncthreads
rather than emitting the appropriate PTX instruction.