具体是从这开始报错的:
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1973): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.nc.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1979): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.nc.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1985): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cg.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1991): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cg.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1997): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.ca.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2003): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.ca.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2009): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cs.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2015): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cs.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2021): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.lu.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2027): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.lu.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2033): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cv.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2039): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cv.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2044): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wb.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2048): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wb.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2052): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cg.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2056): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cg.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2060): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cs.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2064): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cs.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2068): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wt.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2072): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wt.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(3495): error: asm operand type size(8) does not match type/size implied by constraint 'r'
: "r"(address), "h"(*(reinterpret_cast<const unsigned short *>(&(val))))
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1897): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.nc.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1903): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.nc.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1909): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cg.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1915): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cg.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1921): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.ca.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1927): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.ca.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1933): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cs.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1939): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cs.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr));
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1945): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.lu.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1951): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.lu.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1957): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cv.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1963): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("ld.global.cv.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1969): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wb.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1973): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wb.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1977): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cg.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1981): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cg.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1985): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cs.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1989): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.cs.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1993): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wt.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory");
^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1997): error: asm operand type size(8) does not match type/size implied by constraint 'r'
asm ("st.global.wt.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory");
^
41 errors detected in the compilation of "neural/cuda/cuda_kernels.cu"
具体是从这开始报错的: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1973): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.nc.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1979): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.nc.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1985): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cg.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1991): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cg.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(1997): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.ca.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2003): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.ca.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2009): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cs.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2015): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cs.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2021): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.lu.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2027): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.lu.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2033): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cv.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2039): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cv.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2044): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wb.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2048): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wb.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2052): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cg.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2056): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cg.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2060): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cs.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2064): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cs.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2068): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wt.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(2072): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wt.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_fp16.hpp(3495): error: asm operand type size(8) does not match type/size implied by constraint 'r' : "r"(address), "h"(*(reinterpret_cast<const unsigned short *>(&(val)))) ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1897): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.nc.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1903): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.nc.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1909): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cg.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1915): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cg.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1921): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.ca.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1927): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.ca.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1933): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cs.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1939): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cs.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr)); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1945): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.lu.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1951): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.lu.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1957): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cv.b32 %0, [%1];" : "=r"(*(reinterpret_cast<unsigned int *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1963): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("ld.global.cv.b16 %0, [%1];" : "=h"(*(reinterpret_cast<unsigned short *>(&(ret)))) : "r"(ptr) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1969): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wb.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1973): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wb.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1977): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cg.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1981): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cg.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1985): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cs.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1989): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.cs.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1993): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wt.b32 [%0], %1;" :: "r"(ptr), "r"(*(reinterpret_cast<const unsigned int *>(&(value)))) : "memory"); ^
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\include\cuda_bf16.hpp(1997): error: asm operand type size(8) does not match type/size implied by constraint 'r' asm ("st.global.wt.b16 [%0], %1;" :: "r"(ptr), "h"(*(reinterpret_cast<const unsigned short *>(&(value)))) : "memory"); ^
41 errors detected in the compilation of "neural/cuda/cuda_kernels.cu"
编译使用的命令行如下: nvcc main.cc config.cc version.cc game/board.cc game/book.cc game/game_state.cc game/gtp.cc game/iterator.cc game/pattern_board.cc game/sgf.cc game/strings.cc game/symmetry.cc game/zobrist.cc mcts/node.cc mcts/search.cc mcts/time_control.cc neural/description.cc neural/encoder.cc neural/loader.cc neural/network.cc neural/training_data.cc neural/winograd_helper.cc neural/blas/batchnorm.cc neural/blas/biases.cc neural/blas/blas.cc neural/blas/blas_forward_pipe.cc neural/blas/convolution.cc neural/blas/fullyconnect.cc neural/blas/se_unit.cc neural/blas/sgemm.cc neural/blas/winograd_convolution3.cc neural/cuda/cuda_common.cc neural/cuda/cuda_forward_pipe.cc neural/cuda/cuda_layers.cc neural/cuda/cuda_kernels.cu pattern/gammas_dict.cc pattern/mm.cc pattern/mm_trainer.cc pattern/pattern.cc selfplay/engine.cc selfplay/pipe.cc summary/accuracy.cc utils/filesystem.cc utils/gogui_helper.cc utils/gzip_helper.cc utils/komi.cc utils/log.cc utils/option.cc utils/parse_float.cc utils/random.cc utils/splitter.cc utils/time.cc -o sayuri -I . -DNDEBUG -DWIN32 -DNOMINMAX -DUSE_CUDA -lcudart -lcublas -O3 -Xcompiler /O2 -Xcompiler /std:c++14
请问如何解决?谢谢