Closed gxsaccount closed 3 years ago
I find that the tool mov_replace also has this problem. It seems the assertion is caused by some function in "nvbit_reg_rw.h".
Can you try applying the patch below to nvbit_reg_rw.h? In addition, we recently found bugs in nvbit_read/write_{ureg,pred_reg,upred_reg}()
and will release fixes for them soon.
--- a/nvbit_reg_rw.h
+++ b/nvbit_reg_rw.h
@@ -42,14 +42,36 @@ extern "C" __device__ __noinline__ int32_t nvbit_read_reg(uint64_t reg_num) {
#pragma unroll
for (int i = 0; i < 1024; i++) __nvbit_var += i;
assert(__nvbit_var == reg_num);
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
return __nvbit_var;
}
extern "C" __device__ __noinline__ void nvbit_write_reg(uint64_t reg_num,
int32_t reg_val) {
-#pragma unroll
+#pragma unroll 1024
for (int i = 0; i < 1024; i++) __nvbit_var += i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
assert(__nvbit_var == reg_num + reg_val);
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
+#pragma unroll 1024
+ for (int i = 0; i < 1024; i++) __nvbit_var += reg_num*i;
}
extern "C" __device__ __noinline__ int32_t nvbit_read_ureg(uint64_t reg_num) {
Hi,
I want to debug my own tool, so I added "-G" to my Makefile and ran it with vectoradd, but I get this assertion:
vectoradd: nvbit_imp.cpp:702: void Nvbit::func_loading(CUcontext, CUfunction): Assertion `sizeof(_text_nvbit_nvbit_write_reg75) <= function->nbytes' failed.
I want to know the meaning of this assertion and how to fix the problem.
Thanks.