When I train a SOLOv2 model, I get the following error as soon as training starts.
It seems that the `libnvrtc-builtins.so.11.1` file cannot be found, even though it is correctly installed.
I'm using Ubuntu 18.04, CUDA 11.1, and PyTorch 1.8.0+cu111.
I have no problems whatsoever using PyTorch with GPU acceleration on any other task.
Traceback (most recent call last):
File "tools/train_net.py", line 229, in <module>
launch(
File "/home/luca/venvs/adelaidet/lib/python3.8/site-packages/detectron2/engine/launch.py", line 82, in launch
main_func(*args)
File "tools/train_net.py", line 223, in main
return trainer.train()
File "tools/train_net.py", line 102, in train
self.train_loop(self.start_iter, self.max_iter)
File "tools/train_net.py", line 91, in train_loop
self.run_step()
File "/home/luca/venvs/adelaidet/lib/python3.8/site-packages/detectron2/engine/defaults.py", line 494, in run_step
self._trainer.run_step()
File "/home/luca/venvs/adelaidet/lib/python3.8/site-packages/detectron2/engine/train_loop.py", line 273, in run_step
loss_dict = self.model(data)
File "/home/luca/venvs/adelaidet/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/luca/repos/AdelaiDet/adet/modeling/solov2/solov2.py", line 137, in forward
losses = self.loss(cate_pred, kernel_pred, mask_pred, targets)
File "/home/luca/repos/AdelaiDet/adet/modeling/solov2/solov2.py", line 333, in loss
loss_cate = self.focal_loss_weight * sigmoid_focal_loss_jit(flatten_cate_preds, flatten_cate_labels_oh,
RuntimeError: nvrtc: error: failed to open libnvrtc-builtins.so.11.1.
Make sure that libnvrtc-builtins.so.11.1 is installed correctly.
nvrtc compilation failed:
#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)
template<typename T>
__device__ T maximum(T a, T b) {
return isnan(a) ? a : (a > b ? a : b);
}
template<typename T>
__device__ T minimum(T a, T b) {
return isnan(a) ? a : (a < b ? a : b);
}
extern "C" __global__
void fused_sigmoid_mul_n_10162883967136821163(double v0, float* t1, float* t2, float* aten_pow, float* aten_add, float* aten_add_1, float* aten_add_2, float* aten_sigmoid) {
{
float t2_1 = __ldg(t2 + 512 * blockIdx.x + threadIdx.x);
aten_sigmoid[512 * blockIdx.x + threadIdx.x] = 1.f / (1.f + (expf(0.f - t2_1)));
float t1_1 = __ldg(t1 + 512 * blockIdx.x + threadIdx.x);
aten_add_1[512 * blockIdx.x + threadIdx.x] = (0.f - t1_1) + 1.f;
aten_add_2[512 * blockIdx.x + threadIdx.x] = (0.f - 1.f / (1.f + (expf(0.f - t2_1)))) + 1.f;
aten_add[512 * blockIdx.x + threadIdx.x] = (0.f - ((1.f / (1.f + (expf(0.f - t2_1)))) * t1_1 + ((0.f - 1.f / (1.f + (expf(0.f - t2_1)))) + 1.f) * ((0.f - t1_1) + 1.f))) + 1.f;
aten_pow[512 * blockIdx.x + threadIdx.x] = (float)(pow((double)((0.f - ((1.f / (1.f + (expf(0.f - t2_1)))) * t1_1 + ((0.f - 1.f / (1.f + (expf(0.f - t2_1)))) + 1.f) * ((0.f - t1_1) + 1.f))) + 1.f), v0));
}
}
When I train a SOLOv2 model, I get the following error as soon as training starts. It seems that the `libnvrtc-builtins.so.11.1` file cannot be found, even though it is correctly installed.
I'm using Ubuntu 18.04, CUDA 11.1, and PyTorch 1.8.0+cu111.
I have no problems whatsoever using PyTorch with GPU acceleration on any other task.