SysCV / sam-hq

Segment Anything in High Quality [NeurIPS 2023]
https://arxiv.org/abs/2306.01567
Apache License 2.0

RuntimeError: Unexpected error from cudaGetDeviceCount(). Did you run some cuda functions before calling NumCudaDevices() that might have already set an error? Error 804: forward compatibility was attempted on non supported HW #121

Closed stevezkw1998 closed 4 months ago

stevezkw1998 commented 5 months ago

Hi there, I get the following error on my side when I run:

device = "cuda"
sam.to(device=device)

The full session and traceback:

In [4]: sam_checkpoint = "pretrained_checkpoint/sam_hq_vit_l.pth"
   ...: model_type = "vit_l"
   ...: device = "cuda"
   ...: sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
   ...: sam.to(device=device)
   ...: predictor = SamPredictor(sam)
<All keys matched successfully>
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[4], line 5
      3 device = "cuda"
      4 sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
----> 5 sam.to(device=device)
      6 predictor = SamPredictor(sam)

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1145, in Module.to(self, *args, **kwargs)
   1141         return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
   1142                     non_blocking, memory_format=convert_to_format)
   1143     return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
-> 1145 return self._apply(convert)

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:797, in Module._apply(self, fn)
    795 def _apply(self, fn):
    796     for module in self.children():
--> 797         module._apply(fn)
    799     def compute_should_use_set_data(tensor, tensor_applied):
    800         if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
    801             # If the new tensor has compatible tensor type as the existing tensor,
    802             # the current behavior is to change the tensor in-place using `.data =`,
   (...)
    807             # global flag to let the user control whether they want the future
    808             # behavior of overwriting the existing tensor or not.

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:797, in Module._apply(self, fn)
    795 def _apply(self, fn):
    796     for module in self.children():
--> 797         module._apply(fn)
    799     def compute_should_use_set_data(tensor, tensor_applied):
    800         if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
    801             # If the new tensor has compatible tensor type as the existing tensor,
    802             # the current behavior is to change the tensor in-place using `.data =`,
   (...)
    807             # global flag to let the user control whether they want the future
    808             # behavior of overwriting the existing tensor or not.

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:797, in Module._apply(self, fn)
    795 def _apply(self, fn):
    796     for module in self.children():
--> 797         module._apply(fn)
    799     def compute_should_use_set_data(tensor, tensor_applied):
    800         if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
    801             # If the new tensor has compatible tensor type as the existing tensor,
    802             # the current behavior is to change the tensor in-place using `.data =`,
   (...)
    807             # global flag to let the user control whether they want the future
    808             # behavior of overwriting the existing tensor or not.

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:820, in Module._apply(self, fn)
    816 # Tensors stored in modules are graph leaves, and we don't want to
    817 # track autograd history of `param_applied`, so we have to use
    818 # `with torch.no_grad():`
    819 with torch.no_grad():
--> 820     param_applied = fn(param)
    821 should_use_set_data = compute_should_use_set_data(param, param_applied)
    822 if should_use_set_data:

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1143, in Module.to.<locals>.convert(t)
   1140 if convert_to_format is not None and t.dim() in (4, 5):
   1141     return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
   1142                 non_blocking, memory_format=convert_to_format)
-> 1143 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)

File ~/miniconda3/lib/python3.10/site-packages/torch/cuda/__init__.py:247, in _lazy_init()
    245 if 'CUDA_MODULE_LOADING' not in os.environ:
    246     os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
--> 247 torch._C._cuda_init()
    248 # Some of the queued calls may reentrantly call _lazy_init();
    249 # we need to just return without initializing in that case.
    250 # However, we must not let any *other* threads in!
    251 _tls.is_initializing = True

RuntimeError: Unexpected error from cudaGetDeviceCount(). Did you run some cuda functions before calling NumCudaDevices() that might have already set an error? Error 804: forward compatibility was attempted on non supported HW
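For context, error 804 is raised by the CUDA runtime itself, before any sam-hq code runs: it usually means the installed NVIDIA kernel driver does not match the CUDA runtime that this PyTorch build (or a forward-compatibility package) is trying to use, and forward compatibility is only supported on certain data-center GPUs. A minimal check, independent of sam-hq, can confirm whether CUDA initializes at all on the machine. The snippet below is only a diagnostic sketch, not part of the repo:

# Minimal CUDA sanity check, independent of sam-hq.
# If this already fails (or warns) with error 804, the problem is the
# driver / CUDA runtime combination on the machine, not the model code.
import torch

print("torch version:", torch.__version__)
print("CUDA runtime torch was built with:", torch.version.cuda)

try:
    # Depending on the PyTorch version, is_available() may just return False
    # and print a warning instead of raising when CUDA fails to initialize.
    print("cuda available:", torch.cuda.is_available())
    print("device count:", torch.cuda.device_count())
    # Allocating a tensor on the GPU triggers the same lazy CUDA
    # initialization as sam.to("cuda"), so it reproduces the error in isolation.
    x = torch.zeros(1, device="cuda")
    print("allocation ok:", x)
except RuntimeError as e:
    print("CUDA init failed:", e)

If this check fails outside sam-hq, updating the NVIDIA driver (or installing a PyTorch build whose CUDA version matches what nvidia-smi reports) usually resolves error 804.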