microsoft / GLIP

Grounded Language-Image Pre-training

How to fix the demo for PyTorch 2.0.0 / CUDA 11.8 #164

Closed sangphamitus closed 5 months ago

sangphamitus commented 6 months ago

I managed to fix this, and it works on my Windows 11 machine with CUDA 11.8 and PyTorch 2.0.0, without using Docker 🎊. Happy coding 😊.

PyTorch 2.0.0 removed the THC libraries, which causes many build errors in the CUDA extensions.
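Before patching, it is worth confirming the environment actually matches those versions; a minimal check using standard PyTorch attributes:

```Python
import torch

print(torch.__version__)         # expect 2.0.0
print(torch.version.cuda)        # expect 11.8
print(torch.cuda.is_available()) # building/running the CUDA ops needs a working GPU setup
```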

.cu files

1. `deform_conv_cuda.cu`
```C
#include <THC/THC.h> // remove this import

// add all imports below
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
#include <ATen/ceil_div.h>
#include <ATen/cuda/ThrustAllocator.h>
```
2. `deform_pool_cuda.cu`
```C
#include <THC/THC.h> // remove this import

// add all imports below
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
#include <ATen/ceil_div.h>
#include <ATen/cuda/ThrustAllocator.h>
```

3. `ml_nms.cu`
```C
#include <THC/THC.h> // remove this import

// add all imports below
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
#include <ATen/ceil_div.h>
#include <ATen/cuda/ThrustAllocator.h>
...
// line 69: change this 
const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 
// to
const int col_blocks = at::ceil_div(n_boxes, threadsPerBlock);
...
// line 84: change this 
const int col_blocks =  THCCeilDiv(boxes_num, threadsPerBlock);
//to
const int col_blocks = at::ceil_div(boxes_num, threadsPerBlock);
...
//line 88:
THCState *state = at::globalContext().lazyInitCUDA(); // remove this line
...
//line 91:
THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, boxes_num * col_blocks * sizeof(unsigned long long))); // remove this
...
//line 94: change this
mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
//to
mask_dev = (unsigned long long*) c10::cuda::CUDACachingAllocator::raw_alloc(boxes_num * col_blocks * sizeof(unsigned long long));
...
//line 97: change this
dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),THCCeilDiv(boxes_num, threadsPerBlock));
// to
dim3 blocks(at::ceil_div(boxes_num, threadsPerBlock),at::ceil_div(boxes_num, threadsPerBlock));
...
//line 107: change this
THCudaCheck(cudaMemcpy(&mask_host[0],  mask_dev,sizeof(unsigned long long) * boxes_num * col_blocks,cudaMemcpyDeviceToHost));
//to
C10_CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));
...
//line 130: change this
THCudaFree(state, mask_dev);
// to
c10::cuda::CUDACachingAllocator::raw_delete(mask_dev);
```
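For context, `raw_alloc` / `raw_delete` route the temporary mask buffer through PyTorch's CUDA caching allocator, which replaces the removed THC allocator. The same allocator is exposed in Python, so a quick sanity check is possible on a CUDA-capable machine:

```Python
import torch

# Allocate and free 1 MiB through the caching allocator that
# c10::cuda::CUDACachingAllocator::raw_alloc uses on the C++ side.
ptr = torch.cuda.caching_allocator_alloc(1024 * 1024)
torch.cuda.caching_allocator_delete(ptr)
```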
4. `nms.cu`
```C
#include <THC/THC.h> // remove this import

// add all imports below
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
#include <ATen/ceil_div.h>
#include <ATen/cuda/ThrustAllocator.h>
...
// line 64: change this 
const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 
// to
const int col_blocks = at::ceil_div(n_boxes, threadsPerBlock);
...
// line 79: change this 
const int col_blocks =  THCCeilDiv(boxes_num, threadsPerBlock);
//to
const int col_blocks = at::ceil_div(boxes_num, threadsPerBlock);
...
//line 83:
THCState *state = at::globalContext().lazyInitCUDA(); // remove this line
...
//line 86:
THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, boxes_num * col_blocks * sizeof(unsigned long long))); // remove this
...
//line 89: change this
mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
// to
mask_dev = (unsigned long long*) c10::cuda::CUDACachingAllocator::raw_alloc(boxes_num * col_blocks * sizeof(unsigned long long));
...
//line 91: change this
dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),THCCeilDiv(boxes_num, threadsPerBlock));
// to:
dim3 blocks(at::ceil_div(boxes_num, threadsPerBlock),at::ceil_div(boxes_num, threadsPerBlock));
...
//line 100: change this
THCudaCheck(cudaMemcpy(&mask_host[0], mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));
// to
C10_CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));
...
// line 125: change this
THCudaFree(state, mask_dev);
// to
c10::cuda::CUDACachingAllocator::raw_delete(mask_dev);
```
5. `ROIAlign_cuda.cu`, `ROIPool_cuda.cu`
```C
#include <THC/THC.h> // remove this import

// add all imports below
#include <ATen/ceil_div.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
#include <ATen/cuda/ThrustAllocator.h>
...
// line 275: change this
dim3 grid(std::min(THCCeilDiv(output_size, 512L), 4096L));
// to
dim3 grid(std::min(((int)output_size + 512 - 1) / 512, 4096));
...
// change this
THCudaCheck(cudaGetLastError());
// to
C10_CUDA_CHECK(cudaGetLastError());
...
// line 320: change this
dim3 grid(std::min(THCCeilDiv(grad.numel(), 512L), 4096L));
// to
dim3 grid(std::min(((int)(grad.numel()) + 512 - 1) / 512, 4096));
```

(The `(n + 512 - 1) / 512` expressions are plain integer ceiling division; see the short sketch after this list.)


6. `SigmoidFocalLoss_cuda.cu`
```C
#include <THC/THC.h> // remove this import

// add all imports below
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
#include <ATen/ceil_div.h>
#include <ATen/cuda/ThrustAllocator.h>
...
// change this
THCudaCheck(cudaGetLastError());
// to
C10_CUDA_CHECK(cudaGetLastError());
...
// change this
dim3 grid(std::min(THCCeilDiv(losses_size, 512L), 4096L));
// to
dim3 grid(std::min(((int)losses_size + 512 - 1) / 512, 4096));
...
// change this
dim3 grid(std::min(THCCeilDiv(d_logits_size, 512L), 4096L));
// to
dim3 grid(std::min(((int)d_logits_size + 512 - 1) / 512, 4096));
```
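A side note on the `(n + 512 - 1) / 512` expressions in items 5 and 6: this is ordinary integer ceiling division, the same value that `THCCeilDiv` and `at::ceil_div` compute. A minimal check of the identity:

```Python
import math

def ceil_div(n: int, d: int) -> int:
    # The (n + d - 1) // d trick used in the grid-size expressions above.
    return (n + d - 1) // d

for n in (0, 1, 511, 512, 513, 100_000):
    assert ceil_div(n, 512) == math.ceil(n / 512)
print("ceil_div matches math.ceil for all test sizes")
```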

.py files

1. `box_aug.py`, `build.py`
```Python
# change
if cfg.INPUT.FORMAT is not '':
# to
if cfg.INPUT.FORMAT != '':
```
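Why this matters: `is` tests object identity, not equality, so comparing against a literal like `''` is unreliable (and CPython 3.8+ emits `SyntaxWarning: "is" with a literal`). The same fix applies to the `is '*'` and `is -1` comparisons in the `clip_model.py` and model-loading items below. A minimal illustration:

```Python
s = "".join(["he", "llo"])  # builds the string "hello" at runtime
print(s == "hello")         # True: == compares values
print(s is "hello")         # typically False: is compares object identity,
                            # and s is a different object than the literal
```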
2. `lvis_eval.py`
```Python
# change
tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
# to
tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32)
fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32)
```
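Background: `np.float` was a deprecated alias for Python's built-in `float` (64-bit) and was removed in NumPy 1.24, so these calls now raise `AttributeError`. `np.float64` is the exact drop-in replacement; `np.float32`, as used here, also works and halves memory at reduced precision. A quick check:

```Python
import numpy as np

tps = np.array([[True, False, True]])
tp_sum = np.cumsum(tps, axis=1).astype(np.float32)  # works on all NumPy versions
print(tp_sum.dtype, tp_sum)                         # float32 [[1. 1. 2.]]
# On NumPy >= 1.24, .astype(np.float) raises AttributeError instead.
```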

3. `deform_conv.py`, `nms.py`, `roi_align.py`, `roi_pool.py`, `sigmoid_focal_loss.py`
```Python
# change
from maskrcnn_benchmark import _C
# to
import maskrcnn_benchmark._C as _C
```
4. `clip_model.py`
```Python
# change
or pretrained_layers[0] is '*'
# to
or pretrained_layers[0] == '*'
```
5. `anchor_generator.py`
```Python
# change
np.array(sizes, dtype=np.float) / stride,
np.array(aspect_ratios, dtype=np.float),
# to
np.array(sizes, dtype=np.float32) / stride,
np.array(aspect_ratios, dtype=np.float32),
...
# change
anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1
# to
anchor = np.array([1, 1, base_size, base_size], dtype=np.float32) - 1
```
6. `big_model_loading.py`, `c2_model_loading.py`, `pretrain_model_loading.py`
```Python
# change
if old_key.find(param) is -1:
# to
if old_key.find(param) == -1:
```
7. `imports.py`
```Python
# torch._six was removed in PyTorch 2.0, so replace the
# torch._six-based loader with plain importlib:
import importlib
import importlib.util
import sys

# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
def import_file(module_name, file_path, make_importable=False):
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    if make_importable:
        sys.modules[module_name] = module
    return module
```
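A short usage sketch for the helper above; the module name and file path are placeholders for illustration only:

```Python
# Load a module directly from a file path (both arguments are placeholders).
cfg = import_file(
    "my_project.paths_catalog",   # placeholder module name
    "/path/to/paths_catalog.py",  # placeholder file path
    make_importable=True,
)
```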
8. `model_zoo.py`
```Python
# change
from torch.hub import _download_url_to_file
# to
from torch.hub import download_url_to_file
...
# change
from torch.utils.model_zoo import _download_url_to_file
# to
from torch.hub import download_url_to_file
```
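For reference, `download_url_to_file` is the public `torch.hub` API that replaced the old underscore-prefixed helper. A minimal usage sketch; the URL and destination filename are placeholders:

```Python
from torch.hub import download_url_to_file

# Placeholder URL and destination, for illustration only.
download_url_to_file("https://example.com/checkpoint.pth", "checkpoint.pth", progress=True)
```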
huuthientran commented 6 months ago

Wow, this is pretty cool! Thanks for your hard work buddy πŸ”₯