WongKinYiu / yolov7

Implementation of paper - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors
GNU General Public License v3.0

CUDA out of memory for yolo-pose #317

Open josebenitezg opened 2 years ago

josebenitezg commented 2 years ago

Hi @AlexeyAB @WongKinYiu. Great work with YOLOv7, by the way. I'm trying to run the following code with yolo-pose:

import torch
import cv2
from torchvision import transforms
import numpy as np
import tqdm
from utils.datasets import letterbox
from utils.general import non_max_suppression_kpt
from utils.plots import output_to_keypoint, plot_skeleton_kpts

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
weights = torch.load('yolov7-w6-pose.pt')
model = weights['model']
model = model.half().to(device)  # run the pose model in half precision on the GPU
_ = model.eval()

def process_keypoints(video_file, model, output_video_path):
    video = cv2.VideoCapture(video_file)
    writer = _create_vid_writer(video, output_video_path)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    pbar = tqdm.tqdm(total=num_frames, desc="inf")
    while video.isOpened():
        ret, frame = video.read()
        if frame is None:
            break

        frame = cv2.resize(frame, (640, 480),
                           interpolation=cv2.INTER_AREA)

        pbar.update(1)

        frame = letterbox(frame, 640, stride=64, auto=True)[0]
        frame_ = frame.copy()
        frame = transforms.ToTensor()(frame)
        frame = torch.tensor(np.array([frame.numpy()]))
        frame = frame.to(device)
        frame = frame.half()

        output, _ = model(frame)

        output = non_max_suppression_kpt(output, 0.25, 0.65, nc=model.yaml['nc'], nkpt=model.yaml['nkpt'], kpt_label=True)
        #output = output_to_keypoint(output)
        with torch.no_grad():
            output = output_to_keypoint(output)
        nimg = frame[0].permute(1, 2, 0) * 255
        nimg = nimg.cpu().numpy().astype(np.uint8)
        nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)
        for idx in range(output.shape[0]):
            plot_skeleton_kpts(nimg, output[idx, 7:].T, 3)
        writer.write(nimg)

    video.release()
    writer.release()

def _create_vid_writer(vid_cap, video_path):
    fps = vid_cap.get(cv2.CAP_PROP_FPS)
    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    return writer

video_file = 'input.mp4'
video_output = 'output.mp4'
process_keypoints(video_file, model, video_output)

But I get the following error. I tried resizing the image before feeding it to the model, but that didn't solve the problem:

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 14.41 GiB already allocated; 18.94 MiB free; 14.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

Running on a p3.2xlarge AWS EC2 instance (NVIDIA V100).

Thank you for your help

lofyol commented 2 years ago

Where is yolov7-pose?

Thanossrs commented 2 years ago

When I run the above code on a video, an output video is produced, but when I open it the player reports a problem: the file does not contain any streams that can be played.

Any help on this? Thanks, Thanos
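
A likely cause, judging from the code above: the writer is created with the capture's original width and height, but the frames actually written have been letterboxed to a different size, and cv2.VideoWriter silently drops frames whose size does not match, which can leave a file with no playable stream. A minimal sketch of one possible fix (the write_frame helper below is hypothetical, not part of this repo): create the writer lazily from the first processed frame.

import cv2

writer = None  # created lazily, once the processed frame size is known

def write_frame(nimg, fps, video_path):
    # Size the writer from the frame itself (the letterboxed size),
    # not from the capture's original dimensions.
    global writer
    if writer is None:
        h, w = nimg.shape[:2]
        writer = cv2.VideoWriter(video_path,
                                 cv2.VideoWriter_fourcc(*'mp4v'),
                                 fps, (w, h))
    writer.write(nimg)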

Ysnower commented 2 years ago


You should set torch.set_grad_enabled(False) during inference.
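
For reference, a minimal sketch of that suggestion, using a stand-in model rather than the yolov7 pose model:

import torch

torch.set_grad_enabled(False)  # disable autograd globally for this script

model = torch.nn.Linear(8, 4).eval()  # stand-in for the pose model
x = torch.randn(1, 8)

y = model(x)
print(y.requires_grad)  # False: no computation graph is retained, so memory does not grow across frames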

mkhoshbin72 commented 2 years ago

There is a line in the non_max_suppression_kpt function that causes this error; I don't know why this happens. https://github.com/WongKinYiu/yolov7/blob/72a9c9628a22693a0c3a0eeb44c3be199d6cb1f1/utils/general.py#L742-L743 This line computes the confidence for every object in a multi-class model. I don't know whether the pose model can ever have more than a single person class. If it can, the line has to stay; if not, it could be removed.
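
If that line is indeed where the allocation fails, a targeted workaround consistent with this observation (a sketch reusing the variables from the original post, untested) is to detach the model output before it enters NMS, so the forward pass's autograd graph is freed first:

output, _ = model(frame)
output = output.detach()  # drop the autograd graph (and its saved activations) before NMS
output = non_max_suppression_kpt(output, 0.25, 0.65,
                                 nc=model.yaml['nc'],
                                 nkpt=model.yaml['nkpt'],
                                 kpt_label=True)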

dao027 commented 1 year ago

Same issue here; it was fixed by https://github.com/WongKinYiu/yolov7/issues/783. Move the position of your with torch.no_grad():.
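
Concretely, the with torch.no_grad(): in the original snippet only wraps output_to_keypoint, which is too late: the graph has already been built by the forward pass. A sketch of the corrected placement, wrapping the forward pass and the post-processing (same variables as the original post):

with torch.no_grad():  # no graph is built, so GPU memory stays flat across frames
    output, _ = model(frame)
    output = non_max_suppression_kpt(output, 0.25, 0.65,
                                     nc=model.yaml['nc'],
                                     nkpt=model.yaml['nkpt'],
                                     kpt_label=True)
    output = output_to_keypoint(output)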