Closed duda1202 closed 4 years ago
Hi,
I am using a GeForce RTX 2060 with bonnetal and it is crashing the GPU. I get the error:
Unable to determine the device handle for GPU 0000:01:00.0: GPU is lost. Reboot the system to recover this GPU
In this case, I am using my own ROS code, which calls user.infer. This is the code:
#!/usr/bin/env python3 # Futures from __future__ import print_function # STD import sys import time import argparse import subprocess import datetime import os import shutil # ROS import rospy import roslib from sensor_msgs.msg import CompressedImage # numpy and scipy import numpy as np from scipy.ndimage import filters # OpenCV import cv2 from cv_bridge import CvBridge, CvBridgeError # For overlaying images from PIL import Image import torch # check if cuda is activated cuda = torch.cuda.is_available() if cuda == False: print("Model is NOT using GPU") print ("Cuda:", torch.cuda.is_available()) class BonnetalNode: """ Encapsulates the bonnetal functionality into a ROS node. """ # A ROS subscriber for input images img_sub = None labelled_img_pub = None overlaid_img_pub = None # Bonnetal interface user = None def __init__(self): """ Initializes ROS (pubs and subs) and bonnetal. """ # Initialize ROS rospy.init_node("bonnetal_node") init = rospy.Time.now() # Parameters Config path_model = rospy.get_param("path_model") backend = rospy.get_param("backend") camera_topic = rospy.get_param("camera_topic") # Add path for bonnetal files abs_path = rospy.get_param("abs_path") print ("Abs path is: ", abs_path) sys.path.insert(0, abs_path + "bonnetal/train") # Initialize bonnetal self.initialize_bonnetal(path=path_model, backend=backend) # Initialize publishers and subscribers self.overlaid_img_pub = rospy.Publisher("/overlaid_image/compressed", CompressedImage, queue_size = 1) self.labelled_img_pub = rospy.Publisher("/output_labelled_img/compressed", CompressedImage, queue_size = 1) # buff size allows callback to get the latest msg instead of queueing them self.img_sub = rospy.Subscriber(camera_topic, CompressedImage, self.image_callback, queue_size = 1, buff_size=2**32) rospy.loginfo("Segmentation node initialized in {} seconds!".format( (rospy.Time.now()-init).to_sec())) def initialize_bonnetal(self, path, backend="native", workspace=8000000000, calib_images=None): """ 
Initializes bonnetal :type path: string :param path: full path to pretrained model :type backend: string :param backend: framework for segmentation task :type workspace: int :param workspace: max workspace size (only for TensorRT framework) :type calib_images: list :param calib_images: calibration images, must be a list of images (only for TensorRT framework) """ # create inference context for the desired backend if backend == "tensorrt": # import and use tensorRT try: print("Using tensorRT") from tasks.segmentation.modules.userTensorRT import UserTensorRT self.user = UserTensorRT(path, workspace, calib_images) except ImportError as e: print ("ERROR:", e) sys.exit(0) except: print('\nERROR:TensorRT needs to use inference model type .onnx. You can make one ' 'using tasks/segmentation/make_deploy_model.py') sys.exit(0) elif backend == "caffe2": try: # import and use caffe2 print("Using caffe2") from tasks.segmentation.modules.userCaffe2 import UserCaffe2 self.user = UserCaffe2(path) except ImportError as e: print ("ERROR:", e) sys.exit(0) except: print('\nERROR:Caffe2 needs to use inference model type .onnx. You can make one ' 'using tasks/segmentation/make_deploy_model.py') sys.exit(0) elif backend == "pytorch": # import and use pytorch try: print("Using PyTorch") from tasks.segmentation.modules.userPytorch import UserPytorch self.user = UserPytorch(path) except ImportError as e: print ("ERROR:", e) sys.exit(0) except: print('\nERROR:PyTorch needs to use inference model type .pytorch. You can make one ' 'using tasks/segmentation/make_deploy_model.py') sys.exit(0) else: # default to native pytorch print("Using native PyTorch") from tasks.segmentation.modules.user import User self.user = User(path) def segment_image(self, cv_img): """ Input should be cv image. 
:type cv_img: int :param cv_img: max workspace size (only for TensorRT framework) :rtype: numpy.ndarray :returns: OpenCV color image with labels of fuel :rtype: numpy.ndarray :returns: OpenCV color image from the camera with overlay labels of fuel """ # infer # print("Inferring ") _, lbl_img = self.user.infer(cv_img, False) overlay_img = Image.blend(Image.fromarray(cv_img), Image.fromarray(lbl_img), 0.5) return lbl_img, overlay_img def unpack_image_msg(self, msg): """ Receives a sensor_msgs/CompressedImage and returns a cv image :type msg: CompressedImage :param msg: CompressedImage ROS message :rtype: numpy.ndarray :returns: OpenCV color image """ np_arr = np.fromstring(msg.data, np.uint8) cv_img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR) return cv_img def re_pack_image_msg(self, cv_img): """ Packing OpenCV image to ROS message CompressedImage :type cv_img: CompressedImage :param cv_img: CompressedImage ROS message :rtype: CompressedImage :returns: CompressedImage ROS message in jpeg format """ #img_msg = cv2_to_imgmsg(cv_img, encoding="bgr8") img_msg = CompressedImage() img_msg.header.stamp = rospy.Time.now() img_msg.format = "jpeg" img_msg.data = np.array(cv2.imencode('.jpg', np.asarray(cv_img))[1]).tostring() return img_msg def pub_lbl_img(self, cv_img): """ Publishes the labelled (segmented) images. :type cv_img: CompressedImage :param cv_img: CompressedImage ROS message """ img_msg = self.re_pack_image_msg(cv_img) self.labelled_img_pub.publish(img_msg) def pub_overlay_img(self, cv_img): """ Publishes the overlaid images. :type cv_img: CompressedImage :param cv_img: CompressedImage ROS message """ img_msg = self.re_pack_image_msg(cv_img) self.overlaid_img_pub.publish(img_msg) def image_callback(self, msg): """ Receives sensor_msgs/CompressedImage and publishes labelled images. 
:type msg: CompressedImage :param msg: CompressedImage ROS message """ cv_img = self.unpack_image_msg(msg) lbl_img, overlay_img = self.segment_image(cv_img) self.pub_lbl_img(lbl_img) self.pub_overlay_img(overlay_img) def run(self): """ Enters the main loop for processing messages. """ rospy.spin() def main(): node = BonnetalNode() node.run() if __name__ == "__main__": main()
Do you know what the issue could be?
Hi, this is likely a hardware problem. I would suggest 1) checking all of your power supply cables, and 2) checking dmesg for hardware problems. This is, however, NVIDIA-related, so I will close it here.
Hi,
I am using a GeForce RTX 2060 with bonnetal and it is crashing the GPU. I get the error:
Unable to determine the device handle for GPU 0000:01:00.0: GPU is lost. Reboot the system to recover this GPU
In this case, I am using my own ROS code, which calls user.infer. This is the code:
Do you know what the issue could be?