ray.init (GPUs) for inference_video.py fails when running more than 1 worker

Code Below

import argparse
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from matplotlib import animation

from object_detection.builders.dataset_builder import build as build_dataset
from object_detection.utils.config_util import get_configs_from_pipeline_file
from object_detection.utils.label_map_util import create_category_index_from_labelmap
from object_detection.utils import visualization_utils as viz_utils

from utils import get_module_logger
import ray
from psutil import cpu_count

@ray.remote(num_gpus=0.50)
def main(labelmap_path, model_path, tf_record_path, config_path, output_path, num):
    """
    Use a model and a tf record file and create a mp4 video
    args:
    - labelmap_path [str]: path to labelmap file
    - model_path [str]: path to exported model
    - tf_record_path [str]: path to tf record file to visualize
    - config_path [str]: path to config file
    - output_path [str]: path to mp4 file

    Save the results as mp4 file
    """
    # load label map
    category_index = create_category_index_from_labelmap(labelmap_path,
                                                         use_display_name=True)

    # Load saved model and build the detection function
    logger.info(f'Loading model from {model_path}')
    detect_fn = tf.saved_model.load(model_path)

    # open config file
    logger.info(f'Loading config from {config_path}')
    configs = get_configs_from_pipeline_file(config_path)
    eval_config = configs['eval_config']
    eval_input_config = configs['eval_input_config']
    model_config = configs['model']

    # update the eval config file
    eval_input_config.tf_record_input_reader.input_path[:] = [tf_record_path]
    dataset = build_dataset(eval_input_config)

    # build dataset
    dataset = build_dataset(eval_input_config)

    # here we infer on the entire dataset
    images = []
    logger.info(f'Inference on {tf_record_path}')
    for idx, batch in enumerate(dataset):
         if idx % 10 == 0:
            logger.info(f'Step: {idx}')
        # add new axis and feed into model
        input_tensor = batch['image']
        image_np = input_tensor.numpy().astype(np.uint8)
        input_tensor = input_tensor[tf.newaxis, ...]

        detections = detect_fn(input_tensor)

        # tensor -> numpy arr, remove one dimensions
        num_detections = int(detections.pop('num_detections'))
        detections = {key: value[0, ...].numpy()
                    for key, value in detections.items()}
        detections['num_detections'] = num_detections

        # detection_classes should be ints.
        detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

        image_np_with_detections = \
            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np,
                detections['detection_boxes'],
                detections['detection_classes'],
                detections['detection_scores'],
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=200,
                min_score_thresh=.30,
                agnostic_mode=False)
        images.append(image_np_with_detections)

    # now we can create the animation
    f = plt.figure()
    f.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=None, hspace=None)
    ax = plt.subplot(111)
    ax.axis('off')
    im_obj = ax.imshow(images[0])

    def animate(idx):
        image = images[idx]
        im_obj.set_data(image)

    anim = animation.FuncAnimation(f, animate, frames=len(images))
    anim.save(output_path + str(num) + ".mp4", fps=5, dpi=300)

if __name__ == "__main__":
    logger = get_module_logger(__name__)

    parser = argparse.ArgumentParser(description='Create video')
    parser.add_argument('--labelmap_path', required=True, type=str,
                help='path to the label map')
    parser.add_argument('--model_path', required=True, type=str,
                        help='path to the saved model folder')
#    parser.add_argument('--tf_record_path', required=True, type=str,
#                        help='path to the tf record file')
         parser.add_argument('--config_path', required=False, type=str,
                        default='pipeline.config',
                        help='path to the config file')
    parser.add_argument('--output_path', required=False, type=str,
                        default='./data/animation.mp4',
                        help='path of the saved file')
    args = parser.parse_args()
    labelmap_path = args.labelmap_path
    model_path = args.model_path
    config_path = args.config_path
    output_path = args.output_path

     # open the filenames file
    with open('inference.txt', 'r') as f:
        filenames = f.read().splitlines()
    logger.info(f'Download {len(filenames)} files. Be patient, this will take a long time.')

     # init ray
    ray.init(num_gpus=1)
#    ray.init(num_cpus=cpu_count())
    length = len(filenames)
    i = 0
    while i < length:
#        for fn in filenames[i:i+2]:
#            index = filenames.index(fn)
#            main(labelmap_path, model_path, fn, config_path, output_path, num=int(index))
        workers = [main.remote(labelmap_path, model_path, fn, config_path, output_path, num=z) for fn in filenames[i:i+2] for z in range(i,i+2)]
        _ = ray.get(workers)
        i += 2
    print("Done with Inferencing")

error Below

(pid=368) WARNING:tensorflow:num_readers has been reduced to 1 to match input file shards.
(pid=368) WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/object_detection/builders/dataset_builder.py:101: parallel_interleave (from tensorflow.python.data.experimental.ops.interleave_ops) is deprecated and will be removed in a future version.
(pid=368) Instructions for updating:
(pid=368) Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, num_parallel_calls=tf.data.AUTOTUNE)` instead. If sloppy execution is desired, use `tf.data.Options.deterministic`.
(pid=368) WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/object_detection/builders/dataset_builder.py:236: DatasetV1.map_with_legacy_function (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
(pid=368) Instructions for updating:
(pid=368) Use `tf.data.Dataset.map()
(pid=357) WARNING:tensorflow:num_readers has been reduced to 1 to match input file shards.
(pid=357) WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/object_detection/builders/dataset_builder.py:101: parallel_interleave (from tensorflow.python.data.experimental.ops.interleave_ops) is deprecated and will be removed in a future version.
(pid=357) Instructions for updating:
(pid=357) Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, num_parallel_calls=tf.data.AUTOTUNE)` instead. If sloppy execution is desired, use `tf.data.Options.deterministic`.
(pid=357) WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/object_detection/builders/dataset_builder.py:236: DatasetV1.map_with_legacy_function (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
(pid=357) Instructions for updating:
(pid=357) Use `tf.data.Dataset.map()
(pid=368) WARNING:tensorflow:num_readers has been reduced to 1 to match input file shards.
(pid=357) WARNING:tensorflow:num_readers has been reduced to 1 to match input file shards.
(pid=368) 2021-11-25 05:32:36.528886: E tensorflow/stream_executor/cuda/cuda_dnn.cc:371] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
(pid=357) 2021-11-25 05:32:36.589477: E tensorflow/stream_executor/cuda/cuda_dnn.cc:371] Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR
Traceback (most recent call last):
  File "inference_video.py", line 140, in <module>
    _ = ray.get(workers)
  File "/usr/local/lib/python3.8/dist-packages/ray/worker.py", line 1538, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(UnknownError): ray::__main__.main() (pid=368, ip=192.168.61.33)
  File "python/ray/_raylet.pyx", line 479, in ray._raylet.execute_task
  File "inference_video.py", line 64, in main
    detections = detect_fn(input_tensor)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/saved_model/load.py", line 701, in _call_attribute
    return instance.__call__(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/execute.py", line 58, in quick_execute
    tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.UnknownError: 2 root error(s) found.
  (0) UNKNOWN:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
         [[node ResNet101V1_FPN/model/conv1_conv/Conv2D
 (defined at inference_video.py:37)
]]
         [[StatefulPartitionedCall/Postprocessor/BatchMultiClassNonMaxSuppression/MultiClassNonMaxSuppression/Reshape/_38]]
  (1) UNKNOWN:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
         [[node ResNet101V1_FPN/model/conv1_conv/Conv2D
 (defined at inference_video.py:37)
]]
0 successful operations.
0 derived errors ignored. [Op:__inference_restored_function_body_87985]

Errors may have originated from an input operation.
Input Source operations connected to node ResNet101V1_FPN/model/conv1_conv/Conv2D:
In[0] ResNet101V1_FPN/model/lambda/Pad:
In[1] ResNet101V1_FPN/model/conv1_conv/Conv2D/ReadVariableOp:

Operation defined at: (most recent call last)
>>>   File "/usr/local/lib/python3.8/dist-packages/ray/workers/default_worker.py", line 123, in <module>
>>>     ray.worker.global_worker.main_loop()
>>>
>>>
>>>   File "inference_video.py", line 37, in main
>>>     detect_fn = tf.saved_model.load(model_path)
>>>

Input Source operations connected to node ResNet101V1_FPN/model/conv1_conv/Conv2D:
In[0] ResNet101V1_FPN/model/lambda/Pad:
In[1] ResNet101V1_FPN/model/conv1_conv/Conv2D/ReadVariableOp:

Operation defined at: (most recent call last)
>>>   File "/usr/local/lib/python3.8/dist-packages/ray/workers/default_worker.py", line 123, in <module>
>>>     ray.worker.global_worker.main_loop()
>>>
>>>
>>>   File "inference_video.py", line 37, in main
>>>     detect_fn = tf.saved_model.load(model_path)
>>>

Function call stack:
restored_function_body -> call_func -> restored_function_body -> call_func

udacity / nd013-c1-vision-starter

ray.init (GPUs) for inference_video.py fails when running more than 1 worker #11