google-ai-edge / mediapipe

Cross-platform, customizable ML solutions for live and streaming media.
https://mediapipe.dev
Apache License 2.0

Pose detection in python with CUDA support #2041

Closed gmontamat closed 2 years ago

gmontamat commented 3 years ago

Hello, based on @jiuqiant's comment https://github.com/google/mediapipe/issues/1651#issuecomment-790176010, I'd like to build the Python module so that pose detection (mediapipe.solutions.pose.Pose) runs with CUDA GPU support.

To simplify things, I've modified the repo's Dockerfile so that I can build the Python package inside the container.

Here's the diff so far:

diff --git a/.bazelrc b/.bazelrc
index 37a0bc1..5af865c 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -87,6 +87,11 @@ build:darwin_x86_64 --apple_platform_type=macos
 build:darwin_x86_64 --macos_minimum_os=10.12
 build:darwin_x86_64 --cpu=darwin_x86_64

+# This config refers to building CUDA op kernels with nvcc.
+build:cuda --repo_env TF_NEED_CUDA=1
+build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
+build:cuda --@local_config_cuda//:enable_cuda
+
 # This bazelrc file is meant to be written by a setup script.
 try-import %workspace%/.configure.bazelrc

diff --git a/Dockerfile b/Dockerfile
index c4c4df3..49a99f4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-FROM ubuntu:18.04
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

 MAINTAINER <mediapipe@google.com>

@@ -39,7 +39,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         libopencv-video-dev \
         libopencv-calib3d-dev \
         libopencv-features2d-dev \
-        software-properties-common && \
+        software-properties-common \
+        python3-venv libprotobuf-dev protobuf-compiler cmake libgtk2.0-dev \
+        mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev mesa-utils \
+        pkg-config libgtk-3-dev libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
+        libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev \
+        gfortran openexr libatlas-base-dev python3-dev python3-numpy \
+        libtbb2 libtbb-dev libdc1394-22-dev  && \
     add-apt-repository -y ppa:openjdk-r/ppa && \
     apt-get update && apt-get install -y openjdk-8-jdk && \
     apt-get clean && \
diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD
index 890889a..fe3ebfe 100644
--- a/mediapipe/framework/tool/BUILD
+++ b/mediapipe/framework/tool/BUILD
@@ -97,6 +97,7 @@ cc_binary(
     deps = [
         "@com_google_absl//absl/strings",
     ],
+    linkopts = ["-lm"],
 )

 cc_library(
diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD
index 08a2995..d1dd589 100644
--- a/mediapipe/python/BUILD
+++ b/mediapipe/python/BUILD
@@ -64,13 +64,15 @@ cc_library(
         "//mediapipe/calculators/image:image_transformation_calculator",
         "//mediapipe/calculators/util:detection_unique_id_calculator",
         "//mediapipe/modules/face_detection:face_detection_front_cpu",
-        "//mediapipe/modules/face_landmark:face_landmark_front_cpu",
-        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu",
-        "//mediapipe/modules/holistic_landmark:holistic_landmark_cpu",
-        "//mediapipe/modules/objectron:objectron_cpu",
-        "//mediapipe/modules/palm_detection:palm_detection_cpu",
-        "//mediapipe/modules/pose_detection:pose_detection_cpu",
-        "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_cpu",
-        "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
+        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
+        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu",
+        "//mediapipe/modules/holistic_landmark:holistic_landmark_gpu",
+        "//mediapipe/modules/objectron:objectron_gpu",
+        "//mediapipe/modules/palm_detection:palm_detection_gpu",
+        "//mediapipe/modules/pose_detection:pose_detection_gpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_gpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
+        "//mediapipe/calculators/image:color_convert_calculator",
     ],
 )
diff --git a/mediapipe/python/solutions/hands.py b/mediapipe/python/solutions/hands.py
index 15760ed..32798e6 100644
--- a/mediapipe/python/solutions/hands.py
+++ b/mediapipe/python/solutions/hands.py
@@ -64,7 +64,7 @@ class HandLandmark(enum.IntEnum):
   PINKY_TIP = 20

-BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.binarypb'
 HAND_CONNECTIONS = frozenset([
     (HandLandmark.WRIST, HandLandmark.THUMB_CMC),
     (HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
@@ -137,9 +137,9 @@ class Hands(SolutionBase):
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'palmdetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'handlandmarkcpu__ThresholdingCalculator.threshold':
+            'handlandmarkgpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['multi_hand_landmarks', 'multi_handedness'])
diff --git a/mediapipe/python/solutions/holistic.py b/mediapipe/python/solutions/holistic.py
index 64b63ab..928feb5 100644
--- a/mediapipe/python/solutions/holistic.py
+++ b/mediapipe/python/solutions/holistic.py
@@ -48,7 +48,7 @@ from mediapipe.python.solutions.pose import POSE_CONNECTIONS
 from mediapipe.python.solutions.pose import PoseLandmark
 # pylint: enable=unused-import

-BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_gpu.binarypb'

 def _download_oss_pose_landmark_model(model_complexity):
@@ -105,14 +105,14 @@ class Holistic(SolutionBase):
             'smooth_landmarks': smooth_landmarks and not static_image_mode,
         },
         calculator_params={
-            'poselandmarkcpu__ConstantSidePacketCalculator.packet': [
+            'poselandmarkgpu__ConstantSidePacketCalculator.packet': [
                 constant_side_packet_calculator_pb2
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
+            'poselandmarkgpu__poselandmarkbyroigpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=[
diff --git a/mediapipe/python/solutions/objectron.py b/mediapipe/python/solutions/objectron.py
index 195c2b8..3f6ae9b 100644
--- a/mediapipe/python/solutions/objectron.py
+++ b/mediapipe/python/solutions/objectron.py
@@ -75,7 +75,7 @@ class BoxLandmark(enum.IntEnum):
   BACK_TOP_RIGHT = 7
   FRONT_TOP_RIGHT = 8

-BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_gpu.binarypb'
 BOX_CONNECTIONS = frozenset([
     (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
     (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
diff --git a/mediapipe/python/solutions/pose.py b/mediapipe/python/solutions/pose.py
index e25fe62..16c0346 100644
--- a/mediapipe/python/solutions/pose.py
+++ b/mediapipe/python/solutions/pose.py
@@ -82,7 +82,7 @@ class PoseLandmark(enum.IntEnum):
   LEFT_FOOT_INDEX = 31
   RIGHT_FOOT_INDEX = 32

-BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_gpu.binarypb'
 POSE_CONNECTIONS = frozenset([
     (PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
     (PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
@@ -180,9 +180,9 @@ class Pose(SolutionBase):
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
+            'poselandmarkgpu__poselandmarkbyroigpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['pose_landmarks'])
diff --git a/setup.py b/setup.py
index 81569b3..be1832c 100644
--- a/setup.py
+++ b/setup.py
@@ -224,9 +224,9 @@ class BuildBinaryGraphs(build.build):
     binary_graphs = [
         'face_detection/face_detection_front_cpu',
         'face_landmark/face_landmark_front_cpu',
-        'hand_landmark/hand_landmark_tracking_cpu',
-        'holistic_landmark/holistic_landmark_cpu', 'objectron/objectron_cpu',
-        'pose_landmark/pose_landmark_cpu'
+        'hand_landmark/hand_landmark_tracking_gpu',
+        'holistic_landmark/holistic_landmark_gpu', 'objectron/objectron_gpu',
+        'pose_landmark/pose_landmark_gpu', 'pose_detection/pose_detection_gpu',
     ]
     for binary_graph in binary_graphs:
       sys.stderr.write('generating binarypb: %s\n' %
@@ -240,7 +240,11 @@ class BuildBinaryGraphs(build.build):
         'bazel',
         'build',
         '--compilation_mode=opt',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        # '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--config=cuda',
+        '--spawn_strategy=local',
+        '--define=no_aws_support=true',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         os.path.join('mediapipe/modules/', graph_path),
     ]
@@ -296,7 +300,11 @@ class BuildBazelExtension(build_ext.build_ext):
         'bazel',
         'build',
         '--compilation_mode=opt',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        # '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--config=cuda',
+        '--spawn_strategy=local',
+        '--define=no_aws_support=true',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         str(ext.bazel_target + '.so'),
     ]
diff --git a/third_party/BUILD b/third_party/BUILD
index 5800098..384dcb2 100644
--- a/third_party/BUILD
+++ b/third_party/BUILD
@@ -113,6 +113,8 @@ cmake_external(
         "WITH_PNG": "ON",
         "WITH_TIFF": "ON",
         "WITH_WEBP": "OFF",
+        "WITH_OPENEXR": "OFF",
+        "WITH_IPP": "OFF",
         # Optimization flags
         "CV_ENABLE_INTRINSICS": "ON",
         "WITH_EIGEN": "ON",

Once applied, I build the image, start the container, and build the Python wheel:

$ docker build -t mediapipe .
$ docker run -it --rm -v $(realpath ..):/host --name mediapipe mediapipe
# python3 setup.py gen_protos
# python3 setup.py bdist_wheel

When I use the generated wheel in Python, I get:

pose_model = mp.solutions.pose.Pose(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
            smooth_landmarks=True,
            static_image_mode=False,
)

RuntimeError: ; Unable to find the type for stream "image".  It may be set to AnyType or something else that isn't determinable, or the type may be defined but not registered.

My guess is that I'm missing changes in mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt to support image_frame_to_gpu_buffer_calculator. Has anyone done this before? Any ideas on how to modify pose_detection_gpu.pbtxt?
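
For context, a minimal sketch of how the wheel gets exercised on webcam frames (assuming OpenCV is installed; the capture index and the ESC-to-quit handling are only illustrative, not part of the build above):

import cv2
import mediapipe as mp

pose = mp.solutions.pose.Pose(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
    smooth_landmarks=True,
    static_image_mode=False,
)

cap = cv2.VideoCapture(0)  # assumed webcam index; any BGR frame source works
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    # MediaPipe expects RGB input, while OpenCV delivers BGR frames
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if results.pose_landmarks:
        mp.solutions.drawing_utils.draw_landmarks(
            frame, results.pose_landmarks, mp.solutions.pose.POSE_CONNECTIONS)
    cv2.imshow('pose', frame)
    if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
        break

cap.release()
pose.close()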

thamquocdung commented 3 years ago

Hello @gmontamat, I ran into the same issue and solved it by following #1651 for the holistic landmark graph. I think pose detection is very similar, so it's worth reading through.

gmontamat commented 3 years ago

Hi @thamquocdung. I'll try that and report the results here. Do you happen to have a diff for your mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt?

gmontamat commented 3 years ago

I've modified mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt:

diff --git a/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt
index 98917d9..f4c1c0e 100644
--- a/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt
+++ b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt
@@ -14,7 +14,7 @@

 type: "PoseDetectionGpu"

-# GPU image. (GpuBuffer)
+# CPU image.
 input_stream: "IMAGE:image"

 # Detected poses. (std::vector<Detection>)
@@ -36,12 +36,24 @@ input_stream: "IMAGE:image"
 # this packet so that they don't wait for it unnecessarily.
 output_stream: "DETECTIONS:detections"

+node: {
+  calculator: "ColorConvertCalculator"
+  input_stream: "RGB_IN:image"
+  output_stream: "RGBA_OUT:image_rgba"
+}
+
+node: {
+  calculator: "ImageFrameToGpuBufferCalculator"
+  input_stream: "image_rgba"
+  output_stream: "image_gpu"
+}
+
 # Transforms the input image into a 224x224 one while keeping the aspect ratio
 # (what is expected by the corresponding model), resulting in potential
 # letterboxing in the transformed image.
 node: {
   calculator: "ImageToTensorCalculator"
-  input_stream: "IMAGE_GPU:image"
+  input_stream: "IMAGE:image_gpu"
   output_stream: "TENSORS:input_tensors"
   output_stream: "LETTERBOX_PADDING:letterbox_padding"
   options: {

And now I get

RuntimeError: Input stream "image_for_pose_detection" of calculator "posedetectiongpu__ColorConvertCalculator" expects packets of type "::mediapipe::ImageFrame" but the connected output stream will contain packets of type "[Same Type As mediapipe::GpuBuffer]"

when I initialize mediapipe.solutions.pose.Pose

thamquocdung commented 3 years ago

@gmontamat, you don't need to make any changes in pose_detection_gpu.pbtxt. All of the necessary modifications are:


diff --git a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
index c439737..c84c958 100644
--- a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
+++ b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
@@ -88,6 +88,20 @@ output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
 # Regions of interest calculated based on pose detections. (NormalizedRect)
 output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"

+
+node: {
+  calculator: "ColorConvertCalculator"
+  input_stream: "RGB_IN:image"
+  output_stream: "RGBA_OUT:image_rgba"
+}
+
+node: {
+  calculator: "ImageFrameToGpuBufferCalculator"
+  input_stream: "image_rgba"
+  output_stream: "image_gpu"
+}
+
+
 # Defines whether landmarks on the previous image should be used to help
 # localize landmarks on the current image.
 node {
@@ -117,7 +131,7 @@ node: {
 # Calculates size of the image.
 node {
   calculator: "ImagePropertiesCalculator"
-  input_stream: "IMAGE_GPU:image"
+  input_stream: "IMAGE_GPU:image_gpu"
   output_stream: "SIZE:image_size"
 }

@@ -126,7 +140,7 @@ node {
 # round of pose detection.
 node {
   calculator: "GateCalculator"
-  input_stream: "image"
+  input_stream: "image_gpu"
   input_stream: "image_size"
   input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
   output_stream: "image_for_pose_detection"
@@ -181,7 +195,7 @@ node {
 node {
   calculator: "PoseLandmarkByRoiGpu"
   input_side_packet: "MODEL_COMPLEXITY:model_complexity"
-  input_stream: "IMAGE:image"
+  input_stream: "IMAGE:image_gpu"
   input_stream: "ROI:pose_rect"
   output_stream: "LANDMARKS:unfiltered_pose_landmarks"
   output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
@@ -214,7 +228,7 @@ node {
 # timestamp bound update occurs to jump start the feedback loop.
 node {
   calculator: "PreviousLoopbackCalculator"
-  input_stream: "MAIN:image"
+  input_stream: "MAIN:image_gpu"
   input_stream: "LOOP:pose_rect_from_landmarks"
   input_stream_info: {
     tag_index: "LOOP"
diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD
index 08a2995..a61cff2 100644
--- a/mediapipe/python/BUILD
+++ b/mediapipe/python/BUILD
@@ -72,5 +72,10 @@ cc_library(
         "//mediapipe/modules/pose_detection:pose_detection_cpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_cpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
+        "//mediapipe/calculators/image:color_convert_calculator",
+
     ],
 )
diff --git a/mediapipe/python/solutions/pose.py b/mediapipe/python/solutions/pose.py
index e25fe62..16c0346 100644
--- a/mediapipe/python/solutions/pose.py
+++ b/mediapipe/python/solutions/pose.py
@@ -82,7 +82,7 @@ class PoseLandmark(enum.IntEnum):
   LEFT_FOOT_INDEX = 31
   RIGHT_FOOT_INDEX = 32

-BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_gpu.binarypb'
 POSE_CONNECTIONS = frozenset([
     (PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
     (PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
@@ -180,9 +180,9 @@ class Pose(SolutionBase):
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
+            'poselandmarkgpu__poselandmarkbyroigpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['pose_landmarks'])
diff --git a/setup.py b/setup.py
index 81569b3..4b15862 100644
--- a/setup.py
+++ b/setup.py
@@ -225,8 +225,9 @@ class BuildBinaryGraphs(build.build):
         'face_detection/face_detection_front_cpu',
         'face_landmark/face_landmark_front_cpu',
         'hand_landmark/hand_landmark_tracking_cpu',
         'holistic_landmark/holistic_landmark_cpu', 'objectron/objectron_cpu',
-        'pose_landmark/pose_landmark_cpu'
+        'pose_landmark/pose_landmark_gpu',
     ]
     for binary_graph in binary_graphs:
       sys.stderr.write('generating binarypb: %s\n' %
@@ -240,7 +241,8 @@ class BuildBinaryGraphs(build.build):
         'bazel',
         'build',
         '--compilation_mode=opt',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
+        '--copt=-DEGL_NO_X11',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         os.path.join('mediapipe/modules/', graph_path),
     ]
@@ -296,7 +298,8 @@ class BuildBazelExtension(build_ext.build_ext):
         'bazel',
         'build',
         '--compilation_mode=opt',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
+        '--copt=-DEGL_NO_X11',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         str(ext.bazel_target + '.so'),
     ]
ykk648 commented 3 years ago

@thamquocdung very useful! The build takes 2 minutes, and the Python-based pose-detection code now runs on an RTX 3080. Inference is much faster, thank you!

I20210519 14:15:37.000900 2869899 gl_context_egl.cc:163] Successfully initialized EGL. Major : 1 Minor: 5
I20210519 14:15:37.031934 2869960 gl_context.cc:331] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 455.23.05)
I20210519 14:15:37.031996 2869899 gl_context_egl.cc:163] Successfully initialized EGL. Major : 1 Minor: 5
I20210519 14:15:37.046761 2869961 gl_context.cc:331] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 455.23.05)
INFO: Created TensorFlow Lite delegate for GPU.
INFO: Replacing 229 node(s) with delegate (TfLiteGpuDelegate) node, yielding 1 partitions.
INFO: Replacing 318 node(s) with delegate (TfLiteGpuDelegate) node, yielding 1 partitions.

thamquocdung commented 3 years ago

@ykk648 Yeah, it took me a lot of time to configure and understand the params, so I wanted to share. Hope it's helpful ^^

ZhiyuXu0124 commented 3 years ago

@thamquocdung Thank you for sharing, but when I run python3 setup.py gen_protos, some errors occur.

error info:

running gen_protos
generating proto file: mediapipe/framework/test_calculators_pb2.py
google/protobuf/any.proto: File not found.
mediapipe/framework/calculator.proto: Import "google/protobuf/any.proto" was not found or had errors.
mediapipe/framework/calculator.proto:273:14: "google.protobuf.Any" is not defined.
mediapipe/framework/calculator.proto:429:12: "google.protobuf.Any" is not defined.
mediapipe/framework/test_calculators.proto: Import "mediapipe/framework/calculator.proto" was not found or had errors.
mediapipe/framework/test_calculators.proto:26:10: "mediapipe.CalculatorOptions" seems to be defined in "mediapipe/framework/calculator_options.proto", which is not imported by "mediapipe/framework/test_calculators.proto".  To use it here, please add the necessary import.

Have you encountered such a problem?

thamquocdung commented 3 years ago

@ZhiyuXu0124 I think you missed the libprotobuf-dev package installation (sudo apt install libprotobuf-dev). Find more: here

gmontamat commented 3 years ago

> @ZhiyuXu0124 I think you missed the libprotobuf-dev package installation (sudo apt install libprotobuf-dev). Find more: here

@ZhiyuXu0124 you'll notice that in my first diff I added that package to the Dockerfile, to build the library in the container. Thanks so much @thamquocdung, I'll test it in a minute.

gmontamat commented 3 years ago

@thamquocdung thank you! Your changes worked. I'm leaving my diff below, since I've also compiled for CUDA and used the provided Dockerfile so as not to mess up the dependencies on my system:

(apply diff to https://github.com/google/mediapipe/tree/ae05ad04b3ae43d475ccb2868e23f1418fea8746)

diff --git a/.bazelrc b/.bazelrc
index 37a0bc1..0e18020 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -87,6 +87,16 @@ build:darwin_x86_64 --apple_platform_type=macos
 build:darwin_x86_64 --macos_minimum_os=10.12
 build:darwin_x86_64 --cpu=darwin_x86_64

+# This config refers to building with CUDA available. It does not necessarily
+# mean that we build CUDA op kernels.
+build:using_cuda --define=using_cuda=true
+build:using_cuda --action_env TF_NEED_CUDA=1
+build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
+
+# This config refers to building CUDA op kernels with nvcc.
+build:cuda --config=using_cuda
+build:cuda --define=using_cuda_nvcc=true
+
 # This bazelrc file is meant to be written by a setup script.
 try-import %workspace%/.configure.bazelrc

diff --git a/Dockerfile b/Dockerfile
index c4c4df3..24b2d81 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-FROM ubuntu:18.04
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

 MAINTAINER <mediapipe@google.com>

@@ -39,7 +39,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         libopencv-video-dev \
         libopencv-calib3d-dev \
         libopencv-features2d-dev \
-        software-properties-common && \
+        software-properties-common \
+        python3-venv libprotobuf-dev protobuf-compiler cmake libgtk2.0-dev \
+        mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev mesa-utils \
+        pkg-config libgtk-3-dev libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
+        libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev \
+        gfortran openexr libatlas-base-dev python3-dev python3-numpy \
+        libtbb2 libtbb-dev libdc1394-22-dev  && \
     add-apt-repository -y ppa:openjdk-r/ppa && \
     apt-get update && apt-get install -y openjdk-8-jdk && \
     apt-get clean && \
@@ -69,3 +75,4 @@ COPY . /mediapipe/

 # If we want the docker image to contain the pre-built object_detection_offline_demo binary, do the following
 # RUN bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/demo:object_detection_tensorflow_demo
+ENV TF_CUDA_PATHS=/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu,/usr/include
diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD
index 890889a..fe3ebfe 100644
--- a/mediapipe/framework/tool/BUILD
+++ b/mediapipe/framework/tool/BUILD
@@ -97,6 +97,7 @@ cc_binary(
     deps = [
         "@com_google_absl//absl/strings",
     ],
+    linkopts = ["-lm"],
 )

 cc_library(
diff --git a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
index c439737..c84c958 100644
--- a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
+++ b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
@@ -88,6 +88,20 @@ output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
 # Regions of interest calculated based on pose detections. (NormalizedRect)
 output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"

+
+node: {
+  calculator: "ColorConvertCalculator"
+  input_stream: "RGB_IN:image"
+  output_stream: "RGBA_OUT:image_rgba"
+}
+
+node: {
+  calculator: "ImageFrameToGpuBufferCalculator"
+  input_stream: "image_rgba"
+  output_stream: "image_gpu"
+}
+
+
 # Defines whether landmarks on the previous image should be used to help
 # localize landmarks on the current image.
 node {
@@ -117,7 +131,7 @@ node: {
 # Calculates size of the image.
 node {
   calculator: "ImagePropertiesCalculator"
-  input_stream: "IMAGE_GPU:image"
+  input_stream: "IMAGE_GPU:image_gpu"
   output_stream: "SIZE:image_size"
 }

@@ -126,7 +140,7 @@ node {
 # round of pose detection.
 node {
   calculator: "GateCalculator"
-  input_stream: "image"
+  input_stream: "image_gpu"
   input_stream: "image_size"
   input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
   output_stream: "image_for_pose_detection"
@@ -181,7 +195,7 @@ node {
 node {
   calculator: "PoseLandmarkByRoiGpu"
   input_side_packet: "MODEL_COMPLEXITY:model_complexity"
-  input_stream: "IMAGE:image"
+  input_stream: "IMAGE:image_gpu"
   input_stream: "ROI:pose_rect"
   output_stream: "LANDMARKS:unfiltered_pose_landmarks"
   output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
@@ -214,7 +228,7 @@ node {
 # timestamp bound update occurs to jump start the feedback loop.
 node {
   calculator: "PreviousLoopbackCalculator"
-  input_stream: "MAIN:image"
+  input_stream: "MAIN:image_gpu"
   input_stream: "LOOP:pose_rect_from_landmarks"
   input_stream_info: {
     tag_index: "LOOP"
diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD
index 08a2995..dc05f34 100644
--- a/mediapipe/python/BUILD
+++ b/mediapipe/python/BUILD
@@ -72,5 +72,8 @@ cc_library(
         "//mediapipe/modules/pose_detection:pose_detection_cpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_cpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
+        "//mediapipe/calculators/image:color_convert_calculator",
     ],
 )
diff --git a/mediapipe/python/solutions/pose.py b/mediapipe/python/solutions/pose.py
index e25fe62..16c0346 100644
--- a/mediapipe/python/solutions/pose.py
+++ b/mediapipe/python/solutions/pose.py
@@ -82,7 +82,7 @@ class PoseLandmark(enum.IntEnum):
   LEFT_FOOT_INDEX = 31
   RIGHT_FOOT_INDEX = 32

-BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_gpu.binarypb'
 POSE_CONNECTIONS = frozenset([
     (PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
     (PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
@@ -180,9 +180,9 @@ class Pose(SolutionBase):
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
+            'poselandmarkgpu__poselandmarkbyroigpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['pose_landmarks'])
diff --git a/setup.py b/setup.py
index 81569b3..8e9dd93 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ from distutils import spawn
 import distutils.command.build as build
 import distutils.command.clean as clean

-__version__ = '0.8'
+__version__ = '0.8.4-cuda10.1'
 IS_WINDOWS = (platform.system() == 'Windows')
 MP_ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
 ROOT_INIT_PY = os.path.join(MP_ROOT_PATH, '__init__.py')
@@ -226,7 +226,7 @@ class BuildBinaryGraphs(build.build):
         'face_landmark/face_landmark_front_cpu',
         'hand_landmark/hand_landmark_tracking_cpu',
         'holistic_landmark/holistic_landmark_cpu', 'objectron/objectron_cpu',
-        'pose_landmark/pose_landmark_cpu'
+        'pose_landmark/pose_landmark_gpu'
     ]
     for binary_graph in binary_graphs:
       sys.stderr.write('generating binarypb: %s\n' %
@@ -240,7 +240,10 @@ class BuildBinaryGraphs(build.build):
         'bazel',
         'build',
         '--compilation_mode=opt',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        # '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--config=cuda',
+        '--spawn_strategy=local',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         os.path.join('mediapipe/modules/', graph_path),
     ]
@@ -296,7 +299,10 @@ class BuildBazelExtension(build_ext.build_ext):
         'bazel',
         'build',
         '--compilation_mode=opt',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        # '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--config=cuda',
+        '--spawn_strategy=local',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         str(ext.bazel_target + '.so'),
     ]
diff --git a/third_party/BUILD b/third_party/BUILD
index 5800098..384dcb2 100644
--- a/third_party/BUILD
+++ b/third_party/BUILD
@@ -113,6 +113,8 @@ cmake_external(
         "WITH_PNG": "ON",
         "WITH_TIFF": "ON",
         "WITH_WEBP": "OFF",
+        "WITH_OPENEXR": "OFF",
+        "WITH_IPP": "OFF",
         # Optimization flags
         "CV_ENABLE_INTRINSICS": "ON",
         "WITH_EIGEN": "ON",

The steps to build the Python wheel are:

$ docker build -t mediapipe .
$ docker run -it --rm -v $(realpath ..):/host --name mediapipe mediapipe
# python3 setup.py gen_protos
# python3 setup.py bdist_wheel
# cp dist/*.whl /host

Thanks again for the help. You can mark this issue as solved. Edit: well... it does build the package, but when I run it, it uses OpenGL instead of CUDA.
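
A rough way to compare the CPU and GPU wheels is to time Pose.process on repeated frames; below is a minimal sketch assuming NumPy is available (the frame size and iteration count are arbitrary, and a blank synthetic frame forces detection on every call, so a real frame with a person in it gives more representative numbers):

import time

import mediapipe as mp
import numpy as np

pose = mp.solutions.pose.Pose(static_image_mode=False)

# Synthetic 640x480 RGB frame; swap in a real image for realistic timings.
frame = np.zeros((480, 640, 3), dtype=np.uint8)

n = 100
start = time.perf_counter()
for _ in range(n):
    pose.process(frame)
elapsed = time.perf_counter() - start
print('%.1f frames/sec' % (n / elapsed))

pose.close()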

gmontamat commented 3 years ago

@ykk648 do you mind letting me know how you got GL version: 3.2 (OpenGL ES 3.2 NVIDIA 455.23.05)? When I run it I get this message: GL version: 3.1 (OpenGL ES 3.1 Mesa 20.0.8)... and it's even slower than the CPU version.

ykk648 commented 3 years ago

@gmontamat Actually I don't know; you may have an AMD GPU while I'm using Nvidia. My env is Ubuntu 20.04 + GTX 3080 + CUDA 11.1. Besides following opengl-es-setup-on-linux-desktop, I never installed OpenGL myself.

gmontamat commented 3 years ago

I think I closed this issue too early :( So I've got the wheel I generated with the CUDA options: https://github.com/google/mediapipe/issues/2041#issuecomment-844129267. When I try to run it in a CUDA-enabled container (tensorflow/tensorflow:1.15.5-gpu-py3) on a GeForce GTX 1080, I get:

GL version: 3.1 (OpenGL ES 3.1 Mesa 20.0.8)

Which means that 1) it appears to be using OpenGL instead of CUDA, and 2) it's using the Mesa renderer (not Nvidia), which is even slower than running pose estimation on the CPU.

Any help is very much appreciated!

edit: I think there are 2 possible alternatives then: 1) use the wheel generated with the OpenGL diff https://github.com/google/mediapipe/issues/2041#issuecomment-843715324 and try to find the issue with Docker and OpenGL (in my case it works, but with Mesa, pose detection is really slow). I've tried using nvidia/opengl:1.2-glvnd-runtime-ubuntu18.04, and when I run glxinfo | grep -i opengl I get the Mesa renderer again. EDIT: figured this out! See: https://github.com/google/mediapipe/issues/2041#issuecomment-847376359

2) fix https://github.com/google/mediapipe/issues/2041#issuecomment-844129267 so that the resulting Python package actually runs with CUDA support.

danial880 commented 3 years ago

@thamquocdung I have followed your instructions. I have encountered the following error.

ERROR: /home/convsys/mediapipe/mediapipe/python/BUILD:19:17: Linking of rule '//mediapipe/python:_framework_bindings.so' failed (Exit 1): gcc failed: error executing command /usr/bin/gcc @bazel-out/k8-opt/bin/mediapipe/python/_framework_bindings.so-2.params
Use --sandbox_debug to see verbose messages from the sandbox

gcc failed: error executing command /usr/bin/gcc @bazel-out/k8-opt/bin/mediapipe/python/_framework_bindings.so-2.params
Use --sandbox_debug to see verbose messages from the sandbox

/usr/bin/ld.gold: error: cannot find -lImath
/usr/bin/ld.gold: error: cannot find -lIlmImf
/usr/bin/ld.gold: error: cannot find -lIex
/usr/bin/ld.gold: error: cannot find -lHalf
/usr/bin/ld.gold: error: cannot find -lIlmThread
/usr/bin/ld.gold: error: cannot find -ldc1394
/usr/bin/ld.gold: error: cannot find -lavresample
collect2: error: ld returned 1 exit status
Target //mediapipe/python:_framework_bindings.so failed to build
Use --verbose_failures to see the command lines of failed build steps.
INFO: Elapsed time: 1.182s, Critical Path: 0.77s
INFO: 2 processes: 2 internal.
FAILED: Build did NOT complete successfully
error: command 'bazel' failed with exit status 1

thamquocdung commented 3 years ago

@danial880, sorry, I haven't encountered your issue before. Which architecture did you build on? Make sure you have installed all of the required packages. You should follow these instructions (install.md and python.md).

txf- commented 3 years ago

@gmontamat have you tried compiling it with only opengl support and not CUDA? If so, does it use the Nvidia driver in that situation?

gmontamat commented 3 years ago

@txf- the OpenGL build https://github.com/google/mediapipe/issues/2041#issuecomment-843715324 worked for me (it used to be slow running Mesa OpenGL, but I found the issue)! I found out why Docker wasn't able to run the Nvidia OpenGL runtime on my server: it turns out it only runs on a specific display (:0 on my machine), and I was sharing VNC's display with the Docker container running mediapipe (:3 instead of :0). Once I figured that out, glxgears, glxinfo, and mediapipe with OpenGL finally ran on the GPU with decent performance. To view real-time predictions I had to use RTSP, since I don't have physical access to display :0 (this is a server). Note that the Docker image you use with OpenGL needs special configs, for example: https://github.com/machinekoder/nvidia-opengl-docker

So I figured out how to make alternative 1 from https://github.com/google/mediapipe/issues/2041#issuecomment-845414224 work (running the OpenGL mediapipe package on the GPU with Docker).

That said, I'd really like to know why the CUDA package I'm trying to build with this diff https://github.com/google/mediapipe/issues/2041#issuecomment-844129267 isn't using CUDA at all (it uses OpenGL), even though I followed these steps: https://google.github.io/mediapipe/getting_started/gpu_support.html#tensorflow-cuda-support-and-setup-on-linux-desktop

gmontamat commented 3 years ago

@thamquocdung @danial880 I encountered a similar issue while compiling; that's why I had to add this change, which is in https://github.com/google/mediapipe/issues/2041#issuecomment-844129267:

diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD
index 890889a..fe3ebfe 100644
--- a/mediapipe/framework/tool/BUILD
+++ b/mediapipe/framework/tool/BUILD
@@ -97,6 +97,7 @@ cc_binary(
     deps = [
         "@com_google_absl//absl/strings",
     ],
+    linkopts = ["-lm"],
 )

 cc_library(
danial880 commented 3 years ago

@gmontamat thanks for sharing. I ran setup_opencv.sh and after that installed mediapipe; the error doesn't appear anymore. It's running fine on my 1080 Ti, giving 67 fps averaged over 3 seconds.

sergak0 commented 3 years ago

> @thamquocdung thank you! Your changes worked. I'm leaving my diff below since I've also compiled for CUDA and used the Dockerfile provided so as not to mess up the dependencies on my system: [...]
>
> (full CUDA diff and build steps quoted from @gmontamat's comment above, https://github.com/google/mediapipe/issues/2041#issuecomment-844129267)

@gmontamat Thanks for your patch, I can install it in Docker and run hand_tracking with Nvidia OpenGL. To run Docker I give it access to my display :1 with xhost +SI:localuser:root and use:

docker run --rm -it --runtime=nvidia -v $(pwd)/mediapipe_docker:/host -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY -e XAUTHORITY -e NVIDIA_DRIVER_CAPABILITIES=all -v $XAUTHORITY:$XAUTHORITY --name mediapipe mediapipe

This command allows you to use OpenGL and CUDA inside Docker. Also, I use nvidia/cudagl:10.1-base instead of nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04, and it resolved my issue of using the OpenGL Mesa drivers instead of the OpenGL Nvidia drivers.

But I still have some questions about pose landmark detection: 1) What command should I use to get my landmarks? When I try to run pose_tracking or holistic_tracking I get this error:

Error:

I20210723 13:10:56.997021 13103 demo_run_graph_main_gpu.cc:52] Get calculator graph config contents:
# MediaPipe graph that performs pose tracking with TensorFlow Lite on GPU.

# GPU buffer. (GpuBuffer)
input_stream: "input_video"

# Output image with rendered results. (GpuBuffer)
output_stream: "output_video"

# Pose landmarks. (NormalizedLandmarkList)
output_stream: "pose_landmarks"

node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Subgraph that detects poses and corresponding landmarks.
node {
  calculator: "PoseLandmarkGpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
}

# Subgraph that renders pose-landmark annotation onto the input image.
node {
  calculator: "PoseRendererGpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:pose_landmarks"
  input_stream: "ROI:roi_from_landmarks"
  input_stream: "DETECTION:pose_detection"
  output_stream: "IMAGE:output_video"
}

I20210723 13:10:56.998176 13103 demo_run_graph_main_gpu.cc:58] Initialize the calculator graph.
E20210723 13:10:57.004001 13103 demo_run_graph_main_gpu.cc:197] Failed to run the graph: ValidatedGraphConfig Initialization failed.
No registered object with name: ColorConvertCalculator; Unable to find Calculator "ColorConvertCalculator"
No registered object with name: ImageFrameToGpuBufferCalculator; Unable to find Calculator "ImageFrameToGpuBufferCalculator"

2) I also tried to use the Python wheel that was created when I ran python3 setup.py bdist_wheel, but I get a Segmentation Fault (core dumped) error:

Segmentation error:

root@a06848afe1b9:/host# python3
Python 3.6.9 (default, Jan 26 2021, 15:33:00)
[GCC 8.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import mediapipe as mp
>>> pose_model = mp.solutions.pose.Pose(
...     min_detection_confidence=0.5,
...     min_tracking_confidence=0.5,
...     smooth_landmarks=True,
...     static_image_mode=False,
... )
WARNING: Logging before InitGoogleLogging() is written to STDERR
I20210723 12:47:55.086122 12126 gl_context_egl.cc:163] Successfully initialized EGL. Major : 1 Minor: 5
I20210723 12:47:55.112541 12138 gl_context.cc:331] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 470.57.02)
I20210723 12:47:55.112702 12126 gl_context_egl.cc:163] Successfully initialized EGL. Major : 1 Minor: 5
I20210723 12:47:55.126700 12139 gl_context.cc:331] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 470.57.02)
>>> I20210723 12:47:55.358247 12138 tflite_gpu_runner.cc:142] OpenCL backend is used.
Segmentation fault (core dumped)
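
To narrow down where the crash happens, a minimal smoke test can help; this is only a debugging sketch, assuming the patched wheel is installed, and it uses a synthetic frame so no camera or display is needed:

import numpy as np
import mediapipe as mp

print('mediapipe imported from', mp.__file__, flush=True)

pose = mp.solutions.pose.Pose(static_image_mode=True)
print('Pose graph constructed', flush=True)

# Feed a single synthetic RGB frame; if this call returns, the GPU graph itself runs.
frame = np.zeros((480, 640, 3), dtype=np.uint8)
results = pose.process(frame)
print('process() returned, pose_landmarks =', results.pose_landmarks, flush=True)

pose.close()
print('done', flush=True)
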
jld23 commented 3 years ago

I've tried to consolidate the different patch files in this thread and the linked comments on the master branch, but I'm running into a segmentation fault. Has anyone solved CUDA support in a Docker container for pose?

Here is my diff:

diff --git a/Dockerfile b/Dockerfile
index c4c4df3..95d027a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-FROM ubuntu:18.04
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

-MAINTAINER <mediapipe@google.com>
+LABEL maintainer=<mediapipe@google.com>

 WORKDIR /io
 WORKDIR /mediapipe
@@ -39,20 +39,29 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         libopencv-video-dev \
         libopencv-calib3d-dev \
         libopencv-features2d-dev \
-        software-properties-common && \
+        software-properties-common \
+        python3-venv libprotobuf-dev protobuf-compiler cmake libgtk2.0-dev \
+        mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev mesa-utils \
+        pkg-config libgtk-3-dev libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
+        libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev \
+        gfortran openexr libatlas-base-dev python3-dev python3-numpy \
+        libtbb2 libtbb-dev libdc1394-22-dev  && \
     add-apt-repository -y ppa:openjdk-r/ppa && \
     apt-get update && apt-get install -y openjdk-8-jdk && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

 RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100 --slave /usr/bin/g++ g++ /usr/bin/g++-8
+RUN pip3 install --upgrade pip
 RUN pip3 install --upgrade setuptools
-RUN pip3 install wheel
-RUN pip3 install future
-RUN pip3 install six==1.14.0
-RUN pip3 install tensorflow==1.14.0
-RUN pip3 install tf_slim
+RUN pip3 install wheel \
+    future \
+    six==1.14.0 \
+    tensorflow==1.14.0 \
+    tf_slim

+COPY requirements.txt .
+RUN pip3 install -r requirements.txt
 RUN ln -s /usr/bin/python3 /usr/bin/python

 # Install bazel
@@ -67,5 +76,13 @@ azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \

 COPY . /mediapipe/

+ENV TF_CUDA_PATHS=/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu,/usr/include
+
+RUN python setup.py gen_protos
+# RUN python setup.py bdist_wheel
+RUN python setup.py install
+
+
+
 # If we want the docker image to contain the pre-built object_detection_offline_demo binary, do the following
 # RUN bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/demo:object_detection_tensorflow_demo
diff --git a/MANIFEST.in b/MANIFEST.in
index 14afffe..c8eff9e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -17,4 +17,3 @@ exclude mediapipe/modules/objectron/object_detection_3d_camera.tflite
 exclude mediapipe/modules/objectron/object_detection_3d_cup.tflite
 exclude mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite
 exclude mediapipe/modules/pose_landmark/pose_landmark_lite.tflite
-exclude mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite
diff --git a/issue_2041.patch b/issue_2041.patch
new file mode 100644
index 0000000..a2385ca
--- /dev/null
+++ b/issue_2041.patch
@@ -0,0 +1,134 @@
+diff --git a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
+index c439737..c84c958 100644
+--- a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
++++ b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
+@@ -88,6 +88,20 @@ output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
+ # Regions of interest calculated based on pose detections. (NormalizedRect)
+ output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"
+ 
++
++node: {
++  calculator: "ColorConvertCalculator"
++  input_stream: "RGB_IN:image"
++  output_stream: "RGBA_OUT:image_rgba"
++}
++
++node: {
++  calculator: "ImageFrameToGpuBufferCalculator"
++  input_stream: "image_rgba"
++  output_stream: "image_gpu"
++}
++
++
+ # Defines whether landmarks on the previous image should be used to help
+ # localize landmarks on the current image.
+ node {
+@@ -117,7 +131,7 @@ node: {
+ # Calculates size of the image.
+ node {
+   calculator: "ImagePropertiesCalculator"
+-  input_stream: "IMAGE_GPU:image"
++  input_stream: "IMAGE_GPU:image_gpu"
+   output_stream: "SIZE:image_size"
+ }
+ 
+@@ -126,7 +140,7 @@ node {
+ # round of pose detection.
+ node {
+   calculator: "GateCalculator"
+-  input_stream: "image"
++  input_stream: "image_gpu"
+   input_stream: "image_size"
+   input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
+   output_stream: "image_for_pose_detection"
+@@ -181,7 +195,7 @@ node {
+ node {
+   calculator: "PoseLandmarkByRoiGpu"
+   input_side_packet: "MODEL_COMPLEXITY:model_complexity"
+-  input_stream: "IMAGE:image"
++  input_stream: "IMAGE:image_gpu"
+   input_stream: "ROI:pose_rect"
+   output_stream: "LANDMARKS:unfiltered_pose_landmarks"
+   output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
+@@ -214,7 +228,7 @@ node {
+ # timestamp bound update occurs to jump start the feedback loop.
+ node {
+   calculator: "PreviousLoopbackCalculator"
+-  input_stream: "MAIN:image"
++  input_stream: "MAIN:image_gpu"
+   input_stream: "LOOP:pose_rect_from_landmarks"
+   input_stream_info: {
+     tag_index: "LOOP"
+diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD
+index 08a2995..a61cff2 100644
+--- a/mediapipe/python/BUILD
++++ b/mediapipe/python/BUILD
+@@ -72,5 +72,10 @@ cc_library(
+         "//mediapipe/modules/pose_detection:pose_detection_cpu",
+         "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_cpu",
+         "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
++        "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
++        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
++        "//mediapipe/calculators/image:color_convert_calculator",
++
+     ],
+ )
+diff --git a/mediapipe/python/solutions/pose.py b/mediapipe/python/solutions/pose.py
+index e25fe62..16c0346 100644
+--- a/mediapipe/python/solutions/pose.py
++++ b/mediapipe/python/solutions/pose.py
+@@ -82,7 +82,7 @@ class PoseLandmark(enum.IntEnum):
+   LEFT_FOOT_INDEX = 31
+   RIGHT_FOOT_INDEX = 32
+ 
+-BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
++BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_gpu.binarypb'
+ POSE_CONNECTIONS = frozenset([
+     (PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
+     (PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
+@@ -180,9 +180,9 @@ class Pose(SolutionBase):
+                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
+                     bool_value=not static_image_mode)
+             ],
+-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
++            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
+                 min_detection_confidence,
+-            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
++            'poselandmarkgpu__poselandmarkbyroigpu__ThresholdingCalculator.threshold':
+                 min_tracking_confidence,
+         },
+         outputs=['pose_landmarks'])
+diff --git a/setup.py b/setup.py
+index 81569b3..4b15862 100644
+--- a/setup.py
++++ b/setup.py
+@@ -225,8 +225,9 @@ class BuildBinaryGraphs(build.build):
+         'face_detection/face_detection_front_cpu',
+         'face_landmark/face_landmark_front_cpu',
+         'hand_landmark/hand_landmark_tracking_cpu',
+         'holistic_landmark/holistic_landmark_cpu', 'objectron/objectron_cpu',
+-        'pose_landmark/pose_landmark_cpu'
++        'pose_landmark/pose_landmark_gpu',
+     ]
+     for binary_graph in binary_graphs:
+       sys.stderr.write('generating binarypb: %s\n' %
+@@ -240,7 +241,8 @@ class BuildBinaryGraphs(build.build):
+         'bazel',
+         'build',
+         '--compilation_mode=opt',
+-        '--define=MEDIAPIPE_DISABLE_GPU=1',
++        '--copt=-DMESA_EGL_NO_X11_HEADERS',
++        '--copt=-DEGL_NO_X11',
+         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
+         os.path.join('mediapipe/modules/', graph_path),
+     ]
+@@ -296,7 +298,8 @@ class BuildBazelExtension(build_ext.build_ext):
+         'bazel',
+         'build',
+         '--compilation_mode=opt',
+-        '--define=MEDIAPIPE_DISABLE_GPU=1',
++        '--copt=-DMESA_EGL_NO_X11_HEADERS',
++        '--copt=-DEGL_NO_X11',
+         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
+         str(ext.bazel_target + '.so'),
+     ]
diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD
index 00d3648..7625121 100644
--- a/mediapipe/framework/tool/BUILD
+++ b/mediapipe/framework/tool/BUILD
@@ -111,6 +111,7 @@ cc_binary(
     deps = [
         "@com_google_absl//absl/strings",
     ],
+    linkopts = ["-lm"],
 )

 cc_library(
diff --git a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
index ce053b7..58637bd 100644
--- a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
+++ b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
@@ -96,6 +96,18 @@ output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
 # Regions of interest calculated based on pose detections. (NormalizedRect)
 output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"

+node: {
+  calculator: "ColorConvertCalculator"
+  input_stream: "RGB_IN:image"
+  output_stream: "RGBA_OUT:image_rgba"
+}
+
+node: {
+  calculator: "ImageFrameToGpuBufferCalculator"
+  input_stream: "image_rgba"
+  output_stream: "image_gpu"
+}
+
 # Defines whether landmarks on the previous image should be used to help
 # localize landmarks on the current image.
 node {
@@ -125,7 +137,7 @@ node: {
 # Calculates size of the image.
 node {
   calculator: "ImagePropertiesCalculator"
-  input_stream: "IMAGE_GPU:image"
+  input_stream: "IMAGE_GPU:image_gpu"
   output_stream: "SIZE:image_size"
 }

@@ -134,7 +146,7 @@ node {
 # round of pose detection.
 node {
   calculator: "GateCalculator"
-  input_stream: "image"
+  input_stream: "image_gpu"
   input_stream: "image_size"
   input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
   output_stream: "image_for_pose_detection"
@@ -189,7 +201,7 @@ node {
 node {
   calculator: "PoseLandmarkByRoiGpu"
   input_side_packet: "MODEL_COMPLEXITY:model_complexity"
-  input_stream: "IMAGE:image"
+  input_stream: "IMAGE:image_gpu"
   input_stream: "ROI:pose_rect"
   output_stream: "LANDMARKS:unfiltered_pose_landmarks"
   output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
@@ -225,7 +237,7 @@ node {
 # timestamp bound update occurs to jump start the feedback loop.
 node {
   calculator: "PreviousLoopbackCalculator"
-  input_stream: "MAIN:image"
+  input_stream: "MAIN:image_gpu"
   input_stream: "LOOP:pose_rect_from_landmarks"
   input_stream_info: {
     tag_index: "LOOP"
diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD
index 42ce07f..372700c 100644
--- a/mediapipe/python/BUILD
+++ b/mediapipe/python/BUILD
@@ -71,8 +71,18 @@ cc_library(
         "//mediapipe/modules/objectron:objectron_cpu",
         "//mediapipe/modules/palm_detection:palm_detection_cpu",
         "//mediapipe/modules/pose_detection:pose_detection_cpu",
+        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
         "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_cpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
         "//mediapipe/modules/selfie_segmentation:selfie_segmentation_cpu",
+        "//mediapipe/modules/face_landmark:face_landmark_front_gpu",
+        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu",
+        "//mediapipe/modules/holistic_landmark:holistic_landmark_gpu",
+        "//mediapipe/modules/objectron:objectron_gpu",
+        "//mediapipe/modules/palm_detection:palm_detection_gpu",
+        "//mediapipe/modules/pose_detection:pose_detection_gpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_by_roi_gpu",
+        "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
+        "//mediapipe/calculators/image:color_convert_calculator",
     ],
 )
diff --git a/mediapipe/python/solutions/hands.py b/mediapipe/python/solutions/hands.py
index 15760ed..32798e6 100644
--- a/mediapipe/python/solutions/hands.py
+++ b/mediapipe/python/solutions/hands.py
@@ -64,7 +64,7 @@ class HandLandmark(enum.IntEnum):
   PINKY_TIP = 20

-BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.binarypb'
 HAND_CONNECTIONS = frozenset([
     (HandLandmark.WRIST, HandLandmark.THUMB_CMC),
     (HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
@@ -137,9 +137,9 @@ class Hands(SolutionBase):
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'palmdetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'handlandmarkcpu__ThresholdingCalculator.threshold':
+            'handlandmarkgpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['multi_hand_landmarks', 'multi_handedness'])
diff --git a/mediapipe/python/solutions/holistic.py b/mediapipe/python/solutions/holistic.py
index 53d2817..2507165 100644
--- a/mediapipe/python/solutions/holistic.py
+++ b/mediapipe/python/solutions/holistic.py
@@ -48,7 +48,7 @@ from mediapipe.python.solutions.pose import POSE_CONNECTIONS
 from mediapipe.python.solutions.pose import PoseLandmark
 # pylint: enable=unused-import

-BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_gpu.binarypb'

 def _download_oss_pose_landmark_model(model_complexity):
@@ -105,14 +105,14 @@ class Holistic(SolutionBase):
             'smooth_landmarks': smooth_landmarks and not static_image_mode,
         },
         calculator_params={
-            'poselandmarkcpu__ConstantSidePacketCalculator.packet': [
+            'poselandmarkgpu__ConstantSidePacketCalculator.packet': [
                 constant_side_packet_calculator_pb2
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
+            'poselandmarkgpu__poselandmarkbyroigpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=[
diff --git a/mediapipe/python/solutions/objectron.py b/mediapipe/python/solutions/objectron.py
index 195c2b8..3f6ae9b 100644
--- a/mediapipe/python/solutions/objectron.py
+++ b/mediapipe/python/solutions/objectron.py
@@ -75,7 +75,7 @@ class BoxLandmark(enum.IntEnum):
   BACK_TOP_RIGHT = 7
   FRONT_TOP_RIGHT = 8

-BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_gpu.binarypb'
 BOX_CONNECTIONS = frozenset([
     (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
     (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
diff --git a/mediapipe/python/solutions/pose.py b/mediapipe/python/solutions/pose.py
index 7e0ec8b..f1d2411 100644
--- a/mediapipe/python/solutions/pose.py
+++ b/mediapipe/python/solutions/pose.py
@@ -82,7 +82,8 @@ class PoseLandmark(enum.IntEnum):
   LEFT_FOOT_INDEX = 31
   RIGHT_FOOT_INDEX = 32

-BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
+# BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
+BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_gpu.binarypb'
 POSE_CONNECTIONS = frozenset([
     (PoseLandmark.NOSE, PoseLandmark.RIGHT_EYE_INNER),
     (PoseLandmark.RIGHT_EYE_INNER, PoseLandmark.RIGHT_EYE),
@@ -180,9 +181,11 @@ class Pose(SolutionBase):
                 .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                     bool_value=not static_image_mode)
             ],
-            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            # 'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            'poselandmarkgpu__posedetectiongpu__TensorsToDetectionsCalculator.min_score_thresh':
                 min_detection_confidence,
-            'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
+            # 'poselandmarkcpu__poselandmarkbyroicpu__ThresholdingCalculator.threshold':
+            'poselandmarkgpu__poselandmarkbyroigpu__ThresholdingCalculator.threshold':
                 min_tracking_confidence,
         },
         outputs=['pose_landmarks', 'pose_world_landmarks'])
diff --git a/setup.py b/setup.py
index 24fb2e3..78a5cdc 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ from distutils import spawn
 import distutils.command.build as build
 import distutils.command.clean as clean

-__version__ = '0.8'
+__version__ = '0.8.4'
 IS_WINDOWS = (platform.system() == 'Windows')
 MP_ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
 ROOT_INIT_PY = os.path.join(MP_ROOT_PATH, '__init__.py')
@@ -225,9 +225,9 @@ class BuildBinaryGraphs(build.build):
         'face_detection/face_detection_short_range_cpu',
         'face_detection/face_detection_full_range_cpu',
         'face_landmark/face_landmark_front_cpu',
-        'hand_landmark/hand_landmark_tracking_cpu',
-        'holistic_landmark/holistic_landmark_cpu', 'objectron/objectron_cpu',
-        'pose_landmark/pose_landmark_cpu',
+        'holistic_landmark/holistic_landmark_gpu', 'objectron/objectron_gpu',
+        'pose_landmark/pose_landmark_gpu', 'pose_detection/pose_detection_gpu',
+        'pose_landmark/pose_landmark_gpu',
         'selfie_segmentation/selfie_segmentation_cpu'
     ]
     for binary_graph in binary_graphs:
@@ -243,7 +243,12 @@ class BuildBinaryGraphs(build.build):
         'build',
         '--compilation_mode=opt',
         '--copt=-DNDEBUG',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        # '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--config=cuda',
+        '--spawn_strategy=local',
+        '--define=no_aws_support=true',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
+        '--copt=-DEGL_NO_X11',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         os.path.join('mediapipe/modules/', graph_path),
     ]
@@ -300,7 +305,12 @@ class BuildBazelExtension(build_ext.build_ext):
         'build',
         '--compilation_mode=opt',
         '--copt=-DNDEBUG',
-        '--define=MEDIAPIPE_DISABLE_GPU=1',
+        # '--define=MEDIAPIPE_DISABLE_GPU=1',
+        '--config=cuda',
+        '--spawn_strategy=local',
+        # '--define=no_aws_support=true',
+        '--copt=-DMESA_EGL_NO_X11_HEADERS',
+        '--copt=-DEGL_NO_X11',
         '--action_env=PYTHON_BIN_PATH=' + _normalize_path(sys.executable),
         str(ext.bazel_target + '.so'),
     ]
diff --git a/third_party/BUILD b/third_party/BUILD
index e2044cf..8378416 100644
--- a/third_party/BUILD
+++ b/third_party/BUILD
@@ -113,6 +113,8 @@ cmake_external(
         "WITH_PNG": "ON",
         "WITH_TIFF": "ON",
         "WITH_WEBP": "OFF",
+        "WITH_OPENEXR": "OFF",
+        "WITH_IPP": "OFF",
         # Optimization flags
         "CV_ENABLE_INTRINSICS": "ON",
         "WITH_EIGEN": "ON",
sandesh1994199 commented 2 years ago

@sgowroji, I have followed #2320 to build the mediapipe Python package. After running the python3 setup.py bdist_wheel command, I am facing the following error:

ERROR: Skipping 'mediapipe/modules/pose_landmark/pose_landmark_gpuselfie_segmentation/selfie_segmentation_cpu': no such target '//mediapipe/modules/pose_landmark:pose_landmark_gpuselfie_segmentation/selfie_segmentation_cpu': target 'pose_landmark_gpuselfie_segmentation/selfie_segmentation_cpu' not declared in package 'mediapipe/modules/pose_landmark' defined by /mediapipe/mediapipe/modules/pose_landmark/BUILD
WARNING: Target pattern parsing failed.
ERROR: no such target '//mediapipe/modules/pose_landmark:pose_landmark_gpuselfie_segmentation/selfie_segmentation_cpu': target 'pose_landmark_gpuselfie_segmentation/selfie_segmentation_cpu' not declared in package 'mediapipe/modules/pose_landmark' defined by /mediapipe/mediapipe/modules/pose_landmark/BUILD
INFO: Elapsed time: 0.353s
INFO: 0 processes.
FAILED: Build did NOT complete successfully (0 packages loaded)
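
The fused target name in that error (pose_landmark_gpu running straight into selfie_segmentation/selfie_segmentation_cpu) is what Python's implicit string-literal concatenation produces when a comma is missing between adjacent entries in the binary_graphs list of setup.py; a minimal illustration of the effect (hypothetical list, not the exact file contents):

# Two adjacent string literals with no comma between them are silently joined
# into one string, so Bazel later receives a single fused target path.
binary_graphs = [
    'pose_landmark/pose_landmark_gpu'            # missing trailing comma here
    'selfie_segmentation/selfie_segmentation_cpu',
]
print(binary_graphs)
# ['pose_landmark/pose_landmark_gpuselfie_segmentation/selfie_segmentation_cpu']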

sureshdagooglecom commented 2 years ago

Hi @gmontamat, could you please try to reproduce the above error with the latest release and let us know if the issue still exists? Thanks!

google-ml-butler[bot] commented 2 years ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you.

google-ml-butler[bot] commented 2 years ago

Closing as stale. Please reopen if you'd like to work on this further.

google-ml-butler[bot] commented 2 years ago

Are you satisfied with the resolution of your issue?