PINTO0309 / PINTO_model_zoo

A repository for storing models that have been inter-converted between various frameworks. Supported frameworks are TensorFlow, PyTorch, ONNX, OpenVINO, TFJS, TFTRT, TensorFlowLite (Float32/16/INT8), EdgeTPU, CoreML.
https://qiita.com/PINTO
MIT License
3.5k stars 566 forks source link

Visualization of BLAZEPOSE Predictions #76

Closed ashutoshsoni891 closed 3 years ago

ashutoshsoni891 commented 3 years ago

Hi there!

Thanks for the amazing repo. I am currently playing around with 58_Blazepose_Full (Accurate). I am having trouble visualizing the predictions. I was checking the source code and found that the model returns 3 outputs:

model = Model(inputs=inputs, outputs=[conv99_1, sigm99_1, reshape99_2])

I also checked the tf.js code, but I couldn't understand how you are visualizing those points.

Could you please help me understand how to visualize those points on an image? Also, instead of 39 keypoints, I am trying to train it on 58 keypoints. Could you please tell me about this as well?

I have added more details over here : https://stackoverflow.com/questions/66384742/visualize-and-train-custom-blazepose-model-pose-estimation-keypoint-detection

Cheers!

PINTO0309 commented 3 years ago

I have collaborated with the authors of this repository. https://github.com/terryky/tfjs_webgl_app.git

https://github.com/PINTO0309/PINTO_model_zoo/blob/main/058_BlazePose_Full_Keypoints/01_Accurate/url.txt

ashutoshsoni891 commented 3 years ago

Do you know how we can visualize it in Python? The first repo is in JS and the second one is in C++.

ashutoshsoni891 commented 3 years ago

Here is the visualization script / inference code that I have developed, and it's working perfectly now:

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import cv2

def set_env():
    """Configure TensorFlow GPU visibility and memory growth.

    When at least one physical GPU is reported, only the first one
    (``gpus[0]``) is made visible to TensorFlow and memory growth is
    requested for every physical GPU, after which the physical and logical
    GPU counts are printed. With no GPU present the function does nothing.

    A ``RuntimeError`` raised by TensorFlow during this setup is printed
    instead of being propagated.
    """
    physical = tf.config.experimental.list_physical_devices('GPU')
    if not physical:
        return

    try:
        # Expose only the first physical GPU to TensorFlow.
        tf.config.experimental.set_visible_devices(physical[0], 'GPU')

        # Memory growth has to be configured the same way on all GPUs.
        for device in physical:
            tf.config.experimental.set_memory_growth(device, True)

        logical = tf.config.experimental.list_logical_devices('GPU')
        print(len(physical), "Physical GPUs,", len(logical), "Logical GPUs")
    except RuntimeError as err:
        # Raised when this runs after the GPUs have already been initialised.
        print(err)

def return_model(filepath):
    """Load the Keras model stored at *filepath*, compile it and return it.

    ``compile()`` is invoked without arguments, so no optimizer, loss or
    metrics are supplied here.
    """
    loaded = load_model(filepath)
    loaded.compile()
    return loaded

def get_preds(model, image):
    """Run ``model.predict`` on a single image and return its result.

    A new leading axis of length one is inserted in front of *image* so
    that it is handed to the model as a batch containing one item. The
    return value is whatever ``model.predict`` produces, unmodified.
    """
    return model.predict(np.expand_dims(image, axis=0))

def convert_preds_to_xy(preds, stride=4):
    """Extract integer ``(x, y)`` keypoints from the raw model predictions.

    Only ``preds[2][0]`` is read (third model output, first batch item).
    It is treated as a flat sequence in which every keypoint occupies
    ``stride`` consecutive values, the first two of which are x and y.

    Args:
        preds: Indexable model output; ``preds[2][0]`` must support slicing.
        stride: Number of values stored per keypoint. Defaults to 4, the
            layout that was previously hard-coded (presumably
            x, y, z, visibility for the 39-keypoint model -- TODO confirm).

    Returns:
        A list of ``(int, int)`` tuples, one per keypoint. Coordinates are
        converted with ``int()``, i.e. truncated toward zero.

    Raises:
        ValueError: If ``stride`` is smaller than 2, since x and y could
            not both belong to the same keypoint record.
    """
    if stride < 2:
        raise ValueError("stride must be at least 2, got {}".format(stride))

    flat = preds[2][0]
    return [
        (int(x), int(y))
        for x, y in zip(flat[::stride], flat[1::stride])
    ]

def infer_video(model , video = 0 ):
    """Run pose inference on a video stream and display the skeleton live.

    Each frame is resized to 256x256 and divided by 255, passed through
    ``get_preds`` / ``convert_preds_to_xy``, and the keypoints are joined
    with green lines according to the global ``POSE_PAIRS`` before the
    frame is shown in a window titled ``'Inference'``. The loop ends when
    a frame cannot be read or when the user presses ``q``.

    The capture device and all OpenCV windows are released even if
    prediction or drawing raises, so a failure mid-stream does not leave
    the camera locked.

    Args:
        model: Object accepted by ``get_preds`` (must expose ``predict``).
        video: Source handed to ``cv2.VideoCapture``; defaults to ``0``.

    Note:
        ``POSE_PAIRS`` is looked up as a global at call time and must be
        defined before this function is invoked.
    """
    cap = cv2.VideoCapture(video)

    try:
        while cap.isOpened():
            okay, frame = cap.read()
            if not okay:
                print('Cant open webcam , please try again!')
                break

            # cv2.resize already returns a new array, so the source frame
            # is left untouched without an explicit copy.
            # NOTE(review): OpenCV delivers frames in BGR channel order;
            # confirm whether the model expects RGB input.
            inframe_resize = cv2.resize(frame, (256, 256)) / 255

            preds = get_preds(model, inframe_resize)
            kpts = convert_preds_to_xy(preds)

            # Draw directly on the resized frame: the keypoints are used
            # as pixel coordinates of that 256x256 image.
            for start, end in POSE_PAIRS:
                cv2.line(inframe_resize, kpts[start], kpts[end],
                         (0, 255, 0), thickness=1)
            cv2.imshow('Inference', inframe_resize)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture handle and close the preview window.
        cap.release()
        cv2.destroyAllWindows()

def infer_image(model , image):
    """Run pose inference on one image file and display the skeleton.

    The image is read from disk, resized to 256x256 and divided by 255,
    passed through ``get_preds`` / ``convert_preds_to_xy``, and the
    keypoints are joined with green lines according to the global
    ``POSE_PAIRS``. The result is shown in a window titled
    ``'Inference Image'`` until a key is pressed, after which the OpenCV
    windows are closed.

    Args:
        model: Object accepted by ``get_preds`` (must expose ``predict``).
        image: Path of the image file to load with ``cv2.imread``.

    Raises:
        OSError: If the file cannot be read or decoded.

    Note:
        ``POSE_PAIRS`` is looked up as a global at call time and must be
        defined before this function is invoked.
    """
    img = cv2.imread(image)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.resize.
        raise OSError("Could not read image file: {!r}".format(image))

    # NOTE(review): cv2.imread yields BGR channel order; confirm whether
    # the model expects RGB input.
    img_resize = cv2.resize(img, (256, 256)) / 255
    preds = get_preds(model, img_resize)
    kpts = convert_preds_to_xy(preds)

    # Keypoints are used as pixel coordinates of the 256x256 image.
    for start, end in POSE_PAIRS:
        cv2.line(img_resize, kpts[start], kpts[end], (0, 255, 0), thickness=1)

    cv2.imshow('Inference Image', img_resize)
    cv2.waitKey(0)
    # Close the preview once the user has pressed a key, mirroring the
    # cleanup performed by infer_video.
    cv2.destroyAllWindows()

# Pairs of keypoint indices that are joined by a line when the skeleton is
# drawn. Kept at module level so that infer_video / infer_image can resolve
# the name even when this file is imported instead of run as a script.
#
# The right-hand entry (16, 20) mirrors the left-hand (15, 19); the list
# previously contained (16, 22) twice and lacked (16, 20).
# NOTE(review): indices appear to follow the MediaPipe pose landmark
# numbering (0-10 face, 11-22 arms/hands, 23-32 legs/feet) -- confirm.
POSE_PAIRS = [
    (0, 1), (0, 4), (1, 2), (2, 3), (3, 7), (4, 5), (5, 6), (6, 8), (9, 10),
    (11, 12), (12, 14), (14, 16), (16, 22), (16, 18), (16, 20), (18, 20),
    (12, 24), (24, 26), (26, 28), (28, 32), (28, 30), (30, 32), (24, 23),
    (11, 13), (13, 15), (15, 21), (15, 17), (15, 19), (19, 17), (11, 23),
    (23, 25), (25, 27), (27, 29), (27, 31), (29, 31),
]

if __name__ == '__main__':
    set_env()

    model = return_model('full_pose_landmark_39kp.h5')
    model.summary()
    infer_video(model)
    # To run on a single still image instead of the webcam:
    # infer_image(model , 'image.jpg')