open-mmlab / mmpose

OpenMMLab Pose Estimation Toolbox and Benchmark.
https://mmpose.readthedocs.io/en/latest/
Apache License 2.0

How to interpret the value of v given by a pretrained pose estimation model #919

Closed · ghost closed 2 years ago

ghost commented 2 years ago

Hi open-mmlab developers, I am using a pretrained model (COCO format) to perform image inference, and I do not know how to interpret the keypoint values given by the model. According to the COCO docs, v can be 0, 1, or 2, but the model outputs float values. Below is one of the outputs I got while performing image inference:

    [{'bbox': array([0.6854594, 45.018677, 88.19361, 265.10806, 0.99963367], dtype=float32),
      'keypoints': array([[37.382774, 78.91617, 0.9567224],
                          [39.541847, 74.59802, 0.94851094],
                          [30.90554, 74.59802, 0.9755936],
                          [41.70092, 65.961716, 0.8276397],
                          [20.11016, 68.12079, 0.9391268],
                          [50.337227, 76.757095, 0.9199661],
                          [4.9966354, 85.3934, 0.8950063],
                          [74.08706, 98.347855, 0.88189924],
                          [2.837555, 113.46138, 0.60554993],
                          [78.405205, 89.71156, 0.9048376],
                          [2.837555, 111.30232, 0.58759665],
                          [30.90554, 141.52937, 0.8425735],
                          [4.9966354, 148.0066, 0.8193674],
                          [46.01908, 189.02904, 0.9078411],
                          [2.837555, 201.9835, 0.90722847],
                          [37.382774, 243.00597, 0.8663102],
                          [2.837555, 247.32408, 0.2709621]], dtype=float32)}]
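
For comparison, this is roughly the shape of a COCO ground-truth person annotation; the values below are invented for illustration and are not from a real annotation file. Note that v is an integer flag and num_keypoints is given explicitly:

    # v is an integer visibility flag in COCO ground truth:
    #   0 = not labeled, 1 = labeled but not visible, 2 = labeled and visible
    annotation = {
        'num_keypoints': 2,  # count of keypoints with v > 0
        # 17 (x, y, v) triplets flattened into 51 values; here only the nose
        # and left eye are labeled, every other keypoint is (0, 0, 0)
        'keypoints': [142, 309, 2, 145, 300, 1] + [0, 0, 0] * 15,
        'bbox': [113.9, 158.5, 206.7, 297.6],  # x, y, width, height
        'category_id': 1,  # person
    }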

All of the values of v are floats. Also, the model does not tell how many keypoints it has detected per person (the COCO ground-truth annotations provide this directly as num_keypoints, as in the example above). I have used the following code block to generate the outputs.

    import collections
    import os
    import pickle

    import cv2
    from mmdet.apis import inference_detector, init_detector
    from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
                             process_mmdet_results, vis_pose_result)

    pose_config = '/home/video_understanding/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py'
    pose_checkpoint = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth'
    det_config = '/home/video_understanding/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py'
    det_checkpoint = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth'
    keypoint_dataset = '/home/ochuman_dataset/original_images'
    keypoint_inference = '/home/ochuman_dataset/image_inference'
    pose_results_file = '/home/ochuman_dataset/pose_results.pkl'

    # initialize the pose model and the person detector
    pose_model = init_pose_model(pose_config, pose_checkpoint)
    det_model = init_detector(det_config, det_checkpoint)

    # collect the .jpg files and sort them by their numeric filename
    image_files = [os.path.join(keypoint_dataset, f)
                   for f in os.listdir(keypoint_dataset) if f.endswith('.jpg')]
    image_files.sort(key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))

    pose_dict = collections.OrderedDict()
    for f in image_files:
        print(f'processing the file ---> {f}')
        image_filename = os.path.basename(f)
        # detect persons, then keep only the person class (cat_id=1)
        mmdet_results = inference_detector(det_model, f)
        person_results = process_mmdet_results(mmdet_results, cat_id=1)
        # run top-down pose estimation on the detected person boxes
        pose_results, returned_outputs = inference_top_down_pose_model(
            pose_model,
            f,
            person_results,
            bbox_thr=0.3,
            format='xyxy',
            dataset=pose_model.cfg.data.test.type)
        # draw the predicted keypoints on the image
        vis_result = vis_pose_result(pose_model,
                                     f,
                                     pose_results,
                                     dataset=pose_model.cfg.data.test.type,
                                     show=False)
        # reduce image size before saving
        vis_result = cv2.resize(vis_result, dsize=None, fx=0.5, fy=0.5)
        out_path = os.path.join(keypoint_inference, image_filename)
        cv2.imwrite(out_path, vis_result)
        print(f'writing the image file to destination directory ---> {out_path}')
        pose_dict[f] = pose_results

    # save all pose results to a pickle file
    with open(pose_results_file, 'wb') as output_file:
        pickle.dump(pose_dict, output_file)

Please guide me on how I can interpret the value of v that I get during inference, and also on how to get model outputs that are in line with the COCO ground-truth annotations for the person keypoint detection task.

jin-s13 commented 2 years ago

Hi, the value of v is the confidence of the prediction. Generally, it will be in [0, 1]. You may set a threshold, for example treating v >= 0.2 as visible and v < 0.2 as invisible. The threshold should be set according to your own dataset.
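
A minimal sketch of that thresholding, assuming the top-down output format shown above; the helper name to_coco_style and the 0.2 default are illustrative choices, not part of the mmpose API. It also produces the per-person keypoint count the question asked about:

    import numpy as np

    # Illustrative helper (not part of mmpose): map predicted keypoint
    # scores to COCO-style integer visibility flags. kpt_thr is a
    # dataset-dependent choice, as noted above.
    def to_coco_style(pose_results, kpt_thr=0.2):
        coco_people = []
        for person in pose_results:
            kpts = np.asarray(person['keypoints'])  # shape (17, 3): x, y, score
            visible = kpts[:, 2] >= kpt_thr
            coco_kpts = kpts.copy()
            # predictions have no "labeled but not visible" state, so use
            # v=2 for confident keypoints and v=0 for the rest
            coco_kpts[:, 2] = np.where(visible, 2, 0)
            coco_people.append({
                'keypoints': coco_kpts.flatten().tolist(),  # 51 values
                'num_keypoints': int(visible.sum()),  # detected keypoints per person
            })
        return coco_people

For the output pasted above, every score clears 0.2, so num_keypoints would be 17; raising kpt_thr to 0.3 would drop the last keypoint (score 0.27) and report 16.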