triton-inference-server / openvino_backend

OpenVINO backend for Triton.
BSD 3-Clause "New" or "Revised" License

Using Intel OpenVINO models doesn't provide good results #79

Open siretru opened 1 month ago

siretru commented 1 month ago

Using this model from Intel: https://docs.openvino.ai/2024/omz_models_model_age_gender_recognition_retail_0013.html

I can't get good results (yet this model offers really good accuracy in the demo)...

Instructions to replicate:

Download the model repository (it is the FP32 version from https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/README.md):

repo.zip

Start the Triton Docker container: docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v ~/repo:/models nvcr.io/nvidia/tritonserver:24.07-py3 tritonserver --model-repository=/models
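
For reference, the OpenVINO backend expects the standard Triton model repository layout, so the mounted repo directory should look roughly like this (a sketch only; the config.pbtxt below is reconstructed from the shapes used in the client code rather than copied from repo.zip, so treat its contents as assumptions):

repo/
  age_gender/
    config.pbtxt
    1/
      model.xml
      model.bin

with a config.pbtxt along these lines:

name: "age_gender"
backend: "openvino"
max_batch_size: 0
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 1, 3, 62, 62 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 1, 2, 1, 1 ]
  },
  {
    name: "age_conv3"
    data_type: TYPE_FP32
    dims: [ 1, 1, 1, 1 ]
  }
]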

Code to perform inference:

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException
import cv2
import argparse

# Configuration
model_name = "age_gender"
model_version = "1"
server_url = "localhost:8001"
input_name = "data"
output_names = ["prob", "age_conv3"]
input_shape = (1, 3, 62, 62)  # Dimensions expected by the model

# Load an image and preprocess it
def preprocess_image(image_path, input_shape):
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image not found at path: {image_path}")
    image = cv2.resize(image, (input_shape[2], input_shape[3]))
    image = image.astype(np.float32) / 255.0  # Normalization
    image = np.transpose(image, (2, 0, 1))  # HWC to CHW
    image = np.expand_dims(image, axis=0)  # Add the batch dimension
    return image

# Initialize the Triton client
def initialize_triton_client(server_url):
    try:
        triton_client = grpcclient.InferenceServerClient(url=server_url, verbose=True)
    except Exception as e:
        print("Error creating the Triton client: " + str(e))
        exit(1)
    return triton_client

# Check that the server and the model are ready
def check_server_and_model(triton_client, model_name, model_version):
    if not triton_client.is_server_live():
        print("The Triton server is not live.")
        exit(1)

    if not triton_client.is_server_ready():
        print("The Triton server is not ready.")
        exit(1)

    if not triton_client.is_model_ready(model_name, model_version=model_version):
        print(f"Model {model_name} version {model_version} is not ready.")
        exit(1)

# Send the inference request and retrieve the results
def infer(triton_client, model_name, model_version, input_image):
    inputs = []
    outputs = []

    inputs.append(grpcclient.InferInput(input_name, input_shape, "FP32"))
    inputs[0].set_data_from_numpy(input_image)

    for output_name in output_names:
        outputs.append(grpcclient.InferRequestedOutput(output_name))

    try:
        results = triton_client.infer(model_name=model_name, model_version=model_version, inputs=inputs, outputs=outputs)
    except InferenceServerException as e:
        print("Erreur lors de l'inférence: " + str(e))
        exit(1)

    return results

# Interpret the results
def interpret_results(results):
    prob_output = results.as_numpy("prob")
    age_output = results.as_numpy("age_conv3")

    # prob_output: shape (1, 2, 1, 1) - Softmax output across 2 type classes [0 - female, 1 - male]
    # age_output: shape (1, 1, 1, 1) - Estimated age divided by 100

    print("gender result=", prob_output)
    print("age results=", age_output)

    gender_prob = prob_output[0, 1, 0, 0]  # Probability of the "male" class
    age = age_output[0, 0, 0, 0] * 100  # Convert to age by multiplying by 100

    gender = "Male" if gender_prob > 0.5 else "Female"

    return gender, gender_prob, age

# Main function
def main(image_path):
    input_image = preprocess_image(image_path, input_shape)
    triton_client = initialize_triton_client(server_url)
    check_server_and_model(triton_client, model_name, model_version)
    results = infer(triton_client, model_name, model_version, input_image)
    gender, gender_prob, age = interpret_results(results)

    print(f"Genre prédit: {gender} (probabilité: {gender_prob})")
    print(f"Âge prédit: {age}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Test an inference against an age_gender classification model hosted on Triton Inference Server.")
    parser.add_argument("image_path", type=str, help="Path of the image to use for inference.")
    args = parser.parse_args()

    main(args.image_path)
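
For example, if the script is saved as client.py (the file name here is arbitrary):

python client.py path/to/face.jpg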
dtrawins commented 3 days ago

@siretru I think you shouldn't use the normalization. That model expects input data in the range 0-255. Try dropping the division by 255 in the preprocessing.
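
A minimal sketch of the adjusted preprocess_image, assuming raw 0-255 FP32 pixel values are indeed what the model wants (everything else in the client above stays the same):

def preprocess_image(image_path, input_shape):
    image = cv2.imread(image_path)  # OpenCV loads images as BGR, the color order this model is documented to expect
    if image is None:
        raise ValueError(f"Image not found at path: {image_path}")
    image = cv2.resize(image, (input_shape[3], input_shape[2]))  # cv2.resize takes (width, height)
    image = image.astype(np.float32)  # keep raw 0-255 pixel values; no division by 255
    image = np.transpose(image, (2, 0, 1))  # HWC -> CHW
    image = np.expand_dims(image, axis=0)  # add the batch dimension
    return image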