DLR-RM / stable-baselines3

PyTorch version of Stable Baselines, reliable implementations of reinforcement learning algorithms.
https://stable-baselines3.readthedocs.io
MIT License

[Question] Why torch model in c++ got totally different output from python #1925

JaimeParker closed this issue 4 months ago

JaimeParker commented 4 months ago

❓ Question

I trained a model using PPO, then converted it to ONNX and then to a .pt file following the documentation on exporting models. However, when I loaded the model from C++ and tested it, I got totally different predictions, and they changed on each run of my C++ program even though the observation was fixed.


Here is my onnx_utils.py, which converts a model trained with PPO to ONNX and to .pt and saves both:

import os
import time

import torch as th
import numpy as np
from typing import Tuple
from stable_baselines3 import SAC, PPO
from stable_baselines3.tools.load_apply import get_model_path
from stable_baselines3.tools.save_sync import save_sth_to_nut
from stable_baselines3.tools import params
from stable_baselines3.common.policies import BasePolicy
import onnxruntime as ort

class OnnxPolicySAC(th.nn.Module):
    def __init__(self, actor: th.nn.Module):
        super().__init__()
        self.actor = actor

    def forward(self, observation: th.Tensor) -> th.Tensor:
        return self.actor(observation, deterministic=True)

class OnnxPolicyPPO(th.nn.Module):
    def __init__(self, policy: BasePolicy):
        super().__init__()
        self.policy = policy

    def forward(self, observation: th.Tensor) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
        return self.policy(observation, deterministic=True)

class OnnxModel:
    def __init__(self, model_name: str = ""):
        self.ort_session = None
        self.model = None
        self.model_name = model_name
        self.trained_algo = None

    def load_model(self):
        if not self.model_name:
            raise ValueError("Model name is required.")

        abs_zip_path, self.model_name = get_model_path(filename=self.model_name)
        try:
            self.model = SAC.load(abs_zip_path, device="cpu")
            self.trained_algo = "SAC"
            print("Model loaded with SAC")
        except Exception as e:
            try:
                print("Model could not be loaded with SAC, try for PPO.", e)
                self.model = PPO.load(abs_zip_path, device="cpu")
                self.trained_algo = "PPO"
                print("Model loaded with PPO")
            except Exception as e:
                print("Model could not be loaded either SAC or PPO.", e)
                self.trained_algo = None
                exit(1)

    def convert2onnx(self, onnx_model_name: str = ""):
        if self.model is None:
            raise ValueError("Model is not loaded.")
        onnx_model = None
        try:
            if self.trained_algo == "PPO":
                onnx_model = OnnxPolicyPPO(self.model.policy)
                print("convert a PPO model to onnx")
            elif self.trained_algo == "SAC":
                onnx_model = OnnxPolicySAC(self.model.policy.actor)
                print("convert a SAC model to onnx")
        except Exception as e:
            print(e)
            raise ValueError("Model could not be loaded with OnnxPolicy.")

        if not onnx_model_name:
            onnx_model_name = self.model_name + ".onnx"

        current_dir = os.getcwd()
        parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
        onnx_folder = "/onnx_models/"
        onnx_model_path = parent_dir + onnx_folder

        if not os.path.exists(onnx_model_path):
            print(onnx_model_path, "does not exist, creating...")
            os.makedirs(onnx_model_path)

        onnx_model_path += onnx_model_name

        observation_size = self.model.observation_space.shape
        dummy_input = th.randn(1, *observation_size)
        th.onnx.export(
            onnx_model,
            dummy_input,
            onnx_model_path,
            opset_version=17,
            input_names=["input"],
        )

    def convert2jit(self, jit_model_name: str = ""):
        if not jit_model_name:
            jit_model_name = self.model_name + ".pt"

        if self.model is None:
            raise ValueError("Model is not loaded.")

        onnx_model = None
        try:
            if self.trained_algo == "PPO":
                onnx_model = OnnxPolicyPPO(self.model.policy)
                print("convert a PPO model to pt")
            elif self.trained_algo == "SAC":
                onnx_model = OnnxPolicySAC(self.model.policy.actor)
                print("convert a SAC model to pt")
        except Exception as e:
            print(e)
            raise ValueError("Model could not be loaded with OnnxPolicy.")

        observation_size = self.model.observation_space.shape
        dummy_input = th.randn(1, *observation_size)

        current_dir = os.getcwd()
        parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
        jit_folder = "/jit_models/"
        jit_model_path = parent_dir + jit_folder

        if not os.path.exists(jit_model_path):
            print(jit_model_path, "does not exist, creating...")
            os.makedirs(jit_model_path)
            time.sleep(1)

        jit_model_path += jit_model_name

        traced_module = th.jit.trace(onnx_model.eval(), dummy_input)
        frozen_module = th.jit.freeze(traced_module)
        frozen_module = th.jit.optimize_for_inference(frozen_module)
        th.jit.save(frozen_module, jit_model_path)

        try:
            th.jit.save(frozen_module, os.path.join(save_sth_to_nut(self.model_name), jit_model_name))
        except Exception as e:
            print("Model could not be saved to nut.", e)

    def load_onnx(self, onnx_model_name: str = ""):
        if not onnx_model_name:
            if self.model_name is None:
                raise ValueError("Specify the model name or the onnx model name.")
            onnx_model_name = self.model_name
        onnx_model_name += ".onnx"

        path = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + "/onnx_models/"
        try:
            self.ort_session = ort.InferenceSession(path + onnx_model_name)
        except Exception as e:
            print("Model could not be loaded with onnx.", e)
            return

    def onnx_forward(self, observation: np.ndarray):
        """
        Notes
        -----
        Observation should be a numpy array with astype(np.float32)
        """
        observation = observation.reshape(1, -1)

        scaled_action = self.ort_session.run(None, {"input": observation})[0]
        scaled_action = scaled_action.reshape(-1)
        return scaled_action

def main():
    onnx_model = OnnxModel(model_name="2024-05-13_12-24")
    onnx_model.load_model()
    onnx_model.convert2onnx()
    onnx_model.convert2jit()
    onnx_model.load_onnx()

    time_start = time.time()
    for _ in range(100):
        onnx_model.onnx_forward(np.ones(params.ENV_NUM_OBS).astype(np.float32))
    time_end = time.time()
    print("Time elapsed:", time_end - time_start)

if __name__ == "__main__":
    main()

Note that

from stable_baselines3.tools.load_apply import get_model_path
from stable_baselines3.tools.save_sync import save_sth_to_nut
from stable_baselines3.tools import params

are my own helper modules; they only handle some saving and parameter-loading work.


After saving the ONNX and .pt models, I tried running predictions with both Python and C++. First, the Python script:

from stable_baselines3.common.envs import QuadrotorStochasticEnv
from stable_baselines3.tools.load_apply import get_model_path, load_model, get_obs
from stable_baselines3.tools.onnx_util import OnnxModel
from stable_baselines3.tools import params
import numpy as np
import torch as th
import os

file_name = "2024-05-13_12-24"
abs_zip_path, model_name = get_model_path(filename=file_name)

env = QuadrotorStochasticEnv()
model = load_model(abs_zip_path, env)

vec_env = model.get_env()
obs = vec_env.reset()
pos = np.array([0., 0., 1.])
vel = np.array([0., 0., 0.])
att = np.array([0., 0., 0.])

new_obs = get_obs(pos, vel, att)
action_using_env, _states = model.predict(new_obs, deterministic=True)
print("action using env:", action_using_env)

onnx_model = OnnxModel(file_name)
onnx_model.load_onnx()
action_onnx = onnx_model.onnx_forward(new_obs.reshape(1, params.ENV_NUM_OBS).astype(np.float32))
print("action using onnx:", action_onnx)

path = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) + "/jit_models/" + file_name + ".pt"
pt_model = th.jit.load(path)
new_obs_reshaped = new_obs.reshape(1, -1)
action_jit = pt_model(th.tensor(new_obs_reshaped).float())
print("action using jit:", action_jit)

output is:

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Model loaded with PPO.
action using env: [-0.01473716  1.         -0.48989314  0.21408731]
action using onnx: [-0.01473707  1.516587   -0.48989344  0.21408732]
action using jit: (tensor([[-0.0147,  1.5166, -0.4899,  0.2141]]), tensor([[-82.9755]]), tensor([2.3153]))

Process finished with exit code 0

My action space is $[-1, 1]$; I don't know why the ONNX and .pt outputs exceed it. The values look right again after applying np.clip, though.
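
For example, clipping the raw output back into the action space (a minimal sketch, continuing from the script above):

import numpy as np

# The exported policy returns the unclipped network output,
# so clip it back into the Box bounds before using it.
action_onnx_clipped = np.clip(action_onnx, -1.0, 1.0)
print("clipped onnx action:", action_onnx_clipped)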

The C++ side is weirder. First, the class:

void RLMotion::init() {
    std::string op_path = ros::package::getPath("backend_optimizer");
    std::string model_path = op_path + "/model/2024-05-13_12-24.pt";
    try {
        model_ = torch::jit::load(model_path);
    }
    catch (const c10::Error& e) {
        std::cerr << "error loading the model\n" << std::endl;
    }
    ROS_INFO_STREAM("\033[1;34m" << "RL model loaded!" << "\033[0m");
}

Eigen::Vector4d RLMotion::forwardModel() {
    std::vector<torch::jit::IValue> obs = getObservation();
    // PPO model, Tuple[th.Tensor, th.Tensor, th.Tensor], this code is for PPO model now
    // SAC model, th.Tensor
    // TODO: check if equal to python rl output
    auto output_tuple = model_.forward(obs).toTuple();

    // Assuming the tensor you need is the first element of the tuple
    at::Tensor action = output_tuple->elements()[0].toTensor();

    Eigen::Vector4d output_vector;
    output_vector << action[0][0].item().toDouble(), action[0][1].item().toDouble(),
            action[0][2].item().toDouble(), action[0][3].item().toDouble();

    return output_vector;
}

Second, my main function:

    Eigen::Vector4d action = rl_motion.forwardModel();
    std::cout << "action: " << action.transpose() << std::endl;
    for (int i = 0; i < 10; i++) {
        rl_motion.setAllStates(pos, vel, att);
        action = rl_motion.forwardModel();
        std::cout << "action: " << action.transpose() << std::endl;
    }

    auto start_time = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < 1000; i++) {
        rl_motion.setAllStates(pos, vel, att);
        rl_motion.forwardModel();
    }
    auto stop_time = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop_time - start_time);
    double average_time = static_cast<double>(duration.count()) / 1e6;
    std::cout << "Average time taken by forwardModel: " << average_time << " seconds" << std::endl;

    return 0;

The output:

action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
action: -0.143557   0.78608   4.49336  0.720168
Average time taken by forwardModel: 0.033783 seconds

Running it again:

[ INFO] [1715676913.263620266]: RL model loaded!
action: -0.572065   2.75332   3.62885 -0.781199
action: -0.442859 -0.840001  -1.36992   1.62985
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
action:   1.2039 -0.68558  3.55891 0.160373
Average time taken by forwardModel: 0.03365 seconds

And again:

action: 0.906062  -4.0474 -3.63199   1.0012
action:  1.59084 -2.14124 -5.44334 0.873244
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
action: -0.677913  -1.72651  -4.88338  0.102833
Average time taken by forwardModel: 0.033544 seconds

I'm sure I was using the same model and the same observation!

How can I fix this? Thanks in advance!

JaimeParker commented 4 months ago

The way I set the observation in C++ is:

    std::vector<double> values = { /* 26 double values */ };

    torch::Tensor obs_tensor_data = torch::from_blob(values.data(), {1, 26});

    std::vector<torch::jit::IValue> obs_tensor;
    obs_tensor.emplace_back(obs_tensor_data);

    return obs_tensor;

My num_obs is 26.

JaimeParker commented 4 months ago

I'll put the .pt file on Google Drive. The obs is a 1×26 array:

[0.         0.         0.         0.         0.         0.
 3.         1.         0.8        0.         0.         2.
 0.         0.         3.         0.         0.         1.
 0.         0.         1.         0.         0.         2.
 3.         1.57079633]
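
For reference, running this exact observation through the exported .pt file in Python (a minimal sketch, assuming the file sits in the working directory) would look like:

import numpy as np
import torch as th

obs = np.array([
    0., 0., 0., 0., 0., 0.,
    3., 1., 0.8, 0., 0., 2.,
    0., 0., 3., 0., 0., 1.,
    0., 0., 1., 0., 0., 2.,
    3., 1.57079633,
], dtype=np.float32).reshape(1, 26)

pt_model = th.jit.load("2024-05-13_12-24.pt")
# The traced PPO policy returns (actions, values, log_prob)
actions, values, log_prob = pt_model(th.from_numpy(obs))
print("actions:", actions)
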
araffin commented 4 months ago

For the clipping, from the doc: "The following returns normalized actions and doesn’t include the post-processing step that is done with continuous actions (clip or unscale the action to the correct space)."

For the observation, you should give "all zeros" and "all ones" a try (that makes it easier to be sure they are the same in Python and C++). For the rest, I don't know; if the jit/onnx export/load worked in Python, then it might be an error in PyTorch, or a mismatch between PyTorch versions.
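
For example, a quick check along those lines (a minimal sketch, reusing the model and ort_session objects from the scripts above):

import numpy as np
import torch as th

# A fixed observation that is trivial to reproduce bit-for-bit in C++
obs = np.ones((1, 26), dtype=np.float32)

# PPO's policy returns (actions, values, log_prob)
with th.no_grad():
    actions, values, log_prob = model.policy(th.as_tensor(obs), deterministic=True)
onnx_actions = onnx_model.ort_session.run(None, {"input": obs})[0]

print(actions.numpy())  # unclipped action from the SB3 policy
print(onnx_actions)     # should match the line above closely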

JaimeParker commented 4 months ago

@araffin Fixed: C++ should use float instead of double.

In the get_obs function in C++:

    std::vector<double> values = { /* 26 numbers */ };

    torch::Tensor obs_tensor_data = torch::from_blob(values.data(), {1, 26});

    std::vector<torch::jit::IValue> obs_tensor;
    obs_tensor.emplace_back(obs_tensor_data);

    return obs_tensor;

The data type should be float, because torch::from_blob assumes kFloat by default and silently reinterprets the double buffer, i.e.

std::vector<float> values;

I'll close this one. Thanks for your help!