I have trained on the dataset "hand_wiping_1-14_5actiongap_10000points.hdf5" for about 3000 epochs with the provided config. I then wrote some code to visualize the predicted actions against the reference actions (using the same dataset for validation). Most results look pretty good, but there are also some bad outputs, shown above, where the green curve is the reference trajectory and the red curve is the predicted action (trajectory). I wonder whether this is what you would expect? (My expectation is that the predicted output should be very close to the reference when the same dataset is used for both training and validation.)
The visualization code is below; I hope it can help with validation before deploying on any real robot:
Code for saving the reference trajectory and predicted trajectory
import json
import torch
import robomimic.utils.obs_utils as ObsUtils
from robomimic.config import config_factory
import robomimic.utils.file_utils as FileUtils
import numpy as np
import os
import h5py
import robomimic.utils.train_utils as TrainUtils
from torch.utils.data import DataLoader
def vector_to_action_dict(
    action: np.ndarray,
    action_shapes: dict[str, int],
    action_keys: list[str],
) -> dict[str, np.ndarray]:
    """Split a flat action array into named components along its last axis.

    The components are taken in the order given by ``action_keys``; each one
    occupies ``prod(action_shapes[key])`` consecutive entries of the last
    axis of ``action``.

    Args:
        action: Array whose last axis holds the concatenated action
            components. Any number of leading axes is accepted.
        action_shapes: Size of each component, either a plain integer or a
            shape tuple (an empty tuple counts as one entry).
        action_keys: Component names, in the order they are laid out in
            ``action``.

    Returns:
        A dict mapping each key to the slice of ``action`` that belongs to
        it. Leading axes are preserved.
    """
    action_dict = {}
    start_idx = 0
    for key in action_keys:
        # np.prod returns a float for an empty shape and a NumPy scalar
        # otherwise; slice bounds must be plain integers.
        this_act_dim = int(np.prod(action_shapes[key]))
        end_idx = start_idx + this_act_dim
        # Ellipsis keeps the original 2-D behaviour and also supports
        # 1-D (single action) or higher-rank inputs.
        action_dict[key] = action[..., start_idx:end_idx]
        start_idx = end_idx
    return action_dict
# load model
infer_device = "cuda:0"
# Expand "~" explicitly (the dataset path further down is expanded the same
# way) so that loading does not rely on the checkpoint loader doing it.
checkpoint_path = os.path.expanduser(
    "~/diffusion_policy_pcd_wiping_1-14/20240530170743/models/model_epoch_2300.pth"
)
algo_name, ckpt_dict = FileUtils.algo_name_from_checkpoint(ckpt_path=checkpoint_path)

# Value written to `num_inference_timesteps` of whichever scheduler config
# (ddpm or ddim) is enabled; set to None to keep the checkpoint's own value.
dp_eval_steps = 10
if dp_eval_steps is not None:
    # HACK: modify the config, then dump to json again and write to ckpt_dict
    tmp_config, _ = FileUtils.config_from_checkpoint(ckpt_dict=ckpt_dict)
    with tmp_config.values_unlocked():
        if tmp_config.algo.ddpm.enabled:
            tmp_config.algo.ddpm.num_inference_timesteps = dp_eval_steps
        elif tmp_config.algo.ddim.enabled:
            tmp_config.algo.ddim.num_inference_timesteps = dp_eval_steps
        else:
            raise Exception("should not reach here")
    # The policy is rebuilt from ckpt_dict['config'], so the edited config
    # has to be written back before restoring the policy.
    ckpt_dict['config'] = tmp_config.dump()

# restore policy
model, ckpt_dict = FileUtils.policy_from_checkpoint(
    ckpt_dict=ckpt_dict,
    device=infer_device,
    verbose=True,
)
# Load the external training config and merge it into the algorithm's
# default config.
config_file = "......../training_config/diffusion_policy_pcd_wiping_1-14.json"
# Use a context manager so the config file handle is closed after reading.
with open(config_file, 'r') as config_fp:
    ext_cfg = json.load(config_fp)
config = config_factory(ext_cfg["algo_name"])
with config.values_unlocked():
    config.update(ext_cfg)

# read config to set up metadata for observation modalities (e.g. detecting rgb observations)
ObsUtils.initialize_obs_utils_with_config(config)

# make sure the dataset exists
eval_dataset_cfg = config.train.data[0]
dataset_path = os.path.expandvars(os.path.expanduser(eval_dataset_cfg["path"]))
ds_format = config.train.data_format
if not os.path.exists(dataset_path):
    # FileNotFoundError is still caught by callers handling Exception.
    raise FileNotFoundError("Dataset at provided path {} not found!".format(dataset_path))

shape_meta = FileUtils.get_shape_metadata_from_dataset(
    dataset_path=dataset_path,
    action_keys=config.train.action_keys,
    all_obs_keys=config.all_obs_keys,
    ds_format=ds_format,
    verbose=True,
)

# The training split is reused for evaluation; validset is loaded but not
# used by the code below.
trainset, validset = TrainUtils.load_data_for_training(
    config,
    obs_keys=shape_meta["all_obs_keys"],
)
train_sampler = trainset.get_dataset_sampler()

# initialize data loaders
train_loader = DataLoader(
    dataset=trainset,
    sampler=train_sampler,
    batch_size=config.train.batch_size,
    shuffle=False,  # was: (train_sampler is None); fixed order for evaluation
    num_workers=config.train.num_data_workers,
    drop_last=True,
)
# Run the policy on batches from the training loader and collect, per batch,
# the reference actions and the predicted end-effector positions.
expected_traj = []
output_traj = []
data_loader_iter = iter(train_loader)
num_steps = min(len(train_loader), 200)  # evaluate at most 200 batches
To = 3  # number of leading observation entries fed to the policy (presumably the observation horizon - TODO confirm)

# Layout of the flat action vector returned by the model; constant across
# batches, so defined once outside the loop.
action_shapes = {"eef_position": 3, "eef_quaternion": 4, "gripper": 1}
action_keys = ["eef_position", "eef_quaternion", "gripper"]

with torch.no_grad():
    for _ in range(num_steps):
        batch = next(data_loader_iter)
        # Drop the leading axis when it has size 1, then keep the first
        # `To` entries of every observation.
        # NOTE(review): squeeze(dim=0) is a no-op for batch_size > 1, in
        # which case [:To] selects batch items instead - confirm batch size.
        obs = {k: torch.squeeze(v, dim=0)[:To, :] for k, v in batch['obs'].items()}
        output_action_numpy = model(obs)
        action_dict = vector_to_action_dict(output_action_numpy, action_shapes, action_keys)
        expected_traj.append(np.squeeze(np.asarray(batch['actions'])))
        output_traj.append(action_dict['eef_position'])

# h5py hands the name to the HDF5 library as-is, so "~" must be expanded here.
save_file = os.path.expanduser("~/dexil_inference_with_trainset_debug-wiping-1-14.hdf5")
with h5py.File(save_file, 'w') as output_hdf5:
    # One group per evaluated batch: traj_0, traj_1, ...
    for idx, (predicted, expected) in enumerate(zip(output_traj, expected_traj)):
        traj = output_hdf5.create_group(f"traj_{idx}")
        traj.create_dataset("output_traj", data=predicted)
        traj.create_dataset("expected_traj", data=expected)
    output_hdf5.attrs["num_samples"] = len(output_traj)
Code for visualizing the trajectories
import h5py
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import os  # path expansion and output-directory creation below

# h5py does not expand "~", so do it before opening the file.
file = os.path.expanduser("~/dexil_inference_with_trainset_debug-wiping-1-14.hdf5")
save_dir = "..../debug_images/"
# savefig fails if the target directory is missing.
os.makedirs(save_dir, exist_ok=True)

# new a figure and set it into 3d
fig = plt.figure()
ax = fig.add_axes(Axes3D(fig))

print("start to polt trajectories!")
# Context manager guarantees the HDF5 file is closed when plotting is done.
with h5py.File(file, 'r') as f:
    traj_num = int(f.attrs['num_samples'])
    # Address groups by index rather than iterating f.items(): group names
    # are returned in name order (traj_0, traj_1, traj_10, ...), which would
    # make the image number disagree with the trajectory number.
    for idx in range(traj_num):
        v = f[f"traj_{idx}"]
        # Read both datasets fully into memory as arrays.
        expected_traj = v['expected_traj'][()]
        output_traj = v['output_traj'][()]

        expected_x = expected_traj[:, 0]
        expected_y = expected_traj[:, 1]
        expected_z = expected_traj[:, 2]
        output_x = output_traj[:, 0]
        output_y = output_traj[:, 1]
        output_z = output_traj[:, 2]

        ax.clear()
        # set figure information (after clear(), which resets title and labels)
        ax.set_title("3D_Curve")
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")

        # NOTE: predicted output is drawn in green and the reference in red.
        # The accompanying description states the opposite assignment; the
        # legend reflects what is actually drawn here.
        ax.plot(output_x, output_y, output_z, color='green', label="predicted")
        ax.plot(expected_x, expected_y, expected_z, color='red', label="reference")
        # Mark the first point of each trajectory.
        ax.plot(output_x[:1], output_y[:1], output_z[:1], marker="o", markersize=4,
                markeredgecolor="green", markerfacecolor="green")
        ax.plot(expected_x[:1], expected_y[:1], expected_z[:1], marker="o", markersize=4,
                markeredgecolor="red", markerfacecolor="red")
        ax.legend()
        plt.savefig(f"{save_dir}/{idx}.png")

plt.show()
It's still a bit hard for me to locate the problem from the description. As we discussed in the previous accuracy issue, could you first confirm that you have received consistent results and can reproduce similar accuracy on the training data as we did in that discussion?
Thanks for your great work.
I have trained on the dataset "hand_wiping_1-14_5actiongap_10000points.hdf5" for about 3000 epochs with the provided config. I then wrote some code to visualize the predicted actions against the reference actions (using the same dataset for validation). Most results look pretty good, but there are also some bad outputs, shown above, where the green curve is the reference trajectory and the red curve is the predicted action (trajectory). I wonder whether this is what you would expect? (My expectation is that the predicted output should be very close to the reference when the same dataset is used for both training and validation.)
The visualization code is below; I hope it can help with validation before deploying on any real robot:
Code for saving the reference trajectory and predicted trajectory
Code for visualizing the trajectories