Open luzzou opened 5 years ago
hi,
I have the same problem as @Lu-Zou. Please upload it again, thanks.
@weigq Could you please re-upload the dataset, or release the code for transforming the .h5 files provided by https://github.com/una-dinosauria/3d-pose-baseline into the format used by this repo (including stat_3d.pth.tar)? At the moment it is not really possible to use your excellent work as-is, since we can neither train our own model nor get the pretrained model.
I have the same problem. Thank you so much if you can provide the pre-processed data!
I just found the Human3.6M data, but I am not sure where stat_3d.pth.tar is: https://github.com/facebookresearch/VideoPose3D/blob/master/DATASETS.md
Run this code to obtain the input files required by the code (stat_3d.pth.tar, etc.). You'll need to have downloaded the h36m dataset first.
"""Utility functions for dealing with human3.6m data."""
from __future__ import division
import os
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import cameras
#import viz
import h5py
import glob
import copy
import torch
# Subject ids used for the train and test splits.
TRAIN_SUBJECTS = [1, 5, 6, 7, 8]
TEST_SUBJECTS = [9, 11]

# Joints in H3.6M -- data has 32 joints, but only 17 that move; these are the indices.
# Slots that are not listed stay as the empty string.
_H36M_JOINT_BY_INDEX = {
    0: 'Hip',
    1: 'RHip',
    2: 'RKnee',
    3: 'RFoot',
    6: 'LHip',
    7: 'LKnee',
    8: 'LFoot',
    12: 'Spine',
    13: 'Thorax',
    14: 'Neck/Nose',
    15: 'Head',
    17: 'LShoulder',
    18: 'LElbow',
    19: 'LWrist',
    25: 'RShoulder',
    26: 'RElbow',
    27: 'RWrist',
}
H36M_NAMES = [_H36M_JOINT_BY_INDEX.get(idx, '') for idx in range(32)]

# Stacked Hourglass produces 16 joints. These are the names.
SH_NAMES = [
    'RFoot', 'RKnee', 'RHip',
    'LHip', 'LKnee', 'LFoot',
    'Hip', 'Spine', 'Thorax', 'Head',
    'RWrist', 'RElbow', 'RShoulder',
    'LShoulder', 'LElbow', 'LWrist',
]
def main():
    """
    Build the normalized 2d/3d pose dictionaries and save them with torch.save.

    Files written into data_dir:
      train_2d_ft.pth.tar / test_2d_ft.pth.tar: stacked hourglass 2d detections
      train_2d.pth.tar / test_2d.pth.tar: 2d poses projected from the 3d data
      stat_3d.pth.tar: dict with keys 'mean', 'std', 'dim_use', 'dim_ignore'
      train_3d.pth.tar / test_3d.pth.tar: 3d poses
    """
    data_dir = '/data/h36m/'
    actions = define_actions('all')
    rcams = cameras.load_cameras(bpath=os.path.join(data_dir, 'cameras.h5'))
    camera_frame = True  # boolean. Whether to convert the data to camera coordinates

    def save(obj, fname):
        # Every output is written next to the input data.
        torch.save(obj, os.path.join(data_dir, fname))

    # HG prediction (i.e. deeplabcut or similar)
    train_set, test_set, _, _, _, _ = read_2d_predictions(actions, data_dir)
    save(train_set, 'train_2d_ft.pth.tar')
    save(test_set, 'test_2d_ft.pth.tar')

    # 2D ground truth (3d poses projected with the camera parameters).
    # NOTE: the 2d statistics are intentionally not written here; the original
    # script had the corresponding stat_2d.pth.tar save disabled.
    train_set, test_set, _, _, _, _ = create_2d_data(actions, data_dir, rcams)
    save(train_set, 'train_2d.pth.tar')
    save(test_set, 'test_2d.pth.tar')

    # 3D ground truth
    train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use, _, _ = \
        read_3d_data(actions, data_dir, camera_frame, rcams, predict_14=False)
    stat_3d = {'mean': data_mean, 'std': data_std, 'dim_use': dim_to_use, 'dim_ignore': dim_to_ignore}
    save(stat_3d, 'stat_3d.pth.tar')
    save(train_set, 'train_3d.pth.tar')
    save(test_set, 'test_3d.pth.tar')
def load_data( bpath, subjects, actions, dim=3 ):
    """
    Loads 2d or 3d ground truth from disk, and puts it in an easy-to-access dictionary

    Args
      bpath: String. Path where to load the data from
      subjects: List of integers. Subjects whose data will be loaded
      actions: List of strings. The actions to load
      dim: Integer={2,3}. Load 2 or 3-dimensional data
    Returns:
      data: Dictionary with keys k=(subject, action, seqname)
        values v=the transposed '<dim>D_positions' dataset of each .h5 file
        There must be 2 entries per subject/action if loading 3d data
        There must be 8 entries per subject/action if loading 2d data
    Raises
      ValueError: if dim is not 2 or 3
      AssertionError: if a subject/action pair has an unexpected number of sequences
    """
    if dim not in (2, 3):
        # Raise an exception instance; `raise(ValueError, msg)` raises a tuple,
        # which is itself a TypeError on Python 3.
        raise ValueError('dim must be 2 or 3')

    data = {}
    for subj in subjects:
        for action in actions:
            print('Reading subject {0}, action {1}'.format(subj, action))

            dpath = os.path.join( bpath, 'S{0}'.format(subj), 'MyPoses/{0}D_positions'.format(dim), '{0}*.h5'.format(action) )
            print( dpath )

            fnames = glob.glob( dpath )

            loaded_seqs = 0
            for fname in fnames:
                seqname = os.path.basename( fname )

                # This rule makes sure SittingDown is not loaded when Sitting is requested
                if action == "Sitting" and seqname.startswith( "SittingDown" ):
                    continue

                # This rule makes sure that WalkDog and WalkTogether are not loaded when
                # Walking is requested.
                if seqname.startswith( action ):
                    print( fname )
                    loaded_seqs = loaded_seqs + 1

                    with h5py.File( fname, 'r' ) as h5f:
                        poses = h5f['{0}D_positions'.format(dim)][:]

                    poses = poses.T
                    data[ (subj, action, seqname) ] = poses

            if dim == 2:
                assert loaded_seqs == 8, "Expecting 8 sequences, found {0} instead".format( loaded_seqs )
            else:
                assert loaded_seqs == 2, "Expecting 2 sequences, found {0} instead".format( loaded_seqs )

    return data
def load_stacked_hourglass(data_dir, subjects, actions):
    """
    Load 2d detections from disk, and put it in an easy-to-access dictionary.

    Args
      data_dir: string. Directory where to load the data from,
      subjects: list of integers. Subjects whose data will be loaded.
      actions: list of strings. The actions to load.
    Returns
      data: dictionary with keys k=(subject, action, seqname), where seqname is
        the file name with '_' replaced by ' ' and '-sh' appended
        values v=(n x (32*2) matrix of 2d stacked hourglass detections); columns
        of joints that the detector does not provide are left at zero
    Raises
      AssertionError: if a subject/action pair does not have 8 sequences
        (7 for subject 11, action 'Directions')
    """
    # Permutation that goes from SH detections to H36M ordering.
    SH_TO_GT_PERM = np.array([SH_NAMES.index( h ) for h in H36M_NAMES if h != '' and h in SH_NAMES])
    assert np.all( SH_TO_GT_PERM == np.array([6,2,1,0,3,4,5,7,8,9,13,14,15,12,11,10]) )

    # Destination columns (interleaved x, y) of the detected joints in the
    # 32-joint layout. These do not depend on the file, so compute them once.
    dim_to_use_x = np.where(np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0] * 2
    dim_to_use_y = dim_to_use_x+1
    dim_to_use = np.zeros(len(SH_NAMES)*2,dtype=np.int32)
    dim_to_use[0::2] = dim_to_use_x
    dim_to_use[1::2] = dim_to_use_y

    data = {}
    for subj in subjects:
        for action in actions:
            print('Reading subject {0}, action {1}'.format(subj, action))

            dpath = os.path.join( data_dir, 'S{0}'.format(subj), 'StackedHourglass/{0}*.h5'.format(action) )
            print( dpath )

            fnames = glob.glob( dpath )

            loaded_seqs = 0
            for fname in fnames:
                seqname = os.path.basename( fname )
                seqname = seqname.replace('_',' ')

                # This rule makes sure SittingDown is not loaded when Sitting is requested
                if action == "Sitting" and seqname.startswith( "SittingDown" ):
                    continue

                # This rule makes sure that WalkDog and WalkTogether are not loaded when
                # Walking is requested.
                if seqname.startswith( action ):
                    print( fname )
                    loaded_seqs = loaded_seqs + 1

                    # Load the poses from the .h5 file
                    with h5py.File( fname, 'r' ) as h5f:
                        poses = h5f['poses'][:]

                    # Permute the loaded data to make it compatible with H36M
                    poses = poses[:,SH_TO_GT_PERM,:]

                    # Flatten each frame, then scatter into an n x (32*2) matrix
                    poses = np.reshape(poses,[poses.shape[0], -1])
                    poses_final = np.zeros([poses.shape[0], len(H36M_NAMES)*2])
                    poses_final[:,dim_to_use] = poses

                    seqname = seqname+'-sh'
                    data[ (subj, action, seqname) ] = poses_final

            # Make sure we loaded 8 sequences
            if (subj == 11 and action == 'Directions'): # <-- this video is damaged
                assert loaded_seqs == 7, "Expecting 7 sequences, found {0} instead. S:{1} {2}".format(loaded_seqs, subj, action )
            else:
                assert loaded_seqs == 8, "Expecting 8 sequences, found {0} instead. S:{1} {2}".format(loaded_seqs, subj, action )

    return data
def normalization_stats(complete_data, dim, predict_14=False ):
    """
    Computes normalization statistics: mean and stdev, dimensions used and ignored

    Args
      complete_data: nxd np array with poses
      dim. integer={2,3} dimensionality of the data
      predict_14. boolean. Whether to use only 14 joints (only read when dim == 3)
    Returns
      data_mean: np vector with the mean of the data
      data_std: np vector with the standard deviation of the data
      dimensions_to_ignore: np array of dimensions not used in the model
      dimensions_to_use: np array of dimensions used in the model
    Raises
      ValueError: if dim is not 2 or 3
    """
    if dim not in (2, 3):
        # Raise an exception instance; `raise(ValueError, msg)` raises a tuple,
        # which is itself a TypeError on Python 3.
        raise ValueError('dim must be 2 or 3')

    data_mean = np.mean(complete_data, axis=0)
    data_std = np.std(complete_data, axis=0)

    # Encodes which 17 (or 14) 2d-3d pairs we are predicting
    if dim == 2:
        # 2d: every named joint except 'Neck/Nose'
        dimensions_to_use = np.where(np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0]
        dimensions_to_use = np.sort( np.hstack( (dimensions_to_use*2, dimensions_to_use*2+1)))
        dimensions_to_ignore = np.delete( np.arange(len(H36M_NAMES)*2), dimensions_to_use )
    else: # dim == 3
        dimensions_to_use = np.where(np.array([x != '' for x in H36M_NAMES]))[0]
        # Drop the first named joint (or the 1st, 8th and 10th when predicting 14 joints)
        dimensions_to_use = np.delete( dimensions_to_use, [0,7,9] if predict_14 else 0 )

        dimensions_to_use = np.sort( np.hstack( (dimensions_to_use*3,
                                                 dimensions_to_use*3+1,
                                                 dimensions_to_use*3+2)))
        dimensions_to_ignore = np.delete( np.arange(len(H36M_NAMES)*3), dimensions_to_use )

    return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
def transform_world_to_camera(poses_set, cams, ncams=4 ):
    """
    Project 3d poses from world coordinate to camera coordinate system

    Args
      poses_set: dictionary with 3d poses, keyed by (subject, action, seqname)
      cams: dictionary with cameras, keyed by (subject, camera index starting at 1)
      ncams: number of cameras per subject
    Return:
      t3d_camera: dictionary with 3d poses in camera coordinate; one entry per
        input sequence and camera
    """
    t3d_camera = {}
    for t3dk in sorted( poses_set.keys() ):

        subj, action, seqname = t3dk
        t3d_world = poses_set[ t3dk ]

        # The loop index has its own name so that unpacking the camera tuple
        # (whose fourth field was also called `c`) no longer overwrites it.
        for cam_idx in range( ncams ):
            R, T, _f, _c, _k, _p, name = cams[ (subj, cam_idx+1) ]
            camera_coord = cameras.world_to_camera_frame( np.reshape(t3d_world, [-1, 3]), R, T)
            camera_coord = np.reshape( camera_coord, [-1, len(H36M_NAMES)*3] )

            # Swap the 3-character extension for ".<camera name>.h5"
            sname = seqname[:-3]+"."+name+".h5" # e.g.: Waiting 1.58860488.h5
            t3d_camera[ (subj, action, sname) ] = camera_coord

    return t3d_camera
def normalize_data(data, data_mean, data_std, dim_to_use ):
    """
    Normalizes a dictionary of poses

    The input dictionary is left untouched; a new dictionary is returned.

    Args
      data: dictionary where values are 2-d np arrays (one row per pose)
      data_mean: np vector with the mean of the data
      data_std: np vector with the standard deviation of the data
      dim_to_use: list of dimensions (columns) to keep in the data
    Returns
      data_out: dictionary with same keys as data, but values have been
        restricted to dim_to_use and normalized
    """
    # These only depend on the statistics, so compute them once.
    mu = data_mean[dim_to_use]
    stddev = data_std[dim_to_use]

    data_out = {}
    for key, poses in data.items():
        # Select the columns into a new array instead of overwriting data[key],
        # so the caller's dictionary is not modified as a side effect.
        data_out[ key ] = np.divide( poses[ :, dim_to_use ] - mu, stddev )

    return data_out
def unNormalizeData(normalized_data, data_mean, data_std, dimensions_to_ignore):
    """
    Un-normalizes a matrix whose mean has been subtracted and that has been divided by
    standard deviation. Some dimensions might also be missing

    Args
      normalized_data: nxd matrix to unnormalize
      data_mean: np vector with the mean of the data
      data_std: np vector with the standard deviation of the data
      dimensions_to_ignore: list of dimensions that were removed from the original data
    Returns
      orig_data: the input normalized_data, but unnormalized; the ignored
        dimensions are filled with their mean
    """
    T = normalized_data.shape[0] # Batch size
    D = data_mean.shape[0] # Dimensionality

    orig_data = np.zeros((T, D), dtype=np.float32)
    # setdiff1d keeps the integer dtype even when nothing is left, so the
    # result is always a valid index array (an empty list comprehension turned
    # into np.array would be float and fail as an index).
    dimensions_to_use = np.setdiff1d(np.arange(D), dimensions_to_ignore)

    orig_data[:, dimensions_to_use] = normalized_data

    # Multiply times stdev and add the mean; the (1, D) statistics broadcast
    # over the T rows.
    orig_data = np.multiply(orig_data, data_std.reshape((1, D))) + data_mean.reshape((1, D))
    return orig_data
def define_actions( action ):
    """
    Given an action string, returns a list of corresponding actions.

    Args
      action: String. either "all" (or "All") or one of the h36m actions
    Returns
      actions: List of strings. Actions to use.
    Raises
      ValueError: if the action is not a valid action in Human 3.6M
    """
    actions = ["Directions","Discussion","Eating","Greeting",
               "Phoning","Photo","Posing","Purchases",
               "Sitting","SittingDown","Smoking","Waiting",
               "WalkDog","Walking","WalkTogether"]

    if action in ("All", "all"):
        return actions

    if action not in actions:
        # Raise an exception instance; `raise(ValueError, msg)` raises a tuple,
        # which is itself a TypeError on Python 3.
        raise ValueError( "Unrecognized action: %s" % action )

    return [action]
def project_to_cameras( poses_set, cams, ncams=4 ):
    """
    Project 3d poses using camera parameters

    Args
      poses_set: dictionary with 3d poses, keyed by (subject, action, seqname)
      cams: dictionary with camera parameters, keyed by (subject, camera index starting at 1)
      ncams: number of cameras per subject
    Returns
      t2d: dictionary with 2d poses; one entry per input sequence and camera
    """
    n_joints = len(H36M_NAMES)
    t2d = {}

    for key in sorted( poses_set ):
        subj, a, seqname = key
        points3d = np.reshape( poses_set[ key ], [-1, 3] )

        for cam_id in range( 1, ncams + 1 ):
            R, T, f, c, k, p, name = cams[ (subj, cam_id) ]

            # Only the projected points (first of the five results) are kept
            pts2d, _, _, _, _ = cameras.project_point_radial( points3d, R, T, f, c, k, p )

            # Swap the 3-character extension for ".<camera name>.h5"
            sname = seqname[:-3]+"."+name+".h5" # e.g.: Waiting 1.58860488.h5
            t2d[ (subj, a, sname) ] = np.reshape( pts2d, [-1, n_joints*2] )

    return t2d
def postprocess_3d( poses_set ):
    """
    Center 3d points around root

    Note that the entries of poses_set are replaced in place; the same
    dictionary object is returned.

    Args
      poses_set: dictionary with 3d data
    Returns
      poses_set: dictionary with 3d data centred around root (center hip) joint
      root_positions: dictionary with the original 3d position of each pose
    """
    n_joints = len(H36M_NAMES)
    root_positions = {}

    for seq_key in poses_set.keys():
        seq = poses_set[seq_key]
        root = seq[:,:3]

        # Keep an independent copy of the global position
        root_positions[seq_key] = copy.deepcopy(root)

        # Subtract the root (first three columns) from every joint
        poses_set[seq_key] = seq - np.tile( root, [1, n_joints] )

    return poses_set, root_positions
def read_2d_predictions( actions, data_dir ):
    """
    Loads 2d data from precomputed Stacked Hourglass detections

    Args
      actions: list of strings. Actions to load
      data_dir: string. Directory where the data can be loaded from
    Returns
      train_set: dictionary with loaded 2d stacked hourglass detections for training
      test_set: dictionary with loaded 2d stacked hourglass detections for testing
      data_mean: vector with the mean of the 2d training data
      data_std: vector with the standard deviation of the 2d training data
      dim_to_ignore: list with the dimensions to not predict
      dim_to_use: list with the dimensions to predict
    """
    train_set = load_stacked_hourglass( data_dir, TRAIN_SUBJECTS, actions)
    test_set = load_stacked_hourglass( data_dir, TEST_SUBJECTS, actions)

    # np.vstack needs a real sequence: on Python 3 dict.values() is a view and
    # must be materialized first. vstack already returns a new array, so no
    # extra copy is required.
    complete_train = np.vstack( list(train_set.values()) )
    data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=2 )

    # Both splits are normalized with the training statistics
    train_set = normalize_data( train_set, data_mean, data_std, dim_to_use )
    test_set = normalize_data( test_set, data_mean, data_std, dim_to_use )

    return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use
def create_2d_data( actions, data_dir, rcams ):
    """
    Creates 2d poses by projecting 3d poses with the corresponding camera
    parameters. Also normalizes the 2d poses

    Args
      actions: list of strings. Actions to load
      data_dir: string. Directory where the data can be loaded from
      rcams: dictionary with camera parameters
    Returns
      train_set: dictionary with projected 2d poses for training
      test_set: dictionary with projected 2d poses for testing
      data_mean: vector with the mean of the 2d training data
      data_std: vector with the standard deviation of the 2d training data
      dim_to_ignore: list with the dimensions to not predict
      dim_to_use: list with the dimensions to predict
    """
    # Load 3d data
    train_set = load_data( data_dir, TRAIN_SUBJECTS, actions, dim=3 )
    test_set = load_data( data_dir, TEST_SUBJECTS, actions, dim=3 )

    train_set = project_to_cameras( train_set, rcams )
    test_set = project_to_cameras( test_set, rcams )

    # Compute normalization statistics.
    # np.vstack needs a real sequence: on Python 3 dict.values() is a view and
    # must be materialized first. vstack already returns a new array, so no
    # extra copy is required.
    complete_train = np.vstack( list(train_set.values()) )
    data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=2 )

    # Divide every dimension independently (training statistics for both splits)
    train_set = normalize_data( train_set, data_mean, data_std, dim_to_use )
    test_set = normalize_data( test_set, data_mean, data_std, dim_to_use )

    return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use
def read_3d_data( actions, data_dir, camera_frame, rcams, predict_14=False ):
    """
    Loads 3d poses, zero-centres and normalizes them

    Args
      actions: list of strings. Actions to load
      data_dir: string. Directory where the data can be loaded from
      camera_frame: boolean. Whether to convert the data to camera coordinates
      rcams: dictionary with camera parameters
      predict_14: boolean. Whether to predict only 14 joints
    Returns
      train_set: dictionary with loaded 3d poses for training
      test_set: dictionary with loaded 3d poses for testing
      data_mean: vector with the mean of the 3d training data
      data_std: vector with the standard deviation of the 3d training data
      dim_to_ignore: list with the dimensions to not predict
      dim_to_use: list with the dimensions to predict
      train_root_positions: dictionary with the 3d positions of the root in train
      test_root_positions: dictionary with the 3d positions of the root in test
    """
    # Load 3d data
    train_set = load_data( data_dir, TRAIN_SUBJECTS, actions, dim=3 )
    test_set = load_data( data_dir, TEST_SUBJECTS, actions, dim=3 )

    if camera_frame:
        train_set = transform_world_to_camera( train_set, rcams )
        test_set = transform_world_to_camera( test_set, rcams )

    # Apply 3d post-processing (centering around root)
    train_set, train_root_positions = postprocess_3d( train_set )
    test_set, test_root_positions = postprocess_3d( test_set )

    # Compute normalization statistics
    # np.vstack needs a real sequence: on Python 3 dict.values() is a view and
    # must be materialized first. vstack already returns a new array, so no
    # extra copy is required.
    complete_train = np.vstack( list(train_set.values()) )
    data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=3, predict_14=predict_14 )

    # Divide every dimension independently (training statistics for both splits)
    train_set = normalize_data( train_set, data_mean, data_std, dim_to_use )
    test_set = normalize_data( test_set, data_mean, data_std, dim_to_use )

    return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use, train_root_positions, test_root_positions
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
@agosztolai Where is the module named 'cameras' in the file you mentioned?
Hi @agosztolai and @raincrash , I created the .pth files with a slightly changed version of the code from @agosztolai .
"""Utility functions for dealing with human3.6m data."""
from __future__ import division

import copy
import glob
import os

import cdflib
import h5py
import matplotlib.pyplot as plt
import numpy as np
import torch
from mpl_toolkits.mplot3d import Axes3D

import src.cameras as cameras
# Subject ids used for the train and test splits.
TRAIN_SUBJECTS = [1, 5, 6, 7, 8]
TEST_SUBJECTS = [9, 11]

# Names of the H3.6M joints that are used, placed at their index in the
# 32-slot joint layout; unused slots stay as the empty string.
H36M_NAMES = [''] * 32
H36M_NAMES[0] = 'Hip'
H36M_NAMES[1] = 'RHip'
H36M_NAMES[2] = 'RKnee'
H36M_NAMES[3] = 'RFoot'
H36M_NAMES[6] = 'LHip'
H36M_NAMES[7] = 'LKnee'
H36M_NAMES[8] = 'LFoot'
H36M_NAMES[12] = 'Spine'
H36M_NAMES[13] = 'Thorax'
H36M_NAMES[14] = 'Neck/Nose'
H36M_NAMES[15] = 'Head'
H36M_NAMES[17] = 'LShoulder'
H36M_NAMES[18] = 'LElbow'
H36M_NAMES[19] = 'LWrist'
H36M_NAMES[25] = 'RShoulder'
H36M_NAMES[26] = 'RElbow'
H36M_NAMES[27] = 'RWrist'

# Names of the 16 stacked hourglass joints, in detector order.
SH_NAMES = [''] * 16
SH_NAMES[0] = 'RFoot'
SH_NAMES[1] = 'RKnee'
SH_NAMES[2] = 'RHip'
SH_NAMES[3] = 'LHip'
SH_NAMES[4] = 'LKnee'
SH_NAMES[5] = 'LFoot'
SH_NAMES[6] = 'Hip'
SH_NAMES[7] = 'Spine'
SH_NAMES[8] = 'Thorax'
SH_NAMES[9] = 'Head'
SH_NAMES[10] = 'RWrist'
SH_NAMES[11] = 'RElbow'
SH_NAMES[12] = 'RShoulder'
SH_NAMES[13] = 'LShoulder'
SH_NAMES[14] = 'LElbow'
SH_NAMES[15] = 'LWrist'
def main():
    """
    Build the normalized 2d/3d pose dictionaries and save them with torch.save.

    Files written into data_dir:
      stat_2d.pth.tar / stat_3d.pth.tar: dicts with keys 'mean', 'std',
        'dim_use', 'dim_ignore'
      train_2d.pth.tar / test_2d.pth.tar: 2d poses projected from the 3d data
      train_3d.pth.tar / test_3d.pth.tar: 3d poses
    """
    data_dir = './data/h36m/'
    actions = define_actions('all')
    SUBJECT_IDS = [1,5,6,7,8,9,11]
    rcams = cameras.load_cameras(os.path.join(data_dir, 'metadata.xml'), SUBJECT_IDS)
    camera_frame = True  # boolean. Whether to convert the data to camera coordinates

    def save(obj, fname):
        # Every output is written next to the input data.
        torch.save(obj, os.path.join(data_dir, fname))

    # 2D ground truth
    train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = \
        create_2d_data( actions, data_dir, rcams )
    stat_2d = {'mean': data_mean, 'std': data_std, 'dim_use': dim_to_use, 'dim_ignore': dim_to_ignore}
    save(stat_2d, 'stat_2d.pth.tar')
    save(train_set, 'train_2d.pth.tar')
    save(test_set, 'test_2d.pth.tar')

    # 3D ground truth
    train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use, _, _ = \
        read_3d_data( actions, data_dir, camera_frame, rcams, predict_14=False )
    stat_3d = {'mean': data_mean, 'std': data_std, 'dim_use': dim_to_use, 'dim_ignore': dim_to_ignore}
    save(stat_3d, 'stat_3d.pth.tar')
    save(train_set, 'train_3d.pth.tar')
    save(test_set, 'test_3d.pth.tar')
def load_data( bpath, subjects, actions, dim=3 ):
    """
    Loads 2d or 3d ground truth from .cdf files, and puts it in an easy-to-access dictionary

    Args
      bpath: String. Path where to load the data from
      subjects: List of integers. Subjects whose data will be loaded
      actions: List of strings. The actions to load
      dim: Integer={2,3}. Load 2 or 3-dimensional data
    Returns:
      data: Dictionary with keys k=(subject, action, seqname)
        values v=the squeezed "Pose" variable of each .cdf file
        There must be 2 entries per subject/action if loading 3d data
        There must be 8 entries per subject/action if loading 2d data
    Raises
      ValueError: if dim is not 2 or 3
      AssertionError: if a subject/action pair has an unexpected number of sequences
    """
    if dim not in (2, 3):
        raise ValueError('dim must be 2 or 3')

    data = {}
    for subj in subjects:
        for action in actions:
            print('Reading subject {0}, action {1}'.format(subj, action))

            dpath = os.path.join( bpath, 'S{0}'.format(subj), 'MyPoseFeatures/D{0}_Positions'.format(dim), '{0}*.cdf'.format(action) )
            print( dpath )

            fnames = glob.glob( dpath )

            loaded_seqs = 0
            for fname in fnames:
                seqname = os.path.basename( fname )

                # This rule makes sure SittingDown is not loaded when Sitting is requested
                if action == "Sitting" and seqname.startswith( "SittingDown" ):
                    continue

                # This rule makes sure that WalkDog and WalkTogether are not loaded when
                # Walking is requested.
                if seqname.startswith( action ):
                    print( fname )
                    loaded_seqs = loaded_seqs + 1

                    cdf_file = cdflib.CDF(fname)
                    poses = cdf_file.varget("Pose").squeeze()
                    cdf_file.close()

                    data[ (subj, action, seqname) ] = poses

            if dim == 2:
                assert loaded_seqs == 8, "Expecting 8 sequences, found {0} instead".format( loaded_seqs )
            else:
                assert loaded_seqs == 2, "Expecting 2 sequences, found {0} instead".format( loaded_seqs )

    return data
def normalization_stats(complete_data, dim, predict_14=False ):
    """
    Computes normalization statistics: mean and stdev, dimensions used and ignored

    Args
      complete_data: nxd np array with poses
      dim. integer={2,3} dimensionality of the data
      predict_14. boolean. Whether to use only 14 joints (only read when dim == 3)
    Returns
      data_mean: np vector with the mean of the data
      data_std: np vector with the standard deviation of the data
      dimensions_to_ignore: np array of dimensions not used in the model
      dimensions_to_use: np array of dimensions used in the model
    Raises
      ValueError: if dim is not 2 or 3
    """
    if dim not in (2, 3):
        raise ValueError('dim must be 2 or 3')

    data_mean = np.mean(complete_data, axis=0)
    data_std = np.std(complete_data, axis=0)

    if dim == 2:
        # 2d: every named joint except 'Neck/Nose'. Each joint index j maps to
        # the x/y columns 2*j and 2*j+1 (the `*` had been lost from the paste,
        # leaving the undefined name `dimensions_to_use2`).
        dimensions_to_use = np.where(np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0]
        dimensions_to_use = np.sort( np.hstack( (dimensions_to_use*2, dimensions_to_use*2+1)))
        dimensions_to_ignore = np.delete( np.arange(len(H36M_NAMES)*2), dimensions_to_use )
    else: # dim == 3
        dimensions_to_use = np.where(np.array([x != '' for x in H36M_NAMES]))[0]
        # Drop the first named joint (or the 1st, 8th and 10th when predicting 14 joints)
        dimensions_to_use = np.delete( dimensions_to_use, [0,7,9] if predict_14 else 0 )

        dimensions_to_use = np.sort( np.hstack( (dimensions_to_use*3,
                                                 dimensions_to_use*3+1,
                                                 dimensions_to_use*3+2)))
        dimensions_to_ignore = np.delete( np.arange(len(H36M_NAMES)*3), dimensions_to_use )

    return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
def transform_world_to_camera(poses_set, cams, ncams=4 ):
    """
    Project 3d poses from world coordinate to camera coordinate system

    Args
      poses_set: dictionary with 3d poses, keyed by (subject, action, seqname)
      cams: dictionary with cameras, keyed by (subject, camera index starting at 1)
      ncams: number of cameras per subject
    Return:
      t3d_camera: dictionary with 3d poses in camera coordinate; one entry per
        input sequence and camera
    """
    t3d_camera = {}
    for t3dk in sorted( poses_set.keys() ):

        subj, action, seqname = t3dk
        t3d_world = poses_set[ t3dk ]

        for c in range( ncams ):
            # Only the rotation, translation and camera name are needed here
            R, T, _, _, _, _, name = cams[ (subj, c+1) ]
            camera_coord = cameras.world_to_camera_frame( np.reshape(t3d_world, [-1, 3]), R, T)
            camera_coord = np.reshape( camera_coord, [-1, len(H36M_NAMES)*3] )

            # Drops the last 3 characters of seqname and appends "<camera name>.h5".
            # NOTE(review): presumably seqname ends in ".cdf", so the dot is kept - confirm.
            sname = seqname[:-3]+ name + ".h5" # e.g.: Waiting 1.58860488.h5
            t3d_camera[ (subj, action, sname) ] = camera_coord

    return t3d_camera
def normalize_data(data, data_mean, data_std, dim_to_use ):
    """
    Normalizes a dictionary of poses

    The input dictionary is left untouched; a new dictionary is returned.

    Args
      data: dictionary where values are 2-d np arrays (one row per pose)
      data_mean: np vector with the mean of the data
      data_std: np vector with the standard deviation of the data
      dim_to_use: list of dimensions (columns) to keep in the data
    Returns
      data_out: dictionary with same keys as data, but values have been
        restricted to dim_to_use and normalized
    """
    # These only depend on the statistics, so compute them once.
    mu = data_mean[dim_to_use]
    stddev = data_std[dim_to_use]

    data_out = {}
    for key, poses in data.items():
        # Select the columns into a new array instead of overwriting data[key],
        # so the caller's dictionary is not modified as a side effect.
        data_out[ key ] = np.divide( poses[ :, dim_to_use ] - mu, stddev )

    return data_out
def unNormalizeData(normalized_data, data_mean, data_std, dimensions_to_ignore):
    """
    Un-normalizes a matrix whose mean has been subtracted and that has been divided by
    standard deviation. Some dimensions might also be missing

    Args
      normalized_data: nxd matrix to unnormalize
      data_mean: np vector with the mean of the data
      data_std: np vector with the standard deviation of the data
      dimensions_to_ignore: list of dimensions that were removed from the original data
    Returns
      orig_data: the input normalized_data, but unnormalized; the ignored
        dimensions are filled with their mean
    """
    T = normalized_data.shape[0] # Batch size
    D = data_mean.shape[0] # Dimensionality

    orig_data = np.zeros((T, D), dtype=np.float32)
    # setdiff1d keeps the integer dtype even when nothing is left, so the
    # result is always a valid index array (an empty list comprehension turned
    # into np.array would be float and fail as an index).
    dimensions_to_use = np.setdiff1d(np.arange(D), dimensions_to_ignore)

    orig_data[:, dimensions_to_use] = normalized_data

    # Multiply times stdev and add the mean; the (1, D) statistics broadcast
    # over the T rows.
    orig_data = np.multiply(orig_data, data_std.reshape((1, D))) + data_mean.reshape((1, D))
    return orig_data
def define_actions( action ):
    """
    Given an action string, returns a list of corresponding actions.

    Args
      action: String. either "all" (or "All") or one of the h36m actions
    Returns
      actions: List of strings. Actions to use.
    Raises
      ValueError: if the action is not a valid action in Human 3.6M
    """
    actions = ["Directions","Discussion","Eating","Greeting",
               "Phoning","Photo","Posing","Purchases",
               "Sitting","SittingDown","Smoking","Waiting",
               "WalkDog","Walking","WalkTogether"]

    if action in ("All", "all"):
        return actions

    if action not in actions:
        raise ValueError( "Unrecognized action: %s" % action )

    return [action]
def project_to_cameras( poses_set, cams, ncams=4 ):
    """
    Project 3d poses using camera parameters

    Args
      poses_set: dictionary with 3d poses, keyed by (subject, action, seqname)
      cams: dictionary with camera parameters, keyed by (subject, camera index starting at 1)
      ncams: number of cameras per subject
    Returns
      t2d: dictionary with 2d poses; one entry per input sequence and camera
    """
    t2d = {}

    for t3dk in sorted( poses_set.keys() ):
        subj, a, seqname = t3dk
        t3d = poses_set[ t3dk ]

        for cam in range( ncams ):
            R, T, f, c, k, p, name = cams[ (subj, cam+1) ]
            # Only the projected points (first of the five results) are kept
            pts2d, _, _, _, _ = cameras.project_point_radial( np.reshape(t3d, [-1, 3]), R, T, f, c, k, p )

            pts2d = np.reshape( pts2d, [-1, len(H36M_NAMES)*2] )
            # Drops the last 3 characters of seqname and appends "<camera name>.h5".
            # NOTE(review): presumably seqname ends in ".cdf", so the dot is kept - confirm.
            sname = seqname[:-3]+ name + ".h5" # e.g.: Waiting 1.58860488.h5
            t2d[ (subj, a, sname) ] = pts2d

    return t2d
def create_2d_data( actions, data_dir, rcams ):
    """
    Creates 2d poses by projecting 3d poses with the corresponding camera
    parameters. Also normalizes the 2d poses

    Args
      actions: list of strings. Actions to load
      data_dir: string. Directory where the data can be loaded from
      rcams: dictionary with camera parameters
    Returns
      train_set: dictionary with projected 2d poses for training
      test_set: dictionary with projected 2d poses for testing
      data_mean: vector with the mean of the 2d training data
      data_std: vector with the standard deviation of the 2d training data
      dim_to_ignore: list with the dimensions to not predict
      dim_to_use: list with the dimensions to predict
    """
    # Load 3d data and project it with every camera
    train_set = load_data( data_dir, TRAIN_SUBJECTS, actions, dim=3 )
    test_set = load_data( data_dir, TEST_SUBJECTS, actions, dim=3 )

    train_set = project_to_cameras( train_set, rcams )
    test_set = project_to_cameras( test_set, rcams )

    # Compute normalization statistics on the training data only. vstack
    # already returns a new array, so no extra copy is required.
    complete_train = np.vstack( list(train_set.values()) )
    data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=2 )

    # Divide every dimension independently (training statistics for both splits)
    train_set = normalize_data( train_set, data_mean, data_std, dim_to_use )
    test_set = normalize_data( test_set, data_mean, data_std, dim_to_use )

    return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use
def read_3d_data( actions, data_dir, camera_frame, rcams, predict_14=False ):
    """
    Loads 3d poses, zero-centres and normalizes them

    Args
      actions: list of strings. Actions to load
      data_dir: string. Directory where the data can be loaded from
      camera_frame: boolean. Whether to convert the data to camera coordinates
      rcams: dictionary with camera parameters
      predict_14: boolean. Whether to predict only 14 joints
    Returns
      train_set: dictionary with loaded 3d poses for training
      test_set: dictionary with loaded 3d poses for testing
      data_mean: vector with the mean of the 3d training data
      data_std: vector with the standard deviation of the 3d training data
      dim_to_ignore: list with the dimensions to not predict
      dim_to_use: list with the dimensions to predict
      train_root_positions: dictionary with the 3d positions of the root in train
      test_root_positions: dictionary with the 3d positions of the root in test
    """
    # Load 3d data
    train_set = load_data( data_dir, TRAIN_SUBJECTS, actions, dim=3 )
    test_set = load_data( data_dir, TEST_SUBJECTS, actions, dim=3 )

    if camera_frame:
        train_set = transform_world_to_camera( train_set, rcams )
        test_set = transform_world_to_camera( test_set, rcams )

    # Apply 3d post-processing (centering around root)
    train_set, train_root_positions = postprocess_3d( train_set )
    test_set, test_root_positions = postprocess_3d( test_set )

    # Compute normalization statistics on the training data only. vstack
    # already returns a new array, so no extra copy is required.
    complete_train = np.vstack( list(train_set.values()) )
    data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=3, predict_14=predict_14 )

    # Divide every dimension independently (training statistics for both splits)
    train_set = normalize_data( train_set, data_mean, data_std, dim_to_use )
    test_set = normalize_data( test_set, data_mean, data_std, dim_to_use )

    return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use, train_root_positions, test_root_positions
def postprocess_3d( poses_set ):
    """
    Center 3d points around root

    Note that the entries of poses_set are replaced in place; the same
    dictionary object is returned.

    Args
      poses_set: dictionary with 3d data
    Returns
      poses_set: dictionary with 3d data centred around root (center hip) joint
      root_positions: dictionary with the original 3d position of each pose
    """
    root_positions = {}
    for k in poses_set.keys():
        # Keep track of the global position (first three columns)
        root_positions[k] = copy.deepcopy(poses_set[k][:,:3])

        # Remove the root from the 3d position
        poses = poses_set[k]
        poses = poses - np.tile( poses[:,:3], [1, len(H36M_NAMES)] )
        poses_set[k] = poses

    return poses_set, root_positions
def load_stacked_hourglass(data_dir, subjects, actions):
    """Load 2d Stacked Hourglass detections from disk into a dictionary.

    Args
      data_dir: string. Directory where to load the data from
      subjects: list of integers. Subjects whose data will be loaded
      actions: list of strings. The actions to load
    Returns
      data: dictionary with
        keys k=(subject, action, seqname), where seqname is the file name
          with '_' replaced by ' ' and '-sh' appended
        values v=(nx(32*2) matrix of 2d stacked hourglass detections);
          columns of joints that Stacked Hourglass does not produce stay zero
    Raises
      AssertionError: if the number of sequences loaded for a subject/action
        is not 8 (7 for subject 11, action 'Directions')
    """
    # Permutation that goes from SH detections to H36M ordering.
    SH_TO_GT_PERM = np.array([SH_NAMES.index( h ) for h in H36M_NAMES if h != '' and h in SH_NAMES])
    assert np.all( SH_TO_GT_PERM == np.array([6,2,1,0,3,4,5,7,8,9,13,14,15,12,11,10]) )

    # Destination columns (interleaved x, y) inside the 32-joint layout.
    # They depend only on the joint name tables, so compute them once here
    # instead of once per file.
    joint_idx = np.where(np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0]
    dim_to_use = np.zeros(len(SH_NAMES)*2, dtype=np.int32)
    dim_to_use[0::2] = joint_idx * 2
    dim_to_use[1::2] = joint_idx * 2 + 1

    data = {}

    for subj in subjects:
        for action in actions:

            print('Reading subject {0}, action {1}'.format(subj, action))

            dpath = os.path.join( data_dir, 'S{0}'.format(subj), 'StackedHourglass/{0}*.h5'.format(action) )
            print( dpath )

            fnames = glob.glob( dpath )

            loaded_seqs = 0
            for fname in fnames:
                seqname = os.path.basename( fname )
                seqname = seqname.replace('_',' ')

                # This rule makes sure SittingDown is not loaded when Sitting is requested
                if action == "Sitting" and seqname.startswith( "SittingDown" ):
                    continue

                # This rule makes sure that WalkDog and WalkTogeter are not loaded when
                # Walking is requested.
                if not seqname.startswith( action ):
                    continue

                print( fname )
                loaded_seqs = loaded_seqs + 1

                # Load the poses from the .h5 file
                with h5py.File( fname, 'r' ) as h5f:
                    poses = h5f['poses'][:]

                # Permute the loaded data to make it compatible with H36M
                poses = poses[:,SH_TO_GT_PERM,:]

                # Flatten each frame to one row of interleaved (x, y) values
                poses = np.reshape(poses,[poses.shape[0], -1])

                # Scatter the detections into an n x (32*2) matrix
                poses_final = np.zeros([poses.shape[0], len(H36M_NAMES)*2])
                poses_final[:,dim_to_use] = poses

                seqname = seqname+'-sh'
                data[ (subj, action, seqname) ] = poses_final

            # Make sure we loaded 8 sequences
            if (subj == 11 and action == 'Directions'): # <-- this video is damaged
                assert loaded_seqs == 7, "Expecting 7 sequences, found {0} instead. S:{1} {2}".format(loaded_seqs, subj, action )
            else:
                assert loaded_seqs == 8, "Expecting 8 sequences, found {0} instead. S:{1} {2}".format(loaded_seqs, subj, action )

    return data
def read_2d_predictions( actions, data_dir ):
    """Loads 2d data from precomputed Stacked Hourglass detections

    Args
      actions: list of strings. Actions to load
      data_dir: string. Directory where the data can be loaded from
    Returns
      train_set: dictionary with loaded 2d stacked hourglass detections for training
      test_set: dictionary with loaded 2d stacked hourglass detections for testing
      data_mean: vector with the mean of the 2d training data
      data_std: vector with the standard deviation of the 2d training data
      dim_to_ignore: list with the dimensions to not predict
      dim_to_use: list with the dimensions to predict
    """
    train_set = load_stacked_hourglass( data_dir, TRAIN_SUBJECTS, actions)
    test_set = load_stacked_hourglass( data_dir, TEST_SUBJECTS, actions)

    # dict.values() is a view in Python 3, not a sequence; np.vstack expects
    # a sequence of arrays, so materialise it first (as read_3d_data does).
    complete_train = copy.deepcopy( np.vstack( list(train_set.values()) ))
    data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=2 )

    # Both splits are normalized with the statistics of the training data
    train_set = normalize_data( train_set, data_mean, data_std, dim_to_use )
    test_set = normalize_data( test_set, data_mean, data_std, dim_to_use )

    return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use
# Run the preprocessing only when this file is executed as a script
if __name__ == "__main__":
    main()
I hope this is helpful! :)
Hi @agosztolai and @raincrash, I created the .pth files with a slightly modified version of the code from @agosztolai.
- Clone this repo.
- Download the needed files from the human3.6m dataset as explained in https://github.com/una-dinosauria/3d-pose-baseline#first-of-all and save them in the data folder of this repo, also perform the subsequent renaming of the files and folders to have consistent names.
- Copy cameras.py from https://github.com/una-dinosauria/3d-pose-baseline/tree/master/src to the src folder of this repo.
- Finally run this code:
"""Utility functions for dealing with human3.6m data.""" from __future__ import division import os import numpy as np import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D import src.cameras as cameras import cdflib #import viz import h5py import glob import copy import torch TRAIN_SUBJECTS = [1,5,6,7,8] TEST_SUBJECTS = [9,11] # Joints in H3.6M -- data has 32 joints, but only 17 that move; these are the indices. H36M_NAMES = ['']*32 H36M_NAMES[0] = 'Hip' H36M_NAMES[1] = 'RHip' H36M_NAMES[2] = 'RKnee' H36M_NAMES[3] = 'RFoot' H36M_NAMES[6] = 'LHip' H36M_NAMES[7] = 'LKnee' H36M_NAMES[8] = 'LFoot' H36M_NAMES[12] = 'Spine' H36M_NAMES[13] = 'Thorax' H36M_NAMES[14] = 'Neck/Nose' H36M_NAMES[15] = 'Head' H36M_NAMES[17] = 'LShoulder' H36M_NAMES[18] = 'LElbow' H36M_NAMES[19] = 'LWrist' H36M_NAMES[25] = 'RShoulder' H36M_NAMES[26] = 'RElbow' H36M_NAMES[27] = 'RWrist' # Stacked Hourglass produces 16 joints. These are the names. SH_NAMES = ['']*16 SH_NAMES[0] = 'RFoot' SH_NAMES[1] = 'RKnee' SH_NAMES[2] = 'RHip' SH_NAMES[3] = 'LHip' SH_NAMES[4] = 'LKnee' SH_NAMES[5] = 'LFoot' SH_NAMES[6] = 'Hip' SH_NAMES[7] = 'Spine' SH_NAMES[8] = 'Thorax' SH_NAMES[9] = 'Head' SH_NAMES[10] = 'RWrist' SH_NAMES[11] = 'RElbow' SH_NAMES[12] = 'RShoulder' SH_NAMES[13] = 'LShoulder' SH_NAMES[14] = 'LElbow' SH_NAMES[15] = 'LWrist' def main(): data_dir = './data/h36m/' actions = define_actions('all') SUBJECT_IDS = [1,5,6,7,8,9,11] rcams = cameras.load_cameras("./data/h36m/metadata.xml", SUBJECT_IDS) camera_frame = True #boolean. 
Whether to convert the data to camera coordinates #2D ground truth train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use = \ create_2d_data( actions, data_dir, rcams ) stat_2d = {'mean': data_mean, 'std': data_std, 'dim_use': dim_to_use, 'dim_ignore': dim_to_ignore} torch.save(stat_2d, './data/h36m/stat_2d.pth.tar') torch.save(train_set, './data/h36m/train_2d.pth.tar') torch.save(test_set, './data/h36m/test_2d.pth.tar') #3D ground truth train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use, \ train_root_positions, test_root_positions = \ read_3d_data( actions, data_dir, camera_frame, rcams, predict_14=False ) stat_3d = {'mean': data_mean, 'std': data_std, 'dim_use': dim_to_use, 'dim_ignore': dim_to_ignore} torch.save(stat_3d, './data/h36m/stat_3d.pth.tar') torch.save(train_set, './data/h36m/train_3d.pth.tar') torch.save(test_set, './data/h36m/test_3d.pth.tar') def load_data( bpath, subjects, actions, dim=3 ): """Loads 2d ground truth from disk, and puts it in an easy-to-acess dictionary Args bpath: String. Path where to load the data from subjects: List of integers. Subjects whose data will be loaded actions: List of strings. The actions to load dim: Integer={2,3}. 
Load 2 or 3-dimensional data Returns: data: Dictionary with keys k=(subject, action, seqname) values v=(nx(32*2) matrix of 2d ground truth) There will be 2 entries per subject/action if loading 3d data There will be 8 entries per subject/action if loading 2d data """ if not dim in [2,3]: raise ValueError('dim must be 2 or 3') data = {} for subj in subjects: for action in actions: print('Reading subject {0}, action {1}'.format(subj, action)) dpath = os.path.join( bpath, 'S{0}'.format(subj), 'MyPoseFeatures/D{0}_Positions'.format(dim), '{0}*.cdf'.format(action) ) print( dpath ) fnames = glob.glob( dpath ) loaded_seqs = 0 for fname in fnames: seqname = os.path.basename( fname ) # This rule makes sure SittingDown is not loaded when Sitting is requested if action == "Sitting" and seqname.startswith( "SittingDown" ): continue # This rule makes sure that WalkDog and WalkTogeter are not loaded when # Walking is requested. if seqname.startswith( action ): print( fname ) loaded_seqs = loaded_seqs + 1 cdf_file = cdflib.CDF(fname) poses = cdf_file.varget("Pose").squeeze() cdf_file.close() data[ (subj, action, seqname) ] = poses if dim == 2: assert loaded_seqs == 8, "Expecting 8 sequences, found {0} instead".format( loaded_seqs ) else: assert loaded_seqs == 2, "Expecting 2 sequences, found {0} instead".format( loaded_seqs ) return data def normalization_stats(complete_data, dim, predict_14=False ): """Computes normalization statistics: mean and stdev, dimensions used and ignored Args complete_data: nxd np array with poses dim. integer={2,3} dimensionality of the data predict_14. boolean. 
Whether to use only 14 joints Returns data_mean: np vector with the mean of the data data_std: np vector with the standard deviation of the data dimensions_to_ignore: list of dimensions not used in the model dimensions_to_use: list of dimensions used in the model """ if not dim in [2,3]: raise ValueError('dim must be 2 or 3') data_mean = np.mean(complete_data, axis=0) data_std = np.std(complete_data, axis=0) # Encodes which 17 (or 14) 2d-3d pairs we are predicting dimensions_to_ignore = [] if dim == 2: dimensions_to_use = np.where(np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0] dimensions_to_use = np.sort( np.hstack( (dimensions_to_use*2, dimensions_to_use*2+1))) dimensions_to_ignore = np.delete( np.arange(len(H36M_NAMES)*2), dimensions_to_use ) else: # dim == 3 dimensions_to_use = np.where(np.array([x != '' for x in H36M_NAMES]))[0] dimensions_to_use = np.delete( dimensions_to_use, [0,7,9] if predict_14 else 0 ) dimensions_to_use = np.sort( np.hstack( (dimensions_to_use*3, dimensions_to_use*3+1, dimensions_to_use*3+2))) dimensions_to_ignore = np.delete( np.arange(len(H36M_NAMES)*3), dimensions_to_use ) return data_mean, data_std, dimensions_to_ignore, dimensions_to_use def transform_world_to_camera(poses_set, cams, ncams=4 ): """Project 3d poses from world coordinate to camera coordinate system Args poses_set: dictionary with 3d poses cams: dictionary with cameras ncams: number of cameras per subject Return: t3d_camera: dictionary with 3d poses in camera coordinate """ t3d_camera = {} for t3dk in sorted( poses_set.keys() ): subj, action, seqname = t3dk t3d_world = poses_set[ t3dk ] for c in range( ncams ): R, T, _, _, _, _, name = cams[ (subj, c+1) ] camera_coord = cameras.world_to_camera_frame( np.reshape(t3d_world, [-1, 3]), R, T) camera_coord = np.reshape( camera_coord, [-1, len(H36M_NAMES)*3] ) sname = seqname[:-3]+ name + ".h5" # e.g.: Waiting 1.58860488.h5 t3d_camera[ (subj, action, sname) ] = camera_coord return t3d_camera def 
normalize_data(data, data_mean, data_std, dim_to_use ): """Normalizes a dictionary of poses Args data: dictionary where values are data_mean: np vector with the mean of the data data_std: np vector with the standard deviation of the data dim_to_use: list of dimensions to keep in the data Returns data_out: dictionary with same keys as data, but values have been normalized """ data_out = {} for key in data.keys(): data[ key ] = data[ key ][ :, dim_to_use ] mu = data_mean[dim_to_use] stddev = data_std[dim_to_use] data_out[ key ] = np.divide( (data[key] - mu), stddev ) return data_out def unNormalizeData(normalized_data, data_mean, data_std, dimensions_to_ignore): """Un-normalizes a matrix whose mean has been substracted and that has been divided by standard deviation. Some dimensions might also be missing Args normalized_data: nxd matrix to unnormalize data_mean: np vector with the mean of the data data_std: np vector with the standard deviation of the data dimensions_to_ignore: list of dimensions that were removed from the original data Returns orig_data: the input normalized_data, but unnormalized """ T = normalized_data.shape[0] # Batch size D = data_mean.shape[0] # Dimensionality orig_data = np.zeros((T, D), dtype=np.float32) dimensions_to_use = np.array([dim for dim in range(D) if dim not in dimensions_to_ignore]) orig_data[:, dimensions_to_use] = normalized_data # Multiply times stdev and add the mean stdMat = data_std.reshape((1, D)) stdMat = np.repeat(stdMat, T, axis=0) meanMat = data_mean.reshape((1, D)) meanMat = np.repeat(meanMat, T, axis=0) orig_data = np.multiply(orig_data, stdMat) + meanMat return orig_data def define_actions( action ): """Given an action string, returns a list of corresponding actions. Args action: String. either "all" or one of the h36m actions Returns actions: List of strings. Actions to use. 
Raises ValueError: if the action is not a valid action in Human 3.6M """ actions = ["Directions","Discussion","Eating","Greeting", "Phoning","Photo","Posing","Purchases", "Sitting","SittingDown","Smoking","Waiting", "WalkDog","Walking","WalkTogether"] if action == "All" or action == "all": return actions if not action in actions: raise ValueError("Unrecognized action: %s" % action ) return [action] def project_to_cameras( poses_set, cams, ncams=4 ): """ Project 3d poses using camera parameters Args poses_set: dictionary with 3d poses cams: dictionary with camera parameters ncams: number of cameras per subject Returns t2d: dictionary with 2d poses """ t2d = {} for t3dk in sorted( poses_set.keys() ): subj, a, seqname = t3dk t3d = poses_set[ t3dk ] for cam in range( ncams ): R, T, f, c, k, p, name = cams[ (subj, cam+1) ] pts2d, _, _, _, _ = cameras.project_point_radial( np.reshape(t3d, [-1, 3]), R, T, f, c, k, p ) pts2d = np.reshape( pts2d, [-1, len(H36M_NAMES)*2] ) sname = seqname[:-3]+ name + ".h5" # e.g.: Waiting 1.58860488.h5 t2d[ (subj, a, sname) ] = pts2d return t2d def create_2d_data( actions, data_dir, rcams ): """Creates 2d poses by projecting 3d poses with the corresponding camera parameters. Also normalizes the 2d poses Args actions: list of strings. Actions to load data_dir: string. 
Directory where the data can be loaded from rcams: dictionary with camera parameters Returns train_set: dictionary with projected 2d poses for training test_set: dictionary with projected 2d poses for testing data_mean: vector with the mean of the 2d training data data_std: vector with the standard deviation of the 2d training data dim_to_ignore: list with the dimensions to not predict dim_to_use: list with the dimensions to predict """ # Load 3d data train_set = load_data( data_dir, TRAIN_SUBJECTS, actions, dim=3 ) test_set = load_data( data_dir, TEST_SUBJECTS, actions, dim=3 ) # Create 2d data by projecting with camera parameters train_set = project_to_cameras( train_set, rcams ) test_set = project_to_cameras( test_set, rcams ) # Compute normalization statistics. complete_train = copy.deepcopy( np.vstack( list(train_set.values()) )) data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=2 ) # Divide every dimension independently train_set = normalize_data( train_set, data_mean, data_std, dim_to_use ) test_set = normalize_data( test_set, data_mean, data_std, dim_to_use ) return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use def read_3d_data( actions, data_dir, camera_frame, rcams, predict_14=False ): """Loads 3d poses, zero-centres and normalizes them Args actions: list of strings. Actions to load data_dir: string. Directory where the data can be loaded from camera_frame: boolean. Whether to convert the data to camera coordinates rcams: dictionary with camera parameters predict_14: boolean. 
Whether to predict only 14 joints Returns train_set: dictionary with loaded 3d poses for training test_set: dictionary with loaded 3d poses for testing data_mean: vector with the mean of the 3d training data data_std: vector with the standard deviation of the 3d training data dim_to_ignore: list with the dimensions to not predict dim_to_use: list with the dimensions to predict train_root_positions: dictionary with the 3d positions of the root in train test_root_positions: dictionary with the 3d positions of the root in test """ # Load 3d data train_set = load_data( data_dir, TRAIN_SUBJECTS, actions, dim=3 ) test_set = load_data( data_dir, TEST_SUBJECTS, actions, dim=3 ) if camera_frame: train_set = transform_world_to_camera( train_set, rcams ) test_set = transform_world_to_camera( test_set, rcams ) # Apply 3d post-processing (centering around root) train_set, train_root_positions = postprocess_3d( train_set ) test_set, test_root_positions = postprocess_3d( test_set ) # Compute normalization statistics complete_train = copy.deepcopy( np.vstack( list(train_set.values()) )) data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=3, predict_14=predict_14 ) # Divide every dimension independently train_set = normalize_data( train_set, data_mean, data_std, dim_to_use ) test_set = normalize_data( test_set, data_mean, data_std, dim_to_use ) return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use, train_root_positions, test_root_positions def postprocess_3d( poses_set ): """Center 3d points around root Args poses_set: dictionary with 3d data Returns poses_set: dictionary with 3d data centred around root (center hip) joint root_positions: dictionary with the original 3d position of each pose """ root_positions = {} for k in poses_set.keys(): # Keep track of the global position root_positions[k] = copy.deepcopy(poses_set[k][:,:3]) # Remove the root from the 3d position poses = poses_set[k] poses = poses - np.tile( 
poses[:,:3], [1, len(H36M_NAMES)] ) poses_set[k] = poses return poses_set, root_positions def load_stacked_hourglass(data_dir, subjects, actions): """ Load 2d detections from disk, and put it in an easy-to-acess dictionary. Args data_dir: string. Directory where to load the data from, subjects: list of integers. Subjects whose data will be loaded. actions: list of strings. The actions to load. Returns data: dictionary with keys k=(subject, action, seqname) values v=(nx(32*2) matrix of 2d stacked hourglass detections) There will be 2 entries per subject/action if loading 3d data There will be 8 entries per subject/action if loading 2d data """ # Permutation that goes from SH detections to H36M ordering. SH_TO_GT_PERM = np.array([SH_NAMES.index( h ) for h in H36M_NAMES if h != '' and h in SH_NAMES]) assert np.all( SH_TO_GT_PERM == np.array([6,2,1,0,3,4,5,7,8,9,13,14,15,12,11,10]) ) data = {} for subj in subjects: for action in actions: print('Reading subject {0}, action {1}'.format(subj, action)) dpath = os.path.join( data_dir, 'S{0}'.format(subj), 'StackedHourglass/{0}*.h5'.format(action) ) print( dpath ) fnames = glob.glob( dpath ) loaded_seqs = 0 for fname in fnames: seqname = os.path.basename( fname ) seqname = seqname.replace('_',' ') # This rule makes sure SittingDown is not loaded when Sitting is requested if action == "Sitting" and seqname.startswith( "SittingDown" ): continue # This rule makes sure that WalkDog and WalkTogeter are not loaded when # Walking is requested. 
if seqname.startswith( action ): print( fname ) loaded_seqs = loaded_seqs + 1 # Load the poses from the .h5 file with h5py.File( fname, 'r' ) as h5f: poses = h5f['poses'][:] # Permute the loaded data to make it compatible with H36M poses = poses[:,SH_TO_GT_PERM,:] # Reshape into n x (32*2) matrix poses = np.reshape(poses,[poses.shape[0], -1]) poses_final = np.zeros([poses.shape[0], len(H36M_NAMES)*2]) dim_to_use_x = np.where(np.array([x != '' and x != 'Neck/Nose' for x in H36M_NAMES]))[0] * 2 dim_to_use_y = dim_to_use_x+1 dim_to_use = np.zeros(len(SH_NAMES)*2,dtype=np.int32) dim_to_use[0::2] = dim_to_use_x dim_to_use[1::2] = dim_to_use_y poses_final[:,dim_to_use] = poses seqname = seqname+'-sh' data[ (subj, action, seqname) ] = poses_final # Make sure we loaded 8 sequences if (subj == 11 and action == 'Directions'): # <-- this video is damaged assert loaded_seqs == 7, "Expecting 7 sequences, found {0} instead. S:{1} {2}".format(loaded_seqs, subj, action ) else: assert loaded_seqs == 8, "Expecting 8 sequences, found {0} instead. S:{1} {2}".format(loaded_seqs, subj, action ) return data def read_2d_predictions( actions, data_dir ): """ Loads 2d data from precomputed Stacked Hourglass detections Args actions: list of strings. Actions to load data_dir: string. 
Directory where the data can be loaded from Returns train_set: dictionary with loaded 2d stacked hourglass detections for training test_set: dictionary with loaded 2d stacked hourglass detections for testing data_mean: vector with the mean of the 2d training data data_std: vector with the standard deviation of the 2d training data dim_to_ignore: list with the dimensions to not predict dim_to_use: list with the dimensions to predict """ train_set = load_stacked_hourglass( data_dir, TRAIN_SUBJECTS, actions) test_set = load_stacked_hourglass( data_dir, TEST_SUBJECTS, actions) complete_train = copy.deepcopy( np.vstack( train_set.values() )) data_mean, data_std, dim_to_ignore, dim_to_use = normalization_stats( complete_train, dim=2 ) train_set = normalize_data( train_set, data_mean, data_std, dim_to_use ) test_set = normalize_data( test_set, data_mean, data_std, dim_to_use ) return train_set, test_set, data_mean, data_std, dim_to_ignore, dim_to_use if __name__ == "__main__": main()
I hope this is helpful! :)
After downloading all the D positions, what renaming do we need to do?
Which file should be run after this?
Can you explain?
hi,
the link to download the pre-processed data human36m.zip does not exist, could you please upload it again?