Official Torch7 implementation of "V2V-PoseNet: Voxel-to-Voxel Prediction Network for Accurate 3D Hand and Human Pose Estimation from a Single Depth Map", CVPR 2018
I'm implementing a TensorFlow version of V2V-PoseNet. The code below is my implementation of the V2V-PoseNet model for training on the ITOP dataset. Could you please have a look at it and give me some feedback? I'm not entirely sure it is a correct model. Thanks in advance!
model.py
import numpy as np
from keras.models import Sequential
from keras.layers import Conv3D, MaxPool3D, Dropout, BatchNormalization
from keras.layers import Conv3DTranspose, Input, Conv2D, MaxPool2D, Flatten, Dense
from keras import layers, models
from keras.initializers import Zeros, TruncatedNormal
def build_3DBlock(y, next_fDim=16, kernelSz=1):
    """Apply a cubic 3D convolution followed by the shared norm/activation stack.

    Args:
        y: Input tensor fed to the convolution.
        next_fDim: Number of filters produced by the convolution.
        kernelSz: Edge length of the cubic convolution kernel.

    Returns:
        The result of ``add_common_layers`` applied to the convolution output.
    """
    # The convolution itself has no activation; the nonlinearity comes from
    # add_common_layers. No dropout is applied in this block.
    conv = Conv3D(
        filters=next_fDim,
        kernel_size=(kernelSz,) * 3,
        padding="same",
        use_bias=True,
        bias_initializer=Zeros(),
        kernel_initializer=TruncatedNormal(mean=0, stddev=0.001),
    )
    return add_common_layers(conv(y))
def add_common_layers(y):
    """Run ``y`` through batch normalization and then a LeakyReLU activation.

    Args:
        y: Input tensor.

    Returns:
        The activated, batch-normalized tensor.
    """
    normalized = layers.BatchNormalization()(y)
    return layers.LeakyReLU()(normalized)
def build_3DResBlock(y, next_fDim, _strides=(1, 1, 1), _project_shortcut=False):
    """Build a 3D residual block: two 3x3x3 convolutions plus a shortcut branch.

    The main path is Conv3D -> BatchNorm -> LeakyReLU -> Conv3D -> BatchNorm.
    Its output is summed with the shortcut and passed through a final
    LeakyReLU.

    The shortcut is the identity unless a projection (1x1x1 Conv3D + BatchNorm)
    is needed, which is the case when any of the following holds:

    * ``_project_shortcut`` is True,
    * ``_strides`` is not the unit stride (the main path changes the spatial
      size, so the shortcut must follow), or
    * the input channel count differs from ``next_fDim`` (the element-wise
      sum requires matching shapes).

    Args:
        y: Input tensor. Assumed to be channels-last, i.e. the last axis holds
            the feature channels -- TODO confirm if another data format is used.
        next_fDim: Number of filters of both convolutions (and of the
            projection, when one is built).
        _strides: Strides of the first convolution and of the projection;
            either an int or a 3-element sequence.
        _project_shortcut: Force a projection shortcut even when the shapes
            would allow an identity shortcut.

    Returns:
        The output tensor of the residual block.
    """

    def _conv(kernel, strides):
        # Every convolution in the block shares the same initializers.
        return layers.Conv3D(
            next_fDim,
            kernel_size=kernel,
            strides=strides,
            padding="same",
            use_bias=True,
            bias_initializer=Zeros(),
            kernel_initializer=TruncatedNormal(mean=0, stddev=0.001),
        )

    shortcut = y

    y = _conv((3, 3, 3), _strides)(y)
    y = layers.BatchNormalization()(y)
    y = layers.LeakyReLU()(y)

    y = _conv((3, 3, 3), (1, 1, 1))(y)
    y = layers.BatchNormalization()(y)

    # The previous check compared against the 2-D tuple (1, 1), which never
    # equals a 3-element stride, so the projection was built unconditionally.
    if isinstance(_strides, int):
        is_strided = _strides != 1
    else:
        is_strided = tuple(_strides) != (1, 1, 1)

    # Without a projection, layers.add would fail on a channel mismatch.
    channels_change = bool(shortcut.shape[-1] != next_fDim)

    if _project_shortcut or is_strided or channels_change:
        shortcut = _conv((1, 1, 1), _strides)(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    y = layers.add([shortcut, y])
    y = layers.LeakyReLU()(y)
    return y
def build_3DpoolBlock(y, poolSz):
    """Downsample ``y`` with a cubic max-pool whose stride equals its window.

    Args:
        y: Input tensor.
        poolSz: Edge length of the pooling window; also used as the stride
            along each of the three pooled axes.

    Returns:
        The max-pooled tensor (``padding="same"``).
    """
    window = (poolSz, poolSz, poolSz)
    return MaxPool3D(pool_size=window, strides=window, padding="same")(y)
def build_3DupsampleBlock(y, next_fDim, kernelSz, str):
    """Upsample ``y`` with a transposed 3D convolution, then BatchNorm + LeakyReLU.

    Args:
        y: Input tensor.
        next_fDim: Number of filters of the transposed convolution.
        kernelSz: Edge length of the cubic kernel.
        str: Strides passed to the transposed convolution. NOTE: this name
            shadows the builtin ``str`` inside the function; it is kept so
            that the signature stays unchanged for callers.

    Returns:
        The upsampled, normalized and activated tensor.
    """
    deconv = Conv3DTranspose(
        filters=next_fDim,
        kernel_size=(kernelSz,) * 3,
        strides=str,
        padding="same",
        use_bias=True,
        bias_initializer=Zeros(),
        kernel_initializer=TruncatedNormal(mean=0, stddev=0.001),
    )
    upsampled = deconv(y)
    normalized = BatchNormalization()(upsampled)
    return layers.LeakyReLU()(normalized)
def build_branch1(y):
    """Build the outer encoder/decoder level around ``build_branch2``.

    Sequence: max-pool by 2, residual block with 64 filters, the nested
    ``build_branch2`` level, another residual block with 64 filters, and a
    transposed convolution (32 filters, kernel 2, stride 2).

    Args:
        y: Input tensor.

    Returns:
        The output tensor of the upsampling block.
    """
    pooled = build_3DpoolBlock(y, 2)
    encoded = build_3DResBlock(pooled, 64)
    inner = build_branch2(encoded)
    decoded = build_3DResBlock(inner, 64)
    return build_3DupsampleBlock(decoded, 32, 2, 2)
def build_branch2(y):
    """Build the inner encoder/decoder level with its own skip connection.

    The skip path is a single residual block with 64 filters. The deep path
    max-pools by 2, applies three residual blocks with 128 filters (only the
    first one requests a projection shortcut), and upsamples with a transposed
    convolution (64 filters, kernel 2, stride 2). Both paths are summed.

    Args:
        y: Input tensor.

    Returns:
        The element-wise sum of the deep path and the skip path.
    """
    skip = build_3DResBlock(y, 64)

    deep = build_3DpoolBlock(y, 2)
    for idx in range(3):
        deep = build_3DResBlock(deep, 128, _project_shortcut=(idx == 0))
    deep = build_3DupsampleBlock(deep, 64, 2, 2)

    return layers.add([deep, skip])
def build_V2VModel(x, num_joints=15):
    """Assemble the full voxel-to-voxel network on top of the input tensor ``x``.

    Layout:

    1. Front: 7x7x7 conv block (16 filters), max-pool by 2, three residual
       blocks with 32 filters (the first one requests a projection shortcut).
    2. Encoder/decoder: a residual skip block with 32 filters summed with the
       output of ``build_branch1``.
    3. Back: a residual block and two 1x1x1 conv blocks, all with 32 filters.
    4. Output: a plain 1x1x1 convolution with ``num_joints`` filters and no
       normalization or activation.

    Args:
        x: Input tensor.
        num_joints: Number of output channels of the final convolution (one
            per predicted heatmap). Defaults to 15, the value that used to be
            hard-coded here.

    Returns:
        The output tensor of the final convolution.
    """
    # Front layers.
    x = build_3DBlock(x, next_fDim=16, kernelSz=7)
    x = build_3DpoolBlock(x, 2)
    for idx in range(3):
        x = build_3DResBlock(x, 32, _project_shortcut=(idx == 0))

    # Encoder/decoder with a skip connection at this resolution.
    skip = build_3DResBlock(x, 32)
    branch = build_branch1(x)
    x = layers.add([branch, skip])

    # Back layers.
    x = build_3DResBlock(x, next_fDim=32)
    x = build_3DBlock(x, next_fDim=32, kernelSz=1)
    x = build_3DBlock(x, next_fDim=32, kernelSz=1)

    # Output layer: one channel per joint heatmap.
    x = Conv3D(
        num_joints,
        kernel_size=(1, 1, 1),
        strides=(1, 1, 1),
        padding="valid",
        use_bias=True,
        bias_initializer=Zeros(),
        kernel_initializer=TruncatedNormal(mean=0, stddev=0.001),
    )(x)
    return x
# Edge length of the cubic input voxel grid.
inputDim = 88

# Create V2V model: single-channel voxel grid in, heatmap volume out.
voxel_input = Input(shape=(inputDim, inputDim, inputDim, 1), dtype=np.float32, name='input_layer')
heatmap_output = build_V2VModel(voxel_input)
model = models.Model(inputs=voxel_input, outputs=heatmap_output)

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
model.summary()

model.compile(optimizer='RMSprop', loss='mean_squared_error')
# Example training call (voxel / heatmap arrays must be provided by the caller):
# hist = model.fit(voxel, heatmap, batch_size=2, validation_split=0.2, epochs=10, verbose=1)
Hi Moon,
I'm implementing a TensorFlow version of V2V-PoseNet. The code below is my implementation of the V2V-PoseNet model for training on the ITOP dataset. Could you please have a look at it and give me some feedback? I'm not entirely sure it is a correct model. Thanks in advance!
model.py
Model Summary: