mks0601 / V2V-PoseNet_RELEASE

Official Torch7 implementation of "V2V-PoseNet: Voxel-to-Voxel Prediction Network for Accurate 3D Hand and Human Pose Estimation from a Single Depth Map", CVPR 2018
https://arxiv.org/abs/1711.07399
MIT License
377 stars 69 forks source link

Tensorflow model.py #35

Closed jb892 closed 5 years ago

jb892 commented 5 years ago

Hi Moon,

I'm implementing a TensorFlow version of V2V-PoseNet. The code below is my implementation of the V2V-PoseNet model for training on the ITOP dataset. Could you please have a look at it and give me some feedback? I'm not entirely sure that the model is correct. Thanks in advance!

model.py

import numpy as np
from keras.models import Sequential
from keras.layers import Conv3D, MaxPool3D, Dropout, BatchNormalization
from keras.layers import Conv3DTranspose, Input, Conv2D, MaxPool2D, Flatten, Dense
from keras import layers, models
from keras.initializers import Zeros, TruncatedNormal

def build_3DBlock(y, next_fDim=16, kernelSz=1):
    """Apply a cubic Conv3D and then the shared normalization/activation stack.

    Args:
        y: Input Keras tensor.
        next_fDim: Number of filters produced by the convolution.
        kernelSz: Edge length of the cubic convolution kernel.

    Returns:
        The result of `add_common_layers` applied to the convolution output.
    """
    # The convolution carries no activation of its own; the non-linearity is
    # supplied afterwards by add_common_layers.
    conv = Conv3D(
        filters=next_fDim,
        kernel_size=(kernelSz,) * 3,
        padding="same",
        use_bias=True,
        bias_initializer=Zeros(),
        kernel_initializer=TruncatedNormal(mean=0, stddev=0.001),
    )
    return add_common_layers(conv(y))

def add_common_layers(y):
    """Run `y` through batch normalization followed by a LeakyReLU activation.

    Args:
        y: Input Keras tensor.

    Returns:
        The activated tensor.
    """
    normalized = layers.BatchNormalization()(y)
    return layers.LeakyReLU()(normalized)

def build_3DResBlock(y, next_fDim, _strides=(1, 1, 1), _project_shortcut=False):
    """Build a 3D residual block: two 3x3x3 convolutions plus a shortcut.

    The main path is Conv3D -> BatchNorm -> LeakyReLU -> Conv3D -> BatchNorm.
    The shortcut is the unmodified input unless a projection is needed, in
    which case it is a 1x1x1 Conv3D followed by BatchNorm. Both paths are
    summed and passed through a final LeakyReLU.

    A projection is used when any of the following holds:
      * `_project_shortcut` is True,
      * `_strides` is not the unit stride (spatial size changes),
      * the input channel count differs from `next_fDim` (an identity add
        would otherwise fail on mismatched shapes).

    Args:
        y: Input Keras tensor. The channel count is read from the last axis,
            i.e. channels-last layout is assumed (the same axis the
            BatchNormalization layers here normalize by default).
        next_fDim: Number of output filters.
        _strides: Stride of the first convolution and of the projection;
            an int or a sequence of three ints.
        _project_shortcut: Force a projection shortcut even when the identity
            shortcut would be shape-compatible.

    Returns:
        The output tensor of the residual block.
    """
    # Local import: only this block needs the backend helper.
    from keras import backend as K

    # Normalize the stride so the comparison below is not defeated by a list
    # or a bare int.
    if isinstance(_strides, int):
        strides = (_strides,) * 3
    else:
        strides = tuple(_strides)

    shortcut = y
    in_channels = K.int_shape(shortcut)[-1]

    y = layers.Conv3D(next_fDim, kernel_size=(3, 3, 3), padding="same", strides=strides,
                      use_bias=True, bias_initializer=Zeros(),
                      kernel_initializer=TruncatedNormal(mean=0, stddev=0.001)
                      )(y)
    y = layers.BatchNormalization()(y)
    y = layers.LeakyReLU()(y)

    y = layers.Conv3D(next_fDim, kernel_size=(3, 3, 3), padding="same", strides=(1, 1, 1),
                      use_bias=True, bias_initializer=Zeros(),
                      kernel_initializer=TruncatedNormal(mean=0, stddev=0.001)
                      )(y)
    y = layers.BatchNormalization()(y)

    # The stride is three-dimensional, so it must be compared against
    # (1, 1, 1). Comparing against the 2-tuple (1, 1) is always unequal and
    # would force a projection on every block, making `_project_shortcut`
    # meaningless.
    needs_projection = (
        _project_shortcut
        or strides != (1, 1, 1)
        or in_channels != next_fDim
    )
    if needs_projection:
        shortcut = layers.Conv3D(next_fDim, kernel_size=(1, 1, 1), strides=strides, padding="same",
                                 use_bias=True, bias_initializer=Zeros(),
                                 kernel_initializer=TruncatedNormal(mean=0, stddev=0.001)
                                 )(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    y = layers.add([shortcut, y])
    y = layers.LeakyReLU()(y)

    return y

def build_3DpoolBlock(y, poolSz):
    """Downsample `y` with a cubic max-pool whose stride equals its window.

    Args:
        y: Input Keras tensor.
        poolSz: Edge length of the pooling window, also used as the stride.

    Returns:
        The pooled tensor.
    """
    window = (poolSz,) * 3
    return MaxPool3D(pool_size=window, strides=window, padding="same")(y)

def build_3DupsampleBlock(y, next_fDim, kernelSz, str):
    """Upsample `y` with a transposed 3D convolution, then BatchNorm + LeakyReLU.

    Args:
        y: Input Keras tensor.
        next_fDim: Number of output filters of the transposed convolution.
        kernelSz: Edge length of the cubic kernel.
        str: Stride passed to the transposed convolution.

    Returns:
        The upsampled, normalized and activated tensor.
    """
    # NOTE: the parameter name `str` shadows the builtin inside this function;
    # it is kept unchanged so existing callers are not affected.
    deconv = Conv3DTranspose(
        filters=next_fDim,
        kernel_size=(kernelSz,) * 3,
        strides=str,
        padding="same",
        use_bias=True,
        bias_initializer=Zeros(),
        kernel_initializer=TruncatedNormal(mean=0, stddev=0.001),
    )
    upsampled = deconv(y)
    normalized = BatchNormalization()(upsampled)
    return layers.LeakyReLU()(normalized)

def build_branch1(y):
    """Build the outer down/up-sampling branch around `build_branch2`.

    Pipeline: 2x max-pool -> residual block (64) -> `build_branch2` ->
    residual block (64) -> stride-2 transposed convolution (32).

    Args:
        y: Input Keras tensor.

    Returns:
        The output tensor of the branch.
    """
    pooled = build_3DpoolBlock(y, 2)
    encoded = build_3DResBlock(pooled, 64)
    inner = build_branch2(encoded)
    decoded = build_3DResBlock(inner, 64)
    return build_3DupsampleBlock(decoded, 32, 2, 2)

def build_branch2(y):
    """Build the inner branch: a skip path added to a pooled/upsampled path.

    The skip path is one residual block (64) on the input. The deep path is
    2x max-pool -> three residual blocks (128) -> stride-2 transposed
    convolution (64). The two paths are summed element-wise.

    Args:
        y: Input Keras tensor.

    Returns:
        The sum of the deep path and the skip path.
    """
    # The skip path is created first so the layers are instantiated in the
    # same order as before.
    skip = build_3DResBlock(y, 64)

    deep = build_3DpoolBlock(y, 2)
    for block_idx in range(3):
        # Only the first block of the stack explicitly requests a projection.
        deep = build_3DResBlock(deep, 128, _project_shortcut=(block_idx == 0))
    deep = build_3DupsampleBlock(deep, 64, 2, 2)

    return layers.add([deep, skip])

def build_V2VModel(x, num_heatmaps=15):
    """Assemble the full network graph from an input tensor to output heatmaps.

    Pipeline: 7x7x7 conv block (16) -> 2x max-pool -> three residual blocks
    (32) -> [skip residual block (32) + `build_branch1`] summed -> residual
    block (32) -> two 1x1x1 conv blocks (32) -> final 1x1x1 convolution with
    `num_heatmaps` output channels and no normalization or activation.

    Args:
        x: Input Keras tensor.
        num_heatmaps: Number of channels of the final convolution. Defaults
            to 15, the value that was previously hard-coded (presumably the
            joint count of the target dataset; confirm against the labels).

    Returns:
        The output tensor of the final convolution.
    """
    x = build_3DBlock(x, next_fDim=16, kernelSz=7)
    x = build_3DpoolBlock(x, 2)

    for i in range(3):
        # Only the first block of the stack explicitly requests a projection.
        x = build_3DResBlock(x, 32, _project_shortcut=(i == 0))

    # Skip path and down/up-sampling branch both start from the same tensor.
    y = build_3DResBlock(x, 32)
    b1 = build_branch1(x)

    x = layers.add([b1, y])
    x = build_3DResBlock(x, next_fDim=32)
    x = build_3DBlock(x, next_fDim=32, kernelSz=1)
    x = build_3DBlock(x, next_fDim=32, kernelSz=1)
    x = Conv3D(num_heatmaps, kernel_size=(1, 1, 1), strides=(1, 1, 1), padding="valid",
               use_bias=True, bias_initializer=Zeros(),
               kernel_initializer=TruncatedNormal(mean=0, stddev=0.001)
               )(x)
    return x

# Edge length of the cubic voxel grid fed to the network.
inputDim = 88

# Create V2V model: single-channel cubic voxel input -> heatmap output.
voxel_input = Input(shape=(inputDim, inputDim, inputDim, 1), dtype=np.float32, name='input_layer')

heatmap_output = build_V2VModel(voxel_input)

model = models.Model(inputs=voxel_input, outputs=heatmap_output)

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
model.summary()

model.compile(optimizer='RMSprop', loss='mean_squared_error')

# Example training call (requires `voxel` and `heatmap` arrays to be defined):
# hist = model.fit(voxel, heatmap, batch_size=2, validation_split=0.2, epochs=10, verbose=1)

Model Summary:

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to
===================================================================
input_layer (InputLayer)        (None, 88, 88, 88, 1 0
__________________________________________________________________________________________________
conv3d_1 (Conv3D)               (None, 88, 88, 88, 1 5504        input_layer[0][0]
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 88, 88, 88, 1 64          conv3d_1[0][0]
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 88, 88, 88, 1 0           batch_normalization_1[0][0]
__________________________________________________________________________________________________
max_pooling3d_1 (MaxPooling3D)  (None, 44, 44, 44, 1 0           leaky_re_lu_1[0][0]
__________________________________________________________________________________________________
conv3d_2 (Conv3D)               (None, 44, 44, 44, 3 13856       max_pooling3d_1[0][0]
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 44, 44, 44, 3 128         conv3d_2[0][0]
__________________________________________________________________________________________________
leaky_re_lu_2 (LeakyReLU)       (None, 44, 44, 44, 3 0           batch_normalization_2[0][0]
__________________________________________________________________________________________________
conv3d_4 (Conv3D)               (None, 44, 44, 44, 3 544         max_pooling3d_1[0][0]
__________________________________________________________________________________________________
conv3d_3 (Conv3D)               (None, 44, 44, 44, 3 27680       leaky_re_lu_2[0][0]
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 44, 44, 44, 3 128         conv3d_4[0][0]
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 44, 44, 44, 3 128         conv3d_3[0][0]
__________________________________________________________________________________________________
add_1 (Add)                     (None, 44, 44, 44, 3 0           batch_normalization_4[0][0]
                                                                 batch_normalization_3[0][0]
__________________________________________________________________________________________________
leaky_re_lu_3 (LeakyReLU)       (None, 44, 44, 44, 3 0           add_1[0][0]
__________________________________________________________________________________________________
conv3d_5 (Conv3D)               (None, 44, 44, 44, 3 27680       leaky_re_lu_3[0][0]
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 44, 44, 44, 3 128         conv3d_5[0][0]
__________________________________________________________________________________________________
leaky_re_lu_4 (LeakyReLU)       (None, 44, 44, 44, 3 0           batch_normalization_5[0][0]
__________________________________________________________________________________________________
conv3d_7 (Conv3D)               (None, 44, 44, 44, 3 1056        leaky_re_lu_3[0][0]
__________________________________________________________________________________________________
conv3d_6 (Conv3D)               (None, 44, 44, 44, 3 27680       leaky_re_lu_4[0][0]
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 44, 44, 44, 3 128         conv3d_7[0][0]
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 44, 44, 44, 3 128         conv3d_6[0][0]
__________________________________________________________________________________________________
add_2 (Add)                     (None, 44, 44, 44, 3 0           batch_normalization_7[0][0]
                                                                 batch_normalization_6[0][0]
__________________________________________________________________________________________________
leaky_re_lu_5 (LeakyReLU)       (None, 44, 44, 44, 3 0           add_2[0][0]
__________________________________________________________________________________________________
conv3d_8 (Conv3D)               (None, 44, 44, 44, 3 27680       leaky_re_lu_5[0][0]
__________________________________________________________________________________________________
batch_normalization_8 (BatchNor (None, 44, 44, 44, 3 128         conv3d_8[0][0]
__________________________________________________________________________________________________
leaky_re_lu_6 (LeakyReLU)       (None, 44, 44, 44, 3 0           batch_normalization_8[0][0]
__________________________________________________________________________________________________
conv3d_10 (Conv3D)              (None, 44, 44, 44, 3 1056        leaky_re_lu_5[0][0]
__________________________________________________________________________________________________
conv3d_9 (Conv3D)               (None, 44, 44, 44, 3 27680       leaky_re_lu_6[0][0]
__________________________________________________________________________________________________
batch_normalization_10 (BatchNo (None, 44, 44, 44, 3 128         conv3d_10[0][0]
__________________________________________________________________________________________________
batch_normalization_9 (BatchNor (None, 44, 44, 44, 3 128         conv3d_9[0][0]
__________________________________________________________________________________________________
add_3 (Add)                     (None, 44, 44, 44, 3 0           batch_normalization_10[0][0]
                                                                 batch_normalization_9[0][0]
__________________________________________________________________________________________________
leaky_re_lu_7 (LeakyReLU)       (None, 44, 44, 44, 3 0           add_3[0][0]
__________________________________________________________________________________________________
max_pooling3d_2 (MaxPooling3D)  (None, 22, 22, 22, 3 0           leaky_re_lu_7[0][0]
__________________________________________________________________________________________________
conv3d_14 (Conv3D)              (None, 22, 22, 22, 6 55360       max_pooling3d_2[0][0]
__________________________________________________________________________________________________
batch_normalization_14 (BatchNo (None, 22, 22, 22, 6 256         conv3d_14[0][0]
__________________________________________________________________________________________________
leaky_re_lu_10 (LeakyReLU)      (None, 22, 22, 22, 6 0           batch_normalization_14[0][0]
__________________________________________________________________________________________________
conv3d_16 (Conv3D)              (None, 22, 22, 22, 6 2112        max_pooling3d_2[0][0]
__________________________________________________________________________________________________
conv3d_15 (Conv3D)              (None, 22, 22, 22, 6 110656      leaky_re_lu_10[0][0]
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 22, 22, 22, 6 256         conv3d_16[0][0]
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 22, 22, 22, 6 256         conv3d_15[0][0]
__________________________________________________________________________________________________
add_5 (Add)                     (None, 22, 22, 22, 6 0           batch_normalization_16[0][0]
                                                                 batch_normalization_15[0][0]
__________________________________________________________________________________________________
leaky_re_lu_11 (LeakyReLU)      (None, 22, 22, 22, 6 0           add_5[0][0]
__________________________________________________________________________________________________
max_pooling3d_3 (MaxPooling3D)  (None, 11, 11, 11, 6 0           leaky_re_lu_11[0][0]
__________________________________________________________________________________________________
conv3d_20 (Conv3D)              (None, 11, 11, 11, 1 221312      max_pooling3d_3[0][0]
__________________________________________________________________________________________________
batch_normalization_20 (BatchNo (None, 11, 11, 11, 1 512         conv3d_20[0][0]
__________________________________________________________________________________________________
leaky_re_lu_14 (LeakyReLU)      (None, 11, 11, 11, 1 0           batch_normalization_20[0][0]
__________________________________________________________________________________________________
conv3d_22 (Conv3D)              (None, 11, 11, 11, 1 8320        max_pooling3d_3[0][0]
__________________________________________________________________________________________________
conv3d_21 (Conv3D)              (None, 11, 11, 11, 1 442496      leaky_re_lu_14[0][0]
__________________________________________________________________________________________________
batch_normalization_22 (BatchNo (None, 11, 11, 11, 1 512         conv3d_22[0][0]
__________________________________________________________________________________________________
batch_normalization_21 (BatchNo (None, 11, 11, 11, 1 512         conv3d_21[0][0]
__________________________________________________________________________________________________
add_7 (Add)                     (None, 11, 11, 11, 1 0           batch_normalization_22[0][0]
                                                                 batch_normalization_21[0][0]
__________________________________________________________________________________________________
leaky_re_lu_15 (LeakyReLU)      (None, 11, 11, 11, 1 0           add_7[0][0]
__________________________________________________________________________________________________
conv3d_23 (Conv3D)              (None, 11, 11, 11, 1 442496      leaky_re_lu_15[0][0]
__________________________________________________________________________________________________
batch_normalization_23 (BatchNo (None, 11, 11, 11, 1 512         conv3d_23[0][0]
__________________________________________________________________________________________________
leaky_re_lu_16 (LeakyReLU)      (None, 11, 11, 11, 1 0           batch_normalization_23[0][0]
__________________________________________________________________________________________________
conv3d_25 (Conv3D)              (None, 11, 11, 11, 1 16512       leaky_re_lu_15[0][0]
__________________________________________________________________________________________________
conv3d_24 (Conv3D)              (None, 11, 11, 11, 1 442496      leaky_re_lu_16[0][0]
__________________________________________________________________________________________________
batch_normalization_25 (BatchNo (None, 11, 11, 11, 1 512         conv3d_25[0][0]
__________________________________________________________________________________________________
batch_normalization_24 (BatchNo (None, 11, 11, 11, 1 512         conv3d_24[0][0]
__________________________________________________________________________________________________
add_8 (Add)                     (None, 11, 11, 11, 1 0           batch_normalization_25[0][0]
                                                                 batch_normalization_24[0][0]
__________________________________________________________________________________________________
leaky_re_lu_17 (LeakyReLU)      (None, 11, 11, 11, 1 0           add_8[0][0]
__________________________________________________________________________________________________
conv3d_26 (Conv3D)              (None, 11, 11, 11, 1 442496      leaky_re_lu_17[0][0]
__________________________________________________________________________________________________
batch_normalization_26 (BatchNo (None, 11, 11, 11, 1 512         conv3d_26[0][0]
__________________________________________________________________________________________________
leaky_re_lu_18 (LeakyReLU)      (None, 11, 11, 11, 1 0           batch_normalization_26[0][0]
__________________________________________________________________________________________________
conv3d_28 (Conv3D)              (None, 11, 11, 11, 1 16512       leaky_re_lu_17[0][0]
__________________________________________________________________________________________________
conv3d_27 (Conv3D)              (None, 11, 11, 11, 1 442496      leaky_re_lu_18[0][0]
__________________________________________________________________________________________________
conv3d_17 (Conv3D)              (None, 22, 22, 22, 6 110656      leaky_re_lu_11[0][0]
__________________________________________________________________________________________________
batch_normalization_28 (BatchNo (None, 11, 11, 11, 1 512         conv3d_28[0][0]
__________________________________________________________________________________________________
batch_normalization_27 (BatchNo (None, 11, 11, 11, 1 512         conv3d_27[0][0]
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 22, 22, 22, 6 256         conv3d_17[0][0]
__________________________________________________________________________________________________
add_9 (Add)                     (None, 11, 11, 11, 1 0           batch_normalization_28[0][0]
                                                                 batch_normalization_27[0][0]
__________________________________________________________________________________________________
leaky_re_lu_12 (LeakyReLU)      (None, 22, 22, 22, 6 0           batch_normalization_17[0][0]
__________________________________________________________________________________________________
leaky_re_lu_19 (LeakyReLU)      (None, 11, 11, 11, 1 0           add_9[0][0]
__________________________________________________________________________________________________
conv3d_19 (Conv3D)              (None, 22, 22, 22, 6 4160        leaky_re_lu_11[0][0]
__________________________________________________________________________________________________
conv3d_18 (Conv3D)              (None, 22, 22, 22, 6 110656      leaky_re_lu_12[0][0]
__________________________________________________________________________________________________
conv3d_transpose_1 (Conv3DTrans (None, 22, 22, 22, 6 65600       leaky_re_lu_19[0][0]
__________________________________________________________________________________________________
batch_normalization_19 (BatchNo (None, 22, 22, 22, 6 256         conv3d_19[0][0]
__________________________________________________________________________________________________
batch_normalization_18 (BatchNo (None, 22, 22, 22, 6 256         conv3d_18[0][0]
__________________________________________________________________________________________________
batch_normalization_29 (BatchNo (None, 22, 22, 22, 6 256         conv3d_transpose_1[0][0]
__________________________________________________________________________________________________
add_6 (Add)                     (None, 22, 22, 22, 6 0           batch_normalization_19[0][0]
                                                                 batch_normalization_18[0][0]
__________________________________________________________________________________________________
leaky_re_lu_20 (LeakyReLU)      (None, 22, 22, 22, 6 0           batch_normalization_29[0][0]
__________________________________________________________________________________________________
leaky_re_lu_13 (LeakyReLU)      (None, 22, 22, 22, 6 0           add_6[0][0]
__________________________________________________________________________________________________
add_10 (Add)                    (None, 22, 22, 22, 6 0           leaky_re_lu_20[0][0]
                                                                 leaky_re_lu_13[0][0]
__________________________________________________________________________________________________
conv3d_29 (Conv3D)              (None, 22, 22, 22, 6 110656      add_10[0][0]
__________________________________________________________________________________________________
batch_normalization_30 (BatchNo (None, 22, 22, 22, 6 256         conv3d_29[0][0]
__________________________________________________________________________________________________
leaky_re_lu_21 (LeakyReLU)      (None, 22, 22, 22, 6 0           batch_normalization_30[0][0]
__________________________________________________________________________________________________
conv3d_31 (Conv3D)              (None, 22, 22, 22, 6 4160        add_10[0][0]
__________________________________________________________________________________________________
conv3d_30 (Conv3D)              (None, 22, 22, 22, 6 110656      leaky_re_lu_21[0][0]
__________________________________________________________________________________________________
conv3d_11 (Conv3D)              (None, 44, 44, 44, 3 27680       leaky_re_lu_7[0][0]
__________________________________________________________________________________________________
batch_normalization_32 (BatchNo (None, 22, 22, 22, 6 256         conv3d_31[0][0]
__________________________________________________________________________________________________
batch_normalization_31 (BatchNo (None, 22, 22, 22, 6 256         conv3d_30[0][0]
__________________________________________________________________________________________________
batch_normalization_11 (BatchNo (None, 44, 44, 44, 3 128         conv3d_11[0][0]
__________________________________________________________________________________________________
add_11 (Add)                    (None, 22, 22, 22, 6 0           batch_normalization_32[0][0]
                                                                 batch_normalization_31[0][0]
__________________________________________________________________________________________________
leaky_re_lu_8 (LeakyReLU)       (None, 44, 44, 44, 3 0           batch_normalization_11[0][0]
__________________________________________________________________________________________________
leaky_re_lu_22 (LeakyReLU)      (None, 22, 22, 22, 6 0           add_11[0][0]
__________________________________________________________________________________________________
conv3d_13 (Conv3D)              (None, 44, 44, 44, 3 1056        leaky_re_lu_7[0][0]
__________________________________________________________________________________________________
conv3d_12 (Conv3D)              (None, 44, 44, 44, 3 27680       leaky_re_lu_8[0][0]
__________________________________________________________________________________________________
conv3d_transpose_2 (Conv3DTrans (None, 44, 44, 44, 3 16416       leaky_re_lu_22[0][0]
__________________________________________________________________________________________________
batch_normalization_13 (BatchNo (None, 44, 44, 44, 3 128         conv3d_13[0][0]
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 44, 44, 44, 3 128         conv3d_12[0][0]
__________________________________________________________________________________________________
batch_normalization_33 (BatchNo (None, 44, 44, 44, 3 128         conv3d_transpose_2[0][0]
__________________________________________________________________________________________________
add_4 (Add)                     (None, 44, 44, 44, 3 0           batch_normalization_13[0][0]
                                                                 batch_normalization_12[0][0]
__________________________________________________________________________________________________
leaky_re_lu_23 (LeakyReLU)      (None, 44, 44, 44, 3 0           batch_normalization_33[0][0]
__________________________________________________________________________________________________
leaky_re_lu_9 (LeakyReLU)       (None, 44, 44, 44, 3 0           add_4[0][0]
__________________________________________________________________________________________________
add_12 (Add)                    (None, 44, 44, 44, 3 0           leaky_re_lu_23[0][0]
                                                                 leaky_re_lu_9[0][0]
__________________________________________________________________________________________________
conv3d_32 (Conv3D)              (None, 44, 44, 44, 3 27680       add_12[0][0]
__________________________________________________________________________________________________
batch_normalization_34 (BatchNo (None, 44, 44, 44, 3 128         conv3d_32[0][0]
__________________________________________________________________________________________________
leaky_re_lu_24 (LeakyReLU)      (None, 44, 44, 44, 3 0           batch_normalization_34[0][0]
__________________________________________________________________________________________________
conv3d_34 (Conv3D)              (None, 44, 44, 44, 3 1056        add_12[0][0]
__________________________________________________________________________________________________
conv3d_33 (Conv3D)              (None, 44, 44, 44, 3 27680       leaky_re_lu_24[0][0]
__________________________________________________________________________________________________
batch_normalization_36 (BatchNo (None, 44, 44, 44, 3 128         conv3d_34[0][0]
__________________________________________________________________________________________________
batch_normalization_35 (BatchNo (None, 44, 44, 44, 3 128         conv3d_33[0][0]
__________________________________________________________________________________________________
add_13 (Add)                    (None, 44, 44, 44, 3 0           batch_normalization_36[0][0]
                                                                 batch_normalization_35[0][0]
__________________________________________________________________________________________________
leaky_re_lu_25 (LeakyReLU)      (None, 44, 44, 44, 3 0           add_13[0][0]
__________________________________________________________________________________________________
conv3d_35 (Conv3D)              (None, 44, 44, 44, 3 1056        leaky_re_lu_25[0][0]
__________________________________________________________________________________________________
batch_normalization_37 (BatchNo (None, 44, 44, 44, 3 128         conv3d_35[0][0]
__________________________________________________________________________________________________
leaky_re_lu_26 (LeakyReLU)      (None, 44, 44, 44, 3 0           batch_normalization_37[0][0]
__________________________________________________________________________________________________
conv3d_36 (Conv3D)              (None, 44, 44, 44, 3 1056        leaky_re_lu_26[0][0]
__________________________________________________________________________________________________
batch_normalization_38 (BatchNo (None, 44, 44, 44, 3 128         conv3d_36[0][0]
__________________________________________________________________________________________________
leaky_re_lu_27 (LeakyReLU)      (None, 44, 44, 44, 3 0           batch_normalization_38[0][0]
__________________________________________________________________________________________________
conv3d_37 (Conv3D)              (None, 44, 44, 44, 15 495         leaky_re_lu_27[0][0]
==================================================================================================
Total params: 3,461,615
Trainable params: 3,456,847
Non-trainable params: 4,768
__________________________________________________________________________________________________
None
mks0601 commented 5 years ago

Sorry, I'm too busy to read through all of that long code.