wbenbihi / hourglasstensorflow

Tensorflow implementation of Stacked Hourglass Networks for Human Pose Estimation
MIT License
479 stars 177 forks source link

Model with Keras correct? #35

Closed SaifAlDilaimi closed 2 years ago

SaifAlDilaimi commented 6 years ago

I'm trying to convert the HourglassModel class/network to a Keras model. What I have so far is:

(https://gist.github.com/SaifAlDilaimi/cdacc15129fb71f905990282c20b0b35)

# -*- coding: utf-8 -*-
import GlobalParams as PARAMS

from keras import backend as K

from keras.layers import Input
from keras.layers import Lambda
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Activation
from keras.layers import Conv2D
from keras.layers import SeparableConv2D
from keras.layers import BatchNormalization

from keras.layers import MaxPooling2D
from keras.layers import UpSampling2D

from keras.layers import Multiply
from keras.layers import Concatenate
from keras.layers import Add

from keras.models import Model
from keras.optimizers import RMSprop
from keras import losses
from keras import metrics

def conv(x, filters, kernel_size = 1, strides=(1, 1), padding='same', name="conv"):
    """ Bias-free spatial convolution (Conv2D)
        Args:
            x           : Input tensor passed to the Conv2D layer
            filters     : Number of filters (output channels)
            kernel_size : Size of the kernel
            strides     : Stride
            padding     : Padding mode forwarded to Conv2D
            name        : Name of the block (currently unused; not forwarded to the layer)
        Returns:
            Output tensor of the Conv2D layer
    """
    layer = Conv2D(
        filters,
        kernel_size,
        strides=strides,
        padding=padding,
        use_bias=False,
    )
    return layer(x)

def max_pool2d(x, pool_size = (2,2), strides=(2,2), padding="valid"):
    """ Max pooling (MaxPooling2D)
        Args:
            x         : Input tensor passed to the pooling layer
            pool_size : Pooling window size
            strides   : Stride of the pooling window
            padding   : Padding mode forwarded to MaxPooling2D
        Returns:
            Output tensor of the MaxPooling2D layer
    """
    pooling = MaxPooling2D(pool_size=pool_size, strides=strides, padding=padding)
    return pooling(x)

def conv_bn(x, filters, kernel_size = 1, strides=(1, 1), padding='same', name="conv_bn"):
    """ Spatial Convolution (CONV2D) + BatchNormalization, without activation
        Args:
            x           : Input tensor
            filters     : Number of filters (channels)
            kernel_size : Size of kernel
            strides     : Stride
            padding     : Padding mode forwarded to the convolution
            name        : Name of the block (handed on to `conv`)
        Returns:
            Output tensor of the BatchNormalization layer
    """
    convolved = conv(x, filters, kernel_size, strides, padding, name)
    # Normalization runs over the last axis and has no learned scale.
    normalizer = BatchNormalization(axis=-1, scale=False)
    return normalizer(convolved)

def conv_bn_relu(x, filters, kernel_size = 1, strides = 1, padding="same", name="conv_bn_rel"):
    """ Spatial Convolution (CONV2D) + BatchNormalization + ReLU Activation
    Args:
        x           : Input tensor
        filters     : Number of filters (channels)
        kernel_size : Size of kernel
        strides     : Stride
        padding     : Padding mode forwarded to the convolution
        name        : Name of the block (handed on to `conv`)
    Returns:
        Output tensor of the ReLU activation
    """
    convolved = conv(x, filters, kernel_size, strides, padding, name)
    # Normalization runs over the last axis and has no learned scale.
    normalized = BatchNormalization(axis=-1, scale=False)(convolved)
    return Activation('relu')(normalized)

def conv_block(x, numOut, name="conv_block"):
    """ Convolutional Block: BatchNormalization + ReLU + convolution
        Args:
            x       : Input tensor
            numOut  : Desired output number of channels (cast to int)
            name    : Name of the block (currently unused)
        Returns:
            Output tensor of the convolution
    """
    normalized = BatchNormalization(axis=-1, scale=False)(x)
    activated = Activation('relu')(normalized)
    # `conv` is called with its defaults, i.e. kernel_size=1 and 'same' padding.
    return conv(activated, int(numOut))

def skip_layer(x, numOut, name = 'skip_layer'):
    """ Skip Layer
    Args:
        x       : Input tensor; its channel count is read from `x.shape[3]`
        numOut  : Desired output number of channels
        name    : Name of the block (currently unused)
    Returns:
        `x` itself when `x.shape[3]` already equals `numOut`, otherwise the
        result of running `x` through `conv` with `numOut` filters
    """
    in_channels = x.shape[3]
    already_matches = in_channels == numOut
    # Debug trace of the shape comparison that selects the branch below.
    print(x.shape, in_channels, numOut, already_matches)
    return x if already_matches else conv(x, numOut)

def residual_block(x, numOut, name = "residual_block"):
    """ Residual Unit
        Args:
            x       : Input tensor
            numOut  : Number of output features (channels)
            name    : Name of the block (currently unused)
        Returns:
            ReLU of the element-wise sum of the `conv_block` branch and the
            `skip_layer` branch
    """
    # Build the convolutional branch first, then the shortcut, so layers are
    # created in the same order as before.
    main_path = conv_block(x, numOut)
    shortcut = skip_layer(x, numOut)
    merged = Add()([main_path, shortcut])
    return Activation('relu')(merged)

def hourglass(x, n, numOut, name = 'hourglass'):
    """ Hourglass Module
    Args:
        x       : Input tensor
        n       : Remaining recursion depth; the module recurses while n > 0,
                  so the input is max-pooled n + 1 times in total
        numOut  : Number of output features (channels)
        name    : Name of the block (currently unused)
    Returns:
        Tensor obtained by adding the upsampled lower branch to the upper
        branch, followed by ReLU and Dropout(0.2)
    """
    # upper branch: residual block at the current resolution
    up_1 = residual_block(x, numOut, name="up_1")
    # lower branch: halve the resolution, then process
    low_ = max_pool2d(x)
    low_1 = residual_block(low_, numOut, name="low_1")

    if n > 0:
        # recurse into a nested hourglass at the lower resolution
        low_2 = hourglass(low_1, n-1, numOut, name="low_2")
    else:
        # innermost level: a plain residual block
        low_2 = residual_block(low_1, numOut, name="low_2")

    low_3 = residual_block(low_2, numOut, name="low_3")
    print("low3: ", low_3.shape)
    # A fixed factor of 2 undoes the 2x2 max pooling of the lower branch.
    up_size = (2,2)
    print("upsampling size: ", up_size)
    up_2 = UpSampling2D(up_size)(low_3)

    print(up_1)
    print(up_2)

    x = Add()([up_2, up_1])
    x = Activation('relu')(x)
    x = Dropout(0.2)(x)

    return x

class HGKerasModel():
    """Builder for a stacked hourglass Keras model configured through PARAMS."""

    def build_model(self):
        """ Assemble, compile and return the stacked hourglass model.

        The network is a preprocessing stem followed by
        PARAMS.ML_DEEPPOSE_STAGES hourglass stages. Every stage produces an
        output with PARAMS.ML_LABEL_CLASSES channels; all stage outputs are
        concatenated and passed through a sigmoid. Every stage except the last
        also feeds a re-projected copy of its output into the next stage.

        Returns:
            The compiled Keras Model (RMSprop, binary cross-entropy loss)
        Raises:
            ValueError: if PARAMS.ML_DEEPPOSE_STAGES is smaller than 1
        """
        stages = PARAMS.ML_DEEPPOSE_STAGES
        if stages < 1:
            raise ValueError(
                "ML_DEEPPOSE_STAGES must be at least 1, got {}".format(stages))

        if K.image_data_format() == 'channels_first':
            # NOTE(review): the layer helpers in this file normalize over
            # axis=-1 and read the channel count from shape[3], so
            # channels_first is probably not fully supported -- confirm.
            input_shape = (3, PARAMS.ML_INPUT_IMAGE_HEIGHT, PARAMS.ML_INPUT_IMAGE_WIDTH)
        else:
            # channels_last tensors are laid out as (height, width, channels).
            input_shape = (PARAMS.ML_INPUT_IMAGE_HEIGHT, PARAMS.ML_INPUT_IMAGE_WIDTH, 3)

        m_input = Input(shape=input_shape)

        # preprocessing
        conv1 = conv_bn_relu(m_input, filters=64, kernel_size=6, strides=2)
        r1 = residual_block(conv1, 128)
        pool1 = max_pool2d(r1)
        r2 = residual_block(pool1, numOut=int(PARAMS.ML_INPUT_FEATURES/2))
        r3 = residual_block(r2, numOut=PARAMS.ML_INPUT_FEATURES)

        # One output tensor per stage, in stage order.
        out = []
        # Stage 0 consumes the stem; each later stage consumes the previous
        # stage's merged features.
        stage_input = r3
        for i in range(stages):
            hg = hourglass(stage_input, PARAMS.ML_HOURGLASS_DOWNSAMPLING, PARAMS.ML_INPUT_FEATURES)
            ll = conv_bn_relu(hg, PARAMS.ML_INPUT_FEATURES)
            out.append(conv_bn_relu(ll, PARAMS.ML_LABEL_CLASSES))

            if i < stages - 1:
                # Project the stage output back to the feature width and merge
                # it with the stage features and the stage input. The last
                # stage has no successor, so it skips this step.
                out_ = conv(out[-1], PARAMS.ML_INPUT_FEATURES)
                stage_input = Add()([out_, ll, stage_input])

        # Concatenate needs at least two inputs; a single stage is used as is.
        if len(out) > 1:
            conc = Concatenate()(out)
        else:
            conc = out[0]
        sigmoid = Activation('sigmoid')(conc)

        model = Model(inputs=m_input, outputs=sigmoid)
        rmsprop = RMSprop(lr=PARAMS.ML_HOURGLASS_LEARN_RATE, decay=PARAMS.ML_HOURGLASS_LEARN_RATE_DECAY)

        model.compile(rmsprop, loss=losses.binary_crossentropy, metrics=['accuracy'])

        model.summary()

        print("Input shape: ", m_input.shape)
        # Number of stage outputs that were merged into the model output.
        print("Output length: ", len(out))

        return model

def main():
    """Build the hourglass model and print each of its output tensors."""
    model = HGKerasModel().build_model()

    # `model.outputs` is always a list of tensors. `model.output` is a single
    # tensor for a one-output model, and iterating a symbolic tensor is not
    # supported in graph mode.
    for out in model.outputs:
        print(out)

# Build and inspect the model only when this file is run as a script.
if __name__ == "__main__":
    main()

Now the summary of the model with 2 stacks (stages) is this:

Layer (type)                    Output Shape         Param #     Connected to
==================================================================================================
input_1 (InputLayer)            (None, 256, 256, 3)  0
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 128, 128, 64) 6912        input_1[0][0]
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 128, 128, 64) 192         conv2d_1[0][0]
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 128, 128, 64) 0           batch_normalization_1[0][0]
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 128, 128, 64) 192         activation_1[0][0]
__________________________________________________________________________________________________
activation_2 (Activation)       (None, 128, 128, 64) 0           batch_normalization_2[0][0]
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 128, 128, 128 8192        activation_2[0][0]
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 128, 128, 128 8192        activation_1[0][0]
__________________________________________________________________________________________________
add_1 (Add)                     (None, 128, 128, 128 0           conv2d_2[0][0]
                                                                 conv2d_3[0][0]
__________________________________________________________________________________________________
activation_3 (Activation)       (None, 128, 128, 128 0           add_1[0][0]
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 64, 64, 128)  0           activation_3[0][0]
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 64, 64, 128)  384         max_pooling2d_1[0][0]
__________________________________________________________________________________________________
activation_4 (Activation)       (None, 64, 64, 128)  0           batch_normalization_3[0][0]
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 64, 64, 128)  16384       activation_4[0][0]
__________________________________________________________________________________________________
add_2 (Add)                     (None, 64, 64, 128)  0           conv2d_4[0][0]
                                                                 max_pooling2d_1[0][0]
__________________________________________________________________________________________________
activation_5 (Activation)       (None, 64, 64, 128)  0           add_2[0][0]
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 64, 64, 128)  384         activation_5[0][0]
__________________________________________________________________________________________________
activation_6 (Activation)       (None, 64, 64, 128)  0           batch_normalization_4[0][0]
__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 64, 64, 256)  32768       activation_6[0][0]
__________________________________________________________________________________________________
conv2d_6 (Conv2D)               (None, 64, 64, 256)  32768       activation_5[0][0]
__________________________________________________________________________________________________
add_3 (Add)                     (None, 64, 64, 256)  0           conv2d_5[0][0]
                                                                 conv2d_6[0][0]
__________________________________________________________________________________________________
activation_7 (Activation)       (None, 64, 64, 256)  0           add_3[0][0]
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D)  (None, 32, 32, 256)  0           activation_7[0][0]
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 32, 32, 256)  768         max_pooling2d_2[0][0]
__________________________________________________________________________________________________
activation_10 (Activation)      (None, 32, 32, 256)  0           batch_normalization_6[0][0]
__________________________________________________________________________________________________
conv2d_8 (Conv2D)               (None, 32, 32, 256)  65536       activation_10[0][0]
__________________________________________________________________________________________________
add_5 (Add)                     (None, 32, 32, 256)  0           conv2d_8[0][0]
                                                                 max_pooling2d_2[0][0]
__________________________________________________________________________________________________
activation_11 (Activation)      (None, 32, 32, 256)  0           add_5[0][0]
__________________________________________________________________________________________________
max_pooling2d_3 (MaxPooling2D)  (None, 16, 16, 256)  0           activation_11[0][0]
__________________________________________________________________________________________________
batch_normalization_8 (BatchNor (None, 16, 16, 256)  768         max_pooling2d_3[0][0]
__________________________________________________________________________________________________
activation_14 (Activation)      (None, 16, 16, 256)  0           batch_normalization_8[0][0]
__________________________________________________________________________________________________
conv2d_10 (Conv2D)              (None, 16, 16, 256)  65536       activation_14[0][0]
__________________________________________________________________________________________________
add_7 (Add)                     (None, 16, 16, 256)  0           conv2d_10[0][0]
                                                                 max_pooling2d_3[0][0]
__________________________________________________________________________________________________
activation_15 (Activation)      (None, 16, 16, 256)  0           add_7[0][0]
__________________________________________________________________________________________________
max_pooling2d_4 (MaxPooling2D)  (None, 8, 8, 256)    0           activation_15[0][0]
__________________________________________________________________________________________________
batch_normalization_10 (BatchNo (None, 8, 8, 256)    768         max_pooling2d_4[0][0]
__________________________________________________________________________________________________
activation_18 (Activation)      (None, 8, 8, 256)    0           batch_normalization_10[0][0]
__________________________________________________________________________________________________
conv2d_12 (Conv2D)              (None, 8, 8, 256)    65536       activation_18[0][0]
__________________________________________________________________________________________________
add_9 (Add)                     (None, 8, 8, 256)    0           conv2d_12[0][0]
                                                                 max_pooling2d_4[0][0]
__________________________________________________________________________________________________
activation_19 (Activation)      (None, 8, 8, 256)    0           add_9[0][0]
__________________________________________________________________________________________________
max_pooling2d_5 (MaxPooling2D)  (None, 4, 4, 256)    0           activation_19[0][0]
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 4, 4, 256)    768         max_pooling2d_5[0][0]
__________________________________________________________________________________________________
activation_22 (Activation)      (None, 4, 4, 256)    0           batch_normalization_12[0][0]
__________________________________________________________________________________________________
conv2d_14 (Conv2D)              (None, 4, 4, 256)    65536       activation_22[0][0]
__________________________________________________________________________________________________
add_11 (Add)                    (None, 4, 4, 256)    0           conv2d_14[0][0]
                                                                 max_pooling2d_5[0][0]
__________________________________________________________________________________________________
activation_23 (Activation)      (None, 4, 4, 256)    0           add_11[0][0]
__________________________________________________________________________________________________
max_pooling2d_6 (MaxPooling2D)  (None, 2, 2, 256)    0           activation_23[0][0]
__________________________________________________________________________________________________
batch_normalization_14 (BatchNo (None, 2, 2, 256)    768         max_pooling2d_6[0][0]
__________________________________________________________________________________________________
activation_26 (Activation)      (None, 2, 2, 256)    0           batch_normalization_14[0][0]
__________________________________________________________________________________________________
conv2d_16 (Conv2D)              (None, 2, 2, 256)    65536       activation_26[0][0]
__________________________________________________________________________________________________
add_13 (Add)                    (None, 2, 2, 256)    0           conv2d_16[0][0]
                                                                 max_pooling2d_6[0][0]
__________________________________________________________________________________________________
activation_27 (Activation)      (None, 2, 2, 256)    0           add_13[0][0]
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 2, 2, 256)    768         activation_27[0][0]
__________________________________________________________________________________________________
activation_28 (Activation)      (None, 2, 2, 256)    0           batch_normalization_15[0][0]
__________________________________________________________________________________________________
conv2d_17 (Conv2D)              (None, 2, 2, 256)    65536       activation_28[0][0]
__________________________________________________________________________________________________
add_14 (Add)                    (None, 2, 2, 256)    0           conv2d_17[0][0]
                                                                 activation_27[0][0]
__________________________________________________________________________________________________
activation_29 (Activation)      (None, 2, 2, 256)    0           add_14[0][0]
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 2, 2, 256)    768         activation_29[0][0]
__________________________________________________________________________________________________
activation_30 (Activation)      (None, 2, 2, 256)    0           batch_normalization_16[0][0]
__________________________________________________________________________________________________
batch_normalization_13 (BatchNo (None, 4, 4, 256)    768         activation_23[0][0]
__________________________________________________________________________________________________
conv2d_18 (Conv2D)              (None, 2, 2, 256)    65536       activation_30[0][0]
__________________________________________________________________________________________________
activation_24 (Activation)      (None, 4, 4, 256)    0           batch_normalization_13[0][0]
__________________________________________________________________________________________________
add_15 (Add)                    (None, 2, 2, 256)    0           conv2d_18[0][0]
                                                                 activation_29[0][0]
__________________________________________________________________________________________________
conv2d_15 (Conv2D)              (None, 4, 4, 256)    65536       activation_24[0][0]
__________________________________________________________________________________________________
activation_31 (Activation)      (None, 2, 2, 256)    0           add_15[0][0]
__________________________________________________________________________________________________
add_12 (Add)                    (None, 4, 4, 256)    0           conv2d_15[0][0]
                                                                 activation_23[0][0]
__________________________________________________________________________________________________
up_sampling2d_1 (UpSampling2D)  (None, 4, 4, 256)    0           activation_31[0][0]
__________________________________________________________________________________________________
activation_25 (Activation)      (None, 4, 4, 256)    0           add_12[0][0]
__________________________________________________________________________________________________
add_16 (Add)                    (None, 4, 4, 256)    0           up_sampling2d_1[0][0]
                                                                 activation_25[0][0]
__________________________________________________________________________________________________
activation_32 (Activation)      (None, 4, 4, 256)    0           add_16[0][0]
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 4, 4, 256)    0           activation_32[0][0]
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 4, 4, 256)    768         dropout_1[0][0]
__________________________________________________________________________________________________
activation_33 (Activation)      (None, 4, 4, 256)    0           batch_normalization_17[0][0]
__________________________________________________________________________________________________
batch_normalization_11 (BatchNo (None, 8, 8, 256)    768         activation_19[0][0]
__________________________________________________________________________________________________
conv2d_19 (Conv2D)              (None, 4, 4, 256)    65536       activation_33[0][0]
__________________________________________________________________________________________________
activation_20 (Activation)      (None, 8, 8, 256)    0           batch_normalization_11[0][0]
__________________________________________________________________________________________________
add_17 (Add)                    (None, 4, 4, 256)    0           conv2d_19[0][0]
                                                                 dropout_1[0][0]
__________________________________________________________________________________________________
conv2d_13 (Conv2D)              (None, 8, 8, 256)    65536       activation_20[0][0]
__________________________________________________________________________________________________
activation_34 (Activation)      (None, 4, 4, 256)    0           add_17[0][0]
__________________________________________________________________________________________________
add_10 (Add)                    (None, 8, 8, 256)    0           conv2d_13[0][0]
                                                                 activation_19[0][0]
__________________________________________________________________________________________________
up_sampling2d_2 (UpSampling2D)  (None, 8, 8, 256)    0           activation_34[0][0]
__________________________________________________________________________________________________
activation_21 (Activation)      (None, 8, 8, 256)    0           add_10[0][0]
__________________________________________________________________________________________________
add_18 (Add)                    (None, 8, 8, 256)    0           up_sampling2d_2[0][0]
                                                                 activation_21[0][0]
__________________________________________________________________________________________________
activation_35 (Activation)      (None, 8, 8, 256)    0           add_18[0][0]
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 8, 8, 256)    0           activation_35[0][0]
__________________________________________________________________________________________________
batch_normalization_18 (BatchNo (None, 8, 8, 256)    768         dropout_2[0][0]
__________________________________________________________________________________________________
activation_36 (Activation)      (None, 8, 8, 256)    0           batch_normalization_18[0][0]
__________________________________________________________________________________________________
batch_normalization_9 (BatchNor (None, 16, 16, 256)  768         activation_15[0][0]
__________________________________________________________________________________________________
conv2d_20 (Conv2D)              (None, 8, 8, 256)    65536       activation_36[0][0]
__________________________________________________________________________________________________
activation_16 (Activation)      (None, 16, 16, 256)  0           batch_normalization_9[0][0]
__________________________________________________________________________________________________
add_19 (Add)                    (None, 8, 8, 256)    0           conv2d_20[0][0]
                                                                 dropout_2[0][0]
__________________________________________________________________________________________________
conv2d_11 (Conv2D)              (None, 16, 16, 256)  65536       activation_16[0][0]
__________________________________________________________________________________________________
activation_37 (Activation)      (None, 8, 8, 256)    0           add_19[0][0]
__________________________________________________________________________________________________
add_8 (Add)                     (None, 16, 16, 256)  0           conv2d_11[0][0]
                                                                 activation_15[0][0]
__________________________________________________________________________________________________
up_sampling2d_3 (UpSampling2D)  (None, 16, 16, 256)  0           activation_37[0][0]
__________________________________________________________________________________________________
activation_17 (Activation)      (None, 16, 16, 256)  0           add_8[0][0]
__________________________________________________________________________________________________
add_20 (Add)                    (None, 16, 16, 256)  0           up_sampling2d_3[0][0]
                                                                 activation_17[0][0]
__________________________________________________________________________________________________
activation_38 (Activation)      (None, 16, 16, 256)  0           add_20[0][0]
__________________________________________________________________________________________________
dropout_3 (Dropout)             (None, 16, 16, 256)  0           activation_38[0][0]
__________________________________________________________________________________________________
batch_normalization_19 (BatchNo (None, 16, 16, 256)  768         dropout_3[0][0]
__________________________________________________________________________________________________
activation_39 (Activation)      (None, 16, 16, 256)  0           batch_normalization_19[0][0]
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 32, 32, 256)  768         activation_11[0][0]
__________________________________________________________________________________________________
conv2d_21 (Conv2D)              (None, 16, 16, 256)  65536       activation_39[0][0]
__________________________________________________________________________________________________
activation_12 (Activation)      (None, 32, 32, 256)  0           batch_normalization_7[0][0]
__________________________________________________________________________________________________
add_21 (Add)                    (None, 16, 16, 256)  0           conv2d_21[0][0]
                                                                 dropout_3[0][0]
__________________________________________________________________________________________________
conv2d_9 (Conv2D)               (None, 32, 32, 256)  65536       activation_12[0][0]
__________________________________________________________________________________________________
activation_40 (Activation)      (None, 16, 16, 256)  0           add_21[0][0]
__________________________________________________________________________________________________
add_6 (Add)                     (None, 32, 32, 256)  0           conv2d_9[0][0]
                                                                 activation_11[0][0]
__________________________________________________________________________________________________
up_sampling2d_4 (UpSampling2D)  (None, 32, 32, 256)  0           activation_40[0][0]
__________________________________________________________________________________________________
activation_13 (Activation)      (None, 32, 32, 256)  0           add_6[0][0]
__________________________________________________________________________________________________
add_22 (Add)                    (None, 32, 32, 256)  0           up_sampling2d_4[0][0]
                                                                 activation_13[0][0]
__________________________________________________________________________________________________
activation_41 (Activation)      (None, 32, 32, 256)  0           add_22[0][0]
__________________________________________________________________________________________________
dropout_4 (Dropout)             (None, 32, 32, 256)  0           activation_41[0][0]
__________________________________________________________________________________________________
batch_normalization_20 (BatchNo (None, 32, 32, 256)  768         dropout_4[0][0]
__________________________________________________________________________________________________
activation_42 (Activation)      (None, 32, 32, 256)  0           batch_normalization_20[0][0]
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 64, 64, 256)  768         activation_7[0][0]
__________________________________________________________________________________________________
conv2d_22 (Conv2D)              (None, 32, 32, 256)  65536       activation_42[0][0]
__________________________________________________________________________________________________
activation_8 (Activation)       (None, 64, 64, 256)  0           batch_normalization_5[0][0]
__________________________________________________________________________________________________
add_23 (Add)                    (None, 32, 32, 256)  0           conv2d_22[0][0]
                                                                 dropout_4[0][0]
__________________________________________________________________________________________________
conv2d_7 (Conv2D)               (None, 64, 64, 256)  65536       activation_8[0][0]
__________________________________________________________________________________________________
activation_43 (Activation)      (None, 32, 32, 256)  0           add_23[0][0]
__________________________________________________________________________________________________
add_4 (Add)                     (None, 64, 64, 256)  0           conv2d_7[0][0]
                                                                 activation_7[0][0]
__________________________________________________________________________________________________
up_sampling2d_5 (UpSampling2D)  (None, 64, 64, 256)  0           activation_43[0][0]
__________________________________________________________________________________________________
activation_9 (Activation)       (None, 64, 64, 256)  0           add_4[0][0]
__________________________________________________________________________________________________
add_24 (Add)                    (None, 64, 64, 256)  0           up_sampling2d_5[0][0]
                                                                 activation_9[0][0]
__________________________________________________________________________________________________
activation_44 (Activation)      (None, 64, 64, 256)  0           add_24[0][0]
__________________________________________________________________________________________________
dropout_5 (Dropout)             (None, 64, 64, 256)  0           activation_44[0][0]
__________________________________________________________________________________________________
conv2d_23 (Conv2D)              (None, 64, 64, 256)  65536       dropout_5[0][0]
__________________________________________________________________________________________________
batch_normalization_21 (BatchNo (None, 64, 64, 256)  768         conv2d_23[0][0]
__________________________________________________________________________________________________
activation_45 (Activation)      (None, 64, 64, 256)  0           batch_normalization_21[0][0]
__________________________________________________________________________________________________
conv2d_24 (Conv2D)              (None, 64, 64, 16)   4096        activation_45[0][0]
__________________________________________________________________________________________________
batch_normalization_22 (BatchNo (None, 64, 64, 16)   48          conv2d_24[0][0]
__________________________________________________________________________________________________
activation_46 (Activation)      (None, 64, 64, 16)   0           batch_normalization_22[0][0]
__________________________________________________________________________________________________
conv2d_25 (Conv2D)              (None, 64, 64, 256)  4096        activation_46[0][0]
__________________________________________________________________________________________________
add_25 (Add)                    (None, 64, 64, 256)  0           conv2d_25[0][0]
                                                                 activation_45[0][0]
                                                                 activation_7[0][0]
__________________________________________________________________________________________________
max_pooling2d_7 (MaxPooling2D)  (None, 32, 32, 256)  0           add_25[0][0]
__________________________________________________________________________________________________
batch_normalization_24 (BatchNo (None, 32, 32, 256)  768         max_pooling2d_7[0][0]
__________________________________________________________________________________________________
activation_49 (Activation)      (None, 32, 32, 256)  0           batch_normalization_24[0][0]
__________________________________________________________________________________________________
conv2d_27 (Conv2D)              (None, 32, 32, 256)  65536       activation_49[0][0]
__________________________________________________________________________________________________
add_27 (Add)                    (None, 32, 32, 256)  0           conv2d_27[0][0]
                                                                 max_pooling2d_7[0][0]
__________________________________________________________________________________________________
activation_50 (Activation)      (None, 32, 32, 256)  0           add_27[0][0]
__________________________________________________________________________________________________
max_pooling2d_8 (MaxPooling2D)  (None, 16, 16, 256)  0           activation_50[0][0]
__________________________________________________________________________________________________
batch_normalization_26 (BatchNo (None, 16, 16, 256)  768         max_pooling2d_8[0][0]
__________________________________________________________________________________________________
activation_53 (Activation)      (None, 16, 16, 256)  0           batch_normalization_26[0][0]
__________________________________________________________________________________________________
conv2d_29 (Conv2D)              (None, 16, 16, 256)  65536       activation_53[0][0]
__________________________________________________________________________________________________
add_29 (Add)                    (None, 16, 16, 256)  0           conv2d_29[0][0]
                                                                 max_pooling2d_8[0][0]
__________________________________________________________________________________________________
activation_54 (Activation)      (None, 16, 16, 256)  0           add_29[0][0]
__________________________________________________________________________________________________
max_pooling2d_9 (MaxPooling2D)  (None, 8, 8, 256)    0           activation_54[0][0]
__________________________________________________________________________________________________
batch_normalization_28 (BatchNo (None, 8, 8, 256)    768         max_pooling2d_9[0][0]
__________________________________________________________________________________________________
activation_57 (Activation)      (None, 8, 8, 256)    0           batch_normalization_28[0][0]
__________________________________________________________________________________________________
conv2d_31 (Conv2D)              (None, 8, 8, 256)    65536       activation_57[0][0]
__________________________________________________________________________________________________
add_31 (Add)                    (None, 8, 8, 256)    0           conv2d_31[0][0]
                                                                 max_pooling2d_9[0][0]
__________________________________________________________________________________________________
activation_58 (Activation)      (None, 8, 8, 256)    0           add_31[0][0]
__________________________________________________________________________________________________
max_pooling2d_10 (MaxPooling2D) (None, 4, 4, 256)    0           activation_58[0][0]
__________________________________________________________________________________________________
batch_normalization_30 (BatchNo (None, 4, 4, 256)    768         max_pooling2d_10[0][0]
__________________________________________________________________________________________________
activation_61 (Activation)      (None, 4, 4, 256)    0           batch_normalization_30[0][0]
__________________________________________________________________________________________________
conv2d_33 (Conv2D)              (None, 4, 4, 256)    65536       activation_61[0][0]
__________________________________________________________________________________________________
add_33 (Add)                    (None, 4, 4, 256)    0           conv2d_33[0][0]
                                                                 max_pooling2d_10[0][0]
__________________________________________________________________________________________________
activation_62 (Activation)      (None, 4, 4, 256)    0           add_33[0][0]
__________________________________________________________________________________________________
max_pooling2d_11 (MaxPooling2D) (None, 2, 2, 256)    0           activation_62[0][0]
__________________________________________________________________________________________________
batch_normalization_32 (BatchNo (None, 2, 2, 256)    768         max_pooling2d_11[0][0]
__________________________________________________________________________________________________
activation_65 (Activation)      (None, 2, 2, 256)    0           batch_normalization_32[0][0]
__________________________________________________________________________________________________
conv2d_35 (Conv2D)              (None, 2, 2, 256)    65536       activation_65[0][0]
__________________________________________________________________________________________________
add_35 (Add)                    (None, 2, 2, 256)    0           conv2d_35[0][0]
                                                                 max_pooling2d_11[0][0]
__________________________________________________________________________________________________
activation_66 (Activation)      (None, 2, 2, 256)    0           add_35[0][0]
__________________________________________________________________________________________________
batch_normalization_33 (BatchNo (None, 2, 2, 256)    768         activation_66[0][0]
__________________________________________________________________________________________________
activation_67 (Activation)      (None, 2, 2, 256)    0           batch_normalization_33[0][0]
__________________________________________________________________________________________________
conv2d_36 (Conv2D)              (None, 2, 2, 256)    65536       activation_67[0][0]
__________________________________________________________________________________________________
add_36 (Add)                    (None, 2, 2, 256)    0           conv2d_36[0][0]
                                                                 activation_66[0][0]
__________________________________________________________________________________________________
activation_68 (Activation)      (None, 2, 2, 256)    0           add_36[0][0]
__________________________________________________________________________________________________
batch_normalization_34 (BatchNo (None, 2, 2, 256)    768         activation_68[0][0]
__________________________________________________________________________________________________
activation_69 (Activation)      (None, 2, 2, 256)    0           batch_normalization_34[0][0]
__________________________________________________________________________________________________
batch_normalization_31 (BatchNo (None, 4, 4, 256)    768         activation_62[0][0]
__________________________________________________________________________________________________
conv2d_37 (Conv2D)              (None, 2, 2, 256)    65536       activation_69[0][0]
__________________________________________________________________________________________________
activation_63 (Activation)      (None, 4, 4, 256)    0           batch_normalization_31[0][0]
__________________________________________________________________________________________________
add_37 (Add)                    (None, 2, 2, 256)    0           conv2d_37[0][0]
                                                                 activation_68[0][0]
__________________________________________________________________________________________________
conv2d_34 (Conv2D)              (None, 4, 4, 256)    65536       activation_63[0][0]
__________________________________________________________________________________________________
activation_70 (Activation)      (None, 2, 2, 256)    0           add_37[0][0]
__________________________________________________________________________________________________
add_34 (Add)                    (None, 4, 4, 256)    0           conv2d_34[0][0]
                                                                 activation_62[0][0]
__________________________________________________________________________________________________
up_sampling2d_6 (UpSampling2D)  (None, 4, 4, 256)    0           activation_70[0][0]
__________________________________________________________________________________________________
activation_64 (Activation)      (None, 4, 4, 256)    0           add_34[0][0]
__________________________________________________________________________________________________
add_38 (Add)                    (None, 4, 4, 256)    0           up_sampling2d_6[0][0]
                                                                 activation_64[0][0]
__________________________________________________________________________________________________
activation_71 (Activation)      (None, 4, 4, 256)    0           add_38[0][0]
__________________________________________________________________________________________________
dropout_6 (Dropout)             (None, 4, 4, 256)    0           activation_71[0][0]
__________________________________________________________________________________________________
batch_normalization_35 (BatchNo (None, 4, 4, 256)    768         dropout_6[0][0]
__________________________________________________________________________________________________
activation_72 (Activation)      (None, 4, 4, 256)    0           batch_normalization_35[0][0]
__________________________________________________________________________________________________
batch_normalization_29 (BatchNo (None, 8, 8, 256)    768         activation_58[0][0]
__________________________________________________________________________________________________
conv2d_38 (Conv2D)              (None, 4, 4, 256)    65536       activation_72[0][0]
__________________________________________________________________________________________________
activation_59 (Activation)      (None, 8, 8, 256)    0           batch_normalization_29[0][0]
__________________________________________________________________________________________________
add_39 (Add)                    (None, 4, 4, 256)    0           conv2d_38[0][0]
                                                                 dropout_6[0][0]
__________________________________________________________________________________________________
conv2d_32 (Conv2D)              (None, 8, 8, 256)    65536       activation_59[0][0]
__________________________________________________________________________________________________
activation_73 (Activation)      (None, 4, 4, 256)    0           add_39[0][0]
__________________________________________________________________________________________________
add_32 (Add)                    (None, 8, 8, 256)    0           conv2d_32[0][0]
                                                                 activation_58[0][0]
__________________________________________________________________________________________________
up_sampling2d_7 (UpSampling2D)  (None, 8, 8, 256)    0           activation_73[0][0]
__________________________________________________________________________________________________
activation_60 (Activation)      (None, 8, 8, 256)    0           add_32[0][0]
__________________________________________________________________________________________________
add_40 (Add)                    (None, 8, 8, 256)    0           up_sampling2d_7[0][0]
                                                                 activation_60[0][0]
__________________________________________________________________________________________________
activation_74 (Activation)      (None, 8, 8, 256)    0           add_40[0][0]
__________________________________________________________________________________________________
dropout_7 (Dropout)             (None, 8, 8, 256)    0           activation_74[0][0]
__________________________________________________________________________________________________
batch_normalization_36 (BatchNo (None, 8, 8, 256)    768         dropout_7[0][0]
__________________________________________________________________________________________________
activation_75 (Activation)      (None, 8, 8, 256)    0           batch_normalization_36[0][0]
__________________________________________________________________________________________________
batch_normalization_27 (BatchNo (None, 16, 16, 256)  768         activation_54[0][0]
__________________________________________________________________________________________________
conv2d_39 (Conv2D)              (None, 8, 8, 256)    65536       activation_75[0][0]
__________________________________________________________________________________________________
activation_55 (Activation)      (None, 16, 16, 256)  0           batch_normalization_27[0][0]
__________________________________________________________________________________________________
add_41 (Add)                    (None, 8, 8, 256)    0           conv2d_39[0][0]
                                                                 dropout_7[0][0]
__________________________________________________________________________________________________
conv2d_30 (Conv2D)              (None, 16, 16, 256)  65536       activation_55[0][0]
__________________________________________________________________________________________________
activation_76 (Activation)      (None, 8, 8, 256)    0           add_41[0][0]
__________________________________________________________________________________________________
add_30 (Add)                    (None, 16, 16, 256)  0           conv2d_30[0][0]
                                                                 activation_54[0][0]
__________________________________________________________________________________________________
up_sampling2d_8 (UpSampling2D)  (None, 16, 16, 256)  0           activation_76[0][0]
__________________________________________________________________________________________________
activation_56 (Activation)      (None, 16, 16, 256)  0           add_30[0][0]
__________________________________________________________________________________________________
add_42 (Add)                    (None, 16, 16, 256)  0           up_sampling2d_8[0][0]
                                                                 activation_56[0][0]
__________________________________________________________________________________________________
activation_77 (Activation)      (None, 16, 16, 256)  0           add_42[0][0]
__________________________________________________________________________________________________
dropout_8 (Dropout)             (None, 16, 16, 256)  0           activation_77[0][0]
__________________________________________________________________________________________________
batch_normalization_37 (BatchNo (None, 16, 16, 256)  768         dropout_8[0][0]
__________________________________________________________________________________________________
activation_78 (Activation)      (None, 16, 16, 256)  0           batch_normalization_37[0][0]
__________________________________________________________________________________________________
batch_normalization_25 (BatchNo (None, 32, 32, 256)  768         activation_50[0][0]
__________________________________________________________________________________________________
conv2d_40 (Conv2D)              (None, 16, 16, 256)  65536       activation_78[0][0]
__________________________________________________________________________________________________
activation_51 (Activation)      (None, 32, 32, 256)  0           batch_normalization_25[0][0]
__________________________________________________________________________________________________
add_43 (Add)                    (None, 16, 16, 256)  0           conv2d_40[0][0]
                                                                 dropout_8[0][0]
__________________________________________________________________________________________________
conv2d_28 (Conv2D)              (None, 32, 32, 256)  65536       activation_51[0][0]
__________________________________________________________________________________________________
activation_79 (Activation)      (None, 16, 16, 256)  0           add_43[0][0]
__________________________________________________________________________________________________
add_28 (Add)                    (None, 32, 32, 256)  0           conv2d_28[0][0]
                                                                 activation_50[0][0]
__________________________________________________________________________________________________
up_sampling2d_9 (UpSampling2D)  (None, 32, 32, 256)  0           activation_79[0][0]
__________________________________________________________________________________________________
activation_52 (Activation)      (None, 32, 32, 256)  0           add_28[0][0]
__________________________________________________________________________________________________
add_44 (Add)                    (None, 32, 32, 256)  0           up_sampling2d_9[0][0]
                                                                 activation_52[0][0]
__________________________________________________________________________________________________
activation_80 (Activation)      (None, 32, 32, 256)  0           add_44[0][0]
__________________________________________________________________________________________________
dropout_9 (Dropout)             (None, 32, 32, 256)  0           activation_80[0][0]
__________________________________________________________________________________________________
batch_normalization_38 (BatchNo (None, 32, 32, 256)  768         dropout_9[0][0]
__________________________________________________________________________________________________
activation_81 (Activation)      (None, 32, 32, 256)  0           batch_normalization_38[0][0]
__________________________________________________________________________________________________
batch_normalization_23 (BatchNo (None, 64, 64, 256)  768         add_25[0][0]
__________________________________________________________________________________________________
conv2d_41 (Conv2D)              (None, 32, 32, 256)  65536       activation_81[0][0]
__________________________________________________________________________________________________
activation_47 (Activation)      (None, 64, 64, 256)  0           batch_normalization_23[0][0]
__________________________________________________________________________________________________
add_45 (Add)                    (None, 32, 32, 256)  0           conv2d_41[0][0]
                                                                 dropout_9[0][0]
__________________________________________________________________________________________________
conv2d_26 (Conv2D)              (None, 64, 64, 256)  65536       activation_47[0][0]
__________________________________________________________________________________________________
activation_82 (Activation)      (None, 32, 32, 256)  0           add_45[0][0]
__________________________________________________________________________________________________
add_26 (Add)                    (None, 64, 64, 256)  0           conv2d_26[0][0]
                                                                 add_25[0][0]
__________________________________________________________________________________________________
up_sampling2d_10 (UpSampling2D) (None, 64, 64, 256)  0           activation_82[0][0]
__________________________________________________________________________________________________
activation_48 (Activation)      (None, 64, 64, 256)  0           add_26[0][0]
__________________________________________________________________________________________________
add_46 (Add)                    (None, 64, 64, 256)  0           up_sampling2d_10[0][0]
                                                                 activation_48[0][0]
__________________________________________________________________________________________________
activation_83 (Activation)      (None, 64, 64, 256)  0           add_46[0][0]
__________________________________________________________________________________________________
dropout_10 (Dropout)            (None, 64, 64, 256)  0           activation_83[0][0]
__________________________________________________________________________________________________
conv2d_42 (Conv2D)              (None, 64, 64, 256)  65536       dropout_10[0][0]
__________________________________________________________________________________________________
batch_normalization_39 (BatchNo (None, 64, 64, 256)  768         conv2d_42[0][0]
__________________________________________________________________________________________________
activation_84 (Activation)      (None, 64, 64, 256)  0           batch_normalization_39[0][0]
__________________________________________________________________________________________________
conv2d_43 (Conv2D)              (None, 64, 64, 16)   4096        activation_84[0][0]
__________________________________________________________________________________________________
batch_normalization_40 (BatchNo (None, 64, 64, 16)   48          conv2d_43[0][0]
__________________________________________________________________________________________________
activation_85 (Activation)      (None, 64, 64, 16)   0           batch_normalization_40[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 64, 64, 32)   0           activation_46[0][0]
                                                                 activation_85[0][0]
__________________________________________________________________________________________________
activation_86 (Activation)      (None, 64, 64, 32)   0           concatenate_1[0][0]
==================================================================================================
Total params: 2,373,088
Trainable params: 2,354,848
Non-trainable params: 18,240
__________________________________________________________________________________________________
Input shape:  (?, 256, 256, 3)

As you can see, the output does not have the shape (16, 2) that would contain the joints' x,y coordinates... Any idea what's missing here? @wbenbihi

SaifAlDilaimi commented 6 years ago

Any idea @wbenbihi ??

ZhuohaoMai commented 6 years ago

Of course it doesn't. The hourglass network just regresses heatmaps, not coordinates.

xiaoxin05 commented 3 years ago

Of course it doesn't. The hourglass network just regresses heatmaps, not coordinates.

Hi, I would like to ask you a question: how do I change this loss function from sigmoid cross-entropy to mean squared error? @ZhuohaoMai