USTC-Video-Understanding / I3D_Finetune

TensorFlow code for finetuning I3D model on UCF101.

I finetuned the model for 3 output classes, but at prediction time I get an input shape mismatch error #32

Open siddharth2022 opened 5 years ago

siddharth2022 commented 5 years ago

[Screenshot (232)] `input to reshape is a tensor with 409600 values, but the requested shape is 3072`
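The numbers point at the final layer: 409600 = 400 × 1024 and 3072 = 3 × 1024, so the checkpoint appears to still hold a 400-class Logits layer while the graph was built with 3 classes. A minimal diagnostic sketch (assuming the finetuned checkpoint path used in the test script below) that lists the Logits variable shapes stored in the checkpoint:

```python
import tensorflow as tf

# Assumed checkpoint path, taken from the test script below.
CKPT = '../../finetune_i3d/output/finetune-siddata1-rgb-1/siddata1_rgb_0.306_models1-145'

# List every variable saved in the checkpoint; the last dimension of the
# Logits conv kernel is the number of output classes the checkpoint expects.
for name, shape in tf.train.list_variables(CKPT):
    if 'Logits' in name:
        print(name, shape)
```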

siddharth2022 commented 5 years ago

```python
import argparse

import cv2
import numpy as np
import tensorflow as tf

import i3d


def main(IMAGE_SIZE=224, TOP_RESULT_CLASSES=3, SAMPLE_VIDEO_FRAMES=250,
         SAMPLE_PATH_RGB='../input/v_CricketShot_g04_c01_rgb.npy',
         SAMPLE_PATH_VIDEO='../../finetune_i3d/input/test_vid/Abuse/Abuse001_x264.mp4',
         SAMPLE_PATH_FLOW='../input/v_CricketShot_g04_c01_flow.npy',
         EVAL_TYPE='rgb', imagenet_pretrained=True, NUM_CLASSES=3):
    """Main entry point; can also be called from other files."""

    #############
    # Constants #
    #############

    # IMAGE_SIZE = 224
    # TOP_RESULT_CLASSES = 20
    # SAMPLE_VIDEO_FRAMES = 79

    # SAMPLE_PATH_RGB = '../input/v_CricketShot_g04_c01_rgb.npy'
    # SAMPLE_PATH_FLOW = '../input/v_CricketShot_g04_c01_flow.npy'
    CHECKPOINT_PATHS = {
        'rgb_imagenet': '../../finetune_i3d/output/finetune-siddata1-rgb-1/siddata1_rgb_0.306_models1-145'
    }
    LABEL_MAP_PATH = '../models/Label_map.txt'
    # Assumed placeholder path; only used when eval_type == 'rgb600'.
    LABEL_MAP_PATH_600 = '../models/label_map_600.txt'

    tf.reset_default_graph()
    eval_type = EVAL_TYPE
    # imagenet_pretrained = True
    # NUM_CLASSES = 400

    if eval_type == 'rgb600':
        NUM_CLASSES = 600

    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError('Bad `eval_type`, must be one of rgb, rgb600, flow, joint')

    if eval_type == 'rgb600':
        kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH_600)]
    else:
        kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH)]
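    # Note: with NUM_CLASSES=3 the file at LABEL_MAP_PATH should list the
    # three finetuned class names, not the 400 Kinetics labels.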

    ##############
    # Model Load #
    ##############
    # Build the model graph; the weights are restored from the checkpoint
    # inside the session below.

    if eval_type in ['rgb', 'rgb600', 'joint']:
        rgb_input = tf.placeholder(
            tf.float32,
            shape=(1, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 3))

        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(
                NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            rgb_logits, _ = rgb_model(
                rgb_input, is_training=False, dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'RGB':
                if eval_type == 'rgb600':
                    rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d/'):]] = variable
                else:
                    rgb_variable_map[variable.name.replace(':0', '')] = variable

        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
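        # Note: reshape=True only lets the Saver reinterpret saved tensors
        # that have the same total number of elements. A checkpoint Logits
        # kernel with 400 * 1024 = 409600 weights cannot be restored into a
        # 3-class (3 * 1024 = 3072) one, which matches the reported error.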

    if eval_type in ['flow', 'joint']:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(
            tf.float32,
            shape=(1, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(
                NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            flow_logits, _ = flow_model(
                flow_input, is_training=False, dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type in ['rgb', 'rgb600']:
        model_logits = rgb_logits
    elif eval_type == 'flow':
        model_logits = flow_logits
    else:
        model_logits = rgb_logits + flow_logits
    model_predictions = tf.nn.softmax(model_logits)

    ##############
    # Input Load #
    ##############
    # Load the inputs: 'rgb'/'rgb600' reads only the RGB sample, 'flow' only
    # the flow sample, and 'joint' reads both.

    def crop_center_square(frame):
        """Crop the largest centered square out of a frame."""
        y, x = frame.shape[0:2]
        min_dim = min(y, x)
        start_x = (x // 2) - (min_dim // 2)
        start_y = (y // 2) - (min_dim // 2)
        return frame[start_y:start_y + min_dim, start_x:start_x + min_dim]

    def load_video(path, max_frames=SAMPLE_VIDEO_FRAMES, resize=(224, 224)):
        """Read up to max_frames frames, center-crop, resize, scale to [0, 1]."""
        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = crop_center_square(frame)
                frame = cv2.resize(frame, resize)
                frame = frame[:, :, [2, 1, 0]]  # BGR -> RGB
                frames.append(frame)

                if len(frames) == max_frames:
                    break
        finally:
            cap.release()
        # print(len(frames))
        return [np.array(frames) / 255.0]
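    # Note: load_video may return fewer than SAMPLE_VIDEO_FRAMES frames for a
    # short clip, while rgb_input's shape is fixed at SAMPLE_VIDEO_FRAMES, so
    # a short video would also trigger a shape-mismatch error at sess.run.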

    with tf.Session() as sess:
        feed_dict = {}
        if eval_type in ['rgb', 'rgb600', 'joint']:
            if imagenet_pretrained:
                rgb_saver.restore(sess, CHECKPOINT_PATHS['rgb_imagenet'])
            else:
                rgb_saver.restore(sess, CHECKPOINT_PATHS[eval_type])
            print("video shape")
            # print(load_video(SAMPLE_PATH_VIDEO).shape)

            if SAMPLE_PATH_RGB is None:
                rgb_sample = load_video(SAMPLE_PATH_VIDEO)
            else:
                rgb_sample = np.load(SAMPLE_PATH_RGB)

            feed_dict[rgb_input] = rgb_sample

        if eval_type in ['flow', 'joint']:
            # Note: CHECKPOINT_PATHS above has no 'flow_imagenet' or 'flow'
            # entries; they must be added before running with eval_type
            # 'flow' or 'joint'.
            if imagenet_pretrained:
                flow_saver.restore(sess, CHECKPOINT_PATHS['flow_imagenet'])
            else:
                flow_saver.restore(sess, CHECKPOINT_PATHS['flow'])

            flow_sample = np.load(SAMPLE_PATH_FLOW)
            feed_dict[flow_input] = flow_sample

        ##############
        # Prediction #
        ##############

        out_logits, out_predictions = sess.run(
            [model_logits, model_predictions],
            feed_dict=feed_dict)

        out_logits = out_logits[0]
        out_predictions = out_predictions[0]
        sorted_indices = np.argsort(out_predictions)[::-1]

        print('Norm of logits: %f' % np.linalg.norm(out_logits))
        print('\nTop classes and probabilities')
        # for index in sorted_indices[:TOP_RESULT_CLASSES]:
        #     print(out_predictions[index], out_logits[index], kinetics_classes[index])
        print(sorted_indices)
        # return kinetics_classes[sorted_indices[0]]


main(SAMPLE_PATH_RGB=None)
```

This is my code for testing.