StanfordVL / ReferringRelationships


unable to load a model (h5) after training #17

Closed jageshmaharjan closed 5 years ago

jageshmaharjan commented 5 years ago

I trained on the Visual Genome dataset, and the following are the saved Keras models:

dsdev@dsdev:/mnt/data/savedir$ ls
args.json                             model04-1.67.h5  model09-1.82.h5  model14-2.24.h5  model19-2.69.h5
events.out.tfevents.1573210626.dsdev  model05-1.66.h5  model10-1.90.h5  model15-2.40.h5  model20-2.73.h5
model01-1.66.h5                       model06-1.67.h5  model11-1.94.h5  model16-2.47.h5  model21-2.78.h5
model02-1.62.h5                       model07-1.63.h5  model12-2.04.h5  model17-2.57.h5  model22-2.81.h5
model03-1.62.h5                       model08-1.74.h5  model13-2.18.h5  model18-2.62.h5  train.log

I am trying to convert the trained model (h5) into the protobuf (pb) format. Since your Keras checkpointer sets save_weights_only to True, I am building the model graph from model.py and then loading the weights into it, but I get this error:

model.load_weights('/home/dsdev/model16-2.47.h5')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/data/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/network.py", line 1166, in load_weights
    f, self.layers, reshape=reshape)
  File "/data/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/saving.py", line 1030, in load_weights_from_hdf5_group
    str(len(filtered_layers)) + ' layers.')
ValueError: You are trying to load a weight file containing 844 layers into a model with 564 layers.

The way I am loading the graph is as follows:

args = {"input_dim": 224, "feat_map_dim": 14, "hidden_dim": 1024, "num_objects": 100, "num_predicates": 70, "dropout": 0.0, "use_subject": True, "use_predicate": True, "use_object": True,  "nb_conv_att_map": 6, "nb_conv_im_map": 0, "cnn": "resnet", "feat_map_layer": "activation_40", "conv_im_kernel": 0, "conv_predicate_kernel": 7,  "conv_predicate_channels": 10, "model": "ssas", "use_internal_loss": True, "internal_loss_weight": 1.0, "iterations": 3,  "attention_conv_kernel": 3, "refinement_conv_kernel": 3, "output_dim": 14, "embedding_dim": 512, "finetune_cnn": True}
args = parse_args()

metrics = get_metrics(args.output_dim, args.heatmap_threshold)
relationships_model = ReferringRelationshipsModel(args)
model = relationships_model.build_model()
optimizer = get_opt(opt=args.opt, lr=args.lr)

if args.loss_func == 'weighted':
  loss_func = get_loss_func(args.w1)
else:
  loss_func = 'binary_crossentropy'

losses = [loss_func, loss_func]
model.compile(loss=losses, optimizer=optimizer, metrics=metrics)
model.load_weights('/home/dsdev/model16-2.47.h5')
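
A quick way to see where the 844-vs-564 mismatch comes from is to compare the layer names recorded in the checkpoint with the layers of the freshly built graph. A minimal diagnostic sketch, assuming the file was written by Keras's save_weights (which stores a layer_names attribute at the top level of the HDF5 file) and that model is the graph built above:

import h5py

# Layer names Keras recorded when it saved the weights.
with h5py.File('/home/dsdev/model16-2.47.h5', 'r') as f:
    saved_layers = [name.decode('utf8') for name in f.attrs['layer_names']]

built_layers = [layer.name for layer in model.layers]
print(len(saved_layers), len(built_layers))
# Layers present in the checkpoint but missing from the rebuilt graph:
print(sorted(set(saved_layers) - set(built_layers)))

If the two sets differ, the configuration used to rebuild the graph does not match the one used at training time, which is what loading the saved args.json (as in the follow-up comment below) avoids.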
jageshmaharjan commented 5 years ago

Never mind, I managed to freeze the model, but it does not expose a signature definition, so TensorFlow Serving cannot serve it. I'll open another issue for that.

from config import parse_args
from iterator import DiscoveryIterator, SmartIterator
from keras.optimizers import RMSprop
from models import ReferringRelationshipsModel
from utils.eval_utils import format_results_eval
from utils.visualization_utils import objdict
from utils.eval_utils import get_metrics
from utils.train_utils import get_loss_func
import json
import os

args = parse_args(evaluation=True)
models_dir = '/mnt/data/savedir/'
heatmap_threshold = 0.5
params = objdict(json.load(open(os.path.join(models_dir, "args.json"), "r")))
params.discovery = args.discovery
params.shuffle = False
metrics = get_metrics(params.output_dim, args.heatmap_threshold)
relationships_model = ReferringRelationshipsModel(params)
model = relationships_model.build_model()
loss_func = get_loss_func(params.w1)
model.compile(loss=[loss_func, loss_func], optimizer=RMSprop(lr=0.01), metrics=metrics)
model.load_weights('/mnt/data/savedir/model22-2.81.h5')
model.inputs
[<tf.Tensor 'input_1:0' shape=(?, 224, 224, 3) dtype=float32>, <tf.Tensor 'input_2:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'input_4:0' shape=(?, 70) dtype=float32>, <tf.Tensor 'input_3:0' shape=(?, 1) dtype=float32>]
model.outputs
[<tf.Tensor 'subject/Reshape:0' shape=(?, 196) dtype=float32>, <tf.Tensor 'object/Reshape:0' shape=(?, 196) dtype=float32>]

from keras import backend as K
import tensorflow as tf
K.set_learning_phase(0)

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """Freeze the current TF session: convert its variables to constants and
    return a pruned GraphDef that can be written out as a .pb file."""
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        # Freeze every variable unless it is explicitly listed in keep_var_names.
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            # Strip device placements so the frozen graph is portable.
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph

frozen_graph = freeze_session(K.get_session(), output_names=[out.op.name for out in model.outputs])

tf.train.write_graph(frozen_graph, "model", "tf_model.pb", as_text=False)

Copied the frozen graph and created a numbered version directory for serving:

.
├── 1
│   └── saved_model.pb
└── tf_model.pb
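
Copying the frozen GraphDef into 1/saved_model.pb does not give TensorFlow Serving a SignatureDef to look up, which is likely why it refuses to serve the model. A minimal sketch of an alternative, assuming TF 1.x and that the Keras session still holds the trained weights: export a SavedModel with a serving signature via tf.saved_model.simple_save instead of copying the .pb by hand.

import tensorflow as tf
from keras import backend as K

# Export directory; TF Serving expects a numeric version sub-directory,
# and simple_save requires that the directory does not already exist.
export_dir = 'model/1'

# simple_save writes saved_model.pb plus a variables/ folder and attaches a
# 'serving_default' SignatureDef built from the given input/output tensors.
tf.saved_model.simple_save(
    K.get_session(),
    export_dir,
    inputs={t.name: t for t in model.inputs},
    outputs={t.name: t for t in model.outputs})

TensorFlow Serving can then be pointed at the parent directory (the one containing the numbered version folders) via --model_base_path.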