Hey Kevin, according to your stack trace the error seems to come from the model_fn. Could you paste the full script you've used here? Best, Oliver
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from gqn.gqn_model import gqn_draw_model_fn
from gqn.gqn_params import PARAMS
from data_provider.gqn_tfr_provider import gqn_input_fn
MODEL_DIR='~/rooms_ring_debug/gqn_pool_draw12'
DATA_DIR='/mnt/cube/datasets/gqn-datasets'
DATASET='rooms_ring_camera'
estimator = tf.estimator.Estimator(
    model_fn=gqn_draw_model_fn,
    model_dir=MODEL_DIR,
    params={'gqn_params' : PARAMS, 'debug' : False})
input_fn = lambda mode: gqn_input_fn(
    dataset=DATASET,
    context_size=PARAMS.CONTEXT_SIZE,
    root=DATA_DIR,
    mode=mode)
for prediction in estimator.predict(input_fn=input_fn):
    # prediction is the dict @ogroth was mentioning
    print(prediction['predicted_mean'])  # this is probably what you want to look at
    print(prediction['predicted_variance'])  # or use this to sample a noisy image
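For example, a noisy image could be sampled from those two outputs roughly like this (just a sketch, treating 'predicted_variance' as the scale of the per-pixel Gaussian used inside gqn_model.py and assuming values in [0, 1]):
import numpy as np

# Sketch only: draw one noisy sample per predicted mean.
for prediction in estimator.predict(input_fn=input_fn):
    mean = prediction['predicted_mean']
    scale = prediction['predicted_variance']
    noisy = np.clip(np.random.normal(loc=mean, scale=scale), 0.0, 1.0)
    print(noisy.shape, noisy.min(), noisy.max())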
Hi Oliver,
Thank you for providing an implementation of the GQN! I have been training the GQN on a custom dataset, but I am experiencing the same error as @kevinstan posted above when running inference on the model with tf.estimator.
Run Script:
import tensorflow as tf
from gqn.gqn_model import gqn_draw_model_fn
from gqn.gqn_params import PARAMS
from data_provider.gqn_tfr_provider import gqn_input_fn
MODEL_DIR='models/gqn-adni'
DATA_DIR='gqn-dataset'
DATASET='adni'
estimator = tf.estimator.Estimator(
    model_fn=gqn_draw_model_fn,
    model_dir=MODEL_DIR,
    params={'gqn_params' : PARAMS, 'debug' : False})
input_fn = lambda mode: gqn_input_fn(
    dataset=DATASET,
    context_size=PARAMS.CONTEXT_SIZE,
    root=DATA_DIR,
    mode=mode)
for prediction in estimator.predict(input_fn=input_fn):
    # prediction is the dict @ogroth was mentioning
    print(prediction['predicted_mean'])  # this is probably what you want to look at
    print(prediction['predicted_variance'])  # or use this to sample a noisy image
Output Error:
WARNING:tensorflow:Input graph does not use tf.data.Dataset or contain a QueueRunner. That means predict yields forever. This is probably a mistake.
Traceback (most recent call last):
File "test_gqn_draw2.py", line 22, in <module>
for prediction in estimator.predict(input_fn=input_fn):
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 533, in predict
features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 1107, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/vol/biomedic/users/bh1511/pycharm_project_853/gqn/gqn_model.py", line 100, in gqn_draw_model_fn
predictions=mu_target),
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/ops/metrics_impl.py", line 1237, in mean_squared_error
squared_error = math_ops.square(labels - predictions)
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 874, in r_binary_op_wrapper
x = ops.convert_to_tensor(x, dtype=y.dtype.base_dtype, name="x")
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1011, in convert_to_tensor
as_ref=False)
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1107, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/framework/constant_op.py", line 217, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/framework/constant_op.py", line 196, in constant
value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/framework/tensor_util.py", line 424, in make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
However, I am able to create an instance of the network and run inference manually.
import tensorflow as tf
from gqn.gqn_params import PARAMS
from gqn.gqn_graph import gqn_draw
from data_provider.gqn_tfr_provider import gqn_input_fn
example = gqn_input_fn(
    dataset='adni',
    context_size=100,
    batch_size=1,
    root='gqn-dataset',
    mode=tf.estimator.ModeKeys.PREDICT
)
# graph definition in test mode
net, ep_gqn = gqn_draw(
    query_pose=example[0].query_camera,
    target_frame=example[1],
    context_poses=example[0].context.cameras,
    context_frames=example[0].context.frames,
    model_params=PARAMS,
    is_training=False
)
saver = tf.train.Saver()
sess = tf.Session()
# Don't run initialisers, restore variables instead
# sess.run(tf.global_variables_initializer())
latest_checkpoint = tf.train.latest_checkpoint('models/gqn-adni')
saver.restore(sess, latest_checkpoint)
# Run network forward, shouldn't complain about uninitialised variables
output, output_gt = sess.run([net, example[1]])
From the code, it seems the estimator will output the predicted_mean (the mu_target tensor) and the predicted_variance (the sigma_target tensor). I understand the mu_target tensor is the output of the network, and the sigma_target tensor is the result of the _linear_noise_annealing() function. Is this correct? Can I use this as a substitute for tf.estimator?
Many Thanks and best wishes, Benjamin
@ogroth I think this is because Estimator.predict does not pass the labels to the model_fn, which means the metrics cannot be computed, which messes up l2_reconstruction on line 97.
@kevinstan, @farrell236 try removing that metric (and anything else that might require labels), and we’ll push a fix for this soon.
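The pattern would roughly look like this (a minimal TF 1.x sketch with a stand-in network, not the actual gqn_draw_model_fn): return the EstimatorSpec before any label-dependent op is built when the mode is PREDICT.
import tensorflow as tf

# Minimal sketch of a PREDICT-safe model_fn. Estimator.predict calls
# model_fn with labels=None, so every op that touches `labels` has to
# live behind the early return below.
def model_fn(features, labels, mode, params):
    mu_target = tf.layers.dense(features['x'], 1)  # stand-in for the GQN output
    predictions = {'predicted_mean': mu_target}

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.mean_squared_error(labels=labels, predictions=mu_target)
    metrics = {'l2_reconstruction': tf.metrics.mean_squared_error(
        labels=labels, predictions=mu_target)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=metrics)

    train_op = tf.train.AdamOptimizer().minimize(
        loss, global_step=tf.train.get_or_create_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)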
Hi Ștefan! Thanks for the quick response. I've commented out all instances that use the label tensor. However, it then leads to this:
WARNING:tensorflow:Input graph does not use tf.data.Dataset or contain a QueueRunner. That means predict yields forever. This is probably a mistake.
2018-10-30 18:11:43.603294: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2018-10-30 18:11:43.692931: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2018-10-30 18:11:43.693326: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties:
name: TITAN X (Pascal) major: 6 minor: 1 memoryClockRate(GHz): 1.531
pciBusID: 0000:01:00.0
totalMemory: 11.90GiB freeMemory: 11.72GiB
2018-10-30 18:11:43.693341: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0
2018-10-30 18:11:43.865174: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-10-30 18:11:43.865203: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958] 0
2018-10-30 18:11:43.865211: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0: N
2018-10-30 18:11:43.865396: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11345 MB memory) -> physical GPU (device: 0, name: TITAN X (Pascal), pci bus id: 0000:01:00.0, compute capability: 6.1)
Traceback (most recent call last):
File "test_gqn_draw2.py", line 31, in <module>
for prediction in estimator.predict(input_fn=input_fn):
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 558, in predict
for i in range(self._extract_batch_length(preds_evaluated)):
File "/vol/medic01/users/bh1511/_venv/tensorflow-py3/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 994, in _extract_batch_length
if value.shape[0] != batch_length:
IndexError: tuple index out of range
If the label tensor is None in predict mode, can I assume it is ignored in gqn_draw() because the is_training flag is set to False?
The input_fn is updated to:
input_fn = lambda mode: gqn_input_fn(
    dataset=DATASET,
    context_size=PARAMS.CONTEXT_SIZE,
    batch_size=10,
    num_threads=4,
    buffer_size=64,
    root=DATA_DIR,
    mode=tf.estimator.ModeKeys.PREDICT)
Many Thanks and best wishes, Benjamin
Thanks for the suggestion @SliMM. I removed the l2_reconstruction metric and anything requiring labels, but got the same IndexError: tuple index out of range as @farrell236.
However, I was able to manually test the model by explicitly defining the session to run, without relying on estimator.predict, similar to the script @farrell236 provided above.
The result:
After more experimentation, I found that pred = estimator.predict(input_fn=input_fn) returns a type <class 'generator'>. Then, when trying to iterate over that generator using next(pred), the IndexError occurs.
One strange thing is that here, _extract_batch_length is actually raising an IndexError rather than the specified ValueError. This means that value.shape[0] cannot be indexed.
Any tips on how I can find out the shape of value here? Why does indexing cause the error?
Best, Kevin
@kevinstan @farrell236 Can you try removing predict_sigma from the prediction dict, or try to see what shapes mu_target and sigma_target are?
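For context: Estimator._extract_batch_length indexes value.shape[0] for every entry of the prediction dict, so a scalar entry would raise exactly this IndexError. A toy reproduction (assuming sigma_target comes out as a scalar, which is not confirmed here):
import numpy as np

# Toy reproduction of the failure mode, not the repo's code: a scalar entry
# in the prediction dict has shape (), so shape[0] raises IndexError.
predictions = {
    'predicted_mean': np.zeros((10, 64, 64, 3), dtype=np.float32),  # batched
    'predicted_variance': np.float32(0.7),                          # scalar
}
for key, value in predictions.items():
    try:
        print(key, 'batch length:', value.shape[0])
    except IndexError:
        print(key, 'has shape', np.shape(value), '-> IndexError on shape[0]')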
@kevinstan Do you mind sharing your main loop using tf.Session for reference until we have time to fix this? Thanks!
Hi Ștefan,
Following your suggestion, tf.estimator will work in predict mode. As well as removing sigma_target from the prediction dict, the relevant instantiation also needs to be removed in gqn_model.py L94 - L101.
# outputs: sampled images
mu_target = net
# sigma_target = _linear_noise_annealing(params['gqn_params'])
# target_normal = tf.distributions.Normal(loc=mu_target, scale=sigma_target)
# target_sample = tf.identity(target_normal.sample(), name='target_sample')
# l2_reconstruction = tf.identity(
#     tf.metrics.mean_squared_error(
#         labels=target_frame,
#         predictions=mu_target),
#     name='l2_reconstruction')
A related question: is there a way to dump all the images generated by the GQN instead of outputting the mean image?
Best wishes, Benjamin
Are there any updates on this issue? I have used the script by @farrell236 to generate a target view:
from gqn.gqn_model_baseline import gqn_draw_model_fn
from gqn.gqn_params import create_gqn_config
from data_provider.gqn_tfr_provider import gqn_input_fn
from gqn.gqn_graph import gqn_draw
from PIL import Image
import tensorflow as tf
def toImage(array):
    img = Image.fromarray(array, 'RGB')
    img.save('my.png')
MODEL_DIR='/home/storm/data/Work/code/tf-gqn-master/train_dir/GQN_model/'
DATA_DIR='/media/storm/Windows/dataset/'
DATASET='rooms_ring_camera'
custom_params = {
    'SEQ_LENGTH' : 5,
    'ADAM_LR_ALPHA' : 5*10e-5,
    'ADAM_LR_BETA' : 5*10e-6,
}
gqn_config = create_gqn_config(custom_params)
example = gqn_input_fn(
    dataset=DATASET,
    context_size=5,
    batch_size=1,
    root=DATA_DIR,
    mode=tf.estimator.ModeKeys.PREDICT
)
# graph definition in test mode
net, ep_gqn = gqn_draw(
    query_pose=example[0].query_camera,
    target_frame=example[1],
    context_poses=example[0].context.cameras,
    context_frames=example[0].context.frames,
    model_params=gqn_config,
    is_training=False
)
saver = tf.train.Saver()
sess = tf.Session()
# Don't run initialisers, restore variables instead
# sess.run(tf.global_variables_initializer())
latest_checkpoint = tf.train.latest_checkpoint(MODEL_DIR)
saver.restore(sess, latest_checkpoint)
# Run network forward, shouldn't complain about uninitialised variables
output, output_gt = sess.run([net, example[1]])
toImage(output.reshape(64,64,3))
print('Done !')
I have used the provided snapshot model, but something went wrong with the output image. Can you guys help me? @kevinstan
Hi @phongnhhn92 ,
There's nothing wrong with your code, the problem lies within the snapshot itself. The one I've uploaded was trained on a small-scale debug dataset and still has a very high KL divergence between inference and generator modules. That means that when you set up the GQN in prediction mode (is_training=False), the generator module is still very close to its random initialization, hence the random noise output. The inference module (is_training=True) is OK, though, and you should see nice visualizations there. However, they don't generalize very well, i.e. when you manually change the target camera vector, you can't smoothly fly through the scene as shown in DeepMind's blog. Smooth movement through the scene is only possible with a low-KL model with a well-trained generator module. I'm currently training models which have this property and will upload them together with an example script soon. So watch this space.
Thanks for your reply @ogroth! I think the problem lies within the current implementation of the inference and generator modules. I have read the paper and cannot find much information on the eta sampler function they use. Currently I am trying to train the model with the full rooms_ring_camera dataset, but I observe that the likelihood loss saturates after 180k steps, even though the KL regularizer is still increasing slowly. Is this a weird pattern, since the KL part is supposed to get smaller? What do you think about this, is this normal behavior?
Hi @phongnhhn92, That's the behavior I've observed during my training runs, too. It looks a bit weird and I can't say whether it's 100% correct, but the KL divergence and L2 losses still get minimized afterwards, so it should be fine.
Thanks @ogroth. Can you share the problem you found with the current implementation, so that I can help? I checked your implementation and I see no difference from what is defined in the DeepMind paper. The learning rate also seems a bit odd, though. Maybe the problem is that we are training on a small subset of rooms_ring_camera instead of the full one.
@phongnhhn92 I will hopefully upload a GQN snapshot which is trained on the full rooms_free_camera_with_object_rotations dataset within a week. Stay tuned.
I still have the same issue as @phongnhhn92, i.e. when I run the code provided here https://github.com/ogroth/tf-gqn/issues/17#issuecomment-450661921 I get those lines as the output. This is the case even if I set is_training = True. Do I need to add some mean and variance to this image? Can you please share the inference code you used, @kevinstan? Thanks
Here is the code I used (originally provided in @farrell236 's comment) with a simple addition of saving the output images.
import tensorflow as tf
import scipy.misc
from gqn.gqn_params import PARAMS
from gqn.gqn_graph import gqn_draw
from data_provider.gqn_tfr_provider import gqn_input_fn
# contains the data
example = gqn_input_fn(
    dataset='rooms_ring_camera',
    context_size=5,
    batch_size=10,
    root='/mnt/cube/datasets/gqn-datasets',
    mode=tf.estimator.ModeKeys.PREDICT
)
# graph definition in test mode
net, ep_gqn = gqn_draw(
    query_pose=example[0].query_camera,
    target_frame=example[1],
    context_poses=example[0].context.cameras,
    context_frames=example[0].context.frames,
    model_params=PARAMS,
    is_training=False
)
saver = tf.train.Saver()
sess = tf.Session()
# Don't run initialisers, restore variables instead
# sess.run(tf.global_variables_initializer())
latest_checkpoint = tf.train.latest_checkpoint('tmp/models/gqn')
saver.restore(sess, latest_checkpoint)
# Run network forward, shouldn't complain about uninitialised variables
output, output_gt = sess.run([net, example[1]])
for j in range(len(output)):
    scipy.misc.imsave('testimgs/outputs{}.jpg'.format(j), output[j])
    scipy.misc.imsave('testimgs/output_gts{}.jpg'.format(j), output_gt[j])
Thanks so much @kevinstan. It works!!
I was using the same code but with PIL's fromarray, and it was giving me those lines as before. Strange that only scipy's imsave gives the correct output: it scales the minimum of the image to 0 and the maximum to 255, which is not what you get if you just multiply the output of the network by 255.
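For anyone hitting the same thing with PIL: converting to uint8 explicitly avoids relying on imsave's min/max rescaling (a sketch, assuming the network output is roughly in [0, 1]):
import numpy as np
from PIL import Image

def to_image(array, path):
    # Clip to [0, 1] and scale to uint8 explicitly, instead of depending on
    # scipy.misc.imsave's automatic min/max rescaling.
    arr = np.clip(array, 0.0, 1.0)
    arr = (arr * 255.0).astype(np.uint8)
    Image.fromarray(arr, 'RGB').save(path)

# e.g. to_image(output[j], 'testimgs/outputs{}.png'.format(j))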
Hi Oliver,
I seem to be running into the same issue as @phongnhhn92 when trying to train on the GQN datasets from Google.
There's nothing wrong with your code, the problem lies within the snapshot itself. The one I've uploaded was trained on a small-scale debug dataset and still has a very high KL divergence between inference and generator modules. That means that when you set up the GQN in prediction mode (is_training=False), the generator module is still very close to its random initialization, hence the random noise output.
This is the behaviour I have observed also, both training on Google's datasets as well as my own: the KL divergence is larger than ideal, so the generated images from the learned distribution are far from those of the training dataset.
The inference module (is_training=True) is OK, though, and you should see nice visualizations there.
I don't think the inference module should be used during testing; it is using the ground truth (target) image to seed the distribution for the image generation step (Equation S22). Although it deviates from the original GQN paper, I wonder what the outcome would be if the inference_rnn were used along with the average of all context_frames instead of target_frame here, under the assumption that the average image should theoretically make a good seed for the distribution?
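Concretely, the seed I have in mind would be something like this (just a sketch, averaging the context frames over the context axis):
import tensorflow as tf

# Sketch only: average the context frames and feed that in place of
# target_frame when keeping the inference path at test time.
# context_frames is assumed to have shape [batch, context_size, 64, 64, 3].
context_frames = tf.zeros([1, 5, 64, 64, 3])                 # stand-in input
avg_context_frame = tf.reduce_mean(context_frames, axis=1)   # [batch, 64, 64, 3]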
Best wishes, Benjamin
@farrell236 It seems like this rooms dataset is quite a challenging one, since I have got good results with the other, smaller datasets (mazes and shepard_metzler_5_part). Maybe we need multiple GPUs and some tricks to train this rooms dataset, as claimed in the DeepMind paper. If I understand your idea correctly, you mean to train the inference RNN with the target_image but use an average image of all input views during testing to replace the target_image, is that correct? I have tried it on my trained GQN model and sadly it doesn't work =))) If you mean to use the average context_image for training the inference RNN, then that would be counter-intuitive to the meaning of the variational approximation approach.
@phongnhhn92
@farrell236 It seems like this rooms dataset is quite a challenging one, since I have got good results with the other, smaller datasets (mazes and shepard_metzler_5_part). Maybe we need multiple GPUs and some tricks to train this rooms dataset, as claimed in the DeepMind paper.
Seems we're in the same boat here, I was only able to successfully train with shepard_metzler_5_part and shepard_metzler_7_part. It was getting there with mazes, but I stopped it because it didn't seem to be converging; maybe I was a bit too hasty? For my custom dataset, it wasn't able to generalise, unfortunately. DeepMind used "4 NVidia K80 GPUs for 2 million gradient steps" (quoted from the supplementary). It's an older architecture, but that's close to 100GB of GPU memory.
If I understand your idea correctly, you mean to train the inference RNN with the target_image but use an average image of all input views during testing to replace the target_image, is that correct? I have tried it on my trained GQN model and sadly it doesn't work =)))
Yes, this is exactly what I had in mind! Thanks for testing it XD
@ogroth Hi Oliver, I'm currently trying to visualize the GQN network in lucid. I'm curious how the results will look on the full dataset. Have you had time to upload the fully trained model? I can't tell from the home page. Sorry in advance if I was just confused.
@ogroth @phongnhhn92 We ran the example shown here (https://github.com/ogroth/tf-gqn/issues/17#issuecomment-457411084) and are able to visualize the predictions. We noticed that if we set the is_training flag to False, we only get the walls of the scene and no objects. If we set this flag to True, we get good predictions, but as the network is now in training mode, we see that the prediction depends on the target image being fed to the network. We want to test the pre-trained network on different scenes for which we do not have a target image. We wanted to know if there is a way to get predictions without a target image (i.e. with the is_training flag set to False).
We found that in this post (https://github.com/ogroth/tf-gqn/issues/12#issuecomment-412849347), you said that the network occasionally is not able to figure out the geometry of the object as it was only trained for 200K epochs, but we see that this happens all the time with the gqn_pool_draw12 (and draw_8) weights and we get blurry but incorrect objects in the predictions using draw_6 and draw_4 weights.
Kindly following up on this, @ogroth, can you give an update on the status of any training with respect to reducing the KL divergence between the generator and inference modules?
I'm happy to start a long-duration training process (or multiple, on new generative datasets) if there is confidence that the generator's posterior parameters are effectively learning from more examples.
If not, then it makes sense to raise the question of why. It may be relevant to point to recent work by the same group on preventing posterior collapse.
A few remarks on the training process: When the model is instantiated with is_training=True, the inference LSTM is used and a posterior is drawn from the target image. During test time, the model should always be set up with is_training=False. In this case, the generator is used. A model with a DRAW depth of 8 can be successfully trained with the default parameters on rooms_free_camera_with_object_rotations (the most challenging one) in about 2.5M training steps. I have pre-trained models for the rooms and shepard-metzler datasets, but haven't had the time yet to upload them and document them properly (i.e. with Jupyter notebooks to illustrate their usage during test time).
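For reference, a DRAW depth of 8 would be configured through the sequence length (a sketch, using the parameter name that appears in the snippet earlier in this thread):
from gqn.gqn_params import create_gqn_config

# Sketch: set the DRAW depth via SEQ_LENGTH; all other parameters keep
# their defaults.
gqn_config = create_gqn_config({'SEQ_LENGTH': 8})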
Thanks for the clarification @ogroth. I've been working with @tarunsharma1 on exploring the GQN for a class project (that's due soon).
I can understand how it's been hard to find the time to document the newly trained models. If you'd like, I'd be willing to help with this task and illustrate whatever functionality you need. I'm familiar with the code and have run examples with the rings_rooms_debug snapshots that you've uploaded. I also have a script that converts your snapshots into protobufs, which I find helpful because of an error I was receiving when loading the .meta files.
Please let me know if I could be of help. At any rate, thanks for taking the time to answer our questions.
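For what it's worth, the conversion I mentioned is roughly this (a sketch, not my exact script, with placeholder paths): rebuild the graph in code to avoid the .meta files, restore the checkpoint, and freeze the variables into a single .pb.
import tensorflow as tf
from gqn.gqn_params import PARAMS
from gqn.gqn_graph import gqn_draw
from data_provider.gqn_tfr_provider import gqn_input_fn

# Sketch only: the output node is taken from the graph itself (net.op.name)
# rather than hard-coded; '/path/to/...' are placeholders.
example = gqn_input_fn(
    dataset='rooms_ring_camera', context_size=5, batch_size=1,
    root='/path/to/gqn-datasets', mode=tf.estimator.ModeKeys.PREDICT)
net, _ = gqn_draw(
    query_pose=example[0].query_camera, target_frame=example[1],
    context_poses=example[0].context.cameras,
    context_frames=example[0].context.frames,
    model_params=PARAMS, is_training=False)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('/path/to/snapshot'))
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), [net.op.name])
    tf.train.write_graph(frozen, '.', 'gqn_frozen.pb', as_text=False)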
@farrell236 you mentioned that you were able to train shepard_metzler_5_part. Did you modify the default parameters before training?
I ask because I trained mine with the default parameters, but the output is just a black image without anything when I use the visualization script.
The new view interpolation notebook as well as the new GqnViewPredictor class should help with running a trained GQN and obtaining visualizations from the generator.
I used the view interpolation notebook to load shepard_metzler_5_parts, but I can't get a correct result. This is my result:
Here is my process:
'''imports'''
# stdlib
import os
import sys
import logging
# numerical computing
import numpy as np
import tensorflow as tf
# plotting
import imageio
logging.getLogger("imageio").setLevel(logging.ERROR) # switch off warnings during lossy GIF-generation
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from IPython.display import Image, display
# GQN src
root_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(root_path)
print(sys.path)
from data_provider.gqn_provider import gqn_input_fn
from gqn.gqn_predictor import GqnViewPredictor
['C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\python36.zip', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\DLLs', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\lib', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew', '', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\lib\site-packages', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\lib\site-packages\win32', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\lib\site-packages\win32\lib', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\lib\site-packages\Pythonwin', 'C:\Users\lenovo\AppData\Local\conda\conda\envs\tensorflownew\lib\site-packages\IPython\extensions', 'C:\Users\lenovo\.ipython', 'E:\Desktop\tf-gqn-master\tf-gqn-master']
'''directory setup'''
data_dir = os.path.join(root_path, 'data')
model_dir = os.path.join(root_path, 'models')
tmp_dir = os.path.join(root_path, 'notebooks', 'tmp')
gqn_dataset_path = os.path.join(data_dir, 'gqn-dataset')
# dataset flags
# dataset_name = 'jaco' # one of the GQN dataset names
# dataset_name = 'rooms_ring_camera' # one of the GQN dataset names
# dataset_name = 'rooms_free_camera_no_object_rotations' # one of the GQN dataset names
# dataset_name = 'rooms_free_camera_with_object_rotations' # one of the GQN dataset names
dataset_name = 'shepard_metzler_5_parts'#'shepard_metzler_5_parts' # one of the GQN dataset names
# dataset_name = 'shepard_metzler_7_parts' # one of the GQN dataset names
data_path = os.path.join(gqn_dataset_path, dataset_name)
print("Data path: %s" % (data_path, ))
# model flags
model_name = 'gqn'#'gqn8'
# model_name = 'gqn12'
gqn_model_path = os.path.join(model_dir, dataset_name)
model_path = os.path.join(gqn_model_path, model_name)
print("Model path: %s" % (model_path, ))
# tmp
notebook_name = 'view_interpolation'
notebook_tmp_path = os.path.join(tmp_dir, notebook_name)
os.makedirs(notebook_tmp_path, exist_ok=True)
print("Tmp path: %s" % (notebook_tmp_path, ))
Data path: E:\Desktop\tf-gqn-master\tf-gqn-master\data\gqn-dataset\shepard_metzler_5_parts Model path: E:\Desktop\tf-gqn-master\tf-gqn-master\models\shepard_metzler_5_parts\gqn Tmp path: E:\Desktop\tf-gqn-master\tf-gqn-master\notebooks\tmp\view_interpolation
'''data reader setup'''
mode = tf.estimator.ModeKeys.EVAL
ctx_size=5 # needs to be the same as the context size defined in gqn_config.json in the model_path
batch_size=1 # should be kept at 1
dataset = gqn_input_fn(
    dataset_name=dataset_name, root=gqn_dataset_path, mode=mode,
    context_size=ctx_size, batch_size=batch_size, num_epochs=1,
    num_threads=4, buffer_size=1)
iterator = dataset.make_initializable_iterator()
data = iterator.get_next()
'''video predictor & session setup'''
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # run on CPU only, adjust to GPU id for speedup
#os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
predictor = GqnViewPredictor(model_path)
sess = predictor.sess
sess.run(iterator.initializer)
print("Loop completed.")
**>>> Instantiated GQN: enc_r Tensor("GQN/Sum:0", shape=(1, 1, 1, 256), dtype=float32) canvas_0 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add:0", shape=(1, 64, 64, 256), dtype=float32) canvas_1 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add_1:0", shape=(1, 64, 64, 256), dtype=float32) canvas_2 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add_2:0", shape=(1, 64, 64, 256), dtype=float32) canvas_3 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add_3:0", shape=(1, 64, 64, 256), dtype=float32) canvas_4 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add_4:0", shape=(1, 64, 64, 256), dtype=float32) canvas_5 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add_5:0", shape=(1, 64, 64, 256), dtype=float32) canvas_6 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add_6:0", shape=(1, 64, 64, 256), dtype=float32) canvas_7 Tensor("GQN/GQN_RNN/Generator/LSTM_gen/add_7:0", shape=(1, 64, 64, 256), dtype=float32) mu_target Tensor("GQN/eta_g/BiasAdd:0", shape=(1, 64, 64, 3), dtype=float32) INFO:tensorflow:Restoring parameters from E:\Desktop\tf-gqn-master\tf-gqn-master\models\shepard_metzler_5_parts\gqn\model.ckpt-0 >>> Restored parameters from: E:\Desktop\tf-gqn-master\tf-gqn-master\models\shepard_metzler_5_parts\gqn\model.ckpt-0 Loop completed.**
'''data visualization'''
skip_load = 1 # adjust this to skip through records
print("Loop completed.")
# fetch & parse
for _ in range(skip_load):
    d, _ = sess.run(data)
ctx_frames = d.query.context.frames
ctx_poses = d.query.context.cameras
tgt_frame = d.target
tgt_pose = d.query.query_camera
tuple_length = ctx_size + 1 # context points + 1 target
print(">>> Context frames:\t%s" % (ctx_frames.shape, ))
print(">>> Context poses: \t%s" % (ctx_poses.shape, ))
print(">>> Target frame: \t%s" % (tgt_frame.shape, ))
print(">>> Target pose: \t%s" % (tgt_pose.shape, ))
# visualization constants
MAX_COLS_PER_ROW = 6
TILE_HEIGHT, TILE_WIDTH, TILE_PAD = 3.0, 3.0, 0.8
np.set_printoptions(precision=2, suppress=True)
# visualize all data tuples in the batch
for n in range(batch_size):
    # define image grid
    ncols = int(np.min([tuple_length, MAX_COLS_PER_ROW]))
    nrows = int(np.ceil(tuple_length / MAX_COLS_PER_ROW))
    fig = plt.figure(figsize=(TILE_WIDTH * ncols, TILE_HEIGHT * nrows))
    grid = ImageGrid(
        fig, 111,  # similar to subplot(111)
        nrows_ncols=(nrows, ncols),
        axes_pad=TILE_PAD,  # pad between axes in inch.
    )
    # visualize context
    for ctx_idx in range(ctx_size):
        rgb = ctx_frames[n, ctx_idx]
        pose = ctx_poses[n, ctx_idx]
        caption = "ctx: %02d\nxyz:%s\nyp:%s" % \
            (ctx_idx + 1, pose[0:3], pose[3:])
        grid[ctx_idx].imshow(rgb)
        grid[ctx_idx].set_title(caption, loc='center')
    # visualize target
    rgb = tgt_frame[n]
    pose = tgt_pose[n]
    caption = "target\nxyz:%s\nyp:%s" % \
        (pose[0:3], pose[3:])
    grid[-1].imshow(rgb)
    grid[-1].set_title(caption, loc='center')
    plt.show()
Loop completed. >>> Context frames: (1, 5, 64, 64, 3) >>> Context poses: (1, 5, 7) >>> Target frame: (1, 64, 64, 3) >>> Target pose: (1, 7)
'''run the view prediction'''
# visualize all data tuples in the batch
for n in range(batch_size):
    print(">>> Predictions:")
    # define image grid for predictions
    ncols = int(np.min([tuple_length, MAX_COLS_PER_ROW]))
    nrows = int(np.ceil(tuple_length / MAX_COLS_PER_ROW))
    fig = plt.figure(figsize=(TILE_WIDTH * ncols, TILE_HEIGHT * nrows))
    grid = ImageGrid(
        fig, 111,  # similar to subplot(111)
        nrows_ncols=(nrows, ncols),
        axes_pad=TILE_PAD,  # pad between axes in inch.
    )
    # load the scene context
    predictor.clear_context()
    for i in range(ctx_size):
        ctx_frame = ctx_frames[n, i]
        ctx_pose = ctx_poses[n, i]
        predictor.add_context_view(ctx_frame, ctx_pose)
    # render query
    query_pose = tgt_pose[n]
    pred_frame = predictor.render_query_view(query_pose)[0]
    caption = "query\nxyz:%s\nyp:%s" % \
        (query_pose[0:3], query_pose[3:])
    grid[0].imshow(pred_frame)
    grid[0].set_title(caption, loc='center')
    # re-render context (auto-encoding consistency)
    for ctx_idx in range(ctx_size):
        query_pose = ctx_poses[n, ctx_idx]
        pred_frame = predictor.render_query_view(query_pose)[0]
        caption = "ctx: %02d\nxyz:%s\nyp:%s" % \
            (ctx_idx + 1, query_pose[0:3], query_pose[3:])
        grid[ctx_idx + 1].imshow(pred_frame)
        grid[ctx_idx + 1].set_title(caption, loc='center')
    plt.show()

    print(">>> Ground truth:")
    # define image grid for predictions
    ncols = int(np.min([tuple_length, MAX_COLS_PER_ROW]))
    nrows = int(np.ceil(tuple_length / MAX_COLS_PER_ROW))
    fig = plt.figure(figsize=(TILE_WIDTH * ncols, TILE_HEIGHT * nrows))
    grid = ImageGrid(
        fig, 111,  # similar to subplot(111)
        nrows_ncols=(nrows, ncols),
        axes_pad=TILE_PAD,  # pad between axes in inch.
    )
    # query
    pose = tgt_pose[n]
    rgb = tgt_frame[n]
    caption = "query\nxyz:%s\nyp:%s" % \
        (pose[0:3], pose[3:])
    grid[0].imshow(rgb)
    grid[0].set_title(caption, loc='center')
    # context
    for ctx_idx in range(ctx_size):
        pose = ctx_poses[n, ctx_idx]
        rgb = ctx_frames[n, ctx_idx]
        caption = "ctx: %02d\nxyz:%s\nyp:%s" % \
            (ctx_idx + 1, pose[0:3], pose[3:])
        grid[ctx_idx + 1].imshow(rgb)
        grid[ctx_idx + 1].set_title(caption, loc='center')
    plt.show()
'''render a view interpolation trajectory'''
# query pose trajectory per dataset
# [[0, 0, 0, yaw, 0] for yaw in range(0, 360, 10)]
def _query_poses(num_poses=40, radius=3.0, height=2.5, angle=30.0):
    x = list(radius * np.sin(np.linspace(np.pi, -np.pi, num_poses)))
    y = list(radius * np.cos(np.linspace(np.pi, -np.pi, num_poses)))
    z = list(height * np.ones((num_poses, )))
    yaw = list(np.linspace(0.0, 360.0, num_poses))
    pitch = list(angle * np.ones((num_poses, )))
    poses = list(zip(x, y, z, yaw, pitch))
    return poses

QUERY_POSES = {
    'shepard_metzler_5_parts' : _query_poses(),
    'shepard_metzler_7_parts' : _query_poses(),
}
# generate query poses
query_poses = QUERY_POSES[dataset_name]
query_poses = [np.array(qp) for qp in query_poses]
# render corresponding views
print(">>> Rendering interpolation trajectory for %d query poses..." % (len(query_poses), ))
frame_buffer = []
for i, query_pose in enumerate(query_poses):
    pred_frame = predictor.render_query_view(query_pose)[0]
    frame_buffer.append(pred_frame)
    if (i+1) % 10 == 0:
        print(" %d / %d frames rendered." % ((i+1), len(query_poses)))
# show gif of view interpolation trajectory
gif_tmp_path = os.path.join(notebook_tmp_path, 'view_interpolation_preview.gif')
imageio.mimsave(gif_tmp_path, frame_buffer)
with open(gif_tmp_path, 'rb') as file:
    display(Image(file.read()))
>>> Rendering interpolation trajectory for 40 query poses... 10 / 40 frames rendered. 20 / 40 frames rendered. 30 / 40 frames rendered. 40 / 40 frames rendered.
Hello, thank you for providing this code.
I tried the code provided by @SliMM in this issue but was unable to reproduce the results @Yangshell provided.
I get the following error msg:
Traceback (most recent call last):
File "test_model.py", line 26, in <module>
for prediction in estimator.predict(input_fn=input_fn):
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 551, in predict
features, None, model_fn_lib.ModeKeys.PREDICT, self.config)
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 1169, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/home/kstan/tf-gqn/gqn/gqn_model.py", line 102, in gqn_draw_model_fn
predictions=mu_target),
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/ops/metrics_impl.py", line 1312, in mean_squared_error
squared_error = math_ops.square(labels - predictions)
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 889, in r_binary_op_wrapper
x = ops.convert_to_tensor(x, dtype=y.dtype.base_dtype, name="x")
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1048, in convert_to_tensor
as_ref=False)
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1144, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 228, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 207, in constant
value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/home/kstan/tf-gqn/venv/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 430, in make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
I suspect the error is coming from the input_fn, and that the data is not being fed properly to estimator.predict(). What is the mistake and how can it be fixed?
Thanks for the help.
Best, Kevin