sampepose / flownet2-tf

FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks

How to save the time for loading pre-trained model #82

Open momo1986 opened 5 years ago

momo1986 commented 5 years ago

Hello guys,

Currently, my solution restores the model for every image or frame, which is very time-consuming. I have tried to load it once instead; here is the code, in src/net.py:

def test_new_rule(self, checkpoint, graphpoint, cap):
    """Create session and restore weights
    """
    ops.reset_default_graph()
    g = tf.get_default_graph()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(graphpoint)
        saver = tf.train.import_meta_graph(graphpoint)
        saver.restore(sess, checkpoint)
        ret, frame = cap.read()
        input_a = frame
        i = 0
        while ret:
            ret, frame = cap.read()
            if frame is None:
                continue
            input_b = frame
            input_a = cv2.resize(input_a, (384, 512))
            input_b = cv2.resize(input_b, (384, 512))
            # Convert from RGB -> BGR
            input_a = input_a[..., [2, 1, 0]]
            input_b = input_b[..., [2, 1, 0]]

            # Scale from [0, 255] -> [0.0, 1.0] if needed
            if input_a.max() > 1.0:
                input_a = input_a / 255.0
            if input_b.max() > 1.0:
                input_b = input_b / 255.0

            inputs = {
                'input_a': tf.expand_dims(tf.constant(input_a, dtype=tf.float32), 0),
                'input_b': tf.expand_dims(tf.constant(input_b, dtype=tf.float32), 0),
            }
            training_schedule = LONG_SCHEDULE

            predictions = self.model(inputs, training_schedule)
            #print(predictions)
            pred_flow = predictions['flow']
            pred_flow = sess.run(pred_flow)[0, :, :, :]

The API is called in src/flownet2/test.py:

net = FlowNet2(mode=Mode.TEST)
net.test_new_rule(
    checkpoint='./checkpoints/FlowNet2/flownet-2.ckpt-0',
    graphpoint='./checkpoints/FlowNet2/flownet-2.ckpt-0.meta',
    cap=cap
)

Here is the error log:

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value FlowNet2/FlowNetCSS/FlowNetCS/FlowNetC/conv1/weights_1
  [[node FlowNet2/FlowNetCSS/FlowNetCS/FlowNetC/conv1/weights_1/read (defined at /root/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/variables.py:277) = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]]]
  [[{{node FlowNet2/ResizeBilinear/_459}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1817_FlowNet2/ResizeBilinear", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Restoring the model every time an image is processed is very time-consuming. Is there a workaround with the variable definitions of FlowNet2 that I can transfer to the test file? Thanks & regards!

Iamanorange commented 5 years ago

Building the graph should be moved out of the camera capture loop. Calling self.model() inside the loop adds a fresh, uninitialized copy of the model variables to the graph on every frame (note the weights_1 suffix in your error), which is what triggers the FailedPreconditionError:

predictions = self.model(inputs, training_schedule)
pred_flow = predictions['flow']
with tf.Session(config=config) as sess:
    saver.restore(blablabla)
    while ret:
        # Get frames and preprocessing
        flow = sess.run(pred_flow, feed_dict={
            input_a : frame_0,
            input_b : frame_1
        })[0, :, :, :]
momo1986 commented 5 years ago

Building the graph should be moved out of the camera capture loop:

predictions = self.model(inputs, training_schedule)
pred_flow = predictions['flow']
with tf.Session(config=config) as sess:
    saver.restore(blablabla)
    while ret:
        # Get frames and preprocessing
        flow = sess.run(pred_flow, feed_dict={
            input_a : frame_0,
            input_b : frame_1
        })[0, :, :, :]

Hello, thanks for your guidance.

I wrote my code like this:

def test_new_rule(self, checkpoint, graphpoint, cap):
    # Create session and restore weights
    tf.reset_default_graph()
    ret, frame = cap.read()

    input_a = frame
    input_b = frame
    input_a = cv2.resize(input_a, (384, 512))
    input_b = cv2.resize(input_b, (384, 512))

    # Convert from RGB -> BGR
    input_a = input_a[..., [2, 1, 0]]
    input_b = input_b[..., [2, 1, 0]]
    print(input_a.shape)
    # Scale from [0, 255] -> [0.0, 1.0] if needed
    if input_a.max() > 1.0:
        input_a = input_a / 255.0
    if input_b.max() > 1.0:
        input_b = input_b / 255.0

    # TODO: This is a hack, we should get rid of this

    training_schedule = LONG_SCHEDULE

    inputs = {
        'input_a': tf.expand_dims(tf.constant(input_a, dtype=tf.float32), 0),
        'input_b': tf.expand_dims(tf.constant(input_b, dtype=tf.float32), 0),
    }

    predictions = self.model(inputs, training_schedule)
    pred_flow = predictions['flow']

    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        ret, frame = cap.read()
        frame_0 = frame
        i = 0
        distance_u = 0
        distance_v = 0
        maxrad = 0
        previous_maxrad = 0
        max_maxrad = maxrad
        increase = False
        while ret:
            ret, frame = cap.read()
            if frame is None:
                continue
            frame_1 = frame
            frame_0 = cv2.resize(frame_0, (384, 512))
            frame_1 = cv2.resize(frame_1, (384, 512))
            # Convert from RGB -> BGR
            frame_0 = frame_0[..., [2, 1, 0]]
            frame_1 = frame_1[..., [2, 1, 0]]

            # Scale from [0, 255] -> [0.0, 1.0] if needed
            if frame_0.max() > 1.0:
                frame_0 = frame_0 / 255.0
            if frame_1.max() > 1.0:
                frame_1 = frame_1 / 255.0
            frame_0 = tf.convert_to_tensor(frame_0, dtype=tf.float32)
            frame_1 = tf.convert_to_tensor(frame_1, dtype=tf.float32)

            flow = sess.run(pred_flow, feed_dict={
                input_a : frame_0,
                input_b : frame_1
            })[0, :, :, :]

            print(flow)
            print(distance_u)
            print(distance_v)
            print(maxrad)
            max_maxrad = max(max_maxrad, maxrad)
            if maxrad > previous_maxrad:
                increase = True
            else:
                if increase:
                    increase = False
                    print("Direction changed")
                    print(max_maxrad)
            previous_maxrad = maxrad
            frame_0 = frame_1
            i = i + 1
            if process_video == False:
                if cv2.waitKey(0) == ord('q'): break

However, it reports that the data type is not correct:

Traceback (most recent call last):
  File "/root/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/root/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/fast/flownet2-tf/src/flownet2/test_find_piano.py", line 54, in <module>
    main()
  File "/fast/flownet2-tf/src/flownet2/test_find_piano.py", line 24, in main
    cap=cap
  File "/fast/flownet2-tf/src/net.py", line 154, in test_new_rule
    input_b : frame_1
TypeError: unhashable type: 'numpy.ndarray' (512, 384, 3)

Iamanorange commented 5 years ago

You should feed 2 placeholders to self.model(), instead of 2 constants.
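As a minimal TF 1.x illustration of the difference (the shapes here are arbitrary): feed_dict keys must be tensors that live in the graph, typically placeholders. In the code above, the Python name input_a is rebound to a preprocessed ndarray before being used as a feed_dict key, which is exactly where the "unhashable type" error comes from.

import numpy as np
import tensorflow as tf

# A placeholder is a named graph input that can be fed a new array each run.
x = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 3])
y = x * 2.0

frame = np.zeros((1, 384, 512, 3), dtype=np.float32)
with tf.Session() as sess:
    out = sess.run(y, feed_dict={x: frame})  # OK: the key is a graph tensor

# By contrast, tf.constant(frame) freezes the pixel values into the graph,
# so a new constant op would be needed for every frame and there is nothing
# left to feed at run time.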

momo1986 commented 5 years ago

You should feed 2 placeholders to self.model(), instead of 2 constants.

How can I create the 2 placeholders for the input, pass them to self.model(), and then call sess.run()? It looks like pred_flow also depends on the input placeholders. I have tried different approaches, but I always get an error.

Iamanorange commented 5 years ago

I haven't trained FlowNet2. For inference:

# Build Graph
input_a = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 3])
input_b = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 3])
inputs = {
    'input_a': input_a,
    'input_b': input_b,
}
training_schedule = LONG_SCHEDULE
predictions = self.model(inputs, training_schedule)
pred_flow = predictions['flow']

with tf.Session(config=config) as sess:
    saver.restore(blablabla)
    while ret:
        # Get frames and preprocessing
        # Feed
        # There is no need to convert the frame (ndarray) to a tensor.
        flow = sess.run(pred_flow, feed_dict={
            input_a : frame_0,
            input_b : frame_1
        })[0, :, :, :]
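Putting the pieces together for the video case, a sketch of a method for src/net.py might look like the following. The names run_video and preprocess are made up here for illustration; LONG_SCHEDULE is the schedule already imported in net.py, and the preprocessing mirrors the snippets above.

import cv2
import numpy as np
import tensorflow as tf

def preprocess(frame):
    # Resize to 512x384 (width x height), swap channels, scale to [0, 1],
    # and add the batch dimension expected by the placeholders.
    frame = cv2.resize(frame, (512, 384))
    frame = frame[..., [2, 1, 0]]
    if frame.max() > 1.0:
        frame = frame / 255.0
    return frame[np.newaxis, ...].astype(np.float32)

def run_video(self, checkpoint, cap):
    # (intended as a method of the net class in src/net.py)
    # Build the graph exactly once, outside the capture loop.
    input_a = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 3])
    input_b = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 3])
    inputs = {'input_a': input_a, 'input_b': input_b}
    predictions = self.model(inputs, LONG_SCHEDULE)
    pred_flow = predictions['flow']
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, checkpoint)  # restore the weights once
        ret, frame = cap.read()
        if not ret:
            return
        frame_0 = preprocess(frame)
        while ret:
            ret, frame = cap.read()
            if frame is None:
                continue
            frame_1 = preprocess(frame)
            # Only ndarrays are fed per frame; no new graph ops are created.
            flow = sess.run(pred_flow, feed_dict={
                input_a: frame_0,
                input_b: frame_1,
            })[0, :, :, :]
            frame_0 = frame_1

With this structure the checkpoint is read exactly once, and each frame costs only a single sess.run.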
Banhalmi commented 5 years ago

This worked for me to measure running time; change it to handle frames. The code in net.py was changed (added imports: import cv2, import time, from numpy import array).

def test(self, checkpoint, input_a_path, input_b_path, out_path, save_image=True, save_flo=False):
    input_a_ = imread(input_a_path)
    input_b_ = imread(input_b_path)

    tf.reset_default_graph()

    input_a_ = cv2.resize(input_a_, dsize=(512, 384))  # , interpolation=cv2.INTER_CUBIC)
    input_b_ = cv2.resize(input_b_, dsize=(512, 384))  # , interpolation=cv2.INTER_CUBIC)

    # Convert from RGB -> BGR
    input_a_ = input_a_[..., [2, 1, 0]]
    input_b_ = input_b_[..., [2, 1, 0]]

    # Scale from [0, 255] -> [0.0, 1.0] if needed
    if input_a_.max() > 1.0:
        input_a_ = input_a_ / 255.0
    if input_b_.max() > 1.0:
        input_b_ = input_b_ / 255.0

    # TODO: This is a hack, we should get rid of this
    training_schedule = LONG_SCHEDULE

    input_a = tf.placeholder(dtype=tf.float32, shape=[1, 384, 512, 3])
    input_b = tf.placeholder(dtype=tf.float32, shape=[1, 384, 512, 3])
    inputs = {
        'input_a': input_a,
        'input_b': input_b,
    }

    image1 = array(input_a_).reshape(1, 384, 512, 3)
    image2 = array(input_b_).reshape(1, 384, 512, 3)

    predictions = self.model(inputs, training_schedule, False)
    pred_flow = predictions['flow']

    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        t0 = time.time()
        for i in range(20):
            flow = sess.run(pred_flow, feed_dict={
                input_a: image1,
                input_b: image2
            })[0, :, :, :]

            unique_name = 'flow-' + str(uuid.uuid4())
            if save_image:
                flow_img = flow_to_image(flow)
                full_out_path = os.path.join(out_path, unique_name + '.png')
                imsave(full_out_path, flow_img)

            if save_flo:
                full_out_path = os.path.join(out_path, unique_name + '.flo')
                write_flow(flow, full_out_path)  # write the computed flow, not the tensor
    print('time for 20 iterations: {}'.format(time.time() - t0))
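One general TF 1.x caveat when timing this way (not specific to this repo): the first sess.run pays one-off costs such as graph optimization and GPU memory allocation, so adding a warm-up run before starting the clock gives a cleaner per-iteration number. For example, the session block above could become:

with tf.Session() as sess:
    saver.restore(sess, checkpoint)
    # Warm-up run: absorbs one-off graph setup and GPU allocation costs.
    sess.run(pred_flow, feed_dict={input_a: image1, input_b: image2})
    t0 = time.time()
    for i in range(20):
        flow = sess.run(pred_flow, feed_dict={
            input_a: image1,
            input_b: image2
        })[0, :, :, :]
    print('time for 20 iterations: {}'.format(time.time() - t0))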