cchen156 / Learning-to-See-in-the-Dark

Learning to See in the Dark. CVPR 2018
http://cchen156.web.engr.illinois.edu/SID.html
MIT License

How to revise test_Sony.py to test your own images? #76

Open AksChunara opened 5 years ago

cchen156 commented 5 years ago

If you want to test JPG images from another camera, that is not supported: the pretrained models take packed raw sensor data, not 8-bit JPGs.
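
For context, the repo's test_Sony.py does not take RGB images at all: it reads a Sony .ARW file with rawpy and packs the Bayer mosaic into four half-resolution channels before amplifying and feeding the network. Roughly like the sketch below (a paraphrase of pack_raw from test_Sony.py; the file name and the x100 gain are placeholders, since the gain depends on the exposure pair):

    import numpy as np
    import rawpy

    def pack_raw(raw):
        # Pack the Bayer mosaic into 4 half-resolution channels (R, G, B, G)
        im = raw.raw_image_visible.astype(np.float32)
        im = np.maximum(im - 512, 0) / (16383 - 512)  # black level, 14-bit range
        im = np.expand_dims(im, axis=2)
        H, W = im.shape[0], im.shape[1]
        out = np.concatenate((im[0:H:2, 0:W:2, :],
                              im[0:H:2, 1:W:2, :],
                              im[1:H:2, 1:W:2, :],
                              im[1:H:2, 0:W:2, :]), axis=2)
        return out

    raw = rawpy.imread('short_exposure.ARW')                  # placeholder path
    input_full = np.expand_dims(pack_raw(raw), axis=0) * 100  # exposure-ratio gain
    input_full = np.minimum(input_full, 1.0)

A JPG has already been demosaiced, white-balanced and gamma-encoded, so it does not match this input distribution.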

minfenghong commented 5 years ago

Hi, I wrote two methods to test my JPG image files with the Sony pretrained model, but it doesn't work perfectly. Just sharing my experimental code for anyone interested. ^^

def _pack_rgb(self, rgb):

    # Cast to float and scale the 8-bit RGB up to the 14-bit raw range
    # (casting first avoids integer underflow when subtracting the black level)
    im = rgb.astype(np.float32) * 256

    # Subtract the black level and normalize (Sony: black 512, 14-bit max 16383)
    im = np.maximum(im - 512, 0) / (16383 - 512)

    # Apply hand-tuned per-channel amplification factors
    im = im * [0.2, 0.36, 0.2]

    # Duplicate the green channel to mimic the RGBG 4-channel raw packing
    g = np.expand_dims(im[:, :, 1], axis=2)
    out = np.concatenate((im, g), axis=2)

    return out

def process_image(self, image):

    # Pack RGB into 4 channels and add a batch dimension
    input_full = np.expand_dims(self._pack_rgb(image), axis=0)
    input_full = np.minimum(input_full, 1.0)

    # Run the network and clip the result to [0, 1]
    output = self._sess.run(self._out_image, feed_dict={self._in_image: input_full})
    output = output[0, :, :, :]
    output = np.minimum(np.maximum(output, 0), 1)
    output = (output * 255).astype(np.uint8)

    # The network outputs a double-size image, so resize back
    h, w = output.shape[:2]
    output = cv2.resize(output, (w // 2, h // 2))

    return output
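
These two methods belong on a wrapper class that holds the TF session (`_sess`) and the graph's input/output tensors (`_in_image`, `_out_image`); the full SeeInDark class is posted below. A minimal, hypothetical usage sketch (file names are placeholders):

    import cv2

    sid = SeeInDark()                                 # full class posted below
    image = cv2.imread('dark.jpg')                    # placeholder file name
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)    # the model expects RGB order
    bright = sid.process_image(image)
    cv2.imwrite('bright.jpg', cv2.cvtColor(bright, cv2.COLOR_RGB2BGR))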
JNUChenYiHong commented 4 years ago

> Hi, I wrote two methods to test my JPG image files with the Sony pretrained model, but it doesn't work perfectly. Just sharing my experimental code for anyone interested. ^^ [...]

Hi, could you tell me how to use it?

minfenghong commented 4 years ago

> Hi, could you tell me how to use it?

Hi, my original purpose was to use this model to pre-process some dark images, so I wrote the class below to do it. But the result is not good ... orz.

from __future__ import division

import cv2
import numpy as np

import tensorflow as tf
import tensorflow.contrib.slim as slim

class SeeInDark:
    """A model wrapper of Learning to See in the Dark

    Reference code:
        https://github.com/cchen156/Learning-to-See-in-the-Dark/blob/master/test_Sony.py
    """
    CHECKPOINT_FILE_PATH = './models/seeindark/Sony'

    AMPLIFICATION_FACTORS = [0.53, 0.53, 0.381]  # Default [0.85, 1.0, 0.7]
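    # NOTE: hand-tuned stand-ins; the original raw pipeline instead amplifies
    # the input by the ground-truth/input exposure ratio (x100 to x300).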

    def __init__(self):

        def lrelu(x):
            return tf.maximum(x * 0.2, x)

        def upsample_and_concat(x1, x2, output_channels, in_channels):
            pool_size = 2
            deconv_filter = tf.Variable(
                tf.truncated_normal(
                    [pool_size, pool_size, output_channels, in_channels],
                    stddev=0.02
                )
            )
            deconv = tf.nn.conv2d_transpose(
                x1, deconv_filter, tf.shape(x2),
                strides=[1, pool_size, pool_size, 1]
            )

            deconv_output = tf.concat([deconv, x2], 3)
            deconv_output.set_shape([None, None, None, output_channels * 2])

            return deconv_output

        def network(input):
            conv1 = slim.conv2d(
                input, 32, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv1_1'
            )
            conv1 = slim.conv2d(
                conv1, 32, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv1_2'
            )
            pool1 = slim.max_pool2d(conv1, [2, 2], padding='SAME')

            conv2 = slim.conv2d(
                pool1, 64, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv2_1'
            )
            conv2 = slim.conv2d(
                conv2, 64, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv2_2'
            )
            pool2 = slim.max_pool2d(conv2, [2, 2], padding='SAME')

            conv3 = slim.conv2d(
                pool2, 128, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv3_1'
            )
            conv3 = slim.conv2d(
                conv3, 128, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv3_2'
            )
            pool3 = slim.max_pool2d(conv3, [2, 2], padding='SAME')

            conv4 = slim.conv2d(
                pool3, 256, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv4_1'
            )
            conv4 = slim.conv2d(
                conv4, 256, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv4_2'
            )
            pool4 = slim.max_pool2d(conv4, [2, 2], padding='SAME')

            conv5 = slim.conv2d(
                pool4, 512, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv5_1'
            )
            conv5 = slim.conv2d(
                conv5, 512, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv5_2'
            )

            up6 = upsample_and_concat(conv5, conv4, 256, 512)
            conv6 = slim.conv2d(
                up6, 256, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv6_1'
            )
            conv6 = slim.conv2d(
                conv6, 256, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv6_2'
            )

            up7 = upsample_and_concat(conv6, conv3, 128, 256)
            conv7 = slim.conv2d(
                up7, 128, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv7_1'
            )
            conv7 = slim.conv2d(
                conv7, 128, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv7_2'
            )

            up8 = upsample_and_concat(conv7, conv2, 64, 128)
            conv8 = slim.conv2d(
                up8, 64, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv8_1'
            )
            conv8 = slim.conv2d(
                conv8, 64, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv8_2'
            )

            up9 = upsample_and_concat(conv8, conv1, 32, 64)
            conv9 = slim.conv2d(
                up9, 32, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv9_1'
            )
            conv9 = slim.conv2d(
                conv9, 32, [3, 3], rate=1,
                activation_fn=lrelu, scope='g_conv9_2'
            )

            conv10 = slim.conv2d(
                conv9, 12, [1, 1], rate=1,
                activation_fn=None, scope='g_conv10'
            )
            # depth_to_space: 12 channels -> 3-channel RGB at 2x spatial size
            out = tf.depth_to_space(conv10, 2)
            return out

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options)
        g = tf.Graph()
        with g.as_default():
            self._sess = tf.Session(config=sess_config)
            with self._sess.as_default():
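                # 4 input channels: RGB plus a duplicated G, mimicking the
                # RGBG packing the network was trained on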
                self._in_image = tf.placeholder(tf.float32, [None, None, None, 4])
                self._out_image = network(self._in_image)

                saver = tf.train.Saver()
                self._sess.run(tf.global_variables_initializer())
                ckpt = tf.train.get_checkpoint_state(
                    SeeInDark.CHECKPOINT_FILE_PATH
                )
                if ckpt:
                    print('loaded ' + ckpt.model_checkpoint_path)
                    saver.restore(self._sess, ckpt.model_checkpoint_path)
                else:
                    raise Exception('SeeInDark checkpoint file not found!')

        g.finalize()

    def _pack_rgb(self, rgb):
        im = rgb.astype(np.float32)

        # Subtract the black level
        black_threshold = 0.0
        im = np.maximum(im - black_threshold, 0.0) / (255.0 - black_threshold)

        # Apply per-channel amplification factors
        im = im * SeeInDark.AMPLIFICATION_FACTORS

        # Format image into RGBG 4 channels
        g = im[:, :, 1]
        g = np.expand_dims(g, axis=2)
        out = np.concatenate((im, g), axis=2)

        return out

    def process_image(self, image):
        """Brighten image

        Args:
            image (ndarray): RGB image

        Returns:
            (ndarray): brightened image
        """

        input_full = np.expand_dims(self._pack_rgb(image), axis=0)
        input_full = np.minimum(input_full, 1.0)

        output = self._sess.run(
            self._out_image,
            feed_dict={self._in_image: input_full}
        )
        output = output[0, :, :, :]
        output = np.minimum(np.maximum(output, 0), 1)
        output = output * 255
        output = output.astype(np.uint8)

        # The network's depth_to_space doubles the spatial size,
        # so resize back to the original size
        h, w = output.shape[:2]
        output = cv2.resize(output, (w // 2, h // 2))

        return output

if __name__ == '__main__':

    import time
    import pandas as pd

    # Set up experiment resources
    FRAME_IMAGE = 'frame-1553563852-1.jpg'
    FACE_IMAGE = 'frame-36-face-1.jpg'

    # Init model
    sid = SeeInDark()

    # Measure processing speed on a full frame
    image = cv2.imread(FRAME_IMAGE)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Warm up model
    _ = sid.process_image(image)

    time_list = []

    for _ in range(20):
        start_time = time.time()
        _ = sid.process_image(image)
        time_list.append(time.time() - start_time)

    print('Average processing time on image size', image.shape)
    print(pd.DataFrame(time_list).describe())

    # Measure processing speed on a face crop
    image = cv2.imread(FACE_IMAGE)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Warm up model
    _ = sid.process_image(image)

    time_list = []

    for _ in range(20):
        start_time = time.time()
        _ = sid.process_image(image)
        time_list.append(time.time() - start_time)

    print('Average processing time on image size', image.shape)
    print(pd.DataFrame(time_list).describe())

    print('Done')
JNUChenYiHong commented 4 years ago

Thanks so much!