ialhashim / DenseDepth

High Quality Monocular Depth Estimation via Transfer Learning
https://arxiv.org/abs/1812.11941
GNU General Public License v3.0

about depth_shape size #60

Closed mahxn0 closed 5 years ago

mahxn0 commented 5 years ago

Thanks for your great work! I can't understand why shape_rgb is 640×480 while shape_depth is 320×240.

If I use kitti.h5 for fine-tuning, and my dataset is 1280×720, should I first resize all the RGB and depth images to 370×1224?

Then what code do I need to modify?

Here is my data-loading code:

import numpy as np
from utils import DepthNorm
from io import BytesIO
from PIL import Image
from zipfile import ZipFile
from keras.utils import Sequence
from augment import BasicPolicy

def extract_zip(input_zip):
    input_zip=ZipFile(input_zip)
    return {name: input_zip.read(name) for name in input_zip.namelist()}

def nyu_resize_x(img, resolution=370, padding=6):
    from skimage.transform import resize
    return resize(img, (resolution, 1224), preserve_range=True, mode='reflect', anti_aliasing=True )

def nyu_resize_y(img, resolution=370, padding=6):
    from skimage.transform import resize
    return resize(img, (resolution,612), preserve_range=True, mode='reflect', anti_aliasing=True )

def get_nyu_data(batch_size, nyu_data_zipfile='nyu_data.zip'):
    #data = extract_zip(nyu_data_zipfile)
    #nyu2_train = list((row.split(',') for row in (data['data/nyu2_train.csv']).decode("utf-8").split('\n') if len(row) > 0))
    #nyu2_test = list((row.split(',') for row in (data['data/nyu2_test.csv']).decode("utf-8").split('\n') if len(row) > 0))
    img_lists,label_lists=None,None
    nyu2_train=[]
    nyu2_test=[]
    with open("/media/mahxn0/DATA/rili/train/train.txt","r") as f1:
        img_lists=f1.readlines()
    with open("/media/mahxn0/DATA/rili/train/label.txt","r") as f2:
        label_lists=f2.readlines()
    for i in range(len(img_lists)):
        item = [img_lists[i].rstrip('\n'),label_lists[i].rstrip('\n')]
        nyu2_train.append(item)
    with open("/media/mahxn0/DATA/rili/test/test.txt","r") as f3:
        img_test_lists=f3.readlines()
    with open("/media/mahxn0/DATA/rili/test/test_label.txt","r") as f4:
        label_test_lists=f4.readlines()
    for i in range(len(img_test_lists)):
        item = [img_test_lists[i].rstrip('\n'),label_test_lists[i].rstrip('\n')]
        nyu2_test.append(item)
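    # The RGB input is full resolution; the depth target is half resolution,
    # since the network's output is H/2 x W/2 (see the author's reply below).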
    shape_rgb = (batch_size, 370, 1224, 3)
    shape_depth = (batch_size, 185, 612, 1)

    # Helpful for testing...
    if False:
        nyu2_train = nyu2_train[:10]
        nyu2_test = nyu2_test[:10]

    return nyu2_train, nyu2_test, shape_rgb, shape_depth

def get_nyu_train_test_data(batch_size):
    nyu2_train, nyu2_test, shape_rgb, shape_depth = get_nyu_data(batch_size)

    train_generator = NYU_BasicAugmentRGBSequence(nyu2_train, batch_size=batch_size, shape_rgb=shape_rgb, shape_depth=shape_depth)
    test_generator = NYU_BasicRGBSequence(nyu2_test, batch_size=batch_size, shape_rgb=shape_rgb, shape_depth=shape_depth)

    return train_generator, test_generator

class NYU_BasicAugmentRGBSequence(Sequence):
    def __init__(self,dataset, batch_size, shape_rgb, shape_depth, is_flip=False, is_addnoise=False, is_erase=False):
        self.dataset = dataset
        self.policy = BasicPolicy( color_change_ratio=0.50, mirror_ratio=0.50, flip_ratio=0.0 if not is_flip else 0.2, 
                                    add_noise_peak=0 if not is_addnoise else 20, erase_ratio=-1.0 if not is_erase else 0.5)
        self.batch_size = batch_size
        self.shape_rgb = shape_rgb
        self.shape_depth = shape_depth
        self.maxDepth = 1000.0

        from sklearn.utils import shuffle
        self.dataset = shuffle(self.dataset, random_state=0)

        self.N = len(self.dataset)

    def __len__(self):
        return int(np.ceil(self.N / float(self.batch_size)))

    def __getitem__(self, idx, is_apply_policy=True):
        batch_x, batch_y = np.zeros( self.shape_rgb ), np.zeros( self.shape_depth )
        # Augmentation of RGB images
        for i in range(batch_x.shape[0]):
            index = min((idx * self.batch_size) + i, self.N-1)

            sample = self.dataset[index]

            x = np.clip(np.asarray(Image.open( sample[0] )).reshape(370, 1224,3)/255,0,1)
            y = np.clip(np.asarray(Image.open( sample[1] )).reshape(370, 1224,1)/255*self.maxDepth,0,self.maxDepth)
            y = DepthNorm(y, maxDepth=self.maxDepth)
            batch_x[i] = nyu_resize_x(x, 370)
            batch_y[i] = nyu_resize_y(y, 185)

            if is_apply_policy: batch_x[i], batch_y[i] = self.policy(batch_x[i], batch_y[i])

            # DEBUG:
            #self.policy.debug_img(batch_x[i], np.clip(DepthNorm(batch_y[i])/maxDepth,0,1), idx, i)
        #exit()

        return batch_x, batch_y

class NYU_BasicRGBSequence(Sequence):
    def __init__(self,dataset, batch_size,shape_rgb, shape_depth):
        self.dataset = dataset
        self.batch_size = batch_size
        self.N = len(self.dataset)
        self.shape_rgb = shape_rgb
        self.shape_depth = shape_depth
        self.maxDepth = 1000.0

    def __len__(self):
        return int(np.ceil(self.N / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x, batch_y = np.zeros( self.shape_rgb ), np.zeros( self.shape_depth )
        for i in range(self.batch_size):            
            index = min((idx * self.batch_size) + i, self.N-1)

            sample = self.dataset[index]

            x = np.clip(np.asarray(Image.open( sample[0])).reshape(370,1224,3)/255,0,1)
            y = np.asarray(Image.open(sample[1]), dtype=np.float32).reshape(185,612,1).copy().astype(float) / 10.0
            y = DepthNorm(y, maxDepth=self.maxDepth)

            batch_x[i] = nyu_resize_x(x, 370)
            batch_y[i] = nyu_resize_y(y, 185)

            # DEBUG:
            #self.policy.debug_img(batch_x[i], np.clip(DepthNorm(batch_y[i])/maxDepth,0,1), idx, i)
        #exit()

        return batch_x, batch_y

Looking forward to your reply ^_^

ialhashim commented 5 years ago

The depth output can be interpolated back to the full size. Generally, the memory limit decides how large the dense estimate can be; in this case it is half the input.
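
A minimal sketch of that upsampling step (assuming a trained Keras `model` and a hypothetical input file; neither name comes from this repo):

import numpy as np
from PIL import Image
from skimage.transform import resize

rgb = np.clip(np.asarray(Image.open('example.png')) / 255.0, 0, 1)  # hypothetical input image
pred = model.predict(rgb[None, ...])[0]                             # (H/2, W/2, 1) depth map
full = resize(pred, (rgb.shape[0], rgb.shape[1]),
              preserve_range=True, mode='reflect', anti_aliasing=True)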

For the KITTI model you are on the right track I think. Let me know if you encounter errors.

valentinoPereira commented 5 years ago

Hey, what about training on my own dataset of 1920x1080 images? Do I need to resize them? Where in the code do I need to make changes? Please help, I've been stuck on this for days.

[screenshot of the training error]

This training bug does not seem to go away. Also, when I make some changes, the training exits on the first epoch.

My dataset consists of 51 training images and 654 test images. Batch size is 4.

ialhashim commented 5 years ago

You have a few options to fix this. Either you simply resize all inputs to (384, 1248, 3) (height, width, channels) and refine our already-trained model, or you compute, from the original dimensions, the largest values that are divisible by 32 (in this case, I think, just cropping the height to 1056).

Which approach is best depends on many things and is hard to predict beforehand. One thing to keep in mind is the batch size: if your card doesn't allow more than 4 because of the larger images, consider resizing until it fits. I found that a batch size of 8 usually gives the best results.
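
A minimal sketch of that computation (the helper name is hypothetical; the divisible-by-32 constraint comes from the encoder's five 2x downsampling stages):

def crop_to_multiple_of_32(h, w):
    # Largest height/width not exceeding (h, w) that are divisible by 32.
    return (h // 32) * 32, (w // 32) * 32

print(crop_to_multiple_of_32(1080, 1920))  # -> (1056, 1920)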

valentinoPereira commented 5 years ago

So I leave the width as it is and only crop the height to 1056, meaning my final training images will be 1920x1056? And set the depth shape to half that, right?

ialhashim commented 5 years ago

Exactly.
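
For reference, a sketch of the shape tuples this implies in the loader above (height, width order; the depth target is half resolution):

shape_rgb   = (batch_size, 1056, 1920, 3)
shape_depth = (batch_size,  528,  960, 1)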

valentinoPereira commented 5 years ago

Okay, I'll give this a try.

valentinoPereira commented 5 years ago

Hi, I've done as you said, but the training won't start. I put a print statement in the on_epoch_end callback, but training never reaches it.

Following is my log:

[screenshot of the training log]

The training exits automatically after the last line, with no error thrown.
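
One way to narrow this down (a debugging sketch, assuming the loader code above; not from the repo) is to pull a few batches from the generator manually before handing it to fit, since exceptions raised inside a Keras Sequence worker can die silently:

train_gen, test_gen = get_nyu_train_test_data(batch_size=4)
for i in range(3):
    bx, by = train_gen[i]  # triggers __getitem__, where most loading bugs surface
    print(i, bx.shape, by.shape, float(bx.min()), float(bx.max()))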