create data augmentation script for the image segmentation on chars and loc

mimus-assa commented 4 years ago

we can find the start point on "Hands-On Machine Learning with Scikit-Learn and Tensorflow: Concepts, Tools, and Techniques to Build Intelligent Systems"

mimus-assa commented 4 years ago

this could be done on the image segmentation notebook

mimus-assa commented 4 years ago

we can find the start point on "Hands-On Machine Learning with Scikit-Learn and Tensorflow: Concepts, Tools, and Techniques to Build Intelligent Systems"

we can modify the code on "proto data cleaning etapa 3.ipynb" but need to work whit pairs of images, for now we need to reduce this code to minimal optimal

def random_rotation(image_array: ndarray):
    # pick a random degree of rotation between 25% on the left and 25% on the right
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations we defined earlier
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

folder_path = train_dir
names = [l for l in listdir(folder_path) if os.path.isdir(join(folder_path,l)) ]
num_files_desired = 1000

final_val = len(names)
progress = widgets.IntProgress(value=0, min=0, max=final_val)
display(progress)

for name in names:
    progress.value +=1

    # find all files paths from the folder
    images = [os.path.join(folder_path+name, f) for f in os.listdir(folder_path+name) if os.path.isfile(os.path.join(folder_path+name, f))]
    num_generated_files = len(images)
    while num_generated_files <= num_files_desired:

        # random image from the folder
        image_path = random.choice(images)
        # read image as an two dimensional array of pixels
        image_to_transform = sk.io.imread(image_path)
        # random num of transformation to apply
        num_transformations_to_apply = random.randint(1, len(available_transformations))

        num_transformations = 0
        transformed_image = None
        while num_transformations <= num_transformations_to_apply:
            # random transformation to apply for a single image
            key = random.choice(list(available_transformations))
            transformed_image = available_transformations[key](image_to_transform)
            num_transformations += 1
        new_file_path = '%s/augmented_image_%s.jpg' % (folder_path+name, num_generated_files)       
        # write image to the disk
        io.imsave(new_file_path, img_as_ubyte(transformed_image))
        num_generated_files += 1

mimus-assa commented 4 years ago

we can find the start point on "Hands-On Machine Learning with Scikit-Learn and Tensorflow: Concepts, Tools, and Techniques to Build Intelligent Systems"

we can modify the code on "proto data cleaning etapa 3.ipynb" but need to work whit pairs of images, for now we need to reduce this code to minimal optimal

def random_rotation(image_array: ndarray):
    # pick a random degree of rotation between 25% on the left and 25% on the right
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations we defined earlier
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

folder_path = train_dir
names = [l for l in listdir(folder_path) if os.path.isdir(join(folder_path,l)) ]
num_files_desired = 1000

final_val = len(names)
progress = widgets.IntProgress(value=0, min=0, max=final_val)
display(progress)

for name in names:
    progress.value +=1

    # find all files paths from the folder
    images = [os.path.join(folder_path+name, f) for f in os.listdir(folder_path+name) if os.path.isfile(os.path.join(folder_path+name, f))]
    num_generated_files = len(images)
    while num_generated_files <= num_files_desired:

        # random image from the folder
        image_path = random.choice(images)
        # read image as an two dimensional array of pixels
        image_to_transform = sk.io.imread(image_path)
        # random num of transformation to apply
        num_transformations_to_apply = random.randint(1, len(available_transformations))

        num_transformations = 0
        transformed_image = None
        while num_transformations <= num_transformations_to_apply:
            # random transformation to apply for a single image
            key = random.choice(list(available_transformations))
            transformed_image = available_transformations[key](image_to_transform)
            num_transformations += 1
        new_file_path = '%s/augmented_image_%s.jpg' % (folder_path+name, num_generated_files)       
        # write image to the disk
        io.imsave(new_file_path, img_as_ubyte(transformed_image))
        num_generated_files += 1

def random_rotation(image_array: ndarray):
    # pick a random degree of rotation between 25% on the left and 25% on the right
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations we defined earlier
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

folder_path = train_dir
names = [l for l in listdir(folder_path) if os.path.isdir(join(folder_path,l)) ]
num_files_desired = 1000

final_val = len(names)

for name in names: 
    # find all files paths from the folder
    images = [os.path.join(folder_path+name, f) for f in os.listdir(folder_path+name) if os.path.isfile(os.path.join(folder_path+name, f))]
    num_generated_files = len(images)
    while num_generated_files <= num_files_desired:
        # random image from the folder
        image_path = random.choice(images)
        # read image as an two dimensional array of pixels
        image_to_transform = sk.io.imread(image_path)
        # random num of transformation to apply
        num_transformations_to_apply = random.randint(1, len(available_transformations))

        num_transformations = 0
        transformed_image = None
        while num_transformations <= num_transformations_to_apply:
            # random transformation to apply for a single image
            key = random.choice(list(available_transformations))
            transformed_image = available_transformations[key](image_to_transform)
        new_file_path = '%s/augmented_image_%s.jpg' % (folder_path+name, num_generated_files)       
        # write image to the disk
        io.imsave(new_file_path, img_as_ubyte(transformed_image))
        num_generated_files += 1

for the

mimus-assa commented 4 years ago


import numpy as np 
import os
import pandas as pd
from scipy import ndarray
import skimage as sk
from skimage import io
import random
from skimage import transform
from skimage import img_as_ubyte
import shutil

input_dir = "/home/mimus/apiais/data/images/ANPR/training/original_char_seg/"
target_dir = "/home/mimus/apiais/data/images/ANPR/training/masks_char_seg/"

input_img_paths = sorted(
    [
        os.path.join(input_dir, fname)
        for fname in os.listdir(input_dir)
        if fname.endswith(".jpg")
    ]
)
target_img_paths = sorted(
    [
        os.path.join(target_dir, fname)
        for fname in os.listdir(target_dir)
        if fname.endswith(".jpg") and not fname.startswith(".")
    ]
)

print("Number of samples:", len(input_img_paths))

for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)

def split_train_test(data,test_ratio):
    shuffled_indices=np.random.permutation(len(data))
    test_set_size=int(len(data)*test_ratio)
    test_indices=shuffled_indices[:test_set_size]
    train_indices=shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

df = pd.DataFrame(input_img_paths) 
train_set,test_set=split_train_test(df,0)
print(len(train_set),"train +",len(test_set),"test")

#here we need to copy the test files to another folder for later use

test_files = test_set.values.tolist()
test_file_names = []
test_file_names2 = []
for i in test_files:
    test_file_names.append(i[0])
    test_file_names2.append(i[0].replace('/original_', '/masks_'))
#print(test_file_names[0:10])
#print(test_file_names2[0:10])
for name in test_file_names2:
    shutil.move(name, name.replace('/training/', '/test/'))
for name in test_file_names:
    shutil.move(name, name.replace('/training/', '/test/'))

train_files = train_set.values.tolist()
train_file_names = []
train_file_names2 = []
for i in train_files:
    train_file_names.append(i[0])
    train_file_names2.append(i[0].replace('/original_', '/masks_'))

def random_rotation(image_array: ndarray):
    # pick a random degree of rotation between 25% on the left and 25% on the right
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations we defined earlier
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

original_img = train_file_names
mask_img = train_file_names2
num_files_desired = 20000

final_val = len(original_img)

# find all files paths from the folder
#print("control 1")
num_generated_files = len(original_img)
counter = 0
while num_generated_files <= num_files_desired:

    # random image from the folder
    image_path = original_img[counter]
    image_path2 = mask_img[counter]
    # read image as an two dimensional array of pixels
    image_to_transform = sk.io.imread(image_path)
    image_to_transform2 = sk.io.imread(image_path2)
    # random num of transformation to apply
    num_transformations_to_apply = random.randint(1, len(available_transformations))
    num_transformations = 0
    transformed_image = None
    while num_transformations <= num_transformations_to_apply:
       # print(num_transformations,num_transformations_to_apply)
        # random transformation to apply for a single image
        key = random.choice(list(available_transformations))
        transformed_image = available_transformations[key](image_to_transform)
        if key == "noise":
            transformed_image2 = image_to_transform2
        else:
            transformed_image2 = available_transformations[key](image_to_transform2)
        #aqui deberiamso poner la misma lista de archivos pero con mask
        num_transformations+= 1
    new_file_path = '%s_augmented_image_%s.jpg' % (image_path[:-4], num_generated_files)    
    new_file_path2 = '%s_augmented_image_%s.jpg' % (image_path2[:-4], num_generated_files)    

    # write image to the disk

    io.imsave(new_file_path, img_as_ubyte(transformed_image))
    io.imsave(new_file_path2, img_as_ubyte(transformed_image2))
    num_generated_files += 1
    counter += 1
    print(counter, num_generated_files)

mimus-assa commented 4 years ago


import numpy as np 
import os
import pandas as pd
from scipy import ndarray
import skimage as sk
from skimage import io
import random
from skimage import transform
from skimage import img_as_ubyte
import shutil

input_dir = "/home/mimus/apiais/data/images/ANPR/training/original_char_seg/"
target_dir = "/home/mimus/apiais/data/images/ANPR/training/masks_char_seg/"

input_img_paths = sorted(
    [
        os.path.join(input_dir, fname)
        for fname in os.listdir(input_dir)
        if fname.endswith(".jpg")
    ]
)
target_img_paths = sorted(
    [
        os.path.join(target_dir, fname)
        for fname in os.listdir(target_dir)
        if fname.endswith(".jpg") and not fname.startswith(".")
    ]
)

print("Number of samples:", len(input_img_paths))

for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)

def split_train_test(data,test_ratio):
    shuffled_indices=np.random.permutation(len(data))
    test_set_size=int(len(data)*test_ratio)
    test_indices=shuffled_indices[:test_set_size]
    train_indices=shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

df = pd.DataFrame(input_img_paths) 
train_set,test_set=split_train_test(df,0)
print(len(train_set),"train +",len(test_set),"test")

#here we need to copy the test files to another folder for later use

test_files = test_set.values.tolist()
test_file_names = []
test_file_names2 = []
for i in test_files:
    test_file_names.append(i[0])
    test_file_names2.append(i[0].replace('/original_', '/masks_'))
#print(test_file_names[0:10])
#print(test_file_names2[0:10])
for name in test_file_names2:
    shutil.move(name, name.replace('/training/', '/test/'))
for name in test_file_names:
    shutil.move(name, name.replace('/training/', '/test/'))

train_files = train_set.values.tolist()
train_file_names = []
train_file_names2 = []
for i in train_files:
    train_file_names.append(i[0])
    train_file_names2.append(i[0].replace('/original_', '/masks_'))

def random_rotation(image_array: ndarray):
    # pick a random degree of rotation between 25% on the left and 25% on the right
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations we defined earlier
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

original_img = train_file_names
mask_img = train_file_names2
num_files_desired = 20000

final_val = len(original_img)

# find all files paths from the folder
#print("control 1")
num_generated_files = len(original_img)
counter = 0
while num_generated_files <= num_files_desired:

    # random image from the folder
    image_path = original_img[counter]
    image_path2 = mask_img[counter]
    # read image as an two dimensional array of pixels
    image_to_transform = sk.io.imread(image_path)
    image_to_transform2 = sk.io.imread(image_path2)
    # random num of transformation to apply
    num_transformations_to_apply = random.randint(1, len(available_transformations))
    num_transformations = 0
    transformed_image = None
    while num_transformations <= num_transformations_to_apply:
       # print(num_transformations,num_transformations_to_apply)
        # random transformation to apply for a single image
        key = random.choice(list(available_transformations))
        transformed_image = available_transformations[key](image_to_transform)
        if key == "noise":
            transformed_image2 = image_to_transform2
        else:
            transformed_image2 = available_transformations[key](image_to_transform2)
        #aqui deberiamso poner la misma lista de archivos pero con mask
        num_transformations+= 1
    new_file_path = '%s_augmented_image_%s.jpg' % (image_path[:-4], num_generated_files)    
    new_file_path2 = '%s_augmented_image_%s.jpg' % (image_path2[:-4], num_generated_files)    

    # write image to the disk

    io.imsave(new_file_path, img_as_ubyte(transformed_image))
    io.imsave(new_file_path2, img_as_ubyte(transformed_image2))
    num_generated_files += 1
    counter += 1
    print(counter, num_generated_files)

this has an error, when the counter value goes up and reach num_generated_files, the original_img value does not match, maybe this can be solver using two counters, one real and anohter that when counter == num_generated_files: counter2 = 0

or so

mimus-assa commented 4 years ago

another error,

random_degree = random.uniform(-25, 25)

should be the same for the two images, but it is used two separated times

mimus-assa commented 4 years ago

getting this warning why? /home/mimus/anaconda3/envs/tf2/lib/python3.6/site-packages/ipykernel_launcher.py:51: UserWarning: /home/mimus/apiais/data/images/ANPR/training/masks_plate_loc/masks_366_29_augmented_image_10586.jpg is a low contrast image

mimus-assa commented 4 years ago

we also need data augmentation for letters; in total

data augmentation for plate_loc, char_seg, ocr

mimus-assa / APIAIS

create data augmentation script for the image segmentation on chars and loc #13