Closed · mimus-assa closed this issue 4 years ago
This could be done in the image segmentation notebook.
We can find a starting point in "Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems".
We can modify the code in "proto data cleaning etapa 3.ipynb", but it needs to work with pairs of images; for now we need to reduce this code to a minimal, optimized version.
# imports needed by this cell (originally defined elsewhere in the notebook)
import os
import random
from os import listdir
from os.path import join
import skimage as sk
from skimage import io, transform, util, img_as_ubyte
import ipywidgets as widgets
from IPython.display import display
from scipy import ndarray

def random_rotation(image_array: ndarray):
    # pick a random rotation angle between -25 and +25 degrees
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations defined above
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

folder_path = train_dir  # train_dir is defined earlier in the notebook
names = [l for l in listdir(folder_path) if os.path.isdir(join(folder_path, l))]
num_files_desired = 1000
final_val = len(names)
progress = widgets.IntProgress(value=0, min=0, max=final_val)
display(progress)

for name in names:
    progress.value += 1
    # find all file paths in the folder
    images = [os.path.join(folder_path + name, f) for f in os.listdir(folder_path + name)
              if os.path.isfile(os.path.join(folder_path + name, f))]
    num_generated_files = len(images)
    while num_generated_files <= num_files_desired:
        # pick a random image from the folder
        image_path = random.choice(images)
        # read the image as an array of pixels
        image_to_transform = sk.io.imread(image_path)
        # random number of transformations to apply
        num_transformations_to_apply = random.randint(1, len(available_transformations))
        num_transformations = 0
        transformed_image = None
        while num_transformations <= num_transformations_to_apply:
            # random transformation to apply to a single image
            key = random.choice(list(available_transformations))
            transformed_image = available_transformations[key](image_to_transform)
            num_transformations += 1
        new_file_path = '%s/augmented_image_%s.jpg' % (folder_path + name, num_generated_files)
        # write the image to disk
        io.imsave(new_file_path, img_as_ubyte(transformed_image))
        num_generated_files += 1
A reduced version of the same loop, without the progress widget:

def random_rotation(image_array: ndarray):
    # pick a random rotation angle between -25 and +25 degrees
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations defined above
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

folder_path = train_dir
names = [l for l in listdir(folder_path) if os.path.isdir(join(folder_path, l))]
num_files_desired = 1000
final_val = len(names)

for name in names:
    # find all file paths in the folder
    images = [os.path.join(folder_path + name, f) for f in os.listdir(folder_path + name)
              if os.path.isfile(os.path.join(folder_path + name, f))]
    num_generated_files = len(images)
    while num_generated_files <= num_files_desired:
        # pick a random image from the folder
        image_path = random.choice(images)
        # read the image as an array of pixels
        image_to_transform = sk.io.imread(image_path)
        # random number of transformations to apply
        num_transformations_to_apply = random.randint(1, len(available_transformations))
        num_transformations = 0
        transformed_image = None
        while num_transformations <= num_transformations_to_apply:
            # random transformation to apply to a single image
            key = random.choice(list(available_transformations))
            transformed_image = available_transformations[key](image_to_transform)
            num_transformations += 1  # without this increment the inner loop never ends
        new_file_path = '%s/augmented_image_%s.jpg' % (folder_path + name, num_generated_files)
        # write the image to disk
        io.imsave(new_file_path, img_as_ubyte(transformed_image))
        num_generated_files += 1
For the version that works with pairs of images (original and mask):
import numpy as np
import os
import pandas as pd
from scipy import ndarray
import skimage as sk
from skimage import io
import random
from skimage import transform
from skimage import img_as_ubyte
import shutil
input_dir = "/home/mimus/apiais/data/images/ANPR/training/original_char_seg/"
target_dir = "/home/mimus/apiais/data/images/ANPR/training/masks_char_seg/"
input_img_paths = sorted(
    [
        os.path.join(input_dir, fname)
        for fname in os.listdir(input_dir)
        if fname.endswith(".jpg")
    ]
)
target_img_paths = sorted(
    [
        os.path.join(target_dir, fname)
        for fname in os.listdir(target_dir)
        if fname.endswith(".jpg") and not fname.startswith(".")
    ]
)
print("Number of samples:", len(input_img_paths))
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)
def split_train_test(data, test_ratio):
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

df = pd.DataFrame(input_img_paths)
# a test_ratio of 0 keeps every pair in the training set
train_set, test_set = split_train_test(df, 0)
print(len(train_set), "train +", len(test_set), "test")
# here we need to copy the test files to another folder for later use
test_files = test_set.values.tolist()
test_file_names = []
test_file_names2 = []
for i in test_files:
    test_file_names.append(i[0])
    test_file_names2.append(i[0].replace('/original_', '/masks_'))
#print(test_file_names[0:10])
#print(test_file_names2[0:10])
for name in test_file_names2:
    shutil.move(name, name.replace('/training/', '/test/'))
for name in test_file_names:
    shutil.move(name, name.replace('/training/', '/test/'))
train_files = train_set.values.tolist()
train_file_names = []
train_file_names2 = []
for i in train_files:
    train_file_names.append(i[0])
    train_file_names2.append(i[0].replace('/original_', '/masks_'))
def random_rotation(image_array: ndarray):
    # pick a random rotation angle between -25 and +25 degrees
    random_degree = random.uniform(-25, 25)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    # add random noise to the image
    return sk.util.random_noise(image_array)

# dictionary of the transformations defined above
available_transformations = {
    'rotate': random_rotation,
    'noise': random_noise
}

original_img = train_file_names
mask_img = train_file_names2
num_files_desired = 20000
final_val = len(original_img)
#print("control 1")
num_generated_files = len(original_img)
counter = 0
while num_generated_files <= num_files_desired:
    # take the next image/mask pair from the lists
    image_path = original_img[counter]
    image_path2 = mask_img[counter]
    # read the images as arrays of pixels
    image_to_transform = sk.io.imread(image_path)
    image_to_transform2 = sk.io.imread(image_path2)
    # random number of transformations to apply
    num_transformations_to_apply = random.randint(1, len(available_transformations))
    num_transformations = 0
    transformed_image = None
    while num_transformations <= num_transformations_to_apply:
        # print(num_transformations, num_transformations_to_apply)
        # random transformation to apply to a single image
        key = random.choice(list(available_transformations))
        transformed_image = available_transformations[key](image_to_transform)
        if key == "noise":
            # noise is only added to the original image, the mask stays unchanged
            transformed_image2 = image_to_transform2
        else:
            transformed_image2 = available_transformations[key](image_to_transform2)
        # here we should use the same list of files but with the masks
        num_transformations += 1
    new_file_path = '%s_augmented_image_%s.jpg' % (image_path[:-4], num_generated_files)
    new_file_path2 = '%s_augmented_image_%s.jpg' % (image_path2[:-4], num_generated_files)
    # write the images to disk
    io.imsave(new_file_path, img_as_ubyte(transformed_image))
    io.imsave(new_file_path2, img_as_ubyte(transformed_image2))
    num_generated_files += 1
    counter += 1
    print(counter, num_generated_files)
This has an error: counter keeps increasing while the loop runs until num_generated_files reaches num_files_desired, so counter eventually runs past the end of original_img and the index no longer matches a file. Maybe this can be solved with two counters, one real and another that resets to 0 whenever the first one reaches the end of the list,
or something like that.
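One minimal way to do that, sketched against the original_img / mask_img lists and the transformation code from the block above: instead of keeping a second counter, the list index can simply wrap around with the modulo operator.

# same outer loop as above, but the list index wraps around instead of running past the end
counter = 0
num_generated_files = len(original_img)
while num_generated_files <= num_files_desired:
    idx = counter % len(original_img)   # goes back to 0 at the end of the list
    image_to_transform = sk.io.imread(original_img[idx])
    image_to_transform2 = sk.io.imread(mask_img[idx])
    # ...apply the paired transformations exactly as in the block above...
    counter += 1
    num_generated_files += 1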
Another error:
random_degree = random.uniform(-25, 25)
should be the same for both images of a pair, but random_rotation is called separately for the image and the mask, so each call draws a different angle.
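A minimal sketch of a fix, using the same skimage calls as above: draw the angle once and rotate both images of the pair with it. The order=0 argument for the mask is an addition here (not in the original code), so the mask labels are not blurred by interpolation.

def random_rotation_pair(image_array: ndarray, mask_array: ndarray):
    # one shared angle for the original image and its mask
    random_degree = random.uniform(-25, 25)
    rotated_image = sk.transform.rotate(image_array, random_degree)
    # nearest-neighbour interpolation so the mask keeps its original values
    rotated_mask = sk.transform.rotate(mask_array, random_degree, order=0)
    return rotated_image, rotated_mask

# usage inside the loop, replacing the two separate random_rotation calls
transformed_image, transformed_image2 = random_rotation_pair(image_to_transform, image_to_transform2)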
Why are we getting this warning?
/home/mimus/anaconda3/envs/tf2/lib/python3.6/site-packages/ipykernel_launcher.py:51: UserWarning: /home/mimus/apiais/data/images/ANPR/training/masks_plate_loc/masks_366_29_augmented_image_10586.jpg is a low contrast image
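Most likely it comes from the contrast check that skimage.io.imsave runs before writing: a mask is almost entirely background with only a few foreground pixels, so after img_as_ubyte it looks like a low-contrast image to that check. For masks the warning is harmless; if the installed scikit-image is recent enough (0.16+), it can be silenced with check_contrast=False:

# silence the low-contrast check when saving masks (scikit-image >= 0.16)
io.imsave(new_file_path2, img_as_ubyte(transformed_image2), check_contrast=False)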
We also need data augmentation for the letters; in total we need
data augmentation for plate_loc, char_seg, and ocr (a possible way to organize that is sketched below).
As before, the starting point is "Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems".
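A possible way to reuse the paired loop for the segmentation stages, assuming plate_loc keeps the same original_* / masks_* folder layout as char_seg (the original_plate_loc name is an assumption, only masks_plate_loc appears in the warning above; the letters/ocr layout is not shown in this issue, so it is left out here):

segmentation_stages = ["plate_loc", "char_seg"]
base_dir = "/home/mimus/apiais/data/images/ANPR/training/"

def list_pairs(stage):
    # list the original images of a stage and derive the matching mask paths
    original_dir = os.path.join(base_dir, "original_" + stage)
    originals = sorted(
        os.path.join(original_dir, f)
        for f in os.listdir(original_dir)
        if f.endswith(".jpg")
    )
    masks = [p.replace("/original_", "/masks_") for p in originals]
    return originals, masks

for stage in segmentation_stages:
    original_img, mask_img = list_pairs(stage)
    print(stage, "->", len(original_img), "pairs")
    # then run the paired augmentation loop above on original_img / mask_img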