OlafenwaMoses / ImageAI

A python library built to empower developers to build applications and systems with self-contained Computer Vision capabilities
https://www.genxr.co/#products
MIT License
8.5k stars 2.19k forks source link

Feeding images to custom model for testing #234

Closed patrickduhaime closed 5 years ago

patrickduhaime commented 5 years ago

Hello,

I trainned a custom image prediction model using this script:

from imageai.Prediction.Custom import ModelTraining model_trainer = ModelTraining() model_trainer.setModelTypeAsResNet() model_trainer.setDataDirectory("MODERN_CAR_RACING") model_trainer.trainModel(num_objects=4, num_experiments=30, enhance_data=False, batch_size=16, show_network_summary=True)

Here is how I grabbed my images while driving a car simulator (https://www.crazygames.com/game/modern-car-racing):

from PIL import ImageGrab import numpy as np import cv2 import os import time from getkeys import key_check

""" All coordinates assume a screen resolution of 1366x768, and Chrome maximized with the Bookmarks Toolbar enabled.

x_pad = 271 y_pad = 236 Play area = x_pad+1, y_pad+1, x_pad+805, y_pad+461 """

Globals

------------------

x_pad = 271 y_pad = 236 #236 changer crop 170px box = (x_pad+1, y_pad+1, x_pad+805, y_pad+461) #y_pad+461 changer crop 170px screen = None

def getScreen(): screen = np.array(ImageGrab.grab(box)) lower_yellow = np.array([160,130,0], dtype = "uint16") upper_yellow = np.array([255,255,65], dtype = "uint16") yellow_mask = cv2.inRange(screen, lower_yellow, upper_yellow) screen[yellow_mask != 0] = [255,255,255] screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY) screen = cv2.resize(screen, (80,46)) return screen

def keys_to_output(keys): ''' Convert keys to a ...multi-hot... array

[A,W,D,S] boolean values.

''' output = [0,0,0,0] global screen

if 'A' in keys: output[0] = 1 screen = getScreen() elif 'D' in keys: output[2] = 1 screen = getScreen() elif 'S' in keys: output[3] = 1 screen = getScreen() elif 'W' in keys: output[1] = 1 screen = getScreen()

return screen,output

def main():

file_name = 'training_data.npy'

if os.path.isfile(file_name): print('File exists, loading previous data!') training_data = list(np.load(file_name)) else: print('File does not exist, starting fresh!') training_data = []

while(True): keys = key_check() screen,output = keys_to_output(keys) if(output != [0,0,0,0]): training_data.append([screen,output])

   if cv2.waitKey(25) & 0xFF == ord('q'):
       cv2.destroyAllWindows()
       break

   if len(training_data) % 100 == 0 and len(training_data) != 0:
       print(len(training_data))
       np.save(file_name,training_data)

if name == 'main': main()

This is how I converted the images to the file format needed by ImageAI:

import random from scipy import ndarray import skimage as sk import os import json import scipy.misc import numpy as np import argparse

parser = argparse.ArgumentParser(description="check which user..") parser.add_argument("-u", "--user", nargs=1)

user = None

def random_noise(image_array):

add random noise to the image

return sk.util.random_noise(image_array)

def horizontal_flip(image_array):

horizontal flip doesn't need skimage, it's easy as flipping the image array of pixels !

return image_array[:, ::-1]

def initialisation(): global user args = parser.parse_args() if args.user is None or args.user[0] == 'p': user = 'p' elif args.user[0] == 's': user = 's'

def main(): initialisation()

train_data = np.load('training_data.npy', allow_pickle=True) path = "./Modern_Car_Racing/train" with open('convert_config.' + user + '.json') as json_data_file: data = json.load(json_data_file)

forward = int(data['forward']) brake = int(data['brake']) left = int(data['left']) right = int(data['right'])

for i in train_data: if i[1] == [1, 0, 0, 0]: scipy.misc.imsave(path + '/left/' + str(left) + user + '.jpeg', i[0]) left += 1 if i[1] == [0, 0, 1, 0]: scipy.misc.imsave(path + '/right/' + str(right) + user + '.jpeg', i[0]) right += 1 if i[1] == [0, 0, 0, 1]: scipy.misc.imsave(path + '/brake/' + str(brake) + user + '.jpeg', i[0]) noised = random_noise(i[0]) scipy.misc.imsave(path + '/brake/' + str(brake) + user + '.noised.jpeg', noised)

hf_image = horizontal_flip(i[0])

       # hf_noised = horizontal_flip(noised)
       # scipy.misc.imsave(path + '/brake/' + str(brake) + user + '.hf.jpeg', hf_image)
       # scipy.misc.imsave(path + '/brake/' + str(brake) + user + '.hf.noised.jpeg', hf_noised)
       brake += 1
   if i[1] == [0, 1, 0, 0]:
       scipy.misc.imsave(path + '/forward/' + str(forward) + user + '.jpeg', i[0])
       forward += 1

data['forward'] = forward data['brake'] = brake data['left'] = left data['right'] = right

with open('convert_config.' + user + '.json', 'w') as outfile: json.dump(data, outfile)

os.remove("training_data.npy")

if name == 'main': main()

Here is how I test the model :

from imageai.Prediction.Custom import CustomImagePrediction import os execution_path = os.getcwd() import numpy as np from PIL import ImageGrab import cv2 from time import sleep from directkeys import PressKey,ReleaseKey, W, A, S, D

""" vAll coordinates assume a screen resolution of 1366x768, and Chrome maximized with the Bookmarks Toolbar enabled.

x_pad = 271 y_pad = 236 Play area = x_pad+1, y_pad+1, x_pad+805, y_pad+461 """

Globals

------------------

x_pad = 271 y_pad = 236 WIDTH = 80 HEIGHT = 46

def predict_actions(image):

result = []; prediction = CustomImagePrediction() prediction.setModelTypeAsResNet() prediction.setModelPath(os.path.join(execution_path, "./models/model_ex-007_acc-0.587639.h5")) prediction.setJsonPath(os.path.join(execution_path, "model_class.json")) prediction.loadModel(num_objects=4) predictions, probabilities = prediction.predictImage(image, result_count=4, input_type="array")

for eachPrediction, eachProbability in zip(predictions, probabilities): print(eachPrediction + ":" + eachProbability)

return result;

def left(): PressKey(A) sleep(2) ReleaseKey(A)

def right(): PressKey(D) sleep(2) ReleaseKey(D)

def forward(): PressKey(W) sleep(2) ReleaseKey(W)

def release(): ReleaseKey(W) ReleaseKey(A) ReleaseKey(D) ReleaseKey(S)

def brake(): PressKey(S) sleep(0.1) ReleaseKey(S)

def main(): img = [] box = (x_pad+1, y_pad+1, x_pad+805, y_pad+461) i = 0 while(True): screen = np.array(ImageGrab.grab(box)) lower_yellow = np.array([160,130,0], dtype = "uint16") upper_yellow = np.array([255,255,65], dtype = "uint16") yellow_mask = cv2.inRange(screen, lower_yellow, upper_yellow) screen[yellow_mask != 0] = [255,255,255] screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY) screen = cv2.resize(screen, (80,46))

screen = cv2.imread("8p.jpeg")

   predictionArray = predict_actions(screen)

   if cv2.waitKey(25) & 0xFF == ord('q'):
       cv2.destroyAllWindows()
       break

if name == 'main': main()

The issue is if I uncomment this line: #screen = cv2.imread("8p.jpeg")

I get this output: left:99.99867677688599 right:0.0013042812497587875 brake:2.696586136607948e-05 forward:3.564269990263824e-07

which is just fine, this image was taken from the left folder of the trainning data, If I cmment the line I get this error:

ValueError: Error when checking input: expected input_1 to have 4 dimensions, bu t got array with shape (1, 224, 224)

Thank you

patrickduhaime commented 5 years ago

Ok got it to work... After investigation, the problem was this line:

screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)

I imported rgb2gray from skimage.color

from skimage.color import rgb2gray

And changed to gray using this line:

screen = np.stack([rgb2gray(screen[i]) for i in range(screen.shape[0])])

OlafenwaMoses commented 5 years ago

I am glad you had this figured out.