googlecreativelab / quickdraw-dataset

Documentation on how to access and use the Quick, Draw! Dataset.
https://quickdraw.withgoogle.com/data

Convert saved drawing images/files (png/jpg) to same numpy (npy) bitmaps for prediction #34

Open shubhank008 opened 5 years ago

shubhank008 commented 5 years ago

I have been scratching my head for over five days now, trying various models and code repos, and I still have not been able to make this work. The model trains and evaluates well, but I am failing at actual predictions.
Instead of models based on drawing strokes, I have been experimenting with models that use actual drawing images for prediction (like an image classifier), and most of these models use the numpy bitmaps dataset (the .npy files).

Everything is well and good except for the part where I feed the model a drawing loaded from a saved image file (most of these articles and code repos fed it from a canvas via JS or Android). I tried to replicate their prediction code (mainly the image processing) as closely as I could in Python, but the predictions are still way off.

Here is my image processing and prediction code:

from PIL import Image
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from os import walk
from random import randint
%matplotlib inline

# qd is assumed to be a quickdraw.QuickDrawData() instance (from the quickdraw package)
clock = qd.get_drawing("circle")
apple = clock
apple.image.save("apple.png")

mypath = "data/"
txt_name_list = []
for (dirpath, dirnames, filenames) in walk(mypath):
    # filenames is a list, so filter out .DS_Store instead of comparing the whole list to it
    txt_name_list.extend([f for f in filenames if f != '.DS_Store'])
    break

def adjust_gamma(image, gamma=1.5):
    # Build a 256-entry lookup table that applies the gamma correction, then map the image through it.
    invGamma = 1.0 / gamma
    table = np.array([((i / 255.0) ** invGamma) * 255
                      for i in np.arange(0, 256)]).astype("uint8")

    return cv.LUT(image, table)

def preprocess(img):
    # For a sketch (not a canvas drawing) use the following:
    gray = cv.bilateralFilter(img, 9, 75, 75)
    gray = cv.erode(gray, None, iterations=1)
    gray = adjust_gamma(gray, 1.1)
    #return gray

    # Invert while thresholding so the ink ends up white on a black background.
    th3 = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 11, 2)
    #th3 = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 11, 2)
    return th3

#img = apple.image.convert("L")

#imgData = request.get_data()
#convertImage(imgData)
print("debug")

# Load the saved drawing as grayscale (scipy's imread/imresize are deprecated, so use OpenCV here)
x = cv.imread('apple.png', cv.IMREAD_GRAYSCALE)

x = preprocess(x)

#x = cv.bitwise_not(x)

x = cv.resize(x, (32, 32))

x = x.astype('float32')
x /= 255

x = x.reshape(1, 32, 32, 1)

print(txt_name_list)
#print(x)

out = model.predict(x)
#print(out)
print(np.argmax(out, axis=1))
index = np.array(np.argmax(out, axis=1))
index = index[0]

print(txt_name_list[index])

plt.imshow(x.squeeze()) 

There is quite a difference between how the images look in the numpy dataset and how mine look after I process them.

(Two screenshots comparing a dataset bitmap with my processed image.)
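For reference, this is roughly the conversion I think is needed to make a saved PNG look like the dataset bitmaps (a rough sketch only, assuming the .npy bitmaps are 28x28 grayscale with white ink on a black background and the saved drawing is dark strokes on white; png_to_bitmap is just my own helper name):

import numpy as np
import cv2 as cv

def png_to_bitmap(path, size=28):
    # Load as grayscale; a saved drawing is dark strokes on a white background.
    img = cv.imread(path, cv.IMREAD_GRAYSCALE)
    # Invert so the ink becomes white on black, like the dataset bitmaps.
    img = cv.bitwise_not(img)
    # Crop to the bounding box of the ink so the drawing fills the frame.
    ys, xs = np.where(img > 0)
    if len(xs) > 0:
        img = img[ys.min():ys.max() + 1, xs.min():xs.max() + 1]
    # Pad to a square before resizing so the aspect ratio is preserved.
    h, w = img.shape
    side = max(h, w)
    pad_y, pad_x = (side - h) // 2, (side - w) // 2
    img = cv.copyMakeBorder(img, pad_y, side - h - pad_y, pad_x, side - w - pad_x,
                            cv.BORDER_CONSTANT, value=0)
    # Resize down to the dataset resolution and scale to [0, 1].
    img = cv.resize(img, (size, size), interpolation=cv.INTER_AREA)
    return img.astype('float32') / 255.0

# For the 32x32 LeNet below, pad the 28x28 bitmap the same way the training data is padded.
bitmap = png_to_bitmap('apple.png')
x = np.pad(bitmap, ((2, 2), (2, 2)), 'constant').reshape(1, 32, 32, 1)

The bounding-box crop and square padding are my guess at why my processed images look so different from the dataset samples: the dataset drawings fill most of the 28x28 frame, while a plain resize of the whole canvas leaves the sketch small and off-centre.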

Here is my full model:

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from os import walk, getcwd
import h5py
import cv2 as cv
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, BatchNormalization, AveragePooling2D
from keras.utils import np_utils
from keras import backend as K
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

#For Multi GPU
from keras.utils import multi_gpu_model
from keras import metrics

batch_size = 128

epochs = 40

img_rows, img_cols = 28, 28

mypath = "data/"
txt_name_list = []

#slice_train = 30500
slice_train = 10000

def top_3_acc(y_true, y_pred):
    return metrics.top_k_categorical_accuracy(y_true, y_pred, k=3)

def readData():
    x_train = []
    x_test = []
    y_train = []
    y_test = []
    xtotal = []
    ytotal = []
    x_val = []
    y_val = []

    for (dirpath, dirnames, filenames) in walk(mypath):
        # filenames is a list, so filter out .DS_Store instead of comparing the whole list to it
        txt_name_list.extend([f for f in filenames if f != '.DS_Store'])
        break

    #print(mypath)
    i=0
    classescount = 0

    for txt_name in txt_name_list:
        txt_path = mypath + txt_name
        x = np.load(txt_path)
        print(txt_name)
        print(i)
        classescount += 1
        x = x.astype('float32') / 255.  ##scale images
        y = [i] * len(x)
        x = x[:slice_train]
        y = y[:slice_train]

        if i != 0:
            xtotal = np.concatenate((x, xtotal), axis=0)
            ytotal = np.concatenate((y, ytotal), axis=0)
        else:
            xtotal = x
            ytotal = y
        i += 1

    print(classescount)
    print("xshape = ", xtotal.shape)
    print("yshape = ", ytotal.shape)
    x_train, x_test, y_train, y_test = train_test_split(xtotal, ytotal, test_size=0.3, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=1)

    return x_train, x_val, x_test, y_train, y_val, y_test, classescount

def lenet(x_train, x_val, x_test, y_train, y_val, y_test, num_classes):
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        x_val = x_val.reshape(x_val.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        x_val = x_val.reshape(x_val.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    # more reshaping (readData already scaled the data to [0, 1], so don't divide by 255 again)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_val = x_val.astype('float32')

    # convert class vectors
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    y_val = keras.utils.to_categorical(y_val, num_classes)

    x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
    x_val = np.pad(x_val, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
    x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_val.shape[0], 'validation samples')
    print(x_test.shape[0], 'test samples')

    print(y_train.shape)

    print(input_shape)

    model = Sequential()

    model.add(Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 1)))
    model.add(AveragePooling2D())

    model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
    model.add(AveragePooling2D())

    model.add(Flatten())

    model.add(Dense(units=120, activation='relu'))

    model.add(Dense(units=84, activation='relu'))

    model.add(Dense(units=num_classes, activation='softmax'))

    filepath = "saved/weightslenet.{epoch:02d}.h5"
    ES = EarlyStopping(patience=5)
    check = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=False, mode='max')

    #model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy', top_3_acc])
    #Trying Multi GPU
    #model = multi_gpu_model(model, gpus=2)
    #model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(x_train, y_train,batch_size=batch_size,epochs=epochs,verbose=1, validation_data=(x_val, y_val), callbacks=[ES, check])

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    model.save('cnnOld2.h5')
    print("Saved model to disk")
    #
    # cm = metrics.confusion_matrix(test_batch.classes, y_pred)
    # # or
    # # cm = np.array([[1401,    0],[1112, 0]])
    #
    # plt.imshow(cm, cmap=plt.cm.Blues)
    # plt.xlabel("Predicted labels")
    # plt.ylabel("True labels")
    # plt.xticks([], [])
    # plt.yticks([], [])
    # plt.title('Confusion matrix ')
    # plt.colorbar()
    # plt.show()
    print(y_test)

    loaded_model = keras.models.load_model('cnnOld2.h5', custom_objects={"top_3_acc": top_3_acc})
    print("test")
    #y_pred = loaded_model.predict_on_batch(x_test)
    #score = loaded_model.evaluate(x_test, y_test, verbose=0)

    y_pred = loaded_model.predict(x_test)
    print(y_pred)

    # Turn each row of probabilities into a one-hot vector for the predicted class.
    indexes = np.argmax(y_pred, axis=1)
    i = 0
    for y in y_pred:
        y[y < 1000] = 0  # every probability is far below 1000, so this zeroes the whole row
        # print("allzero", y)
        y[indexes[i]] = 1
        i += 1

    cm = confusion_matrix(
        y_test.argmax(axis=1), y_pred.argmax(axis=1))
    acc = accuracy_score(y_test.argmax(axis=1), y_pred.argmax(axis=1), normalize=True, sample_weight=None)
    cr = classification_report(y_test.argmax(axis=1), y_pred.argmax(axis=1))
    print(cm)
    print(acc)
    print(cr)

def main():
    x_train, x_val, x_test, y_train, y_val, y_test, num_classes = readData()
    lenet(x_train, x_val, x_test, y_train, y_val, y_test, num_classes)

if __name__ == '__main__':
    main()
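As a sanity check (this reuses txt_name_list and top_3_acc from the script above, plus the converted x from the earlier sketch; 'data/circle.npy' is only a placeholder file name), I would compare a sample taken straight from one of the .npy files with the converted PNG, because if the first predicts correctly and the second does not, the problem is the preprocessing rather than the model:

import numpy as np
import keras

loaded = keras.models.load_model('cnnOld2.h5', custom_objects={"top_3_acc": top_3_acc})

# One sample straight from a dataset file, scaled and padded exactly like the training data.
sample = np.load('data/circle.npy')[0].astype('float32') / 255.0   # placeholder file name
sample = np.pad(sample.reshape(28, 28), ((2, 2), (2, 2)), 'constant').reshape(1, 32, 32, 1)
print('dataset sample ->', txt_name_list[int(np.argmax(loaded.predict(sample), axis=1)[0])])

# The PNG converted with png_to_bitmap from the earlier sketch.
print('converted png  ->', txt_name_list[int(np.argmax(loaded.predict(x), axis=1)[0])])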
shubhank008 commented 5 years ago

Another, different approach I tried, with the same problem when running predictions on saved images. This model also works on Android, but I cannot seem to process an image file the same way to get a prediction:

https://colab.research.google.com/github/zaidalyafeai/zaidalyafeai.github.io/blob/master/sketcher/Sketcher.ipynb
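One more thing that might matter when comparing against the canvas/JS path: a canvas export is usually an RGBA PNG with a transparent background, so the alpha channel has to be flattened onto white before converting to grayscale. A minimal sketch (not the notebook's actual code; load_canvas_png is my own name, and the 28x28 target matches that model's input):

from PIL import Image
import numpy as np

def load_canvas_png(path, size=28):
    # Flatten a possibly transparent canvas export onto a white background.
    img = Image.open(path).convert('RGBA')
    background = Image.new('RGBA', img.size, (255, 255, 255, 255))
    img = Image.alpha_composite(background, img).convert('L')
    # Invert to white ink on black and scale to [0, 1], like the dataset bitmaps.
    arr = 255 - np.array(img.resize((size, size), Image.LANCZOS))
    return (arr.astype('float32') / 255.0).reshape(1, size, size, 1)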

rachitanayak commented 4 years ago

Hi, were you able to do the conversion to .npy format?