Calamari-OCR / calamari

Line based ATR Engine based on OCRopy
Apache License 2.0

Attention layer #339

Open Tailor2019 opened 1 year ago

Tailor2019 commented 1 year ago

@andbue Hello! Can you please help me replace the CTC part in this OCR with an attention layer? I'm trying to use this code:

```python
import os

import tensorflow as tf
from tensorflow.keras import losses, models
from tensorflow.keras.layers import (Input, Convolution2D, BatchNormalization,
                                     Activation, MaxPool2D, Dropout, Reshape,
                                     LSTM, Dense)
from keras_self_attention import SeqSelfAttention


def get_2d_conv_LSTM_atten_model(n):
    '''Create a standard deep 2D convolutional neural network.'''
    nclass = 8
    inp = Input(shape=(n, 216, 1))  # 2D matrix of 30 MFCC bands by 216 audio length

    x = Convolution2D(64, (3, 3), strides=(1, 1), padding="same")(inp)  # (4,10)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D()(x)
    x = Dropout(rate=0.2)(x)

    x = Convolution2D(128, (3, 3), strides=(1, 1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D()(x)
    x = Dropout(rate=0.2)(x)

    x = Convolution2D(256, (3, 3), strides=(1, 1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D()(x)
    x = Dropout(rate=0.2)(x)

    x = Convolution2D(128, (3, 3), strides=(1, 1), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPool2D()(x)
    x = Dropout(rate=0.2)(x)

    x = Reshape((-1, 128))(x)

    # LSTM block with self-attention in between
    x = LSTM(32, return_sequences=True)(x)
    x = SeqSelfAttention(attention_activation='tanh')(x)
    x = LSTM(32, return_sequences=False)(x)

    out = Dense(nclass, activation='softmax')(x)
    model = models.Model(inputs=inp, outputs=out)

    opt = tf.keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6)
    model.compile(optimizer=opt, loss=losses.categorical_crossentropy,
                  metrics=['acc'])
    return model
```
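Note that, as written, this network is a whole-sequence classifier: the final `LSTM(32, return_sequences=False)` collapses the time axis, and the head predicts one of `nclass = 8` labels with categorical cross-entropy, whereas a CTC-based recognizer like Calamari needs one output distribution per timestep. As a minimal sketch (layer sizes kept from the snippet, purely illustrative), the tail of `get_2d_conv_LSTM_atten_model` would have to preserve the sequence:

```python
# Sequence-preserving tail: each timestep keeps its own output, which is
# the shape a downstream CTC loss expects (see the sketch at the end).
x = LSTM(32, return_sequences=True)(x)
x = SeqSelfAttention(attention_activation='tanh')(x)
x = LSTM(32, return_sequences=True)(x)        # keep the time axis
out = Dense(nclass, activation='softmax')(x)  # shape per sample: (timesteps, nclass)
```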

Normalization as per the standard NN process:

```python
X_train_norm, X_test_norm, X_val_norm = data_normalization(X_train, X_test, X_val)
```
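`data_normalization` is not defined in the snippet; a minimal sketch, assuming plain z-score normalization with statistics taken from the training set only:

```python
import numpy as np

def data_normalization(X_train, X_test, X_val):
    # Fit mean/std on the training data only, then apply the same statistics
    # to all splits, so no information leaks from test/validation.
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0) + 1e-8  # epsilon avoids division by zero
    return ((X_train - mean) / std,
            (X_test - mean) / std,
            (X_val - mean) / std)
```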

One-hot encode the target:

```python
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils, to_categorical

lb = LabelEncoder()

# Fit the encoder on the training labels, then reuse the same mapping
# (transform, not fit_transform) for the test and validation splits.
y_train_cat = np_utils.to_categorical(lb.fit_transform(y_train))  # for GPU
y_test_cat = np_utils.to_categorical(lb.transform(y_test))        # for GPU
y_val_cat = np_utils.to_categorical(lb.transform(y_val))          # for GPU

y_train = to_categorical(lb.fit_transform(y_train))  # for TPU
y_test = to_categorical(lb.transform(y_test))        # for TPU
y_val = to_categorical(lb.transform(y_val))          # for TPU

print(X_train.shape)
print(lb.classes_)
```

```python
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

model = get_2d_conv_LSTM_atten_model(n_mfcc)
model.summary()

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)

model_name = 'best_model.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
print('Save model and weights at %s' % model_path)
mc = ModelCheckpoint(model_path, monitor='val_acc', mode='max', verbose=1,
                     save_best_only=True)

model_history = model.fit(X_train_norm, y_train_cat,
                          validation_data=(X_val_norm, y_val_cat),
                          batch_size=32, verbose=1, epochs=200,
                          callbacks=[es, mc])

results = get_results(model_history, model, X_test_norm, y_test_cat,
                      ref.label.unique())
results.create_plot(model_history)

# Serialize the model architecture to JSON
model_json = model.to_json()
with open("model_json.json", "w") as json_file:
    json_file.write(model_json)
```

Loading the JSON and model architecture:

```python
from keras.models import model_from_json

json_file = open('model_json.json', 'r')
loaded_model_json = json_file.read()
json_file.close()

# SeqSelfAttention is a custom layer, so it must be passed via
# custom_objects for deserialization to work.
loaded_model = model_from_json(loaded_model_json,
                               custom_objects={'SeqSelfAttention': SeqSelfAttention})
```

Load the weights into the new model:

```python
loaded_model.load_weights(model_path)  # "saved_models/best_model.h5"
print("Loaded model from disk")
```

But I failed to adapt this model. Please help: how can I use an attention model with Calamari? Thank you in advance.
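A minimal sketch of how the two could be combined, assuming the Calamari-style setting of line recognition with CTC: keep the CTC loss (which provides alignment-free transcription over the line) and insert the self-attention layer between the recurrent block and the per-timestep softmax, so the time axis survives to the loss. All input shapes, layer sizes, and the `ctc_batch_cost` wiring below are illustrative assumptions, not Calamari internals:

```python
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Input, Conv2D, MaxPool2D, Reshape,
                                     Bidirectional, LSTM, Dense, Lambda)
from tensorflow.keras.models import Model
from keras_self_attention import SeqSelfAttention

NUM_CLASSES = 100   # alphabet size + 1 for the CTC blank (assumed)
LINE_WIDTH = 256    # fixed line-image width, treated as the time axis (assumed)
LINE_HEIGHT = 48    # fixed line-image height after preprocessing (assumed)

# Backbone: conv features, then a sequence model over the width axis.
inp = Input(shape=(LINE_WIDTH, LINE_HEIGHT, 1), name='line_image')
x = Conv2D(40, (3, 3), padding='same', activation='relu')(inp)
x = MaxPool2D((2, 2))(x)                      # width 256 -> 128, height 48 -> 24
x = Conv2D(60, (3, 3), padding='same', activation='relu')(x)
x = MaxPool2D((2, 2))(x)                      # width 128 -> 64, height 24 -> 12
x = Reshape((64, 12 * 60))(x)                 # 64 timesteps, height*channels features
x = Bidirectional(LSTM(200, return_sequences=True))(x)
x = SeqSelfAttention(attention_activation='tanh')(x)   # time axis preserved
y_pred = Dense(NUM_CLASSES, activation='softmax')(x)   # per-timestep distribution

# CTC wiring: the loss is computed inside the graph via a Lambda layer.
labels = Input(shape=(None,), name='labels')
input_len = Input(shape=(1,), name='input_length')     # = 64 for every sample here
label_len = Input(shape=(1,), name='label_length')
ctc_loss = Lambda(lambda args: K.ctc_batch_cost(*args), name='ctc')(
    [labels, y_pred, input_len, label_len])

train_model = Model([inp, labels, input_len, label_len], ctc_loss)
# The Lambda layer already outputs the loss, so compile just passes it through.
train_model.compile(optimizer='adam', loss=lambda y_true, loss_out: loss_out)
```

Decoding would then go through `K.ctc_decode` on `y_pred`. None of this plugs into Calamari's network specification string directly; wiring an attention layer into Calamari itself would mean editing its model-building code at the source level.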