Open solg1998 opened 3 weeks ago
Hello,
I suggest using Google Colab. If you have a Google account, you can upload your data to Google Drive and then access it from Colab. I think Colab's hardware should be sufficient for your project; if necessary, you can get access to more powerful GPUs for a fee. Here you can find how to use it.
Thanks.
I tried using Google Colab, but I had the same issue. It seems there are some problems in my code, so I am posting it here. I would be very grateful if you could check it.
import tensorflow as tf from tensorflow.keras.applications import VGG19 from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input from tensorflow.keras.models import Model import os import cv2 import numpy as np
############################ Load and prepare dataset #####################
# Build the paths with os.path.join so they resolve on Linux/Colab as well as
# on Windows. The previous 'valid\images' / 'valid\labels' literals hard-coded
# a backslash separator and relied on '\i' and '\l' being unknown escapes.
dataset_path = os.path.join('valid', 'images')
annotations_path = os.path.join('valid', 'labels')
def load_dataset(dataset_path, annotations_path, image_size=640):
    """Load images and their per-image annotation files into NumPy arrays.

    Each annotation line produces one sample, so an image with several
    annotated objects is repeated once per object.

    Args:
        dataset_path: Directory containing the image files.
        annotations_path: Directory containing one ``.txt`` file per image,
            named after the image, with whitespace-separated lines of the
            form ``class xc yc w h`` (coordinates are assumed to be
            normalised to the image size -- confirm against your labels).
        image_size: Side length, in pixels, that every image is resized to
            and that the box coordinates are scaled by. Defaults to 640.

    Returns:
        A tuple ``(X_train, y_bbox_train, y_class_train)`` where
        ``X_train`` holds the RGB images, ``y_bbox_train`` holds
        ``[xmin, ymin, xmax, ymax]`` rows in pixels (shape ``(N, 4)``, also
        when ``N`` is 0) and ``y_class_train`` holds the class labels.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
    images, boxes, labels = [], [], []

    # Sorted so that the sample order does not depend on the file system.
    for filename in sorted(os.listdir(dataset_path)):
        stem, extension = os.path.splitext(filename)
        if extension.lower() not in image_extensions:
            continue  # not an image (e.g. a stray text or cache file)

        # splitext handles any image extension; the old
        # filename.replace('.jpg', '.txt') only worked for lower-case .jpg.
        annotation_file = os.path.join(annotations_path, stem + '.txt')
        if not os.path.isfile(annotation_file):
            continue  # image without a label file: nothing to train on

        image = cv2.imread(os.path.join(dataset_path, filename))
        if image is None:
            continue  # cv2.imread signals an unreadable file with None

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
        # Every image must share one shape so np.array can stack them into a
        # single (N, H, W, 3) array.
        image = cv2.resize(image, (image_size, image_size))

        with open(annotation_file, 'r') as file:
            for line in file:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines in the label file

                class_label, xc, yc, w, h = map(float, fields)
                # Centre/size representation -> corner coordinates in pixels.
                xmin = (xc - w / 2) * image_size
                xmax = (xc + w / 2) * image_size
                ymin = (yc - h / 2) * image_size
                ymax = (yc + h / 2) * image_size

                images.append(image)
                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(class_label)

    # Convert lists to numpy arrays; the reshape keeps the box array
    # two-dimensional even when no sample was found.
    X_train = np.array(images)
    y_bbox_train = np.array(boxes, dtype=float).reshape(-1, 4)
    y_class_train = np.array(labels)
    return X_train, y_bbox_train, y_class_train
# Read the whole dataset into memory, then hold out 20% of it for validation.
X_train, y_bbox_train, y_class_train = load_dataset(dataset_path, annotations_path)

from sklearn.model_selection import train_test_split

(
    X_train, X_val,
    y_bbox_train, y_bbox_val,
    y_class_train, y_class_val,
) = train_test_split(X_train, y_bbox_train, y_class_train, test_size=0.2)

# Report the sizes of the training split.
print('X_train shape:', X_train.shape)
print('y_bbox_train shape:', y_bbox_train.shape)
print('y_class_train shape:', y_class_train.shape)
###########################################################################
def custom_bbox_loss(y_true, y_pred):
    """Smooth L1 loss for bounding-box regression.

    Both tensors are reshaped to rows of four values. For every value the
    penalty is ``0.5 * d**2`` when the absolute error ``d`` is below 1 and
    ``d - 0.5`` otherwise.

    Args:
        y_true: Target box values.
        y_pred: Predicted box values.

    Returns:
        A tensor with one loss value per row of four values (the per-value
        penalties summed over the last axis).
    """
    target_boxes = tf.reshape(y_true, shape=(-1, 4))
    predicted_boxes = tf.reshape(y_pred, shape=(-1, 4))

    error = tf.abs(target_boxes - predicted_boxes)
    quadratic_part = 0.5 * tf.square(error)  # used for small errors
    linear_part = error - 0.5                # used for large errors
    per_value_loss = tf.where(tf.less(error, 1), quadratic_part, linear_part)

    # Sum the loss for each box
    return tf.reduce_sum(per_value_loss, axis=-1)
def custom_class_loss(y_true, y_pred):
    """Categorical cross-entropy between class indices and probabilities.

    The class head in this script ends in a softmax, so ``y_pred`` already
    holds probabilities; passing it to
    ``tf.nn.softmax_cross_entropy_with_logits`` (as before) applied softmax a
    second time. The targets are class indices, not one-hot rows, so they are
    one-hot encoded here to the width of the prediction.

    Args:
        y_true: Class index per sample, in any shape that flattens to one
            value per sample (e.g. ``(batch,)`` or ``(batch, 1)``).
        y_pred: Predicted class probabilities, classes on the last axis.

    Returns:
        Scalar tensor: the mean cross-entropy over the batch.
    """
    num_outputs = tf.shape(y_pred)[-1]
    probabilities = tf.reshape(y_pred, shape=(-1, num_outputs))

    # One label per sample, regardless of how the batch dimension arrived.
    labels = tf.cast(tf.reshape(y_true, shape=(-1,)), tf.int32)
    # Indices outside [0, num_outputs) encode as an all-zero row, so a head
    # that is narrower than the label range contributes zero loss rather
    # than raising.
    targets = tf.one_hot(labels, depth=num_outputs, dtype=probabilities.dtype)

    # Clip before the log so a probability of exactly 0 cannot yield -inf.
    probabilities = tf.clip_by_value(
        probabilities, tf.keras.backend.epsilon(), 1.0)
    per_sample_loss = -tf.reduce_sum(
        targets * tf.math.log(probabilities), axis=-1)
    return tf.reduce_mean(per_sample_loss)
num_classes = 45
input_shape = (640, 640, 3)  # Adjust the input shape as per your requirements

# Pre-trained VGG19 feature extractor; its weights stay frozen so only the
# layers added below are trained.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
for layer in base_model.layers:
    layer.trainable = False

##################### Adaption of detection head of SSD ##############################
num_default_boxes = 1

x = Conv2D(256, (3, 3), activation='relu', padding='same')(base_model.output)
x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
x = Flatten()(x)

# 4 values per box: (xmin, ymin, xmax, ymax). The targets built by
# load_dataset are pixel coordinates (scaled by 640), which a sigmoid output
# limited to (0, 1) could never reach, so the regression head is linear.
bbox_predictions = Dense(num_default_boxes * 4, activation='linear')(x)

# One probability per class. The head used to have num_default_boxes (= 1)
# units, and a softmax over a single unit always outputs 1.0, so nothing
# could be learned. The softmax spans all units, which is only meaningful
# while num_default_boxes is 1.
class_predictions = Dense(num_default_boxes * num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs=base_model.input, outputs=[bbox_predictions, class_predictions])
# The losses are matched to the outputs by position: boxes first, then classes.
model.compile(optimizer='adam', loss=[custom_bbox_loss, custom_class_loss])
model.summary()
######################################################################################
batch_size = 16
num_epochs = 10

for epoch in range(num_epochs):
    # Stays None when the training set holds fewer samples than one batch,
    # so the report below cannot fail on an undefined name.
    loss = None

    # Only full batches are used; a trailing partial batch is skipped.
    for i in range(len(X_train) // batch_size):
        # The '*' operators were missing from these slice bounds
        # ("ibatch_size:(i+1)batch_size").
        start = i * batch_size
        end = (i + 1) * batch_size
        X_batch = X_train[start:end]
        y_bbox_batch = y_bbox_train[start:end]
        y_class_batch = y_class_train[start:end]

        # Train on batch
        loss = model.train_on_batch(X_batch, [y_bbox_batch, y_class_batch])

        # Break the loop if 'q' is pressed. This leaves only the batch loop;
        # validation still runs and the next epoch still starts.
        # NOTE(review): cv2.waitKey presumably needs an open OpenCV window to
        # receive key presses, and none is created here -- confirm.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    # Validate the model, in batches of the training size to limit peak memory.
    val_loss = model.evaluate(
        X_val, [y_bbox_val, y_class_val], batch_size=batch_size, verbose=0)
    print(f'Epoch {epoch+1}/{num_epochs} - Loss: {loss} - Validation Loss: {val_loss}')

model.save('custom_object_detection_model.keras')
Hello,
Could you please upload a screenshot of the issue here? Alternatively, you can email me the screenshots related to the issue in your code at doguilmak@gmail.com. I’m having trouble understanding it clearly from the Markdown.
Hello, nice to meet you. I am trying to integrate two object detection models, Faster R-CNN and SSD, to improve performance. I have written my own training script that uses the SSD backbone as the head of Faster R-CNN, but it does not train: I run into memory issues. Could you help me? Thanks.