Open solg1998 opened 3 weeks ago
I can suggest you to use Google Colab. If you have a Google account, you can transfer your data to Google Drive and then access it via Colab. I think Colab's hardware may be sufficient for your project. If necessary, you can access more advanced GPUs for a fee. Here you can find how to use it.
I tried using Google Colab, but I had same issue. It seems that there are some issues in my code. I will upload it in here. If you can check it, I am very happy.
import tensorflow as tf from tensorflow.keras.applications import VGG19 from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input from tensorflow.keras.models import Model import os import cv2 import numpy as np
############################ Load and prepare dataset #####################
dataset_path = 'valid\images' annotations_path = 'valid\labels'
def load_dataset(dataset_path, annotations_path): X_train, y_bbox_train, y_class_train = [], [], []
# Iterate through files in the dataset directory
for filename in os.listdir(dataset_path):
image_path = os.path.join(dataset_path, filename)
# Load image
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert to RGB
# Load corresponding annotation for the image
annotation_file = os.path.join(annotations_path, filename.replace('.jpg', '.txt'))
with open(annotation_file, 'r') as file:
# Extract bounding box coordinates and class label
for line in file:
# Parse annotation data (modify as per your annotation format)
class_label, xc, yc, w, h = map(float, line.strip().split())
xmin = (xc - w/2)*640
xmax = (xc + w/2)*640
ymin = (yc - h/2)*640
ymax = (yc + h/2)*640
# Preprocess images and annotations (resize, normalize, etc.)
# Implement data augmentation if required
y_bbox_train.append([xmin, ymin, xmax, ymax]) # Bounding box coordinates
y_class_train.append(class_label) # Class label
# Convert lists to numpy arrays
X_train = np.array(X_train)
y_bbox_train = np.array(y_bbox_train)
y_class_train = np.array(y_class_train)
return X_train, y_bbox_train, y_class_train
X_train, y_bbox_train, y_class_train = load_dataset(dataset_path, annotations_path)
from sklearn.model_selection import train_test_split X_train, X_val, y_bbox_train, y_bbox_val, y_class_train, y_class_val = train_test_split(X_train, y_bbox_train, y_class_train, test_size=0.2)
print('X_train shape:', X_train.shape) print('y_bbox_train shape:', y_bbox_train.shape) print('y_class_train shape:', y_class_train.shape)
def custom_bbox_loss(y_true, y_pred):
# y_true_value =
# print(y_true_value)
# Reshape the true and predicted tensors
y_true = tf.reshape(y_true, shape=(-1, 4)) # Adjust shape based on the output
y_pred = tf.reshape(y_pred, shape=(-1, 4)) # Adjust shape based on the output
# Define smooth L1 loss for bounding box regression
diff = y_true - y_pred
abs_diff = tf.abs(diff)
smooth_l1_loss = tf.where(tf.less(abs_diff, 1), 0.5 * tf.square(abs_diff), abs_diff - 0.5)
# Calculate the total loss
total_loss = tf.reduce_sum(smooth_l1_loss, axis=-1) # Sum the loss for each box
return total_loss
def custom_class_loss(y_true, y_pred):
# Ensure target shape matches the output shape
y_true = tf.transpose(y_true) if y_true.shape[0] == 1 else y_true
# Define your custom loss computation here
# Example: Categorical cross-entropy loss
class_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_true, y_pred))
return class_loss
num_classes = 45
input_shape = (640, 640, 3) # Adjust the input shape as per your requirements
base_model = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
for layer in base_model.layers: layer.trainable = False
##################### Adaption of detection head of SSD ##############################
num_default_boxes = 1
x = Conv2D(256, (3, 3), activation='relu', padding='same')(base_model.output) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x) x = Conv2D(256, (3, 3), activation='relu', padding='same')(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x) x = Conv2D(128, (3, 3), activation='relu', padding='same')(x) x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
x = Flatten()(x)
bbox_predictions = Dense(num_default_boxes * 4, activation='sigmoid')(x) # 4 parameters per box (x, y, w, h) class_predictions = Dense(num_default_boxes, activation='softmax')(x) # Probability for each class
model = tf.keras.Model(inputs=base_model.input, outputs=[bbox_predictions, class_predictions])
model.compile(optimizer='adam', loss=[custom_bbox_loss, custom_class_loss])
batch_size = 16 num_epochs = 10
for epoch in range(num_epochs): for i in range(len(X_train) // batch_size): X_batch = X_train[ibatch_size:(i+1)batch_size] y_bbox_batch = y_bbox_train[ibatch_size:(i+1)batch_size] y_class_batch = y_class_train[ibatch_size:(i+1)batch_size]
# Train on batch
loss = model.train_on_batch(X_batch, [y_bbox_batch, y_class_batch])
# bbox_loss = model.train_on_batch(X_batch, y_bbox_batch)
# class_loss = model.train_on_batch(X_batch, y_class_batch)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord("q"):
# Validate the model
val_loss = model.evaluate(X_val, [y_bbox_val, y_class_val], verbose=0)
# val_loss = model.evaluate(X_val, y_bbox_val, verbose=0)
print(f'Epoch {epoch+1}/{num_epochs} - Loss: {loss} - Validation Loss: {val_loss}')'custom_object_detection_model.keras')
Could you please upload a screenshot of the issue here? Alternatively, you can email me the screenshots related to the issue in your code at I’m having trouble understanding it clearly from the Markdown.
Dear. Nice to meet you. I am going to integrate two object detection models for performance improvement. Faster R-CNN model and SSD model. I have developed my own training script using ssd backbone as the head of faster r-cnn. But it is not trained. I have memory issues. Could you help me now? Thanks.