I am working on an internship project in which I am training a model to detect distresses on roads. I have multiple images that I labeled myself, and I have written code for the model, but it does not work properly, so I would like some guidance. #1224

Open aditya2k4anu opened 3 months ago

aditya2k4anu commented 3 months ago

import os import numpy as np import tensorflow as tf from tensorflow.keras.preprocessing.image import load_img, img_to_array from tensorflow.keras.models import Model from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Embedding, LSTM, TimeDistributed, RepeatVector from tensorflow.keras.preprocessing.sequence import pad_sequences from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt import matplotlib.patches as patches

Step 1: Load the data from the directories

# Folder with the road images and folder with one label file per image.
image_dir = 'D:/Image Classification using CNN/1'
text_dir = 'D:/Image Classification using CNN/1-Labled'

# Sorted so images and label files are paired in a stable order.
image_files = sorted(
    name for name in os.listdir(image_dir) if name.endswith('.jpg')
)

# The label file has the same name as the image, with '.jpg' replaced by '.txt'.
image_paths = [os.path.join(image_dir, name) for name in image_files]
text_paths = [
    os.path.join(text_dir, name.replace('.jpg', '.txt'))
    for name in image_files
]

# Filled by the loading loop: one entry per successfully loaded sample.
images = []
labels = []

Function to load labels from text file

def load_labels(label_path):
    """Read one annotation file and return its values as a flat list of floats.

    Every whitespace-separated token in the file is converted with ``float``,
    so a multi-line file is flattened into a single list in reading order.

    NOTE(review): the original comprehension had no iterable (``for x in]``).
    Splitting on whitespace is an assumption about the label format -- confirm
    it against the actual ``.txt`` files.

    Args:
        label_path: Path of the label text file.

    Returns:
        list[float]: The parsed values; an empty list for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token cannot be converted to ``float``.
    """
    with open(label_path, 'r') as file:
        return [float(token) for token in file.read().split()]

# Every image is resized to this size before being stacked into one array.
target_size = (150, 150)  # Define your target size here

for img_path, txt_path in zip(image_paths, text_paths):
    try:
        img = load_img(img_path, target_size=target_size)  # Load and resize to target_size
        img = img_to_array(img)
        img = img / 255.0  # Normalize to [0, 1]

        lbl = load_labels(txt_path)
    except Exception as e:
        # Best effort: report the failing pair and carry on with the rest.
        print(f"Error loading {img_path} or {txt_path}: {e}")
    else:
        # Store the sample only when both the image and its labels loaded, so
        # `images` and `labels` stay aligned index-for-index. Without these
        # appends both lists stay empty and the later steps have no data.
        images.append(img)
        labels.append(lbl)

Step 2: Pad the labels to the maximum length

# Length of the longest label vector; shorter vectors are padded at the end
# ('post') so that all of them share this length.
max_label_length = max(map(len, labels))
labels = pad_sequences(
    labels,
    maxlen=max_label_length,
    padding='post',
    dtype='float32',
)

Convert lists to numpy arrays

# Turn the per-sample collections into NumPy arrays for splitting and training.
images, labels = np.array(images), np.array(labels)

Split the data into training and validation sets

# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split reproducible between runs.
(x_train, x_val,
 y_train, y_val) = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

Step 3: Build the model

Image model

# Convolutional feature extractor for the image input.
image_input = Input(shape=(*target_size, 3))  # Use target_size dimensions

x = image_input
# Three conv/pool stages: 3x3 convolutions with 32, 64 and 128 filters, each
# followed by 2x2 max pooling.
for n_filters in (32, 64, 128):
    x = Conv2D(n_filters, (3, 3), activation='relu')(x)
    x = MaxPooling2D(2, 2)(x)

# Dense head that condenses the feature maps into a 256-value image vector.
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
image_features = Dense(256, activation='relu')(x)

Label model

# Second input branch: the padded label vector, embedded and run through an
# LSTM that returns one 256-value output per label position.
# NOTE(review): Embedding looks up integer indices below input_dim (1000); the
# labels are loaded as floats -- confirm they are valid integer class ids.
# NOTE(review): recent Keras releases report `input_length` as deprecated --
# confirm against the installed version.
label_input = Input(shape=(max_label_length,))
embedded_labels = Embedding(
    input_dim=1000,
    output_dim=256,
    input_length=max_label_length,
)(label_input)
lstm_out = LSTM(256, return_sequences=True)(embedded_labels)

Repeat image features to match the label sequence length

# Copy the image feature vector once per label position so it can be joined
# with the per-position LSTM output.
repeat_per_position = RepeatVector(max_label_length)
repeated_image_features = repeat_per_position(image_features)

Combining repeated image features and LSTM output

# Join image features and label features for every label position.
merge_layer = tf.keras.layers.Concatenate()
combined = merge_layer([repeated_image_features, lstm_out])

# One sigmoid unit applied independently at each label position.
per_position_head = TimeDistributed(Dense(1, activation='sigmoid'))
output = per_position_head(combined)  # Adjust output for multi-label

# Two-input model: the image and the padded label vector.
model = Model(
    inputs=[image_input, label_input],
    outputs=output,
)

Step 4: Compile the model

# Configure training: Adam optimizer, per-position binary cross-entropy, and
# two metrics reported after the loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',  # Use binary cross-entropy for multi-label classification
    metrics=['accuracy', 'binary_accuracy'],  # Add binary_accuracy for evaluation
)

Step 5: Train the model

# Train the two-input model. The `model.fit(` call was missing, which left a
# dangling tuple expression with an unmatched closing parenthesis.
# NOTE(review): the labels are passed both as the second input and as the
# target, so the network can see the values it is asked to predict -- confirm
# this is intended.
history = model.fit(
    [x_train, y_train],
    y_train,
    epochs=20,
    batch_size=20,
    validation_data=([x_val, y_val], y_val),
)

Step 6: Evaluate the model

# evaluate() returns the loss followed by the metrics in the order given to
# compile(): accuracy, then binary_accuracy.
loss, accuracy, binary_accuracy = model.evaluate([x_val, y_val], y_val)

# The metrics are fractions in [0, 1]; multiply by 100 to print percentages.
# (The `*` was lost in the paste, leaving the undefined name `accuracy100`.)
print(f"Validation accuracy: {accuracy * 100:.2f}%")
print(f"Validation binary accuracy: {binary_accuracy * 100:.2f}%")

Step 7: Visualize predictions with annotations

def visualize_predictions(images, labels, predictions, file_names, target_size):
    """Show each image with a red box for every label position predicted positive.

    One figure is created per image. For every position ``j`` whose prediction
    exceeds 0.5, a rectangle is drawn near the top of the image; its horizontal
    position is derived from ``j`` and the module-level ``max_label_length``.

    NOTE(review): the box position comes from the index of the label position,
    not from predicted coordinates, so the boxes mark *which* positions fired
    rather than where a distress is located -- confirm this is what is wanted.

    Args:
        images: Sequence of image arrays accepted by ``imshow``.
        labels: Ground-truth labels; currently unused by this function.
        predictions: Per-image sequence of prediction values, one per label
            position.
        file_names: Names used as figure titles, aligned with ``images``.
        target_size: ``(height, width)`` used to scale the boxes.
    """
    num_images = len(images)
    for i in range(num_images):
        image = images[i]
        # Use image dimensions for figsize (pixels / 100 -> inches).
        fig, ax = plt.subplots(figsize=(image.shape[1] / 100, image.shape[0] / 100))
        ax.imshow(image)
        ax.set_title(f"Image: {file_names[i]}")

        # Draw rectangles based on predictions
        for j, pred in enumerate(predictions[i]):
            if pred > 0.5:  # Adjust threshold as needed
                # Calculate box dimensions based on label position and target_size
                box_x = target_size[1] * (j + 1) / (max_label_length + 1)
                box_y = target_size[0] * 0.05
                box_width = target_size[1] * 0.2
                box_height = target_size[0] * 0.1
                rect = patches.Rectangle(
                    (box_x, box_y),
                    box_width,
                    box_height,
                    linewidth=1,
                    edgecolor='r',
                    facecolor='none',
                )
                # A patch is only rendered once it is attached to the axes.
                ax.add_patch(rect)

        # Display the finished figure before moving on to the next image.
        plt.show()


Predict on validation data

# Run the model on the validation samples; it takes both the images and the
# padded labels as inputs.
validation_inputs = [x_val, y_val]
predictions = model.predict(validation_inputs)

Convert predictions to binary format for visualization

# Threshold the scores at 0.5: 1 where the score is above it, 0 otherwise.
predictions_binary = np.greater(predictions, 0.5).astype(int)

Display some predictions with annotations

num_visualize = min(5, len(x_val))  # Visualize up to 5 images, adjust as needed

# train_test_split shuffles the samples, so the first entries of image_files
# do not belong to the first validation images. Splitting the file names with
# the same test_size/random_state as the data split reproduces the same
# selection. This only holds while image_files has exactly one entry per
# loaded sample, hence the length check.
if len(image_files) == len(x_train) + len(x_val):
    _, val_file_names = train_test_split(
        image_files, test_size=0.2, random_state=42
    )
else:
    # Some files were skipped while loading, so names can no longer be
    # matched to validation samples reliably; use neutral titles instead.
    val_file_names = [f"validation sample {i}" for i in range(len(x_val))]

visualize_predictions(
    x_val[:num_visualize],
    y_val[:num_visualize],
    predictions_binary[:num_visualize],
    val_file_names[:num_visualize],
    target_size,
)