keras-team / keras-core

A multi-backend implementation of the Keras API, with support for TensorFlow, JAX, and PyTorch.
Apache License 2.0

question ... why mobilenetv3 from keras_core does not train while torchvision model does? #755

Closed · batrlatom closed 10 months ago

batrlatom commented 10 months ago

Hi, this is probably a problem on my side, but I would like to have it clarified since I am a little lost here. I want to train one of the keras_core applications networks, but it does not work for me. When I switch to the torchvision model, or build a model in keras_core myself, it does work.

I am using torch for everything except the model itself, and I have two different versions of the model and preprocessing.

torchvision preprocessing includes transformation to a torch tensor and normalisation
keras preprocessing includes transformation to a numpy array only (0-255)

If I use get_model_torch together with data_transforms_torch, it trains correctly. If I use get_model_keras with data_transforms_keras, it does not train at all.

What am I missing? Thanks


import os
os.environ["KERAS_BACKEND"] = "torch"

import keras_core as keras
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import numpy as np
import torchvision

# Hyperparameters
num_classes = 2
batch_size = 64
num_epochs = 10
learning_rate = 1e-3

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_model_keras():
    # Create the base MobileNetV3Large model
    base_model = keras.applications.MobileNetV3Large(
        input_shape=(224, 224, 3),
        alpha=1,
        minimalistic=False,
        include_top=True,
        weights=None,  # 'imagenet'
        input_tensor=None,
        classes=num_classes,
        pooling=None,
        dropout_rate=0.2,
        classifier_activation=None,
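        # include_preprocessing=True: the model expects raw [0-255] pixel
        # inputs and rescales them internally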
        include_preprocessing=True
    )

    return base_model

def get_model_torch():
    base_model = torchvision.models.mobilenet_v3_large(num_classes=num_classes)
    return base_model

class ToChannelsLast:
    def __call__(self, x):        
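        # Note: np.rollaxis(x, 2, 0) moves the channel axis to the front
        # (HWC -> CHW), i.e. channels-first, not channels-last; this
        # transform is unused in this script.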
        x = np.rollaxis(x, 2, 0)  
        return x

    def __repr__(self): return self.__class__.__name__ + '()'

class ToNpArray:
    def __call__(self, x):     
        x = np.asarray(x, dtype=np.float32)        
        return x

    def __repr__(self): return self.__class__.__name__ + '()'

# Define data transformations for the keras model
data_transforms_keras = transforms.Compose([
    transforms.Resize((224, 224)),
    ToNpArray(),
])

# Define data transformations for the torch model
data_transforms_torch = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

model = get_model_keras()
model = model.to(device)

# Load datasets
train_dataset = datasets.ImageFolder(
    root='/datamatrix-cli/images/', transform=data_transforms_keras)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Instantiate the torch optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Instantiate the torch loss function
loss_fn = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    running_loss = 0.0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        logits = model(inputs)
        loss = loss_fn(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print loss statistics
        if (step + 1) % 10 == 0:
            print(
                f"Epoch [{epoch+1}/{num_epochs}], "
                f"Batch [{step+1}/{len(train_loader)}], "
                f"Loss: {running_loss / 10}"
            )
            running_loss = 0.0

print('Training finished.')

fchollet commented 10 months ago

The code snippet cannot be run since it requires a specific dataset, so I haven't run it. But from reading the code, the two versions aren't equivalent: the Keras Applications model uses [0-1] standardization while the other one uses mean=0, stddev=1 standardization. There might be other discrepancies as well.

My tip would be to try changing the learning rate, removing dropout, etc., until your model starts training.
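For example (a minimal sketch, assuming the preprocessing mismatch is the main difference; data_transforms_shared is just an illustrative name), one could disable the built-in rescaling and reuse the torchvision normalization for the Keras model as well, permuting CHW tensors to the channels-last layout it expects:

import torchvision.transforms as transforms
import keras_core as keras

# Illustrative shared pipeline: same ImageNet normalization for both
# models, with a final CHW -> HWC permute for the channels-last Keras model.
data_transforms_shared = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
    transforms.Lambda(lambda x: x.permute(1, 2, 0)),  # CHW -> HWC
])

keras_model = keras.applications.MobileNetV3Large(
    input_shape=(224, 224, 3),
    weights=None,
    classes=2,
    classifier_activation=None,   # raw logits for nn.CrossEntropyLoss
    include_preprocessing=False,  # normalization handled in the transform above
)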

batrlatom commented 10 months ago

@fchollet can you please try this complete test? Even if the preprocessing is not correct, I think it should converge at least somewhat, but mobilenetv3 does not, while the custom model does.

edit: efficientnet and densenet work, and resnet50v2 too.


import os
os.environ["KERAS_BACKEND"] = "torch"

import keras_core as keras
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision

# Hyperparameters
num_classes = 10
batch_size = 64
num_epochs = 1
learning_rate = 1e-4

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_model_keras():
    # Create the base MobileNetV3Large model
    base_model = keras.applications.MobileNetV3Large(
        input_shape=(224, 224, 3),
        alpha=1,
        minimalistic=False,
        include_top=True,
        weights=None,  # 'imagenet'
        input_tensor=None,
        classes=num_classes,
        pooling=None,
        dropout_rate=0.2,
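        # Note: softmax activation here, while nn.CrossEntropyLoss below
        # expects raw logits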
        classifier_activation='softmax',
        include_preprocessing=False
    )

    return base_model

def get_model_keras2():
    model = keras.Sequential(
    [
        keras.layers.Input(shape=(224, 224, 3)),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes),
    ]
    )

    return model

def get_model_keras3():
    input_layer = keras.layers.Input(shape=(224, 224, 3))
    x = keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu")(input_layer)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu")(x)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dropout(0.5)(x)
    output_layer = keras.layers.Dense(num_classes)(x)

    model = keras.Model(inputs=input_layer, outputs=output_layer)

    return model

def get_model_keras4():
    base_model = keras.applications.EfficientNetV2B0(
        input_shape=(224, 224, 3),
        weights=None,
        classes=10,
        include_preprocessing=False
    )
    return base_model

def get_model_keras5():
    base_model = keras.applications.DenseNet121(
        input_shape=(224, 224, 3),
        weights=None,
        classes=10
    )
    return base_model

def get_model_torch():
    base_model = torchvision.models.mobilenet_v3_large(num_classes=num_classes)
    return base_model

class ToChannelsLast:
    def __call__(self, x):        
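        # permute CHW -> HWC: the channels-last Keras models expect (H, W, C)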
        x = x.permute(1, 2, 0)
        return x

    def __repr__(self): return self.__class__.__name__ + '()'

def transform_mnist_to_rgb(dataset, batch_size=64):
    # Define transformations
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to 224x224
        transforms.Grayscale(num_output_channels=3),  # Convert to 3-channel grayscale
        transforms.ToTensor(),  # Convert to tensor
        transforms.Normalize(mean=[0.5, 0.5, 0.5],
                             std=[0.5, 0.5, 0.5]),
        ToChannelsLast(),  # CHW -> HWC for the channels-last Keras models
    ])

    # Load MNIST dataset
    mnist_dataset = dataset(root='./data', train=True, transform=transform, download=True)

    # Create DataLoader
    dataloader = torch.utils.data.DataLoader(mnist_dataset, batch_size=batch_size, shuffle=True)

    return dataloader

mnist_dataset = torchvision.datasets.MNIST
train_loader = transform_mnist_to_rgb(mnist_dataset, batch_size)

# Instantiate the torch loss function
loss_fn = nn.CrossEntropyLoss()

models = [{'name': 'mobilenetv3', 'model': get_model_keras()},
          {'name': 'efficientNet', 'model': get_model_keras4()},
          {'name': 'denseNet121', 'model': get_model_keras5()}]

for model_dict in models:
    print(":::::::::::::::::::::::::::::::::::::")
    print(model_dict['name'])
    print(":::::::::::::::::::::::::::::::::::::")
    model = model_dict['model'].to(device)
    # Instantiate the torch optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for step, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            loss = loss_fn(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Print loss statistics
            if (step + 1) % 10 == 0:
                print(
                    f"Epoch [{epoch+1}/{num_epochs}], "
                    f"Batch [{step+1}/{len(train_loader)}], "
                    f"Loss: {running_loss / 10}"
                )
                running_loss = 0.0

print('Training finished.')
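
A quick way to tell whether a model is learning at all is to try to overfit one fixed batch; if the loss does not drop even there, gradients are not flowing. A minimal sketch, reusing train_loader, loss_fn, device, learning_rate, and get_model_keras from the script above:

# Overfit check: repeatedly fit a single fixed batch; the loss should
# fall toward zero if gradients flow through the model correctly.
inputs, labels = next(iter(train_loader))
inputs, labels = inputs.to(device), labels.to(device)

model = get_model_keras().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for i in range(100):
    outputs = model(inputs)
    loss = loss_fn(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (i + 1) % 10 == 0:
        print(f"overfit check, iter {i + 1}: loss {loss.item():.4f}")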