Closed batrlatom closed 10 months ago
The code snippet cannot be run since it requires a specific dataset. So I haven't run it. But from ready the code, the two versions aren't equivalent since the Keras Application models uses [0-1] standardization while the other one uses mean=0, stddev=1 standardization. There might be other discrepancies as well.
My tip would be to try to change the learning rate, remove dropout, etc. until your model starts training.
@fchollet can you please try this complete test? even the preprocessing could be not correct, I think that it should converge at least somehow.. but mobilenetv3 does not do it, while custom model does.
edit ... efficientnet and densenet works. resnet50v2 too
import os
os.environ["KERAS_BACKEND"] = "torch"
import keras_core as keras
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import numpy as np
import torchvision
import PIL
# Hyperparameters
num_classes = 10
batch_size = 64
num_epochs = 1
learning_rate = 1e-4
# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_model_keras():
# Create the base MobileNetV3Small model
base_model = keras.applications.MobileNetV3Large(
input_shape=(224, 224, 3),
alpha=1,
minimalistic=False,
include_top=True,
weights= None, #'imagenet',
input_tensor=None,
classes=num_classes,
pooling=None,
dropout_rate=0.2,
classifier_activation='softmax',
include_preprocessing=False
)
return base_model
def get_model_keras2():
model = keras.Sequential(
[
keras.layers.Input(shape=(224, 224, 3)),
keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
keras.layers.MaxPooling2D(pool_size=(2, 2)),
keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
keras.layers.MaxPooling2D(pool_size=(2, 2)),
keras.layers.Flatten(),
keras.layers.Dropout(0.5),
keras.layers.Dense(num_classes),
]
)
return model
def get_model_keras3():
input_layer = keras.layers.Input(shape=(224, 224, 3))
x = keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu")(input_layer)
x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
x = keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu")(x)
x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dropout(0.5)(x)
output_layer = keras.layers.Dense(num_classes)(x)
model = keras.Model(inputs=input_layer, outputs=output_layer)
return model
def get_model_keras4():
base_model = keras.applications.EfficientNetV2B0(
input_shape=(224, 224, 3),
weights=None,
classes=10,
include_preprocessing=False
)
return base_model
def get_model_keras5():
base_model = keras.applications.DenseNet121(
input_shape=(224, 224, 3),
weights=None,
classes=10
)
return base_model
def get_model_torch():
base_model = torchvision.models.mobilenet_v3_large(num_classes=num_classes)
return base_model
class ToChannelsLast:
def __call__(self, x):
x = x.permute(1, 2, 0)
return x
def __repr__(self): return self.__class__.__name__ + '()'
def transform_mnist_to_rgb(dataset, batch_size=64):
# Define transformations
transform = transforms.Compose([
transforms.Resize((224, 224)), # Resize to 224x224
transforms.Grayscale(num_output_channels=3), # Convert to 3-channel grayscale
transforms.ToTensor(), # Convert to tensor
transforms.Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]),
ToChannelsLast()
])
# Load MNIST dataset
mnist_dataset = dataset(root='./data', train=True, transform=transform, download=True)
# Create DataLoader
dataloader = torch.utils.data.DataLoader(mnist_dataset, batch_size=batch_size, shuffle=True)
return dataloader
mnist_dataset = torchvision.datasets.MNIST
train_loader = transform_mnist_to_rgb(mnist_dataset, batch_size)
# Instantiate the torch loss function
loss_fn = nn.CrossEntropyLoss()
models = [{'name': 'mobilenetv3', 'model': get_model_keras()},
{'name': 'efficientNet', 'model': get_model_keras4()},
{'name': 'denseNet121', 'model': get_model_keras5()}]
for model_dict in models:
print(":::::::::::::::::::::::::::::::::::::")
print(model_dict['name'])
print(":::::::::::::::::::::::::::::::::::::")
model = model_dict['model'].to(device)
# Instantiate the torch optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
running_loss = 0.0
for step, (inputs, labels) in enumerate(train_loader):
inputs, labels = inputs.to(device), labels.to(device)
logits = model(inputs)
loss = loss_fn(logits, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss += loss.item()
# Print loss statistics
if (step + 1) % 10 == 0:
print(
f"Epoch [{epoch+1}/{num_epochs}], "
f"Batch [{step+1}/{len(train_loader)}], "
f"Loss: {running_loss / 10}"
)
running_loss = 0.0
print('Training finished.')
Hi, this is probably a problem on my side, but I would like to have this clarified since I am a little lost in this. I want to train any keras_core applications network, but it does not work for me. But when I switch to the torchvision model or build a model in keras_core myself, it does work.
I am using everything torch except the model itself and have two different version of model and preprocessing.
torchvision preprocessing includes transformation to torch tensor, normalisation keras preprocessing includes transformation to numpy array only ( 0 - 255 )
If I use get_model_torch together with data_transforms_torch, it trains correctly If I use get_model_keras with data_transforms_keras, it does not train at all.
What am I missing? Thanks