timesler / facenet-pytorch

Pretrained Pytorch face detection (MTCNN) and facial recognition (InceptionResnet) models
MIT License
4.58k stars 960 forks source link

Result of Fine tuning is not good #189

Open miyamotok0105 opened 2 years ago

miyamotok0105 commented 2 years ago

Python version: 3.9.6 Operating System: Mac

Description

I'm fine-tuning a model pre-trained on VGGFace2 with data I collected myself, but the accuracy doesn't improve. If you have any options, parameter adjustments, or other insights that could help improve accuracy, I would appreciate it if you could share them.

What I did

Data prepared by myself: approximately 1500 people × about 5 face images each. I tried training with the following script, created with reference to [finetune.ipynb](https://github.com/timesler/facenet-pytorch/blob/master/examples/finetune.ipynb):

from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
if __name__ == "__main__":
  # Fine-tuning script: (1) crop faces from `data_dir` with MTCNN into
  # `data_dir + '_cropped'`, (2) train an InceptionResnetV1 classifier
  # (initialised from the 'vggface2' weights) on the crops using an 80/20
  # random train/validation split, (3) log progress via SummaryWriter.

  # Define run parameters
  data_dir = '../data/keyword'
  ACC_CSV_PATH = "./0202accuracy.csv"  # only referenced by the disabled CSV logging below
  # batch_size = 32
  batch_size = 64
  epochs = 300
  workers = 0 if os.name == 'nt' else 8
  # Determine if an nvidia GPU is available
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  print('Running on device: {}'.format(device))
  # Define MTCNN module
  mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
  )
  # Perform MTCNN facial detection
  dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))
  # Replace each sample's class label with the destination path of its crop,
  # so the loader below yields (image, save_path) pairs for MTCNN.
  dataset.samples = [
    (p, p.replace(data_dir, data_dir + '_cropped'))
      for p, _ in dataset.samples
  ]
  loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
  )
  for i, (x, y) in enumerate(loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(loader)), end='')
  # Remove mtcnn to reduce GPU memory usage
  del mtcnn
  # NOTE: num_classes is taken from the *uncropped* dataset's class list.
  resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(dataset.class_to_idx)
  ).to(device)
  optimizer = optim.Adam(resnet.parameters(), lr=0.001)
  # NOTE(review): with milestones [5, 10] and MultiStepLR's default gamma
  # (0.1) the learning rate is reduced after epochs 5 and 10 and then stays
  # at that small value for the remaining epochs of the 300-epoch run --
  # confirm this schedule is intended.
  scheduler = MultiStepLR(optimizer, [5, 10])
  trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
  ])
  dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)
  # Random 80/20 split over image indices.
  # NOTE(review): the shuffle is unseeded, so the split differs between runs.
  img_inds = np.arange(len(dataset))
  np.random.shuffle(img_inds)
  train_inds = img_inds[:int(0.8 * len(img_inds))]
  val_inds = img_inds[int(0.8 * len(img_inds)):]
  train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
  )
  val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
  )
  # Define loss and evaluation functions
  loss_fn = torch.nn.CrossEntropyLoss()
  metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
  }
  # Train model
  writer = SummaryWriter(log_dir="./logs")
  # try/finally guarantees the writer is flushed and closed even if training
  # is interrupted or raises part-way through the run.
  try:
    writer.iteration, writer.interval = 0, 10
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    # Evaluation needs no gradients; no_grad avoids building autograd graphs.
    # NOTE(review): assumes pass_epoch only runs a forward pass when the
    # model is in eval mode -- confirm against facenet_pytorch.training.
    with torch.no_grad():
      training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
      )
    # with open(ACC_CSV_PATH, "w") as f:
    #   f.write("epoch,accuracy")
    for epoch in range(epochs):
      print('\nEpoch {}/{}'.format(epoch + 1, epochs))
      print('-' * 10)
      resnet.train()
      training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
      )
      resnet.eval()
      with torch.no_grad():
        training.pass_epoch(
          resnet, loss_fn, val_loader,
          batch_metrics=metrics, show_running=True, device=device,
          writer=writer
        )
      # with open(ACC_CSV_PATH, "a") as f:
      #   f.write(f"\n{epoch},{training.accuracy()}")
  finally:
    writer.close()
elon-trump commented 1 year ago

have you got any solution yet?

fn-hide commented 3 months ago

Any updates?