kaylode / theseus

General template for most PyTorch projects

Classification losses error #26

Closed · lannguyen0910 closed this 2 years ago

lannguyen0910 commented 2 years ago

I encountered these errors when testing with SmoothCELoss and FocalLoss; the logs are below:

1. ```FocalLoss```
    
    [Errno 2] No such file or directory: 'main'
    /content/main
    2022-03-27 11:07:21 | DEBUG    | stdout_logger.py:log_text:34 - Overriding configuration...
    2022-03-27 11:07:21 | INFO     | stdout_logger.py:log_text:28 - {
    "global": {
        "debug": true,
        "cfg_transform": "configs/classification/transform.yaml",
        "save_dir": "/content/main/runs",
        "device": "cuda:0",
        "use_fp16": true,
        "pretrained": null,
        "resume": null
    },
    "trainer": {
        "name": "SupervisedTrainer",
        "args": {
            "num_iterations": 3000,
            "clip_grad": 10.0,
            "evaluate_interval": 1,
            "print_interval": 20,
            "save_interval": 500
        }
    },
    "model": {
        "name": "BaseTimmModel",
        "args": {
            "name": "convnext_small",
            "from_pretrained": true,
            "num_classes": 180
        }
    },
    "loss": {
        "name": "FocalLoss"
    },
    "callbacks": [
        {
            "name": "LoggerCallbacks",
            "args": null
        },
        {
            "name": "CheckpointCallbacks",
            "args": {
                "best_key": "bl_acc"
            }
        },
        {
            "name": "VisualizerCallbacks",
            "args": null
        },
        {
            "name": "TensorboardCallbacks",
            "args": null
        }
    ],
    "metrics": [
        {
            "name": "Accuracy",
            "args": null
        },
        {
            "name": "BalancedAccuracyMetric",
            "args": null
        },
        {
            "name": "F1ScoreMetric",
            "args": {
                "average": "weighted"
            }
        },
        {
            "name": "ConfusionMatrix",
            "args": null
        },
        {
            "name": "ErrorCases",
            "args": null
        }
    ],
    "optimizer": {
        "name": "AdamW",
        "args": {
            "lr": 0.001,
            "weight_decay": 0.0005,
            "betas": [
                0.937,
                0.999
            ]
        }
    },
    "scheduler": {
        "name": "SchedulerWrapper",
        "args": {
            "scheduler_name": "cosine2",
            "t_initial": 7,
            "t_mul": 0.9,
            "eta_mul": 0.9,
            "eta_min": 1e-06
        }
    },
    "data": {
        "dataset": {
            "train": {
                "name": "ImageFolderDataset",
                "args": {
                    "image_dir": "/content/main/data/food-classification/train",
                    "txt_classnames": "configs/classification/classes.txt"
                }
            },
            "val": {
                "name": "ImageFolderDataset",
                "args": {
                    "image_dir": "/content/main/data/food-classification/val",
                    "txt_classnames": "configs/classification/classes.txt"
                }
            }
        },
        "dataloader": {
            "train": {
                "name": "DataLoaderWithCollator",
                "args": {
                    "batch_size": 32,
                    "drop_last": true,
                    "shuffle": false,
                    "collate_fn": {
                        "name": "MixupCutmixCollator",
                        "args": {
                            "mixup_alpha": 0.4,
                            "cutmix_alpha": 1.0,
                            "weight": [
                                0.2,
                                0.2
                            ]
                        }
                    },
                    "sampler": {
                        "name": "BalanceSampler",
                        "args": null
                    }
                }
            },
            "val": {
                "name": "DataLoaderWithCollator",
                "args": {
                    "batch_size": 32,
                    "drop_last": false,
                    "shuffle": true
                }
            }
        }
    }
    }
    2022-03-27 11:07:21 | DEBUG    | stdout_logger.py:log_text:34 - Loading config from configs/classification/transform.yaml...
    2022-03-27 11:07:21 | DEBUG    | stdout_logger.py:log_text:34 - Calculating class distribution...
    Downloading: "https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth" to /root/.cache/torch/hub/checkpoints/convnext_small_1k_224_ema.pth
    2022-03-27 11:07:46 | INFO     | stdout_logger.py:log_text:28 - Number of trainable parameters: 49,593,108
    2022-03-27 11:07:46 | INFO     | stdout_logger.py:log_text:28 - Using CUDA:0 (Tesla T4, 15109.75MB)

    2022-03-27 11:07:46 | INFO     | stdout_logger.py:log_text:28 - Number of training samples: 88814
    2022-03-27 11:07:46 | INFO     | stdout_logger.py:log_text:28 - Number of validation samples: 21775
    2022-03-27 11:07:46 | INFO     | stdout_logger.py:log_text:28 - Number of training iterations each epoch: 2775
    2022-03-27 11:07:46 | INFO     | stdout_logger.py:log_text:28 - Number of validation iterations each epoch: 681
    2022-03-27 11:07:46 | INFO     | stdout_logger.py:log_text:28 - Everything will be saved to /content/main/runs/2022-03-27_11-07-21
    2022-03-27 11:07:46 | DEBUG    | stdout_logger.py:log_text:34 - Saving config to /content/main/runs/2022-03-27_11-07-21/pipeline.yaml...
    2022-03-27 11:07:46 | DEBUG    | stdout_logger.py:log_text:34 - Saving config to /content/main/runs/2022-03-27_11-07-21/transform.yaml...
    2022-03-27 11:07:46 | DEBUG    | stdout_logger.py:log_text:34 - Start sanity checks
    2022-03-27 11:07:47 | DEBUG    | stdout_logger.py:log_text:34 - Visualizing architecture...
    2022-03-27 11:07:50 | INFO     | stdout_logger.py:log_text:28 - =============================EVALUATION===================================
    100% 681/681 [04:04<00:00,  2.78it/s]
    2022-03-27 11:11:56 | INFO     | stdout_logger.py:log_text:28 - [0|3000] || L: 0.13242 || Time:     2.7617 (it/s)
    2022-03-27 11:11:56 | INFO     | stdout_logger.py:log_text:28 - acc: 0.00455 | bl_acc: 0.00411 | weighted-f1: 0.00332 |

    2022-03-27 11:11:56 | INFO     | stdout_logger.py:log_text:28 - ==========================================================================
    2022-03-27 11:11:57 | DEBUG    | stdout_logger.py:log_text:34 - Visualizing model predictions...
    2022-03-27 11:11:59 | DEBUG    | stdout_logger.py:log_text:34 - Visualizing dataset...
    2022-03-27 11:12:01 | DEBUG    | stdout_logger.py:log_text:34 - Analyzing datasets...
    100% 88814/88814 [12:01<00:00, 123.05it/s]
    100% 21775/21775 [02:12<00:00, 163.82it/s]
    2022-03-27 11:26:17 | INFO     | stdout_logger.py:log_text:28 - ===========================START TRAINING=================================
    Traceback (most recent call last):
      File "/content/main/configs/classification/train.py", line 10, in <module>
        train_pipeline.fit()
      File "/content/main/theseus/classification/pipeline.py", line 171, in fit
        self.trainer.fit()
      File "/content/main/theseus/base/trainer/base_trainer.py", line 65, in fit
        self.training_epoch()
      File "/content/main/theseus/base/trainer/supervised_trainer.py", line 68, in training_epoch
        outputs = self.model.training_step(batch)
      File "/content/main/theseus/classification/models/wrapper.py", line 34, in training_step
        return self.forward(batch)
      File "/content/main/theseus/classification/models/wrapper.py", line 22, in forward
        loss, loss_dict = self.criterion(outputs, batch, self.device)
      File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
        return forward_call(*input, **kwargs)
      File "/content/main/theseus/classification/losses/focal_loss.py", line 21, in forward
        targets = nn.functional.one_hot(targets, num_classes=num_classes)
    RuntimeError: one_hot is only applicable to index tensor.


2. ```SmoothCELoss```
```python
[Errno 2] No such file or directory: 'main'
/content/main
2022-03-27 11:48:37 | DEBUG    | stdout_logger.py:log_text:34 - Overriding configuration...
2022-03-27 11:48:37 | INFO     | stdout_logger.py:log_text:28 - {
    "global": {
        "debug": true,
        "cfg_transform": "configs/classification/transform.yaml",
        "save_dir": "/content/main/runs",
        "device": "cuda:0",
        "use_fp16": true,
        "pretrained": null,
        "resume": null
    },
    "trainer": {
        "name": "SupervisedTrainer",
        "args": {
            "num_iterations": 3000,
            "clip_grad": 10.0,
            "evaluate_interval": 1,
            "print_interval": 20,
            "save_interval": 500
        }
    },
    "model": {
        "name": "BaseTimmModel",
        "args": {
            "name": "convnext_small",
            "from_pretrained": true,
            "num_classes": 180
        }
    },
    "loss": {
        "name": "SmoothCELoss"
    },
    "callbacks": [
        {
            "name": "LoggerCallbacks",
            "args": null
        },
        {
            "name": "CheckpointCallbacks",
            "args": {
                "best_key": "bl_acc"
            }
        },
        {
            "name": "VisualizerCallbacks",
            "args": null
        },
        {
            "name": "TensorboardCallbacks",
            "args": null
        }
    ],
    "metrics": [
        {
            "name": "Accuracy",
            "args": null
        },
        {
            "name": "BalancedAccuracyMetric",
            "args": null
        },
        {
            "name": "F1ScoreMetric",
            "args": {
                "average": "weighted"
            }
        },
        {
            "name": "ConfusionMatrix",
            "args": null
        },
        {
            "name": "ErrorCases",
            "args": null
        }
    ],
    "optimizer": {
        "name": "AdamW",
        "args": {
            "lr": 0.001,
            "weight_decay": 0.0005,
            "betas": [
                0.937,
                0.999
            ]
        }
    },
    "scheduler": {
        "name": "SchedulerWrapper",
        "args": {
            "scheduler_name": "cosine2",
            "t_initial": 7,
            "t_mul": 0.9,
            "eta_mul": 0.9,
            "eta_min": 1e-06
        }
    },
    "data": {
        "dataset": {
            "train": {
                "name": "ImageFolderDataset",
                "args": {
                    "image_dir": "/content/main/data/food-classification/train",
                    "txt_classnames": "configs/classification/classes.txt"
                }
            },
            "val": {
                "name": "ImageFolderDataset",
                "args": {
                    "image_dir": "/content/main/data/food-classification/val",
                    "txt_classnames": "configs/classification/classes.txt"
                }
            }
        },
        "dataloader": {
            "train": {
                "name": "DataLoaderWithCollator",
                "args": {
                    "batch_size": 32,
                    "drop_last": true,
                    "shuffle": false,
                    "collate_fn": {
                        "name": "MixupCutmixCollator",
                        "args": {
                            "mixup_alpha": 0.4,
                            "cutmix_alpha": 1.0,
                            "weight": [
                                0.2,
                                0.2
                            ]
                        }
                    },
                    "sampler": {
                        "name": "BalanceSampler",
                        "args": null
                    }
                }
            },
            "val": {
                "name": "DataLoaderWithCollator",
                "args": {
                    "batch_size": 32,
                    "drop_last": false,
                    "shuffle": true
                }
            }
        }
    }
}
2022-03-27 11:48:37 | DEBUG    | stdout_logger.py:log_text:34 - Loading config from configs/classification/transform.yaml...
2022-03-27 11:48:37 | DEBUG    | stdout_logger.py:log_text:34 - Calculating class distribution...
2022-03-27 11:48:43 | INFO     | stdout_logger.py:log_text:28 - Number of trainable parameters: 49,593,108
2022-03-27 11:48:43 | INFO     | stdout_logger.py:log_text:28 - Using CUDA:0 (Tesla T4, 15109.75MB)

2022-03-27 11:48:43 | INFO     | stdout_logger.py:log_text:28 - Number of training samples: 88814
2022-03-27 11:48:43 | INFO     | stdout_logger.py:log_text:28 - Number of validation samples: 21775
2022-03-27 11:48:43 | INFO     | stdout_logger.py:log_text:28 - Number of training iterations each epoch: 2775
2022-03-27 11:48:43 | INFO     | stdout_logger.py:log_text:28 - Number of validation iterations each epoch: 681
2022-03-27 11:48:43 | INFO     | stdout_logger.py:log_text:28 - Everything will be saved to /content/main/runs/2022-03-27_11-48-37
2022-03-27 11:48:43 | DEBUG    | stdout_logger.py:log_text:34 - Saving config to /content/main/runs/2022-03-27_11-48-37/pipeline.yaml...
2022-03-27 11:48:43 | DEBUG    | stdout_logger.py:log_text:34 - Saving config to /content/main/runs/2022-03-27_11-48-37/transform.yaml...
2022-03-27 11:48:43 | DEBUG    | stdout_logger.py:log_text:34 - Start sanity checks
2022-03-27 11:48:44 | DEBUG    | stdout_logger.py:log_text:34 - Visualizing architecture...
2022-03-27 11:48:47 | INFO     | stdout_logger.py:log_text:28 - =============================EVALUATION===================================
100% 681/681 [04:04<00:00,  2.78it/s]
2022-03-27 11:52:53 | INFO     | stdout_logger.py:log_text:28 - [0|3000] || CE: 5.19444 || Time:     2.7645 (it/s)
2022-03-27 11:52:53 | INFO     | stdout_logger.py:log_text:28 - acc: 0.00822 | bl_acc: 0.00766 | weighted-f1: 0.00479 | 

2022-03-27 11:52:53 | INFO     | stdout_logger.py:log_text:28 - ==========================================================================
2022-03-27 11:52:54 | DEBUG    | stdout_logger.py:log_text:34 - Visualizing model predictions...
2022-03-27 11:52:56 | DEBUG    | stdout_logger.py:log_text:34 - Visualizing dataset...
2022-03-27 11:52:58 | DEBUG    | stdout_logger.py:log_text:34 - Analyzing datasets...
100% 88814/88814 [12:02<00:00, 122.99it/s]
100% 21775/21775 [02:13<00:00, 163.64it/s]
2022-03-27 12:07:15 | INFO     | stdout_logger.py:log_text:28 - ===========================START TRAINING=================================
Traceback (most recent call last):
  File "/content/main/configs/classification/train.py", line 10, in <module>
    train_pipeline.fit()
  File "/content/main/theseus/classification/pipeline.py", line 171, in fit
    self.trainer.fit()
  File "/content/main/theseus/base/trainer/base_trainer.py", line 65, in fit
    self.training_epoch()
  File "/content/main/theseus/base/trainer/supervised_trainer.py", line 68, in training_epoch
    outputs = self.model.training_step(batch)
  File "/content/main/theseus/classification/models/wrapper.py", line 34, in training_step
    return self.forward(batch)
  File "/content/main/theseus/classification/models/wrapper.py", line 22, in forward
    loss, loss_dict = self.criterion(outputs, batch, self.device)
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/content/main/theseus/classification/losses/ce_loss.py", line 37, in forward
    loss = self.criterion(pred, target.view(-1).contiguous())
  File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/timm/loss/cross_entropy.py", line 22, in forward
    nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
RuntimeError: gather(): Expected dtype int64 for index
```

I guess it's an error from the MixupCutmix collator, something related to torch.int64.
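
For reference, both tracebacks come down to PyTorch rejecting a float tensor where it expects integer class indices. A minimal sketch reproducing the two errors outside the pipeline (shapes and the 180-class count are illustrative):

```python
import torch
import torch.nn as nn

hard = torch.tensor([3, 7])                   # index labels, dtype torch.int64
nn.functional.one_hot(hard, num_classes=180)  # OK: shape (2, 180)

soft = torch.rand(2, 180)                     # mixup-style soft targets, dtype torch.float32
logprobs = torch.log_softmax(torch.randn(2, 180), dim=-1)

# Each of these raises the corresponding error from the logs above:
nn.functional.one_hot(soft, num_classes=180)  # RuntimeError: one_hot is only applicable to index tensor.
logprobs.gather(dim=-1, index=soft)           # RuntimeError: gather(): Expected dtype int64 for index
```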

Here's the link to the notebook I've used for testing, in case you want to have a look: notebook

kaylode commented 2 years ago

OK, I've analyzed this error. It happens because MixupCutmix occasionally transforms the targets tensor into a soft one-hot encoding, whereas the current FocalLoss and SmoothCE require hard (argmax) labels.
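
To see what that means for the labels, here is a minimal sketch (the mixing ratio 0.4 and the 10-class count are arbitrary): after mixup, two integer labels become one float vector over all classes, so the target's shape matches the model output instead of being a 1-D index tensor.

```python
import torch
import torch.nn.functional as F

lam = 0.4  # illustrative mixing ratio
y_a = F.one_hot(torch.tensor([3]), num_classes=10).float()
y_b = F.one_hot(torch.tensor([7]), num_classes=10).float()

soft_target = lam * y_a + (1 - lam) * y_b  # shape (1, 10), dtype float32
# soft_target[0, 3] == 0.4 and soft_target[0, 7] == 0.6: no longer index labels,
# which is why comparing outputs.shape with targets.shape can detect this case.
```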

For FocalLoss, it can be fixed easily like this (imports and an illustrative ```__init__``` are filled in here; ```move_to``` is theseus's device-transfer utility):


```python
from typing import Any, Dict

import torch
import torch.nn as nn
from torchvision.ops import sigmoid_focal_loss  # assuming torchvision's implementation here

# move_to is theseus's utility that moves (nested) tensors onto a device; import omitted here.

class FocalLoss(nn.Module):
    def __init__(self, alpha: float = 0.25, gamma: float = 2.0, reduction: str = 'mean'):
        super().__init__()
        # defaults above are illustrative, not necessarily the repo's
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, outputs: Dict[str, Any], batch: Dict[str, Any], device: torch.device):
        outputs = outputs['outputs']
        targets = move_to(batch['targets'], device)
        num_classes = outputs.shape[-1]

        # Mixup/cutmix targets are already (soft) one-hot; only convert index labels
        if outputs.shape != targets.shape:
            targets = nn.functional.one_hot(targets, num_classes=num_classes)
            targets = targets.float().squeeze()

        loss = sigmoid_focal_loss(outputs, targets, self.alpha, self.gamma, self.reduction)
        loss_dict = {"L": loss.item()}
        return loss, loss_dict
```
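
One note on the ```squeeze()``` (my reading of the snippet): if the targets arrive with a trailing singleton dimension, ```one_hot``` appends the class axis after it, and ```squeeze()``` collapses the result back to (N, num_classes).

```python
import torch

t = torch.tensor([[3], [7]])                         # shape (2, 1) index labels
oh = torch.nn.functional.one_hot(t, num_classes=10)  # shape (2, 1, 10)
print(oh.float().squeeze().shape)                    # torch.Size([2, 10])
```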

For SmoothCE, timm has a ```SoftTargetCrossEntropy``` class which is suitable for soft one-hot targets (imports are filled in here as well):

```python
from typing import Any, Dict

import torch
import torch.nn as nn
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy

# move_to is theseus's utility that moves tensors onto a device; import omitted here.

class SmoothCELoss(nn.Module):
    def __init__(self, smoothing: float = 0.1, **kwargs):
        super().__init__()
        # forward the smoothing factor to the criterion
        self.smooth_criterion = LabelSmoothingCrossEntropy(smoothing=smoothing)
        self.soft_criterion = SoftTargetCrossEntropy()

    def forward(self, outputs: Dict[str, Any], batch: Dict[str, Any], device: torch.device):
        pred = outputs['outputs']
        target = move_to(batch["targets"], device)

        # Soft (mixup/cutmix) targets share the prediction's shape; index labels do not
        if pred.shape == target.shape:
            loss = self.soft_criterion(pred, target)
        else:
            loss = self.smooth_criterion(pred, target.view(-1).contiguous())
        loss_dict = {"CE": loss.item()}
        return loss, loss_dict
```
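
A quick smoke test of the shape-based dispatch, with a hypothetical ```move_to``` stand-in (the real utility lives in theseus) and arbitrary shapes:

```python
def move_to(x, device):  # minimal stand-in: theseus's version also handles nested containers
    return x.to(device)

criterion = SmoothCELoss()
logits = torch.randn(4, 180)

hard = {"targets": torch.randint(0, 180, (4,))}     # index labels  -> label-smoothing branch
soft = {"targets": torch.rand(4, 180).softmax(-1)}  # soft targets  -> soft-target branch

for batch in (hard, soft):
    loss, loss_dict = criterion({"outputs": logits}, batch, torch.device("cpu"))
    print(loss_dict)  # {"CE": ...} from each branch
```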

I will make a PR and you can test it out.