openvinotoolkit / anomalib

An anomaly detection library comprising state-of-the-art algorithms and features such as experiment management, hyper-parameter optimization, and edge inference.
https://anomalib.readthedocs.io/en/latest/
Apache License 2.0

[Bug]: RuntimeError: vstack expects a non-empty TensorList #2158

Open dong-uk-kim97 opened 1 week ago

dong-uk-kim97 commented 1 week ago

Describe the bug

I suddenly got this error while training. I checked the tensor shapes and got image = torch.Size([1, 3, 256, 256]) and mask = torch.Size([1, 256, 256]), so I did not expect this error, yet it still occurs. What should I do to solve it? On a side note, please write the guidelines so that even beginners can follow them. I don't understand them.
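
For reference, this is roughly how I checked the shapes. It is only a sketch: it assumes the validation batch is a dict with `image` and `mask` keys and reuses the datamodule arguments from the script further below.

```python
from pathlib import Path

from anomalib.data import MVTec

# Same datamodule arguments as in the training script below.
datamodule = MVTec(
    num_workers=0,
    root=Path("datasets/MVTec"),
    category="screw",
    image_size=(256, 256),
    train_batch_size=1,
    eval_batch_size=1,
    val_split_ratio=0.2,
)
datamodule.prepare_data()
datamodule.setup()

# Assumption: validation batches are dicts exposing "image" and "mask" entries.
batch = next(iter(datamodule.val_dataloader()))
print(batch["image"].shape)  # torch.Size([1, 3, 256, 256])
print(batch["mask"].shape)   # torch.Size([1, 256, 256])
```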

Dataset

MVTec

Model

PatchCore

Steps to reproduce the behavior

.

OS information

OS information:

Expected behavior

I want to train on this dataset.

Screenshots

"""python from pathlib import Path

from anomalib.data.utils import read_image from anomalib.deploy import OpenVINOInferencer from anomalib.data import MVTec from anomalib.engine import Engine from anomalib.models import Patchcore from anomalib import TaskType from lightning.pytorch import Trainer

from typing import Any

import numpy as np from matplotlib import pyplot as plt from PIL import Image from torchvision.transforms import ToPILImage

from anomalib.deploy import OpenVINOInferencer, ExportType

datamodule = MVTec(num_workers=0, root=Path("datasets/MVTec"), category="screw", image_size=(256, 256), train_batch_size=1, eval_batch_size=1, val_split_ratio=0.2) datamodule.prepare_data() # Downloads the dataset if it's not in the specified root directory datamodule.setup() model = Patchcore()

engine = Trainer( accelerator="gpu", max_epochs=100, min_epochs=10, num_sanity_val_steps=-1, enable_checkpointing=True, val_check_interval=1.0, check_val_every_n_epoch=1 )

engine.fit(model=model, datamodule=datamodule) """
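
For comparison, here is the same fit call driven through anomalib's `Engine`, which is imported above but unused in my script. This is only a sketch: it reuses `model` and `datamodule` from the script above, and I am assuming the `accelerator` keyword is forwarded to the underlying Trainer.

```python
from anomalib.engine import Engine

# Sketch: drive training through anomalib's Engine instead of the raw Lightning
# Trainer. Reuses `model` and `datamodule` from the script above; the
# `accelerator` keyword is assumed to be forwarded to the underlying Trainer.
engine = Engine(accelerator="gpu")
engine.fit(model=model, datamodule=datamodule)
```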

Pip/GitHub

GitHub

What version/branch did you use?

No response

Configuration YAML

I didn't set a YAML file.

Logs

amData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[14], line 11
      1 engine = Trainer(
      2     accelerator="gpu",
      3     max_epochs=100,
   (...)
      8     check_val_every_n_epoch=1
      9 )
---> 11 engine.fit(model=model, datamodule=datamodule)

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\trainer.py:543, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
    541 self.state.status = TrainerStatus.RUNNING
    542 self.training = True
--> 543 call._call_and_handle_interrupt(
    544     self, self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
    545 )

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\call.py:44, in _call_and_handle_interrupt(trainer, trainer_fn, *args, **kwargs)
     42     if trainer.strategy.launcher is not None:
     43         return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)
---> 44     return trainer_fn(*args, **kwargs)
     46 except _TunerExitException:
     47     _call_teardown_hook(trainer)

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\trainer.py:579, in Trainer._fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
    572 assert self.state.fn is not None
    573 ckpt_path = self._checkpoint_connector._select_ckpt_path(
    574     self.state.fn,
    575     ckpt_path,
    576     model_provided=True,
    577     model_connected=self.lightning_module is not None,
    578 )
--> 579 self._run(model, ckpt_path=ckpt_path)
    581 assert self.state.stopped
    582 self.training = False

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\trainer.py:986, in Trainer._run(self, model, ckpt_path)
    981 self._signal_connector.register_signal_handlers()
    983 # ----------------------------
    984 # RUN THE TRAINER
    985 # ----------------------------
--> 986 results = self._run_stage()
    988 # ----------------------------
    989 # POST-Training CLEAN UP
    990 # ----------------------------
    991 log.debug(f"{self.__class__.__name__}: trainer tearing down")

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\trainer.py:1028, in Trainer._run_stage(self)
   1026 if self.training:
   1027     with isolate_rng():
-> 1028         self._run_sanity_check()
   1029     with torch.autograd.set_detect_anomaly(self._detect_anomaly):
   1030         self.fit_loop.run()

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\trainer.py:1057, in Trainer._run_sanity_check(self)
   1054 call._call_callback_hooks(self, "on_sanity_check_start")
   1056 # run eval step
-> 1057 val_loop.run()
   1059 call._call_callback_hooks(self, "on_sanity_check_end")
   1061 # reset logger connector

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\loops\utilities.py:182, in _no_grad_context.<locals>._decorator(self, *args, **kwargs)
    180     context_manager = torch.no_grad
    181 with context_manager():
--> 182     return loop_run(self, *args, **kwargs)

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\loops\evaluation_loop.py:114, in _EvaluationLoop.run(self)
    112     return []
    113 self.reset()
--> 114 self.on_run_start()
    115 data_fetcher = self._data_fetcher
    116 assert data_fetcher is not None

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\loops\evaluation_loop.py:244, in _EvaluationLoop.on_run_start(self)
    242 self._verify_dataloader_idx_requirement()
    243 self._on_evaluation_model_eval()
--> 244 self._on_evaluation_start()
    245 self._on_evaluation_epoch_start()

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\loops\evaluation_loop.py:290, in _EvaluationLoop._on_evaluation_start(self, *args, **kwargs)
    288 hook_name = "on_test_start" if trainer.testing else "on_validation_start"
    289 call._call_callback_hooks(trainer, hook_name, *args, **kwargs)
--> 290 call._call_lightning_module_hook(trainer, hook_name, *args, **kwargs)
    291 call._call_strategy_hook(trainer, hook_name, *args, **kwargs)

File c:\ProgramData\anaconda3\envs\anomalib_env\lib\site-packages\lightning\pytorch\trainer\call.py:159, in _call_lightning_module_hook(trainer, hook_name, pl_module, *args, **kwargs)
    156 pl_module._current_fx_name = hook_name
    158 with trainer.profiler.profile(f"[LightningModule]{pl_module.__class__.__name__}.{hook_name}"):
--> 159     output = fn(*args, **kwargs)
    161 # restore current_fx when nested context
    162 pl_module._current_fx_name = prev_fx_name

File C:\study\anomalib\src\anomalib\models\components\base\memory_bank_module.py:37, in MemoryBankMixin.on_validation_start(self)
     35 """Ensure that the model is fitted before validation starts."""
     36 if not self._is_fitted:
---> 37     self.fit()
     38     self._is_fitted = torch.tensor([True])

File C:\study\anomalib\src\anomalib\models\image\patchcore\lightning_model.py:87, in Patchcore.fit(self)
     85 """Apply subsampling to the embedding collected from the training set."""
     86 logger.info("Aggregating the embedding extracted from the training set.")
---> 87 embeddings = torch.vstack(self.embeddings)
     89 logger.info("Applying core-set subsampling to get the embedding.")
     90 self.model.subsample_embedding(embeddings, self.coreset_sampling_ratio)

RuntimeError: vstack expects a non-empty TensorList
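
For what it's worth, the final error can be reproduced in isolation: `torch.vstack` raises it whenever it is given an empty list, which appears to be what happens in `Patchcore.fit` when `self.embeddings` is still empty at the time of the sanity-check validation (my reading of the traceback above).

```python
import torch

# Calling vstack on an empty list reproduces the same error as in the traceback.
torch.vstack([])  # RuntimeError: vstack expects a non-empty TensorList
```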

Code of Conduct