but every time training reached the 10th epoch, it reported an error:
Traceback (most recent call last):
File "", line 1, in
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning__init.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric__init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch__init.py", line 128, in
raise err
OSError: [WinError 1455] 页面文件太小,无法完成操作。 Error loading "D:\Anaconda\envs\nanodet\lib\site-packages\torch\lib\cufft64_10.dll" or one of its dependencies.
Traceback (most recent call last):
File "", line 1, in
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning__init.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric__init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch__init.py", line 676, in
from .storage import _StorageBase, TypedStorage, _LegacyStorage, UntypedStorage
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\storage.py", line 11, in
import numpy as np
File "D:\Anaconda\envs\nanodet\lib\site-packages\numpy\init.py", line 154, in
from . import ma
File "D:\Anaconda\envs\nanodet\lib\site-packages\numpy\ma\init__.py", line 42, in
from . import core
File "", line 991, in _find_and_load
Traceback (most recent call last):
File "", line 1, in
File "", line 975, in _find_and_load_unlocked
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
File "", line 671, in _load_unlocked
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
File "", line 839, in exec_module
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
File "", line 934, in get_code
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "", line 1033, in get_data
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
MemoryError
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\init__.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\init__.py", line 218, in
from torch._C import * # noqa: F403
RuntimeError: MemoryError: Out of memory interning an attribute name
Traceback (most recent call last):
File "", line 1, in
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\init__.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\init.py", line 831, in
from .functional import * # noqa: F403
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\functional.py", line 8, in
import torch.nn.functional as F
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\init.py", line 1, in
from .modules import * # noqa: F403
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\modules\init__.py", line 18, in
from .batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d, SyncBatchNorm, \
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\modules\batchnorm.py", line 9, in
from ._functions import SyncBatchNorm as sync_batch_norm
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\modules_functions.py", line 4, in
from torch.autograd.function import Function
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\autograd\init.py", line 21, in
from . import functional
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\autograd\functional.py", line 3, in
from . import forward_ad as fwAD
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\autograd\forward_ad.py", line 106, in
_UnpackedDualTensor = namedtuple('_UnpackedDualTensor', ['primal', 'tangent'])
File "D:\Anaconda\envs\nanodet\lib\collections\init__.py", line 394, in namedtuple
exec(s, namespace)
MemoryError
[NanoDet][06-29 15:28:13]INFO:Val|Epoch10/30|Iter80(1/2)| mem:2.79G| lr:1.58e-04| loss_qfl:0.9156| loss_bbox:0.9682| loss_dfl:0.4420| aux_loss_qfl:0.4186| aux_loss_bbox:0.7197| aux_loss_dfl:0.3550|
INFO:NanoDet:Val|Epoch10/30|Iter80(1/2)| mem:2.79G| lr:1.58e-04| loss_qfl:0.9156| loss_bbox:0.9682| loss_dfl:0.4420| aux_loss_qfl:0.4186| aux_loss_bbox:0.7197| aux_loss_dfl:0.3550|
Traceback (most recent call last):
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1120, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "D:\Anaconda\envs\nanodet\lib\queue.py", line 178, in get
raise Empty
_queue.Empty
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "tools/train.py", line 156, in
main(args)
File "tools/train.py", line 151, in main
trainer.fit(task, train_dataloader, val_dataloader, ckpt_path=model_resume_path)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\call.py", line 38, in _call_and_handle_interrupt
return trainer_fn(*args, kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1112, in _run
results = self._run_stage()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1191, in _run_stage
self._run_train()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1214, in _run_train
self.fit_loop.run()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, *kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 267, in advance
self._outputs = self.epoch_loop.run(self._data_fetcher)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 200, in run
self.on_advance_end()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 250, in on_advance_end
self._run_validation()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 308, in _run_validation
self.val_loop.run()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(args, kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\dataloader\evaluation_loop.py", line 152, in advance
dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, **kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\epoch\evaluation_epoch_loop.py", line 121, in advance
batch = next(data_fetcher)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 184, in next
return self.fetching_function()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 265, in fetching_function
self._fetch_next_batch(self.dataloader_iter)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 280, in _fetch_next_batch
batch = next(iterator)
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 628, in next
data = self._next_data()
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1316, in _next_data
idx, data = self._get_data()
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1272, in _get_data
success, data = self._try_get_data()
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1133, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 10156, 4764, 12160, 20160) exited unexpectedly
What is wrong with it? What should I do to solve this problem?
At the beginning of training, it was normal:
[NanoDet][06-29 15:22:19]INFO:Train|Epoch1/30|Iter0(1/8)| mem:2.61G| lr:1.00e-07| loss_qfl:0.5918| loss_bbox:1.1593| loss_dfl:0.5198| aux_loss_qfl:0.5985| aux_loss_bbox:1.1496| aux_loss_dfl:0.5269| INFO:NanoDet:Train|Epoch1/30|Iter0(1/8)| mem:2.61G| lr:1.00e-07| loss_qfl:0.5918| loss_bbox:1.1593| loss_dfl:0.5198| aux_loss
but every time training reached the 10th epoch, it reported an error: Traceback (most recent call last): File "", line 1, in
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning__init.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric__init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch__init.py", line 128, in
raise err
OSError: [WinError 1455] 页面文件太小,无法完成操作。 Error loading "D:\Anaconda\envs\nanodet\lib\site-packages\torch\lib\cufft64_10.dll" or one of its dependencies.
Traceback (most recent call last):
File "", line 1, in
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning__init.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric__init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch__init.py", line 676, in
from .storage import _StorageBase, TypedStorage, _LegacyStorage, UntypedStorage
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\storage.py", line 11, in
import numpy as np
File "D:\Anaconda\envs\nanodet\lib\site-packages\numpy\ init.py", line 154, in
from . import ma
File "D:\Anaconda\envs\nanodet\lib\site-packages\numpy\ma\ init__.py", line 42, in
from . import core
File "", line 991, in _find_and_load
Traceback (most recent call last):
File "", line 1, in
File "", line 975, in _find_and_load_unlocked
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
File "", line 671, in _load_unlocked
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
File "", line 839, in exec_module
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
File "", line 934, in get_code
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "", line 1033, in get_data
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
MemoryError
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\ init__.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\ init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\ init__.py", line 218, in
from torch._C import * # noqa: F403
RuntimeError: MemoryError: Out of memory interning an attribute name
Traceback (most recent call last):
File "", line 1, in
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Anaconda\envs\nanodet\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Anaconda\envs\nanodet\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "E:\py_project\nanodet-main\tools\train.py", line 19, in
import pytorch_lightning as pl
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\ init__.py", line 34, in
from lightning_fabric.utilities.seed import seed_everything # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\ init.py", line 23, in
from lightning_fabric.fabric import Fabric # noqa: E402
File "D:\Anaconda\envs\nanodet\lib\site-packages\lightning_fabric\fabric.py", line 21, in
import torch
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\ init.py", line 831, in
from .functional import * # noqa: F403
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\functional.py", line 8, in
import torch.nn.functional as F
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\ init.py", line 1, in
from .modules import * # noqa: F403
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\modules\ init__.py", line 18, in
from .batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d, SyncBatchNorm, \
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\modules\batchnorm.py", line 9, in
from ._functions import SyncBatchNorm as sync_batch_norm
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\nn\modules_functions.py", line 4, in
from torch.autograd.function import Function
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\autograd\ init.py", line 21, in
from . import functional
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\autograd\functional.py", line 3, in
from . import forward_ad as fwAD
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\autograd\forward_ad.py", line 106, in
_UnpackedDualTensor = namedtuple('_UnpackedDualTensor', ['primal', 'tangent'])
File "D:\Anaconda\envs\nanodet\lib\collections\ init__.py", line 394, in namedtuple
exec(s, namespace)
MemoryError
[NanoDet][06-29 15:28:13]INFO:Val|Epoch10/30|Iter80(1/2)| mem:2.79G| lr:1.58e-04| loss_qfl:0.9156| loss_bbox:0.9682| loss_dfl:0.4420| aux_loss_qfl:0.4186| aux_loss_bbox:0.7197| aux_loss_dfl:0.3550|
INFO:NanoDet:Val|Epoch10/30|Iter80(1/2)| mem:2.79G| lr:1.58e-04| loss_qfl:0.9156| loss_bbox:0.9682| loss_dfl:0.4420| aux_loss_qfl:0.4186| aux_loss_bbox:0.7197| aux_loss_dfl:0.3550|
Traceback (most recent call last): File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1120, in _try_get_data data = self._data_queue.get(timeout=timeout) File "D:\Anaconda\envs\nanodet\lib\queue.py", line 178, in get raise Empty _queue.Empty
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File "tools/train.py", line 156, in
main(args)
File "tools/train.py", line 151, in main
trainer.fit(task, train_dataloader, val_dataloader, ckpt_path=model_resume_path)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\call.py", line 38, in _call_and_handle_interrupt
return trainer_fn(*args, kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1112, in _run
results = self._run_stage()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1191, in _run_stage
self._run_train()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1214, in _run_train
self.fit_loop.run()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, *kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 267, in advance
self._outputs = self.epoch_loop.run(self._data_fetcher)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 200, in run
self.on_advance_end()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 250, in on_advance_end
self._run_validation()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 308, in _run_validation
self.val_loop.run()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(args, kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\dataloader\evaluation_loop.py", line 152, in advance
dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\loop.py", line 199, in run
self.advance(*args, **kwargs)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\loops\epoch\evaluation_epoch_loop.py", line 121, in advance
batch = next(data_fetcher)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 184, in next
return self.fetching_function()
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 265, in fetching_function
self._fetch_next_batch(self.dataloader_iter)
File "D:\Anaconda\envs\nanodet\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 280, in _fetch_next_batch
batch = next(iterator)
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 628, in next
data = self._next_data()
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1316, in _next_data
idx, data = self._get_data()
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1272, in _get_data
success, data = self._try_get_data()
File "D:\Anaconda\envs\nanodet\lib\site-packages\torch\utils\data\dataloader.py", line 1133, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 10156, 4764, 12160, 20160) exited unexpectedly
What is wrong with it? What should I do to solve this problem?