Hi, I am trying to reproduce your results. However, the training process always crashes with a "_pickle.UnpicklingError" after several epochs.
Here is the full output shown when the training process stopped:
`
runfile('F:/Revlis/reference/ecg_ptbxl_benchmarking-master/code/reproduce_results.py', wdir='F:/Revlis/reference/ecg_ptbxl_benchmarking-master/code')
Training from scratch...
model: fastai_xresnet1d101
█epoch train_loss valid_loss time
Epoch 1/1 : |██████████████------| 71.43% [95/133 01:02<00:25 3.0052]0 7.200954 #na# 01:03
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
epoch train_loss valid_loss time
Epoch 1/50 : █0 0.705411 0.575564 01:34
Epoch 2/50 : █1 0.539839 0.474647 01:30
Epoch 3/50 : █2 0.423681 0.374968 01:30
Epoch 4/50 : █3 0.371818 0.363357 01:29
Epoch 5/50 : Traceback (most recent call last):
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\spyder_kernels\py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "f:\revlis\reference\ecg_ptbxl_benchmarking-master\code\reproduce_results.py", line 49, in
main()
File "f:\revlis\reference\ecg_ptbxl_benchmarking-master\code\reproduce_results.py", line 29, in main
e.perform()
File "F:\Revlis\reference\ecg_ptbxl_benchmarking-master\code\experiments\scp_experiment.py", line 112, in perform
model.fit(self.X_train, self.y_train, self.X_val, self.y_val)
File "F:\Revlis\reference\ecg_ptbxl_benchmarking-master\code\models\fastai_model.py", line 236, in fit
learn.fit_one_cycle(self.epochs,self.lr)#slice(self.lr) if self.discriminative_lrs else self.lr)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\train.py", line 23, in fit_one_cycle
learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\basic_train.py", line 200, in fit
fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\basic_train.py", line 99, in fit
for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastprogress\fastprogress.py", line 50, in iter
raise e
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastprogress\fastprogress.py", line 41, in iter
for i,o in enumerate(self.gen):
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\basic_data.py", line 75, in iter
for b in self.dl: yield self.proc_batch(b)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\torch\utils\data\dataloader.py", line 441, in iter
return self._get_iterator()
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\torch\utils\data\dataloader.py", line 388, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\torch\utils\data\dataloader.py", line 1042, in init
w.start()
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\popen_spawn_win32.py", line 93, in init
reduction.dump(process_obj, to_child)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
OSError: [Errno 22] Invalid argument
Traceback (most recent call last):
File "", line 1, in
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
`
I believe this error is caused by multiprocessing, but I have no idea how to fix it.
Hi, I am trying to reproduce your results. However, the training process always crashes with a "_pickle.UnpicklingError" after several epochs.
Here is the full output shown when the training process stopped:
` runfile('F:/Revlis/reference/ecg_ptbxl_benchmarking-master/code/reproduce_results.py', wdir='F:/Revlis/reference/ecg_ptbxl_benchmarking-master/code') Training from scratch... model: fastai_xresnet1d101 █epoch train_loss valid_loss time
Epoch 1/1 : |██████████████------| 71.43% [95/133 01:02<00:25 3.0052]0 7.200954 #na# 01:03
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph. epoch train_loss valid_loss time
Epoch 1/50 : █0 0.705411 0.575564 01:34
Epoch 2/50 : █1 0.539839 0.474647 01:30
Epoch 3/50 : █2 0.423681 0.374968 01:30
Epoch 4/50 : █3 0.371818 0.363357 01:29
Epoch 5/50 : Traceback (most recent call last):
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\spyder_kernels\py3compat.py", line 356, in compat_exec exec(code, globals, locals)
File "f:\revlis\reference\ecg_ptbxl_benchmarking-master\code\reproduce_results.py", line 49, in
main()
File "f:\revlis\reference\ecg_ptbxl_benchmarking-master\code\reproduce_results.py", line 29, in main e.perform()
File "F:\Revlis\reference\ecg_ptbxl_benchmarking-master\code\experiments\scp_experiment.py", line 112, in perform model.fit(self.X_train, self.y_train, self.X_val, self.y_val)
File "F:\Revlis\reference\ecg_ptbxl_benchmarking-master\code\models\fastai_model.py", line 236, in fit learn.fit_one_cycle(self.epochs,self.lr)#slice(self.lr) if self.discriminative_lrs else self.lr)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\train.py", line 23, in fit_one_cycle learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\basic_train.py", line 200, in fit fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\basic_train.py", line 99, in fit for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastprogress\fastprogress.py", line 50, in iter raise e
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastprogress\fastprogress.py", line 41, in iter for i,o in enumerate(self.gen):
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\fastai\basic_data.py", line 75, in iter for b in self.dl: yield self.proc_batch(b)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\torch\utils\data\dataloader.py", line 441, in iter return self._get_iterator()
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\torch\utils\data\dataloader.py", line 388, in _get_iterator return _MultiProcessingDataLoaderIter(self)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\site-packages\torch\utils\data\dataloader.py", line 1042, in init w.start()
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\process.py", line 121, in start self._popen = self._Popen(self)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\context.py", line 224, in _Popen return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\context.py", line 327, in _Popen return Popen(process_obj)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\popen_spawn_win32.py", line 93, in init reduction.dump(process_obj, to_child)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\reduction.py", line 60, in dump ForkingPickler(file, protocol).dump(obj)
OSError: [Errno 22] Invalid argument
Traceback (most recent call last): File "", line 1, in
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\revlis_user\anaconda3\envs\env_ecg_benchmark\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
`
I believe this error is caused by multiprocessing, but I have no idea how to fix it.