fastai / fastbook

The fastai book, published as Jupyter Notebooks
Other
21.75k stars 8.4k forks source link

"Running Your First Notebook" Failed #239

Closed tonyyuandao closed 4 years ago

tonyyuandao commented 4 years ago

Windows 10, fresh start: I just installed the latest Anaconda (Python 3.8) and the latest PyTorch, then ran pip install fastai. (The conda install failed, reporting something like a conflict with cuda-driver 10.2, so I tried pip instead and it succeeded.)

Then I made it through to the code in "Running Your First Notebook":

CLICK ME

from fastai.vision.all import * path = untar_data(URLs.PETS,dest='D:/data')/'images'

def is_cat(x): return x[0].isupper() dls = ImageDataLoaders.from_name_func( path, get_image_files(path), valid_pct=0.2, seed=42, label_func=is_cat, item_tfms=Resize(224))

learn = cnn_learner(dls, resnet34, metrics=error_rate) learn.fine_tune(1)

Result:


RuntimeError Traceback (most recent call last)

in 9 10 learn = cnn_learner(dls, resnet34, metrics=error_rate) ---> 11 learn.fine_tune(1) d:\anaconda3\lib\site-packages\fastcore\utils.py in _f(*args, **kwargs) 452 init_args.update(log) 453 setattr(inst, 'init_args', init_args) --> 454 return inst if to_return else f(*args, **kwargs) 455 return _f 456 d:\anaconda3\lib\site-packages\fastai\callback\schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs) 159 "Fine tune with `freeze` for `freeze_epochs` then with `unfreeze` from `epochs` using discriminative LR" 160 self.freeze() --> 161 self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs) 162 base_lr /= 2 163 self.unfreeze() d:\anaconda3\lib\site-packages\fastcore\utils.py in _f(*args, **kwargs) 452 init_args.update(log) 453 setattr(inst, 'init_args', init_args) --> 454 return inst if to_return else f(*args, **kwargs) 455 return _f 456 d:\anaconda3\lib\site-packages\fastai\callback\schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt) 111 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final), 112 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))} --> 113 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd) 114 115 # Cell d:\anaconda3\lib\site-packages\fastcore\utils.py in _f(*args, **kwargs) 452 init_args.update(log) 453 setattr(inst, 'init_args', init_args) --> 454 return inst if to_return else f(*args, **kwargs) 455 return _f 456 d:\anaconda3\lib\site-packages\fastai\learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt) 202 self.opt.set_hypers(lr=self.lr if lr is None else lr) 203 self.n_epoch,self.loss = n_epoch,tensor(0.) 
--> 204 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup) 205 206 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None d:\anaconda3\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final) 153 154 def _with_events(self, f, event_type, ex, final=noop): --> 155 try: self(f'before_{event_type}') ;f() 156 except ex: self(f'after_cancel_{event_type}') 157 finally: self(f'after_{event_type}') ;final() d:\anaconda3\lib\site-packages\fastai\learner.py in _do_fit(self) 192 for epoch in range(self.n_epoch): 193 self.epoch=epoch --> 194 self._with_events(self._do_epoch, 'epoch', CancelEpochException) 195 196 @log_args(but='cbs') d:\anaconda3\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final) 153 154 def _with_events(self, f, event_type, ex, final=noop): --> 155 try: self(f'before_{event_type}') ;f() 156 except ex: self(f'after_cancel_{event_type}') 157 finally: self(f'after_{event_type}') ;final() d:\anaconda3\lib\site-packages\fastai\learner.py in _do_epoch(self) 186 187 def _do_epoch(self): --> 188 self._do_epoch_train() 189 self._do_epoch_validate() 190 d:\anaconda3\lib\site-packages\fastai\learner.py in _do_epoch_train(self) 178 def _do_epoch_train(self): 179 self.dl = self.dls.train --> 180 self._with_events(self.all_batches, 'train', CancelTrainException) 181 182 def _do_epoch_validate(self, ds_idx=1, dl=None): d:\anaconda3\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final) 153 154 def _with_events(self, f, event_type, ex, final=noop): --> 155 try: self(f'before_{event_type}') ;f() 156 except ex: self(f'after_cancel_{event_type}') 157 finally: self(f'after_{event_type}') ;final() d:\anaconda3\lib\site-packages\fastai\learner.py in all_batches(self) 159 def all_batches(self): 160 self.n_iter = len(self.dl) --> 161 for o in enumerate(self.dl): self.one_batch(*o) 162 163 def _do_one_batch(self): 
d:\anaconda3\lib\site-packages\fastai\data\load.py in __iter__(self) 96 self.randomize() 97 self.before_iter() ---> 98 for b in _loaders[self.fake_l.num_workers==0](self.fake_l): 99 if self.device is not None: b = to_device(b, self.device) 100 yield self.after_batch(b) d:\anaconda3\lib\site-packages\torch\utils\data\dataloader.py in __init__(self, loader) 735 # before it starts, and __del__ tries to join but will get: 736 # AssertionError: can only join a started process. --> 737 w.start() 738 self._index_queues.append(index_queue) 739 self._workers.append(w) d:\anaconda3\lib\multiprocessing\process.py in start(self) 119 'daemonic processes are not allowed to have children' 120 _cleanup() --> 121 self._popen = self._Popen(self) 122 self._sentinel = self._popen.sentinel 123 # Avoid a refcycle if the target function holds an indirect d:\anaconda3\lib\multiprocessing\context.py in _Popen(process_obj) 222 @staticmethod 223 def _Popen(process_obj): --> 224 return _default_context.get_context().Process._Popen(process_obj) 225 226 class DefaultContext(BaseContext): d:\anaconda3\lib\multiprocessing\context.py in _Popen(process_obj) 324 def _Popen(process_obj): 325 from .popen_spawn_win32 import Popen --> 326 return Popen(process_obj) 327 328 class SpawnContext(BaseContext): d:\anaconda3\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj) 91 try: 92 reduction.dump(prep_data, to_child) ---> 93 reduction.dump(process_obj, to_child) 94 finally: 95 set_spawning_popen(None) d:\anaconda3\lib\multiprocessing\reduction.py in dump(obj, file, protocol) 58 def dump(obj, file, protocol=None): 59 '''Replacement for pickle.dump() using ForkingPickler.''' ---> 60 ForkingPickler(file, protocol).dump(obj) 61 62 # d:\anaconda3\lib\site-packages\torch\multiprocessing\reductions.py in reduce_tensor(tensor) 238 ref_counter_offset, 239 event_handle, --> 240 event_sync_required) = storage._share_cuda_() 241 tensor_offset = tensor.storage_offset() 242 shared_cache[handle] = 
StorageWeakRef(storage) RuntimeError: cuda runtime error (801) : operation not supported at ..\torch/csrc/generic/StorageSharing.cpp:247

Is this a PyTorch problem? I tried torch.cuda.is_available(), and it returned True.

tonyyuandao commented 4 years ago

Adding num_workers = 0 to dls fixed the issue. PyTorch multiprocessing doesn't work on Windows.

dls = ImageDataLoaders.from_name_func( path, get_image_files(path), valid_pct=0.2, seed=42, label_func=is_cat, item_tfms=Resize(224),num_workers = 0)