Open abrbhat opened 7 months ago
Describe the bug: Getting an error while running ns-train.
To Reproduce Steps to reproduce the behavior:
ns-train nerfacto --data .\data\nerfstudio\kitchen\
Expected behavior Training should run.
Screenshots Stacktrace:
(nerfstudio) PS C:\Users\Divya\nerfstudio> ns-train nerfacto --data .\data\nerfstudio\kitchen\ [15:20:44] Using --data alias for --data.pipeline.datamanager.data train.py:230 ──────────────────────────────────────────────────────── Config ──────────────────────────────────────────────────────── TrainerConfig( _target=<class 'nerfstudio.engine.trainer.Trainer'>, output_dir=WindowsPath('outputs'), method_name='nerfacto', experiment_name=None, project_name='nerfstudio-project', timestamp='2024-02-27_152044', machine=MachineConfig(seed=42, num_devices=1, num_machines=1, machine_rank=0, dist_url='auto', device_type='cuda'), logging=LoggingConfig( relative_log_dir=WindowsPath('.'), steps_per_log=10, max_buffer_size=20, local_writer=LocalWriterConfig( _target=<class 'nerfstudio.utils.writer.LocalWriter'>, enable=True, stats_to_track=( <EventName.ITER_TRAIN_TIME: 'Train Iter (time)'>, <EventName.TRAIN_RAYS_PER_SEC: 'Train Rays / Sec'>, <EventName.CURR_TEST_PSNR: 'Test PSNR'>, <EventName.VIS_RAYS_PER_SEC: 'Vis Rays / Sec'>, <EventName.TEST_RAYS_PER_SEC: 'Test Rays / Sec'>, <EventName.ETA: 'ETA (time)'> ), max_log_size=10 ), profiler='basic' ), viewer=ViewerConfig( relative_log_filename='viewer_log_filename.txt', websocket_port=None, websocket_port_default=7007, websocket_host='0.0.0.0', num_rays_per_chunk=32768, max_num_display_images=512, quit_on_train_completion=False, image_format='jpeg', jpeg_quality=75, make_share_url=False, camera_frustum_scale=0.1, default_composite_depth=True ), pipeline=VanillaPipelineConfig( _target=<class 'nerfstudio.pipelines.base_pipeline.VanillaPipeline'>, datamanager=ParallelDataManagerConfig( _target=<class 'nerfstudio.data.datamanagers.parallel_datamanager.ParallelDataManager'>, data=WindowsPath('data/nerfstudio/kitchen'), masks_on_gpu=False, images_on_gpu=False, dataparser=NerfstudioDataParserConfig( _target=<class 'nerfstudio.data.dataparsers.nerfstudio_dataparser.Nerfstudio'>, data=WindowsPath('.'), scale_factor=1.0, 
downscale_factor=None, scene_scale=1.0, orientation_method='up', center_method='poses', auto_scale_poses=True, eval_mode='fraction', train_split_fraction=0.9, eval_interval=8, depth_unit_scale_factor=0.001, mask_color=None, load_3D_points=False ), train_num_rays_per_batch=4096, train_num_images_to_sample_from=-1, train_num_times_to_repeat_images=-1, eval_num_rays_per_batch=4096, eval_num_images_to_sample_from=-1, eval_num_times_to_repeat_images=-1, eval_image_indices=(0,), collate_fn=<function nerfstudio_collate at 0x0000026EB493AAF0>, camera_res_scale_factor=1.0, patch_size=1, camera_optimizer=None, pixel_sampler=PixelSamplerConfig( _target=<class 'nerfstudio.data.pixel_samplers.PixelSampler'>, num_rays_per_batch=4096, keep_full_image=False, is_equirectangular=False, ignore_mask=False, fisheye_crop_radius=None, rejection_sample_mask=True, max_num_iterations=100 ), num_processes=1, queue_size=2, max_thread_workers=None ), model=NerfactoModelConfig( _target=<class 'nerfstudio.models.nerfacto.NerfactoModel'>, enable_collider=True, collider_params={'near_plane': 2.0, 'far_plane': 6.0}, loss_coefficients={'rgb_loss_coarse': 1.0, 'rgb_loss_fine': 1.0}, eval_num_rays_per_chunk=32768, prompt=None, near_plane=0.05, far_plane=1000.0, background_color='last_sample', hidden_dim=64, hidden_dim_color=64, hidden_dim_transient=64, num_levels=16, base_res=16, max_res=2048, log2_hashmap_size=19, features_per_level=2, num_proposal_samples_per_ray=(256, 96), num_nerf_samples_per_ray=48, proposal_update_every=5, proposal_warmup=5000, num_proposal_iterations=2, use_same_proposal_network=False, proposal_net_args_list=[ {'hidden_dim': 16, 'log2_hashmap_size': 17, 'num_levels': 5, 'max_res': 128, 'use_linear': False}, {'hidden_dim': 16, 'log2_hashmap_size': 17, 'num_levels': 5, 'max_res': 256, 'use_linear': False} ], proposal_initial_sampler='piecewise', interlevel_loss_mult=1.0, distortion_loss_mult=0.002, orientation_loss_mult=0.0001, pred_normal_loss_mult=0.001, 
use_proposal_weight_anneal=True, use_appearance_embedding=True, use_average_appearance_embedding=True, proposal_weights_anneal_slope=10.0, proposal_weights_anneal_max_num_iters=1000, use_single_jitter=True, predict_normals=False, disable_scene_contraction=False, use_gradient_scaling=False, implementation='tcnn', appearance_embed_dim=32, average_init_density=0.01, camera_optimizer=CameraOptimizerConfig( _target=<class 'nerfstudio.cameras.camera_optimizers.CameraOptimizer'>, mode='SO3xR3', trans_l2_penalty=0.01, rot_l2_penalty=0.001, optimizer=None, scheduler=None ) ) ), optimizers={ 'proposal_networks': { 'optimizer': AdamOptimizerConfig( _target=<class 'torch.optim.adam.Adam'>, lr=0.01, eps=1e-15, max_norm=None, weight_decay=0 ), 'scheduler': ExponentialDecaySchedulerConfig( _target=<class 'nerfstudio.engine.schedulers.ExponentialDecayScheduler'>, lr_pre_warmup=1e-08, lr_final=0.0001, warmup_steps=0, max_steps=200000, ramp='cosine' ) }, 'fields': { 'optimizer': AdamOptimizerConfig( _target=<class 'torch.optim.adam.Adam'>, lr=0.01, eps=1e-15, max_norm=None, weight_decay=0 ), 'scheduler': ExponentialDecaySchedulerConfig( _target=<class 'nerfstudio.engine.schedulers.ExponentialDecayScheduler'>, lr_pre_warmup=1e-08, lr_final=0.0001, warmup_steps=0, max_steps=200000, ramp='cosine' ) }, 'camera_opt': { 'optimizer': AdamOptimizerConfig( _target=<class 'torch.optim.adam.Adam'>, lr=0.001, eps=1e-15, max_norm=None, weight_decay=0 ), 'scheduler': ExponentialDecaySchedulerConfig( _target=<class 'nerfstudio.engine.schedulers.ExponentialDecayScheduler'>, lr_pre_warmup=1e-08, lr_final=0.0001, warmup_steps=0, max_steps=5000, ramp='cosine' ) } }, vis='viewer', data=WindowsPath('data/nerfstudio/kitchen'), prompt=None, relative_model_dir=WindowsPath('nerfstudio_models'), load_scheduler=True, steps_per_save=2000, steps_per_eval_batch=500, steps_per_eval_image=500, steps_per_eval_all_images=25000, max_num_iterations=30000, mixed_precision=True, use_grad_scaler=False, 
save_only_latest_checkpoint=True, load_dir=None, load_step=None, load_config=None, load_checkpoint=None, log_gradients=False, gradient_accumulation_steps={} ) ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── Saving config to: outputs\kitchen\nerfacto\2024-02-27_152044\config.yml experiment_config.py:136 Saving checkpoints to: outputs\kitchen\nerfacto\2024-02-27_152044\nerfstudio_models trainer.py:136 Auto image downscale factor of 1 nerfstudio_dataparser.py:484 Dataset is overriding orientation method to none nerfstudio_dataparser.py:232 [15:20:45] Dataset is overriding orientation method to none nerfstudio_dataparser.py:232 Dataset is overriding orientation method to none nerfstudio_dataparser.py:232 Loading data batch ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:02 Traceback (most recent call last): File "C:\Users\Divya\.conda\envs\nerfstudio\lib\runpy.py", line 194, in _run_module_as_main return _run_code(code, main_globals, None, File "C:\Users\Divya\.conda\envs\nerfstudio\lib\runpy.py", line 87, in _run_code exec(code, run_globals) File "C:\Users\Divya\.conda\envs\nerfstudio\Scripts\ns-train.exe\__main__.py", line 7, in <module> File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\scripts\train.py", line 262, in entrypoint main( File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\scripts\train.py", line 247, in main launch( File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\scripts\train.py", line 189, in launch main_func(local_rank=0, world_size=world_size, config=config) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\scripts\train.py", line 99, in train_loop trainer.setup() File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\engine\trainer.py", line 149, in setup self.pipeline = self.config.pipeline.setup( File 
"C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\configs\base_config.py", line 54, in setup return self._target(self, **kwargs) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\pipelines\base_pipeline.py", line 254, in __init__ self.datamanager: DataManager = config.datamanager.setup( File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\configs\base_config.py", line 54, in setup return self._target(self, **kwargs) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\data\datamanagers\parallel_datamanager.py", line 178, in __init__ super().__init__() File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\data\datamanagers\base_datamanager.py", line 181, in __init__ self.setup_train() File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\data\datamanagers\parallel_datamanager.py", line 255, in setup_train proc.start() File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\process.py", line 121, in start self._popen = self._Popen(self) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\context.py", line 224, in _Popen return _default_context.get_context().Process._Popen(process_obj) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\context.py", line 327, in _Popen return Popen(process_obj) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\popen_spawn_win32.py", line 93, in __init__ reduction.dump(process_obj, to_child) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\reduction.py", line 63, in dump ForkingPickler(file, protocol, *args, **kwds).dump(obj) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 420, in dump StockPickler.dump(self, obj) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 487, in dump self.save(obj) File 
"C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 603, in save self.save_reduce(obj=obj, *rv) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 717, in save_reduce save(state) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 560, in save f(self, obj) # Call unbound method with explicit self File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 1217, in save_module_dict StockPickler.save_dict(pickler, obj) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 971, in save_dict self._batch_setitems(obj.items()) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 997, in _batch_setitems save(v) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 560, in save f(self, obj) # Call unbound method with explicit self File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 1217, in save_module_dict StockPickler.save_dict(pickler, obj) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 971, in save_dict self._batch_setitems(obj.items()) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 997, in _batch_setitems save(v) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 603, in save self.save_reduce(obj=obj, *rv) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 692, in save_reduce save(args) File 
"C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 560, in save f(self, obj) # Call unbound method with explicit self File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 901, in save_tuple save(element) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 603, in save self.save_reduce(obj=obj, *rv) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 692, in save_reduce save(args) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 560, in save f(self, obj) # Call unbound method with explicit self File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 886, in save_tuple save(element) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 414, in save StockPickler.save(self, obj, save_persistent_id) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 560, in save f(self, obj) # Call unbound method with explicit self File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 801, in save_bytes self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\pickle.py", line 260, in write_large_bytes write(payload) OSError: [Errno 22] Invalid argument Traceback (most recent call last): File "<string>", line 1, in <module> File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\spawn.py", line 116, in spawn_main exitcode = _main(fd, parent_sentinel) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\spawn.py", line 126, in _main self = 
reduction.pickle.load(from_parent) File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 289, in load return Unpickler(file, ignore=ignore, **kwds).load() File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\dill\_dill.py", line 444, in load obj = StockUnpickler.load(self) _pickle.UnpicklingError: pickle data was truncated Exception ignored in: <function ParallelDataManager.__del__ at 0x0000026EB4A1EE50> Traceback (most recent call last): File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\nerfstudio\data\datamanagers\parallel_datamanager.py", line 339, in __del__ proc.terminate() File "C:\Users\Divya\.conda\envs\nerfstudio\lib\site-packages\multiprocess\process.py", line 133, in terminate self._popen.terminate() AttributeError: 'NoneType' object has no attribute 'terminate'
Additional context Add any other context about the problem here.
Have you found any solution to this?
Found the reason: insufficient RAM. See also #5301. Please close this issue. Thanks.
I just solved this issue by changing my CUDA version from 12.4 to 11.8.
Describe the bug: Getting an error while running ns-train.
To Reproduce Steps to reproduce the behavior:
ns-train nerfacto --data .\data\nerfstudio\kitchen\
Expected behavior Training should run.
Screenshots Stacktrace:
Additional context Add any other context about the problem here.