Open DQSSSSS opened 2 years ago
Describe the bug: Training on a custom dataset fails during metric evaluation (in metrics/metric_utils.py) with _pickle.UnpicklingError: invalid load key, '\x00'.
_pickle.UnpicklingError: invalid load key, '\x00'.
To Reproduce: I trained the network with the following command:
python train.py --outdir=./train_results/training-runs --cfg=stylegan3-t --data=./2022-06-14/0614-256x256.zip --gpus=1 --batch=4 --gamma=6.6 --mirror=1
Error log:
Loading training set... Num images: 10000 Image shape: [3, 256, 256] Label shape: [0] Constructing networks... Setting up PyTorch plugin "bias_act_plugin"... Done. Setting up PyTorch plugin "filtered_lrelu_plugin"... Done. Generator Parameters Buffers Output shape Datatype --- --- --- --- --- mapping.fc0 262656 - [4, 512] float32 mapping.fc1 262656 - [4, 512] float32 mapping - 512 [4, 16, 512] float32 synthesis.input.affine 2052 - [4, 4] float32 synthesis.input 262144 1545 [4, 512, 36, 36] float32 synthesis.L0_36_512.affine 262656 - [4, 512] float32 synthesis.L0_36_512 2359808 25 [4, 512, 36, 36] float32 synthesis.L1_36_512.affine 262656 - [4, 512] float32 synthesis.L1_36_512 2359808 25 [4, 512, 36, 36] float32 synthesis.L2_36_512.affine 262656 - [4, 512] float32 synthesis.L2_36_512 2359808 25 [4, 512, 36, 36] float32 synthesis.L3_52_512.affine 262656 - [4, 512] float32 synthesis.L3_52_512 2359808 37 [4, 512, 52, 52] float16 synthesis.L4_52_512.affine 262656 - [4, 512] float32 synthesis.L4_52_512 2359808 25 [4, 512, 52, 52] float16 synthesis.L5_84_512.affine 262656 - [4, 512] float32 synthesis.L5_84_512 2359808 37 [4, 512, 84, 84] float16 synthesis.L6_84_512.affine 262656 - [4, 512] float32 synthesis.L6_84_512 2359808 25 [4, 512, 84, 84] float16 synthesis.L7_148_512.affine 262656 - [4, 512] float32 synthesis.L7_148_512 2359808 37 [4, 512, 148, 148] float16 synthesis.L8_148_512.affine 262656 - [4, 512] float32 synthesis.L8_148_512 2359808 25 [4, 512, 148, 148] float16 synthesis.L9_148_362.affine 262656 - [4, 512] float32 synthesis.L9_148_362 1668458 25 [4, 362, 148, 148] float16 synthesis.L10_276_256.affine 185706 - [4, 362] float32 synthesis.L10_276_256 834304 37 [4, 256, 276, 276] float16 synthesis.L11_276_181.affine 131328 - [4, 256] float32 synthesis.L11_276_181 417205 25 [4, 181, 276, 276] float16 synthesis.L12_276_128.affine 92853 - [4, 181] float32 synthesis.L12_276_128 208640 25 [4, 128, 276, 276] float16 synthesis.L13_256_128.affine 65664 - [4, 128] float32 
synthesis.L13_256_128 147584 25 [4, 128, 256, 256] float16 synthesis.L14_256_3.affine 65664 - [4, 128] float32 synthesis.L14_256_3 387 1 [4, 3, 256, 256] float16 synthesis - - [4, 3, 256, 256] float32 --- --- --- --- --- Total 28472133 2456 - - Setting up PyTorch plugin "upfirdn2d_plugin"... Done. Discriminator Parameters Buffers Output shape Datatype --- --- --- --- --- b256.fromrgb 512 16 [4, 128, 256, 256] float16 b256.skip 32768 16 [4, 256, 128, 128] float16 b256.conv0 147584 16 [4, 128, 256, 256] float16 b256.conv1 295168 16 [4, 256, 128, 128] float16 b256 - 16 [4, 256, 128, 128] float16 b128.skip 131072 16 [4, 512, 64, 64] float16 b128.conv0 590080 16 [4, 256, 128, 128] float16 b128.conv1 1180160 16 [4, 512, 64, 64] float16 b128 - 16 [4, 512, 64, 64] float16 b64.skip 262144 16 [4, 512, 32, 32] float16 b64.conv0 2359808 16 [4, 512, 64, 64] float16 b64.conv1 2359808 16 [4, 512, 32, 32] float16 b64 - 16 [4, 512, 32, 32] float16 b32.skip 262144 16 [4, 512, 16, 16] float16 b32.conv0 2359808 16 [4, 512, 32, 32] float16 b32.conv1 2359808 16 [4, 512, 16, 16] float16 b32 - 16 [4, 512, 16, 16] float16 b16.skip 262144 16 [4, 512, 8, 8] float32 b16.conv0 2359808 16 [4, 512, 16, 16] float32 b16.conv1 2359808 16 [4, 512, 8, 8] float32 b16 - 16 [4, 512, 8, 8] float32 b8.skip 262144 16 [4, 512, 4, 4] float32 b8.conv0 2359808 16 [4, 512, 8, 8] float32 b8.conv1 2359808 16 [4, 512, 4, 4] float32 b8 - 16 [4, 512, 4, 4] float32 b4.mbstd - - [4, 513, 4, 4] float32 b4.conv 2364416 16 [4, 512, 4, 4] float32 b4.fc 4194816 - [4, 512] float32 b4.out 513 - [4, 1] float32 --- --- --- --- --- Total 28864129 416 - - Setting up augmentation... Distributing across 1 GPUs... Setting up training phases... Exporting sample images... Initializing logs... Skipping tfevents export: No module named 'tensorboard' Training for 25000 kimg... tick 0 kimg 0.0 time 2m 23s sec/tick 10.4 sec/kimg 2587.63 maintenance 132.2 cpumem 3.17 gpumem 4.67 reserved 4.73 augment 0.000 Evaluating metrics... 
https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl Traceback (most recent call last): File "D:\texture_gen\stylegan3\train.py", line 286, in <module> main() # pylint: disable=no-value-for-parameter File "C:\Users\v-youkong\Anaconda3\envs\stylegan3\lib\site-packages\click\core.py", line 1128, in __call__ return self.main(*args, **kwargs) File "C:\Users\v-youkong\Anaconda3\envs\stylegan3\lib\site-packages\click\core.py", line 1053, in main rv = self.invoke(ctx) File "C:\Users\v-youkong\Anaconda3\envs\stylegan3\lib\site-packages\click\core.py", line 1395, in invoke return ctx.invoke(self.callback, **ctx.params) File "C:\Users\v-youkong\Anaconda3\envs\stylegan3\lib\site-packages\click\core.py", line 754, in invoke return __callback(*args, **kwargs) File "D:\texture_gen\stylegan3\train.py", line 281, in main launch_training(c=c, desc=desc, outdir=opts.outdir, dry_run=opts.dry_run) File "D:\texture_gen\stylegan3\train.py", line 96, in launch_training subprocess_fn(rank=0, c=c, temp_dir=temp_dir) File "D:\texture_gen\stylegan3\train.py", line 47, in subprocess_fn training_loop.training_loop(rank=rank, **c) File "D:\texture_gen\stylegan3\training\training_loop.py", line 380, in training_loop result_dict = metric_main.calc_metric(metric=metric, G=snapshot_data['G_ema'], File "D:\texture_gen\stylegan3\metrics\metric_main.py", line 48, in calc_metric results = _metric_dict[metric](opts) File "D:\texture_gen\stylegan3\metrics\metric_main.py", line 88, in fid50k_full fid = frechet_inception_distance.compute_fid(opts, max_real=None, num_gen=50000) File "D:\texture_gen\stylegan3\metrics\frechet_inception_distance.py", line 25, in compute_fid mu_real, sigma_real = metric_utils.compute_feature_stats_for_dataset( File "D:\texture_gen\stylegan3\metrics\metric_utils.py", line 228, in compute_feature_stats_for_dataset detector = get_feature_detector(url=detector_url, device=opts.device, num_gpus=opts.num_gpus, 
rank=opts.rank, verbose=progress.verbose) File "D:\texture_gen\stylegan3\metrics\metric_utils.py", line 51, in get_feature_detector _feature_detector_cache[key] = pickle.load(f).to(device) _pickle.UnpicklingError: invalid load key, '\x00'.
I printed the download URL by adding print(url) to get_feature_detector:
def get_feature_detector(url, device=torch.device('cpu'), num_gpus=1, rank=0, verbose=False):
    """Return the object unpickled from `url`, moved to `device`.

    Loaded objects are memoized in `_feature_detector_cache` under the key
    `(url, device)`, so each combination is opened and unpickled at most once.
    When `num_gpus > 1`, ranks other than 0 enter a barrier before loading and
    rank 0 enters it after loading, so rank 0 opens the URL first.
    """
    assert 0 <= rank < num_gpus
    cache_key = (url, device)
    if cache_key in _feature_detector_cache:
        return _feature_detector_cache[cache_key]

    multi_gpu = num_gpus > 1
    leader = rank == 0
    if multi_gpu and not leader:
        torch.distributed.barrier()  # non-leaders wait here so the leader goes first
    print(url)  # debug output: show which URL is about to be opened
    with dnnlib.util.open_url(url, verbose=(verbose and leader)) as stream:
        detector = pickle.load(stream).to(device)
    _feature_detector_cache[cache_key] = detector
    if multi_gpu and leader:
        torch.distributed.barrier()  # leader is done; the other ranks follow
    return _feature_detector_cache[cache_key]
URL: https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl
Desktop (please complete the following information):
Describe the bug: Training on a custom dataset fails during metric evaluation (in metrics/metric_utils.py) with
_pickle.UnpicklingError: invalid load key, '\x00'.
To Reproduce: I trained the network with the following command:
Error log:
I printed the download URL:
URL: https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl
Desktop (please complete the following information):