Open JanFschr opened 3 years ago
I got this error (repeated `tcmalloc: large alloc 4294967296 bytes` messages) on Colab with a P100 GPU and 25 GB of RAM. I changed the batch size of fid50k_full to 1 but still got the error. Any ideas?
/content/stylegan2-ada tcmalloc: large alloc 4294967296 bytes == 0x55df82b92000 @ 0x7fe8f7786001 0x7fe8f49c954f 0x7fe8f4a19b58 0x7fe8f4a1db17 0x7fe8f4abc203 0x55df7bfe80e4 0x55df7bfe7de0 0x55df7c05c6f5 0x55df7c056e0d 0x55df7bfea02c 0x55df7c02ad39 0x55df7c027c84 0x55df7bfe88e9 0x55df7c05cade 0x55df7c056b0e 0x55df7bf28e2b 0x55df7c0591e6 0x55df7c056b0e 0x55df7bf28e2b 0x55df7c0591e6 0x55df7c056e0d 0x55df7bf28e2b 0x55df7c0591e6 0x55df7bfe969a 0x55df7c057a45 0x55df7c056b0e 0x55df7c056813 0x55df7c120592 0x55df7c12090d 0x55df7c1207b6 0x55df7c0f8103 tcmalloc: large alloc 4294967296 bytes == 0x55e082b92000 @ 0x7fe8f77841e7 0x7fe8f49c946e 0x7fe8f4a19c7b 0x7fe8f4a1a35f 0x7fe8f4abc103 0x55df7bfe80e4 0x55df7bfe7de0 0x55df7c05c6f5 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7bfe969a 0x55df7c057a45 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05be50 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056e0d 0x55df7bfea02c 0x55df7c02ad39 0x55df7c027c84 0x55df7bfe88e9 0x55df7c05cade tcmalloc: large alloc 4294967296 bytes == 0x55e184190000 @ 0x7fe8f77841e7 0x7fe8f49c946e 0x7fe8f4a19c7b 0x7fe8f4a1a35f 0x7fe8a0389235 0x7fe89fd0c792 0x7fe89fd0cd42 0x7fe89fcc5aee 0x55df7bfe7fd7 0x55df7bfe7de0 0x55df7c05c244 0x55df7bfe969a 0x55df7c057c9e 0x55df7c056e0d 0x55df7bf28eb0 0x55df7c0591e6 0x55df7c056b0e 0x55df7bfe977a 0x55df7c057c9e 0x55df7c056e0d 0x55df7bfe977a 0x55df7c057c9e 0x55df7bfe969a 0x55df7c057c9e 0x55df7c056b0e 0x55df7bfe9e11 0x55df7bfea231 0x55df7c0591e6 0x55df7c056b0e 0x55df7bfe977a 0x55df7c057a45 Training options: { "G_args": { "func_name": "training.networks.G_main", "fmap_base": 16384, "fmap_max": 512, "mapping_layers": 8, "num_fp16_res": 4, "conv_clamp": 256, "min_h": 5, "min_w": 8, "res_log2": 7 }, "D_args": { "func_name": "training.networks.D_main", "mbstd_group_size": 4, "fmap_base": 16384, "fmap_max": 512, "num_fp16_res": 4, "conv_clamp": 256, "min_h": 5, "min_w": 8, "res_log2": 7 }, 
"G_opt_args": { "beta1": 0.0, "beta2": 0.99, "learning_rate": 0.002 }, "D_opt_args": { "beta1": 0.0, "beta2": 0.99, "learning_rate": 0.002 }, "loss_args": { "func_name": "training.loss.stylegan2", "r1_gamma": 10 }, "augment_args": { "class_name": "training.augment.AdaptiveAugment", "tune_heuristic": "rt", "tune_target": 0.6, "apply_func": "training.augment.augment_pipeline", "apply_args": { "xflip": 1, "rotate90": 1, "xint": 1, "scale": 1, "rotate": 1, "aniso": 1, "xfrac": 1, "brightness": 1, "contrast": 1, "lumaflip": 1, "hue": 1, "saturation": 1 } }, "num_gpus": 1, "image_snapshot_ticks": 50, "network_snapshot_ticks": 50, "train_dataset_args": { "path": "./dataset/dataset_mountain", "max_label_size": 0, "use_raw": false, "mirror_augment": true, "min_h": 5, "min_w": 8, "res_log2": 7 }, "metric_arg_list": [ { "name": "fid50k_full", "class_name": "metrics.frechet_inception_distance.FID", "max_reals": null, "num_fakes": 50000, "minibatch_per_gpu": 1, "force_dataset_args": { "shuffle": false, "max_images": null, "repeat": false, "mirror_augment": false } } ], "metric_dataset_args": { "path": "./dataset/dataset_mountain", "max_label_size": 0, "use_raw": false, "mirror_augment": true }, "total_kimg": 25000, "minibatch_size": 4, "minibatch_gpu": 4, "G_smoothing_kimg": 10, "G_smoothing_rampup": null, "run_dir": "/content/drive/My Drive/stylegan2-ada/training/00008-dataset_mountain-mirror-v100_16gb" } Output directory: /content/drive/My Drive/stylegan2-ada/training/00008-dataset_mountain-mirror-v100_16gb Training data: ./dataset/dataset_mountain Training length: 25000 kimg Height res: 640 Width res: 1024 Number of GPUs: 1 Creating output directory... Loading training set... 
tcmalloc: large alloc 4294967296 bytes == 0x55df829ae000 @ 0x7fe8f7786001 0x7fe8f49c954f 0x7fe8f4a19b58 0x7fe8f4a1db17 0x7fe8f4abc203 0x55df7bfe80e4 0x55df7bfe7de0 0x55df7c05c6f5 0x55df7c056e0d 0x55df7bfea02c 0x55df7c02ad39 0x55df7c027c84 0x55df7bfe88e9 0x55df7c05cade 0x55df7c056b0e 0x55df7bf28e2b 0x55df7c0591e6 0x55df7c056b0e 0x55df7bf28e2b 0x55df7c0591e6 0x55df7c056e0d 0x55df7bf28e2b 0x55df7c0591e6 0x55df7bfe969a 0x55df7c057a45 0x55df7c056b0e 0x55df7c056813 0x55df7c120592 0x55df7c12090d 0x55df7c1207b6 0x55df7c0f8103 tcmalloc: large alloc 4294967296 bytes == 0x55e284190000 @ 0x7fe8f77841e7 0x7fe8f49c946e 0x7fe8f4a19c7b 0x7fe8f4a1a35f 0x7fe8f4abc103 0x55df7bfe80e4 0x55df7bfe7de0 0x55df7c05c6f5 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7bfe969a 0x55df7c057a45 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05be50 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056e0d 0x55df7bfea02c 0x55df7c02ad39 0x55df7c027c84 0x55df7bfe88e9 0x55df7c05cade tcmalloc: large alloc 4294967296 bytes == 0x55e284190000 @ 0x7fe8f77841e7 0x7fe8f49c946e 0x7fe8f4a19c7b 0x7fe8f4a1a35f 0x7fe8a0389235 0x7fe89fd0c792 0x7fe89fd0cd42 0x7fe89fcc5aee 0x55df7bfe7fd7 0x55df7bfe7de0 0x55df7c05c244 0x55df7bfe969a 0x55df7c057c9e 0x55df7c056e0d 0x55df7bf28eb0 0x55df7c0591e6 0x55df7c056b0e 0x55df7bfe977a 0x55df7c057c9e 0x55df7c056e0d 0x55df7bfe977a 0x55df7c057c9e 0x55df7bfe969a 0x55df7c057c9e 0x55df7c056b0e 0x55df7bfe9e11 0x55df7bfea231 0x55df7c0591e6 0x55df7c056b0e 0x55df7bfe977a 0x55df7c057a45 Image shape: [3, 640, 1024] Label shape: [0] Constructing networks... Setting up TensorFlow plugin "fused_bias_act.cu": Loading... Done. Setting up TensorFlow plugin "upfirdn_2d.cu": Loading... Done. 
resume_pkl = None G Params OutputShape WeightShape --- --- --- --- latents_in - (?, 512) - labels_in - (?, 0) - G_mapping/Normalize - (?, 512) - G_mapping/Dense0 262656 (?, 512) (512, 512) G_mapping/Dense1 262656 (?, 512) (512, 512) G_mapping/Dense2 262656 (?, 512) (512, 512) G_mapping/Dense3 262656 (?, 512) (512, 512) G_mapping/Dense4 262656 (?, 512) (512, 512) G_mapping/Dense5 262656 (?, 512) (512, 512) G_mapping/Dense6 262656 (?, 512) (512, 512) G_mapping/Dense7 262656 (?, 512) (512, 512) G_mapping/Broadcast - (?, 16, 512) - dlatent_avg - (512,) - Truncation/Lerp - (?, 16, 512) - G_synthesis/5x8/Const 20480 (?, 512, 5, 8) (1, 512, 5, 8) G_synthesis/5x8/Conv 2622465 (?, 512, 5, 8) (3, 3, 512, 512) G_synthesis/5x8/ToRGB 264195 (?, 3, 5, 8) (1, 1, 512, 3) G_synthesis/10x16/Conv0_up 2622465 (?, 512, 10, 16) (3, 3, 512, 512) G_synthesis/10x16/Conv1 2622465 (?, 512, 10, 16) (3, 3, 512, 512) G_synthesis/10x16/Upsample - (?, 3, 10, 16) - G_synthesis/10x16/ToRGB 264195 (?, 3, 10, 16) (1, 1, 512, 3) G_synthesis/20x32/Conv0_up 2622465 (?, 512, 20, 32) (3, 3, 512, 512) G_synthesis/20x32/Conv1 2622465 (?, 512, 20, 32) (3, 3, 512, 512) G_synthesis/20x32/Upsample - (?, 3, 20, 32) - G_synthesis/20x32/ToRGB 264195 (?, 3, 20, 32) (1, 1, 512, 3) G_synthesis/40x64/Conv0_up 2622465 (?, 512, 40, 64) (3, 3, 512, 512) G_synthesis/40x64/Conv1 2622465 (?, 512, 40, 64) (3, 3, 512, 512) G_synthesis/40x64/Upsample - (?, 3, 40, 64) - G_synthesis/40x64/ToRGB 264195 (?, 3, 40, 64) (1, 1, 512, 3) G_synthesis/80x128/Conv0_up 2622465 (?, 512, 80, 128) (3, 3, 512, 512) G_synthesis/80x128/Conv1 2622465 (?, 512, 80, 128) (3, 3, 512, 512) G_synthesis/80x128/Upsample - (?, 3, 80, 128) - G_synthesis/80x128/ToRGB 264195 (?, 3, 80, 128) (1, 1, 512, 3) G_synthesis/160x256/Conv0_up 1442561 (?, 256, 160, 256) (3, 3, 512, 256) G_synthesis/160x256/Conv1 721409 (?, 256, 160, 256) (3, 3, 256, 256) G_synthesis/160x256/Upsample - (?, 3, 160, 256) - G_synthesis/160x256/ToRGB 132099 (?, 3, 160, 256) (1, 1, 256, 3) 
G_synthesis/320x512/Conv0_up 426369 (?, 128, 320, 512) (3, 3, 256, 128) G_synthesis/320x512/Conv1 213249 (?, 128, 320, 512) (3, 3, 128, 128) G_synthesis/320x512/Upsample - (?, 3, 320, 512) - G_synthesis/320x512/ToRGB 66051 (?, 3, 320, 512) (1, 1, 128, 3) G_synthesis/640x1024/Conv0_up 139457 (?, 64, 640, 1024) (3, 3, 128, 64) G_synthesis/640x1024/Conv1 69761 (?, 64, 640, 1024) (3, 3, 64, 64) G_synthesis/640x1024/Upsample - (?, 3, 640, 1024) - G_synthesis/640x1024/ToRGB 33027 (?, 3, 640, 1024) (1, 1, 64, 3) --- --- --- --- Total 30288871 D Params OutputShape WeightShape --- --- --- --- images_in - (?, 3, 640, 1024) - labels_in - (?, 0) - 640x1024/FromRGB 256 (?, 64, 640, 1024) (1, 1, 3, 64) 640x1024/Conv0 36928 (?, 64, 640, 1024) (3, 3, 64, 64) 640x1024/Conv1_down 73856 (?, 128, 320, 512) (3, 3, 64, 128) 640x1024/Skip 8192 (?, 128, 320, 512) (1, 1, 64, 128) 320x512/Conv0 147584 (?, 128, 320, 512) (3, 3, 128, 128) 320x512/Conv1_down 295168 (?, 256, 160, 256) (3, 3, 128, 256) 320x512/Skip 32768 (?, 256, 160, 256) (1, 1, 128, 256) 160x256/Conv0 590080 (?, 256, 160, 256) (3, 3, 256, 256) 160x256/Conv1_down 1180160 (?, 512, 80, 128) (3, 3, 256, 512) 160x256/Skip 131072 (?, 512, 80, 128) (1, 1, 256, 512) 80x128/Conv0 2359808 (?, 512, 80, 128) (3, 3, 512, 512) 80x128/Conv1_down 2359808 (?, 512, 40, 64) (3, 3, 512, 512) 80x128/Skip 262144 (?, 512, 40, 64) (1, 1, 512, 512) 40x64/Conv0 2359808 (?, 512, 40, 64) (3, 3, 512, 512) 40x64/Conv1_down 2359808 (?, 512, 20, 32) (3, 3, 512, 512) 40x64/Skip 262144 (?, 512, 20, 32) (1, 1, 512, 512) 20x32/Conv0 2359808 (?, 512, 20, 32) (3, 3, 512, 512) 20x32/Conv1_down 2359808 (?, 512, 10, 16) (3, 3, 512, 512) 20x32/Skip 262144 (?, 512, 10, 16) (1, 1, 512, 512) 10x16/Conv0 2359808 (?, 512, 10, 16) (3, 3, 512, 512) 10x16/Conv1_down 2359808 (?, 512, 5, 8) (3, 3, 512, 512) 10x16/Skip 262144 (?, 512, 5, 8) (1, 1, 512, 512) 5x8/MinibatchStddev - (?, 513, 5, 8) - 5x8/Conv 2364416 (?, 512, 5, 8) (3, 3, 513, 512) 5x8/Dense0 10486272 (?, 512) 
(20480, 512) Output 513 (?, 1) (512, 1) --- --- --- --- Total 35274305 Exporting sample images... Replicating networks across 1 GPUs... Initializing augmentations... Setting up optimizers... Constructing training graph... Finalizing training ops... Initializing metrics... Training for 25000 kimg... tick 0 kimg 0.0 time 3m 52s sec/tick 87.8 sec/kimg 5487.29 maintenance 144.6 gpumem 11.2 augment 0.000 Evaluating metrics... Calculating real image statistics for fid50k_full... tcmalloc: large alloc 4294967296 bytes == 0x55e4f4514000 @ 0x7fe8f7786001 0x7fe8f49c954f 0x7fe8f4a19b58 0x7fe8f4a1db17 0x7fe8f4abc203 0x55df7bfe80e4 0x55df7bfe7de0 0x55df7c05c6f5 0x55df7c056e0d 0x55df7bfea02c 0x55df7c02ad39 0x55df7c027c84 0x55df7bfe88e9 0x55df7c05cade 0x55df7c056b0e 0x55df7bf28e2b 0x55df7c0591e6 0x55df7bfe969a 0x55df7c057c9e 0x55df7c0da858 0x55df7c057ee2 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bf28e2b 0x55df7c0591e6 0x55df7c056e0d tcmalloc: large alloc 4294967296 bytes == 0x55e5f4514000 @ 0x7fe8f77841e7 0x7fe8f49c946e 0x7fe8f4a19c7b 0x7fe8f4a1a35f 0x7fe8f4abc103 0x55df7bfe80e4 0x55df7bfe7de0 0x55df7c05c6f5 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7bfe969a 0x55df7c057a45 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05be50 0x55df7c056b0e 0x55df7bfe977a 0x55df7c05886a 0x55df7c056e0d 0x55df7bfea02c 0x55df7c02ad39 0x55df7c027c84 0x55df7bfe88e9 0x55df7c05cade tcmalloc: large alloc 4294967296 bytes == 0x55e5f4514000 @ 0x7fe8f77841e7 0x7fe8f49c946e 0x7fe8f4a19c7b 0x7fe8f4a1a35f 0x7fe8a0389235 0x7fe89fd0c792 0x7fe89fd0cd42 0x7fe89fcc5aee 0x55df7bfe7fd7 0x55df7bfe7de0 0x55df7c05c244 0x55df7bfe969a 0x55df7c057c9e 0x55df7c056e0d 0x55df7bf28eb0 0x55df7c0591e6 0x55df7c056b0e 0x55df7bfe977a 0x55df7c057c9e 0x55df7c056e0d 0x55df7bfe977a 0x55df7c057c9e 0x55df7bfe969a 0x55df7c057c9e 0x55df7c056b0e 0x55df7bfe9e11 0x55df7bfea231 
0x55df7c0591e6 0x55df7c056b0e 0x55df7bfe977a 0x55df7c057a45 ^C
I got this error on Colab with a P100 GPU and 25 GB of RAM. I changed the batch size of fid50k_full to 1 but still got the error. Any ideas?