StackedHourglass backbone is not working

Hi, I'm attempting to train an hourglass model and am getting the error below whenever I use more than one stack. Is there a workaround?

Thanks for your help! Sam

Training output

INFO:sleap.nn.training:Versions: SLEAP: 1.2.2 TensorFlow: 2.6.3 Numpy: 1.19.5 Python: 3.7.12 OS: Linux-5.4.0-96-generic-x86_64-with-debian-bullseye-sid INFO:sleap.nn.training:Training labels file: min_keypoints.pkg.slp INFO:sleap.nn.training:Training profile: hourglass.json INFO:sleap.nn.training: INFO:sleap.nn.training:Arguments: INFO:sleap.nn.training:{ "training_job_path": "hourglass.json", "labels_path": "min_keypoints.pkg.slp", "video_paths": [ "" ], "val_labels": null, "test_labels": null, "tensorboard": false, "save_viz": false, "zmq": false, "run_name": "", "prefix": "", "suffix": "", "cpu": false, "first_gpu": false, "last_gpu": false, "gpu": 0 } INFO:sleap.nn.training: INFO:sleap.nn.training:Training job: INFO:sleap.nn.training:{ "data": { "labels": { "training_labels": null, "validation_labels": null, "validation_fraction": 0.1, "test_labels": null, "split_by_inds": false, "training_inds": null, "validation_inds": null, "test_inds": null, "search_path_hints": [], "skeletons": [] }, "preprocessing": { "ensure_rgb": false, "ensure_grayscale": true, "imagenet_mode": null, "input_scaling": 1.0, "pad_to_stride": null, "resize_and_pad_to_target": true, "target_height": null, "target_width": null }, "instance_cropping": { "center_on_part": null, "crop_size": null, "crop_size_detection_padding": 16 } }, "model": { "backbone": { "leap": null, "unet": null, "hourglass": { "stem_stride": 4, "max_stride": 32, "output_stride": 2, "stem_filters": 128, "filters": 256, "filter_increase": 128, "stacks": 2 }, "resnet": null, "pretrained_encoder": null }, "heads": { "single_instance": null, "centroid": null, "centered_instance": { "anchor_part": null, "part_names": null, "sigma": 2.5, "output_stride": 4, "loss_weight": 1.0, "offset_refinement": false }, "multi_instance": null, "multi_class_bottomup": null, "multi_class_topdown": null } }, "optimization": { "preload_data": true, "augmentation_config": { "rotate": true, "rotation_min_angle": -2.5, "rotation_max_angle": 2.5, 
"translate": false, "translate_min": -5, "translate_max": 5, "scale": true, "scale_min": 0.9, "scale_max": 1.1, "uniform_noise": false, "uniform_noise_min_val": 0.0, "uniform_noise_max_val": 10.0, "gaussian_noise": false, "gaussian_noise_mean": 5.0, "gaussian_noise_stddev": 1.0, "contrast": false, "contrast_min_gamma": 0.5, "contrast_max_gamma": 2.0, "brightness": false, "brightness_min_val": 0.0, "brightness_max_val": 10.0, "random_crop": false, "random_crop_height": 256, "random_crop_width": 256, "random_flip": false, "flip_horizontal": true }, "online_shuffling": true, "shuffle_buffer_size": 128, "prefetch": true, "batch_size": 6, "batches_per_epoch": null, "min_batches_per_epoch": 200, "val_batches_per_epoch": null, "min_val_batches_per_epoch": 10, "epochs": 3000, "optimizer": "adam", "initial_learning_rate": 0.0001, "learning_rate_schedule": { "reduce_on_plateau": true, "reduction_factor": 0.5, "plateau_min_delta": 1e-06, "plateau_patience": 5, "plateau_cooldown": 3, "min_learning_rate": 1e-08 }, "hard_keypoint_mining": { "online_mining": false, "hard_to_easy_ratio": 2.0, "min_hard_keypoints": 2, "max_hard_keypoints": null, "loss_scale": 5.0 }, "early_stopping": { "stop_training_on_plateau": true, "plateau_min_delta": 1e-08, "plateau_patience": 10 } }, "outputs": { "save_outputs": true, "run_name": null, "run_name_prefix": "min_kps_hourglass", "run_name_suffix": ".centered_instance", "runs_folder": "models", "tags": [ "" ], "save_visualizations": true, "delete_viz_images": true, "zip_outputs": false, "log_to_csv": true, "checkpointing": { "initial_model": false, "best_model": true, "every_epoch": false, "latest_model": false, "final_model": true }, "tensorboard": { "write_logs": false, "loss_frequency": "epoch", "architecture_graph": false, "profile_graph": false, "visualizations": true }, "zmq": { "subscribe_to_controller": false, "controller_address": "tcp://127.0.0.1:9000", "controller_polling_timeout": 10, "publish_updates": false, "publish_address": 
"tcp://127.0.0.1:9001" } }, "name": "", "description": "", "sleap_version": "1.2.2", "filename": "hourglass.json" } INFO:sleap.nn.training: INFO:sleap.nn.training:Using GPU 0 for acceleration. INFO:sleap.nn.training:Disabled GPU memory pre-allocation. INFO:sleap.nn.training:System: GPUs: 1/2 available Device: /physical_device:GPU:0 Available: True Initalized: False Memory growth: True Device: /physical_device:GPU:1 Available: False Initalized: False Memory growth: None INFO:sleap.nn.training: INFO:sleap.nn.training:Initializing trainer... INFO:sleap.nn.training:Loading training labels from: min_keypoints.pkg.slp INFO:sleap.nn.training:Creating training and validation splits from validation fraction: 0.1 INFO:sleap.nn.training: Splits: Training = 1462 / Validation = 162. INFO:sleap.nn.training:Setting up for training... INFO:sleap.nn.training:Setting up pipeline builders... INFO:sleap.nn.training:Setting up model... INFO:sleap.nn.training:Building test pipeline... 2022-04-27 13:47:31.910299: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
2022-04-27 13:47:32.363714: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 19623 MB memory: -> device: 0, name: Quadro RTX 6000, pci bus id: 0000:17:00.0, compute capability: 7.5 2022-04-27 13:47:32.735466: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2) 2022-04-27 13:47:34.178944: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 608 } dim { size: 1440 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 3299 num_cores: 28 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.3.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 20185088 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 288 } dim { size: 288 } dim { size: 1 } } } INFO:sleap.nn.training:Loaded test example. 
[2.486s] INFO:sleap.nn.training: Input shape: (288, 288, 1) Traceback (most recent call last): File "/home/sam/miniconda3/envs/sleap/bin/sleap-train", line 33, in <module> sys.exit(load_entry_point('sleap==1.2.2', 'console_scripts', 'sleap-train')()) File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/sleap/nn/training.py", line 1947, in main trainer.train() File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/sleap/nn/training.py", line 906, in train self.setup() File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/sleap/nn/training.py", line 892, in setup self._setup_model() File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/sleap/nn/training.py", line 732, in _setup_model self.model.make_model(input_shape) File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/sleap/nn/model.py", line 363, in make_model self.keras_model = tf.keras.Model(inputs=x_in, outputs=x_outs) File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 530, in _method_wrapper result = method(self, *args, **kwargs) File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/keras/engine/functional.py", line 109, in __init__ self._init_graph_network(inputs, outputs) File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 530, in _method_wrapper result = method(self, *args, **kwargs) File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/keras/engine/functional.py", line 193, in _init_graph_network self.inputs, self.outputs) File "/home/sam/miniconda3/envs/sleap/lib/python3.7/site-packages/keras/engine/functional.py", line 995, in _map_graph_network str(all_names.count(name)) + ' times in the model. ' ValueError: The name "CenteredInstanceConfmapsHead" is used 2 times in the model. All layer names should be unique.

talmolab / sleap

StackedHourglass backbone is not working #715

Training output