talmolab / sleap

A deep learning framework for multi-animal pose tracking.
https://sleap.ai

Crop size should always be compatible with max stride #826

Closed: isabelperezf closed this issue 2 years ago

isabelperezf commented 2 years ago

Hi, @auesro and I labeled and corrected around one thousand frames with the multi-animal bottom-up approach. We now want to compare different types of backbones. We chose a UNet backbone for the centroid model, because it gave us a good result, and UNet, LEAP, hourglass, pretrained-encoder, and ResNet backbones for the centered-instance models. We had no problems training the UNet and LEAP models on Colab, but when we tried to train the remaining backbones we got different ValueErrors. Can someone help us? Thanks.

Pretrained encoder error:

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:sleap.nn.training:Versions:
SLEAP: 1.2.4
TensorFlow: 2.8.2
Numpy: 1.21.5
Python: 3.7.13
OS: Linux-5.4.188+-x86_64-with-Ubuntu-18.04-bionic
INFO:sleap.nn.training:Training labels file: proyectoprueba_v9.pkg.slp
INFO:sleap.nn.training:Training profile: centered_instance.json
INFO:sleap.nn.training:
INFO:sleap.nn.training:Arguments:
INFO:sleap.nn.training:{
    "training_job_path": "centered_instance.json",
    "labels_path": "proyectoprueba_v9.pkg.slp",
    "video_paths": [
        ""
    ],
    "val_labels": null,
    "test_labels": null,
    "tensorboard": false,
    "save_viz": false,
    "zmq": false,
    "run_name": "",
    "prefix": "",
    "suffix": "",
    "cpu": false,
    "first_gpu": false,
    "last_gpu": false,
    "gpu": 0
}
INFO:sleap.nn.training:
INFO:sleap.nn.training:Training job:
INFO:sleap.nn.training:{
    "data": {
        "labels": {
            "training_labels": null,
            "validation_labels": null,
            "validation_fraction": 0.1,
            "test_labels": null,
            "split_by_inds": false,
            "training_inds": null,
            "validation_inds": null,
            "test_inds": null,
            "search_path_hints": [],
            "skeletons": []
        },
        "preprocessing": {
            "ensure_rgb": false,
            "ensure_grayscale": false,
            "imagenet_mode": null,
            "input_scaling": 1.0,
            "pad_to_stride": null,
            "resize_and_pad_to_target": true,
            "target_height": null,
            "target_width": null
        },
        "instance_cropping": {
            "center_on_part": null,
            "crop_size": 144,
            "crop_size_detection_padding": 16
        }
    },
    "model": {
        "backbone": {
            "leap": null,
            "unet": null,
            "hourglass": null,
            "resnet": null,
            "pretrained_encoder": {
                "encoder": "efficientnetb0",
                "pretrained": true,
                "decoder_filters": 256,
                "decoder_filters_rate": 1.0,
                "output_stride": 4,
                "decoder_batchnorm": true
            }
        },
        "heads": {
            "single_instance": null,
            "centroid": null,
            "centered_instance": {
                "anchor_part": null,
                "part_names": null,
                "sigma": 2.5,
                "output_stride": 4,
                "loss_weight": 1.0,
                "offset_refinement": false
            },
            "multi_instance": null,
            "multi_class_bottomup": null,
            "multi_class_topdown": null
        }
    },
    "optimization": {
        "preload_data": true,
        "augmentation_config": {
            "rotate": true,
            "rotation_min_angle": -180.0,
            "rotation_max_angle": 180.0,
            "translate": false,
            "translate_min": -5,
            "translate_max": 5,
            "scale": false,
            "scale_min": 0.9,
            "scale_max": 1.1,
            "uniform_noise": false,
            "uniform_noise_min_val": 0.0,
            "uniform_noise_max_val": 10.0,
            "gaussian_noise": false,
            "gaussian_noise_mean": 5.0,
            "gaussian_noise_stddev": 1.0,
            "contrast": false,
            "contrast_min_gamma": 0.5,
            "contrast_max_gamma": 2.0,
            "brightness": false,
            "brightness_min_val": 0.0,
            "brightness_max_val": 10.0,
            "random_crop": false,
            "random_crop_height": 256,
            "random_crop_width": 256,
            "random_flip": false,
            "flip_horizontal": true
        },
        "online_shuffling": true,
        "shuffle_buffer_size": 128,
        "prefetch": true,
        "batch_size": 4,
        "batches_per_epoch": null,
        "min_batches_per_epoch": 200,
        "val_batches_per_epoch": null,
        "min_val_batches_per_epoch": 10,
        "epochs": 200,
        "optimizer": "adam",
        "initial_learning_rate": 0.0001,
        "learning_rate_schedule": {
            "reduce_on_plateau": true,
            "reduction_factor": 0.5,
            "plateau_min_delta": 1e-06,
            "plateau_patience": 5,
            "plateau_cooldown": 3,
            "min_learning_rate": 1e-08
        },
        "hard_keypoint_mining": {
            "online_mining": false,
            "hard_to_easy_ratio": 2.0,
            "min_hard_keypoints": 2,
            "max_hard_keypoints": null,
            "loss_scale": 5.0
        },
        "early_stopping": {
            "stop_training_on_plateau": true,
            "plateau_min_delta": 1e-08,
            "plateau_patience": 10
        }
    },
    "outputs": {
        "save_outputs": true,
        "run_name": "220705_131125",
        "run_name_prefix": "",
        "run_name_suffix": ".centered_instance",
        "runs_folder": "models10",
        "tags": [
            ""
        ],
        "save_visualizations": true,
        "delete_viz_images": true,
        "zip_outputs": false,
        "log_to_csv": true,
        "checkpointing": {
            "initial_model": false,
            "best_model": true,
            "every_epoch": false,
            "latest_model": false,
            "final_model": false
        },
        "tensorboard": {
            "write_logs": false,
            "loss_frequency": "epoch",
            "architecture_graph": false,
            "profile_graph": false,
            "visualizations": true
        },
        "zmq": {
            "subscribe_to_controller": false,
            "controller_address": "tcp://127.0.0.1:9000",
            "controller_polling_timeout": 10,
            "publish_updates": false,
            "publish_address": "tcp://127.0.0.1:9001"
        }
    },
    "name": "",
    "description": "",
    "sleap_version": "1.2.2",
    "filename": "centered_instance.json"
}
INFO:sleap.nn.training:
INFO:sleap.nn.training:Using GPU 0 for acceleration.
INFO:sleap.nn.training:Disabled GPU memory pre-allocation.
INFO:sleap.nn.training:System:
GPUs: 1/1 available
  Device: /physical_device:GPU:0
         Available: True
        Initalized: False
     Memory growth: True
INFO:sleap.nn.training:
INFO:sleap.nn.training:Initializing trainer...
INFO:sleap.nn.training:Loading training labels from: proyectoprueba_v9.pkg.slp
INFO:sleap.nn.training:Creating training and validation splits from validation fraction: 0.1
INFO:sleap.nn.training:  Splits: Training = 930 / Validation = 103.
INFO:sleap.nn.training:Setting up for training...
INFO:sleap.nn.training:Setting up pipeline builders...
INFO:sleap.nn.training:Setting up model...
INFO:sleap.nn.training:Building test pipeline...
2022-07-05 12:06:15.527912: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
INFO:sleap.nn.training:Loaded test example. [2.783s]
INFO:sleap.nn.training:  Input shape: (144, 144, 1)
Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b0_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
16809984/16804768 [==============================] - 1s 0us/step
16818176/16804768 [==============================] - 1s 0us/step
Traceback (most recent call last):
  File "/usr/local/bin/sleap-train", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 1947, in main
    trainer.train()
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 906, in train
    self.setup()
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 892, in setup
    self._setup_model()
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 732, in _setup_model
    self.model.make_model(input_shape)
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/model.py", line 326, in make_model
    x_main, x_mid = self.backbone.make_backbone(x_in=x_in)
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/architectures/pretrained_encoders.py", line 226, in make_backbone
    utils=tf.keras.utils,
  File "/usr/local/lib/python3.7/dist-packages/segmentation_models/models/unet.py", line 240, in Unet
    use_batchnorm=decoder_use_batchnorm,
  File "/usr/local/lib/python3.7/dist-packages/segmentation_models/models/unet.py", line 139, in build_unet
    x = decoder_block(decoder_filters[i], stage=i, use_batchnorm=use_batchnorm)(x, skip)
  File "/usr/local/lib/python3.7/dist-packages/segmentation_models/models/unet.py", line 60, in wrapper
    x = layers.Concatenate(axis=concat_axis, name=concat_name)([x, skip])
  File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.7/dist-packages/keras/layers/merge.py", line 528, in build
    raise ValueError(err_msg)
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 10, 10, 1280), (None, 9, 9, 672)]

Hourglass error:

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:sleap.nn.training:Versions:
SLEAP: 1.2.4
TensorFlow: 2.8.2
Numpy: 1.21.5
Python: 3.7.13
OS: Linux-5.4.188+-x86_64-with-Ubuntu-18.04-bionic
INFO:sleap.nn.training:Training labels file: proyectoprueba_v9.pkg.slp
INFO:sleap.nn.training:Training profile: centered_instance.json
INFO:sleap.nn.training:
INFO:sleap.nn.training:Arguments:
INFO:sleap.nn.training:{
    "training_job_path": "centered_instance.json",
    "labels_path": "proyectoprueba_v9.pkg.slp",
    "video_paths": [
        ""
    ],
    "val_labels": null,
    "test_labels": null,
    "tensorboard": false,
    "save_viz": false,
    "zmq": false,
    "run_name": "",
    "prefix": "",
    "suffix": "",
    "cpu": false,
    "first_gpu": false,
    "last_gpu": false,
    "gpu": 0
}
INFO:sleap.nn.training:
INFO:sleap.nn.training:Training job:
INFO:sleap.nn.training:{
    "data": {
        "labels": {
            "training_labels": null,
            "validation_labels": null,
            "validation_fraction": 0.1,
            "test_labels": null,
            "split_by_inds": false,
            "training_inds": null,
            "validation_inds": null,
            "test_inds": null,
            "search_path_hints": [],
            "skeletons": []
        },
        "preprocessing": {
            "ensure_rgb": false,
            "ensure_grayscale": false,
            "imagenet_mode": null,
            "input_scaling": 1.0,
            "pad_to_stride": null,
            "resize_and_pad_to_target": true,
            "target_height": null,
            "target_width": null
        },
        "instance_cropping": {
            "center_on_part": null,
            "crop_size": 144,
            "crop_size_detection_padding": 16
        }
    },
    "model": {
        "backbone": {
            "leap": null,
            "unet": null,
            "hourglass": {
                "stem_stride": 4,
                "max_stride": 64,
                "output_stride": 4,
                "stem_filters": 128,
                "filters": 256,
                "filter_increase": 128,
                "stacks": 3
            },
            "resnet": null,
            "pretrained_encoder": null
        },
        "heads": {
            "single_instance": null,
            "centroid": null,
            "centered_instance": {
                "anchor_part": null,
                "part_names": null,
                "sigma": 2.5,
                "output_stride": 4,
                "loss_weight": 1.0,
                "offset_refinement": false
            },
            "multi_instance": null,
            "multi_class_bottomup": null,
            "multi_class_topdown": null
        }
    },
    "optimization": {
        "preload_data": true,
        "augmentation_config": {
            "rotate": true,
            "rotation_min_angle": -180.0,
            "rotation_max_angle": 180.0,
            "translate": false,
            "translate_min": -5,
            "translate_max": 5,
            "scale": false,
            "scale_min": 0.9,
            "scale_max": 1.1,
            "uniform_noise": false,
            "uniform_noise_min_val": 0.0,
            "uniform_noise_max_val": 10.0,
            "gaussian_noise": false,
            "gaussian_noise_mean": 5.0,
            "gaussian_noise_stddev": 1.0,
            "contrast": false,
            "contrast_min_gamma": 0.5,
            "contrast_max_gamma": 2.0,
            "brightness": false,
            "brightness_min_val": 0.0,
            "brightness_max_val": 10.0,
            "random_crop": false,
            "random_crop_height": 256,
            "random_crop_width": 256,
            "random_flip": false,
            "flip_horizontal": true
        },
        "online_shuffling": true,
        "shuffle_buffer_size": 128,
        "prefetch": true,
        "batch_size": 4,
        "batches_per_epoch": null,
        "min_batches_per_epoch": 200,
        "val_batches_per_epoch": null,
        "min_val_batches_per_epoch": 10,
        "epochs": 200,
        "optimizer": "adam",
        "initial_learning_rate": 0.0001,
        "learning_rate_schedule": {
            "reduce_on_plateau": true,
            "reduction_factor": 0.5,
            "plateau_min_delta": 1e-06,
            "plateau_patience": 5,
            "plateau_cooldown": 3,
            "min_learning_rate": 1e-08
        },
        "hard_keypoint_mining": {
            "online_mining": false,
            "hard_to_easy_ratio": 2.0,
            "min_hard_keypoints": 2,
            "max_hard_keypoints": null,
            "loss_scale": 5.0
        },
        "early_stopping": {
            "stop_training_on_plateau": true,
            "plateau_min_delta": 1e-08,
            "plateau_patience": 10
        }
    },
    "outputs": {
        "save_outputs": true,
        "run_name": "220705_100018",
        "run_name_prefix": "",
        "run_name_suffix": ".centered_instance",
        "runs_folder": "models10",
        "tags": [
            ""
        ],
        "save_visualizations": true,
        "delete_viz_images": true,
        "zip_outputs": false,
        "log_to_csv": true,
        "checkpointing": {
            "initial_model": false,
            "best_model": true,
            "every_epoch": false,
            "latest_model": false,
            "final_model": false
        },
        "tensorboard": {
            "write_logs": false,
            "loss_frequency": "epoch",
            "architecture_graph": false,
            "profile_graph": false,
            "visualizations": true
        },
        "zmq": {
            "subscribe_to_controller": false,
            "controller_address": "tcp://127.0.0.1:9000",
            "controller_polling_timeout": 10,
            "publish_updates": false,
            "publish_address": "tcp://127.0.0.1:9001"
        }
    },
    "name": "",
    "description": "",
    "sleap_version": "1.2.2",
    "filename": "centered_instance.json"
}
INFO:sleap.nn.training:
INFO:sleap.nn.training:Using GPU 0 for acceleration.
INFO:sleap.nn.training:Disabled GPU memory pre-allocation.
INFO:sleap.nn.training:System:
GPUs: 1/1 available
  Device: /physical_device:GPU:0
         Available: True
        Initalized: False
     Memory growth: True
INFO:sleap.nn.training:
INFO:sleap.nn.training:Initializing trainer...
INFO:sleap.nn.training:Loading training labels from: proyectoprueba_v9.pkg.slp
INFO:sleap.nn.training:Creating training and validation splits from validation fraction: 0.1
INFO:sleap.nn.training:  Splits: Training = 930 / Validation = 103.
INFO:sleap.nn.training:Setting up for training...
INFO:sleap.nn.training:Setting up pipeline builders...
INFO:sleap.nn.training:Setting up model...
INFO:sleap.nn.training:Building test pipeline...
2022-07-05 14:16:24.364106: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
INFO:sleap.nn.training:Loaded test example. [2.695s]
INFO:sleap.nn.training:  Input shape: (144, 144, 1)
Traceback (most recent call last):
  File "/usr/local/bin/sleap-train", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 1947, in main
    trainer.train()
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 906, in train
    self.setup()
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 892, in setup
    self._setup_model()
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 732, in _setup_model
    self.model.make_model(input_shape)
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/model.py", line 326, in make_model
    x_main, x_mid = self.backbone.make_backbone(x_in=x_in)
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/architectures/encoder_decoder.py", line 667, in make_backbone
    prefix=f"stack{i}_dec",
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/architectures/encoder_decoder.py", line 599, in make_decoder
    prefix=f"{prefix}{i}",
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/architectures/hourglass.py", line 190, in make_block
    x = tf.keras.layers.Add(name=prefix + "_skip_add")([x, x_skip])
  File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.7/dist-packages/keras/layers/merge.py", line 79, in _compute_elemwise_op_output_shape
    'Inputs have incompatible shapes. '
ValueError: Inputs have incompatible shapes. Received shapes (6, 6, 640) and (5, 5, 640)

ResNet error:

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:sleap.nn.training:Versions:
SLEAP: 1.2.4
TensorFlow: 2.8.2
Numpy: 1.21.5
Python: 3.7.13
OS: Linux-5.4.188+-x86_64-with-Ubuntu-18.04-bionic
INFO:sleap.nn.training:Training labels file: proyectoprueba_v9.pkg.slp
INFO:sleap.nn.training:Training profile: centered_instance.json
INFO:sleap.nn.training:
INFO:sleap.nn.training:Arguments:
INFO:sleap.nn.training:{
    "training_job_path": "centered_instance.json",
    "labels_path": "proyectoprueba_v9.pkg.slp",
    "video_paths": [
        ""
    ],
    "val_labels": null,
    "test_labels": null,
    "tensorboard": false,
    "save_viz": false,
    "zmq": false,
    "run_name": "",
    "prefix": "",
    "suffix": "",
    "cpu": false,
    "first_gpu": false,
    "last_gpu": false,
    "gpu": 0
}
INFO:sleap.nn.training:
INFO:sleap.nn.training:Training job:
INFO:sleap.nn.training:{
    "data": {
        "labels": {
            "training_labels": null,
            "validation_labels": null,
            "validation_fraction": 0.1,
            "test_labels": null,
            "split_by_inds": false,
            "training_inds": null,
            "validation_inds": null,
            "test_inds": null,
            "search_path_hints": [],
            "skeletons": []
        },
        "preprocessing": {
            "ensure_rgb": false,
            "ensure_grayscale": false,
            "imagenet_mode": null,
            "input_scaling": 1.0,
            "pad_to_stride": null,
            "resize_and_pad_to_target": true,
            "target_height": null,
            "target_width": null
        },
        "instance_cropping": {
            "center_on_part": null,
            "crop_size": 144,
            "crop_size_detection_padding": 16
        }
    },
    "model": {
        "backbone": {
            "leap": null,
            "unet": null,
            "hourglass": null,
            "resnet": {
                "version": "ResNet50",
                "weights": "frozen",
                "upsampling": {
                    "method": "interpolation",
                    "skip_connections": null,
                    "block_stride": 2,
                    "filters": 64,
                    "filters_rate": 1.0,
                    "refine_convs": 2,
                    "batch_norm": true,
                    "transposed_conv_kernel_size": 4
                },
                "max_stride": 32,
                "output_stride": 4
            },
            "pretrained_encoder": null
        },
        "heads": {
            "single_instance": null,
            "centroid": null,
            "centered_instance": {
                "anchor_part": null,
                "part_names": null,
                "sigma": 2.5,
                "output_stride": 4,
                "loss_weight": 1.0,
                "offset_refinement": false
            },
            "multi_instance": null,
            "multi_class_bottomup": null,
            "multi_class_topdown": null
        }
    },
    "optimization": {
        "preload_data": true,
        "augmentation_config": {
            "rotate": true,
            "rotation_min_angle": -180.0,
            "rotation_max_angle": 180.0,
            "translate": false,
            "translate_min": -5,
            "translate_max": 5,
            "scale": false,
            "scale_min": 0.9,
            "scale_max": 1.1,
            "uniform_noise": false,
            "uniform_noise_min_val": 0.0,
            "uniform_noise_max_val": 10.0,
            "gaussian_noise": false,
            "gaussian_noise_mean": 5.0,
            "gaussian_noise_stddev": 1.0,
            "contrast": false,
            "contrast_min_gamma": 0.5,
            "contrast_max_gamma": 2.0,
            "brightness": false,
            "brightness_min_val": 0.0,
            "brightness_max_val": 10.0,
            "random_crop": false,
            "random_crop_height": 256,
            "random_crop_width": 256,
            "random_flip": false,
            "flip_horizontal": true
        },
        "online_shuffling": true,
        "shuffle_buffer_size": 128,
        "prefetch": true,
        "batch_size": 4,
        "batches_per_epoch": null,
        "min_batches_per_epoch": 200,
        "val_batches_per_epoch": null,
        "min_val_batches_per_epoch": 10,
        "epochs": 200,
        "optimizer": "adam",
        "initial_learning_rate": 0.0001,
        "learning_rate_schedule": {
            "reduce_on_plateau": true,
            "reduction_factor": 0.5,
            "plateau_min_delta": 1e-06,
            "plateau_patience": 5,
            "plateau_cooldown": 3,
            "min_learning_rate": 1e-08
        },
        "hard_keypoint_mining": {
            "online_mining": false,
            "hard_to_easy_ratio": 2.0,
            "min_hard_keypoints": 2,
            "max_hard_keypoints": null,
            "loss_scale": 5.0
        },
        "early_stopping": {
            "stop_training_on_plateau": true,
            "plateau_min_delta": 1e-08,
            "plateau_patience": 10
        }
    },
    "outputs": {
        "save_outputs": true,
        "run_name": "220705_113808",
        "run_name_prefix": "",
        "run_name_suffix": ".centered_instance",
        "runs_folder": "models10",
        "tags": [
            ""
        ],
        "save_visualizations": true,
        "delete_viz_images": true,
        "zip_outputs": false,
        "log_to_csv": true,
        "checkpointing": {
            "initial_model": false,
            "best_model": true,
            "every_epoch": false,
            "latest_model": false,
            "final_model": false
        },
        "tensorboard": {
            "write_logs": false,
            "loss_frequency": "epoch",
            "architecture_graph": false,
            "profile_graph": false,
            "visualizations": true
        },
        "zmq": {
            "subscribe_to_controller": false,
            "controller_address": "tcp://127.0.0.1:9000",
            "controller_polling_timeout": 10,
            "publish_updates": false,
            "publish_address": "tcp://127.0.0.1:9001"
        }
    },
    "name": "",
    "description": "",
    "sleap_version": "1.2.2",
    "filename": "centered_instance.json"
}
INFO:sleap.nn.training:
INFO:sleap.nn.training:Using GPU 0 for acceleration.
INFO:sleap.nn.training:Disabled GPU memory pre-allocation.
INFO:sleap.nn.training:System:
GPUs: 1/1 available
  Device: /physical_device:GPU:0
         Available: True
        Initalized: False
     Memory growth: True
INFO:sleap.nn.training:
INFO:sleap.nn.training:Initializing trainer...
INFO:sleap.nn.training:Loading training labels from: proyectoprueba_v9.pkg.slp
INFO:sleap.nn.training:Creating training and validation splits from validation fraction: 0.1
INFO:sleap.nn.training:  Splits: Training = 930 / Validation = 103.
INFO:sleap.nn.training:Setting up for training...
INFO:sleap.nn.training:Setting up pipeline builders...
INFO:sleap.nn.training:Setting up model...
INFO:sleap.nn.training:Building test pipeline...
2022-07-05 14:32:56.175942: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
INFO:sleap.nn.training:Loaded test example. [2.699s]
INFO:sleap.nn.training:  Input shape: (144, 144, 1)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
94773248/94765736 [==============================] - 0s 0us/step
94781440/94765736 [==============================] - 0s 0us/step
INFO:sleap.nn.training:Created Keras model.
INFO:sleap.nn.training:  Backbone: ResNet50(upsampling_stack=UpsamplingStack(output_stride=4, upsampling_stride=2, transposed_conv=False, transposed_conv_filters=64, transposed_conv_filters_rate=1.0, transposed_conv_kernel_size=4, transposed_conv_batchnorm=True, make_skip_connection=False, skip_add=False, refine_convs=2, refine_convs_filters=64, refine_convs_filters_rate=1.0, refine_convs_batchnorm=True), features_output_stride=32, pretrained=True, frozen=True, skip_connections=False, model_name='resnet50', stack_configs=[{'filters': 64, 'blocks': 3, 'stride1': 1, 'name': 'conv2', 'dilation_rate': 1}, {'filters': 128, 'blocks': 4, 'stride1': 2, 'name': 'conv3', 'dilation_rate': 1}, {'filters': 256, 'blocks': 6, 'stride1': 2, 'name': 'conv4', 'dilation_rate': 1}, {'filters': 512, 'blocks': 3, 'stride1': 2, 'name': 'conv5', 'dilation_rate': 1}])
INFO:sleap.nn.training:  Max stride: 32
INFO:sleap.nn.training:  Parameters: 24,953,990
INFO:sleap.nn.training:  Heads: 
INFO:sleap.nn.training:    [0] = CenteredInstanceConfmapsHead(part_names=['snout', 'righthand', 'lefthand', 'rightleg', 'leftleg', 'tail'], anchor_part=None, sigma=2.5, output_stride=4, loss_weight=1.0)
INFO:sleap.nn.training:  Outputs: 
INFO:sleap.nn.training:    [0] = KerasTensor(type_spec=TensorSpec(shape=(None, 40, 40, 6), dtype=tf.float32, name=None), name='CenteredInstanceConfmapsHead/BiasAdd:0', description="created by layer 'CenteredInstanceConfmapsHead'")
INFO:sleap.nn.training:Setting up data pipelines...
INFO:sleap.nn.training:Training set: n = 930
INFO:sleap.nn.training:Validation set: n = 103
INFO:sleap.nn.training:Setting up optimization...
INFO:sleap.nn.training:  Learning rate schedule: LearningRateScheduleConfig(reduce_on_plateau=True, reduction_factor=0.5, plateau_min_delta=1e-06, plateau_patience=5, plateau_cooldown=3, min_learning_rate=1e-08)
INFO:sleap.nn.training:  Early stopping: EarlyStoppingConfig(stop_training_on_plateau=True, plateau_min_delta=1e-08, plateau_patience=10)
INFO:sleap.nn.training:Setting up outputs...
INFO:sleap.nn.training:Created run path: models10/220705_113808.centered_instance
INFO:sleap.nn.training:Setting up visualization...
2022-07-05 14:32:59.397705: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
2022-07-05 14:33:00.417299: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
/usr/local/lib/python3.7/dist-packages/sleap/nn/inference.py:952: UserWarning: Model input of shape (None, 144, 144, 1) does not divide evenly with output of shape (None, 40, 40, 6).
  f"Model input of shape {model.inputs[input_ind].shape} does not divide "
Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
INFO:sleap.nn.training:Finished trainer set up. [6.9s]
INFO:sleap.nn.training:Creating tf.data.Datasets for training data generation...
2022-07-05 14:33:04.050500: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
2022-07-05 14:33:06.416359: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
INFO:sleap.nn.training:Finished creating training datasets. [6.4s]
INFO:sleap.nn.training:Starting training loop...
2022-07-05 14:33:07.075474: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: 1 } dim { size: 194 } dim { size: 174 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "101" frequency: 2000 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 1048576 l3_cache_size: 40370176 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 144 } dim { size: 144 } dim { size: 1 } } }
Epoch 1/200
Traceback (most recent call last):
  File "/usr/local/bin/sleap-train", line 8, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 1947, in main
    trainer.train()
  File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 923, in train
    verbose=2,
  File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py", line 1147, in autograph_handler
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.7/dist-packages/sleap/nn/training.py", line 299, in loss_fn  *
        loss += loss_fn(y_gt, y_pr)
    File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 141, in __call__  **
        losses = call_fn(y_true, y_pred)
    File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/usr/local/lib/python3.7/dist-packages/keras/losses.py", line 1329, in mean_squared_error
        return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)

    ValueError: Dimensions must be equal, but are 40 and 36 for '{{node loss_fn/mean_squared_error/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](model/CenteredInstanceConfmapsHead/BiasAdd, IteratorGetNext:1)' with input shapes: [4,40,40,6], [4,36,36,?].

Expected behaviour

Training completes for each of the selected backbones.

Actual behaviour

Training fails with different ValueErrors related to tensor shapes.

Personal setup

Environment packages ``` # paste output of `pip freeze` or `conda list` here # packages in environment at C:\Users\isabel.perezf\Miniconda3\envs\sleap: # # Name Version Build Channel absl-py 0.15.0 pypi_0 pypi astunparse 1.6.3 pypi_0 pypi attrs 21.2.0 pypi_0 pypi backports-zoneinfo 0.2.1 pypi_0 pypi ca-certificates 2021.10.8 h5b45459_0 conda-forge cached-property 1.5.2 hd8ed1ab_1 conda-forge cached_property 1.5.2 pyha770c72_1 conda-forge cachetools 4.2.4 pypi_0 pypi cattrs 1.1.1 pypi_0 pypi certifi 2021.10.8 pypi_0 pypi charset-normalizer 2.0.12 pypi_0 pypi clang 5.0 pypi_0 pypi colorama 0.4.4 pypi_0 pypi commonmark 0.9.1 pypi_0 pypi cuda-nvcc 11.3.58 hb8d16a4_0 nvidia cudatoolkit 11.3.1 h280eb24_10 conda-forge cudnn 8.2.1.32 h754d62a_0 conda-forge cycler 0.11.0 pypi_0 pypi efficientnet 1.0.0 pypi_0 pypi ffmpeg 4.3.1 ha925a31_0 conda-forge flatbuffers 1.12 pypi_0 pypi fonttools 4.31.2 pypi_0 pypi freetype 2.10.4 h546665d_1 conda-forge gast 0.4.0 pypi_0 pypi geos 3.9.1 h39d44d4_2 conda-forge google-auth 1.35.0 pypi_0 pypi google-auth-oauthlib 0.4.6 pypi_0 pypi google-pasta 0.2.0 pypi_0 pypi grpcio 1.44.0 pypi_0 pypi h5py 3.1.0 nompi_py37h19fda09_100 conda-forge hdf5 1.10.6 nompi_he0bbb20_101 conda-forge idna 3.3 pypi_0 pypi image-classifiers 1.0.0 pypi_0 pypi imageio 2.15.0 pypi_0 pypi imgaug 0.4.0 pypi_0 pypi imgstore 0.2.9 pypi_0 pypi importlib-metadata 4.11.1 pypi_0 pypi intel-openmp 2022.0.0 h57928b3_3663 conda-forge jbig 2.1 h8d14728_2003 conda-forge joblib 1.1.0 pypi_0 pypi jpeg 9e h8ffe710_1 conda-forge jsmin 3.0.1 pypi_0 pypi jsonpickle 1.2 pypi_0 pypi keras 2.6.0 pypi_0 pypi keras-applications 1.0.8 pypi_0 pypi keras-preprocessing 1.1.2 pypi_0 pypi kiwisolver 1.4.2 pypi_0 pypi lcms2 2.12 h2a16943_0 conda-forge lerc 3.0 h0e60522_0 conda-forge libblas 3.9.0 14_win64_mkl conda-forge libcblas 3.9.0 14_win64_mkl conda-forge libdeflate 1.10 h8ffe710_0 conda-forge liblapack 3.9.0 14_win64_mkl conda-forge libpng 1.6.37 h1d00b33_2 conda-forge libtiff 4.3.0 hc4061b1_3 conda-forge libzlib 1.2.11 h8ffe710_1014 conda-forge lz4-c 1.9.3 h8ffe710_1 conda-forge m2w64-gcc-libgfortran 5.3.0 6 conda-forge m2w64-gcc-libs 5.3.0 7 conda-forge m2w64-gcc-libs-core 5.3.0 7 conda-forge m2w64-gmp 6.1.0 2 conda-forge m2w64-libwinpthread-git 5.0.0.4634.697f757 2 conda-forge markdown 3.3.6 pypi_0 pypi matplotlib 3.5.1 pypi_0 pypi mkl 2022.0.0 h0e2418a_796 conda-forge msys2-conda-epoch 20160418 1 conda-forge networkx 2.6.3 pypi_0 pypi numpy 1.19.5 py37h4c2b6ed_3 conda-forge oauthlib 3.2.0 pypi_0 pypi olefile 0.46 pyh9f0ad1d_1 conda-forge opencv-python 4.5.5.62 pypi_0 pypi opencv-python-headless 4.5.5.62 pypi_0 pypi openjpeg 2.4.0 hb211442_1 conda-forge openssl 3.0.2 h8ffe710_1 conda-forge opt-einsum 3.3.0 pypi_0 pypi packaging 21.3 pypi_0 pypi pandas 1.3.5 py37h9386db6_0 conda-forge pillow 8.4.0 py37hd7d9ad0_0 conda-forge pip 22.0.4 pyhd8ed1ab_0 conda-forge protobuf 3.19.4 pypi_0 pypi psutil 5.9.0 pypi_0 pypi pyasn1 0.4.8 pypi_0 pypi pyasn1-modules 0.2.8 pypi_0 pypi pygments 2.11.2 pypi_0 pypi pykalman 0.9.5 pypi_0 pypi pyparsing 3.0.7 pypi_0 pypi pyreadline 2.1 py37h03978a9_1005 conda-forge pyside2 5.14.1 pypi_0 pypi python 3.7.12 h900ac77_100_cpython conda-forge python-dateutil 2.8.2 pyhd8ed1ab_0 conda-forge python-rapidjson 1.6 pypi_0 pypi python_abi 3.7 2_cp37m conda-forge pytz 2022.1 pyhd8ed1ab_0 conda-forge pytz-deprecation-shim 0.1.0.post0 pypi_0 pypi pywavelets 1.3.0 pypi_0 pypi pyzmq 22.3.0 pypi_0 pypi qimage2ndarray 1.8.3 pypi_0 pypi requests 2.27.1 pypi_0 pypi requests-oauthlib 1.3.1 pypi_0 pypi rich 
10.16.1 pypi_0 pypi scikit-image 0.19.2 pypi_0 pypi scikit-learn 1.0.2 pypi_0 pypi scikit-video 1.1.11 pypi_0 pypi scipy 1.7.3 py37hb6553fb_0 conda-forge seaborn 0.11.2 pypi_0 pypi segmentation-models 1.0.1 pypi_0 pypi setuptools 59.8.0 py37h03978a9_1 conda-forge setuptools-scm 6.3.2 pypi_0 pypi shapely 1.7.1 py37hc520ffa_5 conda-forge shiboken2 5.14.1 pypi_0 pypi six 1.15.0 pyh9f0ad1d_0 conda-forge sleap 1.2.2 pypi_0 pypi sqlite 3.38.2 h8ffe710_0 conda-forge tbb 2021.5.0 h2d74725_1 conda-forge tensorboard 2.6.0 pypi_0 pypi tensorboard-data-server 0.6.1 pypi_0 pypi tensorboard-plugin-wit 1.8.1 pypi_0 pypi tensorflow 2.6.3 pypi_0 pypi tensorflow-estimator 2.6.0 pypi_0 pypi termcolor 1.1.0 pypi_0 pypi threadpoolctl 3.1.0 pypi_0 pypi tifffile 2021.11.2 pypi_0 pypi tk 8.6.12 h8ffe710_0 conda-forge tomli 2.0.1 pypi_0 pypi typing-extensions 3.10.0.2 pypi_0 pypi tzdata 2022.1 pypi_0 pypi tzlocal 4.2 pypi_0 pypi ucrt 10.0.20348.0 h57928b3_0 conda-forge urllib3 1.26.8 pypi_0 pypi vc 14.2 hb210afc_6 conda-forge vs2015_runtime 14.29.30037 h902a5da_6 conda-forge werkzeug 2.0.3 pypi_0 pypi wheel 0.37.1 pyhd8ed1ab_0 conda-forge wrapt 1.12.1 pypi_0 pypi xz 5.2.5 h62dcd97_1 conda-forge zipp 3.7.0 pypi_0 pypi zlib 1.2.11 h8ffe710_1014 conda-forge zstd 1.5.2 h6255e5f_0 conda-forge ```
roomrys commented 2 years ago

Hi @isabelperezf,

The ValueErrors seem to come from rounding errors caused by a crop size that is not compatible with the backbone's max stride. SLEAP should automatically adjust the crop size to avoid these rounding errors, but it lets users override the crop size (when Crop Size > Auto is deselected).
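
For intuition, here is a minimal sketch (not the SLEAP implementation) of the rounding that makes a crop size compatible with a backbone's max stride. The values mirror the training profiles above: crop_size = 144, with max_stride = 32 for ResNet50 and 64 for the hourglass (the stride of 16 for a UNet is only an illustrative assumption):

```
import math

def compatible_crop_size(crop_size: int, max_stride: int) -> int:
    # Round the crop size up to the nearest multiple of the max stride so that
    # every downsampling level of the encoder yields an integer feature-map size.
    return math.ceil(crop_size / max_stride) * max_stride

# Values taken from the profiles in the logs above:
print(compatible_crop_size(144, 16))  # 144 -> already compatible (e.g., a UNet with max stride 16)
print(compatible_crop_size(144, 32))  # 160 (ResNet50, max_stride = 32)
print(compatible_crop_size(144, 64))  # 192 (hourglass, max_stride = 64)
```

With crop_size = 144 and a max stride of 32 or 64, some encoder level produces a non-integer spatial size that gets rounded, which is why the Concatenate/Add layers in the tracebacks above end up with mismatched shapes such as (None, 10, 10, 1280) vs. (None, 9, 9, 672).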

Can you try selecting Crop Size > Auto in the SLEAP GUI to see if this resolves your problem?

Fig: Location of the Crop Size > Auto checkbox in the Training Pipeline GUI.
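
If you launch sleap-train from the command line with the profile JSONs shown above rather than through the GUI, a hypothetical workaround (assuming you want to keep a hand-picked crop size) is to patch data.instance_cropping.crop_size so it is a multiple of the backbone's max_stride, e.g. 160 for the ResNet50 profile. A sketch, where the output filename is just an example:

```
import json

# Load the training profile shown in the logs above.
with open("centered_instance.json") as f:
    cfg = json.load(f)

# 144 rounded up to a multiple of the ResNet50 max_stride of 32.
cfg["data"]["instance_cropping"]["crop_size"] = 160

# Write the patched profile under a new (example) name.
with open("centered_instance_fixed.json", "w") as f:
    json.dump(cfg, f, indent=4)
```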

Thanks, Liezl

isabelperezf commented 2 years ago

Hi @roomrys, this solved the problem. Thanks!