google / automl

Google Brain AutoML

incompatible shapes training #988

Closed · alexlorenzo closed this issue 3 years ago

alexlorenzo commented 3 years ago

Hello,

Our dataset has a single class. We pass our COCO-format dataset to create_coco_tf_record.py.

After changing num_classes=2 in hparams_config.py, we launch the following command:

python main.py --mode=train --train_file_pattern=tfrecord/train*.tfrecord --val_file_pattern=tfrecord/val*.tfrecord --model_name=efficientdet-d0 --model_dir=/tmp/efficientdet-d0-finetune --ckpt=efficientdet-d0 --train_batch_size=1 --num_epochs=50

We get the following error:

(1) Invalid argument: Incompatible shapes: [1,810,64,64] vs. [1,18,64,64]
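For reference, the two channel counts line up with the anchor/class arithmetic: with the repo's 90-class COCO default (see the TODO in the config below) and 9 anchors per location, a quick sanity check:

```python
# Sanity check on the mismatched channel counts (assumes the config defaults:
# num_scales=3 and three aspect ratios, i.e. 3 * 3 = 9 anchors per location).
num_anchors = 3 * 3
print(num_anchors * 90)  # 810 -> a class head built for the 90-class default
print(num_anchors * 2)   # 18  -> a head (or labels) built with num_classes=2
```

This suggests one side of the graph was still built with the 90-class default while the other used num_classes=2.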

So far, we have been using the checkpoint from https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/ckptsaug/efficientdet-d0.tar.gz

After reading issue #704, we still do not understand which pre-trained model to use.

Thanks

kartik4949 commented 3 years ago

Can you show me your hparams.yaml?

alexlorenzo commented 3 years ago

Thank you for your quick answer @kartik4949 .

Please find my hparams_config.py; I used the default config except for num_classes:


```python
# Copyright 2020 Google Research. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Hparams for model architecture and trainer."""
import ast
import collections
import copy
from typing import Any, Dict, Text
import six
import tensorflow as tf
import yaml

def eval_str_fn(val):
  if val in {'true', 'false'}:
    return val == 'true'
  try:
    return ast.literal_eval(val)
  except (ValueError, SyntaxError):
    return val

# pylint: disable=protected-access
class Config(object):
  """A config utility class."""

  def __init__(self, config_dict=None):
    self.update(config_dict)

  def __setattr__(self, k, v):
    self.__dict__[k] = Config(v) if isinstance(v, dict) else copy.deepcopy(v)

  def __getattr__(self, k):
    return self.__dict__[k]

  def __getitem__(self, k):
    return self.__dict__[k]

  def __repr__(self):
    return repr(self.as_dict())

  def __deepcopy__(self, memodict):
    return type(self)(self.as_dict())

  def __str__(self):
    try:
      return yaml.dump(self.as_dict(), indent=4)
    except TypeError:
      return str(self.as_dict())

  def _update(self, config_dict, allow_new_keys=True):
    """Recursively update internal members."""
    if not config_dict:
      return

    for k, v in six.iteritems(config_dict):
      if k not in self.__dict__:
        if allow_new_keys:
          self.__setattr__(k, v)
        else:
          raise KeyError('Key `{}` does not exist for overriding. '.format(k))
      else:
        if isinstance(self.__dict__[k], Config) and isinstance(v, dict):
          self.__dict__[k]._update(v, allow_new_keys)
        elif isinstance(self.__dict__[k], Config) and isinstance(v, Config):
          self.__dict__[k]._update(v.as_dict(), allow_new_keys)
        else:
          self.__setattr__(k, v)

  def get(self, k, default_value=None):
    return self.__dict__.get(k, default_value)

  def update(self, config_dict):
    """Update members while allowing new keys."""
    self._update(config_dict, allow_new_keys=True)

  def keys(self):
    return self.__dict__.keys()

  def override(self, config_dict_or_str, allow_new_keys=False):
    """Update members while disallowing new keys."""
    if isinstance(config_dict_or_str, str):
      if not config_dict_or_str:
        return
      elif '=' in config_dict_or_str:
        config_dict = self.parse_from_str(config_dict_or_str)
      elif config_dict_or_str.endswith('.yaml'):
        config_dict = self.parse_from_yaml(config_dict_or_str)
      else:
        raise ValueError(
            'Invalid string {}, must end with .yaml or contains "=".'.format(
                config_dict_or_str))
    elif isinstance(config_dict_or_str, dict):
      config_dict = config_dict_or_str
    else:
      raise ValueError('Unknown value type: {}'.format(config_dict_or_str))

    self._update(config_dict, allow_new_keys)

  def parse_from_yaml(self, yaml_file_path: Text) -> Dict[Any, Any]:
    """Parses a yaml file and returns a dictionary."""
    with tf.io.gfile.GFile(yaml_file_path, 'r') as f:
      config_dict = yaml.load(f, Loader=yaml.FullLoader)
      return config_dict

  def save_to_yaml(self, yaml_file_path):
    """Write a dictionary into a yaml file."""
    with tf.io.gfile.GFile(yaml_file_path, 'w') as f:
      yaml.dump(self.as_dict(), f, default_flow_style=False)

  def parse_from_str(self, config_str: Text) -> Dict[Any, Any]:
    """Parse a string like 'x.y=1,x.z=2' to nested dict {x: {y: 1, z: 2}}."""
    if not config_str:
      return {}
    config_dict = {}
    try:
      for kv_pair in config_str.split(','):
        if not kv_pair:  # skip empty string
          continue
        key_str, value_str = kv_pair.split('=')
        key_str = key_str.strip()

        def add_kv_recursive(k, v):
          """Recursively parse x.y.z=tt to {x: {y: {z: tt}}}."""
          if '.' not in k:
            if '*' in v:
              # we reserve * to split arrays.
              return {k: [eval_str_fn(vv) for vv in v.split('*')]}
            return {k: eval_str_fn(v)}
          pos = k.index('.')
          return {k[:pos]: add_kv_recursive(k[pos + 1:], v)}

        def merge_dict_recursive(target, src):
          """Recursively merge two nested dictionary."""
          for k in src.keys():
            if ((k in target and isinstance(target[k], dict) and
                 isinstance(src[k], collections.Mapping))):
              merge_dict_recursive(target[k], src[k])
            else:
              target[k] = src[k]

        merge_dict_recursive(config_dict, add_kv_recursive(key_str, value_str))
      return config_dict
    except ValueError:
      raise ValueError('Invalid config_str: {}'.format(config_str))

  def as_dict(self):
    """Returns a dict representation."""
    config_dict = {}
    for k, v in six.iteritems(self.__dict__):
      if isinstance(v, Config):
        config_dict[k] = v.as_dict()
      else:
        config_dict[k] = copy.deepcopy(v)
    return config_dict
    # pylint: enable=protected-access

def default_detection_configs():
  """Returns a default detection configs."""
  h = Config()

  # model name.
  h.name = 'efficientdet-d0'

  # activation type: see activation_fn in utils.py.
  h.act_type = 'swish'

  # input preprocessing parameters
  h.image_size = 640  # An integer or a string WxH such as 640x320.
  h.target_size = None
  h.input_rand_hflip = True
  h.jitter_min = 0.1
  h.jitter_max = 2.0
  h.autoaugment_policy = None
  h.grid_mask = False
  h.sample_image = None
  h.map_freq = 5  # AP eval frequency in epochs.

  # dataset specific parameters
  # TODO(tanmingxing): update this to be 91 for COCO, and 21 for pascal.
  h.num_classes = 2  # 1+ actual classes, 0 is reserved for background.
  h.seg_num_classes = 3  # segmentation classes
  h.heads = ['object_detection']  # 'object_detection', 'segmentation'

  h.skip_crowd_during_training = True
  h.label_map = {1: 'intersection'}  # a dict or a string of 'coco', 'voc', 'waymo'.
  h.max_instances_per_image = 100  # Default to 100 for COCO.
  h.regenerate_source_id = False

  # model architecture
  h.min_level = 3
  h.max_level = 7
  h.num_scales = 3
  # ratio w/h: 2.0 means w=1.4, h=0.7. Can be computed with k-mean per dataset.
  h.aspect_ratios = [1.0, 2.0, 0.5]  # [[0.7, 1.4], [1.0, 1.0], [1.4, 0.7]]
  h.anchor_scale = 4.0
  # is batchnorm training mode
  h.is_training_bn = True
  # optimization
  h.momentum = 0.9
  h.optimizer = 'sgd'  # can be 'adam' or 'sgd'.
  h.learning_rate = 0.08  # 0.008 for adam.
  h.lr_warmup_init = 0.008  # 0.0008 for adam.
  h.lr_warmup_epoch = 1.0
  h.first_lr_drop_epoch = 200.0
  h.second_lr_drop_epoch = 250.0
  h.poly_lr_power = 0.9
  h.clip_gradients_norm = 10.0
  h.num_epochs = 300
  h.data_format = 'channels_last'
  # The default image normalization is identical to Cloud TPU ResNet.
  h.mean_rgb = [0.485 * 255, 0.456 * 255, 0.406 * 255]
  h.stddev_rgb = [0.229 * 255, 0.224 * 255, 0.225 * 255]

  # classification loss
  h.label_smoothing = 0.0  # 0.1 is a good default
  # Behold the focal loss parameters
  h.alpha = 0.25
  h.gamma = 1.5

  # localization loss
  h.delta = 0.1  # regularization parameter of huber loss.
  # total loss = box_loss * box_loss_weight + iou_loss * iou_loss_weight
  h.box_loss_weight = 50.0
  h.iou_loss_type = None
  h.iou_loss_weight = 1.0

  # regularization l2 loss.
  h.weight_decay = 4e-5
  h.strategy = None  # 'tpu', 'gpus', None
  h.mixed_precision = False  # If False, use float32.
  h.loss_scale = None  # set to 2**16 enables dynamic loss scale
  h.model_optimizations = {}  # 'prune':{}

  # For detection.
  h.box_class_repeats = 3
  h.fpn_cell_repeats = 3
  h.fpn_num_filters = 88
  h.separable_conv = True
  h.apply_bn_for_resampling = True
  h.conv_after_downsample = False
  h.conv_bn_act_pattern = False
  h.drop_remainder = True  # drop remainder for the final batch eval.

  # For post-processing nms, must be a dict.
  h.nms_configs = {
      'method': 'gaussian',
      'iou_thresh': None,  # use the default value based on method.
      'score_thresh': 0.,
      'sigma': None,
      'pyfunc': False,
      'max_nms_inputs': 0,
      'max_output_size': 100,
  }
  h.tflite_max_detections = 100

  # version.
  h.fpn_name = None
  h.fpn_weight_method = None
  h.fpn_config = None

  # No stochastic depth in default.
  h.survival_prob = None
  h.img_summary_steps = None

  h.lr_decay_method = 'cosine'
  h.moving_average_decay = 0.
  h.ckpt_var_scope = None  # ckpt variable scope.
  # If true, skip loading pretrained weights if shape mismatches.
  h.skip_mismatch = True

  h.backbone_name = 'efficientnet-b1'
  h.backbone_config = None
  h.var_freeze_expr = '(efficientnet|fpn_cells|resample_p6)'

  # A temporary flag to switch between legacy and keras models.
  h.use_keras_model = True
  h.dataset_type = None
  h.positives_momentum = None
  h.grad_checkpoint = False

  return h

efficientdet_model_param_dict = {
    'efficientdet-d0':
        dict(
            name='efficientdet-d0',
            backbone_name='efficientnet-b0',
            image_size=512,
            fpn_num_filters=64,
            fpn_cell_repeats=3,
            box_class_repeats=3,
        ),
    'efficientdet-d1':
        dict(
            name='efficientdet-d1',
            backbone_name='efficientnet-b1',
            image_size=640,
            fpn_num_filters=88,
            fpn_cell_repeats=4,
            box_class_repeats=3,
        ),
    'efficientdet-d2':
        dict(
            name='efficientdet-d2',
            backbone_name='efficientnet-b2',
            image_size=768,
            fpn_num_filters=112,
            fpn_cell_repeats=5,
            box_class_repeats=3,
        ),
    'efficientdet-d3':
        dict(
            name='efficientdet-d3',
            backbone_name='efficientnet-b3',
            image_size=896,
            fpn_num_filters=160,
            fpn_cell_repeats=6,
            box_class_repeats=4,
        ),
    'efficientdet-d4':
        dict(
            name='efficientdet-d4',
            backbone_name='efficientnet-b4',
            image_size=1024,
            fpn_num_filters=224,
            fpn_cell_repeats=7,
            box_class_repeats=4,
        ),
    'efficientdet-d5':
        dict(
            name='efficientdet-d5',
            backbone_name='efficientnet-b5',
            image_size=1280,
            fpn_num_filters=288,
            fpn_cell_repeats=7,
            box_class_repeats=4,
        ),
    'efficientdet-d6':
        dict(
            name='efficientdet-d6',
            backbone_name='efficientnet-b6',
            image_size=1280,
            fpn_num_filters=384,
            fpn_cell_repeats=8,
            box_class_repeats=5,
            fpn_weight_method='sum',  # Use unweighted sum for stability.
        ),
    'efficientdet-d7':
        dict(
            name='efficientdet-d7',
            backbone_name='efficientnet-b6',
            image_size=1536,
            fpn_num_filters=384,
            fpn_cell_repeats=8,
            box_class_repeats=5,
            anchor_scale=5.0,
            fpn_weight_method='sum',  # Use unweighted sum for stability.
        ),
    'efficientdet-d7x':
        dict(
            name='efficientdet-d7x',
            backbone_name='efficientnet-b7',
            image_size=1536,
            fpn_num_filters=384,
            fpn_cell_repeats=8,
            box_class_repeats=5,
            anchor_scale=4.0,
            max_level=8,
            fpn_weight_method='sum',  # Use unweighted sum for stability.
        ),
}

lite_common_param = dict(
    mean_rgb=127.0,
    stddev_rgb=128.0,
    act_type='relu6',
    fpn_weight_method='sum',
)

efficientdet_lite_param_dict = {
    # lite models are in progress and subject to changes.
    # mean_rgb and stddev_rgb are consistent with EfficientNet-Lite models in
    # https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/lite/efficientnet_lite_builder.py#L28
    'efficientdet-lite0':
        dict(
            name='efficientdet-lite0',
            backbone_name='efficientnet-lite0',
            image_size=320,
            fpn_num_filters=64,
            fpn_cell_repeats=3,
            box_class_repeats=3,
            anchor_scale=3.0,
            **lite_common_param,
        ),
    'efficientdet-lite1':
        dict(
            name='efficientdet-lite1',
            backbone_name='efficientnet-lite1',
            image_size=384,
            fpn_num_filters=88,
            fpn_cell_repeats=4,
            box_class_repeats=3,
            anchor_scale=3.0,
            **lite_common_param,
        ),
    'efficientdet-lite2':
        dict(
            name='efficientdet-lite2',
            backbone_name='efficientnet-lite2',
            image_size=448,
            fpn_num_filters=112,
            fpn_cell_repeats=5,
            box_class_repeats=3,
            anchor_scale=3.0,
            **lite_common_param,
        ),
    'efficientdet-lite3':
        dict(
            name='efficientdet-lite3',
            backbone_name='efficientnet-lite3',
            image_size=512,
            fpn_num_filters=160,
            fpn_cell_repeats=6,
            box_class_repeats=4,
            **lite_common_param,
        ),
    'efficientdet-lite4':
        dict(
            name='efficientdet-lite4',
            backbone_name='efficientnet-lite4',
            image_size=512,
            fpn_num_filters=224,
            fpn_cell_repeats=7,
            box_class_repeats=4,
            **lite_common_param,
        ),
}

def get_efficientdet_config(model_name='efficientdet-d1'):
  """Get the default config for EfficientDet based on model name."""
  h = default_detection_configs()
  if model_name in efficientdet_model_param_dict:
    h.override(efficientdet_model_param_dict[model_name])
  elif model_name in efficientdet_lite_param_dict:
    h.override(efficientdet_lite_param_dict[model_name])
  else:
    raise ValueError('Unknown model name: {}'.format(model_name))

  return h

def get_detection_config(model_name):
  if model_name.startswith('efficientdet'):
    return get_efficientdet_config(model_name)
  else:
    raise ValueError('model name must start with efficientdet.')
```
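As an aside on the Config class above: the --hparams flag accepted by main.py feeds its string to Config.override, which uses parse_from_str, so overrides can be applied at runtime instead of editing the file. A minimal sketch, assuming the hparams_config.py above is importable:

```python
# Minimal sketch of runtime overrides via the Config class shown above.
import hparams_config

config = hparams_config.get_efficientdet_config('efficientdet-d0')
# parse_from_str turns 'num_classes=2,moving_average_decay=0' into a nested
# dict; override applies it and rejects keys that do not already exist.
config.override('num_classes=2,moving_average_decay=0')
print(config.num_classes)           # 2
print(config.moving_average_decay)  # 0
```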
kartik4949 commented 3 years ago

Are you using the latest code?

alexlorenzo commented 3 years ago

I pulled the latest commit, but I still have the same issue with the number of classes.

I0517 13:06:06.383111 139963403261760 utils.py:155] skip class_net/class-predict/pointwise_kernel ((1, 1, 64, 18) vs [1, 1, 64, 810]) -- shape mismatch
I0517 13:06:06.383181 139963403261760 utils.py:155] skip class_net/class-predict/bias ((18,) vs [810]) -- shape mismatch

(0) Invalid argument: Incompatible shapes: [1,810,64,64] vs. [1,18,64,64]
  [[{{node focal_loss/logistic_loss/GreaterEqual}}]]
  [[clip/global_norm_1/global_norm/_6425]]
(1) Invalid argument: Incompatible shapes: [1,810,64,64] vs. [1,18,64,64]
  [[{{node focal_loss/logistic_loss/GreaterEqual}}]]
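The skip lines are consistent with skip_mismatch=True from the config: variables whose shapes disagree between the checkpoint and the freshly built model are simply not restored. An illustrative sketch of that idea (not the repo's actual utils.py implementation):

```python
# Illustrative sketch of skip-on-mismatch restoring (not the actual utils.py).
import tensorflow as tf

def filter_restorable_vars(model_vars, ckpt_path):
  """Keeps only the variables whose shapes match the checkpoint's."""
  reader = tf.train.load_checkpoint(ckpt_path)
  ckpt_shapes = reader.get_variable_to_shape_map()
  restorable = {}
  for name, var in model_vars.items():
    if name in ckpt_shapes and var.shape.as_list() == ckpt_shapes[name]:
      restorable[name] = var
    else:
      print('skip {} ({} vs {}) -- shape mismatch'.format(
          name, var.shape, ckpt_shapes.get(name)))
  return restorable
```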

alexlorenzo commented 3 years ago

If it helps, training is working with this command:

python main.py --mode=train --train_file_pattern=tfrecord/train*.tfrecord --val_file_pattern=tfrecord/val*.tfrecord --model_name=efficientdet-d0 --model_dir=/tmp/efficientdet-d0-finetune --ckpt=efficientdet-d0 --train_batch_size=1 --num_epochs=50 --hparams="num_classes=2,moving_average_decay=0,mixed_precision=true"
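For completeness, the same overrides can live in a yaml file, since Config.override in the hparams_config.py above also accepts paths ending in .yaml. A small sketch, with myconfig.yaml as a hypothetical file name:

```python
# Sketch: persist the overrides to yaml once, then reuse the file.
# myconfig.yaml is a hypothetical name; Config.override accepts '*.yaml' paths.
import hparams_config

config = hparams_config.get_efficientdet_config('efficientdet-d0')
config.override('num_classes=2,moving_average_decay=0,mixed_precision=true')
config.save_to_yaml('myconfig.yaml')
# Later runs could then pass: --hparams=myconfig.yaml
```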