tensorflow / models

Models and examples built with TensorFlow
Other
77.16k stars 45.76k forks source link

model detects only 1 class #8632

Closed TekayaNidham closed 4 years ago

TekayaNidham commented 4 years ago

System information

Describe the current behavior Hello, I trained a 3-class object detection model on the cloud, but it only detects 1 class. It returns a good loss and mAP, but only for that one class. Since I ran it with model_main, evaluation is included; I checked TensorBoard and everything (ground truth and detections) looks good, but only for one class. I tried swapping the classes in the label map, and it detects only the last one.

Dataset: a 3-class dataset with around 100 images per class, and 20 images for evaluation.

Data prep. Annotation: using labelImg. Generating the CSV:

 import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

def xml_to_csv(path):
    """Collect every labelled box from the annotation XML files in a folder.

    Each ``<object>`` element of each ``*.xml`` file directly inside ``path``
    becomes one row. Fields are looked up by tag name rather than by child
    position, so files whose elements are ordered differently, or that omit
    optional tags, still yield the right values.

    Args:
        path: Directory containing the annotation ``.xml`` files.

    Returns:
        A ``pandas.DataFrame`` with the columns ``filename``, ``width``,
        ``height``, ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``. It
        has no rows (but keeps those columns) when no objects are found.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        root = ET.parse(xml_file).getroot()
        # Image-level elements are shared by every object in the file; they
        # are only dereferenced inside the loop, so a file without objects
        # is skipped without requiring them to exist.
        filename_node = root.find('filename')
        size_node = root.find('size')
        for member in root.findall('object'):
            box = member.find('bndbox')
            xml_list.append((
                filename_node.text,
                int(size_node.find('width').text),
                int(size_node.find('height').text),
                member.find('name').text,
                int(box.find('xmin').text),
                int(box.find('ymin').text),
                int(box.find('xmax').text),
                int(box.find('ymax').text),
            ))
    return pd.DataFrame(xml_list, columns=column_name)

def main():
  """Write one CSV of labels for each of the train and test image folders.

  Annotations are read from ``images/<split>`` under the current working
  directory and saved to ``data/<split>_labels.csv``.
  """
  for split_name in ('train', 'test'):
    annotations_dir = os.path.join(os.getcwd(), 'images/{}'.format(split_name))
    labels = xml_to_csv(annotations_dir)
    target_csv = 'data/{}_labels.csv'.format(split_name)
    labels.to_csv(target_csv, index=None)
    print('Successfully converted xml to csv.')

main()

Generating TFRecords

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from random import shuffle
import os
import io
import pandas as pd
import tensorflow as tf

from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('image_dir', '', 'Path to images')
FLAGS = flags.FLAGS

# TO-DO replace this with label map
def class_text_to_int(row_label):
    """Return the integer class id for a label name.

    Args:
        row_label: Class name as stored in the CSV ``class`` column.

    Returns:
        1 for ``'orange'``, 2 for ``'tunisie_telecom'``, 3 for ``'ooredoo'``.

    Raises:
        ValueError: If ``row_label`` is not one of the known class names.
            Failing here names the offending label instead of letting a
            ``None`` id reach the TFRecord feature builder.
    """
    label_ids = {
        'orange': 1,
        'tunisie_telecom': 2,
        'ooredoo': 3,
    }
    label_id = label_ids.get(row_label)
    if label_id is None:
        raise ValueError(
            'Unknown class label {!r}; expected one of {}'.format(
                row_label, sorted(label_ids)))
    return label_id

def split(df, group):
    """Pair each distinct value of the ``group`` column with its rows.

    Args:
        df: DataFrame to partition.
        group: Column name passed to ``df.groupby``.

    Returns:
        A list of ``data`` namedtuples, one per group key, whose ``filename``
        field is the key and whose ``object`` field is the sub-frame returned
        by ``get_group`` for that key.
    """
    record = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(record(key, grouped.get_group(key)))
    return records

def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: Record with a ``filename`` attribute and an ``object``
            attribute whose rows provide ``xmin``, ``xmax``, ``ymin``,
            ``ymax`` and ``class``.
        path: Directory that ``group.filename`` is joined onto to locate the
            image file.

    Returns:
        A ``tf.train.Example`` holding the raw image bytes, the image size,
        the box coordinates divided by the image width/height, and the class
        names and ids.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()

    # The image is opened only to read its pixel dimensions; the bytes read
    # above are what gets stored in the example.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, box in group.object.iterrows():
        # Coordinates are stored as fractions of the image size.
        xmins.append(box['xmin'] / width)
        xmaxs.append(box['xmax'] / width)
        ymins.append(box['ymin'] / height)
        ymaxs.append(box['ymax'] / height)
        label = box['class']
        classes_text.append(label.encode('utf8'))
        classes.append(class_text_to_int(label))

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

def main(_):
    """Convert the CSV given by --csv_input into a TFRecord at --output_path.

    Rows are grouped per image file name, the groups are shuffled, and one
    serialized example per image is written. Image files are looked up in
    --image_dir.

    Args:
        _: Unused positional argument supplied by ``tf.app.run``.
    """
    # Read and group the labels before opening the output, so an unreadable
    # CSV does not leave an empty record file behind.
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    shuffle(grouped)

    # The context manager closes the writer even when building an example
    # raises, so everything written so far is not left in an unclosed file.
    with tf.python_io.TFRecordWriter(FLAGS.output_path) as writer:
        for group in grouped:
            tf_example = create_tf_example(group, FLAGS.image_dir)
            writer.write(tf_example.SerializeToString())

    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))

if __name__ == '__main__':
    tf.app.run()

Label map

item {
  id: 1
  name: 'orange'

  id: 2

  name: 'tunisie_telecom'

  id: 3

  name: 'ooredoo'
}

Config file


# R-FCN with Resnet-101 (v1),  configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.

model {
  faster_rcnn {
    num_classes: 3
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 600
        max_dimension: 1024
      }
    }
    feature_extractor {
      type: 'faster_rcnn_resnet101'
      first_stage_features_stride: 16
    }
    first_stage_anchor_generator {
      grid_anchor_generator {
        scales: [0.25, 0.5, 1.0, 2.0]
        aspect_ratios: [0.5, 1.0, 2.0]
        height_stride: 16
        width_stride: 16
      }
    }
    first_stage_box_predictor_conv_hyperparams {
      op: CONV
      regularizer {
        l2_regularizer {
          weight: 0.0
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.01
        }
      }
    }
    first_stage_nms_score_threshold: 0.0
    first_stage_nms_iou_threshold: 0.7
    first_stage_max_proposals: 300
    first_stage_localization_loss_weight: 2.0
    first_stage_objectness_loss_weight: 1.0
    second_stage_box_predictor {
      rfcn_box_predictor {
        conv_hyperparams {
          op: CONV
          regularizer {
            l2_regularizer {
              weight: 0.0
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.01
            }
          }
        }
        crop_height: 18
        crop_width: 18
        num_spatial_bins_height: 3
        num_spatial_bins_width: 3
      }
    }
    second_stage_post_processing {
      batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 300
      }
      score_converter: SOFTMAX
    }
    second_stage_localization_loss_weight: 2.0
    second_stage_classification_loss_weight: 1.0
  }
}

train_config: {
  batch_size: 1
  optimizer {
    adam_optimizer: {
      learning_rate: {
        manual_step_learning_rate {
          initial_learning_rate: 0.0001
          schedule {
            step: 1
            learning_rate: .00001
          }
          schedule {
            step: 4000
            learning_rate: .000001
          }
          schedule {
            step: 8000
            learning_rate: .0000001
          }
          schedule {
            step: 12000
            learning_rate: .00000001
          }
          schedule {
            step: 16000
            learning_rate: .000000001
          }
          schedule {
            step: 18000
            learning_rate: .0000000001
          }

        }
      }
      #momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  gradient_clipping_by_norm: 10.0
  fine_tune_checkpoint: "gs://nidham3/pfe/backbone/rfcn/model.ckpt"
  from_detection_checkpoint: true
  fine_tune_checkpoint_type:'detection'
  # Note: The below line limits the training process to 20K steps. The last
  # step of the learning rate schedule above (18000) is reached before
  # training stops, so the full schedule is applied. Remove the below line to
  # train indefinitely.
  num_steps: 20000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_image_scale {
      min_scale_ratio: 0.3
      max_scale_ratio: 1.5
    }
  }
  data_augmentation_options {
    random_adjust_saturation {
    }
  }
  data_augmentation_options {
    random_adjust_contrast {
    }
  }
  data_augmentation_options {
    random_adjust_hue {
    }
  }
  data_augmentation_options {
    random_pixel_value_scale {
    }
  }
  data_augmentation_options {
    random_crop_image {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: "gs://nidham3/pfe/data/train.record"
  }
  label_map_path: "gs://nidham3/pfe/data/logos.pbtxt"
}

eval_config: {
  num_examples: 20
  # Note: The below line limits the evaluation process to 20 evaluations.
  # Remove the below line to evaluate indefinitely.
  max_evals: 20
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "gs://nidham3/pfe/data/test.record"
  }
  label_map_path: "gs://nidham3/pfe/data/logos.pbtxt"
  shuffle: false
  num_readers: 1
}

from models/research i ran :

gcloud ml-engine jobs submit training pfe_train_`date +%m_%d_%Y_%H_%M_%S` \
    --runtime-version 1.15 \
    --job-dir=gs://nidham3/pfe/job_dir/pfe_train_`date +%m_%d_%Y_%H` \
    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz,/tmp/pycocotools/pycocotools-2.0.tar.gz \
    --module-name object_detection.model_main \
    --region us-central1 \
    --config /home/milos/Desktop/cloud_train/gcp_train.yaml \
    -- \
    --model_dir=gs://nidham3/pfe/model/resnet101_training \
    --pipeline_config_path=gs://nidham3/pfe/pipeline/rfcn.config

with gcp_train.yaml:

trainingInput:
  scaleTier: BASIC_GPU
  pythonVersion: "3.5"

I double-checked every configurable num_classes setting I know of; they are all set to 3.

kyscg commented 4 years ago

There are a few things you can do here:

TekayaNidham commented 4 years ago

@kyscg thank you. I actually tried every option, and I even reinstalled the API. HOWEVER, silly me, I didn't pay attention while writing the label map: if you look closely at it, you'll notice that the second and third classes are inside the first item's braces, so it got messed up. It's confusing :laughing: I corrected it, and here is how it looks now:

item {
  id: 1
  name: 'orange'
}
item {
  id: 2
  name: 'tunisie_telecom'
}
item {
  id: 3
  name: 'ooredoo'
}