kartik4949 opened this issue 3 years ago
Hi @kartik4949, it would be great if you could help add that :)
@mingxingtan it's the top paper for object detection right now on Papers with Code. Please take a look so I can proceed with implementing it.
I have implemented copy-paste augmentation.
That's what we needed :) @fsx950223 nice
@fsx950223 do you sample objects from the same batch or from other batches?
Wow, this is great. Could you check in this augmentation? I can't wait to re-train the models with it.
I have implemented this copy-and-paste in my local repo; I will file a PR, or maybe @fsx950223 can file it too :)
You could submit a PR.
Yes, will submit soon :)
Can I apply copy-paste augmentation to object detection? It seems a segmentation map is required to copy the objects.
You certainly can treat bboxes as objects and copy them around, but it can add background noise from inside the bbox.
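For reference, here is a minimal sketch of that box-level variant (all names are hypothetical, and it assumes both images already share the same uint8 [H, W, 3] shape). Everything inside a donor box is copied verbatim, which is exactly where the background noise comes from:

import tensorflow as tf

def paste_box_regions(target_image, donor_image, donor_boxes):
  """Copy the raw pixel rectangles of `donor_boxes` from donor_image
  onto target_image.

  target_image, donor_image: uint8 tensors of identical [H, W, 3] shape.
  donor_boxes: float32 [N, 4] in normalized [ymin, xmin, ymax, xmax] order.
  """
  h = tf.shape(target_image)[0]
  w = tf.shape(target_image)[1]
  ys = tf.linspace(0.0, 1.0, h)[None, :, None]   # [1, H, 1]
  xs = tf.linspace(0.0, 1.0, w)[None, None, :]   # [1, 1, W]
  ymin = donor_boxes[:, 0][:, None, None]        # [N, 1, 1]
  xmin = donor_boxes[:, 1][:, None, None]
  ymax = donor_boxes[:, 2][:, None, None]
  xmax = donor_boxes[:, 3][:, None, None]
  # One [H, W] slice per box, true inside the box rectangle.
  inside = ((ys >= ymin) & (ys <= ymax) &
            (xs >= xmin) & (xs <= xmax))         # [N, H, W]
  mask = tf.cast(tf.reduce_any(inside, axis=0), tf.uint8)[:, :, None]
  # Rectangular paste: background pixels inside each box come along too.
  return donor_image * mask + target_image * (1 - mask)

The donor boxes and classes would then just be concatenated onto the target's labels, as the scripts below do with their pseudo-labels; a fuller version would also clip or drop original boxes that the paste occludes.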
import numpy as np
import tensorflow as tf
from absl import app
from absl import flags

from object_detection import tf_example_decoder
from dataloader import DetectionInputProcessor

FLAGS = flags.FLAGS

flags.DEFINE_string('labeled_dataset', '/media/fangsixie/data/keras-yolo3/coco/train/*', 'Labeled dataset')
flags.DEFINE_string('unlabeled_dataset', '/media/fangsixie/data/pascal/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/*.jp*g', 'Unlabeled dataset')
flags.DEFINE_string('output_dir', './test_dataset/', 'Output directory')
flags.DEFINE_string('seg_model_path', '/media/fangsixie/data/download/lite-model_deeplabv3-mobilenetv2_1_default_1.tflite', 'Segmentation model path')
flags.DEFINE_string('detect_model_path', '/media/fangsixie/data/download/saved_model', 'Object detection model path')


class TfliteSegmentation:
  """Wraps a TFLite DeepLab model that predicts per-pixel class ids."""

  def __init__(self, model_path):
    self.model = tf.lite.Interpreter(model_path)
    self.model.allocate_tensors()
    self.input_details = self.model.get_input_details()
    self.output_details = self.model.get_output_details()
    self.mean = 127.5
    self.std = 127.5

  def __call__(self, image):
    # Normalize to [-1, 1] as the model expects.
    image = (image - self.mean) / self.std
    self.model.set_tensor(self.input_details[0]['index'],
                          np.array([image], np.float32))
    self.model.invoke()
    mask = tf.image.resize(
        self.model.get_tensor(self.output_details[0]['index']), (513, 513))
    return tf.reshape(tf.argmax(mask, axis=-1), (513, 513, 1))


class Dataloader:

  def __init__(self, segmentation_model, detection_model):
    self.example_decoder = tf_example_decoder.TfExampleDecoder(
        regenerate_source_id=True)
    self.segmentation_model = segmentation_model
    self.detection_model = detection_model

  def get_mask(self, img):
    # Binary foreground mask: 1 wherever the segmentation model predicts
    # any non-background class.
    mask = tf.numpy_function(self.segmentation_model, [img], tf.int64)
    mask.set_shape((513, 513, 1))
    return tf.where(mask > 0, 1, 0)

  def mix_image(self, image, image2):
    # Copy the segmented foreground of `image` onto `image2`.
    mask = self.get_mask(image)
    sub_image = image * tf.cast(mask, tf.uint8)
    return image2 * tf.cast(1 - mask, tf.uint8) + sub_image

  @tf.autograph.experimental.do_not_convert
  def map_fn(self, value, unlabeled_image_path):
    data = self.example_decoder.decode(value)
    image = data['image']
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    classes = tf.cast(classes, dtype=tf.float32)
    processor = DetectionInputProcessor(image, (513, 513), boxes, classes)
    processor.random_horizontal_flip()
    processor.set_training_random_scale_factors(0.25, 2.0)
    image = processor.resize_and_crop_image()
    boxes, classes = processor.resize_and_crop_boxes()
    # Pseudo-label the unlabeled image with the detection model.
    content = tf.io.read_file(unlabeled_image_path)
    unlabeled_image = tf.io.decode_image(content, 3)
    unlabeled_image.set_shape((None, None, 3))
    detected_boxes, _, detected_classes, detected_num = self.detection_model(
        tf.expand_dims(unlabeled_image, axis=0))
    detected_boxes = detected_boxes[0]
    detected_classes = detected_classes[0]
    detected_num = detected_num[0]
    detected_boxes = tf.slice(detected_boxes, [0, 0], [detected_num, 4])
    detected_classes = tf.slice(detected_classes, [0], [detected_num])
    input_processor = DetectionInputProcessor(
        unlabeled_image, (513, 513), detected_boxes, detected_classes)
    input_processor.random_horizontal_flip()
    input_processor.set_training_random_scale_factors(0.25, 2.0)
    unlabeled_image = input_processor.resize_and_crop_image()
    detected_boxes, detected_classes = input_processor.resize_and_crop_boxes()
    return (self.mix_image(image, unlabeled_image),
            tf.concat([boxes, detected_boxes], 0),
            tf.concat([classes, detected_classes], 0))


def main(_):
  segmentation_model = TfliteSegmentation(FLAGS.seg_model_path)
  detection_model = tf.saved_model.load(FLAGS.detect_model_path)
  dataloader = Dataloader(segmentation_model, detection_model)
  labeled_dataset = tf.data.Dataset.list_files(FLAGS.labeled_dataset)
  unlabeled_dataset = tf.data.Dataset.list_files(FLAGS.unlabeled_dataset)
  labeled_dataset = labeled_dataset.interleave(
      lambda filename: tf.data.TFRecordDataset(filename).prefetch(1),
      num_parallel_calls=tf.data.AUTOTUNE)
  dataset = tf.data.Dataset.zip(
      (labeled_dataset, unlabeled_dataset)).map(dataloader.map_fn)
  image, boxes, classes = next(iter(dataset))
  jpeg = tf.io.encode_jpeg(image, quality=100)
  tf.io.write_file('./test.jpeg', jpeg)


if __name__ == '__main__':
  app.run(main)
I implemented this with a VOC object detection model and a VOC segmentation model. You could try it with your own models. Does copy-paste augmentation belong to self-supervised learning?
@fsx950223 @mingxingtan Copy-paste augmentation itself is supervised learning, but the authors combined this technique with a self-training method (pseudo-labels), which makes the entire training semi-supervised.
But if you consider only the copy-paste augmentation, it is supervised learning. :)
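One detail worth pulling out of the script below: the predicted masks are kept only when their detection score passes a threshold, so it seems consistent to filter the pasted pseudo-label boxes and classes by the same score before concatenating them. A minimal sketch of that filtering step, with hypothetical names (the 0.7 threshold just mirrors the script below):

import tensorflow as tf

def filter_pseudo_labels(boxes, classes, scores, score_threshold=0.7):
  """Keep only confident detections as pseudo-labels.

  boxes: [N, 4], classes: [N], scores: [N] -- one image's raw detector
  output, e.g. result['detection_boxes'][0] from the hub model below.
  """
  keep = scores > score_threshold
  return tf.boolean_mask(boxes, keep), tf.boolean_mask(classes, keep)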
import tensorflow_hub as hub
import tensorflow as tf
from absl import app
from absl import flags

from object_detection import tf_example_decoder
from dataloader import DetectionInputProcessor

FLAGS = flags.FLAGS

flags.DEFINE_string('labeled_dataset', '/media/fangsixie/data/keras-yolo3/coco/train/*', 'Labeled dataset')
flags.DEFINE_string('unlabeled_dataset', '/media/fangsixie/data/keras-yolo3/coco/val/*', 'Unlabeled dataset')
flags.DEFINE_multi_integer('image_shape', [640, 640], 'Output image shape')


def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
                                     image_width, resize_method='bilinear'):
  """Transforms the box masks back to full image masks.

  Embeds masks in bounding boxes of larger masks whose shapes correspond to
  image shape.

  Args:
    box_masks: A tensor of size [num_masks, mask_height, mask_width].
    boxes: A tf.float32 tensor of size [num_masks, 4] containing the box
      corners. Row i contains [ymin, xmin, ymax, xmax] of the box
      corresponding to mask i. Note that the box corners are in
      normalized coordinates.
    image_height: Image height. The output mask will have the same height as
      the image height.
    image_width: Image width. The output mask will have the same width as the
      image width.
    resize_method: The resize method, either 'bilinear' or 'nearest'. Note that
      'bilinear' is only respected if box_masks is a float.

  Returns:
    A tensor of size [num_masks, image_height, image_width] with the same dtype
    as `box_masks`.
  """
  resize_method = 'nearest' if box_masks.dtype == tf.uint8 else resize_method

  # TODO(rathodv): Make this a public function.
  def reframe_box_masks_to_image_masks_default():
    """The default function when there are more than 0 box masks."""

    def transform_boxes_relative_to_boxes(boxes, reference_boxes):
      boxes = tf.reshape(boxes, [-1, 2, 2])
      min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
      max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
      denom = max_corner - min_corner
      # Prevent a divide by zero.
      denom = tf.math.maximum(denom, 1e-4)
      transformed_boxes = (boxes - min_corner) / denom
      return tf.reshape(transformed_boxes, [-1, 4])

    box_masks_expanded = tf.expand_dims(box_masks, axis=3)
    num_boxes = tf.shape(box_masks_expanded)[0]
    unit_boxes = tf.concat(
        [tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
    reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
    # TODO(vighneshb) Use matmul_crop_and_resize so that the output shape
    # is static. This will help us run and test on TPUs.
    resized_crops = tf.image.crop_and_resize(
        image=box_masks_expanded,
        boxes=reverse_boxes,
        box_indices=tf.range(num_boxes),
        crop_size=[image_height, image_width],
        method=resize_method,
        extrapolation_value=0)
    return tf.cast(resized_crops, box_masks.dtype)

  image_masks = tf.cond(
      tf.shape(box_masks)[0] > 0,
      reframe_box_masks_to_image_masks_default,
      lambda: tf.zeros([0, image_height, image_width, 1], box_masks.dtype))
  return tf.squeeze(image_masks, axis=3)


class Dataloader:

  def __init__(self, mask_rcnn_model, image_shape):
    self.example_decoder = tf_example_decoder.TfExampleDecoder(
        regenerate_source_id=True)
    self.mask_rcnn_model = mask_rcnn_model
    self.image_shape = image_shape

  @tf.autograph.experimental.do_not_convert
  def map_fn(self, value, unlabeled_value):
    data = self.example_decoder.decode(value)
    unlabeled_data = self.example_decoder.decode(unlabeled_value)
    image = data['image']
    unlabeled_image = unlabeled_data['image']
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    classes = tf.cast(classes, dtype=tf.float32)
    processor = DetectionInputProcessor(image, self.image_shape,
                                        boxes, classes)
    processor.random_horizontal_flip()
    processor.set_training_random_scale_factors(0.25, 2.0)
    image = processor.resize_and_crop_image()
    boxes, classes = processor.resize_and_crop_boxes()
    # Pseudo-label the unlabeled image with Mask R-CNN.
    result = self.mask_rcnn_model(tf.expand_dims(unlabeled_image, axis=0))
    detection_masks = tf.convert_to_tensor(result['detection_masks'][0])
    detection_boxes = tf.convert_to_tensor(result['detection_boxes'][0])
    detection_scores = tf.convert_to_tensor(result['detection_scores'][0])
    detection_classes = tf.convert_to_tensor(result['detection_classes'][0])
    # Expand the per-box masks to full-image masks, binarize them, and
    # keep only confident detections.
    detection_masks_reframed = reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes,
        tf.shape(unlabeled_image)[0], tf.shape(unlabeled_image)[1])
    detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8)
    detection_masks_reframed = tf.boolean_mask(detection_masks_reframed,
                                               detection_scores > 0.7)
    detection_masks_reframed = tf.expand_dims(detection_masks_reframed, axis=-1)
    # Merge the instance masks into one paste mask (max, not sum, so
    # overlapping instances still yield a binary mask).
    masks = tf.reduce_max(detection_masks_reframed, axis=0)
    input_processor = DetectionInputProcessor(
        unlabeled_image, self.image_shape, detection_boxes, detection_classes)
    input_processor.random_horizontal_flip()
    input_processor.set_training_random_scale_factors(0.25, 2.0)
    unlabeled_image = input_processor.resize_and_crop_image()
    # Run the paste mask through the same resize/crop.
    input_processor.image = masks
    masks = input_processor.resize_and_crop_image()
    detection_boxes, detection_classes = input_processor.resize_and_crop_boxes()
    # Copy the masked objects from the unlabeled image onto the labeled one.
    final_image = image * (1 - masks) + unlabeled_image * masks
    return (final_image,
            tf.concat([boxes, detection_boxes], 0),
            tf.concat([classes, detection_classes], 0))


def main(_):
  hub_model = hub.load('https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1')
  dataloader = Dataloader(hub_model, tuple(FLAGS.image_shape))
  labeled_dataset = tf.data.Dataset.list_files(FLAGS.labeled_dataset)
  unlabeled_dataset = tf.data.Dataset.list_files(FLAGS.unlabeled_dataset)
  labeled_dataset = labeled_dataset.interleave(
      lambda filename: tf.data.TFRecordDataset(filename).prefetch(1),
      num_parallel_calls=tf.data.AUTOTUNE)
  unlabeled_dataset = unlabeled_dataset.interleave(
      lambda filename: tf.data.TFRecordDataset(filename).prefetch(1),
      num_parallel_calls=tf.data.AUTOTUNE)
  dataset = tf.data.Dataset.zip(
      (labeled_dataset, unlabeled_dataset)).map(dataloader.map_fn)
  image, boxes, classes = next(iter(dataset))
  jpeg = tf.io.encode_jpeg(image, quality=100)
  tf.io.write_file('./test.jpeg', jpeg)


if __name__ == '__main__':
  app.run(main)
This implementation is based on Mask R-CNN.
@fsx950223 the copy-and-paste function itself is easy, something like def copy(image1_objects, image2_objects): ...
The challenge is sampling these masks/objects in our input pipeline implementation.
If we could sample random masks across batches in our implementation, then mosaic augmentation would also become possible to add :)
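For what it's worth, a minimal sketch of that mosaic idea, assuming four already-decoded images of the same [H, W, 3] shape sampled from a batch (all names are hypothetical):

import tensorflow as tf

def mosaic4(images, boxes_list, classes_list):
  """Tile four same-sized images into a 2x2 mosaic and remap their
  normalized [ymin, xmin, ymax, xmax] boxes into the mosaic frame."""
  top = tf.concat([images[0], images[1]], axis=1)      # left | right
  bottom = tf.concat([images[2], images[3]], axis=1)
  mosaic = tf.concat([top, bottom], axis=0)            # [2H, 2W, 3]
  # Each source image occupies half of the mosaic along each axis.
  offsets = [(0.0, 0.0), (0.0, 0.5), (0.5, 0.0), (0.5, 0.5)]  # (y, x)
  all_boxes, all_classes = [], []
  for b, c, (oy, ox) in zip(boxes_list, classes_list, offsets):
    all_boxes.append(b * 0.5 + tf.constant([oy, ox, oy, ox], b.dtype))
    all_classes.append(c)
  return mosaic, tf.concat(all_boxes, 0), tf.concat(all_classes, 0)

A batched tf.data pipeline could feed this by grouping every four examples; a fuller version would also randomize the mosaic center and jitter each quadrant.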
I don't believe there is any challenge in the pipeline.
Well then, I will refer to your code above and file a PR, but you'll have to help integrate the code into our input pipeline.
Hi @mingxingtan @fsx950223, see https://arxiv.org/pdf/2012.07177v1.pdf. @mingxingtan, you must know this one, as it's from the Google Brain team. Hope this can help. P.S.: I can help create the script :) Thanks