ZumoLabs / zpy

Synthetic data for computer vision. An open source toolkit using Blender and Python.
GNU General Public License v3.0

Bounding Box generation Error #150

Closed: franferraz98 closed this issue 2 years ago

franferraz98 commented 3 years ago

I've created a Blender scene containing a die ("dado"), and I'm using zpy to generate a dataset of images by rotating the camera around the object and jittering both the die's position and the camera's pose. Everything seems to be working properly, but the bounding boxes written to the annotation file get progressively worse with each picture.

For example, this is the first image's bounding box: [image]

This is one we get halfway through: [image]

And this is one of the last ones: [image]

This is my code (I've trimmed some parts; I couldn't paste it all for some reason):


def run(num_steps=20):

    # Random seed results in unique behavior
    zpy.blender.set_seed()

    # Create the saver object
    saver = zpy.saver_image.ImageSaver(description="Domain randomized dado")

    # Add the dado category
    dado_seg_color = zpy.color.random_color(output_style="frgb")
    saver.add_category(name="dado", color=dado_seg_color)

    # Segment the dado (make sure a material exists for the object!)
    zpy.objects.segment("dado", color=dado_seg_color)

    # Original dice pose
    zpy.objects.save_pose("dado", "dado_pose_og")

    # Original camera pose
    zpy.objects.save_pose("Camera", "Camera_pose_og")

    # Save the positions of objects so we can jitter them later
    zpy.objects.save_pose("Camera", "cam_pose")
    zpy.objects.save_pose("dado", "dado_pose")

    asset_dir = Path(bpy.data.filepath).parent
    texture_paths = [
        asset_dir / Path("textures/madera.png"),
        asset_dir / Path("textures/ladrillo.png"),
    ]

    # Run the sim.
    for step_idx in range(num_steps):
        # Example logging
        # stp = zpy.blender.step()
        # print("BLENDER STEPS: ", stp.num_steps)
        # log.debug("This is a debug log")

        # Return camera and dado to original positions
        zpy.objects.restore_pose("Camera", "cam_pose")
        zpy.objects.restore_pose("dado", "dado_pose")

        # Rotate camera
        location = bpy.context.scene.objects["Camera"].location
        angle = step_idx * 360 / num_steps
        location = rotate(location, angle, axis=(0, 0, 1))
        bpy.data.objects["Camera"].location = location

        # Jitter dado pose
        zpy.objects.jitter(
            "dado",
            translate_range=((-300, 300), (-300, 300), (0, 0)),
            rotate_range=(
                (0, 0),
                (0, 0),
                (-math.pi, math.pi),
            ),
        )

        # Jitter the camera pose
        zpy.objects.jitter(
            "Camera",
            translate_range=(
                (-5, 5),
                (-5, 5),
                (-5, 5),
            ),
        )

        # Camera should be looking at dado
        zpy.camera.look_at("Camera", bpy.data.objects["dado"].location)

        texture_path = random.choice(texture_paths)

        # HDRIs are like a pre-made background with lighting
        # zpy.hdris.random_hdri()

        # Pick a random texture from the 'textures' folder (relative to blendfile)
        # Textures are images that we will map onto a material
        new_mat = zpy.material.make_mat_from_texture(texture_path)
        # zpy.material.set_mat("dado", new_mat)

        # Have to segment the new material
        zpy.objects.segment("dado", color=dado_seg_color)

        # Jitter the dado material
        # zpy.material.jitter(bpy.data.objects["dado"].active_material)

        # Jitter the HSV for empty and full images
        '''
        hsv = (
            random.uniform(0.49, 0.51),  # (hue)
            random.uniform(0.95, 1.1),  # (saturation)
            random.uniform(0.75, 1.2),  # (value)
        )
        '''

        # Name for each of the output images
        rgb_image_name = zpy.files.make_rgb_image_name(step_idx)
        iseg_image_name = zpy.files.make_iseg_image_name(step_idx)
        depth_image_name = zpy.files.make_depth_image_name(step_idx)

        # Render image
        zpy.render.render(
            rgb_path=saver.output_dir / rgb_image_name,
            iseg_path=saver.output_dir / iseg_image_name,
            depth_path=saver.output_dir / depth_image_name,
            # hsv=hsv,
        )

        # Add images to saver
        saver.add_image(
            name=rgb_image_name,
            style="default",
            output_path=saver.output_dir / rgb_image_name,
            frame=step_idx,
        )
        saver.add_image(
            name=iseg_image_name,
            style="segmentation",
            output_path=saver.output_dir / iseg_image_name,
            frame=step_idx,
        )
        saver.add_image(
            name=depth_image_name,
            style="depth",
            output_path=saver.output_dir / depth_image_name,
            frame=step_idx,
        )

        # Add annotation to segmentation image
        saver.add_annotation(
            image=rgb_image_name,
            seg_image=iseg_image_name,
            seg_color=dado_seg_color,
            category="dado",
        )

    # Write out annotations
    saver.output_annotated_images()
    saver.output_meta_analysis()

    # ZUMO Annotations
    zpy.output_zumo.OutputZUMO(saver).output_annotations()

    # COCO Annotations
    zpy.output_coco.OutputCOCO(saver).output_annotations()

    # Return to the initial state
    zpy.objects.restore_pose("dado", "dado_pose_og")
    zpy.objects.restore_pose("Camera", "Camera_pose_og")

Is this my fault or an actual bug?

franferraz98 commented 3 years ago

The rest of the code, in case it's needed:

import logging
import math
import random
from pathlib import Path

import numpy as np

import bpy
import zpy

from mathutils.bvhtree import BVHTree

log = logging.getLogger("zpy")

def rotation_matrix(axis, theta):
    """
    Return the rotation matrix associated with counterclockwise rotation about
    the given axis by theta radians.
    """
    axis = np.asarray(axis)
    axis = axis / math.sqrt(np.dot(axis, axis))
    a = math.cos(theta / 2.0)
    b, c, d = -axis * math.sin(theta / 2.0)
    aa, bb, cc, dd = a * a, b * b, c * c, d * d
    bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
    return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
                     [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
                     [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])

def rotate(point, angle_degrees, axis=(0, 1, 0)):
    theta_radians = math.radians(angle_degrees)
    return np.dot(rotation_matrix(axis, theta_radians), point)
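
A quick sanity check for these helpers (a minimal sketch, assuming only the imports and the two functions above): rotating the x axis by 90 degrees about z should land on the y axis, and a proper rotation matrix is orthonormal.

# Rotating (1, 0, 0) by 90 degrees about the z axis should give (0, 1, 0).
assert np.allclose(rotate((1, 0, 0), 90, axis=(0, 0, 1)), (0, 1, 0))

# R @ R.T should be the identity for any rotation matrix.
R = rotation_matrix((0, 0, 1), math.pi / 2)
assert np.allclose(R @ R.T, np.eye(3))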

hu-po commented 3 years ago

That is a weird result for sure. The code looks fine to me. What do the depth and segmentation images look like for those weird bounding boxes?

franferraz98 commented 2 years ago

I deleted those ones, but here are some new ones:

[image] [image] [image]

[image] [image] [image]

In general they look fine; I wouldn't say it has to do with the model.

hu-po commented 2 years ago

Interesting. Are you using your own visualization tools to draw the bounding box over the image? There are a couple of different styles of bounding boxes; zpy uses (x, y, width, height), as seen here. It could be that your visualization tool expects something else, like (x1, y1, x2, y2).
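
For reference, converting between the two conventions is a one-liner each way. This is a minimal sketch (the function names are just for illustration):

def xywh_to_xyxy(bbox):
    # COCO-style (x, y, width, height) -> corner-style (x1, y1, x2, y2)
    x, y, w, h = bbox
    return (x, y, x + w, y + h)

def xyxy_to_xywh(bbox):
    # corner-style (x1, y1, x2, y2) -> COCO-style (x, y, width, height)
    x1, y1, x2, y2 = bbox
    return (x1, y1, x2 - x1, y2 - y1)

# Example: a 50x30 box with its top-left corner at (100, 200).
assert xywh_to_xyxy((100, 200, 50, 30)) == (100, 200, 150, 230)
assert xyxy_to_xywh((100, 200, 150, 230)) == (100, 200, 50, 30)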

franferraz98 commented 2 years ago

Yes, I use my own script, but I'd say I'm using the style you describe. Here is my script:

import json
import argparse
import cv2
from matplotlib import pyplot as plt

# Initiate argument parser
parser = argparse.ArgumentParser(
    description="Draw COCO bounding boxes over their images")
parser.add_argument("-a",
                    "--ann_file",
                    help="Path to the folder where the input .coco.json files are stored.",
                    type=str)
parser.add_argument("-i",
                    "--img_dir",
                    help="Path to the folder where the input image files are stored. "
                         "Defaults to the same directory as ANN_FILE.",
                    type=str, default=None)

args = parser.parse_args()

if args.img_dir is None:
    args.img_dir = args.ann_file

img_dir = args.img_dir
annotations_file = args.ann_file

# Read JSON for annotations
with open(args.ann_file) as f:
    d = json.load(f)

# Get images and annotations
images = d['images']
annotations = d['annotations']

for i in images:
    print("PATH: ", args.img_dir + i['file_name'])
    img = cv2.imread(args.img_dir + i['file_name'])
    for a in annotations:
        if i['id'] == a['id']:  # Annotation for the image
            [x,y,w,h] = a['bbox']
            start = (int(x), int(y))
            end = (int(x+w), int(y+h))
            color = (255,0,0)
            thick = 2
            cv2.rectangle(img, start, end, color, thick)
            cv2.imshow(' ',img)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

franferraz98 commented 2 years ago

Well yeah, it was my fault. That i['id'] == a['id'] should be i['id'] == a['image_id'].

Sigh. Sorry, had my head somewhere else when I wrote this. Thanks for your support!
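
For anyone finding this later, here is the display loop with the fix applied (a sketch that also draws all matching boxes for an image before showing it):

for i in images:
    img = cv2.imread(args.img_dir + i['file_name'])
    for a in annotations:
        if i['id'] == a['image_id']:  # match annotations to their image
            x, y, w, h = a['bbox']
            cv2.rectangle(img, (int(x), int(y)), (int(x + w), int(y + h)),
                          (255, 0, 0), 2)
    cv2.imshow(' ', img)
    cv2.waitKey(0)
cv2.destroyAllWindows()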

hu-po commented 2 years ago

All good! Glad I could help.