"chipper" option to cut out image chips using bounding boxes

cdleong commented 5 years ago

Sometimes one does not want the entire image, only the part that contains the class of interest, for example when training an image classifier or a GAN.

A crude implementation is shown below:

new file, modules/chip.py

import cv2
import os
import re
import numpy as np

class_list = []  # distinct class names seen so far, shared across chip() calls
flag = 0  # not referenced by the code in this module

# Compiled once at import time instead of once per label line.
# Matches a leading class name made of one or more space-separated words.
_CLASS_NAME_RE = re.compile(r'^[a-zA-Z]+(\s+[a-zA-Z]+)*')


def chip(class_name, download_dir, label_dir, total_images, index):
    '''
    Cut every labelled bounding box out of one image and save each as a chip.

    The image is the ``index``-th ``.jpg`` file (in sorted name order) of
    ``download_dir``. Its label file is ``<image stem>.txt`` in ``label_dir``;
    each row is expected to be ``class_name XMin YMin XMax YMax``. One JPEG
    per row is written to the ``chips/`` folder (relative to the current
    working directory), named ``<image stem>_chip<row number>.jpg``.

    Every class name found in the label file is recorded in the module-level
    ``class_list``.

    Args:
        class_name: Accepted for interface compatibility; the class name of
            each box is read from the label file instead.
        download_dir: Folder containing the ``.jpg`` images.
        label_dir: Folder containing the matching ``.txt`` label files.
        total_images: Accepted for interface compatibility; not used.
        index: Position of the image among the ``.jpg`` files. An index with
            no corresponding image is ignored.

    Raises:
        OSError: If the label file for the selected image cannot be opened.
    '''
    # Index only real images, so a non-image entry (such as the label folder)
    # can never be selected and no index arithmetic is needed to skip it.
    image_files = sorted(
        name for name in os.listdir(download_dir) if name.endswith('.jpg')
    )
    try:
        img_file = image_files[index]
    except IndexError:
        # The caller counted more directory entries than there are images.
        return

    current_image_path = str(os.path.join(download_dir, img_file))
    img = cv2.imread(current_image_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        return
    height, width = img.shape[:2]

    image_stem = os.path.splitext(img_file)[0]
    file_path = os.path.join(label_dir, image_stem + '.txt')

    chips_folder = "chips/"
    os.makedirs(chips_folder, exist_ok=True)

    # The context manager closes the label file even if a row fails to parse.
    with open(file_path, 'r') as f:
        for idx, line in enumerate(f):
            print(f"f is {f}")
            print(f"current img is {current_image_path}")
            print(f"line is {line}")
            # each row in a file is class_name, XMin, YMin, XMax, YMax
            match_class_name = _CLASS_NAME_RE.match(line)
            if match_class_name is None:
                # Blank or malformed row: nothing to cut out.
                continue
            label_class = line[:match_class_name.end()]
            ax = line[match_class_name.end():].split()
            if len(ax) < 4:
                continue

            if label_class not in class_list:
                class_list.append(label_class)

            # Clamp to the image so a negative coordinate cannot wrap around
            # to the opposite edge when used as a slice bound.
            xmin = max(0, int(float(ax[-4])))
            ymin = max(0, int(float(ax[-3])))
            xmax = min(width, int(float(ax[-2])))
            ymax = min(height, int(float(ax[-1])))

            # opencv images are indexed [row (y), column (x)]
            roi = img[ymin:ymax, xmin:xmax]
            print(f"xmin, xmax, ymin, ymax = ({xmin}, {xmax}, {ymin}, {ymax})")
            if roi.size == 0:
                # Degenerate box: there are no pixels to save.
                continue

            chip_filename = image_stem + "_chip" + str(idx) + ".jpg"
            print(f"chip filename is {chip_filename}")
            chip_path = os.path.join(chips_folder, chip_filename)
            print(f"chip_path is {chip_path}")
            cv2.imwrite(chip_path, roi)

Added to bounding_boxes.py:

an import statement at the top...

from modules.chip import chip

...and this section:

    elif args.command == "chipper":
        for image_dir in ["train", "test", "validation"]:
                class_image_dir = os.path.join(dataset_dir, image_dir)
                for class_name in os.listdir(class_image_dir):

                    download_dir = os.path.join(dataset_dir, image_dir, class_name)
                    label_dir = os.path.join(dataset_dir, image_dir, class_name, 'Label')
                    if not os.path.isdir(download_dir):
                        print("[ERROR] Images folder not found")
                        exit(1)
                    if not os.path.isdir(label_dir):
                        print("[ERROR] Labels folder not found")
                        exit(1)

                    index = 0

                    chip(class_name, download_dir, label_dir,len(os.listdir(download_dir))-1, index)

                    while True:
                        if index < (len(os.listdir(download_dir)) - 2):
                           index += 1
                           chip(class_name, download_dir, label_dir,len(os.listdir(download_dir))-1, index)

cdleong commented 5 years ago

This implementation is very slow, and just throws everything into one big folder. Preserving the class separation would be nice.

keldrom commented 5 years ago

Thank you for this optional addition but I don't think this is the aim of this project.

EscVM / OIDv4_ToolKit

"chipper" option to cut out image chips using bounding boxes #44