dhlab-epfl / dhSegment

Generic framework for historical document processing
https://dhlab-epfl.github.com/dhSegment
GNU General Public License v3.0
370 stars 116 forks source link

Convert generated VIA binary masks (black and white) into RGB expected format #32

Closed loic001 closed 5 years ago

loic001 commented 5 years ago

First, thanks for your work !

I tried to create masks from a VIA project file (doc here). It works, but how do I convert the generated black-and-white masks into RGB masks (with classes.txt)?

I may have missed something but I did not find the code to do it.

Thanks for your help !

solivr commented 5 years ago

Hi, It has indeed not been included in the code yet. Here's an example (not tested) of how you could do it for a simple case.

from imageio import imsave, imread
import numpy as np
import os
from glob import glob

# Colour painted into the label image for each class label.
dict_color = {'label1': (255, 255, 255),  # white
              'label2': (255, 0, 0),  # red
              'label3': (0, 255, 0),  # green
              'label4': (0, 0, 255)}  # blue

# One folder per image ('path/generated_masks/<id_image>/'), each holding the
# binary masks generated for that image. The trailing '' keeps the trailing
# path separator so that glob only returns directories.
masks_directories = sorted(glob(os.path.join('path/generated_masks', '*', '')))
# All the labels to merge into one image (each must be the label name appended
# to the mask filename). Later labels overwrite earlier ones where they overlap.
mask_labels_to_use = ['label1', 'label2', 'label3']
export_dir = 'path/generated_label_images/'

for mask_dir in masks_directories:
    # <id_image> folder name, which is the basename of the exported label image.
    basename = os.path.basename(os.path.normpath(mask_dir)) + '.png'

    tmp_imgs = sorted(glob(os.path.join(mask_dir, '*.png')))
    if not tmp_imgs:
        # No mask in this folder: there is no shape to build a label image from.
        continue

    # Any mask gives the (height, width) of the label image to create.
    height, width = imread(tmp_imgs[0]).shape[:2]
    # uint8 so that the colours are written exactly as listed in dict_color.
    composed_mask = np.zeros([height, width, 3], dtype=np.uint8)

    for mask_label in mask_labels_to_use:
        # Binary mask of this label, if the image has one.
        mask_filenames = glob(os.path.join(mask_dir, '*{}*'.format(mask_label)))
        if not mask_filenames:
            continue

        composed_mask[imread(mask_filenames[0]) > 0] = dict_color[mask_label]

    # Write the label image once, after every label has been painted.
    os.makedirs(export_dir, exist_ok=True)
    imsave(os.path.join(export_dir, basename), composed_mask)
loic001 commented 5 years ago

Thank you for your quick answer. Your code works! For anyone interested, below is a generic function to do the same.


import os
from glob import glob
import re
from imageio import imsave, imread
import numpy as np

def via_generated_masks_to_composed_masks(via_collection_dir: str, class_colors: dict, export_dir: str, images_exts=('.png', '.jpg', '.jpeg')):
    """Merge per-class binary masks into one RGB label image per folder.

    Every sub-folder of ``via_collection_dir`` is expected to hold the binary
    masks of one image, named ``<anything>-mask-class-<label>.<ext>``. For each
    sub-folder, the non-zero pixels of every mask are painted with
    ``class_colors[<label>]`` on a black canvas, and the result is saved in
    ``export_dir`` as ``<sub-folder name><ext>``, where ``<ext>`` is the
    extension of the first mask of that sub-folder.

    Masks are painted in sorted filename order, so where masks overlap the
    last one wins. Sub-folders without any mask file are skipped.

    Args:
        via_collection_dir: Folder containing one sub-folder of masks per image.
        class_colors: Mapping from class label to the colour to paint
            (an RGB triple such as ``(255, 0, 0)``).
        export_dir: Folder the label images are written to; created if missing.
        images_exts: File extensions (with leading dot) treated as masks;
            matching is case-insensitive.

    Raises:
        ValueError: If ``class_colors`` or ``images_exts`` is empty, if a mask
            filename does not contain ``-mask-class-``, or if its label is not
            a key of ``class_colors``.
    """
    if not class_colors:
        raise ValueError('Must specify at least one class in class_colors')

    if not images_exts:
        raise ValueError('Must specify at least one image extension')

    label_marker = '-mask-class-'
    # Compare lower-cased extensions so that '.PNG', '.Png', ... all match.
    allowed_exts = {ext.lower() for ext in images_exts}

    os.makedirs(export_dir, exist_ok=True)

    for image_masks in sorted(glob(os.path.join(via_collection_dir, '*/'))):
        # The pattern ends with a separator, so dirname() only strips it.
        basename = os.path.basename(os.path.dirname(image_masks))

        # Filter on the extension BEFORE reading anything, so that stray
        # non-image files in the folder are ignored instead of being decoded.
        mask_paths = [
            path for path in sorted(glob(os.path.join(image_masks, '*')))
            if os.path.splitext(path)[1].lower() in allowed_exts
        ]
        if not mask_paths:
            continue

        composed_mask = None
        for mask_path in mask_paths:
            mask_filename = os.path.splitext(os.path.basename(mask_path))[0]
            # The label is everything after the first '-mask-class-'.
            _, marker_found, mask_label = mask_filename.partition(label_marker)
            if not marker_found:
                raise ValueError(
                    "Mask filename {!r} does not contain {!r}".format(mask_path, label_marker))
            if mask_label not in class_colors:
                raise ValueError(
                    "Label {!r} of mask {!r} is not in class_colors".format(mask_label, mask_path))

            # Read one mask at a time rather than holding the whole folder in memory.
            mask = imread(mask_path)
            if composed_mask is None:
                # Black canvas with the height/width of the first mask; uint8 so
                # that the colours are written exactly as given in class_colors.
                composed_mask = np.zeros(
                    (mask.shape[0], mask.shape[1], 3), dtype=np.uint8)

            composed_mask[mask > 0] = class_colors[mask_label]

        export_ext = os.path.splitext(mask_paths[0])[1]
        imsave(os.path.join(export_dir, basename + export_ext), composed_mask)

if __name__ == "__main__":
    # Example run: replace the '<...>' placeholders with real paths.
    # Colour painted for each class label found in the mask filenames.
    palette = dict(
        label_1=(255, 0, 0),  # red
        label_2=(0, 255, 0),  # green
        label_3=(0, 0, 255),  # blue
    )
    collection_root = '<...>\\data\\generated_masks\\mycollection'
    output_root = '<...>\\data\\generated_label_images'

    via_generated_masks_to_composed_masks(
        via_collection_dir=collection_root,
        class_colors=palette,
        export_dir=output_root,
    )
mrocr commented 5 years ago

@loic001 also have a look at this repo: github.com/alix-tz/GT_generator