clovaai / CRAFT-pytorch

Official implementation of Character Region Awareness for Text Detection (CRAFT)
MIT License

Gaussian heatmap? #3

Open tinhchuquang opened 5 years ago

tinhchuquang commented 5 years ago

In the paper, they create the ground-truth labels using a Gaussian heatmap generated by another application. Can you show me the algorithm for creating the Gaussian heatmap? Thanks

hetul-patel commented 5 years ago

You can use a standard normal distribution to compute the probability associated with any pixel as a function of the pixel's distance from the center.

import cv2
import numpy as np
from math import exp

# Probability as a function of distance from the center derived
# from a gaussian distribution with mean = 0 and stdv = 1
scaledGaussian = lambda x : exp(-(1/2)*(x**2))

imgSize = 512
isotropicGrayscaleImage = np.zeros((imgSize,imgSize),np.uint8)

for i in range(imgSize):
  for j in range(imgSize):

    # find euclidian distance from center of image (imgSize/2,imgSize/2) 
    # and scale it to range of 0 to 2.5 as scaled Gaussian
    # returns highest probability for x=0 and approximately
    # zero probability for x > 2.5

    distanceFromCenter = np.linalg.norm(np.array([i-imgSize/2,j-imgSize/2]))
    distanceFromCenter = 2.5*distanceFromCenter/(imgSize/2)
    scaledGaussianProb = scaledGaussian(distanceFromCenter)
    isotropicGrayscaleImage[i,j] = np.clip(scaledGaussianProb*255,0,255)

# Convert Grayscale to HeatMap Using Opencv
isotropicGaussianHeatmapImage = cv2.applyColorMap(isotropicGrayscaleImage, 
                                                  cv2.COLORMAP_JET)
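
If you want to quickly sanity-check the result on disk (my addition, not part of the snippet above), you can write both maps out with OpenCV:

cv2.imwrite('isotropic_grayscale.png', isotropicGrayscaleImage)
cv2.imwrite('isotropic_heatmap.png', isotropicGaussianHeatmapImage)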

You can find a more intuitive implementation here: IsotropiceGaussianMap Implementation using python.

tinhchuquang commented 5 years ago

First, thanks for showing me the Gaussian heatmap. It works fine for the character heatmap, but when I use it to link words (the affinity boxes) to create the heatmap, it doesn't work, because I don't know where each word ends. Example: Heatmap_word. Can you show your solution? Thanks

mayank-git-hub commented 5 years ago

A working example of creating the Gaussian heat map with perspective transform


UPDATE 27-06-19 Added functionality to incorporate out of bound character bbox


UPDATE 22-08-19 Checking if character bbox is valid using shapely.geometry.Polygon


from torch.utils import data
import matplotlib.pyplot as plt
import numpy as np
import cv2
from shapely.geometry import Polygon

DEBUG = True

def four_point_transform(image, pts):
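    # Warp `image` (the square Gaussian template) onto the quadrilateral `pts`:
    # the template corners are mapped to the four given points.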

    max_x, max_y = np.max(pts[:, 0]).astype(np.int32), np.max(pts[:, 1]).astype(np.int32)

    dst = np.array([
        [0, 0],
        [image.shape[1] - 1, 0],
        [image.shape[1] - 1, image.shape[0] - 1],
        [0, image.shape[0] - 1]], dtype="float32")

    M = cv2.getPerspectiveTransform(dst, pts)
    warped = cv2.warpPerspective(image, M, (max_x, max_y))

    return warped

class DataLoader(data.Dataset):

    def __init__(self, type_):

        self.type_ = type_
        self.base_path = '<Path for Images>'
        if DEBUG:
            import os
            if not os.path.exists('cache.pkl'):
                with open('cache.pkl', 'wb') as f:
                    import pickle
                    from scipy.io import loadmat
                    mat = loadmat('Path for gt.mat')
                    pickle.dump([mat['imnames'][0][0:1000], mat['charBB'][0][0:1000], mat['txt'][0][0:1000]], f)
                    print('Created the pickle file, rerun the program')
                    exit(0)
            else:
                with open('cache.pkl', 'rb') as f:
                    import pickle
                    self.imnames, self.charBB, self.txt = pickle.load(f)
                    print('Loaded DEBUG')

        else:

            from scipy.io import loadmat
            mat = loadmat('Path for gt.mat')

            total_number = mat['imnames'][0].shape[0]
            train_images = int(total_number * 0.9)

            if self.type_ == 'train':

                self.imnames = mat['imnames'][0][0:train_images]
                self.charBB = mat['charBB'][0][0:train_images]  # number of images, 2, 4, num_character
                self.txt = mat['txt'][0][0:train_images]  # transcriptions, needed to build the word list below

            else:

                self.imnames = mat['imnames'][0][train_images:]
                self.charBB = mat['charBB'][0][train_images:]  # number of images, 2, 4, num_character
                self.txt = mat['txt'][0][train_images:]

        for no, i in enumerate(self.txt):
            all_words = []
            for j in i:
                all_words += [k for k in ' '.join(j.split('\n')).split() if k!='']
            self.txt[no] = all_words

        sigma = 10
        spread = 3
        extent = int(spread * sigma)
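
        # Isotropic Gaussian template (2*extent x 2*extent); it is built once here
        # and later perspective-warped onto each character / affinity box.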
        self.gaussian_heatmap = np.zeros([2 * extent, 2 * extent], dtype=np.float32)

        for i in range(2 * extent):
            for j in range(2 * extent):
                self.gaussian_heatmap[i, j] = 1 / 2 / np.pi / (sigma ** 2) * np.exp(
                    -1 / 2 * ((i - spread * sigma - 0.5) ** 2 + (j - spread * sigma - 0.5) ** 2) / (sigma ** 2))

        self.gaussian_heatmap = (self.gaussian_heatmap / np.max(self.gaussian_heatmap) * 255).astype(np.uint8)

    def add_character(self, image, bbox):
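        # Paste one perspective-warped Gaussian into `image` at the character bbox,
        # skipping invalid boxes and boxes that start outside the image.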

        if not Polygon(bbox.reshape([4, 2]).astype(np.int32)).is_valid:
            return image
        top_left = np.array([np.min(bbox[:, 0]), np.min(bbox[:, 1])]).astype(np.int32)
        if top_left[1] > image.shape[0] or top_left[0] > image.shape[1]:
            # This means there is some bug in the character bbox
            # Will have to look into more depth to understand this
            return image
        bbox -= top_left[None, :]
        transformed = four_point_transform(self.gaussian_heatmap.copy(), bbox.astype(np.float32))

        start_row = max(top_left[1], 0) - top_left[1]
        start_col = max(top_left[0], 0) - top_left[0]
        end_row = min(top_left[1]+transformed.shape[0], image.shape[0])
        end_col = min(top_left[0]+transformed.shape[1], image.shape[1])

        image[max(top_left[1], 0):end_row, max(top_left[0], 0):end_col] += transformed[start_row:end_row - top_left[1], start_col:end_col - top_left[0]]

        return image

    def generate_target(self, image_size, character_bbox):

        character_bbox = character_bbox.transpose(2, 1, 0)

        channel, height, width = image_size

        target = np.zeros([height, width], dtype=np.uint8)

        for i in range(character_bbox.shape[0]):

            target = self.add_character(target, character_bbox[i])

        return target/255, np.float32(target != 0)

    def add_affinity(self, image, bbox_1, bbox_2):
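        # The affinity box spans two adjacent character boxes: its corners are the
        # centroids of the upper/lower triangles of each box (as in the paper).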

        center_1, center_2 = np.mean(bbox_1, axis=0), np.mean(bbox_2, axis=0)
        tl = np.mean([bbox_1[0], bbox_1[1], center_1], axis=0)
        bl = np.mean([bbox_1[2], bbox_1[3], center_1], axis=0)
        tr = np.mean([bbox_2[0], bbox_2[1], center_2], axis=0)
        br = np.mean([bbox_2[2], bbox_2[3], center_2], axis=0)

        affinity = np.array([tl, tr, br, bl])

        return self.add_character(image, affinity)

    def generate_affinity(self, image_size, character_bbox, text):

        """

        :param image_size: shape = [3, image_height, image_width]
        :param character_bbox: [2, 4, num_characters]
        :param text: [num_words]
        :return:
        """

        character_bbox = character_bbox.transpose(2, 1, 0)

        channel, height, width = image_size

        target = np.zeros([height, width], dtype=np.uint8)

        total_letters = 0

        for word in text:
            for char_num in range(len(word)-1):
                target = self.add_affinity(target, character_bbox[total_letters].copy(), character_bbox[total_letters+1].copy())
                total_letters += 1
            total_letters += 1

        return target / 255, np.float32(target != 0)

    def __getitem__(self, item):

        image = plt.imread(self.base_path+'/'+self.imnames[item][0]).transpose(2, 0, 1)/255
        weight, target = self.generate_target(image.shape, self.charBB[item].copy())
        weight_affinity, target_affinity = self.generate_affinity(image.shape, self.charBB[item].copy(), self.txt[item].copy())

        return image, weight, target, weight_affinity, target_affinity

    def __len__(self):

        return len(self.imnames)

if __name__ == "__main__":

    dataloader = DataLoader('train')
    image, weight, target, weight_affinity, target_affinity = dataloader[0]

    plt.imsave('image.png', image.transpose(1, 2, 0))
    plt.imsave('target.png', target)
    plt.imsave('weight.png', weight)
    plt.imsave('weight_affinity.png', weight_affinity)
    plt.imsave('target_affinity.png', target_affinity)
    plt.imsave('together.png', np.concatenate([weight[:, :, None], weight_affinity[:, :, None], np.zeros_like(weight)[:, :, None]], axis=2))

Reference Code - https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/

Do point me out if there is a bug, I will try my best to address it.

tinhchuquang commented 5 years ago

A working example of creating the Gaussian heat map with perspective transform

from torch.utils import data
import matplotlib.pyplot as plt
import numpy as np
import cv2

DEBUG = True

def four_point_transform(image, pts):

  max_x, max_y = np.max(pts[:, 0]).astype(np.int32), np.max(pts[:, 1]).astype(np.int32)

  dst = np.array([
      [0, 0],
      [image.shape[1] - 1, 0],
      [image.shape[1] - 1, image.shape[0] - 1],
      [0, image.shape[0] - 1]], dtype="float32")

  M = cv2.getPerspectiveTransform(dst, pts)
  warped = cv2.warpPerspective(image, M, (max_x, max_y))

  return warped

class DataLoader(data.Dataset):

  def __init__(self, type_):

      self.type_ = type_
      self.base_path = '<Path for Images>'
      if DEBUG:
          import os
          if not os.path.exists('cache.pkl'):
              with open('cache.pkl', 'wb') as f:
                  import pickle
                  from scipy.io import loadmat
                  mat = loadmat('Path for gt.mat')
                  pickle.dump([mat['imnames'][0][0:1000], mat['charBB'][0][0:1000], mat['txt'][0][0:1000]], f)
                  print('Created the pickle file, rerun the program')
                  exit(0)
          else:
              with open('cache.pkl', 'rb') as f:
                  import pickle
                  self.imnames, self.charBB, self.txt = pickle.load(f)
                  print('Loaded DEBUG')

      else:

          from scipy.io import loadmat
          mat = loadmat('Path for gt.mat')

          total_number = mat['imnames'][0].shape[0]
          train_images = int(total_number * 0.9)

          if self.type_ == 'train':

              self.imnames = mat['imnames'][0][0:train_images]
              self.charBB = mat['charBB'][0][0:train_images]  # number of images, 2, 4, num_character

          else:

              self.imnames = mat['imnames'][0][train_images:]
              self.charBB = mat['charBB'][0][train_images:]  # number of images, 2, 4, num_character

      for no, i in enumerate(self.txt):
          all_words = []
          for j in i:
              all_words += [k for k in ' '.join(j.split('\n')).split() if k!='']
          self.txt[no] = all_words

      sigma = 10
      spread = 3
      extent = int(spread * sigma)
      self.gaussian_heatmap = np.zeros([2 * extent, 2 * extent], dtype=np.float32)

      for i in range(2 * extent):
          for j in range(2 * extent):
              self.gaussian_heatmap[i, j] = 1 / 2 / np.pi / (sigma ** 2) * np.exp(
                  -1 / 2 * ((i - spread * sigma - 0.5) ** 2 + (j - spread * sigma - 0.5) ** 2) / (sigma ** 2))

      self.gaussian_heatmap = (self.gaussian_heatmap / np.max(self.gaussian_heatmap) * 255).astype(np.uint8)

  def add_character(self, image, bbox):

      top_left = np.array([np.min(bbox[:, 0]), np.min(bbox[:, 1])]).astype(np.int32)
      bbox -= top_left[None, :]
      transformed = four_point_transform(self.gaussian_heatmap.copy(), bbox.astype(np.float32))
      image[top_left[1]:top_left[1]+transformed.shape[0], top_left[0]:top_left[0]+transformed.shape[1]] += transformed
      return image

  def generate_target(self, image_size, character_bbox):

      character_bbox = character_bbox.transpose(2, 1, 0)

      channel, height, width = image_size

      target = np.zeros([height, width], dtype=np.uint8)

      for i in range(character_bbox.shape[0]):

          target = self.add_character(target, character_bbox[i])

      return target/255, np.float32(target != 0)

  def add_affinity(self, image, bbox_1, bbox_2):

      center_1, center_2 = np.mean(bbox_1, axis=0), np.mean(bbox_2, axis=0)
      tl = np.mean([bbox_1[0], bbox_1[1], center_1], axis=0)
      bl = np.mean([bbox_1[2], bbox_1[3], center_1], axis=0)
      tr = np.mean([bbox_2[0], bbox_2[1], center_2], axis=0)
      br = np.mean([bbox_2[2], bbox_2[3], center_2], axis=0)

      affinity = np.array([tl, tr, br, bl])

      return self.add_character(image, affinity)

  def generate_affinity(self, image_size, character_bbox, text):

      """

      :param image_size: shape = [3, image_height, image_width]
      :param character_bbox: [2, 4, num_characters]
      :param text: [num_words]
      :return:
      """

      character_bbox = character_bbox.transpose(2, 1, 0)

      channel, height, width = image_size

      target = np.zeros([height, width], dtype=np.uint8)

      total_letters = 0

      for word in text:
          for char_num in range(len(word)-1):
              target = self.add_affinity(target, character_bbox[total_letters], character_bbox[total_letters+1])
              total_letters += 1
          total_letters += 1

      return target / 255, np.float32(target != 0)

  def __getitem__(self, item):

      image = plt.imread(self.base_path+'/'+self.imnames[item][0]).transpose(2, 0, 1)/255
      weight, target = self.generate_target(image.shape, self.charBB[item].copy())
      weight_affinity, target_affinity = self.generate_affinity(image.shape, self.charBB[item].copy(), self.txt[item].copy())

      return image, weight, target, weight_affinity, target_affinity

  def __len__(self):

      return len(self.imnames)

if __name__ == "__main__":

  dataloader = DataLoader('train')
  image, weight, target, weight_affinity, target_affinity = dataloader[0]

  plt.imsave('image.png', image.transpose(1, 2, 0))
  plt.imsave('target.png', target)
  plt.imsave('weight.png', weight)
  plt.imsave('weight_affinity.png', weight_affinity)
  plt.imsave('target_affinity.png', target_affinity)
  plt.imsave('together.png', np.concatenate([weight[:, :, None], weight_affinity[:, :, None], np.zeros_like(weight)[:, :, None]], axis=2))

Reference Code - https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/

Do point me out if there is a bug, I will try my best to address it.

The add_character function has a bug: "operands could not be broadcast together with shapes (20,0) (20,29) (20,0)". This is the code I used to test the dataloader:

import torch

def load_data():
    dataloader = DataLoader('train')
    trainloader = torch.utils.data.DataLoader(dataloader, batch_size=1, shuffle=True, num_workers=8)
    for batch_idx, (image, weight, target, weight_affinity, target_affinity) in enumerate(trainloader):
        print(batch_idx, ' -- ', image.shape, '--', weight.shape, '--', weight_affinity.shape)

mayank-git-hub commented 5 years ago

A working example of creating the Gaussian heat map with perspective transform

(quoted code identical to the previous comment)

The add_character function has a bug: "operands could not be broadcast together with shapes (20,0) (20,29) (20,0)".

Sometimes the coordinates of the character boxes fall outside the image dimensions, and in those cases this error is generated. I will try to update the code to handle the out-of-image-dimension case; until then, you can add an if/else block to discard character bboxes with values greater than the image dimensions or less than 0.

mayank-git-hub commented 5 years ago
if np.any(bbox < 0) or np.any(bbox[:, 0] > image.shape[1]) or np.any(bbox[:, 1] > image.shape[0]):
    return image

You can add this check at the top of the add_character function.

mayank-git-hub commented 5 years ago
def add_character(self, image, bbox):

        top_left = np.array([np.min(bbox[:, 0]), np.min(bbox[:, 1])]).astype(np.int32)
        if top_left[1] > image.shape[0] or top_left[0] > image.shape[1]:
            # This means there is some bug in the character bbox
            # Will have to look into more depth to understand this
            return image
        bbox -= top_left[None, :]
        transformed = four_point_transform(self.gaussian_heatmap.copy(), bbox.astype(np.float32))

        start_row = max(top_left[1], 0) - top_left[1]
        start_col = max(top_left[0], 0) - top_left[0]
        end_row = min(top_left[1]+transformed.shape[0], image.shape[0])
        end_col = min(top_left[0]+transformed.shape[1], image.shape[1])

        image[max(top_left[1], 0):end_row, max(top_left[0], 0):end_col] += transformed[start_row:end_row - top_left[1], start_col:end_col - top_left[0]]

        return image

I have made these changes to the code and am not getting the error any more. I hope this resolves your error too.

tinhchuquang commented 5 years ago

Thanks, pro. The code that works for me is:

def add_character(self, image, bbox):
    if np.any(bbox < 0) or np.any(bbox[:, 0] > image.shape[1]) or np.any(bbox[:, 1] > image.shape[0]):
        return image

    top_left = np.array([np.min(bbox[:, 0]), np.min(bbox[:, 1])]).astype(np.int32)
    bbox -= top_left[None, :]
    transformed = four_point_transform(self.gaussian_heatmap.copy(), bbox.astype(np.float32))

    start_row = max(top_left[1], 0) - top_left[1]
    start_col = max(top_left[0], 0) - top_left[0]
    end_row = min(top_left[1] + transformed.shape[0], image.shape[0])
    end_col = min(top_left[0] + transformed.shape[1], image.shape[1])

    image[max(top_left[1], 0):end_row, max(top_left[0], 0):end_col] += transformed[start_row:end_row - top_left[1],
                                                                       start_col:end_col - top_left[0]]

    return image

Based on your Gaussian heatmap, I can implement the paper. Thanks a lot.

YoungminBaek commented 5 years ago

Thanks, @mayank-git-hub for your explanation of Gaussian heatmap.

One comment about the SynthText dataset: some transcriptions are incorrectly labeled. It is not guaranteed that a word box corresponds to a text transcription one-to-one. Using the code below to separate the transcriptions, you will obtain the start and end points of the transcription corresponding to each word box.

import re
import itertools

texts = [re.split(' \n|\n |\n| ',t.strip()) for t in texts]
texts = list(itertools.chain(*texts))
texts = [t for t in texts if len(t)>0]
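
For example (toy transcriptions of my own, not from SynthText), this flattens the raw text blocks into a word list:

texts = ['the quick\nbrown', 'fox \njumps']
texts = [re.split(' \n|\n |\n| ', t.strip()) for t in texts]
texts = list(itertools.chain(*texts))
texts = [t for t in texts if len(t) > 0]
print(texts)  # ['the', 'quick', 'brown', 'fox', 'jumps']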
mayank-git-hub commented 5 years ago

Welcome @YoungminBaek , @tinhchuquang !

@YoungminBaek isn't my code for creating the word start and end points the same as yours in functionality? (Though yours seems a bit cleaner!)

for no, i in enumerate(self.txt):
    all_words = []
    for j in i:
        all_words += [k for k in ' '.join(j.split('\n')).split() if k!='']
    self.txt[no] = all_words
YoungminBaek commented 5 years ago

@mayank-git-hub Oh, your code already has that functionality. I missed it. Please forget about my previous comment. :)

namedysx commented 5 years ago

Hi! When there is more than one character in an image, is each character's bounding box in mat['charBB'] stored as its own vector [xmin, ymin, xmax, ymax], or are all the characters in the image packed into one vector [xmin_char1, ..., ymax_char1, xmin_char2, ..., ymax_char2, ...]? If it is the latter, how do you concatenate multiple vectors of different lengths?

namedysx commented 5 years ago

I want to transform my dataset from .txt format to .mat.

tinhchuquang commented 5 years ago

@namedysx Do you want to create a .mat file like the SynthText dataset, where mat['charBB'][0] has shape [2, 4, 6], mat['charBB'][1] has shape [2, 4, 10], and so on?

namedysx commented 5 years ago

@namedysx Do you want to create a .mat file like the SynthText dataset, where mat['charBB'][0] has shape [2, 4, 6], mat['charBB'][1] has shape [2, 4, 10], and so on?

Thanks for your reply. Yes, and what does each dimension mean?

tinhchuquang commented 5 years ago

My way is to create an array whose entries have different shapes:

import numpy as np

def create_word():
    char_bb = []
    length_word = np.random.randint(1, 4)  # random word length
    for i in range(length_word):
        # add the 4 points of one char
        char_bb.append([np.random.randint(1, 255), np.random.randint(1, 255)])
        char_bb.append([np.random.randint(1, 255), np.random.randint(1, 255)])
        char_bb.append([np.random.randint(1, 255), np.random.randint(1, 255)])
        char_bb.append([np.random.randint(1, 255), np.random.randint(1, 255)])
    return np.array(char_bb)

words = [create_word(), create_word()]

# an object array keeps entries of different shapes; shape = (1, len(words))
char_bbs = np.empty((1, len(words)), dtype=object)
for i, word in enumerate(words):
    char_bbs[0][i] = word.reshape(-1, 4, 2).transpose(2, 1, 0)  # -> (2, 4, num_chars)

print(char_bbs.shape)
print(char_bbs[0][0].shape, char_bbs[0][1].shape)
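
If you then want to write this out as a gt.mat that the loaders above can read, one option is scipy.io.savemat with object (cell) arrays. This is only a rough sketch with placeholder image names and transcriptions; I have not verified it against the original SynthText tooling, so the exact wrapping of the strings may need adjusting:

from scipy.io import savemat, loadmat
import numpy as np

# Placeholder entries mirroring the SynthText layout:
# mat['imnames'][0][i] -> array(['...']), mat['txt'][0][i] -> array of strings.
imnames = np.empty((1, 2), dtype=object)
txt = np.empty((1, 2), dtype=object)
imnames[0, 0], imnames[0, 1] = np.array(['img_1.jpg']), np.array(['img_2.jpg'])
txt[0, 0], txt[0, 1] = np.array(['word1 word2']), np.array(['word3'])

# char_bbs is the (1, num_images) object array built above (here the two toy
# "words" stand in for two images' worth of character boxes).
savemat('gt.mat', {'imnames': imnames, 'charBB': char_bbs, 'txt': txt})

mat = loadmat('gt.mat')
print(mat['charBB'][0][0].shape)  # should come back as (2, 4, num_chars)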
brooklyn1900 commented 4 years ago

What are the max and min values of the output Gaussian maps, score_text and score_link?

jjprincess commented 4 years ago

I want to transfrom my dataset from format .txt to .mat

Hi, can you share your code for converting txt to mat? Thanks a lot!

hanish3464 commented 4 years ago

@mayank-git-hub
How appropriate is the value of spread? I think 1 is the most similar to the picture in the paper. What do you think? Or is there a way to fit the normal distribution tightly inside the box?

mayank-git-hub commented 4 years ago

@mayank-git-hub How appropriate is the value of spread? I think 1 is the most similar to the picture in the paper. What do you think? Or is there a way to fit the normal distribution tightly inside the box?

I thought that if the affinity and the character bboxes should overlap, the spread should be large so there is less chance of the word being broken. That is why I kept the spread large. I am implementing the CRAFT training procedure and will try out smaller values of spread and check the results. Thanks for pointing out that the spread can also be a hyper-parameter to consider.

In theory the normal distribution spreads infinitely, but due to floating-point limits it effectively gets truncated. You can play with the spread and configure the edge values you want.
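
For a rough feel of what the spread does at the template border (just plugging numbers into the formula above, nothing from the original thread):

import numpy as np

sigma = 10
for spread in (1, 2, 3):
    extent = spread * sigma
    # relative value at the border of a template whose centre lies `extent` pixels away
    edge = np.exp(-0.5 * extent ** 2 / sigma ** 2)  # = exp(-spread**2 / 2)
    print(spread, round(edge, 3))
# spread=1 -> 0.607 (cut off at ~60% of the peak)
# spread=2 -> 0.135
# spread=3 -> 0.011 (effectively zero at the border, as in the code above)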

mayank-git-hub commented 4 years ago

Also, the code I gave above (https://github.com/clovaai/CRAFT-pytorch/issues/3#issuecomment-505903264) breaks down if the character quadrilateral is not valid. To check, you can use:

from shapely.geometry import Polygon
Polygon(bbox.reshape([4, 2]).astype(np.int32)).is_valid

If this is False, there is no need to add that character bbox.

mayank-git-hub commented 4 years ago

@mayank-git-hub How appropriate is the value of spread? I think 1 is the most similar to the picture in the paper. What do you think? Or is there a way to fit the normal distribution tightly inside the box?

sigma = 10
spread = 3
extent = int(spread * sigma)
center = spread * sigma / 2
gaussian_heatmap = np.zeros([extent, extent], dtype=np.float32)

for i_ in range(extent):
    for j_ in range(extent):
        gaussian_heatmap[i_, j_] = 1 / 2 / np.pi / (sigma ** 2) * np.exp(
            -1 / 2 * ((i_ - center - 0.5) ** 2 + (j_ - center - 0.5) ** 2) / (sigma ** 2))

gaussian_heatmap = (gaussian_heatmap / np.max(gaussian_heatmap) * 255).astype(np.uint8)

This seems to work well for me.

uname0x96 commented 4 years ago

Hi @mayank-git-hub, this Gaussian heatmap is used to generate the region score and the affinity score, and we use them for training, right? But as far as I know the Gaussian heatmap is basically the same for every char, only the size of each char's annotation differs, so why should we use it for training?

I use this code to generate the heatmap; in this example one image is one char:

> import cv2
> import numpy as np
> from math import exp
> import matplotlib.pyplot as plt
> 
> # Probability as a function of distance from the center derived
> # from a gaussian distribution with mean = 0 and stdv = 1
> scaledGaussian = lambda x : exp(-(1/2)*(x**2))
> 
> image = cv2.imread('./test_imgs/3.jpg')
> h,w,c = image.shape
> isotropicGrayscaleImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
> # isotropicGrayscaleImage = np.zeros((imgSize,imgSize),np.uint8)
> 
> for i in range(h):
>   for j in range(w):
> 
>     # find euclidian distance from center of image (imgSize/2,imgSize/2) 
>     # and scale it to range of 0 to 2.5 as scaled Gaussian
>     # returns highest probability for x=0 and approximately
>     # zero probability for x > 2.5
> 
>     distanceFromCenter = np.linalg.norm(np.array([i-h/2,j-w/2]))
>     distanceFromCenter = 2.5*distanceFromCenter/(h/2)
>     scaledGaussianProb = scaledGaussian(distanceFromCenter)
>     isotropicGrayscaleImage[i,j] = np.clip(scaledGaussianProb*255,0,255)
> 
> # Convert Grayscale to HeatMap Using Opencv
> isotropicGaussianHeatmapImage = cv2.applyColorMap(isotropicGrayscaleImage, 
>                                                   cv2.COLORMAP_JET)
> 
> plt.imshow(cv2.cvtColor(isotropicGaussianHeatmapImage, cv2.COLOR_BGR2RGB))
> plt.show()
mayank-git-hub commented 4 years ago

You are assuming that the character bbox is always horizontal, which is not the case.

Also, if you create the Gaussian heatmap from scratch for every new character, it is computationally costly.

So to generate the skewed Gaussian heatmap with less computation, you can generate a template once and apply a perspective transformation to it, as the authors mention in the paper.

hanish3464 commented 4 years ago

@mayank-git-hub I have a question about your four_point_transform code. The paper warps an isotropic 2D Gaussian into the skewed box, but your code seems to warp the isotropic 2D Gaussian after making the box isotropic. Please let me know if I have misunderstood.

mayank-git-hub commented 4 years ago

@hanish3464 You could try this explanation of the perspective transform.

https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/

They are trying to bring a skewed image back to a horizontal rectangle using four_point_transform, while I am doing the opposite.

uname0x96 commented 4 years ago

@hanish3464 You could try this explanation of the perspective transform.

https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/

They are trying to bring a skewed image back to a horizontal rectangle using four_point_transform, while I am doing the opposite.

Does that mean you are using polygon annotations instead of rectangles?

mayank-git-hub commented 4 years ago

Quadrilateral instead of rectangle, not polygon

uname0x96 commented 4 years ago

@mayank-git-hub Hmm, is the following the flow you use to export the heatmap?
Step 1: read the annotation and get the position of each char.
Step 2: crop the image with the points from step 1 and warp the crop to a horizontal rectangle.
Step 3: convert the image from step 2 into a Gaussian heatmap.
Step 4: warp the heatmap from the horizontal rectangle back to the original quadrilateral shape.
Step 5: put the result of step 4 back into the original image.

Is that right, bro?

mayank-git-hub commented 4 years ago

@mayank-git-hub Hmm, is the following the flow you use to export the heatmap?
Step 1: read the annotation and get the position of each char.
Step 2: crop the image with the points from step 1 and warp the crop to a horizontal rectangle.
Step 3: convert the image from step 2 into a Gaussian heatmap.
Step 4: warp the heatmap from the horizontal rectangle back to the original quadrilateral shape.
Step 5: put the result of step 4 back into the original image.

Is that right, bro?

Umm, not quite, your step 2 seems to be unnecessary.

1) Create an isotropic square Gaussian heatmap.
2) Read the annotation and get the position of each char.
3) Skew the square Gaussian heatmap to the quadrilateral annotation from step 2.
4) Add the output of step 3 to the target annotations.
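
Condensed into code, those four steps might look roughly like this (a minimal sketch; the quadrilateral, the 512x512 target size and the use of np.maximum instead of the += accumulation used earlier are my own choices for illustration):

import numpy as np
import cv2

# 1) isotropic square Gaussian template, scaled to 0..255
sigma, spread = 10, 3
extent = 2 * spread * sigma
xs = np.arange(extent, dtype=np.float32) - extent / 2
template = np.exp(-(xs[None, :] ** 2 + xs[:, None] ** 2) / (2 * sigma ** 2))
template = (template / template.max() * 255).astype(np.float32)

# 2) character position from the annotation (made-up quadrilateral: tl, tr, br, bl)
quad = np.array([[120, 80], [220, 95], [210, 160], [115, 150]], dtype=np.float32)

# 3) warp the square template onto that quadrilateral
src = np.array([[0, 0], [extent - 1, 0], [extent - 1, extent - 1], [0, extent - 1]],
               dtype=np.float32)
M = cv2.getPerspectiveTransform(src, quad)
target = np.zeros((512, 512), dtype=np.float32)
warped = cv2.warpPerspective(template, M, (target.shape[1], target.shape[0]))

# 4) accumulate into the region-score target
target = np.maximum(target, warped)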

uname0x96 commented 4 years ago

@mayank-git-hub And the isotropic Gaussian map is generated by a model trained with synthetic data, right?

learn01one commented 4 years ago

First, thanks for showing me the Gaussian heatmap. It works fine for the character heatmap, but when I use it to link words (the affinity boxes) to create the heatmap, it doesn't work, because I don't know where each word ends. Example: Heatmap_word. Can you show your solution? Thanks

Hi, can you show me how to use it for characters? Thanks very much.

cuongdxk57 commented 4 years ago

What are the arguments for creating the Gaussian heatmap: the width and height of the heatmap, and the mean or the standard deviation of the distribution?

ThisIsIsaac commented 4 years ago

@mayank-git-hub thanks so much for the code. Since there are multiple edits to the original code you have posted, I wanted to organize them so that others can take a quick look without having to read through the entire conversation. Please correct me if I am wrong.


1. Initial draft

from torch.utils import data
import matplotlib.pyplot as plt
import numpy as np
import cv2
from shapely.geometry import Polygon

DEBUG = True

def four_point_transform(image, pts):

    max_x, max_y = np.max(pts[:, 0]).astype(np.int32), np.max(pts[:, 1]).astype(np.int32)

    dst = np.array([
        [0, 0],
        [image.shape[1] - 1, 0],
        [image.shape[1] - 1, image.shape[0] - 1],
        [0, image.shape[0] - 1]], dtype="float32")

    M = cv2.getPerspectiveTransform(dst, pts)
    warped = cv2.warpPerspective(image, M, (max_x, max_y))

    return warped

class DataLoader(data.Dataset):

    def __init__(self, type_):

        self.type_ = type_
        self.base_path = '<Path for Images>'
        if DEBUG:
            import os
            if not os.path.exists('cache.pkl'):
                with open('cache.pkl', 'wb') as f:
                    import pickle
                    from scipy.io import loadmat
                    mat = loadmat('Path for gt.mat')
                    pickle.dump([mat['imnames'][0][0:1000], mat['charBB'][0][0:1000], mat['txt'][0][0:1000]], f)
                    print('Created the pickle file, rerun the program')
                    exit(0)
            else:
                with open('cache.pkl', 'rb') as f:
                    import pickle
                    self.imnames, self.charBB, self.txt = pickle.load(f)
                    print('Loaded DEBUG')

        else:

            from scipy.io import loadmat
            mat = loadmat('Path for gt.mat')

            total_number = mat['imnames'][0].shape[0]
            train_images = int(total_number * 0.9)

            if self.type_ == 'train':

                self.imnames = mat['imnames'][0][0:train_images]
                self.charBB = mat['charBB'][0][0:train_images]  # number of images, 2, 4, num_character
                self.txt = mat['txt'][0][0:train_images]  # transcriptions, needed to build the word list below

            else:

                self.imnames = mat['imnames'][0][train_images:]
                self.charBB = mat['charBB'][0][train_images:]  # number of images, 2, 4, num_character
                self.txt = mat['txt'][0][train_images:]

        for no, i in enumerate(self.txt):
            all_words = []
            for j in i:
                all_words += [k for k in ' '.join(j.split('\n')).split() if k!='']
            self.txt[no] = all_words

        sigma = 10
        spread = 3
        extent = int(spread * sigma)
        self.gaussian_heatmap = np.zeros([2 * extent, 2 * extent], dtype=np.float32)

        for i in range(2 * extent):
            for j in range(2 * extent):
                self.gaussian_heatmap[i, j] = 1 / 2 / np.pi / (sigma ** 2) * np.exp(
                    -1 / 2 * ((i - spread * sigma - 0.5) ** 2 + (j - spread * sigma - 0.5) ** 2) / (sigma ** 2))

        self.gaussian_heatmap = (self.gaussian_heatmap / np.max(self.gaussian_heatmap) * 255).astype(np.uint8)

    def add_character(self, image, bbox):

        if not Polygon(bbox.reshape([4, 2]).astype(np.int32)).is_valid:
            return image
        top_left = np.array([np.min(bbox[:, 0]), np.min(bbox[:, 1])]).astype(np.int32)
        if top_left[1] > image.shape[0] or top_left[0] > image.shape[1]:
            # This means there is some bug in the character bbox
            # Will have to look into more depth to understand this
            return image
        bbox -= top_left[None, :]
        transformed = four_point_transform(self.gaussian_heatmap.copy(), bbox.astype(np.float32))

        start_row = max(top_left[1], 0) - top_left[1]
        start_col = max(top_left[0], 0) - top_left[0]
        end_row = min(top_left[1]+transformed.shape[0], image.shape[0])
        end_col = min(top_left[0]+transformed.shape[1], image.shape[1])

        image[max(top_left[1], 0):end_row, max(top_left[0], 0):end_col] += transformed[start_row:end_row - top_left[1], start_col:end_col - top_left[0]]

        return image

    def generate_target(self, image_size, character_bbox):

        character_bbox = character_bbox.transpose(2, 1, 0)

        channel, height, width = image_size

        target = np.zeros([height, width], dtype=np.uint8)

        for i in range(character_bbox.shape[0]):

            target = self.add_character(target, character_bbox[i])

        return target/255, np.float32(target != 0)

    def add_affinity(self, image, bbox_1, bbox_2):

        center_1, center_2 = np.mean(bbox_1, axis=0), np.mean(bbox_2, axis=0)
        tl = np.mean([bbox_1[0], bbox_1[1], center_1], axis=0)
        bl = np.mean([bbox_1[2], bbox_1[3], center_1], axis=0)
        tr = np.mean([bbox_2[0], bbox_2[1], center_2], axis=0)
        br = np.mean([bbox_2[2], bbox_2[3], center_2], axis=0)

        affinity = np.array([tl, tr, br, bl])

        return self.add_character(image, affinity)

    def generate_affinity(self, image_size, character_bbox, text):

        """

        :param image_size: shape = [3, image_height, image_width]
        :param character_bbox: [2, 4, num_characters]
        :param text: [num_words]
        :return:
        """

        character_bbox = character_bbox.transpose(2, 1, 0)

        channel, height, width = image_size

        target = np.zeros([height, width], dtype=np.uint8)

        total_letters = 0

        for word in text:
            for char_num in range(len(word)-1):
                target = self.add_affinity(target, character_bbox[total_letters].copy(), character_bbox[total_letters+1].copy())
                total_letters += 1
            total_letters += 1

        return target / 255, np.float32(target != 0)

    def __getitem__(self, item):

        image = plt.imread(self.base_path+'/'+self.imnames[item][0]).transpose(2, 0, 1)/255
        weight, target = self.generate_target(image.shape, self.charBB[item].copy())
        weight_affinity, target_affinity = self.generate_affinity(image.shape, self.charBB[item].copy(), self.txt[item].copy())

        return image, weight, target, weight_affinity, target_affinity

    def __len__(self):

        return len(self.imnames)

if __name__ == "__main__":

    dataloader = DataLoader('train')
    image, weight, target, weight_affinity, target_affinity = dataloader[0]

    plt.imsave('image.png', image.transpose(1, 2, 0))
    plt.imsave('target.png', target)
    plt.imsave('weight.png', weight)
    plt.imsave('weight_affinity.png', weight_affinity)
    plt.imsave('target_affinity.png', target_affinity)
    plt.imsave('together.png', np.concatenate([weight[:, :, None], weight_affinity[:, :, None], np.zeros_like(weight)[:, :, None]], axis=2))

2. New add_character

This replaces the add_character function above:

def add_character(self, image, bbox):

        top_left = np.array([np.min(bbox[:, 0]), np.min(bbox[:, 1])]).astype(np.int32)
        if top_left[1] > image.shape[0] or top_left[0] > image.shape[1]:
            # This means there is some bug in the character bbox
            # Will have to look into more depth to understand this
            return image
        bbox -= top_left[None, :]
        transformed = four_point_transform(self.gaussian_heatmap.copy(), bbox.astype(np.float32))

        start_row = max(top_left[1], 0) - top_left[1]
        start_col = max(top_left[0], 0) - top_left[0]
        end_row = min(top_left[1]+transformed.shape[0], image.shape[0])
        end_col = min(top_left[0]+transformed.shape[1], image.shape[1])

        image[max(top_left[1], 0):end_row, max(top_left[0], 0):end_col] += transformed[start_row:end_row - top_left[1], start_col:end_col - top_left[0]]

        return image

3. Add checks for valid bounding boxes

This checks if bounding boxes are valid.

if np.any(bbox < 0) or np.any(bbox[:, 0] > image.shape[1]) or np.any(bbox[:, 1] > image.shape[0]):
    return image

4. New hyperparameters

This replaces the latter part of __init__ in the DataLoader class:

sigma = 10
spread = 3
extent = int(spread * sigma)
center = spread * sigma / 2
gaussian_heatmap = np.zeros([extent, extent], dtype=np.float32)

for i_ in range(extent):
    for j_ in range(extent):
        gaussian_heatmap[i_, j_] = 1 / 2 / np.pi / (sigma ** 2) * np.exp(
            -1 / 2 * ((i_ - center - 0.5) ** 2 + (j_ - center - 0.5) ** 2) / (sigma ** 2))

gaussian_heatmap = (gaussian_heatmap / np.max(gaussian_heatmap) * 255).astype(np.uint8)
ThisIsIsaac commented 4 years ago

@mayank-git-hub

  1. Did you also implement code for creating affinity boxes?

  2. Did you implement the training code?

If so, I would really appreciate it if you could share!

ndgnuh commented 10 months ago
import numpy as np
import cv2

def make_gaussian(polygon, max_xy, k=6):
    W, H = max_xy
    polygon = np.array(polygon, dtype="float32")

    # Bounding rectangle coords
    x1, y1, w, h = cv2.boundingRect(polygon[:, None, :])
    x2 = x1 + w
    y2 = y1 + h

    # Make gaussian
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    sx = w / k
    sy = h / k
    x1, x2 = np.clip((x1, x2), 0, W)
    y1, y2 = np.clip((y1, y2), 0, H)
    xs = (np.arange(x1, x2)[None, :] - cx) / sx
    ys = (np.arange(y1, y2)[:, None] - cy) / sy
    gauss = np.exp(-(np.square(xs) + np.square(ys)) / 2)

    # Warp the gaussian
    w = x2 - x1
    h = y2 - y1
    if w <= 0 or h <= 0:
        return None, (None, None, None, None), (None, None)
    src = np.array([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]], dtype="float32")
    dst = polygon - np.array([x1, y1], dtype="float32").reshape(1, 1, 2)
    transform = cv2.getPerspectiveTransform(src, dst)
    gauss = cv2.warpPerspective(gauss, transform, (x2 - x1, y2 - y1))

    # True center, for regression
    # Also, rescale the gaussian
    cy, cx = np.unravel_index(np.argmax(gauss), gauss.shape)
    gauss = gauss / gauss[cy, cx]
    cx = cx + x1
    cy = cy + y1

    return gauss, (x1, y1, x2, y2), (cx, cy)
from matplotlib import pyplot as plt

W = H = 512
canvas = np.zeros((W, H), 'float32')
polygon = np.array([(109, 493), (310, 152), (148, 83), (50, 436)], 'int32')  # int32 for cv2.polylines
gauss, (x1, y1, x2, y2), (cx, cy) = make_gaussian(polygon, (W, H), k=4)
canvas[y1:y2, x1:x2] = np.fmax(canvas[y1:y2, x1:x2], gauss)
cv2.polylines(canvas, [polygon], isClosed=True, color=0.5, thickness=2)
plt.imshow(canvas, cmap='jet')
plt.colorbar()
