mvoelk / ssd_detectors

SSD-based object and text detection with Keras, SSD, DSOD, TextBoxes, SegLink, TextBoxes++, CRNN
MIT License

How to train other datasets like SVT with the SynthText SL model #12

Open vigneshgig opened 5 years ago

vigneshgig commented 5 years ago

I loaded the dataset using the code below:

from data_svt import GTUtility
gt_util = GTUtility('./data/svt/')
gt_util_train, gt_util_val = gt_util.split(0.7)
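
For completeness, the snippets below also rely on a few imports that the post leaves out. Roughly the following should do; the ssd_data and sl_utils paths are taken from the traceback further down, while the other module paths are assumptions that may differ between versions of the repo:

import os
import time
import numpy as np
import keras

from ssd_data import InputGenerator      # per the traceback below
from sl_utils import PriorUtil           # per the traceback below
from sl_model import DSODSL512           # assumed module path
from sl_training import SegLinkLoss      # assumed module path
# load_weights and Logger are repo helpers; their exact module also varies by version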

Then I ran this code:

# SegLink + DenseNet
model = DSODSL512()
#model = DSODSL512(activation='leaky_relu')
weights_path = None
batch_size = 6
experiment = 'dsodsl512_synthtext'

if weights_path is not None:
    if 'ssd512' in weights_path:
        layer_list = [
            'conv1_1', 'conv1_2',
            'conv2_1', 'conv2_2',
            'conv3_1', 'conv3_2', 'conv3_3',
            'conv4_1', 'conv4_2', 'conv4_3',
            'conv5_1', 'conv5_2', 'conv5_3',
            'fc6', 'fc7',
            'conv6_1', 'conv6_2',
            'conv7_1', 'conv7_2',
            'conv8_1', 'conv8_2',
            'conv9_1', 'conv9_2',
        ]
        freeze = [
            'conv1_1', 'conv1_2',
            'conv2_1', 'conv2_2',
            'conv3_1', 'conv3_2', 'conv3_3',
            #'conv4_1', 'conv4_2', 'conv4_3',
            #'conv5_1', 'conv5_2', 'conv5_3',
        ]

        load_weights(model, weights_path, layer_list)
        for layer in model.layers:
            layer.trainable = layer.name not in freeze
    else:
        load_weights(model, weights_path)

prior_util = PriorUtil(model)

And finally:

epochs = 100
initial_epoch = 0

gen_train = InputGenerator(gt_util_train, prior_util, batch_size, model.image_size, augmentation=False)
gen_val = InputGenerator(gt_util_val, prior_util, batch_size, model.image_size, augmentation=False)

checkdir = './checkpoints/' + time.strftime('%Y%m%d%H%M') + '_' + experiment
if not os.path.exists(checkdir):
    os.makedirs(checkdir)

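# dump this notebook's input history (IPython's In list) next to the checkpoints; only works inside Jupyter/IPython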
with open(checkdir+'/source.py','wb') as f:
    source = ''.join(['# In[%i]\n%s\n\n' % (i, In[i]) for i in range(len(In))])
    f.write(source.encode())

#optim = keras.optimizers.SGD(lr=1e-3, momentum=0.9, decay=0, nesterov=True)
optim = keras.optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=0.001, decay=0.0)

# weight decay
regularizer = keras.regularizers.l2(5e-4) # None if disabled
#regularizer = None
for l in model.layers:
    if l.__class__.__name__.startswith('Conv'):
        l.kernel_regularizer = regularizer

loss = SegLinkLoss(lambda_offsets=1.0, lambda_links=1.0, neg_pos_ratio=3.0)
#loss = SegLinkFocalLoss()
#loss = SegLinkFocalLoss(lambda_segments=1.0, lambda_offsets=1.0, lambda_links=1.0)
#loss = SegLinkFocalLoss(gamma_segments=3, gamma_links=3)

model.compile(optimizer=optim, loss=loss.compute, metrics=loss.metrics)

history = model.fit_generator(
        gen_train.generate(), 
        steps_per_epoch=gen_train.num_batches, 
        epochs=epochs, 
        verbose=1, 
        callbacks=[
            keras.callbacks.ModelCheckpoint(checkdir+'/weights.{epoch:03d}.h5', verbose=1, save_weights_only=True),
            Logger(checkdir),
            #LearningRateDecay()
        ], 
        validation_data=gen_val.generate(), 
        validation_steps=gen_val.num_batches,
        class_weight=None,
        max_queue_size=1, 
        workers=1, 
        #use_multiprocessing=False, 
        initial_epoch=initial_epoch, 
        #pickle_safe=False, # will use threading instead of multiprocessing, which is lighter on memory use but slower
        )

But I get this error:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py:110: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
Epoch 1/100

ValueError                                Traceback (most recent call last)
<ipython-input> in <module>()
     44         class_weight=None,
     45         max_queue_size=1,
---> 46         workers=1,
     47         #use_multiprocessing=False,
     48         #initial_epoch=initial_epoch,

/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
     89                 warnings.warn('Update your `' + object_name + '` call to the ' +
     90                               'Keras 2 API: ' + signature, stacklevel=2)
---> 91             return func(*args, **kwargs)
     92         wrapper._original_function = func
     93         return wrapper

/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
   1416             use_multiprocessing=use_multiprocessing,
   1417             shuffle=shuffle,
-> 1418             initial_epoch=initial_epoch)
   1419 
   1420     @interfaces.legacy_generator_methods_support

/usr/local/lib/python3.6/dist-packages/keras/engine/training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
    179             batch_index = 0
    180             while steps_done < steps_per_epoch:
--> 181                 generator_output = next(output_generator)
    182 
    183                 if not hasattr(generator_output, '__len__'):

/usr/local/lib/python3.6/dist-packages/keras/utils/data_utils.py in get(self)
    707                     "`use_multiprocessing=False, workers > 1`."
    708                     "For more information see issue #1638.")
--> 709             six.reraise(*sys.exc_info())

/usr/local/lib/python3.6/dist-packages/six.py in reraise(tp, value, tb)
    691             if value.__traceback__ is not tb:
    692                 raise value.with_traceback(tb)
--> 693             raise value
    694         finally:
    695             value = None

/usr/local/lib/python3.6/dist-packages/keras/utils/data_utils.py in get(self)
    683         try:
    684             while self.is_running():
--> 685                 inputs = self.queue.get(block=True).get()
    686                 self.queue.task_done()
    687                 if inputs is not None:

/usr/lib/python3.6/multiprocessing/pool.py in get(self, timeout)
    668             return self._value
    669         else:
--> 670             raise self._value
    671 
    672     def _set(self, i, obj):

/usr/lib/python3.6/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
    117         job, i, func, args, kwds = task
    118         try:
--> 119             result = (True, func(*args, **kwds))
    120         except Exception as e:
    121             if wrap_exception and func is not _helper_reraises_exception:

/usr/local/lib/python3.6/dist-packages/keras/utils/data_utils.py in next_sample(uid)
    624         The next value of generator `uid`.
    625     """
--> 626     return six.next(_SHARED_SEQUENCES[uid])
    627 
    628 

/content/drive/My Drive/ssd_detectors_master/ssd_data.py in generate(self, debug, encode, seed)
    565                 if len(targets) == batch_size:
    566                     if encode:
--> 567                         targets = [self.prior_util.encode(y) for y in targets]
    568                         targets = np.array(targets, dtype=np.float32)
    569                     tmp_inputs = np.array(inputs, dtype=np.float32)

/content/drive/My Drive/ssd_detectors_master/ssd_data.py in <listcomp>(.0)
    565                 if len(targets) == batch_size:
    566                     if encode:
--> 567                         targets = [self.prior_util.encode(y) for y in targets]
    568                         targets = np.array(targets, dtype=np.float32)
    569                     tmp_inputs = np.array(inputs, dtype=np.float32)

/content/drive/My Drive/ssd_detectors_master/sl_utils.py in encode(self, gt_data, debug)
    138         polygons = []
    139         for word in gt_data:
--> 140             xy = np.reshape(word[:8], (-1, 2))
    141             xy = np.copy(xy) * (self.image_w, self.image_h)
    142             polygons.append(xy)

/usr/local/lib/python3.6/dist-packages/numpy/core/fromnumeric.py in reshape(a, newshape, order)
    290            [5, 6]])
    291     """
--> 292     return _wrapfunc(a, 'reshape', newshape, order=order)
    293 
    294 

/usr/local/lib/python3.6/dist-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     54 def _wrapfunc(obj, method, *args, **kwds):
     55     try:
---> 56         return getattr(obj, method)(*args, **kwds)
     57 
     58     # An AttributeError occurs if the object does not have

ValueError: cannot reshape array of size 5 into shape (2)

Also, how do I modify this model for a custom text or object detection dataset? I used LabelImg to create my dataset, so how can I use it with your model? Thank you.
mvoelk commented 5 years ago

Your problem seems similar to issue #1. TextBoxes requires axis-aligned bounding boxes, but TextBoxes++ and SegLink require oriented bounding boxes. Without polygon=True, the SVT ground truth comes back as axis-aligned boxes (4 coordinates plus a class label, which is the array of size 5 in your ValueError), while the SegLink encoder expects 8-value polygons. I was simply too lazy to implement the 'polygon' case for datasets containing only axis-aligned bounding boxes.

The implementation of the 'polygon' case in the corresponding GTUtility is actually pretty straightforward. For the SVT dataset, you can take commit b18d09d8e61af26a1551ae49dd98be4768943b30 as an example.

from data_svt import GTUtility
gtu_train = GTUtility('data/SVT/', polygon=True)
gtu_test = GTUtility('data/SVT/', test=True, polygon=True)

If you find the time, pull requests are welcome :)
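
For anyone adapting a dataset that only has axis-aligned boxes: the 'polygon' case essentially expands each 4-value box into the 8-value corner format that the SegLink encoder reshapes to (4, 2). A minimal sketch, with a helper name that is illustrative and not from the repo:

import numpy as np

def box_to_polygon(box):
    # expand an axis-aligned box [xmin, ymin, xmax, ymax] (relative coordinates)
    # into corner format [x1, y1, x2, y2, x3, y3, x4, y4], clockwise from top-left
    xmin, ymin, xmax, ymax = box
    return np.array([xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], dtype=np.float32)

# box_to_polygon([0.1, 0.2, 0.5, 0.4])
# -> array([0.1, 0.2, 0.5, 0.2, 0.5, 0.4, 0.1, 0.4], dtype=float32)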

sivatejachinnam commented 5 years ago

How can I use a custom dataset that does not have oriented bounding boxes? I am using the LabelImg tool to create a custom Pascal VOC format dataset. Thanks for the reply.

mvoelk commented 5 years ago

@sivatejachinnam Take data_voc.py, remove the conversion stuff, fix the class names, add the text attribute and add the 'polygon' case as in the SVT example... should not be that hard...
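
For readers following along, here is a rough sketch of what such a GTUtility could look like, modeled on the repo's data_voc.py and data_svt.py. The directory layout, XML tag names, and the exact label layout are assumptions to check against your own LabelImg output:

import os
import numpy as np
from xml.etree import ElementTree

from ssd_data import BaseGTUtility  # base class of the repo's GTUtilities

class GTUtility(BaseGTUtility):
    # sketch for a LabelImg / Pascal VOC style text dataset with a single 'Text' class
    def __init__(self, data_path, polygon=False):
        self.data_path = data_path
        self.image_path = os.path.join(data_path, 'images')    # assumed layout
        self.gt_path = os.path.join(data_path, 'annotations')  # assumed layout
        self.classes = ['Background', 'Text']
        self.image_names = []
        self.data = []
        self.text = []
        for filename in os.listdir(self.gt_path):
            tree = ElementTree.parse(os.path.join(self.gt_path, filename))
            root = tree.getroot()
            size = root.find('size')
            img_w = float(size.find('width').text)
            img_h = float(size.find('height').text)
            boxes = []
            text = []
            for obj in root.iter('object'):
                bndbox = obj.find('bndbox')
                # normalize to relative coordinates, as the repo's GTUtilities do
                xmin = float(bndbox.find('xmin').text) / img_w
                ymin = float(bndbox.find('ymin').text) / img_h
                xmax = float(bndbox.find('xmax').text) / img_w
                ymax = float(bndbox.find('ymax').text) / img_h
                if polygon:
                    # 4 corner points, as in the SVT example
                    box = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
                else:
                    box = [xmin, ymin, xmax, ymax]
                boxes.append(box + [1])  # assumed: class label appended to each box
                text.append(obj.find('name').text)
            if len(boxes) > 0:
                self.image_names.append(root.find('filename').text)
                self.data.append(np.array(boxes, dtype=np.float32))
                self.text.append(text)
        self.init()  # BaseGTUtility bookkeeping (sample counts etc.)

With something like this in place, GTUtility('data/my_dataset/', polygon=True) should plug into the training code above.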

wander1985 commented 5 years ago

How do I use the LabelImg tool to create a custom dataset? For example, I have an image that needs to be labeled (below). How should I do the labeling? Should I give all the text ("Campus" and "Shop" in this example) the same class name "text" (as shown in the screenshot below), or the exact letters in the text (i.e. "Campus" and "Shop")?

Thanks!