Thinklab-SJTU / R3Det_Tensorflow

Code for AAAI 2021 paper: R3Det: Refined Single-Stage Detector with Feature Refinement for Rotating Object
Apache License 2.0

Conversion of model with MobileNetV2 backbone to a TFlite model #102

Closed: RomStriker closed this issue 3 years ago

RomStriker commented 3 years ago

Hi,

Is it possible to convert this model with a MobileNetV2 backbone to a TFLite model using the converters provided by TensorFlow? If so, and if anybody has tested it, how is the performance?

RomStriker commented 3 years ago

I managed to train an R3Det model with MobileNetV2 on a custom dataset, and now I am trying to convert it into a TFLite model. The first thing I need to do is get a frozen inference graph from the checkpoint; however, I need to know the names of the output nodes. I cannot seem to find them: I tried inspecting the graph in TensorBoard, exported a human-readable .pbtxt graph, and went through the code. It would be great if the author or somebody else could help me out with this. Thanks.
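In the meantime, this is roughly how I have been dumping candidate node names from the checkpoint (a TF1-style sketch; the .meta path is a placeholder):

```python
# Sketch: import the checkpoint's meta graph and print node names/ops to
# hunt for the detection outputs; the path below is a placeholder.
import tensorflow as tf

tf.reset_default_graph()
tf.train.import_meta_graph('/path/to/model.ckpt.meta')

for node in tf.get_default_graph().as_graph_def().node:
    print(node.name, node.op)
```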

yangxue0827 commented 3 years ago

https://github.com/DetectionTeamUCAS/Faster-RCNN_Tensorflow/tree/master/libs/export_pbs

Just a reference.

RomStriker commented 3 years ago

I have modified export_pbs and am able to get a frozen graph with it. I will try to convert it further to a TFLite model and update here with whatever I get. The modified export_pbs is below. If you think I have made a mistake, please point it out; I would appreciate that.

```python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function, division

import os
import sys

import tensorflow as tf
from tensorflow.python.tools import freeze_graph

sys.path.append('../../')
from data.io.image_preprocess import short_side_resize_for_inference_data
from libs.configs import cfgs
from libs.networks import build_whole_network_r3det

CKPT_PATH = '/home/test/R3Det_Tensorflow/output/trained_weights/RetinaNet_DOTA_R3Det_4x_20200819/DOTA_5801model.ckpt'
OUT_DIR = './output/Pbs'
PB_NAME = 'R3Det.pb'

def build_detection_graph():
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3],
                              name='input_img')  # is RGB. not BGR
    raw_shape = tf.shape(img_plac)
    raw_h, raw_w = tf.to_float(raw_shape[0]), tf.to_float(raw_shape[1])
    img_batch = tf.cast(img_plac, tf.float32)
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)

    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)

    img_batch = tf.expand_dims(img_batch, axis=0)  # [1, None, None, 3]

    det_net = build_whole_network_r3det.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                         is_training=False)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    # rotated boxes are [x_c, y_c, w, h, theta]
    x_c, y_c, w, h, theta = detection_boxes[:, 0], detection_boxes[:, 1], \
                            detection_boxes[:, 2], detection_boxes[:, 3], \
                            detection_boxes[:, 4]

    resized_shape = tf.shape(img_batch)
    resized_h, resized_w = tf.to_float(resized_shape[1]), tf.to_float(resized_shape[2])

    # rescale coordinates from the resized image back to the raw image
    x_c = x_c * raw_w / resized_w
    w = w * raw_w / resized_w

    y_c = y_c * raw_h / resized_h
    h = h * raw_h / resized_h

    boxes = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))
    dets = tf.concat([tf.reshape(detection_category, [-1, 1]),
                     tf.reshape(detection_scores, [-1, 1]),
                     boxes], axis=1, name='DetResults')

    return dets

def export_frozenPB():

    tf.reset_default_graph()

    dets = build_detection_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print("we have restored the weights from =====>>\n", CKPT_PATH)
        saver.restore(sess, CKPT_PATH)

        tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME)
        freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME),
                                  input_saver='',
                                  input_binary=False,
                                  input_checkpoint=CKPT_PATH,
                                  output_node_names="DetResults",
                                  restore_op_name="save/restore_all",
                                  filename_tensor_name='save/Const:0',
                                  output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')),
                                  clear_devices=False,
                                  initializer_nodes='')

if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = ''
    export_frozenPB()
```
RomStriker commented 3 years ago

So I managed to convert the model into a TFLite model. However, the model graph contains tf.py_func, and this introduces a problem: when the model is serialized, the Python function body wrapped by tf.py_func is not serialized with it. This gives me the following error when trying to perform inference with the model on Android.

```
Caused by: java.lang.IllegalStateException: Internal error: Unexpected failure when preparing tensor allocations: Encountered unresolved custom op: PyFunc.
    Node number 77 (PyFunc) failed to prepare.
```

Some of the occurrences of tf.py_func involved in the inference graph that I found include the following (I located these by scanning the frozen GraphDef; see the sketch after the snippets):

```python
# line 244 in build_whole_network_r3det.py
tmp_anchors = tf.py_func(generate_anchors.generate_anchors_pre,
                         inp=[featuremap_height, featuremap_width, stride,
                              np.array(cfgs.ANCHOR_SCALES) * stride, cfgs.ANCHOR_RATIOS, 4.0],
                         Tout=[tf.float32])

# line 39 in nms_rotate.py
keep = tf.py_func(nms_rotate_cpu,
                  inp=[decode_boxes, scores, iou_threshold, max_output_size],
                  Tout=tf.int64)
```
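For anyone doing the same hunt, this is roughly how I scan the frozen graph for remaining PyFunc nodes (a sketch; the path matches my export script above):

```python
# Sketch: parse the frozen GraphDef and list every PyFunc node, i.e. every
# op TFLite will fail to resolve at conversion/inference time.
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('./output/Pbs/R3Det_Frozen.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

for node in graph_def.node:
    if node.op == 'PyFunc':
        print(node.name, node.input)
```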

Do you think it is possible to write these functions using TensorFlow ops so we don't have to use tf.py_func? And do you think it can be done in a short time by someone who doesn't have experience writing models in TensorFlow? Or should I switch to a different model, if my main objective is to run this model on a mobile device?

yangxue0827 commented 3 years ago

https://github.com/yangxue0827/RotationDetection/blob/main/tools/r3det/exportPb.py @RomStriker

RomStriker commented 3 years ago

Hi,

I managed to do some work regarding the conversion. Below are the changes I made.

  1. I rewrote the nms_rotate function in nms_rotate.py as below; a quick smoke test follows the snippet.
```python
import math as m

import tensorflow as tf


def nms_rotate(decode_boxes, scores, iou_threshold, max_output_size,
               use_angle_condition=False, angle_threshold=0, use_gpu=True, gpu_id=0):
    """
    :param decode_boxes: format [x_c, y_c, w, h, theta]
    :param scores: scores of boxes
    :param iou_threshold: iou threshold (0.7 or 0.5)
    :param max_output_size: max number of outputs
    :return: the indices of the remaining boxes
    """
    keep = gpu_nms(decode_boxes, scores, max_boxes=max_output_size, nms_thresh=iou_threshold)
    return keep

# Reference: https://github.com/dohoseok/context-based-parking-slot-detect/blob/master/parking_slot_detector/utils/nms_utils.py
def gpu_nms(boxes, scores, max_boxes=50, score_thresh=0.5, nms_thresh=0.5, apply_rotate=True):
    """
    Perform NMS on GPU using TensorFlow.
    params:
        boxes: format [x_c, y_c, w, h, theta], theta in degrees
        scores: tensor of shape [num_boxes]
        max_boxes: integer, maximum number of predicted boxes you'd like, default is 50
        score_thresh: boxes whose score is below score_thresh are discarded
        nms_thresh: real value, "intersection over union" threshold used for NMS filtering
    """
    max_boxes = tf.constant(max_boxes, dtype='int32')
    angles = boxes[..., 4] * (m.pi / 180)  # degrees -> radians
    boxes = boxes[..., 0:4]
    quads = tf.stack([boxes[..., 0], boxes[..., 1],
                      boxes[..., 0] + boxes[..., 2], boxes[..., 1],
                      boxes[..., 0] + boxes[..., 2], boxes[..., 1] + boxes[..., 3],
                      boxes[..., 0], boxes[..., 1] + boxes[..., 3]], axis=1)

    boxes = tf.stack([boxes[..., 0], boxes[..., 1],
                      boxes[..., 0] + boxes[..., 2],
                      boxes[..., 1] + boxes[..., 3]], axis=1)

    quads = tf.reshape(quads, [-1, 4, 2])
    quads = tf.transpose(quads, perm=[0, 2, 1])

    rot_x = tf.stack([tf.cos(angles), -tf.sin(angles)], -1)
    rot_y = tf.stack([tf.sin(angles), tf.cos(angles)], -1)
    rot_mat = tf.stack([rot_x, rot_y], -2)

    quads = tf.einsum('bij,bjk->bik', rot_mat, quads)
    quads = tf.transpose(quads, perm=[0, 2, 1])
    quads = tf.reshape(quads, [-1, 8])

    mask = tf.greater_equal(scores, tf.constant(score_thresh))

    filter_boxes = tf.boolean_mask(boxes, mask)
    filter_score = tf.boolean_mask(scores, mask)
    filter_quads = tf.boolean_mask(quads, mask)

    if apply_rotate:
        nms_indices = tf.cond(tf.greater(tf.shape(filter_score)[0], 0),
                              lambda: rot_nms(filter_score, filter_quads, max_boxes, nms_thresh),
                              lambda: tf.image.non_max_suppression(boxes=filter_boxes,
                                                                   scores=filter_score,
                                                                   max_output_size=max_boxes,
                                                                   iou_threshold=nms_thresh, name='nms_indices')
                              )
    else:
        nms_indices = tf.image.non_max_suppression(boxes=filter_boxes,
                                                   scores=filter_score,
                                                   max_output_size=max_boxes,
                                                   iou_threshold=nms_thresh, name='nms_indices')

    return tf.cast(nms_indices, dtype=tf.int64)

def rot_nms(filter_score, filter_quads, max_boxes, nms_thresh):
    # I am not sure why we rotate by the negative of the angle of the best-scoring
    # box, but according to the source paper it allows us to use the TensorFlow NMS
    # function. I also tested without it, but it didn't seem to make a difference for me.
    # Find the rotation angle of the highest-scoring box
    max_score_idx = tf.argmax(filter_score)
    best_quad = filter_quads[max_score_idx]
    y_diff = best_quad[..., 7] + best_quad[..., 5] - best_quad[..., 3] - best_quad[..., 1]
    x_diff = best_quad[..., 6] + best_quad[..., 4] - best_quad[..., 2] - best_quad[..., 0]
    angle = tf.atan2(y_diff, x_diff)

    temp_quads = tf.reshape(filter_quads, [-1, 4, 2])

    # Compute the rotation matrix
    rot_x = tf.stack([tf.cos(angle), -tf.sin(angle)], -1)
    rot_y = tf.stack([tf.sin(angle), tf.cos(angle)], -1)
    rot_mat = tf.stack([rot_x, rot_y], -2)

    # Apply an inverse rotation matrix to get axis-aligned boxes
    # Out[l, i, k] = sum_j rot_mat[j, k] * temp_quads[l, i, j]
    rot_quads = tf.einsum('jk,lij->lik', rot_mat, temp_quads)
    rot_quads = tf.reshape(rot_quads, [-1, 8])
    rot_boxes = tf.stack(
        [tf.minimum(tf.minimum(rot_quads[..., 0], rot_quads[..., 2]), tf.minimum(rot_quads[..., 4], rot_quads[..., 6])),
         tf.minimum(tf.minimum(rot_quads[..., 1], rot_quads[..., 3]), tf.minimum(rot_quads[..., 5], rot_quads[..., 7])),
         tf.maximum(tf.maximum(rot_quads[..., 0], rot_quads[..., 2]), tf.maximum(rot_quads[..., 4], rot_quads[..., 6])),
         tf.maximum(tf.maximum(rot_quads[..., 1], rot_quads[..., 3]),
                    tf.maximum(rot_quads[..., 5], rot_quads[..., 7]))],
        axis=-1)

    # Apply tf nms on standard bounding boxes
    nms_indices = tf.image.non_max_suppression(boxes=rot_boxes,
                                               scores=filter_score,
                                               max_output_size=max_boxes,
                                               iou_threshold=nms_thresh, name='nms_indices')
    return nms_indices
```
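A quick way I smoke-test the rewrite outside the full pipeline (a sketch; the import path is an assumption about where nms_rotate.py lives, and theta is in degrees):

```python
# Smoke test for the TF-only rotated NMS (a sketch, not part of the repo):
# two heavily overlapping boxes plus one far-away box should keep two.
import tensorflow as tf

from libs.box_utils.nms_rotate import gpu_nms  # assumed module path

boxes = tf.constant([[100., 100., 50., 20., 30.],   # box A
                     [101., 101., 50., 20., 31.],   # near-duplicate of A
                     [300., 300., 40., 15., 0.]],   # far from both
                    dtype=tf.float32)
scores = tf.constant([0.9, 0.8, 0.7], dtype=tf.float32)

keep = gpu_nms(boxes, scores, max_boxes=50, score_thresh=0.5, nms_thresh=0.5)

with tf.Session() as sess:
    print(sess.run(keep))  # expect two surviving indices, e.g. [0 2]
```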
  2. I also rewrote the generate_anchors_pre function as below.

```python
def generate_anchors_pre(height, width, feat_stride, anchor_scales=(8, 16, 32),
                         anchor_ratios=(0.5, 1, 2), base_size=16):
    """A wrapper function to generate anchors given different scales;
    returns a tensor of shape [K * A, 4].
    """
    anchors = generate_anchors(
        base_size=base_size, ratios=np.array(anchor_ratios),
        scales=np.array(anchor_scales))
    A = anchors.shape[0]

    shift_x = tf.range(0, width) * feat_stride
    shift_y = tf.range(0, height) * feat_stride
    shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
    shifts = tf.transpose(tf.stack([tf.reshape(shift_x, [-1]), tf.reshape(shift_y, [-1]),
                                    tf.reshape(shift_x, [-1]), tf.reshape(shift_y, [-1])]))

    K = tf.shape(shifts)[0]

    # width changes faster, so here it is H, W, C
    anchors = tf.reshape(tf.convert_to_tensor(anchors, np.float32), [1, A, 4]) + \
              tf.cast(tf.transpose(tf.reshape(shifts, [1, K, 4]), perm=[1, 0, 2]), dtype=tf.float32)
    anchors = tf.reshape(anchors, [K * A, 4])

    return anchors
```

    Here I just converted the NumPy code to TensorFlow code.
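As a quick parity check, I run the rewrite for one small feature map and verify the anchor count (a sketch; it assumes generate_anchors_pre above and the repo's generate_anchors are importable):

```python
# Sketch: the TF rewrite should emit K * A anchors; for a 10x10 feature map
# with 3 scales * 3 ratios that is 10 * 10 * 9 = 900 anchors of 4 coords.
import tensorflow as tf

anchors_tf = generate_anchors_pre(height=10, width=10, feat_stride=8,
                                  anchor_scales=(8, 16, 32),
                                  anchor_ratios=(0.5, 1, 2))

with tf.Session() as sess:
    print(sess.run(anchors_tf).shape)  # expect (900, 4)
```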

  3. After these changes, I exported a frozen graph with a fixed input size using the script below.

    
```python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function, division

import os
import sys

import tensorflow as tf
from tensorflow.python.tools import freeze_graph

sys.path.append('../../')
from libs.configs import cfgs
from libs.networks import build_whole_network_r3det

CKPT_PATH = 'path/to/checkpoint'
OUT_DIR = './output/Pbs'
PB_NAME = 'R3Det.pb'


def build_detection_graph():
    # 1. preprocess img: a fixed input shape keeps the graph TFLite-friendly
    img_plac = tf.placeholder(dtype=tf.float32, shape=[1, 640, 640, 3],
                              name='input_img')  # is RGB. not BGR

    det_net = build_whole_network_r3det.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                         is_training=False)

    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_plac,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    boxes = tf.transpose(tf.stack([detection_boxes[:, 0], detection_boxes[:, 1],
                                   detection_boxes[:, 2], detection_boxes[:, 3],
                                   detection_boxes[:, 4]]))

    dets = tf.concat([tf.reshape(detection_category, [-1, 1]),
                      tf.reshape(detection_scores, [-1, 1]),
                      boxes], axis=1, name='DetResults')

    return dets


def export_frozenPB():
    tf.reset_default_graph()

    dets = build_detection_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print("we have restored the weights from =====>>\n", CKPT_PATH)
        saver.restore(sess, CKPT_PATH)

        tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME)
        freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME),
                                  input_saver='',
                                  input_binary=False,
                                  input_checkpoint=CKPT_PATH,
                                  output_node_names="DetResults",
                                  restore_op_name="save/restore_all",
                                  filename_tensor_name='save/Const:0',
                                  output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')),
                                  clear_devices=False,
                                  initializer_nodes='')


if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = ''
    export_frozenPB()
```


  4. I converted the frozen .pb file to a TFLite model using the following script.
```python
import tensorflow as tf

def convert_tflite_model_dynamic(saved_model_path, tflite_path):

    converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
        graph_def_file=saved_model_path,  # both `.pb` and `.pbtxt` files are accepted.
        input_arrays=['input_img'],
        input_shapes={'input_img': [1, 640, 640, 3]},
        output_arrays=['DetResults'])

    converter.optimizations = [tf.lite.Optimize.DEFAULT]

    converter.target_spec.supported_ops = [
      tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
      tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
    ]
    converter.allow_custom_ops = True
    converter.experimental_new_converter = True
    tflite_model = converter.convert()

    with tf.io.gfile.GFile(tflite_path, 'wb') as f:
        f.write(tflite_model)

    print('Quantized model:', tflite_path,
          'Size:', len(tflite_model) / 1024, "kb")

saved_model_path = 'path/to/saved/model'  # the frozen .pb from the previous step
convert_tflite_model_dynamic(saved_model_path, 'detect_mn.tflite')
```

I am attaching the converted model so you can inspect it in something like https://netron.app/: detect_mn.zip

I checked the model and, instead of having 100 detections with shape [100, 7], the output seems to show only one as [1, 7]. [image]
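For reference, this is roughly the script I use to poke at the converted model from Python (a sketch; detect_mn.tflite is the attachment above):

```python
# Sketch: load the converted model with the Python TFLite interpreter, print
# the reported I/O shapes, and run one dummy inference.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='detect_mn.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print('input shape: ', input_details[0]['shape'])   # expect [1, 640, 640, 3]
print('output shape:', output_details[0]['shape'])  # reported as [1, 7] here

dummy = np.random.rand(1, 640, 640, 3).astype(np.float32)
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
print(interpreter.get_tensor(output_details[0]['index']).shape)
```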

  5. I tried testing the model using the Python TFLite interpreter, but it gave me a floating-point exception and crashed with error code 139; the error was not very descriptive. I then ran the model in Android Studio with TFLite version 2.4.0, and it ran, but the output is [0, 7]. At first I thought the model simply wasn't trained well enough to produce any detections, but that doesn't explain how to deal with an output of shape [0, 7].

  6. I tried retraining the model with these new functions (previously I trained with the original functions, which also didn't give good results, but at that time I was more focused on converting the model to TFLite). I am training it on this dataset of book spines: https://data.4tu.nl/articles/dataset/Data_mannually-labelled_accompanying_the_research_on_segmentation_of_book-spine_images/12688436. The training curves are shown below.

Classification loss: [image]

Regression loss: [image]

Total loss: [image]

Sample detections: [images]

The training doesn't seem to converge. My config file is below.

```python
# -*- coding: utf-8 -*-
from __future__ import division, print_function, absolute_import
import os
import tensorflow as tf
import math

"""
v12 + one refine stage + resnet152 + data aug. + MS

Multi-scale test

This is your result for task 1:

    mAP: 0.7623486563428695
    ap of each class: plane:0.8970069480324703,
    baseball-diamond:0.8333891707216258,
    bridge:0.5043711291835025,
    ground-track-field:0.6731295782071659,
    small-vehicle:0.7897854187273754,
    large-vehicle:0.8278325775035011,
    ship:0.8785872624980835,
    tennis-court:0.9081993104399961,
    basketball-court:0.8556313751021464,
    storage-tank:0.8532797469563389,
    soccer-ball-field:0.6556255595096467,
    roundabout:0.6152145549536566,
    harbor:0.6729826099593654,
    swimming-pool:0.7811243230807063,
    helicopter:0.6890702802674632
    The submitted information is :

Description: RetinaNet_DOTA_R3Det_4x_20200819_183.6w_ms
Username: SJTU-Det
Institute: SJTU
Emailadress: yangxue-2019-sjtu@sjtu.edu.cn
TeamMembers: yangxue

add flip
This is your result for task 1:

    mAP: 0.7647417332616955
    ap of each class: plane:0.8980075439345729,
    baseball-diamond:0.8377264712427869,
    bridge:0.48115376816317507,
    ground-track-field:0.6677152155629779,
    small-vehicle:0.7876448261580992,
    large-vehicle:0.8327169902915853,
    ship:0.8783577280870772,
    tennis-court:0.9082236256734083,
    basketball-court:0.8538214275625156,
    storage-tank:0.8551179251235709,
    soccer-ball-field:0.6566936371507965,
    roundabout:0.6268171396548151,
    harbor:0.6752747779498159,
    swimming-pool:0.7856280736874842,
    helicopter:0.7262268486827509

The submitted information is :

Description: RetinaNet_DOTA_R3Det_4x_20200819_183.6w_ms_flip
Username: SJTU-Det
Institute: SJTU
Emailadress: yangxue-2019-sjtu@sjtu.edu.cn
TeamMembers: yangxue

"""

# ------------------------------------------------
VERSION = 'RetinaNet_DOTA_R3Det_4x_20200819'
NET_NAME = 'MobilenetV2' # 'resnet152_v1d'
ADD_BOX_IN_TENSORBOARD = True

# ---------------------------------------- System_config
ROOT_PATH = os.path.abspath('../')
print(20*"++--")
print(ROOT_PATH)
GPU_GROUP = "0"
NUM_GPU = 0 #len(GPU_GROUP.strip().split(','))
SHOW_TRAIN_INFO_INTE = 20
SMRY_ITER = 100
SAVE_WEIGHTS_INTE = 100 #27000 * 4
SAVE_WEIGHTS_INTE_2 = 25000 * 4
SUMMARY_PATH = ROOT_PATH + '/output/summary'
TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result'

if NET_NAME.startswith("resnet"):
    weights_name = NET_NAME
elif NET_NAME.startswith("MobilenetV2"):
    weights_name = "mobilenet/mobilenet_v2_1.0_224"
else:
    raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]')

PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt'
TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights')
EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/'

# ------------------------------------------ Train config
RESTORE_FROM_RPN = False
FIXED_BLOCKS = 1  # allow 0~3
FREEZE_BLOCKS = [True, False, False, False, False]  # for gluoncv backbone
USE_07_METRIC = True

MUTILPY_BIAS_GRADIENT = 2.0  # if None, will not multipy
GRADIENT_CLIPPING_BY_NORM = 10.0  # if None, will not clip

CLS_WEIGHT = 1.0
REG_WEIGHT = 1.0
USE_IOU_FACTOR = True
ALPHA = 1.0
BETA = 1.0

BATCH_SIZE = 4
EPSILON = 1e-5
MOMENTUM = 0.9
LR = 1e-4
DECAY_STEP = [SAVE_WEIGHTS_INTE_2*12, SAVE_WEIGHTS_INTE_2*16, SAVE_WEIGHTS_INTE_2*20]
MAX_ITERATION = 20000
# MAX_ITERATION = SAVE_WEIGHTS_INTE_2*20
WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE_2)

# -------------------------------------------- Data_preprocess_config
DATASET_NAME = 'DOTA'  # 'pascal', 'DOTA', 'coco'
#PIXEL_MEAN = [123.68, 116.779, 103.939]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
#PIXEL_MEAN_ = [0.485, 0.456, 0.406]
#PIXEL_STD = [0.229, 0.224, 0.225]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR

# For book-spine dataset
PIXEL_MEAN = [127.958, 124.471, 124.831]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
PIXEL_MEAN_ = [0.502, 0.488, 0.490]
PIXEL_STD = [0.255, 0.241, 0.246]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR

IMG_SHORT_SIDE_LEN = [800, 640, 700, 900, 1000, 1100] #[640]
IMG_MAX_LENGTH = 1100
CLASS_NUM = 1

IMG_ROTATE = True
RGB2GRAY = True
VERTICAL_FLIP = True
HORIZONTAL_FLIP = True
IMAGE_PYRAMID = True

# --------------------------------------------- Network_config
SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None)
SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0)
PROBABILITY = 0.01
FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY))
WEIGHT_DECAY = 1e-4
USE_GN = False
NUM_SUBNET_CONV = 4
NUM_REFINE_STAGE = 1
USE_RELU = False
FPN_CHANNEL = 256

# ---------------------------------------------Anchor config
LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7']
BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512] 
ANCHOR_STRIDE = [8, 16, 32, 64, 128]
ANCHOR_SCALES = [2 ** 1.5, 2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3.] # I also tried [5., 1 / 5., 7., 1/7., 9., 1/9., 11., 1/11.] 
ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15]
ANCHOR_SCALE_FACTORS = None
USE_CENTER_OFFSET = True
METHOD = 'H'
USE_ANGLE_COND = False
ANGLE_RANGE = 90

# --------------------------------------------RPN config
SHARE_NET = True
USE_P5 = True
IOU_POSITIVE_THRESHOLD = 0.5 #0.3
IOU_NEGATIVE_THRESHOLD = 0.4 #0.3
REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] #[0.5, 0.9]
REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6]

NMS = True
NMS_IOU_THRESHOLD = 0.1
MAXIMUM_DETECTIONS = 100
FILTERED_SCORE = 0.05
VIS_SCORE = 0.05

# --------------------------------------------MASK config
USE_SUPERVISED_MASK = False
MASK_TYPE = 'r'  # r or h
BINARY_MASK = False
SIGMOID_ON_DOT = False
MASK_ACT_FET = True  # whether to use the mask to generate 256 channels to dot the feature
GENERATE_MASK_LIST = ["P3", "P4", "P5", "P6", "P7"]
ADDITION_LAYERS = [4, 4, 3, 2, 2]  # add 4 layer to generate P2_mask, 2 layer to generate P3_mask
ENLAEGE_RF_LIST = ["P3", "P4", "P5", "P6", "P7"]
SUPERVISED_MASK_LOSS_WEIGHT = 1.0

I also trained a model for 200,000 steps, but it did not improve the results. I would appreciate it if you could go through the changes and give any suggestions on how to solve these issues. Thank you.

RomStriker commented 3 years ago

@yangxue0827 Can you please reopen this issue so it's easier to find for other people who might be able to contribute as well? Thanks.

yangxue0827 commented 3 years ago

Maybe you can try yangxue0827/RotationDetection, which contains many methods.

RomStriker commented 3 years ago

Thanks, yeah I'll check that out as well.