Closed RomStriker closed 3 years ago
I managed to train an R3Det model with a MobileNetV2 backbone on a custom dataset, and now I am trying to convert it into a TFLite model. The first step is to get a frozen inference graph from the checkpoint; for that, I need to know the names of the output nodes. I cannot seem to find them: I tried inspecting the graph in TensorBoard, exported a human-readable .pbtxt graph, and went through the code. It would be great if the author or somebody else could help me out with this. Thanks.
I have modified the export_pbs script and I am able to get a frozen graph with it. I will try to convert it further to a TFLite model and will post an update here with whatever I get. The modified export_pbs script is below. If you think I have made a mistake, please point it out. I would appreciate that.
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
import os
import sys
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from libs.networks import build_whole_network_r3det
sys.path.append('../../')
from data.io.image_preprocess import short_side_resize_for_inference_data
from libs.configs import cfgs
CKPT_PATH = '/home/test/R3Det_Tensorflow/output/trained_weights/RetinaNet_DOTA_R3Det_4x_20200819/DOTA_5801model.ckpt'
OUT_DIR = './output/Pbs'
PB_NAME = 'R3Det.pb'
def build_detection_graph():
    """Build the single-image R3Det inference graph that gets exported.

    One RGB uint8 image is fed through the ``input_img`` placeholder, resized
    and normalised according to ``cfgs``, and run through the detection
    network with ``is_training=False``. Box centres and sizes are then scaled
    from the resized image back to the resolution of the image that was fed.

    :return: tensor named ``DetResults`` with one row per detection and the
        columns [category, score, x_c, y_c, w, h, theta]
    """
    # 1. preprocess img
    img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3],
                              name='input_img')  # is RGB. not BGR
    raw_shape = tf.shape(img_plac)
    # tf.to_float is deprecated in TF 1.x; tf.cast is the documented replacement.
    raw_h = tf.cast(raw_shape[0], tf.float32)
    raw_w = tf.cast(raw_shape[1], tf.float32)

    img_batch = tf.cast(img_plac, tf.float32)

    # Multi-scale configs store IMG_SHORT_SIDE_LEN as a list of scales, while
    # the exported graph resizes to a single scale, so take the first entry.
    # NOTE(review): assumes the resize helper expects one scalar length -
    # confirm against data.io.image_preprocess.
    short_side_len = cfgs.IMG_SHORT_SIDE_LEN
    if isinstance(short_side_len, (list, tuple)):
        short_side_len = short_side_len[0]
    img_batch = short_side_resize_for_inference_data(img_tensor=img_batch,
                                                     target_shortside_len=short_side_len,
                                                     length_limitation=cfgs.IMG_MAX_LENGTH)

    if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
        img_batch = (img_batch / 255 - tf.constant(cfgs.PIXEL_MEAN_)) / tf.constant(cfgs.PIXEL_STD)
    else:
        img_batch = img_batch - tf.constant(cfgs.PIXEL_MEAN)
    img_batch = tf.expand_dims(img_batch, axis=0)  # [1, None, None, 3]

    # 2. run the detector in inference mode (no ground-truth boxes)
    det_net = build_whole_network_r3det.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                         is_training=False)
    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_batch,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    x_c, y_c, w, h, theta = detection_boxes[:, 0], detection_boxes[:, 1], \
                            detection_boxes[:, 2], detection_boxes[:, 3], \
                            detection_boxes[:, 4]

    # 3. map the boxes from the resized image back to the raw image size
    resized_shape = tf.shape(img_batch)
    resized_h = tf.cast(resized_shape[1], tf.float32)
    resized_w = tf.cast(resized_shape[2], tf.float32)

    x_c = x_c * raw_w / resized_w
    w = w * raw_w / resized_w
    y_c = y_c * raw_h / resized_h
    h = h * raw_h / resized_h

    boxes = tf.transpose(tf.stack([x_c, y_c, w, h, theta]))
    # The op name is the output node name handed to freeze_graph.
    dets = tf.concat([tf.reshape(detection_category, [-1, 1]),
                      tf.reshape(detection_scores, [-1, 1]),
                      boxes], axis=1, name='DetResults')
    return dets
def export_frozenPB():
    """Restore the checkpoint, dump the graph and freeze it.

    The graph is written to ``OUT_DIR/PB_NAME`` and the frozen graph, whose
    only requested output node is ``DetResults``, to the same directory with
    the ``.pb`` suffix replaced by ``_Frozen.pb``.
    """
    tf.reset_default_graph()

    # Called for its side effect of populating the default graph; the returned
    # tensor is addressed by its node name below, so it is not kept.
    build_detection_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, CKPT_PATH)
        # Report only after the restore has actually succeeded.
        print("we have restored the weights from =====>>\n", CKPT_PATH)

        # write_graph is called without as_text, and freeze_graph is told the
        # input is not binary (input_binary=False) to match.
        tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME)
        freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME),
                                  input_saver='',
                                  input_binary=False,
                                  input_checkpoint=CKPT_PATH,
                                  output_node_names="DetResults",
                                  restore_op_name="save/restore_all",
                                  filename_tensor_name='save/Const:0',
                                  output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')),
                                  clear_devices=False,
                                  initializer_nodes='')
if __name__ == '__main__':
    # An empty CUDA_VISIBLE_DEVICES exposes no GPU to the process, so the
    # export below runs on the CPU.
    os.environ["CUDA_VISIBLE_DEVICES"] = ''
    export_frozenPB()
So I managed to convert the model into a TFLite model. However, the model graph contains `tf.py_func`, and this introduces a problem: when the model is serialized, the Python function body wrapped by `tf.py_func` is not serialized with it. This gives me the following error when trying to run inference with the model on Android.
Caused by: java.lang.IllegalStateException: Internal error: Unexpected failure when preparing tensor allocations: Encountered unresolved custom op: PyFunc.
Node number 77 (PyFunc) failed to prepare.
Some of the occurrences of `tf.py_func` that I found in the inference model include:
#line 244 in build_whole_network_r3det.py
tmp_anchors = tf.py_func(generate_anchors.generate_anchors_pre,
inp=[featuremap_height, featuremap_width, stride,
np.array(cfgs.ANCHOR_SCALES) * stride, cfgs.ANCHOR_RATIOS, 4.0],
Tout=[tf.float32])
#line 39 in nms_rotate.py
keep = tf.py_func(nms_rotate_cpu,
inp=[decode_boxes, scores, iou_threshold, max_output_size],
Tout=tf.int64)
Do you think it is possible to write these functions using TensorFlow ops so we don't have to use `tf.py_func`? And do you think it can be done in a short time by someone who doesn't have experience writing models in TensorFlow? Or should I switch to a different model, if my main objective is to run this model on a mobile device?
Hi,
I managed to do some work regarding the conversion. Below are the changes I made.
def nms_rotate(decode_boxes, scores, iou_threshold, max_output_size,
               use_angle_condition=False, angle_threshold=0, use_gpu=True, gpu_id=0):
    """
    Rotated-NMS entry point; forwards to the pure-TensorFlow `gpu_nms`.

    :param decode_boxes: format [x_c, y_c, w, h, theta]
    :param scores: scores of boxes
    :param iou_threshold: iou threshold (0.7 or 0.5), passed on as `nms_thresh`
    :param max_output_size: max number of output, passed on as `max_boxes`
    :param use_angle_condition: accepted but not used by this body
    :param angle_threshold: accepted but not used by this body
    :param use_gpu: accepted but not used by this body
    :param gpu_id: accepted but not used by this body
    :return: the index tensor produced by `gpu_nms`
    """
    return gpu_nms(decode_boxes, scores,
                   nms_thresh=iou_threshold,
                   max_boxes=max_output_size)
# Reference: https://github.com/dohoseok/context-based-parking-slot-detect/blob/master/parking_slot_detector/utils/nms_utils.py
def gpu_nms(boxes, scores, max_boxes=50, score_thresh=0.5, nms_thresh=0.5, apply_rotate=True):
    """
    Perform NMS using TensorFlow ops only (no tf.py_func).

    Two defects of the earlier version are fixed here:
      * the returned indices now refer to the rows of the `boxes` / `scores`
        that were passed in, not to the score-filtered subset, so a caller can
        gather from its own tensors;
      * corners are built around the box centre and rotated about that centre,
        as the [x_c, y_c, w, h, theta] format requires, instead of treating
        (x_c, y_c) as the top-left corner and rotating about the image origin.

    params:
        boxes: [num_boxes, 5] tensor, format [x_c, y_c, w, h, theta], theta in degrees
        scores: 1-D tensor of length num_boxes (it masks the rows of `boxes`)
        max_boxes: integer, maximum number of predicted boxes you'd like, default is 50
        score_thresh: boxes whose score is below this value are dropped before NMS
        nms_thresh: real value, "intersection over union" threshold used for NMS filtering
        apply_rotate: if True use the rotation-aware path (`rot_nms`), otherwise
            plain NMS on the un-rotated extents
    return:
        int64 tensor with the indices, into the input `boxes`, of the kept boxes
    """
    max_boxes = tf.constant(max_boxes, dtype='int32')

    x_c, y_c = boxes[..., 0], boxes[..., 1]
    half_w, half_h = boxes[..., 2] / 2.0, boxes[..., 3] / 2.0
    angles = boxes[..., 4] * (m.pi / 180)  # degrees -> radians

    # Axis-aligned [x1, y1, x2, y2] extent of each box, ignoring its rotation.
    boxes = tf.stack([x_c - half_w, y_c - half_h,
                      x_c + half_w, y_c + half_h], axis=1)

    # Corner offsets from the centre, shape [N, 2, 4]: row 0 holds x offsets,
    # row 1 holds y offsets, one column per corner.
    offsets = tf.stack([tf.stack([-half_w, half_w, half_w, -half_w], axis=-1),
                        tf.stack([-half_h, -half_h, half_h, half_h], axis=-1)], axis=1)

    rot_x = tf.stack([tf.cos(angles), -tf.sin(angles)], -1)
    rot_y = tf.stack([tf.sin(angles), tf.cos(angles)], -1)
    rot_mat = tf.stack([rot_x, rot_y], -2)  # [N, 2, 2]

    # Rotate the offsets about the centre, then translate to the centre.
    centers = tf.expand_dims(tf.stack([x_c, y_c], axis=-1), -1)  # [N, 2, 1]
    quads = tf.einsum('bij,bjk->bik', rot_mat, offsets) + centers  # [N, 2, 4]
    quads = tf.transpose(quads, perm=[0, 2, 1])  # [N, 4, 2]
    quads = tf.reshape(quads, [-1, 8])  # x0, y0, x1, y1, x2, y2, x3, y3

    mask = tf.greater_equal(scores, tf.constant(score_thresh))
    # Remember which input rows survive the score filter so that the NMS
    # result can be mapped back to input indices.
    kept_idx = tf.boolean_mask(tf.range(tf.shape(scores)[0]), mask)
    filter_boxes = tf.boolean_mask(boxes, mask)
    filter_score = tf.boolean_mask(scores, mask)
    filter_quads = tf.boolean_mask(quads, mask)

    if apply_rotate:
        # rot_nms indexes the best-scoring quad, which needs at least one
        # candidate; with none left, fall back to plain NMS on the empty set.
        nms_indices = tf.cond(tf.greater(tf.shape(filter_score)[0], 0),
                              lambda: rot_nms(filter_score, filter_quads, max_boxes, nms_thresh),
                              lambda: tf.image.non_max_suppression(boxes=filter_boxes,
                                                                   scores=filter_score,
                                                                   max_output_size=max_boxes,
                                                                   iou_threshold=nms_thresh, name='nms_indices')
                              )
    else:
        nms_indices = tf.image.non_max_suppression(boxes=filter_boxes,
                                                   scores=filter_score,
                                                   max_output_size=max_boxes,
                                                   iou_threshold=nms_thresh, name='nms_indices')

    # nms_indices address the filtered tensors; translate to input rows.
    return tf.cast(tf.gather(kept_idx, nms_indices), dtype=tf.int64)
def rot_nms(filter_score, filter_quads, max_boxes, nms_thresh):
    """
    Approximate rotated NMS with TensorFlow's axis-aligned NMS.

    All quads are turned back by the orientation of the best-scoring quad,
    each is replaced by its bounding rectangle, and standard NMS is run on
    those rectangles.

    :param filter_score: scores of the candidate boxes
    :param filter_quads: [N, 8] corners as x0, y0, x1, y1, x2, y2, x3, y3
    :param max_boxes: maximum number of indices to return
    :param nms_thresh: iou threshold handed to tf.image.non_max_suppression
    :return: indices (into the inputs) selected by the NMS op
    """
    # Orientation of the top-scoring quad: the vector from the midpoint of
    # edge 0-1 to the midpoint of edge 2-3 (both scaled by two).
    top_quad = filter_quads[tf.argmax(filter_score)]
    dy = top_quad[..., 7] + top_quad[..., 5] - top_quad[..., 3] - top_quad[..., 1]
    dx = top_quad[..., 6] + top_quad[..., 4] - top_quad[..., 2] - top_quad[..., 0]
    theta = tf.atan2(dy, dx)

    cos_t, sin_t = tf.cos(theta), tf.sin(theta)
    rotation = tf.stack([tf.stack([cos_t, -sin_t], -1),
                         tf.stack([sin_t, cos_t], -1)], -2)

    # Contracting over the first index of `rotation` multiplies every corner
    # by its transpose, i.e. applies the inverse rotation:
    # out[l, i, k] = sum_j rotation[j, k] * corners[l, i, j]
    corners = tf.reshape(filter_quads, [-1, 4, 2])
    aligned = tf.einsum('jk,lij->lik', rotation, corners)  # [N, 4, 2]

    # Bounding rectangle of the four rotated corners:
    # [min_x, min_y, max_x, max_y].
    rot_boxes = tf.concat([tf.reduce_min(aligned, axis=1),
                           tf.reduce_max(aligned, axis=1)], axis=-1)

    return tf.image.non_max_suppression(boxes=rot_boxes,
                                        scores=filter_score,
                                        max_output_size=max_boxes,
                                        iou_threshold=nms_thresh, name='nms_indices')
I also rewrote the generate_anchors_pre function as below.
def generate_anchors_pre(height, width, feat_stride, anchor_scales=(8, 16, 32),
                         anchor_ratios=(0.5, 1, 2), base_size=16):
    """ Tile the base anchors over a height x width feature-map grid using
    TensorFlow ops.

    The base anchors come from `generate_anchors` (NumPy); one shifted copy is
    produced per grid cell, with cells spaced `feat_stride` apart.
    Returns a float32 tensor of shape [height * width * A, 4], where A is the
    number of base anchors.
    """
    base_anchors = generate_anchors(
        base_size=base_size, ratios=np.array(anchor_ratios),
        scales=np.array(anchor_scales))
    num_base = base_anchors.shape[0]

    grid_x, grid_y = tf.meshgrid(tf.range(0, width) * feat_stride,
                                 tf.range(0, height) * feat_stride)
    flat_x = tf.reshape(grid_x, [-1])
    flat_y = tf.reshape(grid_y, [-1])
    # One (dx, dy, dx, dy) row per grid cell, applied to both box corners.
    shifts = tf.stack([flat_x, flat_y, flat_x, flat_y], axis=1)
    num_cells = tf.shape(shifts)[0]

    # width changes faster, so here it is H, W, C
    # [1, A, 4] + [K, 1, 4] broadcasts to [K, A, 4].
    tiled = tf.reshape(tf.convert_to_tensor(base_anchors, np.float32), [1, num_base, 4]) + \
        tf.cast(tf.reshape(shifts, [num_cells, 1, 4]), dtype=tf.float32)
    return tf.reshape(tiled, [num_cells * num_base, 4])
Here I just converted the Numpy code to TensorFlow code.
After these changes I converted the model using below script.
# -*- coding: utf-8 -*-
from future import absolute_import, print_function, division
import os import sys
import tensorflow as tf from tensorflow.python.tools import freeze_graph
from libs.networks import build_whole_network_r3det
sys.path.append('../../') from data.io.image_preprocess import short_side_resize_for_inference_data from libs.configs import cfgs
CKPT_PATH = 'path/to/checkpoint' OUT_DIR = './output/Pbs' PB_NAME = 'R3Det.pb'
def build_detection_graph():
    """Build a fixed-shape inference graph for conversion.

    The ``input_img`` placeholder is a float32 tensor of shape
    [1, 640, 640, 3] and is fed to the network as-is: this function performs
    no resizing, mean subtraction or rescaling of the boxes, so any such
    preprocessing has to be applied by whoever feeds the graph.

    :return: tensor named ``DetResults`` with one row per detection and the
        columns [category, score, x_c, y_c, w, h, theta]
    """
    img_plac = tf.placeholder(dtype=tf.float32, shape=[1, 640, 640, 3],
                              name='input_img')  # is RGB. not BGR

    det_net = build_whole_network_r3det.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                         is_training=False)
    detection_boxes, detection_scores, detection_category = det_net.build_whole_detection_network(
        input_img_batch=img_plac,
        gtboxes_batch_h=None,
        gtboxes_batch_r=None)

    # Re-assemble the five box columns side by side: [N, 5].
    box_columns = [detection_boxes[:, col] for col in range(5)]
    boxes = tf.stack(box_columns, axis=1)

    category_col = tf.reshape(detection_category, [-1, 1])
    score_col = tf.reshape(detection_scores, [-1, 1])
    # The op name is the output node name handed to freeze_graph.
    return tf.concat([category_col, score_col, boxes], axis=1, name='DetResults')
def export_frozenPB():
    """Restore the checkpoint, dump the graph and freeze it.

    The graph is written to ``OUT_DIR/PB_NAME`` and the frozen graph, whose
    only requested output node is ``DetResults``, to the same directory with
    the ``.pb`` suffix replaced by ``_Frozen.pb``.
    """
    tf.reset_default_graph()

    # Called for its side effect of populating the default graph; the returned
    # tensor is addressed by its node name below, so it is not kept.
    build_detection_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, CKPT_PATH)
        # Report only after the restore has actually succeeded.
        print("we have restored the weights from =====>>\n", CKPT_PATH)

        # write_graph is called without as_text, and freeze_graph is told the
        # input is not binary (input_binary=False) to match.
        tf.train.write_graph(sess.graph_def, OUT_DIR, PB_NAME)
        freeze_graph.freeze_graph(input_graph=os.path.join(OUT_DIR, PB_NAME),
                                  input_saver='',
                                  input_binary=False,
                                  input_checkpoint=CKPT_PATH,
                                  output_node_names="DetResults",
                                  restore_op_name="save/restore_all",
                                  filename_tensor_name='save/Const:0',
                                  output_graph=os.path.join(OUT_DIR, PB_NAME.replace('.pb', '_Frozen.pb')),
                                  clear_devices=False,
                                  initializer_nodes='')
if __name__ == '__main__':
    # An empty CUDA_VISIBLE_DEVICES exposes no GPU to the process, so the
    # export below runs on the CPU.
    os.environ["CUDA_VISIBLE_DEVICES"] = ''
    export_frozenPB()
4. I converted the frozen pb file to a tflite model using the following script.
```python
import tensorflow as tf
def convert_tflite_model_dynamic(saved_model_path, tflite_path, type='style_predict'):
    """Convert a frozen graph to a TFLite flatbuffer and write it to disk.

    :param saved_model_path: path handed to ``from_frozen_graph`` as the
        graph-def file (a frozen graph, despite the parameter name)
    :param tflite_path: destination of the converted model
    :param type: accepted but not used by this body
    """
    converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
        graph_def_file=saved_model_path,  # both `.pb` and `.pbtxt` files are accepted.
        input_arrays=['input_img'],
        input_shapes={'input_img': [1, 640, 640, 3]},
        output_arrays=['DetResults'])

    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    builtin_ops = tf.lite.OpsSet.TFLITE_BUILTINS  # enable TensorFlow Lite ops.
    select_tf_ops = tf.lite.OpsSet.SELECT_TF_OPS  # enable TensorFlow ops.
    converter.target_spec.supported_ops = [builtin_ops, select_tf_ops]
    # NOTE(review): allowing custom ops lets the conversion finish even when
    # an op has no TFLite or select-TF implementation; such an op then has to
    # be resolved by the runtime - confirm this is intended.
    converter.allow_custom_ops = True
    converter.experimental_new_converter = True

    flatbuffer = converter.convert()
    with tf.io.gfile.GFile(tflite_path, 'wb') as out_file:
        out_file.write(flatbuffer)

    size_kb = len(flatbuffer) / 1024
    print('Quantized model:', tflite_path,
          'Size:', size_kb, "kb")
# Placeholder path: the converter function reads it with from_frozen_graph, so
# it must point at the frozen graph file rather than a SavedModel directory.
saved_model_path = 'path/to/saved/model'
# Runs the conversion at import time and writes detect_mn.tflite.
convert_tflite_model_dynamic(saved_model_path, 'detect_mn.tflite')
```
I am attaching the converted model so that you can inspect it in a tool such as https://netron.app/: detect_mn.zip
I checked the model and instead of having 100 detections in form [100, 7] the output seems to show only 1 as [1, 7].
I tried testing the model using the python tflite interpreter but that gave me a floating point exception and crashed with error code 139. The error was not very descriptive here. I tried to run the model in Android Studio with tflite version 2.4.0, and it ran but the output is [0, 7]. I thought the model was not trained properly so it was not giving any outputs but that doesn't make sense as I don't know how to deal with [0,7] output.
I tried retraining the model with these new functions (Previously I trained with the original functions, which also didn't give good results, but at that time I was more focused on converting the model to TFLite). I am training it on this dataset of book spines [https://data.4tu.nl/articles/dataset/Data_mannually-labelled_accompanying_the_research_on_segmentation_of_book-spine_images/12688436]. The training looks like below.
Classification Loss:
Regression Loss:
Total Loss:
Images:
The training doesn't seem to converge. My config file looks as below.
# -*- coding: utf-8 -*-
from __future__ import division, print_function, absolute_import
import os
import tensorflow as tf
import math
"""
v12 + one refine stage + resnet152 + data aug. + MS
Multi-scale test
This is your result for task 1:
mAP: 0.7623486563428695
ap of each class: plane:0.8970069480324703,
baseball-diamond:0.8333891707216258,
bridge:0.5043711291835025,
ground-track-field:0.6731295782071659,
small-vehicle:0.7897854187273754,
large-vehicle:0.8278325775035011,
ship:0.8785872624980835,
tennis-court:0.9081993104399961,
basketball-court:0.8556313751021464,
storage-tank:0.8532797469563389,
soccer-ball-field:0.6556255595096467,
roundabout:0.6152145549536566,
harbor:0.6729826099593654,
swimming-pool:0.7811243230807063,
helicopter:0.6890702802674632
The submitted information is :
Description: RetinaNet_DOTA_R3Det_4x_20200819_183.6w_ms
Username: SJTU-Det
Institute: SJTU
Emailadress: yangxue-2019-sjtu@sjtu.edu.cn
TeamMembers: yangxue
add flip
This is your result for task 1:
mAP: 0.7647417332616955
ap of each class: plane:0.8980075439345729,
baseball-diamond:0.8377264712427869,
bridge:0.48115376816317507,
ground-track-field:0.6677152155629779,
small-vehicle:0.7876448261580992,
large-vehicle:0.8327169902915853,
ship:0.8783577280870772,
tennis-court:0.9082236256734083,
basketball-court:0.8538214275625156,
storage-tank:0.8551179251235709,
soccer-ball-field:0.6566936371507965,
roundabout:0.6268171396548151,
harbor:0.6752747779498159,
swimming-pool:0.7856280736874842,
helicopter:0.7262268486827509
The submitted information is :
Description: RetinaNet_DOTA_R3Det_4x_20200819_183.6w_ms_flip
Username: SJTU-Det
Institute: SJTU
Emailadress: yangxue-2019-sjtu@sjtu.edu.cn
TeamMembers: yangxue
"""
# ------------------------------------------------
VERSION = 'RetinaNet_DOTA_R3Det_4x_20200819'
NET_NAME = 'MobilenetV2' # 'resnet152_v1d'
ADD_BOX_IN_TENSORBOARD = True
# ---------------------------------------- System_config
ROOT_PATH = os.path.abspath('../')
print(20*"++--")
print(ROOT_PATH)
GPU_GROUP = "0"
NUM_GPU = 0 #len(GPU_GROUP.strip().split(','))
SHOW_TRAIN_INFO_INTE = 20
SMRY_ITER = 100
SAVE_WEIGHTS_INTE = 100 #27000 * 4
SAVE_WEIGHTS_INTE_2 = 25000 * 4
SUMMARY_PATH = ROOT_PATH + '/output/summary'
TEST_SAVE_PATH = ROOT_PATH + '/tools/test_result'
# Pick the pretrained-checkpoint name for the configured backbone; any other
# backbone name is rejected up front.
if not NET_NAME.startswith(("resnet", "MobilenetV2")):
    raise Exception('net name must in [resnet_v1_101, resnet_v1_50, MobilenetV2]')
weights_name = NET_NAME if NET_NAME.startswith("resnet") else "mobilenet/mobilenet_v2_1.0_224"
PRETRAINED_CKPT = ROOT_PATH + '/data/pretrained_weights/' + weights_name + '.ckpt'
TRAINED_CKPT = os.path.join(ROOT_PATH, 'output/trained_weights')
EVALUATE_DIR = ROOT_PATH + '/output/evaluate_result_pickle/'
# ------------------------------------------ Train config
# Optimisation and schedule constants; they are only defined here and are
# consumed by code outside this file.
RESTORE_FROM_RPN = False
FIXED_BLOCKS = 1 # allow 0~3
FREEZE_BLOCKS = [True, False, False, False, False] # for gluoncv backbone
USE_07_METRIC = True
MUTILPY_BIAS_GRADIENT = 2.0 # if None, will not multiply
GRADIENT_CLIPPING_BY_NORM = 10.0 # if None, will not clip
CLS_WEIGHT = 1.0
REG_WEIGHT = 1.0
USE_IOU_FACTOR = True
ALPHA = 1.0
BETA = 1.0
BATCH_SIZE = 4
EPSILON = 1e-5
MOMENTUM = 0.9
LR = 1e-4
# NOTE(review): with SAVE_WEIGHTS_INTE_2 = 25000 * 4 these boundaries evaluate
# to 1,200,000 / 1,600,000 / 2,000,000, all beyond MAX_ITERATION below; if
# they are step boundaries for learning-rate decay, none is reached in a run
# of MAX_ITERATION steps - confirm against the training script.
DECAY_STEP = [SAVE_WEIGHTS_INTE_2*12, SAVE_WEIGHTS_INTE_2*16, SAVE_WEIGHTS_INTE_2*20]
MAX_ITERATION = 20000
# MAX_ITERATION = SAVE_WEIGHTS_INTE_2*20
# NOTE(review): evaluates to 25000, which is larger than MAX_ITERATION
# (20000); if this is the warm-up length in steps, training stops before the
# warm-up finishes - confirm against the training script.
WARM_SETP = int(1.0 / 4.0 * SAVE_WEIGHTS_INTE_2)
# -------------------------------------------- Data_preprocess_config
DATASET_NAME = 'DOTA' # 'pascal', 'DOTA', 'coco'
#PIXEL_MEAN = [123.68, 116.779, 103.939] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
#PIXEL_MEAN_ = [0.485, 0.456, 0.406]
#PIXEL_STD = [0.229, 0.224, 0.225] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
# For book-spine dataset
PIXEL_MEAN = [127.958, 124.471, 124.831] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
PIXEL_MEAN_ = [0.502, 0.488, 0.490]
PIXEL_STD = [0.255, 0.241, 0.246] # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
IMG_SHORT_SIDE_LEN = [800, 640, 700, 900, 1000, 1100] #[640]
IMG_MAX_LENGTH = 1100
CLASS_NUM = 1
IMG_ROTATE = True
RGB2GRAY = True
VERTICAL_FLIP = True
HORIZONTAL_FLIP = True
IMAGE_PYRAMID = True
# --------------------------------------------- Network_config
SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None)
SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0)
PROBABILITY = 0.01
FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY))
WEIGHT_DECAY = 1e-4
USE_GN = False
NUM_SUBNET_CONV = 4
NUM_REFINE_STAGE = 1
USE_RELU = False
FPN_CHANNEL = 256
# ---------------------------------------------Anchor config
LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7']
BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512]
ANCHOR_STRIDE = [8, 16, 32, 64, 128]
ANCHOR_SCALES = [2 ** 1.5, 2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
ANCHOR_RATIOS = [1, 1 / 2, 2., 1 / 3., 3.] # I also tried [5., 1 / 5., 7., 1/7., 9., 1/9., 11., 1/11.]
ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15]
ANCHOR_SCALE_FACTORS = None
USE_CENTER_OFFSET = True
METHOD = 'H'
USE_ANGLE_COND = False
ANGLE_RANGE = 90
# --------------------------------------------RPN config
SHARE_NET = True
USE_P5 = True
IOU_POSITIVE_THRESHOLD = 0.5 #0.3
IOU_NEGATIVE_THRESHOLD = 0.4 #0.3
REFINE_IOU_POSITIVE_THRESHOLD = [0.6, 0.7] #[0.5, 0.9]
REFINE_IOU_NEGATIVE_THRESHOLD = [0.5, 0.6]
NMS = True
NMS_IOU_THRESHOLD = 0.1
MAXIMUM_DETECTIONS = 100
FILTERED_SCORE = 0.05
VIS_SCORE = 0.05
# --------------------------------------------MASK config
USE_SUPERVISED_MASK = False
MASK_TYPE = 'r' # r or h
BINARY_MASK = False
SIGMOID_ON_DOT = False
MASK_ACT_FET = True # weather use mask generate 256 channels to dot feat.
GENERATE_MASK_LIST = ["P3", "P4", "P5", "P6", "P7"]
ADDITION_LAYERS = [4, 4, 3, 2, 2] # add 4 layer to generate P2_mask, 2 layer to generate P3_mask
ENLAEGE_RF_LIST = ["P3", "P4", "P5", "P6", "P7"]
SUPERVISED_MASK_LOSS_WEIGHT = 1.0
I also trained a model for 200000 steps but it did not improve the results. I would appreciate it if you could go through the changes and give any suggestions on how to solve the issues. Thank you.
@yangxue0827 Can you please reopen this issue, so its easier to find for other people who might be able to contribute as well. Thanks.
Maybe you can try yangxue0827/RotationDetection, which contains many methods.
Thanks, yeah I'll check that out as well.
Hi,
Is it possible to convert this model with a MobileNetV2 backbone to a TFLite model with the converters provided by TensorFlow? If yes, and if anybody has tested it, how is the performance?