Hi @gokulh12,
In order to expedite the troubleshooting process, please provide a code snippet to reproduce the issue reported here. Thanks!
Hi @laxmareddyp, here is the complete code:
#mounting drive
from google.colab import drive
drive.mount('/content/gdrive')
%cd /content/gdrive/MyDrive/content
import os
import pathlib
# Clone the tensorflow models repository if it doesn't already exist
if "models" in pathlib.Path.cwd().parts:
while "models" in pathlib.Path.cwd().parts:
os.chdir('..')
elif not pathlib.Path('models').exists():
!git clone --depth 1 https://github.com/tensorflow/models
%%bash
# Install the Object Detection API (%%bash must be the first line of the cell)
cd /content/gdrive/MyDrive/content/models/research
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .
import matplotlib
import matplotlib.pyplot as plt
import os
import random
import io
import imageio
import glob
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display, Javascript
from IPython.display import Image as IPyImage
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import colab_utils
from object_detection.builders import model_builder
%matplotlib inline
# Run the model builder test
!python /content/gdrive/MyDrive/content/models/research/object_detection/builders/model_builder_tf2_test.py
# Download the TFRecords
%cd /content/gdrive/MyDrive/content
!curl -L "https://app.roboflow.com/ds/UDGPGXHzX8?key=qCqU0Cwfzl" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip
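As a quick sanity check, you can count the records in each split; the paths below assume the Roboflow archive unzips into train/ and test/ folders with these filenames, as the paths used later suggest:

# Count records per split to confirm the TFRecords downloaded and unzipped correctly.
for split in ('train/teams.tfrecord', 'test/teams.tfrecord'):
    path = '/content/gdrive/MyDrive/content/' + split
    num_records = sum(1 for _ in tf.data.TFRecordDataset(path))
    print(split, '->', num_records, 'examples')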
## Change chosen_model to deploy any of the models available in the TF2 Object Detection Zoo.
MODELS_CONFIG = {
    'efficientdet-d0': {
        'model_name': 'efficientdet_d0_coco17_tpu-32',
        'base_pipeline_file': 'ssd_efficientdet_d0_512x512_coco17_tpu-8.config',
        'pretrained_checkpoint': 'efficientdet_d0_coco17_tpu-32.tar.gz',
        'batch_size': 16
    },
    'efficientdet-d1': {
        'model_name': 'efficientdet_d1_coco17_tpu-32',
        'base_pipeline_file': 'ssd_efficientdet_d1_640x640_coco17_tpu-8.config',
        'pretrained_checkpoint': 'efficientdet_d1_coco17_tpu-32.tar.gz',
        'batch_size': 16
    },
    'efficientdet-d2': {
        'model_name': 'efficientdet_d2_coco17_tpu-32',
        'base_pipeline_file': 'ssd_efficientdet_d2_768x768_coco17_tpu-8.config',
        'pretrained_checkpoint': 'efficientdet_d2_coco17_tpu-32.tar.gz',
        'batch_size': 16
    },
    'efficientdet-d3': {
        'model_name': 'efficientdet_d3_coco17_tpu-32',
        'base_pipeline_file': 'ssd_efficientdet_d3_896x896_coco17_tpu-32.config',
        'pretrained_checkpoint': 'efficientdet_d3_coco17_tpu-32.tar.gz',
        'batch_size': 16
    }
}
chosen_model = 'efficientdet-d0'
num_steps = 10000  # More steps means longer training; increase if the loss is still decreasing and validation metrics are still improving.
num_eval_steps = 500  # Perform evaluation after this many steps.
model_name = MODELS_CONFIG[chosen_model]['model_name']
pretrained_checkpoint = MODELS_CONFIG[chosen_model]['pretrained_checkpoint']
base_pipeline_file = MODELS_CONFIG[chosen_model]['base_pipeline_file']
batch_size = MODELS_CONFIG[chosen_model]['batch_size'] #if you can fit a large batch in memory, it may speed up your training
#download pretrained weights
%mkdir /content/gdrive/MyDrive/content/models/research/deploy/
%cd /content/gdrive/MyDrive/content/models/research/deploy/
import tarfile
download_tar = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/' + pretrained_checkpoint
!wget {download_tar}
tar = tarfile.open(pretrained_checkpoint)
tar.extractall()
tar.close()
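To rule out a truncated download, it is worth checking that the extracted checkpoint actually exists; this sketch assumes the usual TF2 Detection Zoo layout (a checkpoint/ folder holding ckpt-0.index plus data shards):

import os
# We are already inside the deploy/ directory after the %cd above.
ckpt_index = os.path.join(model_name, 'checkpoint', 'ckpt-0.index')
print('checkpoint index present:', os.path.exists(ckpt_index))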
#download base training configuration file
%cd /content/gdrive/MyDrive/content/models/research/deploy
download_config = 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/' + base_pipeline_file
!wget {download_config}
pipeline_fname = '/content/gdrive/MyDrive/content/models/research/deploy/' + base_pipeline_file
fine_tune_checkpoint = '/content/gdrive/MyDrive/content/models/research/deploy/' + model_name + '/checkpoint/ckpt-0'
#calculate number of classes
def get_num_classes(pbtxt_fname):
    from object_detection.utils import label_map_util
    # Use the path passed in rather than a hardcoded one.
    label_map = label_map_util.load_labelmap(pbtxt_fname)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return len(category_index.keys())
num_classes = get_num_classes('/content/gdrive/MyDrive/content/test/teams_label_map.pbtxt')
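For reference, teams_label_map.pbtxt is expected to follow the standard Object Detection API label map format, with ids starting at 1 (the class names below are hypothetical placeholders):

item {
  id: 1
  name: 'team_a'
}
item {
  id: 2
  name: 'team_b'
}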
#read the config file and write file directories
import re
%cd /content/gdrive/MyDrive/content/models/research/deploy
print('writing custom configuration file')
with open(pipeline_fname) as f:
    s = f.read()
with open('pipeline_file.config', 'w') as f:
    # fine_tune_checkpoint
    s = re.sub('fine_tune_checkpoint: ".*?"',
               'fine_tune_checkpoint: "{}"'.format(fine_tune_checkpoint), s)
    # tfrecord files for train and test.
    s = re.sub(
        '(input_path: ".*?)(PATH_TO_BE_CONFIGURED/train)(.*?")',
        'input_path: "{}"'.format('/content/gdrive/MyDrive/content/train/teams.tfrecord'), s)
    s = re.sub(
        '(input_path: ".*?)(PATH_TO_BE_CONFIGURED/val)(.*?")',
        'input_path: "{}"'.format('/content/gdrive/MyDrive/content/test/teams.tfrecord'), s)
    # label_map_path
    s = re.sub(
        'label_map_path: ".*?"',
        'label_map_path: "{}"'.format('/content/gdrive/MyDrive/content/train/teams_label_map.pbtxt'), s)
    # Set training batch_size.
    s = re.sub('batch_size: [0-9]+',
               'batch_size: {}'.format(batch_size), s)
    # Set training steps, num_steps.
    s = re.sub('num_steps: [0-9]+',
               'num_steps: {}'.format(num_steps), s)
    # Set number of classes, num_classes.
    s = re.sub('num_classes: [0-9]+',
               'num_classes: {}'.format(num_classes), s)
    # Fine-tune checkpoint type.
    s = re.sub(
        'fine_tune_checkpoint_type: "classification"',
        'fine_tune_checkpoint_type: "{}"'.format('detection'), s)
    f.write(s)
%cat /content/gdrive/MyDrive/content/models/research/deploy/pipeline_file.config
pipeline_file = '/content/gdrive/MyDrive/content/models/research/deploy/pipeline_file.config'
model_dir = '/content/gdrive/MyDrive/content/training/'
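As a sanity check before launching training, the rewritten file can be re-parsed with config_util (imported earlier) to catch any mistake made by the re.sub rewrites; the fields printed here are just spot checks:

# Parse the rewritten pipeline config and spot-check a few key fields.
configs = config_util.get_configs_from_pipeline_file(pipeline_file)
print('num_classes:', configs['model'].ssd.num_classes)
print('batch_size:', configs['train_config'].batch_size)
print('fine_tune_checkpoint:', configs['train_config'].fine_tune_checkpoint)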
#training the model
!python /content/gdrive/MyDrive/content/models/research/object_detection/model_main_tf2.py \
--pipeline_config_path={pipeline_file} \
--model_dir={model_dir} \
--alsologtostderr \
--num_train_steps={num_steps} \
--sample_1_of_n_eval_examples=1 \
--num_eval_steps={num_eval_steps}
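Optionally, training can be monitored with TensorBoard pointed at the model directory (standard Colab magics; the log directory matches model_dir above):

%load_ext tensorboard
%tensorboard --logdir /content/gdrive/MyDrive/content/training/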
#model evaluation
!python /content/gdrive/MyDrive/content/models/research/object_detection/model_main_tf2.py \
--pipeline_config_path={pipeline_file} \
--model_dir={model_dir} \
--checkpoint_dir={model_dir}
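Note that when --checkpoint_dir is supplied, model_main_tf2.py runs in continuous-evaluation mode and keeps waiting for new checkpoints; for a one-off evaluation you can lower the --eval_timeout flag (defined in model_main_tf2.py) so the process exits once the wait expires, e.g.:

!python /content/gdrive/MyDrive/content/models/research/object_detection/model_main_tf2.py \
  --pipeline_config_path={pipeline_file} \
  --model_dir={model_dir} \
  --checkpoint_dir={model_dir} \
  --eval_timeout=60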
Hi,
The config file that ships with the model checkpoint may have mistakes, so I have changed the pipeline.config file:
model {
  ssd {
    num_classes: 5
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 256
        max_dimension: 256
        pad_to_max_dimension: true
      }
    }
    feature_extractor {
      type: "ssd_efficientnet-b0_bifpn_keras"
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 3.9999998989515007e-05
          }
        }
        initializer {
          truncated_normal_initializer {
            mean: 0.0
            stddev: 0.029999999329447746
          }
        }
        activation: SWISH
        batch_norm {
          decay: 0.9900000095367432
          scale: true
          epsilon: 0.0010000000474974513
        }
        force_use_bias: true
      }
      bifpn {
        min_level: 3
        max_level: 7
        num_iterations: 3
        num_filters: 64
      }
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 1.0
        x_scale: 1.0
        height_scale: 1.0
        width_scale: 1.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 3.9999998989515007e-05
            }
          }
          initializer {
            random_normal_initializer {
              mean: 0.0
              stddev: 0.009999999776482582
            }
          }
          activation: SWISH
          batch_norm {
            decay: 0.9900000095367432
            scale: true
            epsilon: 0.0010000000474974513
          }
          force_use_bias: true
        }
        depth: 64
        num_layers_before_predictor: 3
        kernel_size: 3
        class_prediction_bias_init: -4.599999904632568
        use_depthwise: true
      }
    }
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        scales_per_octave: 3
      }
    }
    post_processing {
      batch_non_max_suppression {
        score_threshold: 9.99999993922529e-09
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid_focal {
          gamma: 1.5
          alpha: 0.25
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    encode_background_as_zeros: true
    normalize_loc_loss_by_codesize: true
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    add_background_class: false
  }
}
train_config {
  batch_size: 16
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      output_size: 128
      scale_min: 0.10000000149011612
      scale_max: 2.0
    }
  }
  sync_replicas: true
  optimizer {
    momentum_optimizer {
      learning_rate {
        cosine_decay_learning_rate {
          learning_rate_base: 0.07999999821186066
          total_steps: 500
          warmup_learning_rate: 0.0010000000474974513
          warmup_steps: 100
        }
      }
      momentum_optimizer_value: 0.8999999761581421
    }
    use_moving_average: false
  }
  fine_tune_checkpoint: "/content/efficientdet_d0_coco17_tpu-32/checkpoint/ckpt-0"
  num_steps: 500
  startup_delay_steps: 0.0
  replicas_to_aggregate: 8
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
  fine_tune_checkpoint_type: "detection"
  use_bfloat16: true
  fine_tune_checkpoint_version: V2
}
train_input_reader {
  label_map_path: "/content/roboflow/train/teams_label_map.pbtxt"
  tf_record_input_reader {
    input_path: "/content/roboflow/train/teams.tfrecord"
  }
}
eval_config {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  batch_size: 1;
}
eval_input_reader {
  label_map_path: "/content/roboflow/train/teams_label_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "/content/roboflow/test/teams.tfrecord"
  }
}
Please also find the gist using the above configuration; I hope this will help you resolve the issue.
Thanks.
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you.
Closing as stale. Please reopen if you'd like to work on this further.
There is still an issue with this model_main_tf2.py approach for evaluation.
Hi, I am facing an error while trying to perform model evaluation after training: TypeError: 'NoneType' object is not iterable.
I am using the code below to get the model evaluation:
These are the paths:
Log while running the evaluation code:
This issue happens only when I try to evaluate the model and get the performance metrics. There are no issues when I use the trained model to actually identify objects in test images; in fact, I get the final output with bounding boxes and acceptable performance.
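One possible cause of a TypeError: 'NoneType' object is not iterable during evaluation is an eval input that yields nothing (a wrong input_path or an empty TFRecord). A minimal check, using the eval path from the config above, is to read one raw record and list its feature keys:

import tensorflow as tf

# Read a single record from the eval TFRecord and inspect its feature keys.
dataset = tf.data.TFRecordDataset('/content/roboflow/test/teams.tfrecord')
for record in dataset.take(1):
    example = tf.train.Example.FromString(record.numpy())
    print(sorted(example.features.feature.keys()))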
Here is my config file: