Closed attafosu closed 1 year ago
The retinanet downloader provided by MLCommons was broken until https://github.com/mlcommons/inference/pull/1277#issuecomment-1304910370, so there's no automated script.
You can instead follow the instructions here to download the model and dataset:
For the model, NVIDIA used the PyTorch model and converted it to ONNX using this script provided by MLCommons, but slightly modified:
To support dynamic batch sizes and fix some issues with PyTorch -> ONNX caused by having data preprocessing in the forward() method, we applied the following git patch to the MLCommons training repo before running the script:
diff --git a/single_stage_detector/ssd/model/retinanet.py b/single_stage_detector/ssd/model/retinanet.py
index 2f10d96..cdba3be 100644
--- a/single_stage_detector/ssd/model/retinanet.py
+++ b/single_stage_detector/ssd/model/retinanet.py
@@ -12,6 +12,7 @@ from model.transform import GeneralizedRCNNTransform
from model.backbone_utils import resnet_fpn_backbone, _validate_trainable_layers
from model.feature_pyramid_network import LastLevelP6P7
from model.focal_loss import sigmoid_focal_loss
+from model.image_list import ImageList
from model.boxes import box_iou, clip_boxes_to_image, batched_nms
from model.utils import Matcher, overwrite_eps, BoxCoder
@@ -510,7 +511,13 @@ class RetinaNet(nn.Module):
original_image_sizes.append((val[0], val[1]))
# transform the input
- images, targets = self.transform(images, targets)
+ # images, targets = self.transform(images, targets)
+ _image_sizes = [img.shape[-2:] for img in images]
+ for _size in _image_sizes:
+ assert len(_size) == 2 and _size[0] == 800 and _size[1] == 800
+ # print(type(images))
+ # images = ImageList(torch.stack(images), _image_sizes)
+ images = ImageList(images, _image_sizes)
# Check for degenerate boxes
# TODO: Move this to a function
@@ -539,7 +546,11 @@ class RetinaNet(nn.Module):
# compute the retinanet heads outputs using the features
head_outputs = self.head(features)
+ for k, v in head_outputs.items():
+ print(f"{k}: {v.size()}")
+ return head_outputs
+ """
# create the set of anchors
anchors = self.anchor_generator(images, features)
@@ -576,6 +587,7 @@ class RetinaNet(nn.Module):
self._has_warned = True
return losses, detections
return self.eager_outputs(losses, detections)
+ """
model_urls = {
diff --git a/single_stage_detector/scripts/pth_to_onnx.py b/single_stage_detector/ssd/pth_to_onnx.py
similarity index 65%
rename from single_stage_detector/scripts/pth_to_onnx.py
rename to single_stage_detector/ssd/pth_to_onnx.py
index 78945aa..93679cd 100755
--- a/single_stage_detector/scripts/pth_to_onnx.py
+++ b/single_stage_detector/ssd/pth_to_onnx.py
@@ -8,7 +8,7 @@ from torch.autograd import Variable
from model.retinanet import retinanet_from_backbone
-def parse_args(add_help=True):
+def parse_args(add_help=True, custom_argv=None):
parser = argparse.ArgumentParser(description='Convert PyTorch detection file to onnx format', add_help=add_help)
parser.add_argument('--input', required=True, help='input pth file')
@@ -30,11 +30,15 @@ def parse_args(add_help=True):
help="Model data layout")
parser.add_argument('--device', default='cuda', help='device')
- args = parser.parse_args()
+ if custom_argv is None:
+ args = parser.parse_args()
+ else:
+ args = parser.parse_args(args=custom_argv)
args.output = args.output or ('retinanet_'+args.backbone+'.onnx')
return args
+
def main(args):
batch_size = args.batch_size or 1
image_size = args.image_size or [800, 800]
@@ -51,6 +55,25 @@ def main(args):
print("Loading model")
checkpoint = torch.load(args.input)
+
+ # For some reason the batchnorms in the checkpoint do not have the same sizes as the module object. The checkpoint
+ # batchnorms have a size of [1, N, 1, 1], while the model batchnorms just have a size of [N].
+ # However, this is fine, since (assuming the README is correct), the batchnorms were frozen and were not modified
+ # during training.
+ target_state_dict = model.state_dict()
+ for k, v in target_state_dict.items():
+ ckpt_val = checkpoint["model"][k]
+ if v.size() == ckpt_val.size():
+ continue
+ target_size = torch.tensor(v.size())
+ actual_size = torch.tensor(ckpt_val.size())
+ flattened = torch.flatten(actual_size)
+ if all(target_size != flattened):
+ raise ValueError(f"Real size mismatch for {k}: {target_size} vs {actual_size}")
+ checkpoint["model"][k] = checkpoint["model"][k].view(target_size)
+ # Remove unexpected keys
+ for k in [k for k in checkpoint["model"] if k not in target_state_dict]:
+ del checkpoint["model"][k]
model.load_state_dict(checkpoint['model'])
print("Creating input tensor")
@@ -60,20 +83,31 @@ def main(args):
dtype=torch.float)
inputs = torch.autograd.Variable(rand)
# Output dynamic axes
+ """
dynamic_axes = {
'boxes': {0 : 'num_detections'},
'scores': {0 : 'num_detections'},
'labels': {0 : 'num_detections'},
}
+ """
+
# Input dynamic axes
+ """
if (args.batch_size is None) or (args.image_size is None):
dynamic_axes['images'] = {}
if args.batch_size is None:
- dynamic_axes['images'][0]: 'batch_size'
+ dynamic_axes['images'][0] = 'batch_size'
if args.image_size is None:
dynamic_axes['images'][2] = 'width'
dynamic_axes['images'][3] = 'height'
-
+ """
+ # Force dynamic batch_size
+ dynamic_axes = {
+ "images": {0: "batch_size"},
+ "cls_logits": {0: "batch_size", 1: "num_regions", 2: "num_classes"},
+ "bbox_regression": {0: "batch_size", 1: "num_regions", 2: "bbox_coord_dim"},
+ }
+ print(dynamic_axes)
print("Exporting the model")
model.eval()
@@ -81,10 +115,11 @@ def main(args):
inputs,
args.output,
export_params=True,
- opset_version=13,
- do_constant_folding=False,
+ opset_version=11,
+ do_constant_folding=True,
input_names=['images'],
- output_names=['boxes', 'scores', 'labels'],
+ # output_names=['boxes', 'scores', 'labels'],
+ output_names=['cls_logits', 'bbox_regression'],
dynamic_axes=dynamic_axes)
diff --git a/single_stage_detector/ssd/run_pth_to_onnx.sh b/single_stage_detector/ssd/run_pth_to_onnx.sh
new file mode 100644
index 0000000..e244aed
--- /dev/null
+++ b/single_stage_detector/ssd/run_pth_to_onnx.sh
@@ -0,0 +1,9 @@
+docker build -t mlperf/single_stage_detector .
+docker run -v /home/mlperf_inference_data:/home/mlperf_inference_data \
+ -v /home/scratch.etcheng_sw/mlperf-training:/mnt/training \
+ --gpus=0 -e NVIDIA_VISIBLE_DEVICES=0 mlperf/single_stage_detector:latest \
+ python pth_to_onnx.py \
+ --num-classes 264 \
+ --image-size 800 800 \
+ --input /home/mlperf_inference_data/models/retinanet-resnext50-32x4d/new/retinanet_model_10.pth \
+ --output /mnt/training/resnext-retinanet-ckpts/onnx/retinanet_resnext50_32x4d_fpn.opset11.dyn_bs.800x800.onnx
We then used Polygraphy to optimize the graph with the following script:
# Set these parameters
RAW_ONNX_PATH=$1
FOLDED_ONNX_PATH=$2
BACKEND_ONNX_PATH=$3
NMS_ONNX_PATH=$4
bbox_concat_node="1775"
classification_concat_node="1599"
# Run once to install the dependencies. For some reason, this messes up Polygraphy's auto-fold loop, so we need to run a
# second time.
POLYGRAPHY_AUTOINSTALL_DEPS=1 polygraphy surgeon sanitize --fold-constants $RAW_ONNX_PATH -o $FOLDED_ONNX_PATH
polygraphy surgeon sanitize --fold-constants $RAW_ONNX_PATH -o $FOLDED_ONNX_PATH
# Extract backend
polygraphy surgeon extract $FOLDED_ONNX_PATH \
--outputs ${bbox_concat_node}:auto ${classification_concat_node}:auto \
-o $BACKEND_ONNX_PATH
# Extract NMS head
polygraphy surgeon extract $FOLDED_ONNX_PATH \
--inputs ${classification_concat_node}:[batch_size,120087,264]:auto ${bbox_concat_node}:[batch_size,120087,4]:auto \
-o $NMS_ONNX_PATH
We split the graph into 2 ONNX files, and instead attach a custom ONNX op with the TensorRT EfficientNMS plugin using: https://github.com/mlcommons/inference_results_v2.1/blob/master/closed/NVIDIA/code/retinanet/tensorrt/onnx_generator/patch_retinanet_efficientnms.py
Thank you @nv-etcheng for the detailed steps; we have added them to our CM scripts so that it becomes easier for others to reuse them across different platforms. But the given instructions are not supported on macOS, right, since CUDA is no longer supported there?
Hi @nv-etcheng, after getting BACKEND_ONNX and NMS_ONNX, what should I do to run MLPerf RetinaNet? It's a bit confusing dealing with 2 ONNX files.
Hi @nv-etcheng , while trying these changes I'm able to get the onnx model with support for dynamic batch size and also the folded one. But then I'm getting the below error. Do we need to inspect the model and update the node values?
[I] PASSED | Runtime: 3.186s | Command: /opt/conda/bin/polygraphy surgeon sanitize --fold-constants --no-onnxruntime-shape-inference retinanet_resnext50_32x4d_fpn.opset11.dyn_bs.800x800.onnx -o folded.onnx
[I] RUNNING | Command: /opt/conda/bin/polygraphy surgeon extract folded.onnx --outputs 1775:auto 1599:auto -o backend.onnx
[I] Loading model: /home/arjun/CM/repos/local/cache/ae31acc449d14691/folded.onnx
[I] Original Model:
Name: torch_jit | ONNX Opset: 11
---- 1 Graph Input(s) ----
{images [dtype=float32, shape=('batch_size', 3, 800, 800)]}
---- 2 Graph Output(s) ----
{cls_logits [dtype=float32, shape=('batch_size', 'num_regions', 'num_classes')],
bbox_regression [dtype=float32, shape=('batch_size', 'num_regions', 'bbox_coord_dim')]}
---- 277 Initializer(s) ----
---- 426 Node(s) ----
[!] Tensor: 1775 does not exist in the model.
[E] FAILED | Runtime: 0.361s | Command: /opt/conda/bin/polygraphy surgeon extract folded.onnx --outputs 1775:auto 1599:auto -o backend.onnx
[I] RUNNING | Command: /opt/conda/bin/polygraphy surgeon extract folded.onnx --inputs 1599:[batch_size,120087,264]:auto 1775:[batch_size,120087,4]:auto -o nms.onnx
[
@arjunsuresh Regarding lack of CUDA support on Mac OS, yes, you wouldn't be running the NVIDIA submission on a Mac (at least a modern one) anyway. However, I'm not sure how that matters - the process to generate the ONNX is platform-agnostic up until needing to add the TRT Plugin Node (But you wouldn't be doing this on Mac OS anyway).
PyTorch CPU installation works fine to convert to ONNX, and I don't believe Polygraphy has any GPU dependencies.
Regarding the Polygraphy error, my suspicion is that the names of the nodes are different for you - You can figure out the names by just viewing the model in netron and looking at where the retinanet backend ends and the 10 convolutional heads begin.
@arjunsuresh The error could be due to the randomized node numbering with torch2onnx. The easiest way is to inspect the ONNX graph and manually change the node name in the script to the correct concat node. Ethan is taking a look at that to see if there's a better way.
@HikariTJU Once you get the backend ONNX, you need to run:
python3 -m code.retinanet.tensorrt.preprocess_data
for both the validation and calibration datasets (refer to https://github.com/mlcommons/inference_results_v2.1/blob/master/closed/NVIDIA/code/retinanet/tensorrt/preprocess_data.py for more details), then:
make calibrate RUN_ARGS="--benchmarks=retinanet"
Note that you need to have the datasets (both validation and calibration) and put them in the right folder (please refer to https://github.com/mlcommons/inference_results_v2.1/blob/master/closed/NVIDIA/code/retinanet/tensorrt/calibrator.py for more details). Finally, run:
make run RUN_ARGS="--benchmarks=retinanet"
if the calibration cache and datasets are in place.
Hi @nvzhihanj
At step 4 running make run ...
, I got an error saying that plugin creation failed, any idea how to solve this?
Or maybe is it possible to provide a processed onnx file?
[12/01/2022-01:37:12] [TRT] [I] Searching for plugin: EfficientNMS_TRT, plugin_version: 1, plugin_namespace:
[12/01/2022-01:37:12] [TRT] [W] builtin_op_importers.cpp:5227: Attribute background_class not found in plugin node! Ensure that the plugin creator has a default value defined or the engine may fail to build.
[12/01/2022-01:37:12] [TRT] [F] Validation failed: background_class}tion missing required fields: {
plugin/common/plugin.cpp:41
[12/01/2022-01:37:12] [TRT] [E] std::exception
[12/01/2022-01:37:12] [TRT] [E] ModelImporter.cpp:726: While parsing node number 426 [EfficientNMS_TRT -> "num_detections"]:
[12/01/2022-01:37:12] [TRT] [E] ModelImporter.cpp:727: --- Begin node ---
[12/01/2022-01:37:12] [TRT] [E] ModelImporter.cpp:728: input: "bbox_regression"
input: "cls_logits"
input: "anchor"
output: "num_detections"
output: "detection_boxes"
output: "detection_scores"
output: "detection_classes"
name: "EfficientNMS"
op_type: "EfficientNMS_TRT"
attribute {
name: "plugin_version"
s: "1"
type: STRING
}
attribute {
name: "plugin_namespace"
s: ""
type: STRING
}
attribute {
name: "score_threshold"
f: 0.05
type: FLOAT
}
attribute {
name: "iou_threshold"
f: 0.5
type: FLOAT
}
attribute {
name: "max_output_boxes"
i: 1000
type: INT
}
attribute {
name: "score_activation"
i: 1
type: INT
}
attribute {
name: "box_coding"
i: 1
type: INT
}
[12/01/2022-01:37:12] [TRT] [E] ModelImporter.cpp:729: --- End node ---
[12/01/2022-01:37:12] [TRT] [E] ModelImporter.cpp:732: ERROR: builtin_op_importers.cpp:5418 In function importFallbackPluginImporter:
[8] Assertion failed: plugin && "Could not create plugin"
[2022-12-01 01:37:12,152 builder.py:177 INFO] Building ./build/engines/A100-PCIe-80GBx1/retinanet/Offline/retinanet-Offline-gpu-b16-int8.lwis_k_99_MaxP.plan
[12/01/2022-01:37:12] [TRT] [E] 4: [network.cpp::validate::2746] Error Code 4: Internal Error (Network must have at least one output)
Process Process-1:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/work/code/actionhandler/base.py", line 185, in subprocess_target
return self.action_handler.handle()
File "/work/code/actionhandler/generate_engines.py", line 175, in handle
total_engine_build_time += self.build_engine(job)
File "/work/code/actionhandler/generate_engines.py", line 166, in build_engine
builder.build_engines()
File "/work/code/common/builder.py", line 208, in build_engines
engine_inspector = engine.create_engine_inspector()
AttributeError: 'NoneType' object has no attribute 'create_engine_inspector'
[2022-12-01 01:37:15,178 generate_engines.py:172 INFO] Building engines for retinanet benchmark in Offline scenario...
@HikariTJU Can you add the following to https://github.com/mlcommons/inference_results_v2.1/blob/master/closed/NVIDIA/code/retinanet/tensorrt/onnx_generator/patch_retinanet_efficientnms.py#L44?
"background_class": -1,
That should fix the error
Thank you @nv-etcheng, @nvzhihanj Actually the given instructions are not working on macos as cuda is needed (I can report the exact error when I try on mac next time). But this is not very important.
I tried inspecting the model using Netron and identifying the node numbers, but nothing I tried worked. I'm sharing the Netron image for the part around where the backend ends. Can you please see if something is wrong here? link
@arjunsuresh The backend ends after the FPN, before the NMS, which is the blue circle in the image. If you prefer to skip the shuffle+concat (and do them elsewhere), you can directly use the 10 conv head outputs marked with the red circle.
Thank you @nvzhihanj for your reply. I updated the node names as follows:
bbox_concat_node="bbox_regression"
classification_concat_node="cls_logits"
and it seems to have worked well. Thanks again for your help.
"new_env": {
"CM_ML_MODEL_ACCURACY": "0.3755",
"CM_ML_MODEL_ANCHOR_PATH": "/home/arjun/CM/repos/local/cache/a01f219b640b4900/inference_results_v2.1/closed/NVIDIA/code/retinanet/tensorrt/onnx_generator/retinanet_anchor_xywh_1x1.npy",
"CM_ML_MODEL_DATASET": "open-images",
"CM_ML_MODEL_DYN_BATCHSIZE_PATH": "/home/arjun/CM/repos/local/cache/7b121afe57014eb1/retinanet_resnext50_32x4d_fpn.opset11.dyn_bs.800x800.onnx",
"CM_ML_MODEL_FILE": "retinanet_model_10.pth",
"CM_ML_MODEL_FILE_WITH_PATH": "/home/arjun/CM/repos/local/cache/d6661d4837494d55/retinanet_model_10.pth",
"CM_ML_MODEL_IMAGE_HEIGHT": "800",
"CM_ML_MODEL_IMAGE_WIDTH": "800",
"CM_ML_MODEL_NORMALIZE_DATA": "yes",
"CM_ML_MODEL_PATH": "/home/arjun/CM/repos/local/cache/d6661d4837494d55",
"CM_ML_MODEL_RETRAINING": "no",
"CM_ML_MODEL_SUBTRACT_MEAN": "yes",
"CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no",
"CM_NVIDIA_RETINANET_EFFICIENT_NMS_CONCAT_MODEL_WITH_PATH": "/home/arjun/CM/repos/local/cache/7b121afe57014eb1/fpn_efficientnms_concatall.onnx"
},
@nv-etcheng The shared code does in fact run on macOS; we just need to specify --device=cpu
and apply a small patch to the training repository code. I have now wrapped all these instructions in CM. The commands below should generate the final model on Linux/macOS.
python3 -m pip install cmind
export PATH=$HOME/.local/bin:$PATH
cm pull repo octoml@ck
cm run script --tags=get,nvidia-retinanet,_efficient-nms --out=json
should produce
"new_env": {
"CM_ML_MODEL_ACCURACY": "0.3755",
"CM_ML_MODEL_ANCHOR_PATH": "/Users/arjun/CM/repos/local/cache/3752ee9da1d54e28/inference_results_v2.1/closed/NVIDIA/code/retinanet/tensorrt/onnx_generator/retinanet_anchor_xywh_1x1.npy",
"CM_ML_MODEL_DATASET": "open-images",
"CM_ML_MODEL_DATA_LAYOUT": "NCHW",
"CM_ML_MODEL_DYN_BATCHSIZE_PATH": "/Users/arjun/CM/repos/local/cache/7c69802470634710/retinanet_resnext50_32x4d_fpn.opset11.dyn_bs.800x800.onnx",
"CM_ML_MODEL_FILE": "retinanet_model_10.pth",
"CM_ML_MODEL_FILE_WITH_PATH": "/Users/arjun/CM/repos/local/cache/6910eff269524017/retinanet_model_10.pth",
"CM_ML_MODEL_FRAMEWORK": "pytorch",
"CM_ML_MODEL_IMAGE_HEIGHT": "800",
"CM_ML_MODEL_IMAGE_WIDTH": "800",
"CM_ML_MODEL_INPUT_DATA_TYPES": "fp32",
"CM_ML_MODEL_NORMALIZE_DATA": "yes",
"CM_ML_MODEL_PATH": "/Users/arjun/CM/repos/local/cache/6910eff269524017",
"CM_ML_MODEL_PRECISION": "fp32",
"CM_ML_MODEL_RETRAINING": "no",
"CM_ML_MODEL_SUBTRACT_MEAN": "yes",
"CM_ML_MODEL_WEIGHT_DATA_TYPES": "fp32",
"CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no",
"CM_NVIDIA_RETINANET_EFFICIENT_NMS_CONCAT_MODEL_WITH_PATH": "/Users/arjun/CM/repos/local/cache/7c69802470634710/fpn_efficientnms_concatall.onnx"
},
Great! Can this issue be closed, or did I forget to address something?
After adding "background_class": -1,
to patch_retinanet_efficientnms.py
and regenerating the ONNX model with the EfficientNMS node, I'm getting the error below. This is the final ONNX model I got. Can you please suggest what could be wrong here?
[01/20/2023-09:32:20] [TRT] [I] ----------------------------------------------------------------
[01/20/2023-09:32:20] [TRT] [I] Input filename: build/models/retinanet-resnext50-32x4d/submission/retinanet_resnext50_32x4d_efficientNMS.800x800.onnx
[01/20/2023-09:32:20] [TRT] [I] ONNX IR version: 0.0.8
[01/20/2023-09:32:20] [TRT] [I] Opset version: 11
[01/20/2023-09:32:20] [TRT] [I] Producer name: pytorch
[01/20/2023-09:32:20] [TRT] [I] Producer version: 1.13.1
[01/20/2023-09:32:20] [TRT] [I] Domain:
[01/20/2023-09:32:20] [TRT] [I] Model version: 0
[01/20/2023-09:32:20] [TRT] [I] Doc string:
[01/20/2023-09:32:20] [TRT] [I] ----------------------------------------------------------------
[01/20/2023-09:32:20] [TRT] [W] onnx2trt_utils.cpp:377: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[01/20/2023-09:32:20] [TRT] [I] No importer registered for op: EfficientNMS_TRT. Attempting to import as plugin.
[01/20/2023-09:32:20] [TRT] [I] Searching for plugin: EfficientNMS_TRT, plugin_version: 1, plugin_namespace:
[01/20/2023-09:32:20] [TRT] [W] builtin_op_importers.cpp:5245: Attribute background_class not found in plugin node! Ensure that the plugin creator has a default value defined or the engine may fail to build.
[01/20/2023-09:32:20] [TRT] [F] Validation failed: background_class}tion missing required fields: {
plugin/common/plugin.cpp:41
[01/20/2023-09:32:20] [TRT] [E] std::exception
[01/20/2023-09:32:20] [TRT] [E] ModelImporter.cpp:726: While parsing node number 586 [EfficientNMS_TRT -> "num_detections"]:
[01/20/2023-09:32:20] [TRT] [E] ModelImporter.cpp:727: --- Begin node ---
[01/20/2023-09:32:20] [TRT] [E] ModelImporter.cpp:728: input: "bbox_regression"
input: "cls_logits"
input: "anchor"
output: "num_detections"
output: "detection_boxes"
output: "detection_scores"
output: "detection_classes"
name: "EfficientNMS"
op_type: "EfficientNMS_TRT"
attribute {
name: "plugin_version"
s: "1"
type: STRING
}
attribute {
name: "plugin_namespace"
s: ""
type: STRING
}
attribute {
name: "score_threshold"
f: 0.05
type: FLOAT
}
attribute {
name: "iou_threshold"
f: 0.5
type: FLOAT
}
attribute {
name: "max_output_boxes"
i: 1000
type: INT
}
attribute {
name: "score_activation"
i: 1
type: INT
}
attribute {
name: "box_coding"
i: 1
type: INT
}
[01/20/2023-09:32:20] [TRT] [E] ModelImporter.cpp:729: --- End node ---
[01/20/2023-09:32:20] [TRT] [E] ModelImporter.cpp:732: ERROR: builtin_op_importers.cpp:5436 In function importFallbackPluginImporter:
[8] Assertion failed: plugin && "Could not create plugin"
@arjunsuresh I couldn't access the ONNX graph, but it seems like it's still missing the background_class field. The EfficientNMS plugin looks something like this:
Thank you @nvzhihanj for your quick response. I was modifying the wrong copy, and after making the change in the right code I'm able to make the background_class error disappear. But the accuracy is bad when tested with the NVIDIA implementation.
RuntimeError: Accuracy = 0.006, Threshold = 37.174. Accuracy test FAILED.!
I also see a lot of warnings like the ones below - are they expected?
[01/20/2023-11:16:40] [TRT] [W] Missing scale and zero-point for tensor /head/classification_head/Transpose_1_output_0, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
[01/20/2023-11:16:40] [TRT] [W] Missing scale and zero-point for tensor /head/regression_head/Transpose_1_output_0, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
[01/20/2023-11:16:40] [TRT] [W] Missing scale and zero-point for tensor /head/classification_head/Reshape_3_output_0, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
[01/20/2023-11:16:40] [TRT] [W] Missing scale and zero-point for tensor /head/regression_head/Reshape_3_output_0, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
[01/20/2023-11:16:40] [TRT] [W] Missing scale and zero-point for tensor /head/classification_head/Reshape_output_0, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
[01/20/2023-11:16:40] [TRT] [W] Missing scale and zero-point for tensor /head/regression_head/Reshape_output_0, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
@arjunsuresh We also found the accuracy issue mentioned in https://github.com/mlcommons/inference/issues/1332 independently, and also independently root caused it to a difference in the annotations files. If you use the same annotations file as v2.1, the accuracy issue should be fixed. The warnings can be ignored.
If you recall, WG did decide to allow use of 2.1 annotations during our Feb 21 discussion on this topic.
Can we close this issue?
Thank you @nv-ananjappa for the pointer. I was not expecting the annotations file to cause a drop in accuracy to 0.006 but it indeed is the case. By using the r2.1 annotations file we are getting valid accuracy. It is fine to close this issue from our side.
DONE (t=28.31s).
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.375
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.524
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.405
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.025
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.125
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.414
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.419
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.596
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.626
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.081
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.336
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.676
mAP=37.477%
@rnaidu02 Could you close this issue? Also, what is Mitchelle's Github ID?
@nv-ananjappa I don't have privileges to close this issue. Will check with David T to follow up with Bruno/systems team to close this issue.
Setup instructions fail when downloading the model for retinanet:
make download_model BENCHMARK="retinanet"
ERROR:
bash: code/retinanet/tensorrt/download_model.sh: No such file or directory
make: *** [Makefile:487: download_model] Error 127