I am trying to convert a ViT B-16 model from transformers. First, I am serializing it as a SavedModel resource:
from transformers import ViTFeatureExtractor, TFViTForImageClassification
import tensorflow as tf
import transformers
import tempfile
import requests
import base64
import json
import os
LOCAL_MODEL_DIR = "vit"

# The feature extractor has to exist before `normalize_img` is defined,
# since it is captured in the function's default arguments.
feature_extractor = ViTFeatureExtractor()

def normalize_img(img, mean=feature_extractor.image_mean, std=feature_extractor.image_std):
    # Scale to the value range of [0, 1] first and then normalize.
    img = img / 255
    mean = tf.constant(mean)
    std = tf.constant(std)
    return (img - mean) / std
def preprocess(string_input):
    decoded = tf.io.decode_jpeg(string_input, channels=3)
    resized = tf.image.resize(decoded, size=(SIZE, SIZE))
    normalized = normalize_img(resized)
    normalized = tf.transpose(normalized, (2, 0, 1))  # Since HF models are channel-first.
    return normalized
@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
def preprocess_fn(string_input):
    decoded_images = tf.map_fn(
        preprocess, string_input, dtype=tf.float32, back_prop=False
    )
    return {CONCRETE_INPUT: decoded_images}
def model_exporter(model: tf.keras.Model):
    m_call = tf.function(model.call).get_concrete_function(
        tf.TensorSpec(
            shape=[None, 3, SIZE, SIZE], dtype=tf.float32, name=CONCRETE_INPUT
        )
    )

    @tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
    def serving_fn(string_input):
        labels = tf.constant(
            list(model.config.id2label.values()), dtype=tf.string
        )
        images = preprocess_fn(string_input)
        predictions = m_call(**images)
        indices = tf.argmax(predictions.logits, axis=1)
        pred_source = tf.gather(params=labels, indices=indices)
        probs = tf.nn.softmax(predictions.logits, axis=1)
        pred_confidence = tf.reduce_max(probs, axis=1)
        return {"label": pred_source, "confidence": pred_confidence}

    return serving_fn
# Load the pre-trained ViT B-16 classifier from the Hub.
print("Loading model.")
model = TFViTForImageClassification.from_pretrained("google/vit-base-patch16-224")
print("Model loaded.")

# Determine model variables.
CONCRETE_INPUT = "pixel_values"
SIZE = feature_extractor.size
INPUT_SHAPE = (SIZE, SIZE, 3)
print("Saving model.")
tf.saved_model.save(
    model,
    LOCAL_MODEL_DIR,
    signatures={"serving_default": model_exporter(model)},
)
print("Model saved.")
Then conversion:
import glob
import tensorflow as tf
LOCAL_IMAGE_PATH = "imagenette-validation-samples"
ORIGINAL_MODEL_PATH = "vit"
TENSORRT_MODEL_DIR = "vit-tensorrt"
def convert_to_string(image_path):
    with open(image_path, "rb") as f:
        image_string = f.read()
    return image_string

def calibration_input_fn(image_bytes):
    def fn():
        for img_bytes in image_bytes:
            yield tf.convert_to_tensor(img_bytes)
    return fn
all_image_paths = glob.glob(f"{LOCAL_IMAGE_PATH}/*.png")
print(f"Total images found: {len(all_image_paths)}.")
all_images_bytes = [[convert_to_string(image_path)] for image_path in all_image_paths]
print(f"Length of the totyal image image bytes: {len(all_images_bytes)}.")
params = tf.experimental.tensorrt.ConversionParams(
    precision_mode="FP16",
    max_workspace_size_bytes=2 << 32,  # 8,589,934,592 bytes
    maximum_cached_engines=100,
    minimum_segment_size=3,
    allow_build_at_runtime=True,
)
converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir=ORIGINAL_MODEL_PATH, conversion_params=params
)
converter.convert()
converter.build(input_fn=calibration_input_fn(all_images_bytes))
converter.save(TENSORRT_MODEL_DIR)
To get the imagenette-validation-samples directory, run the following from the container:
$ wget https://github.com/sayakpaul/deploy-hf-tf-vision-models/releases/download/3.0/imagenette-validation-samples.tar.gz
$ tar xf imagenette-validation-samples.tar.gz
When running conversion, I am getting:
Traceback (most recent call last):
File "convert_to_tensor.py", line 41, in <module>
converter.build(input_fn=calibration_input_fn(all_images_bytes))
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/compiler/tensorrt/trt_convert.py", line 1447, in build
func(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py", line 1602, in __call__
return self._call_impl(args, kwargs)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/wrap_function.py", line 243, in _call_impl
return super(WrappedFunction, self)._call_impl(
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py", line 1620, in _call_impl
return self._call_with_flat_signature(args, kwargs, cancellation_manager)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py", line 1669, in _call_with_flat_signature
return self._call_flat(args, self.captured_inputs, cancellation_manager)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py", line 1860, in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/function.py", line 497, in call
outputs = execute.execute(
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/execute.py", line 54, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:
Detected at node 'StatefulPartitionedCall/PartitionedCall/map/TensorArrayUnstack/TensorListFromTensor' defined at (most recent call last):
File "convert_to_tensor.py", line 40, in <module>
converter.convert()
Node: 'StatefulPartitionedCall/PartitionedCall/map/TensorArrayUnstack/TensorListFromTensor'
Detected at node 'StatefulPartitionedCall/PartitionedCall/map/TensorArrayUnstack/TensorListFromTensor' defined at (most recent call last):
File "convert_to_tensor.py", line 40, in <module>
converter.convert()
Node: 'StatefulPartitionedCall/PartitionedCall/map/TensorArrayUnstack/TensorListFromTensor'
2 root error(s) found.
(0) INVALID_ARGUMENT: Tensor must be at least a vector, but saw shape: []
[[{{node StatefulPartitionedCall/PartitionedCall/map/TensorArrayUnstack/TensorListFromTensor}}]]
[[StatefulPartitionedCall/GatherV2/_426]]
(1) INVALID_ARGUMENT: Tensor must be at least a vector, but saw shape: []
[[{{node StatefulPartitionedCall/PartitionedCall/map/TensorArrayUnstack/TensorListFromTensor}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_pruned_43026]
Without converter.build(), the conversion succeeds, but the latency of the resulting model is higher.
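For reference, the working variant is just the same conversion with the engine pre-building step left out (a sketch reusing the params, ORIGINAL_MODEL_PATH, and TENSORRT_MODEL_DIR defined above):

converter = tf.experimental.tensorrt.Converter(
    input_saved_model_dir=ORIGINAL_MODEL_PATH, conversion_params=params
)
converter.convert()
# No converter.build(...) here; with allow_build_at_runtime=True the TensorRT
# engines are built lazily on the first inference calls instead, which is
# presumably where the extra latency shows up.
converter.save(TENSORRT_MODEL_DIR)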
Notes
I made the model accept compressed image strings (serialized JPEGs) to reduce request payload sizes.
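The serving setup itself isn't shown here; as an illustration of what such a payload could look like against a TF Serving style REST endpoint (the URL and image file below are hypothetical), the client sends the base64-encoded JPEG bytes:

import base64
import json

import requests

# Hypothetical TF Serving REST endpoint for the exported "vit" model.
SERVING_URL = "http://localhost:8501/v1/models/vit:predict"

with open("some-image.jpg", "rb") as f:  # hypothetical sample image
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

# TF Serving's REST API turns {"b64": ...} entries back into raw bytes, so the
# serving signature receives the serialized JPEG string it expects.
payload = json.dumps({"instances": [{"b64": image_b64}]})
response = requests.post(SERVING_URL, data=payload)
print(response.json())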
System information
NVIDIA
I am using an NGC container for this work. Here's how I am running the Docker image:
After this, I get terminal access to the container.
TensorFlow build details within the container
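(The exact build details aren't reproduced here; inside the container they can be printed with a snippet like the following.)

import tensorflow as tf

# Prints the TF version along with the CUDA/cuDNN/TensorRT build information.
print(tf.__version__)
print(tf.sysconfig.get_build_info())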
What am I missing out on?