Xilinx / Vitis-AI

Vitis AI is Xilinx’s development stack for AI inference on Xilinx hardware platforms, including both edge devices and Alveo cards.
https://www.xilinx.com/ai
Apache License 2.0
1.49k stars 630 forks source link

!vai_q_tensorflow quantize : ValueError: could not convert string to float: #976

Closed Afef00 closed 1 year ago

Afef00 commented 2 years ago

Hello, I was trying to retrain ResNet50 on Iris flower classification in order to deploy it on a ZCU104 board using PYNQ-DPU. I ran the Vitis AI CPU Docker container from the host using ./docker_run.sh xilinx/vitis-ai-cpu:1.3.411. The notebook runs fine for the training step with the following code:

import tensorflow as tf
print("TensorFlow version:", tf.__version__)
import cv2, random
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from random import shuffle 
from IPython.display import SVG
import numpy as np 
import pandas as pd 
import shutil
import matplotlib.pyplot as plt
%matplotlib inline 
from IPython.display import Image, display
from sklearn.model_selection import train_test_split
import os
# Root folder of the dataset: one sub-directory of images per flower class.
path = 'iris recognition/flowers'
# Only sub-directories count as classes.  Stray files next to them (e.g.
# .DS_Store or notebook checkpoints) would otherwise crash the per-class
# listing below and inflate num_classes, so they are filtered out here.
# Sorted so the printed order is deterministic.
labels = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
print(labels)
for doc in labels:
    image_count = len(os.listdir(os.path.join(path, doc)))
    print(doc, ':', image_count, ' images')
# Size of the softmax layer of the classifier built further down.
num_classes = len(labels)
IMAGE_SIZE = 224

# Create model: an ImageNet-pretrained ResNet50 without its classification
# head, followed by a small trainable head (global average pooling, dropout,
# softmax over `num_classes`).
backbone = tf.keras.applications.ResNet50(include_top=False, weights='imagenet')
model = tf.keras.Sequential([
    backbone,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
# Freeze the backbone (it is model.layers[0]) so only the new head is trained.
backbone.trainable = False
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()
train_folder = './iris recognition/flowers'

image_size = 224
# One generator serves both subsets: it applies the ResNet50 preprocessing
# and the augmentations, and reserves 20% of the images for validation.
data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    horizontal_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
    validation_split=0.2,  # set validation split
)

# Keyword arguments common to the training and validation iterators.
shared_flow_kwargs = dict(
    target_size=(image_size, image_size),
    class_mode='categorical',
)

train_generator = data_generator.flow_from_directory(
    train_folder,
    batch_size=32,
    subset='training',
    **shared_flow_kwargs
)
validation_generator = data_generator.flow_from_directory(
    train_folder,
    batch_size=12,
    subset='validation',
    **shared_flow_kwargs
)
NUM_EPOCHS = 70
EARLY_STOP_PATIENCE = 5
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint

# Single definition of the checkpoint location, shared by the callback that
# writes it and by the load_weights call that restores it after training.
CHECKPOINT_PATH = './working/best.hdf5'
# Make sure the target directory exists up front; writing the HDF5 checkpoint
# may fail if './working' is missing.
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

# Stop once val_loss has not improved for EARLY_STOP_PATIENCE epochs.
cb_early_stopper = EarlyStopping(monitor = 'val_loss', patience = EARLY_STOP_PATIENCE)
# Keep only the weights of the epoch with the best val_loss.
cb_checkpointer = ModelCheckpoint(filepath = CHECKPOINT_PATH,
                                  monitor = 'val_loss',
                                  save_best_only = True,
                                  mode = 'auto')
import math

fit_history = model.fit_generator(
    train_generator,
    steps_per_epoch=10,
    validation_data=validation_generator,
#     validation_steps=10,
    epochs=NUM_EPOCHS,
    callbacks=[cb_checkpointer, cb_early_stopper])
# Restore the best checkpoint before exporting: after early stopping the
# in-memory weights are those of the last epoch, not of the best one.
model.load_weights(CHECKPOINT_PATH)
model.save('Resnet_iris_tf1.h5')

After that, I froze the trained model using this Python script:

import cv2
import numpy as np
import sys, os
import tensorflow as tf
from tensorflow import keras

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.

    The sequence passed as ``output_names`` is not modified.

    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    graph = session.graph

    with graph.as_default():
        # Collect the variable op names once; both name lists below need them.
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))

        # Build a fresh list instead of extending in place, so the list the
        # caller passed in is never mutated.
        output_names = list(output_names or []) + variable_names

        input_graph_def = graph.as_graph_def()

        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""

        frozen_graph = tf.graph_util.convert_variables_to_constants(
            session, input_graph_def, output_names, freeze_var_names)

    return frozen_graph

if len(sys.argv) < 2:
    print("please specify the input model")
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module.
    sys.exit(1)
# Path of the Keras model file to freeze, taken from the command line.
model = sys.argv[1]

# Learning phase 0 = inference; set it before the model graph is built.
keras.backend.set_learning_phase(0)
# Load the model named on the command line (previously the argument was
# required but ignored in favour of a hard-coded file name).
loaded_model = keras.models.load_model(model)

# make list of output and input node names
input_names = [tensor.op.name for tensor in loaded_model.inputs]
output_names = [tensor.op.name for tensor in loaded_model.outputs]

input_line = 'input  node is{}'.format(input_names)
output_line = 'output node is{}'.format(output_names)
print(input_line)
print(output_line)

# Record the node names; they are needed later as --input_nodes and
# --output_nodes of the quantizer.  `with` closes the file even on error.
with open("input_output_node_name.txt", "w+") as f:
    f.write(input_line + "\n")
    f.write(output_line + "\n")

frozen_graph = freeze_session(keras.backend.get_session(), output_names=output_names)
tf.train.write_graph(frozen_graph, "./", "frozen_graph.pb", as_text=False)

However, when I tried to quantize the frozen model using the following command:

# The shape is quoted so the shell cannot treat "?" as a glob pattern.
!vai_q_tensorflow quantize \
    --input_frozen_graph frozen_graph_iris_tf1.pb \
    --input_fn input_func.calib_input \
    --output_dir quantized_resnet_iris_tf1 \
    --input_nodes resnet50_input \
    --output_nodes dense/Softmax \
    --input_shapes "?,224,224,3" \
    --calib_iter 32

I got the following error:

INFO: Checking Float Graph...
Traceback (most recent call last):
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/bin/vai_q_tensorflow", line 11, in <module>
    sys.exit(run_main())
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/contrib/decent_q/python/decent_q.py", line 1030, in run_main
    app.run(main=my_main, argv=[sys.argv[0]] + unparsed)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/python/platform/app.py", line 40, in run
    _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/absl/app.py", line 303, in run
    _run_main(main, args)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/absl/app.py", line 251, in _run_main
    sys.exit(main(argv))
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/contrib/decent_q/python/decent_q.py", line 1029, in <lambda>
    my_main = lambda unused_args: main(unused_args, FLAGS)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/contrib/decent_q/python/decent_q.py", line 674, in main
    flags.skip_check, flags.dump_as_xir)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/contrib/decent_q/python/decent_q.py", line 375, in quantize_frozen
    check_float_graph(input_graph_def, input_fn, q_config, s_config)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/contrib/decent_q/python/decent_q.py", line 289, in check_float_graph
    sess.run(output_tensors, feed_dict)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 956, in run
    run_metadata_ptr)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1149, in _run
    np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
  File "/opt/vitis_ai/conda/envs/vitis-ai-tensorflow/lib/python3.6/site-packages/numpy/core/numeric.py", line 538, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: could not convert string to float: 'iris-setosa/iris-01ab65973fd487a6cee4c5af1551c42b264eec5abab46bffd7c307ffef647e11.jpg'

To generate the calibration data, I do the following:

# The calibration array must contain image tensors, not file names.  Saving
# `validation_generator.filenames` stores path strings; the traceback shows
# those strings being fed to the graph's float input, which raises
# "ValueError: could not convert string to float".
# Indexing the iterator yields (images, labels) batches that have already
# gone through the generator's preprocessing; only the images are kept.
# NOTE(review): input_func.calib_input is not shown here -- confirm that it
# slices `data` into image batches for the input node.
calib_images = np.concatenate(
    [validation_generator[i][0] for i in range(len(validation_generator))])
np.savez('./calib_data_iris_tf1.npz', data=calib_images)

zhenzhen-AMD commented 1 year ago

Hi @Afef00 , I can't reproduce the problem. Can you provide input_func.py and dataset files? Thank you.

zhenzhen-AMD commented 1 year ago

Hi @Afef00 , Based on the error message, it is inferred that the error is caused by calibration data. Please check the input_func.py and dataset files.

zhenzhen-AMD commented 1 year ago

Closing this for now. Please re-open if there are any other concerns. Thanks.