Open princyiakov opened 2 years ago
@princyiakov, the code shared is full of indentation errors. Please share a Colab gist with the reported issue, or simple standalone indented code with all dependencies, so that we can replicate the issue. Thank you!
!pip install -q -U "tensorflow-text==2.8.*"
!pip install cloudpickle
!pip install -q tf-models-official==2.7.0
import os
import shutil
import cloudpickle
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization # to create AdamW optimizer
url = 'https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
dataset = tf.keras.utils.get_file('aclImdb_v1.tar.gz', url,
                                  untar=True, cache_dir='.',
                                  cache_subdir='')
dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')
train_dir = os.path.join(dataset_dir, 'train')
AUTOTUNE = tf.data.AUTOTUNE
batch_size = 32
seed = 42
raw_train_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=seed)
class_names = raw_train_ds.class_names
train_ds = raw_train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=seed)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/test',
    batch_size=batch_size)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)
tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'
tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)
bert_model = hub.KerasLayer(tfhub_handle_encoder)
def build_classifier_model():
  text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
  encoder_inputs = preprocessing_layer(text_input)
  encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
  outputs = encoder(encoder_inputs)
  net = outputs['pooled_output']
  net = tf.keras.layers.Dropout(0.1)(net)
  net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)
  return tf.keras.Model(text_input, net)
classifier_model = build_classifier_model()
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
metrics = tf.metrics.BinaryAccuracy()
epochs = 1
steps_per_epoch = tf.data.experimental.cardinality(train_ds).numpy()
num_train_steps = steps_per_epoch * epochs
num_warmup_steps = int(0.1*num_train_steps)
init_lr = 3e-5
optimizer = optimization.create_optimizer(init_lr=init_lr,
                                          num_train_steps=num_train_steps,
                                          num_warmup_steps=num_warmup_steps,
                                          optimizer_type='adamw')
classifier_model.compile(optimizer=optimizer,
                         loss=loss,
                         metrics=metrics)
classifier_model.fit(x=train_ds,
                     validation_data=val_ds,
                     epochs=epochs)
pickle = cloudpickle.dumps(classifier_model)
unpickle = cloudpickle.loads(pickle)
@gowthamkpr, I was able to reproduce the issue on TensorFlow v2.8, v2.10, and nightly. Kindly find the gist of it here.
@princyiakov Can you please explain why you didn't use model.save(), as that's the recommended way to save the model? Thanks!!
@gowthamkpr Thank you for your response. Our application uses multiple models and frameworks, and cloudpickle helps us integrate them with ease. Using model.save() would restrict us to just TensorFlow. We want the flexibility to integrate with all frameworks and models.
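For readers hitting the same error, one possible workaround (not verified in this thread) is to register AdamWeightDecay in Keras's global custom-object registry before unpickling, so the Keras deserialization that runs during cloudpickle.loads can resolve the optimizer by name. This is a minimal sketch, assuming the AdamWeightDecay class is importable from official.nlp.optimization and that classifier_model is the trained model from the code above:

import cloudpickle
import tensorflow as tf
from official.nlp import optimization  # assumed to expose the AdamWeightDecay class

# Make the custom optimizer resolvable by name during deserialization.
tf.keras.utils.get_custom_objects()['AdamWeightDecay'] = optimization.AdamWeightDecay

# Round-trip the trained model through cloudpickle as in the report.
payload = cloudpickle.dumps(classifier_model)
restored_model = cloudpickle.loads(payload)

Registering the class globally keeps the cloudpickle workflow intact, at the cost of mutating a process-wide registry.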
Issue Type: Bug
Source: binary
TensorFlow Version: 2.8
Custom Code: No
OS Platform and Distribution: Linux
Python version: 3.9
Current Behaviour?
Hi team, a TensorFlow model compiled with the AdamWeightDecay optimizer does not load with cloudpickle. Here is the error when I load the pickled model: "ValueError: Unknown optimizer: AdamWeightDecay. Please ensure this object is passed to the custom_objects argument. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details." The problem only occurs with AdamWeightDecay; the same code works fine when the Adam optimizer is used instead.
Cloudpickle enables deserialisation of models regardless of which library they come from, but unlike the Adam optimizer, AdamWeightDecay cannot be deserialised unless the model is loaded explicitly through TensorFlow with custom_objects provided. The goal is to load the model using cloudpickle. Do you think there could be another possible solution to resolve this integration bottleneck?
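As a point of comparison (not part of the original report), the custom_objects route that the error message points to looks roughly like the sketch below. It assumes the AdamWeightDecay class is exposed by official.nlp.optimization in tf-models-official and reuses the classifier_model built in the reproduction code:

import tensorflow as tf
from official.nlp import optimization  # assumed to expose the AdamWeightDecay class

# Save the compiled model to a SavedModel directory, then reload it while
# telling Keras how to resolve the custom optimizer by its registered name.
classifier_model.save('bert_classifier')
reloaded_model = tf.keras.models.load_model(
    'bert_classifier',
    custom_objects={'AdamWeightDecay': optimization.AdamWeightDecay})

This restores the model through TensorFlow's own saving path; it does not address the multi-framework cloudpickle workflow described above.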
Standalone code to reproduce the issue

!pip install -q -U "tensorflow-text==2.8.*"
!pip install cloudpickle
!pip install -q tf-models-official==2.7.0

import os
import shutil
import cloudpickle
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization  # to create AdamW optimizer

url = 'https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
dataset = tf.keras.utils.get_file('aclImdb_v1.tar.gz', url,
                                  untar=True, cache_dir='.',
                                  cache_subdir='')
dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')
train_dir = os.path.join(dataset_dir, 'train')

AUTOTUNE = tf.data.AUTOTUNE
batch_size = 32
seed = 42

raw_train_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=seed)
class_names = raw_train_ds.class_names
train_ds = raw_train_ds.cache().prefetch(buffer_size=AUTOTUNE)

val_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=seed)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

test_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/test',
    batch_size=batch_size)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'
tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)
bert_model = hub.KerasLayer(tfhub_handle_encoder)

def build_classifier_model():
  text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
  encoder_inputs = preprocessing_layer(text_input)
  encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
  outputs = encoder(encoder_inputs)
  net = outputs['pooled_output']
  net = tf.keras.layers.Dropout(0.1)(net)
  net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)
  return tf.keras.Model(text_input, net)

classifier_model = build_classifier_model()
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
metrics = tf.metrics.BinaryAccuracy()
epochs = 1
steps_per_epoch = tf.data.experimental.cardinality(train_ds).numpy()
num_train_steps = steps_per_epoch * epochs
num_warmup_steps = int(0.1 * num_train_steps)

init_lr = 3e-5
optimizer = optimization.create_optimizer(init_lr=init_lr,
                                          num_train_steps=num_train_steps,
                                          num_warmup_steps=num_warmup_steps,
                                          optimizer_type='adamw')
classifier_model.compile(optimizer=optimizer,
                         loss=loss,
                         metrics=metrics)
classifier_model.fit(x=train_ds,
                     validation_data=val_ds,
                     epochs=epochs)

pickle = cloudpickle.dumps(classifier_model)
unpickle = cloudpickle.loads(pickle)
Relevant log output

WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 124). These functions will not be directly callable after loading.
ValueError Traceback (most recent call last)