google-research / bert

TensorFlow code and pre-trained models for BERT
https://arxiv.org/abs/1810.04805
Apache License 2.0
38.14k stars 9.6k forks

Multiclassification Error Randomly - [predictions must be in [0, 1]] #923

Open 1337-Pete opened 4 years ago

1337-Pete commented 4 years ago

I've built a multi-class classification model using the IMDb example linked below. I added an extra label for "neutral" classification and have had some success running my model. However, I randomly encounter this error from time to time and then find myself unable to evaluate on new training and testing data. I have no idea why I'm getting this error about predictions. Any thoughts?

IMDB Example https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb

ERROR

```
     [[node auc/assert_less_equal/Assert/Assert (defined at C:/Users/WTC/Desktop/AI/Sentiment/BERT/BERT_neutral.py:237) ]]

Caused by op 'auc/assert_less_equal/Assert/Assert', defined at:
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\spyder_kernels\console\__main__.py", line 11, in <module>
    start.main()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\spyder_kernels\console\start.py", line 310, in main
    kernel.start()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\platform\asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\asyncio\base_events.py", line 438, in run_forever
    self._run_once()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\asyncio\base_events.py", line 1451, in _run_once
    handle._run()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
    ret = callback()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\gen.py", line 781, in inner
    self.run()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3220, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-c13ab69e0284>", line 1, in <module>
    runfile('C:/Users/WTC/Desktop/AI/Sentiment/BERT/BERT_neutral.py', wdir='C:/Users/WTC/Desktop/AI/Sentiment/BERT')
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)
  File "C:/Users/WTC/Desktop/AI/Sentiment/BERT/BERT_neutral.py", line 351, in <module>
    estimator.evaluate(input_fn=test_input_fn, steps=None)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 469, in evaluate
    name=name)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 511, in _actual_eval
    return _evaluate()
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 493, in _evaluate
    self._evaluate_build_graph(input_fn, hooks, checkpoint_path))
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1424, in _evaluate_build_graph
    self._call_model_fn_eval(input_fn, self.config))
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1460, in _call_model_fn_eval
    features, labels, model_fn_lib.ModeKeys.EVAL, config)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1112, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "C:/Users/WTC/Desktop/AI/Sentiment/BERT/BERT_neutral.py", line 268, in model_fn
    eval_metrics = metric_fn(label_ids, predicted_labels)
  File "C:/Users/WTC/Desktop/AI/Sentiment/BERT/BERT_neutral.py", line 237, in metric_fn
    predicted_labels)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\ops\metrics_impl.py", line 722, in auc
    labels, predictions, thresholds, weights)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\ops\metrics_impl.py", line 523, in _confusion_matrix_at_thresholds
    message='predictions must be in [0, 1]')
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\ops\check_ops.py", line 868, in assert_less_equal
    return control_flow_ops.Assert(condition, data, summarize=summarize)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\util\tf_should_use.py", line 193, in wrapped
    return _add_should_use_warning(fn(*args, **kwargs))
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 160, in Assert
    return gen_logging_ops._assert(condition, data, summarize, name="Assert")
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\ops\gen_logging_ops.py", line 72, in _assert
    name=name)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 3300, in create_op
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): assertion failed: [predictions must be in [0, 1]] [Condition x <= y did not hold element-wise:x (loss/Squeeze:0) = ] [0 0 0...] [y (auc/Cast_1:0) = ] [1]
     [[node auc/assert_less_equal/Assert/Assert (defined at C:/Users/WTC/Desktop/AI/Sentiment/BERT/BERT_neutral.py:237) ]]
```
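
For context, the assertion in that traceback comes from `tf.metrics.auc`, which requires its `predictions` argument to be scores in [0, 1]. A minimal TF 1.x sketch (made-up values, not my actual tensors) that raises the identical error:

```python
# Minimal TF 1.x sketch (hypothetical values) of the check behind the assert above.
# tf.metrics.auc treats `predictions` as scores in [0, 1], so an integer class id
# such as 2 violates the assert_less_equal added in _confusion_matrix_at_thresholds.
import tensorflow as tf

labels = tf.constant([0, 1, 2])
predicted_labels = tf.constant([0, 2, 1])  # argmax class ids, not probabilities

auc_value, update_op = tf.metrics.auc(labels, tf.cast(predicted_labels, tf.float32))

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)  # InvalidArgumentError: predictions must be in [0, 1]
```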
1337-Pete commented 4 years ago

MY CODE

```python
from datetime import datetime  # used below to time the training run

import tensorflow as tf
import tensorflow_hub as hub
import os
import pandas as pd
import bert
from bert import tokenization
from bert import run_classifier
from bert import optimization

######################## SETTINGS ########################

# directory to store model output and checkpoints
OUTPUT_DIR = 'C:/Users/WTC/Desktop/AI/Sentiment/BERT'#@param 

# Set DO_DELETE to True to delete and recreate OUTPUT_DIR if it already exists
DO_DELETE = False

if DO_DELETE:
  try:
    tf.gfile.DeleteRecursively(OUTPUT_DIR)
  except:
    # Doesn't matter if the directory didn't exist
    pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

######################## DATA ########################

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
  data = {}
  data["sentence"] = []
  #data["sentiment"] = []
  for file_path in os.listdir(directory):
    with open(os.path.join(directory, file_path), "r", encoding="utf8") as f:
      data["sentence"].append(f.read())
      #data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
  return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
  neu_df = load_directory_data(os.path.join(directory, "neu"))
  pos_df = load_directory_data(os.path.join(directory, "pos"))
  neg_df = load_directory_data(os.path.join(directory, "neg"))
  neu_df["polarity"] = '2'
  pos_df["polarity"] = '1'
  neg_df["polarity"] = '0'

  return pd.concat([neu_df, pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# load the dataset files (they are already on local disk, so no download via tf.keras.utils.get_file is needed)
def download_and_load_datasets(force_download=False):
  train_df = load_dataset("C:/Users/WTC/Desktop/AI/Sentiment/twitter scrape/data/train")
  test_df = load_dataset("C:/Users/WTC/Desktop/AI/Sentiment/twitter scrape/data/test")

  return train_df, test_df

train, test = download_and_load_datasets()

# subsample the training and test sets
train = train.sample(1864)
test = test.sample(481)

DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label list - 0 is for negative, 1 for positive, 2 for neutral
label_list = ['0', '1', '2']

######################## DATA PREPROCESSING ########################

# transform the data into BERT's input format:
# 1. Create InputExamples
# 2. Preprocess the data:
#    - Lowercase the text
#    - Tokenize ('sally says hi' -> ['sally', 'says', 'hi'])
#    - Break words into WordPieces ('calling' -> ['call', '##ing'])
#    - Map the tokens to indexes using the vocab file provided by BERT
#    - Add the special 'CLS' and 'SEP' tokens
#    - Append 'index' and 'segment' tokens to each input

# text_a is the text to classify - the 'sentence' column in our dataframe
# text_b is only used when training a model on sentence relationships (e.g. is text_b a translation of text_a, or vice versa?) - WE WILL NOT USE THIS

# INPUT EXAMPLES (create examples from the data)

# 'guid' is a globally unique ID for bookkeeping - we won't use it here
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample (guid = None, text_a = x[DATA_COLUMN], text_b = None, label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample (guid = None, text_a = x[DATA_COLUMN], text_b = None, label = x[LABEL_COLUMN]), axis = 1)

#PREPROCESSING DATA

# uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
  """Get the vocab file and casing info from the Hub module."""
  with tf.Graph().as_default():
    bert_module = hub.Module(BERT_MODEL_HUB)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    with tf.Session() as sess:
        #load vocab file and use lowercase data
      vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                            tokenization_info["do_lower_case"]])

  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=do_lower_case)

# initialization of tokenizer 
tokenizer = create_tokenizer_from_hub_module()

# convert our InputExamples into features that BERT can understand
MAX_SEQ_LENGTH = 128 # sequences are at most 128 tokens long
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

######################## CREATING A MODEL ########################

# Load the BERT TF Hub module, then create a single new layer that will be trained to adapt BERT to our sentiment task
# Fine-tuning is the strategy of reusing an already (mostly) trained model

def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model."""

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own classification layer to fine-tune on our sentiment data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
    output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
    # If we're predicting, we want the predicted labels and the (log) probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're training/evaluating, compute the loss between predicted and actual labels
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns `model_fn` closure for TPUEstimator."""
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    # TRAIN and EVAL
    if not is_predicting:

      (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

      # Calculate evaluation metrics. 
      def metric_fn(label_ids, predicted_labels):
        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
        f1_score = tf.contrib.metrics.f1_score(
            label_ids,
            predicted_labels)
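        # NOTE: tf.metrics.auc expects `predictions` to be scores in [0, 1];
        # `predicted_labels` here are argmax class ids (0/1/2 in this 3-class
        # setup), which is what the "predictions must be in [0, 1]" assert in
        # the traceback refers to.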
        auc = tf.metrics.auc(
            label_ids,
            predicted_labels)
        recall = tf.metrics.recall(
            label_ids,
            predicted_labels)
        precision = tf.metrics.precision(
            label_ids,
            predicted_labels) 
        true_pos = tf.metrics.true_positives(
            label_ids,
            predicted_labels)
        true_neg = tf.metrics.true_negatives(
            label_ids,
            predicted_labels)   
        false_pos = tf.metrics.false_positives(
            label_ids,
            predicted_labels)  
        false_neg = tf.metrics.false_negatives(
            label_ids,
            predicted_labels)
        return {
            "eval_accuracy": accuracy,
            "f1_score": f1_score,
            "auc": auc,
            "precision": precision,
            "recall": recall,
            "true_positives": true_pos,
            "true_negatives": true_neg,
            "false_positives": false_pos,
            "false_negatives": false_neg
        }

      eval_metrics = metric_fn(label_ids, predicted_labels)

      if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          train_op=train_op)
      else:
        return tf.estimator.EstimatorSpec(mode=mode,
          loss=loss,
          eval_metric_ops=eval_metrics)
    else:
      (predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Return the actual model function in the closure
  return model_fn

# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 16  # number of samples processed before the model is updated.
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 4.0 # the number of complete passes through the training dataset.

# Warmup is a period of time where the learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1

# Compute the number of train and warmup steps from the batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
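# e.g., with the 1864 training examples sampled above and BATCH_SIZE = 16:
#   num_train_steps = int(1864 / 16 * 4.0) = 466, num_warmup_steps = int(466 * 0.1) = 46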

# Model configs
SAVE_CHECKPOINTS_STEPS = 0
SAVE_SUMMARY_STEPS = 0
# Specify the output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})

# Create an input builder function that takes our training feature set (train_features) and produces a generator
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

#actually train it
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

#use test data to see how well the model did
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

estimator.evaluate(input_fn=test_input_fn, steps=None)

# Code for making predictions on new sentences:
def getPrediction(in_sentences):
  # index order must match label_list above: '0' = negative, '1' = positive, '2' = neutral
  labels = ["Negative", "Positive", "Neutral"]
  input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = '0') for x in in_sentences] # '0' is just a dummy label; guid is unused
  input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
  predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
  predictions = estimator.predict(predict_input_fn)
  return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]
```
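
A hypothetical call (placeholder sentences, mirroring the original notebook) would look like:

```python
# Hypothetical usage sketch - the sentences are placeholders, not real data.
pred_sentences = [
    "That movie was absolutely awful",
    "The acting was a bit lacking",
    "The film was creative and surprising",
]
predictions = getPrediction(pred_sentences)
print(predictions)  # [(sentence, log-probabilities, predicted label), ...]
```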
yasimk commented 4 years ago

I was also not able to crack this problem, but I can see that the output activation needs to be changed from log_softmax to sigmoid. There are a few articles that claim to have cracked it:

https://towardsdatascience.com/beginners-guide-to-bert-for-multi-classification-task-92f5445c2d7c
https://towardsdatascience.com/building-a-multi-label-text-classifier-using-bert-and-tensorflow-f188e0ecdc5d
https://medium.com/huggingface/multi-label-text-classification-using-bert-the-mighty-transformer-69714fa3fb3d

Let me know if you were able to solve it.
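
Roughly, the change those articles describe looks like this (just a sketch, assuming the `logits` and `one_hot_labels` tensors from the code above; I have not verified it end to end):

```python
# Sketch only: a sigmoid head instead of log_softmax, as in the linked articles.
# Assumes `logits` and `one_hot_labels` are [batch_size, num_labels] tensors
# built exactly as in create_model() above.
probs = tf.nn.sigmoid(logits)  # independent per-class scores in [0, 1]

per_example_loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=one_hot_labels, logits=logits),
    axis=-1)
loss = tf.reduce_mean(per_example_loss)

# Scores in [0, 1] (rather than argmax class ids) are also the kind of
# `predictions` tensor that tf.metrics.auc accepts.
```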