kpe / bert-for-tf2

A Keras TensorFlow 2.0 implementation of BERT, ALBERT and adapter-BERT.
https://github.com/kpe/bert-for-tf2
MIT License

TensorFlow ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list) #73

Closed · mitramir55 closed this issue 4 years ago

mitramir55 commented 4 years ago

I'm trying to run this code in Colab. Interestingly, I was running the same code in Colab a few days ago, but now it doesn't work; the code also works in a Kaggle kernel. Why do you think this error occurs?

I tried using np.array() and tf.convert_to_tensor() on the model's inputs, and also changed the versions of tensorflow and tensorflow-hub, but none of it worked. This is the Colab notebook if you need more info. Thanks in advance!

In short, the code uses this class:

```python
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam
from sklearn.model_selection import StratifiedKFold


class DisasterDetector:

    def __init__(self, tokenizer, bert_layer, max_len=30, lr=0.0001,
                 epochs=15, batch_size=32, dtype=tf.int32,
                 activation='sigmoid', optimizer='SGD',
                 beta_1=0.9, beta_2=0.999, epsilon=1e-07,
                 metrics='accuracy', loss='binary_crossentropy'):
        self.lr = lr
        self.epochs = epochs
        self.max_len = max_len
        self.batch_size = batch_size
        self.tokenizer = tokenizer
        self.bert_layer = bert_layer
        self.models = []

        self.activation = activation
        self.optimizer = optimizer
        self.dtype = dtype

        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

        self.metrics = metrics
        self.loss = loss

    def encode(self, texts):
        # Build token-id, mask, and segment arrays, padding each row to max_len.
        all_tokens = []
        masks = []
        segments = []

        for text in texts:
            tokenized = self.tokenizer.convert_tokens_to_ids(
                ['[CLS]'] + self.tokenizer.tokenize(text) + ['[SEP]'])

            len_zeros = self.max_len - len(tokenized)

            padded = tokenized + [0] * len_zeros
            mask = [1] * len(tokenized) + [0] * len_zeros
            segment = [0] * self.max_len

            all_tokens.append(padded)
            masks.append(mask)
            segments.append(segment)

        print(len(all_tokens[0]))
        return np.array(all_tokens), np.array(masks), np.array(segments)

    def make_model(self):
        input_word_ids = Input(shape=(self.max_len,), dtype=tf.int32,
                               name='input_word_ids')
        input_mask = Input(shape=(self.max_len,), dtype=tf.int32,
                           name='input_mask')
        segment_ids = Input(shape=(self.max_len,), dtype=tf.int32,
                            name='segment_ids')

        # pooled_output: (batch, hidden) summary vector;
        # sequence_output: (batch, max_len, hidden) per-token embeddings
        pooled_output, sequence_output = self.bert_layer([input_word_ids,
                                                          input_mask,
                                                          segment_ids])

        clf_output = sequence_output[:, 0, :]  # embedding of the [CLS] token
        out = Dense(1, activation=self.activation)(clf_output)

        model = Model(inputs=[input_word_ids, input_mask, segment_ids],
                      outputs=out)

        # Compare strings with ==, not `is`.
        if self.optimizer == 'SGD':
            optimizer = SGD(learning_rate=self.lr)
        elif self.optimizer == 'Adam':
            optimizer = Adam(learning_rate=self.lr, beta_1=self.beta_1,
                             beta_2=self.beta_2, epsilon=self.epsilon)

        # Pass the optimizer object built above, not the configuration string.
        model.compile(loss=self.loss, optimizer=optimizer,
                      metrics=[self.metrics])

        return model

    def train(self, x, k=3):
        kfold = StratifiedKFold(n_splits=k, shuffle=True)

        for fold, (train_idx, val_idx) in enumerate(
                kfold.split(x['cleaned_text'], x['target'])):
            print('fold:', fold)

            x_trn = self.encode(x.loc[train_idx, 'cleaned_text'])
            x_val = self.encode(x.loc[val_idx, 'cleaned_text'])
            y_trn = np.array(x.loc[train_idx, 'target'], dtype=np.uint8)
            y_val = np.array(x.loc[val_idx, 'target'], dtype=np.uint8)
            print('the data type of y train:', type(y_trn))
            print('x_val shape', x_val[0].shape)
            print('x_trn shape', x_trn[0].shape)

            model = self.make_model()
            print('model made.')
            model.fit(x_trn, tf.convert_to_tensor(y_trn),
                      validation_data=(x_val, tf.convert_to_tensor(y_val)),
                      batch_size=self.batch_size, epochs=self.epochs)

            self.models.append(model)
```
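One thing worth noting about `encode`: it pads short sequences but never truncates long ones. When a tokenized text is longer than `max_len`, `len_zeros` goes negative, `[0] * len_zeros` is an empty list, and that row keeps its original (longer) length, so `np.array(all_tokens)` becomes a ragged array of lists. A minimal sketch of a fixed-length variant, assuming it is acceptable to truncate the word pieces to fit `max_len` (hypothetical method name `encode_fixed`, meant as a drop-in replacement inside the class above):

```python
def encode_fixed(self, texts):
    # Sketch only: truncate before padding so every row is exactly self.max_len long.
    all_tokens, masks, segments = [], [], []
    for text in texts:
        # Reserve two slots for [CLS] and [SEP].
        tokens = self.tokenizer.tokenize(text)[: self.max_len - 2]
        ids = self.tokenizer.convert_tokens_to_ids(['[CLS]'] + tokens + ['[SEP]'])
        pad = self.max_len - len(ids)  # always >= 0 now
        all_tokens.append(ids + [0] * pad)
        masks.append([1] * len(ids) + [0] * pad)
        segments.append([0] * self.max_len)
    return np.array(all_tokens), np.array(masks), np.array(segments)
```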

And after defining it:

```python
classifier = DisasterDetector(tokenizer=tokenizer, bert_layer=bert_layer,
                              max_len=max_len, lr=0.0001, epochs=10,
                              activation='sigmoid', batch_size=32,
                              optimizer='SGD', beta_1=0.9, beta_2=0.999,
                              epsilon=1e-07)
```

And when I want to train it:

```python
classifier.train(train_cleaned)
```

I get this error:

```
ValueError                                Traceback (most recent call last)
<ipython-input> in <module>()
----> 1 classifier.train(train_cleaned)

<ipython-input> in train(self, x, k)
    109         model.fit(x_trn, y_trn,
    110                   validation_data = (x_val, y_val),
--> 111                   batch_size=self.batch_size, epochs = self.epochs)
    112
    113         self.models.append(model)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, sample_weight_modes, batch_size, epochs, steps, shuffle, **kwargs)
    264     super(TensorLikeDataAdapter, self).__init__(x, y, **kwargs)
--> 265     x, y, sample_weights = _process_tensorlike((x, y, sample_weights))

... (intermediate frames through nest.map_structure, ops.convert_to_tensor,
     and constant_op omitted) ...

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
     96     dtype = dtypes.as_dtype(dtype).as_datatype_enum
     97     ctx.ensure_initialized()
---> 98     return ops.EagerTensor(value, ctx.device_name, dtype)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).
```
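The last frame is `ops.EagerTensor(value, ...)` failing on a NumPy array whose `dtype` is `object`, i.e. an array whose elements are Python lists of unequal length. In the class above, that is exactly what `encode` produces whenever some tokenized text exceeds `max_len` (see the note after the class), which would also explain why the same code runs elsewhere with different data or a different `max_len`. A minimal, self-contained reproduction of the mechanism, with made-up values, only to illustrate:

```python
import numpy as np
import tensorflow as tf

# Equal-length rows -> a proper 2-D integer array; conversion succeeds.
ok = np.array([[1, 2, 3], [4, 5, 6]])
print(ok.dtype)            # int64 (platform-dependent)
tf.convert_to_tensor(ok)   # fine

# Unequal-length rows -> an object array holding Python lists.
ragged = np.array([[1, 2, 3], [4, 5]], dtype=object)
print(ragged.dtype)        # object
# Uncommenting the next line raises:
# ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).
# tf.convert_to_tensor(ragged)
```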