dbmdz / berts

DBMDZ BERT, DistilBERT, ELECTRA, GPT-2 and ConvBERT models
MIT License

Issue with TensorFlow #52

Open · ShubhamModi77 opened 8 months ago

ShubhamModi77 commented 8 months ago

Hello, I am using the bert-base-german-cased model with TensorFlow, but I get the following error in the embedding layer:

TypeError                                 Traceback (most recent call last)
Cell In[57], line 1
----> 1 TFBertEmbeddings = bert(input_ids,attention_mask = attention_mask)[1]

File ~/work/myenv/lib/python3.11/site-packages/tf_keras/src/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67 filtered_tb = _process_traceback_frames(e.__traceback__)
     68 # To get the full stack trace, call:
     69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

File ~/work/myenv/lib/python3.11/site-packages/transformers/modeling_tf_utils.py:428, in unpack_inputs.<locals>.run_call_with_unpacked_inputs(self, *args, **kwargs)
    425 config = self.config
    427 unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
--> 428 return func(self, **unpacked_inputs)

File ~/work/myenv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py:1234, in TFBertModel.call(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, training)
   1190 @unpack_inputs
   1191 @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
   1192 @add_code_sample_docstrings(
   (...)
   1212     training: Optional[bool] = False,
   1213 ) -> Union[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]]:
   1214     r"""
   1215     encoder_hidden_states (tf.Tensor of shape (batch_size, sequence_length, hidden_size), optional):
   1216         Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
   (...)
   1232         past_key_values). Set to False during training, True during generation
   1233     """
-> 1234 outputs = self.bert(
   1235     input_ids=input_ids,
   1236     attention_mask=attention_mask,
   1237     token_type_ids=token_type_ids,
   1238     position_ids=position_ids,
   1239     head_mask=head_mask,
   1240     inputs_embeds=inputs_embeds,
   1241     encoder_hidden_states=encoder_hidden_states,
   1242     encoder_attention_mask=encoder_attention_mask,
   1243     past_key_values=past_key_values,
   1244     use_cache=use_cache,
   1245     output_attentions=output_attentions,
   1246     output_hidden_states=output_hidden_states,
   1247     return_dict=return_dict,
   1248     training=training,
   1249 )
   1250 return outputs

File ~/work/myenv/lib/python3.11/site-packages/transformers/modeling_tf_utils.py:428, in unpack_inputs.<locals>.run_call_with_unpacked_inputs(self, *args, **kwargs)
    425 config = self.config
    427 unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
--> 428 return func(self, **unpacked_inputs)

File ~/work/myenv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py:912, in TFBertMainLayer.call(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, training)
    909 if token_type_ids is None:
    910     token_type_ids = tf.fill(dims=input_shape, value=0)
--> 912 embedding_output = self.embeddings(
    913     input_ids=input_ids,
    914     position_ids=position_ids,
    915     token_type_ids=token_type_ids,
    916     inputs_embeds=inputs_embeds,
    917     past_key_values_length=past_key_values_length,
    918     training=training,
    919 )
    921 # We create a 3D attention mask from a 2D tensor mask.
    922 # Sizes are [batch_size, 1, 1, to_seq_length]
    923 # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
    924 # this attention mask is more simple than the triangular masking of causal attention
    925 # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
    926 attention_mask_shape = shape_list(attention_mask)

File ~/work/myenv/lib/python3.11/site-packages/transformers/models/bert/modeling_tf_bert.py:206, in TFBertEmbeddings.call(self, input_ids, position_ids, token_type_ids, inputs_embeds, past_key_values_length, training)
    203     raise ValueError("Need to provide either input_ids or input_embeds.")
    205 if input_ids is not None:
--> 206     check_embeddings_within_bounds(input_ids, self.config.vocab_size)
    207     inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
    209 input_shape = shape_list(inputs_embeds)[:-1]

File ~/work/myenv/lib/python3.11/site-packages/transformers/tf_utils.py:163, in check_embeddings_within_bounds(tensor, embed_dim, tensor_name)
    153 def check_embeddings_within_bounds(tensor: tf.Tensor, embed_dim: int, tensor_name: str = "input_ids") -> None:
    154     """
    155     tf.gather, on which TF embedding layers are based, won't check positive out of bound indices on GPU, returning
    156     zeros instead. This function adds a check against that dangerous silent behavior.
    (...)
    161         tensor_name (str, optional): The name of the tensor to use in the error message.
    162     """
--> 163     tf.debugging.assert_less(
    164         tensor,
    165         tf.cast(embed_dim, dtype=tensor.dtype),
    166         message=(
    167             f"The maximum value of {tensor_name} ({tf.math.reduce_max(tensor)}) must be smaller than the embedding "
    168             f"layer's input dimension ({embed_dim}). The likely cause is some problem at tokenization time."
    169         ),
    170     )

File ~/work/myenv/lib/python3.11/site-packages/keras/src/layers/core/tf_op_layer.py:119, in KerasOpDispatcher.handle(self, op, args, kwargs)
    114 """Handle the specified operation with the specified arguments."""
    115 if any(
    116     isinstance(x, keras_tensor.KerasTensor)
    117     for x in tf.nest.flatten([args, kwargs])
    118 ):
--> 119     return TFOpLambda(op)(*args, **kwargs)
    120 else:
    121     return self.NOT_SUPPORTED

File ~/work/myenv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67 filtered_tb = _process_traceback_frames(e.__traceback__)
     68 # To get the full stack trace, call:
     69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

TypeError: Exception encountered when calling layer 'embeddings' (type TFBertEmbeddings).

Could not build a TypeSpec for name: "tf.debugging.assert_less_2/assert_less/Assert/Assert" op: "Assert" input: "tf.debugging.assert_less_2/assert_less/All" input: "tf.debugging.assert_less_2/assert_less/Assert/Assert/data_0" input: "tf.debugging.assert_less_2/assert_less/Assert/Assert/data_1" input: "tf.debugging.assert_less_2/assert_less/Assert/Assert/data_2" input: "Placeholder" input: "tf.debugging.assert_less_2/assert_less/Assert/Assert/data_4" input: "tf.debugging.assert_less_2/assert_less/y" attr { key: "summarize" value { i: 3 } } attr { key: "T" value { list { type: DT_STRING type: DT_STRING type: DT_STRING type: DT_INT32 type: DT_STRING type: DT_INT32 } } } of unsupported type <class 'tensorflow.python.framework.ops.Operation'>.

Call arguments received by layer 'embeddings' (type TFBertEmbeddings):
  • input_ids=<KerasTensor: shape=(None, 400) dtype=int32 (created by layer 'input_ids')>
  • position_ids=None
  • token_type_ids=<KerasTensor: shape=(None, 400) dtype=int32 (created by layer 'tf.fill_3')>
  • inputs_embeds=None
  • past_key_values_length=0
  • training=False

I have tried everything, but the issue is still not solved.
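
For context, the "Call arguments received" section above shows KerasTensor inputs of shape (None, 400), which suggests the model is being called on symbolic tf.keras.Input tensors while building a functional Keras model. The snippet below is a minimal sketch of that setup; the variable names, sequence length, and checkpoint id are assumptions based on the issue text and traceback, not the original notebook code:

    import tensorflow as tf
    from transformers import TFBertModel

    # Checkpoint id as written in the issue text; the exact hub id used in the
    # notebook is not shown, so this is an assumption.
    bert = TFBertModel.from_pretrained("bert-base-german-cased")

    max_len = 400  # matches the (None, 400) KerasTensor shapes in the traceback
    input_ids = tf.keras.layers.Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
    attention_mask = tf.keras.layers.Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")

    # The failing call from Cell In[57]: taking the pooled output (index 1) while
    # tracing the model over symbolic KerasTensors. According to the traceback, the
    # bounds check in check_embeddings_within_bounds (tf.debugging.assert_less) is
    # where the "Could not build a TypeSpec" TypeError is raised.
    TFBertEmbeddings = bert(input_ids, attention_mask=attention_mask)[1]

One direction that is sometimes suggested for this kind of KerasTensor tracing failure is to run the transformer on concrete tensors instead of building a functional graph around it, for example by wrapping the call in a subclassed tf.keras.Model. A hedged sketch of that idea, not confirmed against this exact environment or issue:

    class BertPooledEncoder(tf.keras.Model):
        """Hypothetical wrapper: the transformer runs inside call(), so it is not
        traced over KerasTensors while an outer functional model is being built."""

        def __init__(self, bert_model, **kwargs):
            super().__init__(**kwargs)
            self.bert = bert_model

        def call(self, inputs, training=False):
            outputs = self.bert(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                training=training,
            )
            return outputs[1]  # pooled output, as in the failing line above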