kpe / bert-for-tf2

A Keras TensorFlow 2.0 implementation of BERT, ALBERT and adapter-BERT.
https://github.com/kpe/bert-for-tf2
MIT License
803 stars 193 forks source link

There may be something wrong with this convenient module when using model_to_estimator in tensorflow 2.0 #56

Closed Bin4writing closed 4 years ago

Bin4writing commented 4 years ago

My script raises a ValueError when invoking `model_to_estimator`:

image

My code:

        # Three int32 Keras inputs, each of length max_seq_len: token ids,
        # segment (token type) ids and the attention mask.
        input_token_ids = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='input_ids')
        input_segment_ids = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='token_type_ids')
        input_mask = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='input_mask')
        # Create the BERT layer from parameters obtained for model_dir
        # (presumably the pretrained checkpoint's config -- confirm against bert-for-tf2).
        bert_params = bert.params_from_pretrained_ckpt(model_dir)
        l_bert = bert.BertModelLayer.from_params(bert_params)
        # Token and segment ids are the layer inputs; input_mask is passed via the
        # `mask` keyword rather than as a third input.
        bert_output = l_bert(inputs=[input_token_ids, input_segment_ids], mask=input_mask)
        # Keep only sequence position 0 of the BERT output
        # (presumably the [CLS] token -- confirm).
        first_token = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
        # Classification head: tanh Dense of the same width as first_token,
        # dropout (rate 0.1), then a num_labels-wide Dense named 'logits'.
        pooled_output = tf.keras.layers.Dense(units=first_token.shape[-1], activation=tf.math.tanh)(first_token)
        dropout = tf.keras.layers.Dropout(rate=0.1)(pooled_output)
        logits = tf.keras.layers.Dense(units=num_labels, name='logits')(dropout)
        output_prob = tf.keras.layers.Softmax(name='output_prob')(logits)
        # The model has three inputs and two outputs: the raw logits and their softmax.
        model = tf.keras.Model(inputs=[input_token_ids, input_segment_ids, input_mask], outputs=[logits, output_prob])
        model.build(input_shape=[(None, max_seq_len,), (None, max_seq_len,), (None, max_seq_len,)])
        # freeze_bert_layers is defined elsewhere; its exact effect on l_bert is
        # not visible here. It runs before the checkpoint weights are loaded.
        freeze_bert_layers(l_bert)
        # Load weights into the BERT layer from 'bert_model.ckpt' inside model_dir.
        bert.load_stock_weights(l_bert, op.join(model_dir, 'bert_model.ckpt'))
        # Per-weight decay table for the optimizer: every weight name accepted by
        # use_weight_decay() gets a coefficient of 0.01, all other names are
        # removed from the table.
        weight_decays = get_weight_decays(model)
        # Iterate over a snapshot of the keys. Deleting entries while looping over
        # the live dict (as `for k, v in weight_decays.items()` did) raises
        # "RuntimeError: dictionary changed size during iteration".
        for name in list(weight_decays):
            if use_weight_decay(name):
                weight_decays[name] = 0.01
            else:
                del weight_decays[name]
        model.compile(
            optimizer=create_optimizer(
                init_lr=learning_rate,
                steps=steps,
                warmup_steps=warmup_steps,
                weight_decays=weight_decays
            ),
            loss={
                      'logits': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
            },  
            metrics=[tf.keras.metrics.AUC()]
        )
        model.summary()
        return tf.keras.estimator.model_to_estimator(keras_model=model,
                                                     model_dir=args.model_dir)

Could you help me? I would be very grateful.