strongio / keras-bert

A simple technique to integrate BERT from tf hub to keras
258 stars 108 forks source link

No pooling embeddings #20

Open zparcheta opened 5 years ago

zparcheta commented 5 years ago

Hi, I wonder if it is possible to modify the code, to get embeddings at word level instead of at the sentence level.

rachel-sorek commented 5 years ago

Yes, I would also like to know how to do this, since I would like to fine-tune BERT for entity tagging.

Thank you

zparcheta commented 5 years ago

Finally I did it. When I have a minute I will upload the code.

zparcheta commented 5 years ago

Hi, sorry for taking so long.

I think that by modifying the following code in the BERT class you can get word-level embeddings:

    def build(self, input_shape):
        """Instantiate the TF-Hub BERT module and register its variables.

        Partitions the module's variables into trainable and
        non-trainable weights depending on ``self.pooling``:

        - ``"first"``: CLS pooling — keeps the ``pooler/dense`` layer
          trainable and drops ``/cls/`` variables.
        - ``"mean"``: mean pooling — drops both ``/cls/`` and
          ``/pooler/`` variables; no extra head layers are trainable.
        - anything else: "no pooling" — only the fine-tuned encoder
          layers are trainable (used to obtain per-token embeddings).

        In every case the top ``self.n_fine_tune_layers`` encoder layers
        are added to the trainable set.
        """
        self.bert = hub.Module(
            self.bert_path, trainable=self.trainable, name=f"{self.name}_module"
        )

        # Remove unused layers from the trainable set.
        trainable_vars = self.bert.variables
        if self.pooling == "first":
            trainable_vars = [
                var for var in trainable_vars if "/cls/" not in var.name
            ]
            trainable_layers = ["pooler/dense"]
        elif self.pooling == "mean":
            trainable_vars = [
                var
                for var in trainable_vars
                if "/cls/" not in var.name and "/pooler/" not in var.name
            ]
            trainable_layers = []
        else:
            # Any other pooling value means "no pooling": the caller wants
            # raw token-level embeddings, so no pooling-specific layers are
            # trainable. Intentionally no error is raised here.
            trainable_layers = []

        # Select how many of the top encoder layers to fine-tune.
        # NOTE(review): assumes a 12-layer BERT base (encoder layers 0-11)
        # — confirm against the hub module actually used.
        for i in range(self.n_fine_tune_layers):
            trainable_layers.append(f"encoder/layer_{11 - i}")

        # Keep only the variables belonging to the selected layers.
        trainable_vars = [
            var
            for var in trainable_vars
            if any(layer in var.name for layer in trainable_layers)
        ]

        # Register trainable weights with Keras; everything else in the
        # module becomes a non-trainable weight.
        for var in trainable_vars:
            self._trainable_weights.append(var)

        for var in self.bert.variables:
            if var not in self._trainable_weights:
                self._non_trainable_weights.append(var)

    def call(self, inputs):
        """Run BERT on ``(input_ids, input_mask, segment_ids)`` and pool.

        Args:
            inputs: list/tuple of three integer tensors —
                input ids, attention mask, and segment ids.

        Returns:
            Depending on ``self.pooling``:
            - ``"first"``: the [CLS] ``pooled_output`` vector
              (batch, hidden) — assumed shape, confirm against the hub
              module's signature.
            - ``"mean"``: masked mean over ``sequence_output`` tokens.
            - anything else: the full masked ``sequence_output`` —
              per-token embeddings with padded positions zeroed out.
        """
        inputs = [K.cast(x, dtype="int32") for x in inputs]
        input_ids, input_mask, segment_ids = inputs
        bert_inputs = dict(
            input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
        )

        def mul_mask(x, m):
            # Zero out embeddings at padded positions.
            return x * tf.expand_dims(m, axis=-1)

        def masked_reduce_mean(x, m):
            # Mean over non-padded positions; epsilon guards an all-zero mask.
            return tf.reduce_sum(mul_mask(x, m), axis=1) / (
                tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10
            )

        if self.pooling == "first":
            pooled = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
                "pooled_output"
            ]
        elif self.pooling == "mean":
            result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
                "sequence_output"
            ]
            input_mask = tf.cast(input_mask, tf.float32)
            pooled = masked_reduce_mean(result, input_mask)
        else:
            # No pooling: return masked per-token embeddings (word level).
            result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
                "sequence_output"
            ]
            input_mask = tf.cast(input_mask, tf.float32)
            pooled = mul_mask(result, input_mask)

        return pooled