mpk001 / Sentiment_analysis

Sentiment analysis of IMDB movie reviews with deep learning; the methods used are MLP, BiRNN, and BiGRU + Attention Model.

ValueError: Dimensions must be equal #3

Open Kiris-tingna opened 6 years ago

Kiris-tingna commented 6 years ago

I implemented an attention model:


from keras import backend as K
from keras import activations, constraints, initializers, regularizers
from keras.engine.topology import Layer


class AttentionLayer(Layer):
    '''
    Attention Layer over LSTM
    '''
    def __init__(self, output_dim=None, init='glorot_uniform', attn_activation='tanh',
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None, bias=True, **kwargs):
        self.output_dim = output_dim
        self.supports_masking = True

        self.init = initializers.get(init)

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.attn_activation = activations.get(attn_activation)

        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape): # (batch, steps, dim)
        input_dim = input_shape[2]

        if not self.output_dim:
            self.output_dim = input_dim

        self.steps = input_shape[1]
        # shape = (input_dim, output_dim)

        self.W_s = self.add_weight(shape=(input_dim, self.output_dim),
                initializer=self.init,
                name='{}_Ws'.format(self.name),
                regularizer=self.W_regularizer,
                constraint=self.W_constraint,
                trainable=True)

        self.B_s = self.add_weight(shape=(self.output_dim,),
                initializer='zero',
                regularizer=self.b_regularizer,
                constraint=self.b_constraint,
                name='{}_bs'.format(self.name))

        self.Attention_vec = self.add_weight(shape=(self.output_dim,),
                initializer='normal',
                name='{}_att_vec'.format(self.name))
        self.built = True

    def compute_mask(self,  x, input_mask=None):
        return None

    def call(self, x, mask=None):
        # 1. transform, (None, steps, idim)*(idim, outdim) -> (None, steps, outdim)
        uit = K.dot(x, self.W_s)

        if self.bias:
            uit += self.B_s

        u = self.attn_activation(uit)

        # 2. score each timestep against the attention vector:
        #    sum over axis 2 of (None, steps, outdim) * (outdim,) -> (None, steps)
        att = K.sum(u * self.Attention_vec, axis=2)
        # 3. softmax, (None, steps)
        att = K.exp(att)

        # att_sum = att_sum.dimshuffle(0,'x')
        att /= K.cast(K.sum(att, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # 4. weighted sum
        # att = att.dimshuffle(0, 1, 'x')
        att = K.expand_dims(att, 1)
        weighted_input = att * x
        return K.sum(weighted_input, axis=1)

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], input_shape[-1])

and got this traceback:

Traceback (most recent call last):
  File "G:/Kiristingna/nlp_segment_analyse/sentiment/classifier/BiLSTM_ATT.py", line 257, in <module>
    bilstm_att_wv(index_dict.tolist(), word_vectors.tolist(), sequences, y)
  File "G:/Kiristingna/nlp_segment_analyse/sentiment/classifier/BiLSTM_ATT.py", line 162, in bilstm_att_wv
    model.add(AttentionLayer())
  File "D:\Program Files (x86)\Python\lib\site-packages\keras\models.py", line 489, in add
    output_tensor = layer(self.outputs[0])
  File "D:\Program Files (x86)\Python\lib\site-packages\keras\engine\topology.py", line 603, in __call__
    output = self.call(inputs, **kwargs)
  File "G:\Kiristingna\nlp_segment_analyse\sentiment\classifier\Attention.py", line 87, in call
    weighted_input = att * x
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\ops\math_ops.py", line 821, in binary_op_wrapper
    return func(x, y, name=name)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1044, in _mul_dispatch
    return gen_math_ops._mul(x, y, name=name)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1434, in _mul
    result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\ops.py", line 2338, in create_op
    set_shapes_for_outputs(ret)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\ops.py", line 1719, in set_shapes_for_outputs
    shapes = shape_func(op)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\ops.py", line 1669, in call_with_requiring
    return call_cpp_shape_fn(op, require_shape_fn=True)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\common_shapes.py", line 610, in call_cpp_shape_fn
    debug_python_shape_fn, require_shape_fn)
  File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\common_shapes.py", line 676, in _call_cpp_shape_fn_impl
    raise ValueError(err.message)
ValueError: Dimensions must be equal, but are 100 and 250 for 'attention_layer_1/mul_1' (op: 'Mul') with input shapes: [?,1,100], [?,?,250].
Kiris-tingna commented 6 years ago

print(att.shape)->(?, 100)

print(x.shape)->(?, ?, 250)
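
Those printed shapes pinpoint the mismatch: `K.expand_dims(att, 1)` turns `(batch, steps)` into `(batch, 1, steps)`, and its last dimension (100, the number of timesteps) cannot broadcast against the feature dimension of `x` (250), which is exactly the "Dimensions must be equal, but are 100 and 250" error. Expanding on the last axis instead gives `(batch, steps, 1)`, which broadcasts cleanly. A minimal sketch with standalone Keras backend placeholders (the 100/250 sizes are taken from the shapes printed above, not from the project's code):

from keras import backend as K

att = K.placeholder(shape=(None, 100))       # attention weights, (batch, steps)
x = K.placeholder(shape=(None, 100, 250))    # BiLSTM outputs, (batch, steps, dim)

# K.expand_dims(att, 1) would give (batch, 1, steps): its last dimension (100)
# cannot broadcast against dim=250, hence "Dimensions must be equal".
# Expanding on the last axis gives (batch, steps, 1), which broadcasts fine:
weighted = K.expand_dims(att, axis=-1) * x
print(K.int_shape(weighted))                 # (None, 100, 250)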

whonor commented 4 years ago

@Kiris-tingna Hi, I have the same problem. I would like to know how to solve it. Thanks!
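
A likely fix, judging from the commented-out Theano line `att = att.dimshuffle(0, 1, 'x')` and the shapes above: the new axis has to be added at the end, not in the middle, so that the per-timestep weights broadcast over the feature dimension. Below is a sketch of the corrected `call` (plus `compute_output_shape`, the Keras 2 name for `get_output_shape_for`, since the traceback points at Keras 2); it is an edit of the code posted above, not the repository's own implementation:

    def call(self, x, mask=None):
        # 1. transform: (None, steps, input_dim) x (input_dim, output_dim) -> (None, steps, output_dim)
        uit = K.dot(x, self.W_s)
        if self.bias:
            uit += self.B_s
        u = self.attn_activation(uit)

        # 2. score each timestep against the attention vector -> (None, steps)
        att = K.sum(u * self.Attention_vec, axis=2)

        # 3. softmax over the steps axis
        att = K.exp(att)
        att /= K.cast(K.sum(att, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # 4. weighted sum: expand on the LAST axis so (None, steps, 1)
        #    broadcasts against x with shape (None, steps, dim)
        att = K.expand_dims(att, axis=-1)   # was K.expand_dims(att, 1), which gives (None, 1, steps)
        weighted_input = att * x
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        # Keras 2 replacement for get_output_shape_for
        return (input_shape[0], input_shape[-1])

With that change the multiply at step 4 becomes `(None, steps, 1) * (None, steps, 250)`, which matches the shapes printed above.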