anargyri / lstm_han

LSTM and Hierarchical Attention Network on DSVM

can it work with tensorflow backend? #4

Open · connectdotz opened this issue 6 years ago

connectdotz commented 6 years ago

First, thanks for this wonderful repo. I am trying to adapt your hatt-archive-cntk.ipynb to the TensorFlow backend, but it raises the following error when constructing the model. I'm wondering if you have any insight:

InvalidArgumentError                      Traceback (most recent call last)
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1566   try:
-> 1567     c_op = c_api.TF_FinishOperation(op_desc)
   1568   except errors.InvalidArgumentError as e:

InvalidArgumentError: Dimension size must be evenly divisible by 3000 but is 200 for 'time_distributed_1/Reshape_1' (op: 'Reshape') with input shapes: [200], [3] and with input tensors computed as partial shapes: input[1] = [?,15,200].

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-24-38f8f519e000> in <module>()
     10 
     11 review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
---> 12 review_encoder = TimeDistributed(sentEncoder)(review_input)
     13 l_lstm_sent = Bidirectional(GRU(GRU_UNITS, return_sequences=True, kernel_regularizer=l2_reg, 
     14                                 implementation=GPU_IMPL))(review_encoder)

/usr/local/lib/python3.5/dist-packages/keras/engine/base_layer.py in __call__(self, inputs, **kwargs)
    458             # Actually call the layer,
    459             # collecting output(s), mask(s), and shape(s).
--> 460             output = self.call(inputs, **kwargs)
    461             output_mask = self.compute_mask(inputs, previous_mask)
    462 

/usr/local/lib/python3.5/dist-packages/keras/layers/wrappers.py in call(self, inputs, training, mask)
    253             output_shape = self._get_shape_tuple(
    254                 (-1, input_length), y, 1, output_shape[2:])
--> 255             y = K.reshape(y, output_shape)
    256 
    257         # Apply activity regularizer if any:

/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py in reshape(x, shape)
   1935     """
   1936     print('before x={}, shape={}'.format(x, shape))
-> 1937     _x = tf.reshape(x, shape)
   1938     print('after _x={}'.format(_x))
   1939     return _x

/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py in reshape(tensor, shape, name)
   6111   if _ctx is None or not _ctx._eager_context.is_eager:
   6112     _, _, _op = _op_def_lib._apply_op_helper(
-> 6113         "Reshape", tensor=tensor, shape=shape, name=name)
   6114     _result = _op.outputs[:]
   6115     _inputs_flat = _op.inputs

/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    785         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    786                          input_types=input_types, attrs=attr_protos,
--> 787                          op_def=op_def)
    788       return output_structure, op_def.is_stateful, op
    789 

/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
   3390           input_types=input_types,
   3391           original_op=self._default_original_op,
-> 3392           op_def=op_def)
   3393 
   3394       # Note: shapes are lazily computed with the C API enabled.

/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
   1732           op_def, inputs, node_def.attr)
   1733       self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1734                                 control_input_ops)
   1735     else:
   1736       self._c_op = None

/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1568   except errors.InvalidArgumentError as e:
   1569     # Convert to ValueError for backwards compatibility.
-> 1570     raise ValueError(str(e))
   1571 
   1572   return c_op

ValueError: Dimension size must be evenly divisible by 3000 but is 200 for 'time_distributed_1/Reshape_1' (op: 'Reshape') with input shapes: [200], [3] and with input tensors computed as partial shapes: input[1] = [?,15,200].
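
For context, the 3000 in the message is MAX_SENTS times the Bi-GRU output width (15 × 200): TimeDistributed is trying to reshape the sentence encoder's output back to (batch, 15, 200), but that output's static shape is being inferred as a bare [200]. Below is a minimal sketch of the surrounding construction, with a plain Bi-GRU standing in for the notebook's attention-based sentEncoder; MAX_SENT_LENGTH, VOCAB_SIZE, EMBED_DIM and the regularizer strength are assumptions, since only MAX_SENTS and the 200-wide encoder output are visible in the traceback.

from keras.layers import Input, Embedding, GRU, Bidirectional, TimeDistributed
from keras.models import Model
from keras.regularizers import l2

MAX_SENTS = 15          # from the error: input[1] = [?, 15, 200]
MAX_SENT_LENGTH = 100   # assumed; not visible in the traceback
VOCAB_SIZE = 20000      # assumed
EMBED_DIM = 100         # assumed
GRU_UNITS = 100         # Bidirectional output width = 2 * 100 = 200
l2_reg = l2(1e-8)       # assumed regularizer strength

# Stand-in sentence encoder: embeds a sentence and reduces it to one
# 200-dim vector. In the notebook this reduction ends with the custom
# AttLayer, which is where shape inference goes wrong under TensorFlow.
sent_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedded = Embedding(VOCAB_SIZE, EMBED_DIM)(sent_input)
sent_vec = Bidirectional(GRU(GRU_UNITS, kernel_regularizer=l2_reg))(embedded)
sentEncoder = Model(sent_input, sent_vec)

# Review-level model, as in the failing cell.
review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
review_encoder = TimeDistributed(sentEncoder)(review_input)   # (?, 15, 200)
l_lstm_sent = Bidirectional(GRU(GRU_UNITS, return_sequences=True,
                                kernel_regularizer=l2_reg))(review_encoder)

With this plain encoder the shapes line up and the cell builds, which is consistent with the diagnosis below that the custom attention layer's K.dot calls are the culprit.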
kharveyresearch commented 5 years ago

Seems to be the dot product that is failing. Try replacing the K.dot calls in AttLayer.call with the following helper:

def dot_product(x, kernel):
    if len(kernel.shape) == 2:
        return K.squeeze(K.expand_dims(K.dot(x, kernel)), axis=-1)
    else:
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
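
For completeness, here is a sketch of how AttLayer.call might apply this helper. The weight names self.W, self.b and self.u follow the usual hierarchical-attention layout (projection matrix, bias, context vector) and are assumptions, since the repo's exact layer isn't quoted here:

def call(self, x, mask=None):
    # x: (batch, time, features)
    uit = K.tanh(dot_product(x, self.W) + self.b)  # (batch, time, features)
    ait = dot_product(uit, self.u)                 # (batch, time)

    a = K.exp(ait)
    if mask is not None:
        a *= K.cast(mask, K.floatx())              # zero out padded timesteps
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    # attention-weighted sum over time -> (batch, features)
    return K.sum(x * K.expand_dims(a), axis=1)

The key difference from the Theano-friendly original is the else branch of dot_product: under TensorFlow, K.dot of a 3D tensor with a 1D context vector needs the kernel expanded to (features, 1) and the resulting trailing axis squeezed away.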