Closed: yanyuxiaoxiao, closed 8 years ago
My LSTM produces almost the same prediction value for every input. Did I construct my network wrong? This is my unroll_lstm, based on the reference example:
```python
import mxnet as mx
from collections import namedtuple

# LSTMState/LSTMParam and the per-step lstm() cell follow MXNet's
# example lstm.py (the cell is sketched further below).
LSTMState = namedtuple("LSTMState", ["c", "h"])
LSTMParam = namedtuple("LSTMParam",
                       ["i2h_weight", "i2h_bias", "h2h_weight", "h2h_bias"])


def lstm_unroll(num_lstm_layer, num_lstm_nerual, seq_len, input_size,
                num_hidden, num_embed, num_label, dropout=0.):
    """unrolled lstm network"""
    # initialize the parameter symbols
    # (embed_weight is declared but the embedding itself is not applied here)
    embed_weight = mx.sym.Variable("embed_weight")
    cls_weight = mx.sym.Variable("cls_weight")
    cls_bias = mx.sym.Variable("cls_bias")
    param_cells = []
    last_states = []
    for i in range(num_lstm_layer):
        layer_params_cells = []
        layer_states = []
        for j in range(num_lstm_nerual):
            layer_params_cells.append(LSTMParam(
                i2h_weight=mx.sym.Variable("l%d_n%d_i2h_weight" % (i, j)),
                i2h_bias=mx.sym.Variable("l%d_n%d_i2h_bias" % (i, j)),
                h2h_weight=mx.sym.Variable("l%d_n%d_h2h_weight" % (i, j)),
                h2h_bias=mx.sym.Variable("l%d_n%d_h2h_bias" % (i, j))))
            layer_states.append(LSTMState(
                c=mx.sym.Variable("l%d_n%d_init_c" % (i, j)),
                h=mx.sym.Variable("l%d_n%d_init_h" % (i, j))))
        param_cells.append(layer_params_cells)
        last_states.append(layer_states)
    assert len(last_states) == num_lstm_layer

    label = mx.sym.Variable("label")
    last_hidden = []
    for seqidx in range(seq_len):
        # embedding layer
        data = mx.sym.Variable("t%d_data" % seqidx)
        # stack LSTM: layer 0 reads the input, upper layers read the
        # hidden state of the matching cell in the layer below
        for i in range(num_lstm_layer):
            for j in range(num_lstm_nerual):
                if i == 0:
                    dp = 0.
                    _indata = data
                else:
                    dp = dropout  # was `dp == dropout`: a comparison, not an assignment
                    _indata = last_states[i - 1][j].h
                next_state = lstm(num_hidden, indata=_indata,
                                  prev_state=last_states[i][j],
                                  param=param_cells[i][j],
                                  seqidx=seqidx, layeridx=i,
                                  nerualidx=j, dropout=dp)
                last_states[i][j] = next_state
                hidden = next_state.h
                # decoder: only the last cell of the last layer feeds it
                if i == (num_lstm_layer - 1) and j == (num_lstm_nerual - 1):
                    if dropout > 0.:
                        hidden = mx.sym.Dropout(data=hidden, p=dropout)
                    last_hidden.append(hidden)

    # every timestep's last hidden state goes through a shared softmax decoder
    sm_list = []
    for idx in range(len(last_hidden)):
        fc = mx.sym.FullyConnected(data=last_hidden[idx], weight=cls_weight,
                                   bias=cls_bias, num_hidden=num_label)
        sm_list.append(mx.sym.SoftmaxOutput(data=fc, label=label,
                                            name="sm%d" % idx))

    # block gradients through the states carried over between unrolls
    for i in range(num_lstm_layer):
        for j in range(num_lstm_nerual):
            state = last_states[i][j]
            last_states[i][j] = LSTMState(
                c=mx.sym.BlockGrad(state.c, name="l%d_n%d_last_c" % (i, j)),
                h=mx.sym.BlockGrad(state.h, name="l%d_n%d_last_h" % (i, j)))

    unpack_h = [last_states[i][j].h
                for j in range(num_lstm_nerual) for i in range(num_lstm_layer)]
    unpack_c = [last_states[i][j].c
                for j in range(num_lstm_nerual) for i in range(num_lstm_layer)]
    return mx.sym.Group(sm_list + unpack_h + unpack_c)
```
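For reference, the `lstm()` cell that `lstm_unroll` calls is not shown in the issue. The sketch below follows the per-step cell from MXNet's example `lstm.py`; the `nerualidx` parameter and its use in the symbol names are assumptions added only so the signature matches the call above, not part of the stock example:

```python
# A minimal per-step LSTM cell in the style of MXNet's example lstm.py.
# `nerualidx` is assumed here only to match the call in lstm_unroll above.
def lstm(num_hidden, indata, prev_state, param, seqidx, layeridx,
         nerualidx=0, dropout=0.):
    if dropout > 0.:
        indata = mx.sym.Dropout(data=indata, p=dropout)
    # input-to-hidden and hidden-to-hidden projections, 4 gates at once
    i2h = mx.sym.FullyConnected(data=indata,
                                weight=param.i2h_weight, bias=param.i2h_bias,
                                num_hidden=num_hidden * 4,
                                name="t%d_l%d_n%d_i2h" % (seqidx, layeridx, nerualidx))
    h2h = mx.sym.FullyConnected(data=prev_state.h,
                                weight=param.h2h_weight, bias=param.h2h_bias,
                                num_hidden=num_hidden * 4,
                                name="t%d_l%d_n%d_h2h" % (seqidx, layeridx, nerualidx))
    gates = i2h + h2h
    slice_gates = mx.sym.SliceChannel(gates, num_outputs=4,
                                      name="t%d_l%d_n%d_slice" % (seqidx, layeridx, nerualidx))
    in_gate = mx.sym.Activation(slice_gates[0], act_type="sigmoid")
    in_transform = mx.sym.Activation(slice_gates[1], act_type="tanh")
    forget_gate = mx.sym.Activation(slice_gates[2], act_type="sigmoid")
    out_gate = mx.sym.Activation(slice_gates[3], act_type="sigmoid")
    next_c = (forget_gate * prev_state.c) + (in_gate * in_transform)
    next_h = out_gate * mx.sym.Activation(next_c, act_type="tanh")
    return LSTMState(c=next_c, h=next_h)
```

One concrete thing worth checking in the posted code: the original `dp == dropout` line is a comparison rather than an assignment, so every layer above the first silently ran with the `dp = 0.` left over from the first branch; note also that all `sm%d` outputs are trained against the single shared `label` symbol.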
Why did my code come out formatted so strangely!
Closing for now, since there has been no activity for some time.