Closed Melonzhou closed 1 year ago
` def forward(self, fields_dict, phase): """前向计算组网部分包括loss值的计算,必须由子类实现 :param: fields_dict: 序列化好的id :param: phase: 当前调用的阶段,如训练、预测,不同的阶段组网可以不一样 :return: 一个dict数据,存放TARGET_FEED_NAMES, TARGET_PREDICTS, PREDICT_RESULT,LABEL,LOSS等所有你希望获取的数据 """ instance_text_a = fields_dict["text_a"] record_id_text_a = instance_text_a[InstanceName.RECORD_ID] text_a_src = record_id_text_a[InstanceName.SRC_IDS] text_a_pos = record_id_text_a[InstanceName.POS_IDS] text_a_sent = record_id_text_a[InstanceName.SENTENCE_IDS] text_a_mask = record_id_text_a[InstanceName.MASK_IDS] text_a_task = record_id_text_a[InstanceName.TASK_IDS] text_a_lens = record_id_text_a[InstanceName.SEQ_LENS]
instance_label = fields_dict["label"]
record_id_label = instance_label[InstanceName.RECORD_ID]
label = record_id_label[InstanceName.SRC_IDS]
label_lens = record_id_label[InstanceName.SEQ_LENS]
unpad_labels = fluid.layers.sequence_unpad(label, length=label_lens)
emb_dict = self.make_embedding(fields_dict, phase)
emb_text_a = emb_dict["text_a"]
unpad_emb = fluid.layers.sequence_unpad(emb_text_a, length=text_a_lens)
num_labels = 7
# demo config
grnn_hidden_dim = 128 # 768
crf_lr = 0.2
bigru_num = 2
init_bound = 0.1
def _bigru_layer(input_feature):
"""define the bidirectional gru layer
"""
pre_gru = fluid.layers.fc(
input=input_feature,
size=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru = fluid.layers.dynamic_gru(
input=pre_gru,
size=grnn_hidden_dim,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
pre_gru_r = fluid.layers.fc(
input=input_feature,
size=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru_r = fluid.layers.dynamic_gru(
input=pre_gru_r,
size=grnn_hidden_dim,
is_reverse=True,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1)
return bi_merge
input_feature = unpad_emb
for i in range(bigru_num):
bigru_output = _bigru_layer(input_feature)
input_feature = bigru_output
emission = fluid.layers.fc(
size=num_labels,
input=bigru_output,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
crf_cost = fluid.layers.linear_chain_crf(
input=emission,
label=unpad_labels,
param_attr=fluid.ParamAttr(
name='crfw',
learning_rate=crf_lr))
prediction = fluid.layers.crf_decoding(
input=emission, param_attr=fluid.ParamAttr(name='crfw'))
run_value = fluid.layers.chunk_eval(input=prediction, label=unpad_labels, chunk_scheme="IOB",
num_chunk_types=int(math.ceil((num_labels - 1) / 2.0)))
precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = run_value
if phase == InstanceName.SAVE_INFERENCE:
target_predict_list = [prediction]
target_feed_name_list = [text_a_src.name, text_a_pos.name, text_a_sent.name,
text_a_mask.name, text_a_lens.name]
emb_params = self.model_params.get("embedding")
ernie_config = ErnieConfig(emb_params.get("config_path"))
if ernie_config.get('use_task_id', False):
target_feed_name_list.append(text_a_task.name)
forward_return_dict = {
InstanceName.TARGET_FEED_NAMES: target_feed_name_list,
InstanceName.TARGET_PREDICTS: target_predict_list
}
return forward_return_dict
avg_cost = fluid.layers.mean(x=crf_cost)
forward_return_dict = {
# InstanceName.PREDICT_RESULT: prediction,
# InstanceName.LABEL: label,
"num_infer_chunks": num_infer_chunks,
"num_label_chunks": num_label_chunks,
"num_correct_chunks": num_correct_chunks,
InstanceName.LOSS: avg_cost
}
return forward_return_dict
`
补充一下组网代码。chunk_eval应该只是指标计算,并不会影响反向传播吧?
chunk_eval应该只是指标计算,并不会影响反向传播吧?
是的,不会影响反向传播
chunk_eval 这个op中的num_chunk_types参数,这个取值要怎么设置啊,我的真实label是7个,当我把num_chunk_types设置成3、4、5的时候,计算出来的结果差异很多,按理来说输入为4和5时应该出错才对 。
使用IOB、真实label是7个的话,num_chunk_types的正确设置应该是3,输入为4和5时目前不会报错。请问7个label分别是什么呢
另外可否对num_chunk_types=3时指标不符合预期的case,在layers.chunk_eval前使用layers.Print
将输入输出贴出来,我们这边复现下
嗯嗯,有的,方便的话你可以登我的机器看看,复现环境是好的。
label=7, num_chunk_types=3
label=7, num_chunk_types = 4
label=7, num_chunk_types = 5