PaddlePaddle / Paddle

PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习&机器学习高性能单机、分布式训练和跨平台部署)
http://www.paddlepaddle.org/
Apache License 2.0
22.17k stars 5.56k forks source link

Var read_file_0.tmp_10 has more than one negative dim #27497

Closed Angus07 closed 3 years ago

Angus07 commented 4 years ago

image

wanghaoshuang commented 4 years ago

请补充下定义data layer和data reader的相关代码?应该是shape没设置对。

Angus07 commented 4 years ago

见374行左右

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import multiprocessing
import os
import sys
import time

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.layers.utils as paddle_utils
from paddle.fluid.layers import Print as Print
from six.moves import xrange

from model.ernie import ErnieModel
from tokenization import BasicTokenizer
from utils.bleu import compute_bleu

def cal_logit(src_ids, enc_out, tgt_pos, args, ernie_config, weights, gens):
    """Project decoder states to a pointer-generator vocabulary distribution.

    Mixes two branches, weighted by the generation gate ``gens``:
      * generator:  p_gen * softmax(vocab logits)
      * copy:       (1 - p_gen) * attention weights scattered onto the
                    source token ids via a one-hot matmul

    Args:
        src_ids: source token ids, one-hot scattered into the vocab axis.
            assumes shape [batch, src_len, 1] — TODO confirm against reader.
        enc_out: decoder hidden states; flattened to [-1, hidden_size].
        tgt_pos: flat positions of target tokens to gather, or None to
            keep every position (step-wise decoding path).
        args: run arguments; only ``weight_sharing`` is read here.
        ernie_config: model config; ``hidden_size`` and ``vocab_size`` read.
        weights: copy-attention weights over source positions.
        gens: per-position generation gate in [0, 1].

    Returns:
        final_dists: [-1, vocab_size] probability distribution.
    """
    # Was hard-coded as 18000 in several places below; the fc branch already
    # sized its output by vocab_size, so the two must agree.
    vocab_size = ernie_config["vocab_size"]

    # Flatten to [batch*seq, hidden] so flat positions can be gathered.
    enc_out = fluid.layers.reshape(
        x=enc_out, shape=[-1, ernie_config["hidden_size"]])

    # Explicit None check: tgt_pos is a fluid Variable when present, and
    # Variable truthiness is not a reliable presence test.
    if tgt_pos is not None:
        tgt_pos = fluid.layers.cast(x=tgt_pos, dtype='int32')
        tgt_feat = fluid.layers.gather(input=enc_out, index=tgt_pos)
    else:
        tgt_feat = enc_out

    tgt_trans_feat = fluid.layers.fc(
        input=tgt_feat,
        size=ernie_config["hidden_size"],
        act="relu",
        param_attr=fluid.ParamAttr(
            name="seq2seq_trans_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="seq2seq_trans_b",
            initializer=fluid.initializer.Constant(0.)))

    seq2seq_out_bias_attr = fluid.ParamAttr(
        name="seq2seq_out_fc.b_0",
        initializer=fluid.initializer.Constant(value=0.0))

    if args.weight_sharing:
        # Tie the output projection to the input word embedding table.
        fc_out = fluid.layers.matmul(
            x=tgt_trans_feat,
            y=fluid.default_main_program().global_block().var(
                "word_embedding"),
            transpose_y=True)
        fc_out += fluid.layers.create_parameter(
            shape=[vocab_size],
            dtype="float32",
            attr=seq2seq_out_bias_attr,
            is_bias=True)
    else:
        fc_out = fluid.layers.fc(
            input=tgt_trans_feat,
            size=vocab_size,
            param_attr=fluid.ParamAttr(
                name="seq2seq_out_fc.w_0",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=seq2seq_out_bias_attr)

    # Generation gate, flattened and (optionally) gathered to align with
    # fc_out, then broadcast across the vocabulary axis.
    gens_trans = fluid.layers.reshape(x=gens, shape=[-1, 1])
    if tgt_pos is not None:
        gens_feat = fluid.layers.gather(input=gens_trans, index=tgt_pos)
    else:
        gens_feat = gens_trans
    gens_trans = fluid.layers.expand(gens_feat, expand_times=[1, vocab_size])

    # Generator branch.
    fc_out = layers.softmax(fc_out)
    vocab_dists = fluid.layers.elementwise_mul(gens_trans, fc_out)

    # Copy branch: scale attention by (1 - p_gen), then scatter the source
    # positions into the vocab axis with a one-hot matmul.
    gens = layers.squeeze(input=gens, axes=[2])
    attn_dists = fluid.layers.elementwise_mul(weights, (1.0 - gens), axis=0)
    src_one_hot = fluid.one_hot(src_ids, vocab_size)
    src_one_hot = layers.squeeze(input=src_one_hot, axes=[2])
    attn_dists_projected = layers.matmul(attn_dists, src_one_hot)
    attn_dists_projected = fluid.layers.reshape(
        x=attn_dists_projected, shape=[-1, vocab_size])
    if tgt_pos is not None:
        attn_dists_projected = fluid.layers.gather(
            input=attn_dists_projected, index=tgt_pos)

    final_dists = attn_dists_projected + vocab_dists
    final_dists = fluid.layers.reshape(x=final_dists,
                                       shape=[-1, vocab_size])
    return final_dists

def create_model(args, pyreader_name, ernie_config, is_prediction=False,
                 is_reinforce=False):
    """Build the training graph, or dispatch to a decoding graph builder.

    Args:
        args: run arguments (max_seq_len, use_fp16, loss_scaling, ...).
        pyreader_name: unique name for the py_reader feeding this graph.
        ernie_config: ERNIE model configuration dict.
        is_prediction: build the beam-search inference graph instead.
        is_reinforce: build the REINFORCE (greedy + sampled) graph instead.

    Returns:
        (pyreader, graph_vars) where graph_vars = {"loss": loss}.
    """
    if is_prediction:
        return fast_decode(args, pyreader_name, ernie_config)
    if is_reinforce:
        return greedy_decode(args, pyreader_name, ernie_config)

    # Feed order: src, sent, pos ids; decoder & encoder attention masks;
    # flat target labels and their flat positions.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'float32', 'int64',
                'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, input_mask, input_mask_encoder, tgt_labels,
     tgt_pos) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True)

    enc_out, weights, gens = ernie.get_sequence_output()
    # cal_logit returns probabilities, so cross_entropy (soft_label=False,
    # probability input) is applied directly.
    fc_out = cal_logit(src_ids, enc_out, tgt_pos, args, ernie_config,
                       weights, gens)
    ce_loss = fluid.layers.cross_entropy(fc_out, tgt_labels)
    loss = fluid.layers.mean(x=ce_loss)
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
    }
    # Keep fetched vars alive across executor runs.
    for v in graph_vars.values():
        v.persistable = True
    return pyreader, graph_vars

def greedy_decode(args, pyreader_name, ernie_config):
    """Build the REINFORCE training graph (greedy baseline + sampled rollout).

    Three parameter-sharing passes are constructed:
      1. a teacher-forced pass whose cross-entropy gives the MLE loss;
      2. a greedy decode loop producing ``baseline_ids``;
      3. a sampled decode loop producing ``sample_ids`` while accumulating
         the sample's log loss into ``global_loss``.
    A py_func computes neg_reward = score(baseline) - score(sample) and the
    final loss mixes the MLE and RL terms with fixed weights 0.7 / 0.3.

    Returns:
        (pyreader, graph_vars) where graph_vars = {"loss": loss}.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, 1], [-1, 1], [-1],
                [-1, 1, args.max_seq_len], [-1, 1], [-1, -1],
                [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'float32', 'int64',
                'float32', 'int32', 'float32', 'int64', 'int64', 'int64',
                'int64'],
        lod_levels=[0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, input_mask, input_mask_encoder, tgt_ids,
     init_scores, parent_idx, tgt_input_mask, tgt_start_pos, data_ids,
     tgt_labels, tgt_pos) = fluid.layers.read_file(pyreader)

    # --- pass 1: teacher-forced MLE loss ---
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True)

    enc_out, weights, gens = ernie.get_sequence_output()
    fc_out = cal_logit(src_ids, enc_out, tgt_pos, args, ernie_config,
                       weights, gens)
    ce_loss = fluid.layers.cross_entropy(fc_out, tgt_labels)
    loss = fluid.layers.mean(x=ce_loss)

    # Two step-wise decoding models with cached states gathered by
    # parent_idx (one per decode loop; parameters are shared by name).
    ernie1 = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True,
        decoding=True,
        gather_idx=parent_idx)

    ernie2 = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True,
        decoding=True,
        gather_idx=parent_idx)

    max_len = layers.fill_constant(
        shape=[1],
        dtype=tgt_ids.dtype,
        value=args.max_tgt_len,
        force_cpu=True)

    # --- pass 2: greedy baseline decode ---
    step_idx = layers.fill_constant(
        shape=[1], dtype=tgt_ids.dtype, value=0, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    ids = layers.array_write(
        layers.reshape(tgt_ids, (-1, 1)), step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)
    tgt_masks_encoder = layers.array_write(tgt_input_mask, step_idx)
    # Running concat of source + generated ids, fed back into cal_logit's
    # copy mechanism each step.
    global_src_id = fluid.layers.create_tensor(dtype='int64')
    fluid.layers.assign(src_ids, global_src_id)

    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_mask = layers.array_read(tgt_masks, i=step_idx)
        pre_mask_encoder = layers.array_read(tgt_masks_encoder, i=step_idx)
        # Extend both masks by one step: visible (1.0) for the decoder,
        # hidden (0.0) for the encoder side.
        append_mask = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=1.0,
            shape=[-1, 1, 1],
            dtype=pre_mask.dtype)
        append_mask_encoder = layers.fill_constant_batch_size_like(
            input=pre_mask_encoder,
            value=0.0,
            shape=[-1, 1, 1],
            dtype=pre_mask_encoder.dtype)
        pre_mask = layers.concat([pre_mask, append_mask], axis=2)
        pre_mask_encoder = layers.concat(
            [pre_mask_encoder, append_mask_encoder], axis=2)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_mask,
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype), y=step_idx, axis=0)
        type_ids = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=args.tgt_type_id,
            shape=[-1, 1, 1],
            dtype=pre_ids.dtype)
        dec_out, weights, gens = ernie1.encode(
            pre_ids, pre_pos, type_ids, pre_mask, pre_mask_encoder,
            parent_idx)
        tmp_id = layers.concat([global_src_id, pre_ids], axis=1)
        layers.assign(tmp_id, global_src_id)
        fc_out = cal_logit(tmp_id, dec_out, None, args, ernie_config,
                           weights, gens)
        # Greedy choice: top-1 token per position.
        topk_scores, topk_indices = layers.topk(input=fc_out, k=1)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(topk_indices, i=step_idx, array=ids)
        layers.array_write(pre_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pre_mask_encoder, i=step_idx,
                           array=tgt_masks_encoder)
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    baseline_ids = ids
    baseline_ids_tensor, _ = fluid.layers.tensor_array_to_tensor(
        input=baseline_ids)

    # --- pass 3: sampled decode ---
    step_idx = layers.fill_constant(
        shape=[1], dtype=tgt_ids.dtype, value=0, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    ids = layers.array_write(
        layers.reshape(tgt_ids, (-1, 1)), step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)
    tgt_masks_encoder = layers.array_write(tgt_input_mask, step_idx)
    global_src_id2 = fluid.layers.create_tensor(dtype='int64')
    fluid.layers.assign(src_ids, global_src_id2)

    # Accumulates the sampled sequence's log loss across steps.
    global_loss = layers.fill_constant(
        shape=[1], dtype="float32", value=0)

    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_mask = layers.array_read(tgt_masks, i=step_idx)
        pre_mask_encoder = layers.array_read(tgt_masks_encoder, i=step_idx)
        append_mask = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=1.0,
            shape=[-1, 1, 1],
            dtype=pre_mask.dtype)
        append_mask_encoder = layers.fill_constant_batch_size_like(
            input=pre_mask_encoder,
            value=0.0,
            shape=[-1, 1, 1],
            dtype=pre_mask_encoder.dtype)
        pre_mask = layers.concat([pre_mask, append_mask], axis=2)
        pre_mask_encoder = layers.concat(
            [pre_mask_encoder, append_mask_encoder], axis=2)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_mask,
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype), y=step_idx, axis=0)
        type_ids = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=args.tgt_type_id,
            shape=[-1, 1, 1],
            dtype=pre_ids.dtype)
        dec_out, weights, gens = ernie2.encode(
            pre_ids, pre_pos, type_ids, pre_mask, pre_mask_encoder,
            parent_idx)
        tmp_id = layers.concat([global_src_id2, pre_ids], axis=1)
        layers.assign(tmp_id, global_src_id2)
        fc_out = cal_logit(tmp_id, dec_out, None, args, ernie_config,
                           weights, gens)
        # Stochastic choice: sample from the output distribution.
        topk_indices = layers.sampling_id(fc_out)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(topk_indices, i=step_idx, array=ids)
        layers.array_write(pre_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pre_mask_encoder, i=step_idx,
                           array=tgt_masks_encoder)
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)
        # NOTE(review): log_loss expects probabilities vs. labels of the
        # same shape; feeding the full fc_out distribution with sampled
        # indices looks shape-suspect — confirm against a runnable setup.
        nll_loss = layers.log_loss(fc_out, topk_indices)
        tmp_global_loss = global_loss + nll_loss
        layers.assign(tmp_global_loss, global_loss)

    sample_ids = ids
    sample_ids_tensor, _ = fluid.layers.tensor_array_to_tensor(
        input=sample_ids)

    # Placeholder var re-bound by py_func below; reward_func computes
    # score(baseline) - score(sample) on the host.
    neg_reward = fluid.data(
        name="reward",
        shape=[1],  # batch_size
        dtype="float32")
    neg_reward = fluid.layers.py_func(
        func=reward_func, x=[sample_ids_tensor, baseline_ids_tensor],
        out=neg_reward)

    rl_loss = neg_reward * global_loss
    rl_loss = layers.mean(rl_loss)
    # Fixed 0.7 MLE / 0.3 RL mixture.
    loss = 0.7 * loss + (1 - 0.7) * rl_loss
    graph_vars = {
        "loss": loss
    }
    for v in graph_vars.values():
        v.persistable = True
    return pyreader, graph_vars

def reward_func(samples, baselines): samples = np.array(samples) baselines = np.array(baselines) neg_reward = np.mean(baselines,axis=1) - np.mean(samples,axis=1) return neg_reward

def fast_decode(args, pyreader_name, ernie_config):
    """Build the beam-search inference graph.

    Runs a step-wise While loop: each step extends the decoder masks,
    encodes one token with cached states (gathered by parent_idx), mixes
    generator/copy distributions via cal_logit, and applies beam_search.

    Returns:
        (pyreader, graph_vars) with finished_ids, finished_scores, data_ids.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, 1], [-1, 1], [-1],
                [-1, 1, args.max_seq_len], [-1, 1], [-1, -1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'float32', 'int64',
                'float32', 'int32', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, input_mask, input_mask_encoder, tgt_ids,
     init_scores, parent_idx, tgt_input_mask, tgt_start_pos,
     data_ids) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True,
        decoding=True,
        gather_idx=parent_idx)

    max_len = layers.fill_constant(
        shape=[1],
        dtype=tgt_ids.dtype,
        value=args.max_tgt_len,
        force_cpu=True)
    step_idx = layers.fill_constant(
        shape=[1], dtype=tgt_ids.dtype, value=0, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)

    ids = layers.array_write(
        layers.reshape(tgt_ids, (-1, 1)), step_idx)
    scores = layers.array_write(init_scores, step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)
    tgt_masks_encoder = layers.array_write(tgt_input_mask, step_idx)

    # Running concat of source + generated ids for the copy mechanism.
    global_src_id = src_ids
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)

        tmp_tgt_input_mask = layers.array_read(tgt_masks, i=step_idx)
        tmp_tgt_input_mask_encoder = layers.array_read(
            tgt_masks_encoder, i=step_idx)
        append_mask = layers.fill_constant_batch_size_like(
            input=tmp_tgt_input_mask,
            value=1.0,
            shape=[-1, 1, 1],
            dtype=tmp_tgt_input_mask.dtype)
        # NOTE(review): both appends derive shape/dtype from the decoder
        # mask; greedy_decode derives the encoder append from the encoder
        # mask instead. Shapes match either way — confirm it's intentional.
        append_mask_encoder = layers.fill_constant_batch_size_like(
            input=tmp_tgt_input_mask,
            value=0.0,
            shape=[-1, 1, 1],
            dtype=tmp_tgt_input_mask.dtype)

        tmp_tgt_input_mask = layers.concat(
            [tmp_tgt_input_mask, append_mask], axis=2)
        tmp_tgt_input_mask_encoder = layers.concat(
            [tmp_tgt_input_mask_encoder, append_mask_encoder], axis=2)

        # Reorder masks to follow the surviving beams.
        pre_mask = layers.gather(input=tmp_tgt_input_mask, index=parent_idx)
        pre_mask_encoder = layers.gather(
            input=tmp_tgt_input_mask_encoder, index=parent_idx)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_mask,
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype), y=step_idx, axis=0)
        type_ids = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=args.tgt_type_id,
            shape=[-1, 1, 1],
            dtype=pre_ids.dtype)

        dec_out, weights, gens = ernie.encode(
            pre_ids, pre_pos, type_ids, pre_mask, pre_mask_encoder,
            parent_idx)

        tmp_id = layers.gather(input=global_src_id, index=parent_idx)
        tmp_id = layers.concat([tmp_id, pre_ids], axis=1)
        layers.assign(tmp_id, global_src_id)
        fc_out = cal_logit(tmp_id, dec_out, None, args, ernie_config,
                           weights, gens)

        topk_scores, topk_indices = layers.topk(
            input=fc_out, k=args.beam_size)

        # cal_logit outputs probabilities, so log() turns them into
        # accumulable log-scores.
        accu_scores = layers.elementwise_add(
            x=layers.log(topk_scores), y=pre_scores, axis=0)
        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        accu_scores = layers.lod_reset(accu_scores, pre_ids)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=args.beam_size,
            end_id=args.eos_idx,
            return_parent_idx=True)

        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.array_write(pre_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pre_mask_encoder, i=step_idx,
                           array=tgt_masks_encoder)
        layers.assign(gather_idx, parent_idx)

        # Stop when max length is reached or every beam has finished.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=args.beam_size, end_id=args.eos_idx)

    graph_vars = {
        "finished_ids": finished_ids,
        "finished_scores": finished_scores,
        "data_ids": data_ids
    }

    for v in graph_vars.values():
        v.persistable = True

    return pyreader, graph_vars

def post_process_seq(seq, eos_idx):
    """Post-process a beam-search decoded sequence.

    Truncates at the first ``eos_idx`` and drops the leading start token,
    so neither boundary token appears in the result.
    """
    eos_pos = len(seq)
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    # Drop the start token (index 0) and everything from eos onward.
    seq = seq[1:eos_pos]
    return seq


def merge_subword(tokens):
    """Merge WordPiece "##"-prefixed continuation tokens into whole words."""
    ret = []
    for token in tokens:
        if token.startswith("##"):
            real_token = token[2:]
            if ret:
                ret[-1] += real_token
            else:
                # Continuation with no preceding word: keep it standalone.
                ret.append(real_token)
        else:
            ret.append(token)
    return ret


def evaluate_bleu(refs, preds, bleu_n=4):
    """Return the corpus BLEU-n score (first element of compute_bleu)."""
    eval_res = compute_bleu(refs, preds, max_order=bleu_n)
    return eval_res[0]


def evaluate(exe, program, pyreader, graph_vars, eval_phase, dev_count=1,
             do_dec=False, vocab_path=None, features=None, eval_bleu=True,
             decode_path=""):
    """Run one evaluation pass: loss/ppl, or decoding (+ optional BLEU).

    In "train" phase, fetches the already-running loss (and learning rate)
    and returns a metrics dict. Otherwise it drains ``pyreader``: either
    accumulating loss, or (``do_dec``) parsing beam-search LoD output into
    ``dec_out[data_id] = (tokens, score)`` and reporting BLEU-4.

    NOTE(review): Python-2 era code — ``decode("utf8")`` on read lines and
    ``encode("utf8")`` on write assume byte strings; revisit for Python 3.
    """
    if do_dec and not hasattr(evaluate, 'trg_idx2word'):
        # Lazily cache the id->token vocab (and eos id) on the function.
        evaluate.trg_idx2word = {}
        fin = open(vocab_path)
        for line in fin:
            v, k = line.strip().decode("utf8").split("\t")
            evaluate.trg_idx2word[int(k)] = v
            if v == "[SEP]":
                evaluate.eos_idx = int(k)

    if eval_phase == "train":
        fetch_list = [graph_vars["loss"].name]
        if "learning_rate" in graph_vars:
            fetch_list.append(graph_vars["learning_rate"].name)
        outputs = exe.run(fetch_list=fetch_list)
        np_loss = outputs[0]
        ret = {
            "loss": np.mean(np_loss),
            "ppl": np.exp(np.mean(np_loss))
        }
        if "learning_rate" in graph_vars:
            ret["learning_rate"] = float(outputs[1][0])
        return ret

    if not do_dec:
        fetch_list = [graph_vars["loss"].name]
    else:
        fetch_list = [
            graph_vars["finished_ids"].name,
            graph_vars["finished_scores"].name,
            graph_vars["data_ids"].name,
        ]

    if do_dec:
        return_numpy = False  # keep LoDTensors so .lod() is available
        dec_out = {}
    else:
        steps = 0
        cost = 0.0
        return_numpy = True

    time_begin = time.time()
    pyreader.start()
    while True:
        try:
            outputs = exe.run(program=program,
                              fetch_list=fetch_list,
                              return_numpy=return_numpy)
            if not do_dec:
                np_loss = outputs[0]
                cost += np.mean(np_loss)
                steps += 1
            else:
                seq_ids, seq_scores, data_ids = outputs
                seq_ids_list, seq_scores_list = [seq_ids], [
                    seq_scores] if isinstance(
                        seq_ids,
                        paddle.fluid.core.LoDTensor) else (seq_ids,
                                                           seq_scores)
                data_ids = np.array(data_ids).reshape(-1).tolist()
                data_idx = 0
                for seq_ids, seq_scores in zip(seq_ids_list,
                                               seq_scores_list):
                    # How to parse the results:
                    # Suppose the lod of seq_ids is:
                    #   [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
                    # From lod[0]: 2 source sentences, beam width 3.
                    # From lod[1]: hyp lengths per sentence, e.g. the
                    # first sentence's 3 hyps have lengths 12, 12, 16.
                    for i in range(len(seq_ids.lod()[0]) - 1):
                        # For each source sentence.
                        start = seq_ids.lod()[0][i]
                        end = seq_ids.lod()[0][i + 1]
                        for j in range(end - start):
                            # For each candidate hypothesis.
                            sub_start = seq_ids.lod()[1][start + j]
                            sub_end = seq_ids.lod()[1][start + j + 1]
                            tokens = [
                                evaluate.trg_idx2word[idx]
                                for idx in post_process_seq(
                                    np.array(seq_ids)[sub_start:sub_end],
                                    evaluate.eos_idx)
                            ]
                            score = np.array(seq_scores)[sub_end - 1]
                            data_id = data_ids[data_idx]
                            data_idx += 1
                            dec_out[data_id] = (tokens, score)
                            # Keep only the top hypothesis per sentence.
                            break
        except fluid.core.EOFException:
            pyreader.reset()
            break
    time_end = time.time()

    if not do_dec:
        print("[%s evaluation] loss: %f, ppl: %f, elapsed time: %f s" %
              (eval_phase, cost / steps, np.exp(cost / steps),
               time_end - time_begin))
    else:
        if not eval_bleu:
            return dec_out
        tk = BasicTokenizer()
        preds = []
        refs = []
        keys = features.keys()
        writer = None
        if decode_path != "":
            writer = open(decode_path, "w")
        for i in keys:
            pred = merge_subword(dec_out[i][0])
            refs.append([tk.tokenize(features[i].tgt)])
            preds.append(pred)
            if writer:
                writer.write(features[i].tgt + "\t" +
                             " ".join(pred).encode("utf8") + "\n")
        bleu = evaluate_bleu(refs, preds)
        print("[%s evaluation] bleu-4: %f, elapsed time: %f s" %
              (eval_phase, bleu, time_end - time_begin))
wanghaoshuang commented 4 years ago

@chenwhql 帮忙看下这个问题?关于memory_usage

wanghaoshuang commented 4 years ago

@angus 你是自己显式的调用memory_usage么?先把这个调用屏蔽掉?

Angus07 commented 4 years ago

屏蔽了,依然报错。信息如下:

2020-09-24 15:45:34,445-WARNING: Your decorated reader has raised an exception! Exception in thread Thread-1: Traceback (most recent call last): File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/threading.py", line 801, in bootstrap_inner self.run() File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/threading.py", line 754, in run self.target(*self.args, **self.kwargs) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layers/io.py", line 491, in provider_thread six.reraise(sys.exc_info()) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layers/io.py", line 472, in __provider_thread__ for tensors in func(): File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1153, in wrapper examples, batch_size, phase=phase, do_dec=do_dec, place=place): File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1103, in _prepare_batch_data yield self._pad_batch_records(batch_records, do_dec, place) File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1218, in _pad_batch_records place, [range(tgt_word.shape[0] + 1)] 2) File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1164, in _to_lodtensor data_tensor.set(data, place) TypeError: set(): incompatible function arguments. The following argument types are supported:

  1. (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CPUPlace, zero_copy: bool=False) -> None
  2. (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CUDAPlace, zero_copy: bool=False) -> None
  3. (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CUDAPinnedPlace, zero_copy: bool=False) -> None

Invoked with: <paddle.fluid.core_avx.LoDTensor object at 0x7f52d4c4b5b0>, array([[0.], [0.], [0.], [0.]], dtype=float32), None

/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py:782: UserWarning: The following exception is not an EOF exception. "The following exception is not an EOF exception.") Traceback (most recent call last): File "./run_seq2seq.py", line 359, in main(args) File "./run_seq2seq.py", line 275, in main train_exe.run(fetch_list=[]) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/parallel_executor.py", line 311, in run return_numpy=return_numpy) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 783, in run six.reraise(*sys.exc_info()) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 778, in run use_program_cache=use_program_cache) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 843, in _run_impl return_numpy=return_numpy) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 677, in _run_parallel tensors = exe.run(fetch_var_names)._move_to_list() paddle.fluid.core_avx.EnforceNotMet:


C++ Call Stacks (More useful to developers):

0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const, int) 1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const, int) 2 paddle::operators::reader::BlockingQueue<std::vector<paddle::framework::LoDTensor, std::allocator > >::Receive(std::vector<paddle::framework::LoDTensor, std::allocator >) 3 paddle::operators::reader::PyReader::ReadNext(std::vector<paddle::framework::LoDTensor, std::allocator >) 4 std::_Function_handler<std::unique_ptr<std::future_base::_Result_base, std::future_base::_Result_base::_Deleter> (), std::future_base::_Task_setter<std::unique_ptr<std::future_base::_Result, std::future_base::_Result_base::_Deleter>, unsigned long> >::_M_invoke(std::_Any_data const&) 5 std::__future_base::_State_base::_M_do_set(std::function<std::unique_ptr<std::future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>&, bool&) 6 ThreadPool::ThreadPool(unsigned long)::{lambda()#1}::operator()() const


Python Call Stacks (More useful to users):

File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/framework.py", line 2525, in append_op attrs=kwargs.get("attrs", None)) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op return self.main_program.current_block().append_op(*args, **kwargs) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layers/io.py", line 889, in read_file type='read', inputs={'Reader': [reader]}, outputs={'Out': out}) File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/finetune/seq2seq.py", line 249, in greedy_decode tgt_input_mask, tgt_start_pos, data_ids, tgt_labels, tgt_pos) = fluid.layers.read_file(pyreader) File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/finetune/seq2seq.py", line 196, in create_model return greedy_decode(args, pyreader_name, ernie_config) File "./run_seq2seq.py", line 105, in main is_reinforce = True) File "./run_seq2seq.py", line 359, in main(args)


Error Message Summary:

Error: Blocking queue is killed because the data reader raises an exception [Hint: Expected killed != true, but received killed:1 == true:1.] at (/paddle/paddle/fluid/operators/reader/blocking_queue.h:141) [operator < read > error]

Angus07 commented 4 years ago

原始代码的数据feed是没问题的,我也没有修改,就是改了reinforce learning相关的逻辑

wanghaoshuang commented 4 years ago

File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1164, in _to_lodtensor data_tensor.set(data, place) TypeError: set(): incompatible function arguments. The following argument types are supported:

  1. (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CPUPlace, zero_copy: bool=False) -> None
  2. (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CUDAPlace, zero_copy: bool=False) -> None
  3. (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CUDAPinnedPlace, zero_copy: bool=False) -> None

你在data_tensor.set(data, place)之前打印下data和place的type?

Angus07 commented 4 years ago

这个问题解决了,确实是由于place没有传输,导致place为空导致的。但是目前到100多step的时候程序会出core。也无法看出错误的原因是什么。错误信息如下: Traceback (most recent call last): File "./run_seq2seq.py", line 360, in main(args) File "./run_seq2seq.py", line 276, in main train_exe.run(fetch_list=[]) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/parallel_executor.py", line 311, in run return_numpy=return_numpy) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 783, in run six.reraise(*sys.exc_info()) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 778, in run use_program_cache=use_program_cache) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 843, in _run_impl return_numpy=return_numpy) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py", line 677, in _run_parallel tensors = exe.run(fetch_var_names)._move_to_list() paddle.fluid.core_avx.EnforceNotMet:


C++ Call Stacks (More useful to developers):

0 std::string paddle::platform::GetTraceBackString<char const>(char const&&, char const, int) 1 paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const, int) 2 paddle::platform::CUDADeviceContext::Wait() const 3 paddle::framework::details::ScopeBufferedSSAGraphExecutor::DropLocalExeScopes() 4 paddle::framework::details::ScopeBufferedSSAGraphExecutor::Run(std::vector<std::string, std::allocator > const&) 5 paddle::framework::ParallelExecutor::Run(std::vector<std::string, std::allocator > const&)


Error Message Summary:

FatalError: cudaStreamSynchronize raises error: an illegal memory access was encountered, errono: 77: an illegal memory access was encountered at (/paddle/paddle/fluid/platform/device_context.cc:331)

terminate called after throwing an instance of 'paddle::platform::EnforceNotMet' what():


C++ Call Stacks (More useful to developers):

0 std::string paddle::platform::GetTraceBackString<char const>(char const&&, char const, int) 1 paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const, int) 2 paddle::framework::details::OpHandleBase::~OpHandleBase() 3 paddle::framework::details::ComputationOpHandle::~ComputationOpHandle() 4 paddle::framework::ir::Node::~Node() 5 paddle::framework::ir::Node::~Node() 6 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 7 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 8 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, 
std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 9 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 10 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 11 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > 
>::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 12 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 13 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 14 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 15 
std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 16 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 17 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 18 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, 
std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 19 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 20 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 21 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, 
std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 22 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 23 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 24 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, 
std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 25 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 26 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >) 27 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > >, std::_Select1st<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > >, std::less<paddle::framework::ir::Node>, std::allocator<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete > > > >::_M_erase(std::_Rb_tree_node<std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, 
std::default_delete > > >) r::Node> > > >*) 28 paddle::framework::ir::Graph::~Graph() 29 paddle::framework::ir::Graph::~Graph() 30 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release()


Error Message Summary:

Error: An error occurred here. There is no accurate error hint for this error yet. We are continuously in the process of increasing hint for this kind of error check. It would be helpful if you could inform us of how this conversion went by opening a github issue. And we will resolve it with high priority.

W0927 20:16:46.246031 81933 init.cc:209] Warning: PaddlePaddle catches a failure signal, it may not work properly W0927 20:16:46.246052 81933 init.cc:211] You could check whether you killed PaddlePaddle thread/process accidentally or report the case to PaddlePaddle W0927 20:16:46.246055 81933 init.cc:214] The detail failure signal is:

W0927 20:16:46.246060 81933 init.cc:217] Aborted at 1601209006 (unix time) try "date -d @1601209006" if you are using GNU date W0927 20:16:46.247614 81933 init.cc:217] PC: @ 0x0 (unknown) W0927 20:16:46.247680 81933 init.cc:217] SIGABRT (@0x3f50001400d) received by PID 81933 (TID 0x7f579e118700) from PID 81933; stack trace: W0927 20:16:46.248822 81933 init.cc:217] @ 0x7f579dedb390 (unknown) W0927 20:16:46.249893 81933 init.cc:217] @ 0x7f579d425428 gsignal W0927 20:16:46.250926 81933 init.cc:217] @ 0x7f579d42702a abort W0927 20:16:46.251379 81933 init.cc:217] @ 0x7f56fa0a284a __gnu_cxx::verbose_terminate_handler() W0927 20:16:46.251665 81933 init.cc:217] @ 0x7f56fa0a0f47 cxxabiv1::terminate() W0927 20:16:46.251929 81933 init.cc:217] @ 0x7f56fa0a03a5 cxa_call_terminate W0927 20:16:46.252239 81933 init.cc:217] @ 0x7f56fa0a0bd8 __gxx_personality_v0 W0927 20:16:46.253146 81933 init.cc:217] @ 0x7f579cdd1aab _Unwind_RaiseException_Phase2 W0927 20:16:46.254060 81933 init.cc:217] @ 0x7f579cdd1f49 _Unwind_Resume W0927 20:16:46.256429 81933 init.cc:217] @ 0x7f56dab78b9c paddle::framework::details::OpHandleBase::~OpHandleBase() W0927 20:16:46.258352 81933 init.cc:217] @ 0x7f56dab7b92f paddle::framework::details::ComputationOpHandle::~ComputationOpHandle() W0927 20:16:46.260051 81933 init.cc:217] @ 0x7f56d8460a89 paddle::framework::ir::Node::~Node() W0927 20:16:46.262677 81933 init.cc:217] @ 0x7f56d8460c31 paddle::framework::ir::Node::~Node() W0927 20:16:46.265332 81933 init.cc:217] @ 0x7f56d844aba2 std::_Rb_tree<>::_M_erase() W0927 20:16:46.267894 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.270385 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.272855 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.275336 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.277817 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.280300 81933 
init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.282804 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.285288 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.287781 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.290254 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.292726 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.295192 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.297705 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.300161 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.302662 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.305121 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.307587 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.310070 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase()

wanghaoshuang commented 4 years ago

在某些paddle版本下,batch size过大会导致这个问题:errono: 77: an illegal memory access was encountered

请问您使用的paddle版本是?

wanghaoshuang commented 4 years ago

如果减小batch size还无法解决问题,建议您升级Paddle到1.8.5版本。 一个相关的issue: https://github.com/PaddlePaddle/Paddle/issues/18464

paddle-bot-old[bot] commented 3 years ago

Since you haven't replied for more than a year, we have closed this issue/pr. If the problem is not solved or there is a follow-up one, please reopen it at any time and we will continue to follow up. 由于您超过一年未回复,我们将关闭这个issue/pr。 若问题未解决或有后续问题,请随时重新打开,我们会继续跟进。