Closed Angus07 closed 3 years ago
请补充下定义data layer和data reader的相关代码?应该是shape没设置对。
见374行左右
from future import absolute_import from future import division from future import print_function import paddle.fluid.layers.utils as paddle_utils import os import time import argparse import numpy as np import multiprocessing import sys import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers
from six.moves import xrange
from model.ernie import ErnieModel
from tokenization import BasicTokenizer from utils.bleu import compute_bleu from paddle.fluid.layers import Print as Print
def cal_logit(src_ids, enc_out, tgt_pos, args, ernie_config, weights, gens):
    """Compute the copy-augmented (pointer-generator style) output distribution.

    Mixes the decoder's softmax over the vocabulary with the copy
    (attention) distribution projected onto the source token ids, gated by
    the per-position generation probability ``gens``.

    Args:
        src_ids: int64 token ids of the source sequence (one-hot projected
            onto the vocabulary axis below).
        enc_out: hidden states from the model, reshaped to
            [-1, hidden_size] before gathering target positions.
        tgt_pos: flat indices of the positions to score, or None to score
            every position (the step-by-step decoding path).
        args: run arguments; only ``weight_sharing`` is read here.
        ernie_config: model config dict; ``hidden_size`` and ``vocab_size``
            are read.
        weights: copy-attention weights over source positions.
        gens: generation-probability gate per position.

    Returns:
        final_dists: [-1, vocab] tensor,
            gens * softmax(logits) + (1 - gens) * copy_distribution.
    """
    # NOTE(review): 18000 was hard-coded in several shapes in the original;
    # it is presumably ernie_config['vocab_size'] — confirm before changing.
    vocab_size = 18000
    enc_out = fluid.layers.reshape(x=enc_out,
                                   shape=[-1, ernie_config["hidden_size"]])
    if tgt_pos is not None:
        tgt_pos = fluid.layers.cast(x=tgt_pos, dtype='int32')
        tgt_feat = fluid.layers.gather(input=enc_out, index=tgt_pos)
    else:
        tgt_feat = enc_out
    # Non-linear transform before projecting to the vocabulary.
    tgt_trans_feat = fluid.layers.fc(
        input=tgt_feat,
        size=ernie_config["hidden_size"],
        act="relu",
        param_attr=fluid.ParamAttr(
            name="seq2seq_trans_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="seq2seq_trans_b",
            initializer=fluid.initializer.Constant(0.)))
    seq2seq_out_bias_attr = fluid.ParamAttr(
        name="seq2seq_out_fc.b_0",
        initializer=fluid.initializer.Constant(value=0.0))
    if args.weight_sharing:
        # Tie the output projection to the input word embedding table.
        fc_out = fluid.layers.matmul(
            x=tgt_trans_feat,
            y=fluid.default_main_program().global_block().var(
                "word_embedding"),
            transpose_y=True)
        fc_out += fluid.layers.create_parameter(
            shape=[ernie_config['vocab_size']],
            dtype="float32",
            attr=seq2seq_out_bias_attr,
            is_bias=True)
    else:
        fc_out = fluid.layers.fc(
            input=tgt_trans_feat,
            size=ernie_config['vocab_size'],
            param_attr=fluid.ParamAttr(
                name="seq2seq_out_fc.w_0",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=seq2seq_out_bias_attr)
    # Generation gate: gather to the scored positions, then broadcast it
    # across the vocabulary axis.
    gens_trans = fluid.layers.reshape(x=gens, shape=[-1, 1])
    if tgt_pos is not None:
        gens_feat = fluid.layers.gather(input=gens_trans, index=tgt_pos)
    else:
        gens_feat = gens_trans
    gens_trans = fluid.layers.expand(gens_feat, expand_times=[1, vocab_size])
    fc_out = layers.softmax(fc_out)
    vocab_dists = fluid.layers.elementwise_mul(gens_trans, fc_out)
    # Copy distribution: attention weights scaled by (1 - gen gate), then
    # projected from source positions onto vocabulary ids via one-hot matmul.
    gens = layers.squeeze(input=gens, axes=[2])
    attn_dists = fluid.layers.elementwise_mul(weights, (1.0 - gens), axis=0)
    src_ids = fluid.one_hot(src_ids, vocab_size)
    src_ids = layers.squeeze(input=src_ids, axes=[2])
    attn_dists_projected = layers.matmul(attn_dists, src_ids)
    attn_dists_projected = fluid.layers.reshape(x=attn_dists_projected,
                                                shape=[-1, vocab_size])
    if tgt_pos is not None:
        attn_dists_projected = fluid.layers.gather(
            input=attn_dists_projected, index=tgt_pos)
    final_dists = attn_dists_projected + vocab_dists
    final_dists = fluid.layers.reshape(x=final_dists,
                                       shape=[-1, vocab_size])
    return final_dists
def create_model(args, pyreader_name, ernie_config, is_prediction=False, is_reinforce=False):
    """Build the training graph, or dispatch to an inference/RL graph.

    Args:
        args: run arguments (max_seq_len, use_fp16, loss_scaling, ...).
        pyreader_name: name for the py_reader feeding this program.
        ernie_config: model configuration dict.
        is_prediction: if True, build the beam-search inference graph.
        is_reinforce: if True, build the REINFORCE (greedy+sample) graph.

    Returns:
        (pyreader, graph_vars) where graph_vars maps names to persistable
        fetch variables; for the plain training graph it is {"loss": ...}.
    """
    if is_prediction:
        return fast_decode(args, pyreader_name, ernie_config)
    if is_reinforce:
        return greedy_decode(args, pyreader_name, ernie_config)
    #print("debug", args.max_seq_len)
    # Reader yields: src_ids, sent_ids, pos_ids, input_mask,
    # input_mask_encoder, tgt_labels, tgt_pos (see the tuple below).
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, args.max_seq_len], [-1, args.max_seq_len, args.max_seq_len],
                [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, input_mask, input_mask_encoder, tgt_labels,
     tgt_pos) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True)
    enc_out, weights, gens = ernie.get_sequence_output()
    # Copy-augmented probabilities over the vocabulary at the target
    # positions; cal_logit returns probabilities (softmax applied inside),
    # matching cross_entropy's expectation of a distribution.
    fc_out = cal_logit(src_ids, enc_out, tgt_pos, args, ernie_config, weights, gens)
    ce_loss = fluid.layers.cross_entropy(
        fc_out, tgt_labels)
    loss = fluid.layers.mean(x=ce_loss)
    if args.use_fp16 and args.loss_scaling > 1.0:
        # Scale the loss for fp16 training to reduce gradient underflow.
        loss *= args.loss_scaling
    graph_vars = {
        "loss": loss,
    }
    for k, v in graph_vars.items():
        # Keep fetch targets alive across executor scope cleanups.
        v.persistable = True
    return pyreader, graph_vars
def greedy_decode(args, pyreader_name, ernie_config):
    """Build the self-critical REINFORCE training graph.

    Combines a teacher-forced cross-entropy loss with an RL loss whose
    reward is the difference between a greedy (baseline) rollout and a
    sampled rollout; the reward itself is computed on the Python side via
    py_func (see reward_func).

    Returns:
        (pyreader, graph_vars) with graph_vars = {"loss": mixed_loss}.
    """
    # Reader yields the 13 tensors unpacked below.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, 1], [-1, 1], [-1],
                [-1, 1, args.max_seq_len], [-1, 1], [-1, -1],
                [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'float32', 'int64',
                'float32', 'int32', 'float32', 'int64', 'int64', 'int64',
                'int64'],
        lod_levels=[0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, input_mask, input_mask_encoder, tgt_ids,
     init_scores, parent_idx, tgt_input_mask, tgt_start_pos, data_ids,
     tgt_labels, tgt_pos) = fluid.layers.read_file(pyreader)
    # --- teacher-forced pass: MLE cross-entropy loss ---
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True)
    enc_out, weights, gens = ernie.get_sequence_output()
    fc_out = cal_logit(src_ids, enc_out, tgt_pos, args, ernie_config, weights, gens)
    ce_loss = fluid.layers.cross_entropy(
        fc_out, tgt_labels)
    loss = fluid.layers.mean(x=ce_loss)
    # Two decoders with per-step state caches (decoding=True): ernie1 drives
    # the greedy baseline rollout, ernie2 the sampled rollout.
    ernie1 = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True,
        decoding=True,
        gather_idx=parent_idx)
    ernie2 = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True,
        decoding=True,
        gather_idx=parent_idx)
    max_len = layers.fill_constant(
        shape=[1],
        dtype=tgt_ids.dtype,
        value=args.max_tgt_len,
        force_cpu=True)
    step_idx = layers.fill_constant(
        shape=[1], dtype=tgt_ids.dtype, value=0, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    ids = layers.array_write(
        layers.reshape(tgt_ids, (-1, 1)), step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)
    tgt_masks_encoder = layers.array_write(tgt_input_mask, step_idx)
    # Running copy of the source ids; each decoded token is concatenated on
    # so cal_logit can project the copy distribution onto seen tokens.
    global_src_id = fluid.layers.create_tensor(dtype='int64')
    fluid.layers.assign(src_ids, global_src_id)
    #global_src_id = src_ids
    #baseline greedy
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_mask = layers.array_read(tgt_masks, i=step_idx)
        pre_mask_encoder = layers.array_read(tgt_masks_encoder, i=step_idx)
        # Extend both attention masks by one column for the new step.
        append_mask = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=1.0,
            shape=[-1, 1, 1],
            dtype=pre_mask.dtype)
        append_mask_encoder = layers.fill_constant_batch_size_like(
            input=pre_mask_encoder,
            value=0.0,
            shape=[-1, 1, 1],
            dtype=pre_mask_encoder.dtype)
        pre_mask = layers.concat([pre_mask, append_mask], axis=2)
        pre_mask_encoder = layers.concat([pre_mask_encoder, append_mask_encoder], axis=2)
        # Position id of the current step, replicated over the batch.
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_mask,
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype), y=step_idx, axis=0)
        type_ids = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=args.tgt_type_id,
            shape=[-1, 1, 1],
            dtype=pre_ids.dtype)
        dec_out, weights, gens = ernie1.encode(pre_ids, pre_pos, type_ids, pre_mask, pre_mask_encoder, parent_idx)
        tmp_id = layers.concat([global_src_id, pre_ids], axis=1)
        layers.assign(tmp_id, global_src_id)
        fc_out = cal_logit(tmp_id, dec_out, None, args, ernie_config, weights, gens)
        #if not sample:
        # Greedy: take the arg-max token as the baseline action.
        topk_scores, topk_indices = layers.topk(
            input=fc_out, k=1)
        #else:
        #    topk_indices = layers.sampling_id(fc_out)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(topk_indices, i=step_idx, array=ids)
        layers.array_write(pre_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pre_mask_encoder, i=step_idx, array=tgt_masks_encoder)
        # Continue while under max length and the last step produced ids.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)
    baseline_ids = ids
    baseline_ids_tensor, _ = fluid.layers.tensor_array_to_tensor(input=baseline_ids)
    print("gap")
    # sample
    # --- second rollout: sample tokens and accumulate their log loss ---
    step_idx = layers.fill_constant(
        shape=[1], dtype=tgt_ids.dtype, value=0, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    ids = layers.array_write(
        layers.reshape(tgt_ids, (-1, 1)), step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)
    tgt_masks_encoder = layers.array_write(tgt_input_mask, step_idx)
    global_src_id2 = fluid.layers.create_tensor(dtype='int64')
    fluid.layers.assign(src_ids, global_src_id2)
    #global_src_id = src_ids
    # Accumulator for the sampled sequence's log loss across steps.
    global_loss = layers.fill_constant(
        shape=[1], dtype="float32", value=0)
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_mask = layers.array_read(tgt_masks, i=step_idx)
        pre_mask_encoder = layers.array_read(tgt_masks_encoder, i=step_idx)
        append_mask = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=1.0,
            shape=[-1, 1, 1],
            dtype=pre_mask.dtype)
        append_mask_encoder = layers.fill_constant_batch_size_like(
            input=pre_mask_encoder,
            value=0.0,
            shape=[-1, 1, 1],
            dtype=pre_mask_encoder.dtype)
        pre_mask = layers.concat([pre_mask, append_mask], axis=2)
        pre_mask_encoder = layers.concat([pre_mask_encoder, append_mask_encoder], axis=2)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_mask,
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype), y=step_idx, axis=0)
        type_ids = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=args.tgt_type_id,
            shape=[-1, 1, 1],
            dtype=pre_ids.dtype)
        dec_out, weights, gens = ernie2.encode(pre_ids, pre_pos, type_ids, pre_mask, pre_mask_encoder, parent_idx)
        tmp_id = layers.concat([global_src_id2, pre_ids], axis=1)
        layers.assign(tmp_id, global_src_id2)
        fc_out = cal_logit(tmp_id, dec_out, None, args, ernie_config, weights, gens)
        # Stochastic action: sample a token from the output distribution.
        topk_indices = layers.sampling_id(fc_out)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(topk_indices, i=step_idx, array=ids)
        layers.array_write(pre_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pre_mask_encoder, i=step_idx, array=tgt_masks_encoder)
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)
        # NOTE(review): log_loss expects a probability and a same-shaped
        # label tensor; passing raw sampled indices here looks suspicious —
        # confirm the intended per-step NLL computation.
        nll_loss = layers.log_loss(fc_out, topk_indices)
        tmp_global_loss = global_loss + nll_loss
        layers.assign(tmp_global_loss, global_loss)
    sample_ids = ids
    sample_ids_tensor, _ = fluid.layers.tensor_array_to_tensor(input=sample_ids)
    neg_reward = fluid.data(
        name="reward",
        shape=[1],  # batch_size
        dtype="float32")
    # Compute (baseline - sample) reward on the Python side via py_func.
    neg_reward = fluid.layers.py_func(
        func=reward_func, x=[sample_ids_tensor, baseline_ids_tensor], out=neg_reward)
    #neg_reward = score_baseline - score_sample
    rl_loss = neg_reward * global_loss
    rl_loss = layers.mean(rl_loss)
    # Fixed 0.7 / 0.3 mixture of MLE and RL losses.
    loss = 0.7 * loss + (1 - 0.7) * rl_loss
    graph_vars = {
        "loss": loss
    }
    for k, v in graph_vars.items():
        # Keep fetch targets alive across executor scope cleanups.
        v.persistable = True
    return pyreader, graph_vars
def reward_func(samples, baselines): samples = np.array(samples) baselines = np.array(baselines) neg_reward = np.mean(baselines,axis=1) - np.mean(samples,axis=1) return neg_reward
def fast_decode(args, pyreader_name, ernie_config):
    """Build the beam-search inference graph.

    Runs the cached decoder step by step inside a While op, keeping the
    beam state (ids, scores, masks) in LoDTensorArrays, then reconstructs
    the finished hypotheses with beam_search_decode.

    Returns:
        (pyreader, graph_vars) with "finished_ids", "finished_scores" and
        the pass-through "data_ids".
    """
    # Reader yields the 11 tensors unpacked below.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, args.max_seq_len], [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, 1], [-1, 1], [-1],
                [-1, 1, args.max_seq_len], [-1, 1], [-1, -1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'float32', 'int64', 'float32',
                'int32', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, input_mask, input_mask_encoder, tgt_ids, init_scores, parent_idx,
     tgt_input_mask, tgt_start_pos, data_ids) = fluid.layers.read_file(pyreader)
    # Encoder pass with decoding caches enabled; gather_idx reorders the
    # cache whenever beams are re-ranked.
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        input_mask_encoder=input_mask_encoder,
        config=ernie_config,
        use_fp16=args.use_fp16,
        is_unidirectional=True,
        decoding=True,
        gather_idx=parent_idx)
    #print("debug277", file=sys.stderr)
    max_len = layers.fill_constant(
        shape=[1],
        dtype=tgt_ids.dtype,
        value=args.max_tgt_len,
        force_cpu=True)
    step_idx = layers.fill_constant(
        shape=[1], dtype=tgt_ids.dtype, value=0, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    ids = layers.array_write(
        layers.reshape(tgt_ids, (-1, 1)), step_idx)
    scores = layers.array_write(init_scores, step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)  #?
    tgt_masks_encoder = layers.array_write(tgt_input_mask, step_idx)  #?
    #global_src_id = layers.expand(src_ids, expand_times=[args.beam_size, 1, 1])
    #print("debug292", file=sys.stderr)
    # NOTE(review): unlike greedy_decode (which copies into a fresh tensor
    # via create_tensor/assign), this aliases the reader output directly and
    # then assign()s a longer tensor into it every step — confirm this is
    # safe with the py_reader-owned buffer.
    global_src_id = src_ids
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)
        tmp_tgt_input_mask = layers.array_read(tgt_masks, i=step_idx)
        tmp_tgt_input_mask_encoder = layers.array_read(tgt_masks_encoder, i=step_idx)
        # Extend both attention masks by one column for the new step.
        append_mask = layers.fill_constant_batch_size_like(
            input=tmp_tgt_input_mask,
            value=1.0,
            shape=[-1, 1, 1],
            dtype=tmp_tgt_input_mask.dtype)
        append_mask_encoder = layers.fill_constant_batch_size_like(
            input=tmp_tgt_input_mask,
            value=0.0,
            shape=[-1, 1, 1],
            dtype=tmp_tgt_input_mask.dtype)
        tmp_tgt_input_mask = layers.concat([tmp_tgt_input_mask, append_mask], axis=2)
        tmp_tgt_input_mask_encoder = layers.concat([tmp_tgt_input_mask_encoder, append_mask_encoder], axis=2)
        # Reorder the masks to follow the surviving beams.
        pre_mask = layers.gather(input=tmp_tgt_input_mask, index=parent_idx)
        pre_mask_encoder = layers.gather(input=tmp_tgt_input_mask_encoder, index=parent_idx)
        # Position id of the current step, replicated over the batch.
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_mask,
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype), y=step_idx, axis=0)
        type_ids = layers.fill_constant_batch_size_like(
            input=pre_mask,
            value=args.tgt_type_id,
            shape=[-1, 1, 1],
            dtype=pre_ids.dtype)
        dec_out, weights, gens = ernie.encode(pre_ids, pre_pos, type_ids, pre_mask, pre_mask_encoder, parent_idx)
        # Keep the copy-source ids aligned with the surviving beams, then
        # append the tokens decoded this step.
        tmp_id = layers.gather(input=global_src_id, index=parent_idx)
        tmp_id = layers.concat([tmp_id, pre_ids], axis=1)
        layers.assign(tmp_id, global_src_id)
        fc_out = cal_logit(tmp_id, dec_out, None, args, ernie_config, weights, gens)
        topk_scores, topk_indices = layers.topk(
            input=fc_out, k=args.beam_size)
        # Accumulate log-probabilities onto the running beam scores.
        accu_scores = layers.elementwise_add(
            x=layers.log(topk_scores), y=pre_scores, axis=0)
        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        accu_scores = layers.lod_reset(accu_scores, pre_ids)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=args.beam_size,
            end_id=args.eos_idx,
            return_parent_idx=True)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.array_write(pre_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pre_mask_encoder, i=step_idx, array=tgt_masks_encoder)
        layers.assign(gather_idx, parent_idx)
        # Continue while under max length and some beam is still alive.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)
    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=args.beam_size, end_id=args.eos_idx)
    graph_vars = {
        "finished_ids": finished_ids,
        "finished_scores": finished_scores,
        "data_ids": data_ids
    }
    for k, v in graph_vars.items():
        # Keep fetch targets alive across executor scope cleanups.
        v.persistable = True
    return pyreader, graph_vars
def post_process_seq(seq, eos_idx): """ Post-process the beam-search decoded sequence. Truncate from the first
@chenwhql 帮忙看下这个问题?关于memory_usage
@angus
你是自己显式的调用memory_usage
么?先把这个调用屏蔽掉?
屏蔽了,依然报错。信息如下:
2020-09-24 15:45:34,445-WARNING: Your decorated reader has raised an exception! Exception in thread Thread-1: Traceback (most recent call last): File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/threading.py", line 801, in bootstrap_inner self.run() File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/threading.py", line 754, in run self.target(*self.args, **self.kwargs) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layers/io.py", line 491, in provider_thread six.reraise(sys.exc_info()) File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layers/io.py", line 472, in __provider_thread__ for tensors in func(): File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1153, in wrapper examples, batch_size, phase=phase, do_dec=do_dec, place=place): File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1103, in _prepare_batch_data yield self._pad_batch_records(batch_records, do_dec, place) File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1218, in _pad_batch_records place, [range(tgt_word.shape[0] + 1)] 2) File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1164, in _to_lodtensor data_tensor.set(data, place) TypeError: set(): incompatible function arguments. The following argument types are supported:
Invoked with: <paddle.fluid.core_avx.LoDTensor object at 0x7f52d4c4b5b0>, array([[0.], [0.], [0.], [0.]], dtype=float32), None
/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/executor.py:782: UserWarning: The following exception is not an EOF exception.
"The following exception is not an EOF exception.")
Traceback (most recent call last):
File "./run_seq2seq.py", line 359, in
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const, int)
2 paddle::operators::reader::BlockingQueue<std::vector<paddle::framework::LoDTensor, std::allocator
File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/framework.py", line 2525, in append_op
attrs=kwargs.get("attrs", None))
File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/mnt/sg/chenliangyu/1-paddle_1.7.1_py2.7/miniconda2/lib/python2.7/site-packages/paddle/fluid/layers/io.py", line 889, in read_file
type='read', inputs={'Reader': [reader]}, outputs={'Out': out})
File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/finetune/seq2seq.py", line 249, in greedy_decode
tgt_input_mask, tgt_start_pos, data_ids, tgt_labels, tgt_pos) = fluid.layers.read_file(pyreader)
File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/finetune/seq2seq.py", line 196, in create_model
return greedy_decode(args, pyreader_name, ernie_config)
File "./run_seq2seq.py", line 105, in main
is_reinforce = True)
File "./run_seq2seq.py", line 359, in
Error: Blocking queue is killed because the data reader raises an exception [Hint: Expected killed != true, but received killed:1 == true:1.] at (/paddle/paddle/fluid/operators/reader/blocking_queue.h:141) [operator < read > error]
原始代码的数据feed是没问题的,我也没有修改,就是改了reinforce learning相关的逻辑
File "/mnt/sg/chenliangyu/45/chenliangyu/ernie-gen-copy-title-reinforcement/reader/task_reader.py", line 1164, in _to_lodtensor data_tensor.set(data, place) TypeError: set(): incompatible function arguments. The following argument types are supported:
- (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CPUPlace, zero_copy: bool=False) -> None
- (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CUDAPlace, zero_copy: bool=False) -> None
- (self: paddle.fluid.core_avx.Tensor, array: object, place: paddle::platform::CUDAPinnedPlace, zero_copy: bool=False) -> None
你在data_tensor.set(data, place)
之前打印下data和place的type?
这个问题解决了,确实是由于place没有传输,导致place为空导致的。但是目前到100多step的时候程序会出core。也无法看出错误的原因是什么。错误信息如下:
Traceback (most recent call last):
File "./run_seq2seq.py", line 360, in
0 std::string paddle::platform::GetTraceBackString<char const>(char const&&, char const, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const, int)
2 paddle::platform::CUDADeviceContext::Wait() const
3 paddle::framework::details::ScopeBufferedSSAGraphExecutor::DropLocalExeScopes()
4 paddle::framework::details::ScopeBufferedSSAGraphExecutor::Run(std::vector<std::string, std::allocator
FatalError: cudaStreamSynchronize raises error: an illegal memory access was encountered, errono: 77: an illegal memory access was encountered at (/paddle/paddle/fluid/platform/device_context.cc:331)
terminate called after throwing an instance of 'paddle::platform::EnforceNotMet' what():
0 std::string paddle::platform::GetTraceBackString<char const>(char const&&, char const, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const, int)
2 paddle::framework::details::OpHandleBase::~OpHandleBase()
3 paddle::framework::details::ComputationOpHandle::~ComputationOpHandle()
4 paddle::framework::ir::Node::~Node()
5 paddle::framework::ir::Node::~Node()
6 std::_Rb_tree<paddle::framework::ir::Node, std::pair<paddle::framework::ir::Node const, std::unique_ptr<paddle::framework::ir::Node, std::default_delete
Error: An error occurred here. There is no accurate error hint for this error yet. We are continuously in the process of increasing hint for this kind of error check. It would be helpful if you could inform us of how this conversion went by opening a github issue. And we will resolve it with high priority.
W0927 20:16:46.246031 81933 init.cc:209] Warning: PaddlePaddle catches a failure signal, it may not work properly W0927 20:16:46.246052 81933 init.cc:211] You could check whether you killed PaddlePaddle thread/process accidentally or report the case to PaddlePaddle W0927 20:16:46.246055 81933 init.cc:214] The detail failure signal is:
W0927 20:16:46.246060 81933 init.cc:217] Aborted at 1601209006 (unix time) try "date -d @1601209006" if you are using GNU date W0927 20:16:46.247614 81933 init.cc:217] PC: @ 0x0 (unknown) W0927 20:16:46.247680 81933 init.cc:217] SIGABRT (@0x3f50001400d) received by PID 81933 (TID 0x7f579e118700) from PID 81933; stack trace: W0927 20:16:46.248822 81933 init.cc:217] @ 0x7f579dedb390 (unknown) W0927 20:16:46.249893 81933 init.cc:217] @ 0x7f579d425428 gsignal W0927 20:16:46.250926 81933 init.cc:217] @ 0x7f579d42702a abort W0927 20:16:46.251379 81933 init.cc:217] @ 0x7f56fa0a284a __gnu_cxx::verbose_terminate_handler() W0927 20:16:46.251665 81933 init.cc:217] @ 0x7f56fa0a0f47 cxxabiv1::terminate() W0927 20:16:46.251929 81933 init.cc:217] @ 0x7f56fa0a03a5 cxa_call_terminate W0927 20:16:46.252239 81933 init.cc:217] @ 0x7f56fa0a0bd8 __gxx_personality_v0 W0927 20:16:46.253146 81933 init.cc:217] @ 0x7f579cdd1aab _Unwind_RaiseException_Phase2 W0927 20:16:46.254060 81933 init.cc:217] @ 0x7f579cdd1f49 _Unwind_Resume W0927 20:16:46.256429 81933 init.cc:217] @ 0x7f56dab78b9c paddle::framework::details::OpHandleBase::~OpHandleBase() W0927 20:16:46.258352 81933 init.cc:217] @ 0x7f56dab7b92f paddle::framework::details::ComputationOpHandle::~ComputationOpHandle() W0927 20:16:46.260051 81933 init.cc:217] @ 0x7f56d8460a89 paddle::framework::ir::Node::~Node() W0927 20:16:46.262677 81933 init.cc:217] @ 0x7f56d8460c31 paddle::framework::ir::Node::~Node() W0927 20:16:46.265332 81933 init.cc:217] @ 0x7f56d844aba2 std::_Rb_tree<>::_M_erase() W0927 20:16:46.267894 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.270385 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.272855 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.275336 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.277817 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.280300 81933 
init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.282804 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.285288 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.287781 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.290254 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.292726 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.295192 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.297705 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.300161 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.302662 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.305121 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.307587 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase() W0927 20:16:46.310070 81933 init.cc:217] @ 0x7f56d844ab8f std::_Rb_tree<>::_M_erase()
在某些paddle版本下,batch size过大会导致这个问题:errono: 77: an illegal memory access was encountered
请问您使用的paddle版本是?
如果减小batch size还无法解决问题,建议您升级Paddle到1.8.5版本。 一个相关的issue: https://github.com/PaddlePaddle/Paddle/issues/18464
Since you haven't replied for more than a year, we have closed this issue/pr. If the problem is not solved or there is a follow-up one, please reopen it at any time and we will continue to follow up. 由于您超过一年未回复,我们将关闭这个issue/pr。 若问题未解决或有后续问题,请随时重新打开,我们会继续跟进。