PaddlePaddle / Paddle

PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习&机器学习高性能单机、分布式训练和跨平台部署)
http://www.paddlepaddle.org/
Apache License 2.0
22.2k stars 5.57k forks source link

capi forward函数core: Check failed: size != 0 allocate 0 bytes #7774

Closed fty8788 closed 6 years ago

fty8788 commented 6 years ago

core信息:

F0123 13:55:23.367887 14617 MemoryHandle.cpp:49] Check failed: size != 0  allocate 0 bytes
*** Check failure stack trace: ***
    @     0x7ffff6cee87d  google::LogMessage::Fail()
    @     0x7ffff6cf232c  google::LogMessage::SendToLog()
    @     0x7ffff6cee3a3  google::LogMessage::Flush()
    @     0x7ffff6cf383e  google::LogMessageFatal::~LogMessageFatal()
    @     0x7ffff6c5c1b9  paddle::CpuMemoryHandle::CpuMemoryHandle()
    @     0x7ffff6c3e141  paddle::CpuMatrix::CpuMatrix()
    @     0x7ffff6c3e3a6  paddle::Matrix::create()
    @     0x7ffff6c4bc30  paddle::Matrix::resizeOrCreate()
    @     0x7ffff6a59507  paddle::Layer::resetSpecifyOutput()
    @     0x7ffff6a59774  paddle::Layer::resetOutput()
    @     0x7ffff6ac9300  paddle::MixedLayer::forward()
    @     0x7ffff6b65530  paddle::NeuralNetwork::forward()
    @     0x7ffff6a1c4e6  paddle_gradient_machine_forward
    @     0x7ffff660da0c  paddle_model::IntentRank::predict_b()
    @     0x7ffff660d7ea  paddle_model::IntentRank::predict_a()
    @           0x400fe3  main
    @     0x7ffff5a64bd5  __libc_start_main
    @           0x400dc9  (unknown)
    @              (nil)  (unknown)

Program received signal SIGABRT, Aborted.
0x00007ffff5a783f7 in raise () from /opt/compiler/gcc-4.8.2/lib/libc.so.6
(gdb) bt
#0  0x00007ffff5a783f7 in raise () from /opt/compiler/gcc-4.8.2/lib/libc.so.6
#1  0x00007ffff5a797d8 in abort () from /opt/compiler/gcc-4.8.2/lib/libc.so.6
#2  0x00007ffff6cf7715 in google::DumpStackTraceAndExit() () from /home/yanchunwei/third_party/tengfei/Paddle/paddle/capi/examples/model_inference/usr/local/lib/libpaddle_capi_shared.so
#3  0x00007ffff6cee87d in google::LogMessage::Fail() () from /home/yanchunwei/third_party/tengfei/Paddle/paddle/capi/examples/model_inference/usr/local/lib/libpaddle_capi_shared.so
#4  0x00007ffff6cf232c in google::LogMessage::SendToLog() () from /home/yanchunwei/third_party/tengfei/Paddle/paddle/capi/examples/model_inference/usr/local/lib/libpaddle_capi_shared.so
#5  0x00007ffff6cee3a3 in google::LogMessage::Flush() () from /home/yanchunwei/third_party/tengfei/Paddle/paddle/capi/examples/model_inference/usr/local/lib/libpaddle_capi_shared.so
#6  0x00007ffff6cf383e in google::LogMessageFatal::~LogMessageFatal() () from /home/yanchunwei/third_party/tengfei/Paddle/paddle/capi/examples/model_inference/usr/local/lib/libpaddle_capi_shared.so
#7  0x00007ffff6c5c1b9 in paddle::CpuMemoryHandle::CpuMemoryHandle(unsigned long) () at /home/yuyang/BuildAgent3/work/d55918cf60d51073/paddle/math/MemoryHandle.cpp:49
#8  0x00007ffff6c3e141 in paddle::CpuMatrix::CpuMatrix(unsigned long, unsigned long, bool) () at /home/opt/gcc-4.8.2.bpkg-r2/gcc-4.8.2.bpkg-r2/include/c++/4.8.2/ext/new_allocator.h:120
#9  0x00007ffff6c3e3a6 in paddle::Matrix::create(unsigned long, unsigned long, bool, bool) () at /home/opt/gcc-4.8.2.bpkg-r2/gcc-4.8.2.bpkg-r2/include/c++/4.8.2/ext/new_allocator.h:120
#10 0x00007ffff6c4bc30 in paddle::Matrix::resizeOrCreate(std::shared_ptr<paddle::Matrix>&, unsigned long, unsigned long, bool, bool) () at /home/yuyang/BuildAgent3/work/d55918cf60d51073/paddle/math/Matrix.cpp:172
#11 0x00007ffff6a59507 in paddle::Layer::resetSpecifyOutput(paddle::Argument&, unsigned long, unsigned long, bool, bool) () at /home/yuyang/BuildAgent3/work/d55918cf60d51073/paddle/gserver/layers/Layer.cpp:134
#12 0x00007ffff6a59774 in paddle::Layer::resetOutput(unsigned long, unsigned long) () at /home/yuyang/BuildAgent3/work/d55918cf60d51073/paddle/gserver/layers/Layer.cpp:165
#13 0x00007ffff6ac9300 in paddle::MixedLayer::forward(paddle::enumeration_wrapper::PassType) () at /home/yuyang/BuildAgent3/work/d55918cf60d51073/paddle/gserver/layers/MixedLayer.cpp:119
#14 0x00007ffff6b65530 in paddle::NeuralNetwork::forward(std::vector<paddle::Argument, std::allocator<paddle::Argument> > const&, std::vector<paddle::Argument, std::allocator<paddle::Argument> >*, paddle::enumeration_wrapper::PassType) () at /home/yuyang/BuildAgent3/work/d55918cf60d51073/paddle/gserver/gradientmachines/NeuralNetwork.cpp:250
#15 0x00007ffff6a1c4e6 in paddle_gradient_machine_forward () at /home/yuyang/BuildAgent3/work/d55918cf60d51073/paddle/capi/gradient_machine.cpp:81
#16 0x00007ffff660da0c in paddle_model::IntentRank::predict_b(paddle_model::VectorInstance*, float*) () from /home/yanchunwei/third_party/tengfei/intent_rank_lib/lib/libintent_rank.so
#17 0x00007ffff660d7ea in paddle_model::IntentRank::predict_a() () from /home/yanchunwei/third_party/tengfei/intent_rank_lib/lib/libintent_rank.so
#18 0x0000000000400fe3 in main ()

代码:

//int query_ids[] = {330070,1515788,1606717,163247,1622216,251207,304166,729241,1177768};
  paddle_ivector sentence0 = paddle_ivector_create(
      query_ids, query_len, false, false);
  CHECK(paddle_arguments_set_ids(left_in_args, 0, sentence0));

  int seq_pos_array0[] = {0, query_len};

  paddle_ivector seq_pos0 = paddle_ivector_create(
      seq_pos_array0, sizeof(seq_pos_array0) / sizeof(int), false, false);

  CHECK(paddle_arguments_set_sequence_start_pos(left_in_args, 0, 0, seq_pos0));

  printf("forward\n");
  start = clock();
  CHECK(paddle_gradient_machine_forward(left_machine,
                                        left_in_args,
                                        out_args,
                                        /* isTrain */ false));

网络结构:

type: "nn"
layers {
  name: "source_input"
  type: "data"
  size: 2000001
  active_type: ""
}
layers {
  name: "__embedding_0__"
  type: "mixed"
  size: 128
  active_type: ""
  inputs {
    input_layer_name: "source_input"
    input_parameter_name: "__emb.w"
    proj_conf {
      type: "table"
      name: "___embedding_0__.w0"
      input_size: 2000001
      output_size: 128
    }
  }
}
layers {
  name: "__seq_pooling_0__"
  type: "average"
  size: 128
  active_type: ""
  inputs {
    input_layer_name: "__embedding_0__"
  }
  average_strategy: "sum"
  trans_type: "non-seq"
}
layers {
  name: "__concat_0__"
  type: "concat"
  size: 128
  active_type: "tanh"
  inputs {
    input_layer_name: "__seq_pooling_0__"
  }
}
parameters {
  name: "__emb.w"
  size: 256000128
  initial_mean: 0.0
  initial_std: 0.00070710660441
  dims: 2000001
  dims: 128
  initial_strategy: 0
  initial_smart: true
}
input_layer_names: "source_input"
output_layer_names: "__concat_0__"
sub_models {
  name: "root"
  layer_names: "source_input"
  layer_names: "__embedding_0__"
  layer_names: "__seq_pooling_0__"
  layer_names: "__concat_0__"
  input_layer_names: "source_input"
  output_layer_names: "__concat_0__"
  is_recurrent_layer_group: false
  reversed: false
}
Xreki commented 6 years ago

你好,从堆栈日志来看,错误发生在:

https://github.com/PaddlePaddle/Paddle/blob/88a95a02d0649c1642a23482486b396f9f07906b/paddle/gserver/layers/MixedLayer.cpp#L119-L119

这一行会申请Matrix的内存。出现申请的内存长度为0,也就是要么batchSize是0,要么size是0。

https://github.com/PaddlePaddle/Paddle/blob/88a95a02d0649c1642a23482486b396f9f07906b/paddle/gserver/layers/Layer.h#L291-L291

是通过配置获取的,配置里面是128

所以似乎是因为batchSize为0?

fty8788 commented 6 years ago

是因为我的输入为0么 int batchSize = getInput(0).getBatchSize();

chengduoZH commented 6 years ago

@fty8788 建议用 python -m pdb train.py 调试一下

chengduoZH commented 6 years ago

Close due to low activity, please feel free to reopen it.