Additionally, after forcing the shape, a runtime error is raised (after 1-2 minibatches); please help take a look at this as well. Environment: CUDA 9, Paddle 1.6.1, the Docker image provided on the official website. Replacing this op with roi_align runs normally. The code is as follows:
def _conv_offset(self,
                 input,
                 filter_size=3,
                 stride=1,
                 padding=1,
                 act=None,
                 name=None):
    out_channel = input.shape[1] * 2
    out = fluid.layers.conv2d(
        input,
        num_filters=out_channel,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        param_attr=ParamAttr(
            initializer=Constant(0.0), name=name + ".w_0"),
        bias_attr=ParamAttr(
            initializer=Constant(0.0), name=name + ".b_0"),
        act=act,
        name=name)
    return out
def __call__(self, head_inputs, rois, spatial_scale, is_mask=False):
    """
    Adopt RoI align onto several levels of feature maps to get RoI features.
    Distribute RoIs to different levels by area and get a list of RoI
    features from the distributed RoIs and their corresponding feature maps.

    Returns:
        roi_feat (Variable): RoI features with shape of [M, C, R, R],
            where M is the number of RoIs and R is the RoI resolution.
    """
    k_min = self.min_level
    k_max = self.max_level
    num_roi_lvls = k_max - k_min + 1
    name_list = list(head_inputs.keys())
    input_name_list = name_list[-num_roi_lvls:]
    spatial_scale = spatial_scale[-num_roi_lvls:]
    rois_dist, restore_index = fluid.layers.distribute_fpn_proposals(
        rois, k_min, k_max, self.canconical_level, self.canonical_size)
    # rois_dist is in ascending order
    roi_out_list = []
    resolution = self.mask_resolution if is_mask else self.box_resolution
    for lvl in range(num_roi_lvls):
        name_index = num_roi_lvls - lvl - 1
        rois_input = rois_dist[lvl]
        head_input = head_inputs[input_name_list[name_index]]
        sc = spatial_scale[name_index]
        offset_name = input_name_list[name_index] + "_dpooling_offset"
        trans = self._conv_offset(head_input, name=offset_name)
        print("[gry debug] in dpooling, offset name: {}, trans shape: {}".format(
            offset_name, trans.shape))
        roi_out = fluid.layers.deformable_roi_pooling(
            input=head_input,
            rois=rois_input,
            trans=trans,
            no_trans=False,
            spatial_scale=sc,
            group_size=(1, 1),
            pooled_height=resolution,
            pooled_width=resolution,
            sample_per_part=4,
            trans_std=0.1,
            position_sensitive=False)
        roi_out_list.append(roi_out)
    roi_feat_shuffle = fluid.layers.concat(roi_out_list)
    roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index)
    roi_feat = fluid.layers.lod_reset(roi_feat_, rois)
    return roi_feat
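For comparison, a minimal sketch of the roi_align substitution mentioned above, i.e. the variant that runs without error in the same setup (assuming the same loop variables head_input, rois_input, sc, and resolution; the sampling_ratio value is an illustrative assumption, not taken from the original code):

    # Sketch: the same loop body with roi_align instead of
    # deformable_roi_pooling; no offset (trans) branch is needed.
    roi_out = fluid.layers.roi_align(
        input=head_input,          # feature map of this FPN level
        rois=rois_input,           # RoIs distributed to this level
        pooled_height=resolution,
        pooled_width=resolution,
        spatial_scale=sc,
        sampling_ratio=2)          # assumed value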
The error message is as follows:
2019-12-25 10:37:47,071-INFO: iter: 0, lr: 0.006667, 'loss_bbox': '0.007775', 'loss_rpn_cls': '0.695010', 'loss_rpn_bbox': '0.015740', 'loss_cls': '4.402408', 'loss': '5.120934', time: 0.004, eta: 0:12:54
F1225 10:37:47.355821 28440 device_context.cc:334] cudaGetLastError invalid configuration argument errno: 9
*** Check failure stack trace: ***
@ 0x7f78b1f5438d google::LogMessage::Fail()
@ 0x7f78b1f57e3c google::LogMessage::SendToLog()
@ 0x7f78b1f53eb3 google::LogMessage::Flush()
@ 0x7f78b1f5934e google::LogMessageFatal::~LogMessageFatal()
@ 0x7f78b470c557 paddle::platform::CUDADeviceContext::Wait()
@ 0x7f78b46a62d1 paddle::framework::TransDataDevice()
@ 0x7f78b46a4c9e paddle::framework::TransformData()
@ 0x7f78b46833eb paddle::framework::OperatorWithKernel::PrepareData()
@ 0x7f78b4684838 paddle::framework::OperatorWithKernel::RunImpl()
@ 0x7f78b4684f11 paddle::framework::OperatorWithKernel::RunImpl()
@ 0x7f78b467eb9c paddle::framework::OperatorBase::Run()
@ 0x7f78b4463e26 paddle::framework::details::ComputationOpHandle::RunImpl()
@ 0x7f78b441b0c6 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync()
@ 0x7f78b4419e0f paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp()
@ 0x7f78b441a0d4 _ZNSt17_Function_handlerIFvvESt17reference_wrapperISt12_Bind_simpleIFS1_ISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS6_12OpHandleBaseESt6atomicIiESt4hashISA_ESt8equal_toISA_ESaISt4pairIKSA_SC_EEESA_RKSt10shared_ptrINS5_13BlockingQueueImEEEEUlvE_vEEEvEEEE9_M_invokeERKSt9_Any_data
@ 0x7f78b20733d3 std::_Function_handler<>::_M_invoke()
@ 0x7f78b1ebf5b7 std::__future_base::_State_base::_M_do_set()
@ 0x7f795ed24a99 __pthread_once_slow
@ 0x7f78b4415c22 _ZNSt13__future_base11_Task_stateISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS4_12OpHandleBaseESt6atomicIiESt4hashIS8_ESt8equal_toIS8_ESaISt4pairIKS8_SA_EEES8_RKSt10shared_ptrINS3_13BlockingQueueImEEEEUlvE_vEESaIiEFvvEE6_M_runEv
@ 0x7f78b1ec0d74 _ZZN10ThreadPoolC1EmENKUlvE_clEv
@ 0x7f78f712ac80 (unknown)
@ 0x7f795ed1d6ba start_thread
@ 0x7f795ea5341d clone
@ (nil) (unknown)
Aborted
This assert can be removed when compiling, but the value set for part_height must be less than or equal to trans_dims[2]; otherwise an out-of-bounds access will occur.
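Based on that constraint, a hypothetical build-time guard (assuming the trans and resolution variables from the snippet above, placed right after trans is computed, and only checking when the dim is statically known):

    # Hypothetical guard: ensure pooled_height <= trans_dims[2] before
    # calling the op; skip the check when the compile-time dim is -1.
    trans_h = trans.shape[2]
    if trans_h > 0:
        assert resolution <= trans_h, (
            "pooled_height ({}) must be <= trans_dims[2] ({}), otherwise "
            "the kernel reads out of bounds".format(resolution, trans_h))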
The input to trans should come from an fc layer, right? Its dimensions should be batch_size x num. But the demo says trans is an [N, C, H, W] tensor. Is my understanding wrong?
trans (Variable) - The feature offsets over the RoIs during pooling, a Tensor with data type float32. The format is [N, C, H, W], where N is the number of RoIs, C is the number of channels, indicating the offset distances in the x and y directions, H is the pooled height, and W is the pooled width.
Your understanding is correct; the format is [N, C, H, W]. Could you check whether rois_input is present every time? Since FPN does not assign RoIs to every layer, I am not sure whether the problem is caused by rois_input being empty.
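One way to check that at runtime (a debugging sketch for the Paddle 1.6 static graph: it wraps each level's distributed RoIs in fluid.layers.Print so their shape and LoD are logged every step):

    # Debugging sketch: log each level's RoI tensor to see whether any
    # FPN level receives zero RoIs before the crash.
    for lvl in range(num_roi_lvls):
        rois_dist[lvl] = fluid.layers.Print(
            rois_dist[lvl],
            message="rois_dist level {}".format(lvl),
            summarize=4,
            print_tensor_lod=True)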
I took a look: before the error occurs, there is no case where the RoIs are empty. According to the description of trans, H and W are the pooled height and pooled width, but in the demo below they are the same as the feature map's H and W. Is there a conflict between the documentation and the demo here?
trans (Variable) - The feature offsets over the RoIs during pooling, a Tensor with data type float32. The format is [N, C, H, W], where N is the number of RoIs, C is the number of channels, indicating the offset distances in the x and y directions, H is the pooled height, and W is the pooled width.
The definition code:
import paddle.fluid as fluid

input = fluid.data(name="input",
                   shape=[2, 192, 64, 64],
                   dtype='float32')
rois = fluid.data(name="rois",
                  shape=[-1, 4],
                  dtype='float32',
                  lod_level=1)
trans = fluid.data(name="trans",
                   shape=[2, 384, 64, 64],
                   dtype='float32')
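To make the discrepancy concrete, this is what the documentation's wording would imply instead (a sketch; the pooled size 7 and the name trans_doc are illustrative assumptions):

    # Per the doc text ("H is the pooled height, W is the pooled width"),
    # trans would be [num_rois, 2, pooled_h, pooled_w]:
    trans_doc = fluid.data(name="trans_doc",
                           shape=[-1, 2, 7, 7],
                           dtype='float32')
    # ...whereas the demo above uses [2, 384, 64, 64], i.e. the feature
    # map's H and W (64 x 64) rather than the pooled size.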
I changed how I use it. In FPN, when a level's RoIs are empty, the error does occur, whereas roi_align does not fail. I used dpooling in a standard Faster R-CNN R50 architecture for 100 minibatch iterations without any problem (in the standard Faster R-CNN, RoIs are never empty), which seems to confirm the earlier conclusion. Could this be fixed? Also, about the assertion issue: I think it can be removed; otherwise, users whose shapes are -1 have to compile Paddle themselves, which is rather costly. When using dpooling in FPN, the error message is as follows:
/usr/local/lib/python2.7/dist-packages/paddle/fluid/executor.py:773: UserWarning: The following exception is not an EOF exception.
"The following exception is not an EOF exception.")
Traceback (most recent call last):
  File "tools/train.py", line 323, in <module>
    main()
  File "tools/train.py", line 233, in main
    outs = exe.run(compiled_train_prog, fetch_list=train_values)
  File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/executor.py", line 774, in run
    six.reraise(*sys.exc_info())
  File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/executor.py", line 769, in run
    use_program_cache=use_program_cache)
  File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/executor.py", line 828, in _run_impl
    return_numpy=return_numpy)
  File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/executor.py", line 668, in _run_parallel
    tensors = exe.run(fetch_var_names)._move_to_list()
paddle.fluid.core_avx.EnforceNotMet:
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > paddle::platform::GetTraceBackString<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&&, char const*, int)
1 paddle::framework::Tensor::check_memory_size() const
2 float const* paddle::framework::Tensor::data<float>() const
3 paddle::operators::DeformablePSROIPoolGradCUDAKernel<paddle::platform::CUDADeviceContext, float>::Compute(paddle::framework::ExecutionContext const&) const
4 std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::DeformablePSROIPoolGradCUDAKernel<paddle::platform::CUDADeviceContext, float>, paddle::operators::DeformablePSROIPoolGradCUDAKernel<paddle::platform::CUDADeviceContext, double> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&)
5 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, paddle::platform::Place const&, paddle::framework::RuntimeContext*) const
6 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, paddle::platform::Place const&) const
7 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, paddle::platform::Place const&)
8 paddle::framework::details::ComputationOpHandle::RunImpl()
9 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync(paddle::framework::details::OpHandleBase*)
10 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue<unsigned long> > const&, unsigned long*)
11 std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*)
12 std::thread::_Impl<std::_Bind_simple<ThreadPool::ThreadPool(unsigned long)::{lambda()#1} ()> >::_M_run()
------------------------------------------
Python Call Stacks (More useful to users):
------------------------------------------
File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/framework.py", line 2503, in append_op
attrs=kwargs.get("attrs", None))
File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/layers/nn.py", line 13549, in deformable_roi_pooling
"trans_std": trans_std
File "/paddle/code/gry/PaddleDetection/ppdet/modeling/roi_extractors/roi_extractor.py", line 200, in __call__
name = "test_dpooling_st{}".format(lvl))
File "/paddle/code/gry/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py", line 133, in build
roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
File "/paddle/code/gry/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py", line 240, in train
return self.build(feed_vars, 'train')
File "tools/train.py", line 116, in main
train_fetches = model.train(feed_vars)
File "tools/train.py", line 323, in <module>
main()
----------------------
Error Message Summary:
----------------------
Error: Tensor holds no memory. Call Tensor::mutable_data first.
[Hint: holder_ should not be null.] at (/paddle/libs/paddle_versions/Paddle/paddle/fluid/framework/tensor.cc:23)
[operator < deformable_psroi_pooling_grad > error]
In deformable_psroi_pooling_op, a compile-time shape assertion was added, but the tensor shapes I get at compile time may be -1. Could this shape-check assertion be removed? (Most tensors' dims become -1 after a reshape.)
https://github.com/PaddlePaddle/Paddle/blob/70c073a0019f3405f9074579a8c48a43281dfdea/paddle/fluid/operators/deformable_psroi_pooling_op.cc#L184
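For illustration, a minimal sketch of why compile-time dims end up as -1 (the names here are illustrative):

    import paddle.fluid as fluid

    # With a variable batch size, fluid.data already carries a -1 dim:
    x = fluid.data(name="x", shape=[-1, 192, 64, 64], dtype='float32')
    # After a reshape, the inferred compile-time shape still carries -1,
    # so a strict positive-shape assertion in InferShape cannot pass:
    y = fluid.layers.reshape(x, shape=[-1, 192 * 64, 64])
    print(y.shape)  # [-1, 12288, 64]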