Closed. xubin1994 closed this issue 4 years ago.

The data-reading code works and the network builds fine on their own, but when everything is stitched together, optimizer.minimize(avg_cost) raises an error that seems to say the graph cannot be backpropagated. What is going wrong here? The training part of the code follows.

Reposting the training code here, since the formatting in the original post came out wrong:
import paddle
import paddle.fluid as fluid
from dataloader import *
from IresNet import IresNet
from loss import model_loss

if __name__ == '__main__':
    # main_program = fluid.default_main_program()
    datapath = '/data/home/xubin/dataset/dataset/GANet_dataset'
    file_name = 'sceneflow_train.txt'
    max_disp = 192
    left_input = fluid.layers.data(name='left_input', shape=[3, 384, 768], dtype='float32')
    right_input = fluid.layers.data(name='right_input', shape=[3, 384, 768], dtype='float32')
    left_disp_input = fluid.layers.data(name='left_disp_input', shape=[1, 384, 768], dtype='float32')
    predict_final, r_res2_predict, r_res1_predict, r_res0 = IresNet(left_input, right_input)
    avg_cost = model_loss(predict_final, r_res2_predict, r_res1_predict, r_res0, left_disp_input, max_disp)
    optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001)
    opt = optimizer.minimize(avg_cost)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place=place)
    exe.run(program=fluid.default_startup_program())
    train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=train_r(datapath, file_name), buf_size=3000),
                                batch_size=1)
    feeder = fluid.DataFeeder(place=place, feed_list=[left_input, right_input, left_disp_input])
    for pass_id in range(2):
        for batch_id, data in enumerate(train_reader()):
            # fetch_list has a single entry, so unpack a single value
            train_cost, = exe.run(program=fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
            print(train_cost)
Could you paste the model_loss code?
The loss code:
import paddle
import paddle.fluid as fluid


def loss_func(predict, disp_gt, maxdisp):
    mask1 = (disp_gt > 0)
    mask2 = (disp_gt < maxdisp)
    mask = paddle.fluid.layers.cast(mask1, 'float32') * paddle.fluid.layers.cast(mask2, 'float32')
    # mask.stop_gradient = True
    sum_mask = paddle.fluid.layers.reduce_sum(mask)
    predict_valid = predict * mask
    disp_gt_valid = disp_gt * mask
    loss_sum = paddle.fluid.layers.reduce_sum(paddle.fluid.layers.abs(predict_valid - disp_gt_valid))
    loss_mean = loss_sum / (sum_mask + 1e-6)
    return loss_mean


def model_loss(predict_final, r_res2_predict, r_res1_predict, r_res0, disp_gt, maxdisp):
    all_losses = []
    # loss 0
    loss0 = loss_func(fluid.layers.relu(predict_final), disp_gt, maxdisp)
    all_losses.append(loss0)
    # loss 1
    loss1 = loss_func(fluid.layers.relu(predict_final + r_res0), disp_gt, maxdisp)
    all_losses.append(loss1)
    # loss 2
    gt2 = paddle.fluid.layers.image_resize(disp_gt, scale=0.25, resample='BILINEAR',
                                           align_corners=False, align_mode=1) / 4.0
    predict_final_2 = paddle.fluid.layers.image_resize(predict_final, scale=0.25, resample='BILINEAR',
                                                       align_corners=False, align_mode=1) / 4.0
    loss2 = loss_func(fluid.layers.relu(predict_final_2 + r_res2_predict), gt2, maxdisp // 4)
    all_losses.append(0.2 * loss2)
    # loss 3
    gt3 = paddle.fluid.layers.image_resize(disp_gt, scale=0.5, resample='BILINEAR',
                                           align_corners=False, align_mode=1) / 2.0
    predict_final_3 = paddle.fluid.layers.image_resize(predict_final, scale=0.5, resample='BILINEAR',
                                                       align_corners=False, align_mode=1) / 2.0
    loss3 = loss_func(fluid.layers.relu(predict_final_3 + r_res1_predict), gt3, maxdisp // 2)
    all_losses.append(0.2 * loss3)
    return sum(all_losses)
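(Editor's note: as a quick sanity check of the masked mean-L1 that loss_func computes, the same computation in plain NumPy, with toy values that are not from this issue:)

import numpy as np

maxdisp = 192
disp_gt = np.array([[-1.0, 10.0], [250.0, 20.0]], dtype='float32')
predict = np.array([[5.0, 12.0], [100.0, 25.0]], dtype='float32')
# valid pixels satisfy 0 < disp_gt < maxdisp, so only 10.0 and 20.0 count
mask = ((disp_gt > 0) & (disp_gt < maxdisp)).astype('float32')
loss = np.abs(predict * mask - disp_gt * mask).sum() / (mask.sum() + 1e-6)
print(loss)  # (|12 - 10| + |25 - 20|) / 2 = 3.5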
Try setting stop_gradient=True on maxdisp and mask_sum.
I set mask.stop_gradient = True and mask_sum.stop_gradient = True, but the same error is still reported. Note that 'maxdisp' is a plain Python int I pass in from outside, so it has no stop_gradient attribute.
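(Editor's note: for reference, a sketch of where those flags would sit in loss_func above; mask and sum_mask are graph Variables and accept the flag, while maxdisp is a plain Python int, which indeed does not:)

mask = paddle.fluid.layers.cast(mask1, 'float32') * paddle.fluid.layers.cast(mask2, 'float32')
mask.stop_gradient = True       # mask depends only on the ground truth
sum_mask = paddle.fluid.layers.reduce_sum(mask)
sum_mask.stop_gradient = True   # same for the normalizer
# maxdisp is a Python int, not a Variable, so stop_gradient does not apply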
"/usr/local/lib/python3.6/dist-packages/paddle/fluid/backward.py", line 518
上一行把op_desc.type()
print出来看下
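(Editor's note: presumably the inserted debug line looked something like the following, a paraphrase of the suggestion; the exact surroundings in backward.py may differ by version. It is where the "test_op <name>" pairs in the log below come from:)

# in /usr/local/lib/python3.6/dist-packages/paddle/fluid/backward.py,
# inserted just above line 518, where the grad op descs are built:
print('test_op', op_desc.type())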
I inserted the print as requested. Here is the output (each entry prefixed with test_op). It begins with the grad ops of the four loss terms:

fill_constant, elementwise_add_grad ×4, elementwise_mul_grad, elementwise_div_grad, elementwise_add_grad, reduce_sum_grad, abs_grad, elementwise_sub_grad, elementwise_mul_grad ×2, reduce_sum_grad, sum, elementwise_mul_grad, cast ×2, relu_grad, elementwise_add_grad, elementwise_div_grad, bilinear_interp_grad, ... (a similar mul/div/add/reduce_sum/abs/sub/cast/relu_grad group for each of the remaining loss terms),

then the network body:

conv2d_grad ×2, concat_grad, leaky_relu_grad, conv2d_transpose_grad ×2, sum, conv2d_grad, sum, conv2d_grad, concat_grad, leaky_relu_grad, conv2d_transpose_grad ×2, sum, conv2d_grad, sum, leaky_relu_grad, conv2d_grad, leaky_relu_grad, conv2d_grad, sum, leaky_relu_grad, conv2d_grad, concat_grad ×2,

then this five-op cycle repeated several dozen times:

unsqueeze2_grad, reduce_mean_grad, elementwise_mul_grad, pad_grad, slice_grad

followed by a few more sum / leaky_relu_grad / conv2d_grad groups, and finally:

conv2d_grad, sum, leaky_relu_grad, conv2d_grad, concat_grad, abs_grad, elementwise_sub_grad, transpose2_grad, reshape2_grad, elementwise_add_grad, elementwise_mul_grad ×2, unsqueeze2_grad, elementwise_sub_grad, unsqueeze2_grad, elementwise_sub_grad, cast, cast, gather_grad, gather_grad, cast
Please check where in your code the gather and cast OPs are used consecutively.
Consecutive gather usage:

im_flat = paddle.fluid.layers.cast(im_flat, 'float32')
idx_a = paddle.fluid.layers.cast(idx_a, 'int32')
idx_b = paddle.fluid.layers.cast(idx_b, 'int32')
Ia = paddle.fluid.layers.gather(im_flat, idx_a)
Ib = paddle.fluid.layers.gather(im_flat, idx_b)

There are many places where cast is used consecutively. Is consecutive use not allowed?
They can be used consecutively; the error just happens to be reported on this OP. Please paste the code that involves these two OPs so I can take a look.
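(Editor's note: indeed, back-to-back gathers are fine in the forward pass; a minimal standalone check with made-up values, independent of this issue:)

import numpy as np
import paddle.fluid as fluid

# two gathers in a row on the same tensor are legal
im_flat = fluid.layers.assign(np.arange(12, dtype='float32').reshape(4, 3))
idx = fluid.layers.assign(np.array([0, 2], dtype='int32'))
Ia = fluid.layers.gather(im_flat, idx)  # rows 0 and 2
Ib = fluid.layers.gather(im_flat, idx)  # gather again, immediately after

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
print(exe.run(fetch_list=[Ia, Ib]))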
Regarding cast:

Part 1:
B = fluid.layers.slice(fluid.layers.shape(fea1), starts=[0], ends=[1], axes=[0])
B = fluid.layers.cast(x=B, dtype='int32')

Part 2:
x0_f = paddle.fluid.layers.cast(x0, 'float32')
x1_f = paddle.fluid.layers.cast(x1, 'float32')

Part 3:
batch1 = fluid.layers.slice(fluid.layers.shape(T), starts=[0], ends=[1], axes=[0])
batch1 = fluid.layers.cast(x=batch1, dtype='int32')

Part 4:
grid = fluid.layers.cast(x=grid, dtype='int32')
grid = paddle.fluid.layers.expand(grid, [batch1, 1])
print(grid.shape)
grid = paddle.fluid.layers.reshape(grid, [batch1, 2, height * width])
grid = fluid.layers.cast(x=grid, dtype='float32')
The cast and gather calls are mostly concentrated in the following class:
class SpatialTransformer(object):
    """
    Spatial Transformer Network
    """

    def __init__(self):
        pass

    def __call__(self, U, T, data_format='channel_first', **kwargs):
        if data_format == 'channel_first':
            U = paddle.fluid.layers.transpose(U, (0, 2, 3, 1))
            T = paddle.fluid.layers.transpose(T, (0, 2, 3, 1))
        elif data_format != 'channel_last':
            raise ValueError('data_format only can be one of channel_last or channel_first')
        batch = U.shape[0]
        height = U.shape[1]
        width = U.shape[2]
        channels = U.shape[3]
        # batch = paddle.fluid.layers.shape(U)[0]
        # process T to [-1.0, 1.0]
        batch1 = fluid.layers.slice(fluid.layers.shape(T), starts=[0], ends=[1], axes=[0])
        batch1 = fluid.layers.cast(x=batch1, dtype='int32')
        # print('batch1', batch1)
        d = paddle.fluid.layers.reshape(T, [-1, 1, height * width])
        # print(d)
        print(d.shape)
        zeros = paddle.fluid.layers.zeros_like(d)
        d = paddle.fluid.layers.concat([d, zeros], axis=1)
        print(d.shape)
        # grid has shape [2, 540*960]
        grid = self.__meshgrid(height, width)
        grid = paddle.fluid.layers.reshape(grid, [-1])
        grid = paddle.fluid.layers.unsqueeze(grid, [0])
        grid = fluid.layers.cast(x=grid, dtype='int32')
        grid = paddle.fluid.layers.expand(grid, [batch1, 1])
        print(grid.shape)
        # the last dimension should be height * width
        grid = paddle.fluid.layers.reshape(grid, [batch1, 2, height * width])
        grid = fluid.layers.cast(x=grid, dtype='float32')
        T_g = paddle.fluid.layers.elementwise_add(grid, d)  # target grid
        # T_g = grid
        x_s = T_g[:, 0, :]
        y_s = T_g[:, 1, :]
        # x_s = paddle.fluid.layers.slice(T_g, [0, 0, 0], [-1, 1, -1])
        # y_s = paddle.fluid.layers.slice(T_g, [0, 1, 0], [-1, 1, -1])
        x_s_flat = paddle.fluid.layers.reshape(x_s, [-1])
        y_s_flat = paddle.fluid.layers.reshape(y_s, [-1])

        input_transformed = self.__interpolate(U, x_s_flat, y_s_flat)
        # round to the nearest integer, then cast the type
        # input_transformed = tf.round(input_transformed)
        # input_transformed = tf.cast(tf.floor(input_transformed1), U.dtype)
        output = paddle.fluid.layers.reshape(input_transformed,
                                             [batch1, height, width, channels])
        # output = paddle.fluid.layers.cast(paddle.fluid.layers.reshape(input_transformed,
        #     paddle.fluid.layers.stack([batch, height, width, channels])), 'float32')
        if data_format == 'channel_first':
            output = paddle.fluid.layers.transpose(output, (0, 3, 1, 2))
        return output

    # the meshgrid code runs correctly
    def __meshgrid(self, height, width):
        x_t = paddle.fluid.layers.matmul(
            paddle.fluid.layers.ones(shape=[height, 1], dtype='float32'),
            paddle.fluid.layers.transpose(
                paddle.fluid.layers.unsqueeze(
                    paddle.fluid.layers.cast(paddle.fluid.layers.range(0, width, 1, 'float32'),
                                             'float32'), [1]), [1, 0]))
        y_t = paddle.fluid.layers.matmul(
            paddle.fluid.layers.unsqueeze(
                paddle.fluid.layers.cast(paddle.fluid.layers.range(0, height, 1, 'float32'), 'float32'), [1]),
            paddle.fluid.layers.ones(shape=[1, width], dtype='float32'))
        x_t_flat = paddle.fluid.layers.reshape(x_t, (1, -1))
        y_t_flat = paddle.fluid.layers.reshape(y_t, (1, -1))
        grid = paddle.fluid.layers.concat([x_t_flat, y_t_flat], 0)
        return grid

    # base = self.__repeat(paddle.fluid.layers.range(0, num_batch, 1, 'int32') * dim1, height * width)
    def __repeat(self, x, n_repeats):
        # rep has shape [1, height * width]
        rep = paddle.fluid.layers.transpose(
            paddle.fluid.layers.ones(shape=[n_repeats, 1], dtype='float32'), [1, 0])
        x = paddle.fluid.layers.cast(x, 'float32')
        print('rep', rep.shape)
        x = paddle.fluid.layers.matmul(paddle.fluid.layers.reshape(x, (-1, 1)), rep)
        return paddle.fluid.layers.reshape(x, [-1])

    def __interpolate(self, im, x, y):
        # constants
        # num_batch = paddle.fluid.layers.shape(im)[0]
        # height = paddle.fluid.layers.shape(im)[1]
        # width = paddle.fluid.layers.shape(im)[2]
        # channels = paddle.fluid.layers.shape(im)[-1]
        # num_batch = im.shape[0]
        num_batch = fluid.layers.slice(fluid.layers.shape(im), starts=[0], ends=[1], axes=[0])
        num_batch = fluid.layers.cast(x=num_batch, dtype='int32')
        height = im.shape[1]
        width = im.shape[2]
        channels = im.shape[-1]
        x = paddle.fluid.layers.cast(x, 'float32')
        y = paddle.fluid.layers.cast(y, 'int32')
        zero = paddle.fluid.layers.zeros([], 'int32')
        max_x = float(im.shape[2] - 1)
        # max_x = paddle.fluid.layers.cast(im.shape[2] - 1, 'float32')
        # scale indices from [-1, 1] to [0, width/height]
        # x = (x + 1.0) * (width_f) / 2.0
        # y = (y + 1.0) * (height_f) / 2.0
        # do sampling
        x0 = paddle.fluid.layers.cast(paddle.fluid.layers.floor(x), 'float32')
        x1 = x0 + 1
        x0 = paddle.fluid.layers.clip(x0, 0.0, max_x)
        x1 = paddle.fluid.layers.clip(x1, 0.0, max_x)
        dim2 = width
        dim1 = width * height
        base = self.__repeat(paddle.fluid.layers.range(0, num_batch, 1, 'int32') * dim1, height * width)
        base_y = base + y * dim2
        # idx_b - idx_a = 1
        idx_a = base_y + x0
        idx_b = base_y + x1
        # use indices to look up pixels in the
        # flat image and restore the channels dim
        im_flat = paddle.fluid.layers.reshape(im, [-1, channels])
        im_flat = paddle.fluid.layers.cast(im_flat, 'float32')
        idx_a = paddle.fluid.layers.cast(idx_a, 'int32')
        idx_b = paddle.fluid.layers.cast(idx_b, 'int32')
        Ia = paddle.fluid.layers.gather(im_flat, idx_a)
        Ib = paddle.fluid.layers.gather(im_flat, idx_b)
        print('Ia', Ia.shape)
        # and finally calculate the interpolated values
        x0_f = paddle.fluid.layers.cast(x0, 'float32')
        x1_f = paddle.fluid.layers.cast(x1, 'float32')
        print('x1_f', x1_f.shape)
        wa = paddle.fluid.layers.unsqueeze((x1_f - x), [1])
        wb = paddle.fluid.layers.unsqueeze((x - x0_f), [1])
        # wa = paddle.fluid.layers.expand(wa, [1, channels])
        # wb = paddle.fluid.layers.expand(wb, [1, channels])
        # print('wa', wa.shape)
        # output = wa * Ia + wb * Ib
        output = paddle.fluid.layers.elementwise_mul(Ia, wa) + paddle.fluid.layers.elementwise_mul(Ib, wb)
        # output = Ib
        return output
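(Editor's note: for context, the tracebacks below show this class being invoked in DRS_net as w_up_1b2b = transformer(up_1b2b, minus_predict_final); a minimal usage sketch, where the names and shapes are illustrative assumptions:)

import paddle.fluid as fluid

transformer = SpatialTransformer()
# NCHW feature map and a 1-channel map of per-pixel horizontal offsets
features = fluid.layers.data(name='features', shape=[32, 384, 768], dtype='float32')
offsets = fluid.layers.data(name='offsets', shape=[1, 384, 768], dtype='float32')
warped = transformer(features, offsets, data_format='channel_first')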
Try this:

idx_a = paddle.fluid.layers.cast(idx_a, 'int32')
idx_a.stop_gradient = True
idx_b = paddle.fluid.layers.cast(idx_b, 'int32')
idx_b.stop_gradient = True
Ia = paddle.fluid.layers.gather(im_flat, idx_a)
Ib = paddle.fluid.layers.gather(im_flat, idx_b)
Updating my earlier reply: I found that the same code behaves differently on Windows and on Linux. The Linux log is updated below; the error is as follows:
Traceback (most recent call last):
File "xubin_train.py", line 26, in <module>
fetch_list=[avg_cost])
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/executor.py", line 657, in run
use_program_cache=use_program_cache)
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/executor.py", line 755, in _run
exe.run(program.desc, scope, 0, True, True, fetch_var_name)
paddle.fluid.core_avx.EnforceNotMet: Invoke operator expand_grad error.
Python Call stacks:
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/framework.py", line 1774, in append_op
attrs=kwargs.get("attrs", None))
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/layers/nn.py", line 9518, in expand
type='expand', inputs=inputs, outputs={'Out': out}, attrs=attrs)
File "/data/home/xubin/data/Paddle-IresNet/stn_paddlepaddle.py", line 85, in __call__
grid = paddle.fluid.layers.expand(grid, [batch1,1])
File "/data/home/xubin/data/Paddle-IresNet/submodule.py", line 222, in DRS_net
w_up_1b2b = transformer(up_1b2b, minus_predict_final)
File "/data/home/xubin/data/Paddle-IresNet/IresNet.py", line 6, in IresNet
r_res2_predict, r_res1_predict, r_res0 = DRS_net(conv1a, conv1b, up_1a2a, up_1b2b, predict_final)
File "xubin_train.py", line 12, in <module>
predict_final, r_res2_predict, r_res1_predict, r_res0=IresNet(left_input,right_input)
C++ Call stacks:
op expand_grad does not have kernel for data_type[int]:data_layout[ANY_LAYOUT]:place[CPUPlace]:library_type[PLAIN] at [/paddle/paddle/fluid/framework/operator.cc:979]
PaddlePaddle Call Stacks:
0 0x7f9e7b30d278p void paddle::platform::EnforceNotMet::Init<std::string>(std::string, char const*, int) + 360
1 0x7f9e7b30d5c7p paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int) + 87
2 0x7f9e7d3aed09p paddle::framework::OperatorWithKernel::ChooseKernel(paddle::framework::RuntimeContext const&, paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 1289
3 0x7f9e7d3b0a58p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const + 728
4 0x7f9e7d3b0cd1p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 529
5 0x7f9e7d3ab863p paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 307
6 0x7f9e7b497dfep paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool, bool) + 398
7 0x7f9e7b49ae9fp paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::vector<std::string, std::allocator<std::string> > const&, bool) + 143
8 0x7f9e7b2fe1fdp
9 0x7f9e7b33fd56p
10 0x56204cp _PyCFunction_FastCallDict + 860
11 0x4f88bap
12 0x4f98c7p _PyEval_EvalFrameDefault + 1127
13 0x4f6128p
14 0x4f7d60p
15 0x4f876dp
16 0x4fa6c0p _PyEval_EvalFrameDefault + 4704
17 0x4f6128p
18 0x4f7d60p
19 0x4f876dp
20 0x4fa6c0p _PyEval_EvalFrameDefault + 4704
21 0x4f6128p
22 0x4f9023p PyEval_EvalCode + 35
23 0x6415b2p
24 0x64166ap PyRun_FileExFlags + 154
25 0x643730p PyRun_SimpleFileExFlags + 400
26 0x62b26ep Py_Main + 1438
27 0x4b4cb0p main + 224
28 0x7f9f1ac9fb97p __libc_start_main + 231
29 0x5bdf6ap _start + 42
From the error, expand does not support int32; please check whether there is an expand on an int32 tensor.
There is one int32 expand:

grid = fluid.layers.cast(x=grid, dtype='int32')
grid = paddle.fluid.layers.expand(grid, [batch1, 1])

But if I remove the cast to int32, it instead complains that int is expected. Log:

Python Call stacks:
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/framework.py", line 1774, in append_op
attrs=kwargs.get("attrs", None))
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/paddle/fluid/layers/nn.py", line 9518, in expand
type='expand', inputs=inputs, outputs={'Out': out}, attrs=attrs)
File "/data/home/xubin/data/Paddle-IresNet/stn_paddlepaddle.py", line 86, in __call__
grid = paddle.fluid.layers.expand(grid, [batch1,1])
File "/data/home/xubin/data/Paddle-IresNet/submodule.py", line 222, in DRS_net
w_up_1b2b = transformer(up_1b2b, minus_predict_final)
File "/data/home/xubin/data/Paddle-IresNet/IresNet.py", line 6, in IresNet
r_res2_predict, r_res1_predict, r_res0 = DRS_net(conv1a, conv1b, up_1a2a, up_1b2b, predict_final)
File "xubin_train.py", line 12, in <module>
predict_final, r_res2_predict, r_res1_predict, r_res0=IresNet(left_input,right_input)
C++ Call stacks:
Tensor holds the wrong type, it holds float, but desires to be int at [/paddle/paddle/fluid/framework/tensor_impl.h:30]
I've confirmed that expand_grad has no int kernel. If this grid does not need gradients, set stop_gradient on it; if it does need gradients, I suggest moving the cast after the expand.
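(Editor's note: a minimal sketch of the two suggested workarounds, with variable names following the SpatialTransformer snippet above; which one applies depends on whether grid needs gradients:)

# Workaround 1: grid is a constant sampling index, so cut it out of backward
grid = fluid.layers.cast(x=grid, dtype='int32')
grid.stop_gradient = True  # no expand_grad op is generated for this tensor
grid = paddle.fluid.layers.expand(grid, [batch1, 1])

# Workaround 2: expand while still float32 (expand_grad has a float kernel),
# then cast the expanded result to int32
grid = paddle.fluid.layers.expand(grid, [batch1, 1])
grid = fluid.layers.cast(x=grid, dtype='int32')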
Done! Adding grid.stop_gradient did indeed solve it. Putting the cast after the expand doesn't seem workable, though; as I mentioned earlier, without the cast in front of it, this spot reports a different error.

I don't mean dropping the cast; I mean expanding first and casting afterwards. Since expand_grad doesn't support int, do the expand on the float tensor, and only after the expand cast it to int32. In other words, swap the order of cast and expand.

What I mean is that with the operations below, the error is reported on the expand:

grid = fluid.layers.cast(x=grid, dtype='int32')
grid = paddle.fluid.layers.expand(grid, [batch1, 1])
grid = fluid.layers.cast(x=grid, dtype='int32')

Log (the fuller error is in my first reply this morning):
C++ Call stacks:
Tensor holds the wrong type, it holds float, but desires to be int at [/paddle/paddle/fluid/framework/tensor_impl.h:30]
OK, then stick with stop_gradient.
Since you haven't replied for more than a year, we have closed this issue/PR. If the problem is not solved or there is a follow-up, please reopen it at any time and we will continue to follow up.