Closed: feixuedudiao closed this issue 6 months ago
Do I need to use the PyTorch version of oneDNN to convert model parameters from a PyTorch checkpoint to ONNX?
Converting model parameters from a PyTorch checkpoint to ONNX has nothing to do with oneDNN. I would guess that the difference in results you see is caused by using an incorrect memory format for the weights, the activations, or both. The Getting Started section of the Developer Guide may help.
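For illustration, here is a minimal sketch of that reorder flow, assuming the user data sits in a plain nchw buffer; `eng`, `strm`, `user_ptr`, and the shape are hypothetical stand-ins:

```cpp
#include "dnnl.hpp"
using namespace dnnl;
using tag = memory::format_tag;
using dt = memory::data_type;

// Wrap a user NCHW buffer and reorder it into whatever layout the
// primitive's primitive_desc requested (often a blocked format).
memory upload_src(engine &eng, stream &strm, float *user_ptr,
                  const memory::desc &prim_src_md) {
    memory::dims src_tz = {1, 4, 224, 384};          // hypothetical shape
    auto user_md = memory::desc(src_tz, dt::f32, tag::nchw);
    auto user_mem = memory(user_md, eng, user_ptr);  // zero-copy wrap
    if (prim_src_md == user_md) return user_mem;     // layouts already match
    auto prim_mem = memory(prim_src_md, eng);        // primitive's layout
    reorder(user_mem, prim_mem).execute(strm, user_mem, prim_mem);
    strm.wait();                                     // ensure reorder finished
    return prim_mem;
}
```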
Thanks. I think it may indeed be the memory format of the weights. Below is the convolution code:
```cpp
/* NOTE: the parameter list was truncated in the original post; besides segNet
 * and opName, the body also uses inShape, outShape, ioSize, stride, convPadding,
 * isDw, isActive, isRelu, negative_slope, opWeightName and opBiasName, which are
 * assumed to be parameters or values derived from opName. */
int32_t ConvOp_new(seg_model &segNet, std::string opName, std::vector<int32_t> /* ... */)
{
    if (segNet.netWeight.ops.size() <= 0)
    {
        std::cout << "the weight of net is empty!!" << std::endl;
        return -1;
    }
    auto netWeight = segNet.netWeight;
    if (netWeight.ops.count(opWeightName) <= 0 || netWeight.ops.count(opBiasName) <= 0)
    {
        return -1;
    }
    net_op conv_weight_op = netWeight.ops[opWeightName];
    net_op conv_bias_op = netWeight.ops[opBiasName];
    auto weightShape = conv_weight_op.shapes;
    float *userWeights = &conv_weight_op.weights[0];
    const float *userBias = &conv_bias_op.weights[0];
    memory::dims conv_inp_tz = intVecToDims(inShape);
    memory::dims conv_out_tz = intVecToDims(outShape);
    memory::dims conv_bias_tz = intVecToDims(conv_bias_op.shapes);
    /* a 3x3 kernel needs padding 1 to keep the spatial size */
    if ((weightShape[2] == 3) && (weightShape[3] == 3))
    {
        convPadding = 1;
    }
    /* a single input channel in the weight tensor marks a depthwise convolution */
    if (weightShape[1] == 1)
    {
        isDw = 1;
    }
    std::vector<int32_t> tmp_weight_tz = {conv_weight_op.shapes[0], ioSize, ioSize,
            conv_weight_op.shapes[2], conv_weight_op.shapes[3]};
    memory::dims conv_weights_tz = isDw ? intVecToDims(tmp_weight_tz)
                                        : intVecToDims(conv_weight_op.shapes);
    memory::dims conv_strides = {stride, stride};
    memory::dims conv_paddings = {convPadding, convPadding};
    /* create memory descriptors for the convolution; tag::any lets the
       primitive pick its preferred layout */
    auto conv_bias_md = memory::desc({conv_bias_tz}, dt::f32, tag::any);
    auto conv_weight_md = memory::desc({conv_weights_tz}, dt::f32, tag::any);
    auto conv_src_md = memory::desc({conv_inp_tz}, dt::f32, tag::any);
    auto conv_dst_md = memory::desc({conv_out_tz}, dt::f32, tag::any);
    /* create memory for the weights from the model parameters; oihw layout is
       assumed for regular weights, goihw for depthwise, and x for the bias */
    auto user_weight_mem = memory({{conv_weights_tz}, dt::f32, isDw ? tag::goihw : tag::oihw},
            segNet.netEng, (float *)userWeights);
    segNet.conv_bias = memory({{conv_bias_tz}, dt::f32, tag::x}, segNet.netEng);
    {
        auto ptr = segNet.conv_bias.map_data();
        /* note: memcpy takes a size in bytes; the original passed the element
           count, copying only a quarter of the bias values */
        ::memcpy(ptr, userBias, conv_bias_op.weights.size() * sizeof(float));
        segNet.conv_bias.unmap_data(ptr);
    }
    /* activation: relu when isRelu is set, hardswish otherwise */
    post_ops conv_ops;
    primitive_attr conv_attr;
    if (isActive)
    {
        /* caution: alpha/beta have a different meaning for hardswish than the
           relu negative slope; passing negative_slope for both may be wrong */
        conv_ops.append_eltwise(isRelu ? algorithm::eltwise_relu : algorithm::eltwise_hardswish,
                negative_slope, negative_slope);
    }
    conv_attr.set_post_ops(conv_ops);
    /* create a convolution primitive descriptor */
    auto conv_pd = convolution_forward::primitive_desc(segNet.netEng,
            prop_kind::forward_inference, algorithm::convolution_direct,
            conv_src_md, conv_weight_md, conv_bias_md, conv_dst_md,
            conv_strides, conv_paddings, conv_paddings, conv_attr);
    /* reorder src into the primitive's layout as part of the net, so it is
       re-executed on every inference */
    auto conv_src_mem = segNet.conv_src;
    if (conv_pd.src_desc() != segNet.conv_src.get_desc())
    {
        conv_src_mem = memory(conv_pd.src_desc(), segNet.netEng);
        auto rd = reorder(segNet.conv_src, conv_src_mem);
        segNet.netOneDnn.net.push_back(rd);
        segNet.netOneDnn.netArgs.push_back({{DNNL_ARG_FROM, segNet.conv_src},
                {DNNL_ARG_TO, conv_src_mem}});
    }
    /* reorder the weights once, immediately */
    segNet.conv_weights = user_weight_mem;
    if (conv_pd.weights_desc() != user_weight_mem.get_desc())
    {
        segNet.conv_weights = memory(conv_pd.weights_desc(), segNet.netEng);
        auto rd = reorder(user_weight_mem, segNet.conv_weights);
        rd.execute(segNet.netOneDnn.netStream, user_weight_mem, segNet.conv_weights);
    }
    /* the memory for dst */
    segNet.conv_dst = memory(conv_pd.dst_desc(), segNet.netEng);
    /* create the convolution primitive and add it to the net */
    segNet.netOneDnn.net.push_back(convolution_forward(conv_pd));
    segNet.netOneDnn.netArgs.push_back({{DNNL_ARG_SRC, conv_src_mem},
            {DNNL_ARG_WEIGHTS, segNet.conv_weights},
            {DNNL_ARG_BIAS, segNet.conv_bias},
            {DNNL_ARG_DST, segNet.conv_dst}});
    /* chain: the dst of this conv becomes the src of the next op */
    segNet.conv_src = segNet.conv_dst;
    return 0;
}
```
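One thing worth double-checking in the snippet above is the depthwise weight shape. A PyTorch depthwise Conv2d with in_channels == out_channels == C stores its weight as (C, 1, KH, KW), while oneDNN's goihw layout describes the same buffer as (G, OC/G, IC/G, KH, KW) = (C, 1, 1, KH, KW), so the tmp_weight_tz above only matches this when ioSize == 1. A hedged sketch, with `eng` and `pytorch_weight_ptr` as placeholders:

```cpp
#include "dnnl.hpp"
using namespace dnnl;
using tag = memory::format_tag;
using dt = memory::data_type;

// Wrap a PyTorch depthwise weight (C, 1, KH, KW) as oneDNN goihw memory
// with dims (G=C, OC/G=1, IC/G=1, KH, KW) without copying the data.
memory wrap_dw_weights(engine &eng, float *pytorch_weight_ptr,
                       memory::dim C, memory::dim KH, memory::dim KW) {
    memory::dims dw_weights_tz = {C, 1, 1, KH, KW};
    return memory({dw_weights_tz, dt::f32, tag::goihw}, eng, pytorch_weight_ptr);
}
```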
@feixuedudiao I took a quick look at the code snippet you provided; it seems that `rd.execute(...)` is missing in the code block `if (conv_pd.src_desc() != segNet.conv_src.get_desc()) {...}`. Could you please refer to the example convolution.cpp for this primitive and see whether anything else is missing or incompletely configured?
@shu1chen Thanks for answering me. The implementation of the convolution mainly follows cnn_inference_f32.cpp (examples/cnn_inference_f32.cpp). The specific code is as follows:

```cpp
...
auto conv1_src_md = memory::desc({conv1_src_tz}, dt::f32, tag::any);
auto conv1_bias_md = memory::desc({conv1_bias_tz}, dt::f32, tag::any);
auto conv1_weights_md = memory::desc({conv1_weights_tz}, dt::f32, tag::any);
auto conv1_dst_md = memory::desc({conv1_dst_tz}, dt::f32, tag::any);
//[Create convolution memory descriptors]
/// Create a convolution primitive descriptor by specifying engine,
/// propagation kind, [convolution algorithm](@ref dev_guide_convolution),
/// shapes of input, weights, bias, output, convolution strides, padding,
/// and kind of padding.
/// Propagation kind is set to prop_kind::forward_inference to optimize for
/// inference execution and omit computations that are necessary only for
/// backward propagation.
/// Once created, it has specific formats instead of the `any` format.
/// @snippet cnn_inference_f32.cpp Create convolution primitive descriptor
//[Create convolution primitive descriptor]
auto conv1_prim_desc = convolution_forward::primitive_desc(eng,
prop_kind::forward_inference, algorithm::convolution_direct,
conv1_src_md, conv1_weights_md, conv1_bias_md, conv1_dst_md,
conv1_strides, conv1_padding, conv1_padding);
//[Create convolution primitive descriptor]
/// Check whether data and weights formats required by convolution is different
/// from the user format. In case it is different change the layout using
/// reorder primitive.
/// @snippet cnn_inference_f32.cpp Reorder data and weights
//[Reorder data and weights]
auto conv1_src_memory = user_src_memory;
if (conv1_prim_desc.src_desc() != user_src_memory.get_desc()) {
conv1_src_memory = memory(conv1_prim_desc.src_desc(), eng);
net.push_back(reorder(user_src_memory, conv1_src_memory));
net_args.push_back({{DNNL_ARG_FROM, user_src_memory},
{DNNL_ARG_TO, conv1_src_memory}});
}
auto conv1_weights_memory = user_weights_memory;
if (conv1_prim_desc.weights_desc() != user_weights_memory.get_desc()) {
conv1_weights_memory = memory(conv1_prim_desc.weights_desc(), eng);
reorder(user_weights_memory, conv1_weights_memory)
.execute(s, user_weights_memory, conv1_weights_memory);
}
//[Reorder data and weights]
/// Create a memory primitive for output.
/// @snippet cnn_inference_f32.cpp Create memory for output
//[Create memory for output]
auto conv1_dst_memory = memory(conv1_prim_desc.dst_desc(), eng);
//[Create memory for output]
/// Create a convolution primitive and add it to the net.
/// @snippet cnn_inference_f32.cpp Create memory for output
//[Create convolution primitive]
net.push_back(convolution_forward(conv1_prim_desc));
net_args.push_back({{DNNL_ARG_SRC, conv1_src_memory},
{DNNL_ARG_WEIGHTS, conv1_weights_memory},
{DNNL_ARG_BIAS, conv1_user_bias_memory},
{DNNL_ARG_DST, conv1_dst_memory}});
...
for (int j = 0; j < times; ++j) {
assert(net.size() == net_args.size() && "something is missing");
for (size_t i = 0; i < net.size(); ++i)
net.at(i).execute(s, net_args.at(i));
}
//[Execute model]
s.wait();
```

It seems to differ from convolution.cpp, but I don't understand the difference.
@feixuedudiao Thank you for providing this info. Yes, it's also good to refer to the cnn_inference_f32.cpp example. The implementation processes in the two examples mentioned are essentially the same; it was my mistake that I missed the part where the reorder primitive is added to net and executed with it. Possible points that can be investigated:

- You append eltwise_relu/eltwise_hardswish after the convolution; do you apply the same activations in PyTorch?
- Do you call stream.wait() to wait for all primitives in the execution stream to finish their computations?

@feixuedudiao Has your question been resolved by trying the ways I suggested? Any update?
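On the stream.wait() point, applied to the segNet containers from the earlier snippet, the execution step with the final synchronization might look roughly like this (a sketch, not the author's code). Note also that the dst is in whatever layout the primitive chose, so it should be reordered back to plain nchw before an element-wise comparison with the PyTorch output:

```cpp
// Run all queued primitives (reorders + convolutions) in order, then wait
// on the stream before touching the results on the host.
auto &dnn = segNet.netOneDnn;
assert(dnn.net.size() == dnn.netArgs.size());
for (size_t i = 0; i < dnn.net.size(); ++i)
    dnn.net.at(i).execute(dnn.netStream, dnn.netArgs.at(i));
dnn.netStream.wait();

// Reorder the (possibly blocked) dst back to nchw before comparing with
// PyTorch; comparing raw blocked data against nchw data will differ even
// when the math is correct.
auto nchw_md = memory::desc(segNet.conv_dst.get_desc().get_dims(), dt::f32, tag::nchw);
auto nchw_dst = memory(nchw_md, segNet.netEng);
reorder(segNet.conv_dst, nchw_dst).execute(dnn.netStream, segNet.conv_dst, nchw_dst);
dnn.netStream.wait();
```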
@feixuedudiao Has your question been resolved? I will close this issue in 7 days if there is no more update.
```cpp
typedef struct tagNetOneDNN {
    stream netStream;                                     /* stream */
    std::vector<primitive> net;                           /* primitives */
    std::vector<std::unordered_map<int, memory>> netArgs; /* arguments */
} net_oneDNN;

typedef struct tagSegModel {
    net_weight netWeight;  // weights
    net_oneDNN netOneDnn;  // net of oneDNN
} seg_model;
```
```cpp
int32_t initModel(seg_model &segNet)
{
    std::vector<int32_t> concatIn0 = {1, 3, 224, 384};
    std::vector<int32_t> concatIn1 = {1, 1, 224, 384};
    std::vector<int32_t> convIn = {1, 4, 224, 384};
    std::vector<int32_t> convOut = {1, 32, 112, 192};
    int32_t convStrid = 2;
    int32_t convPad = 1;
    int32_t isActive = 1;
    int32_t isRelu = 1;
    std::string opName = "backbone.stage0";
}
```
```cpp
int32_t concat_test(imgInfo &img, seg_model &segNet)
{
}
```
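Since the attached logs compare concat outputs as well, here is a minimal sketch of the oneDNN concat primitive for the two initModel inputs, joined along the channel axis; `eng` and `strm` are assumed to exist:

```cpp
#include "dnnl.hpp"
using namespace dnnl;

// Concatenate {1,3,224,384} and {1,1,224,384} along axis 1 -> {1,4,224,384}.
memory concat_channels(engine &eng, stream &strm, memory &src0, memory &src1) {
    std::vector<memory::desc> src_mds = {src0.get_desc(), src1.get_desc()};
    auto pd = concat::primitive_desc(eng, /*concat_dimension=*/1, src_mds);
    auto dst = memory(pd.dst_desc(), eng);
    concat(pd).execute(strm, {{DNNL_ARG_MULTIPLE_SRC + 0, src0},
                              {DNNL_ARG_MULTIPLE_SRC + 1, src1},
                              {DNNL_ARG_DST, dst}});
    strm.wait(); // results are valid on the host after this point
    return dst;
}
```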
pytorch_concat_result.txt
oneDNN_concat_result.txt
oneDNN_conv_result.txt