Closed lyb36524 closed 1 year ago
@lyb36524
If the code is running on a TX2 device, please modify the code (https://github.com/FeiYull/TensorRT-Alpha/blob/a0de42fd8e6d30af55e7068bdde6d4564156bd75/utils/yolo.cpp#L150): set the value on line 152 to 0 and the value on line 165 to 1, as shown in the following code:
//-------------------------------------------------------------------------------------------------------------
void yolo::YOLO::copy(const std::vector
cv::Mat img_fp32 = cv::Mat::zeros(imgsBatch[0].size(), CV_32FC3); // todo
cudaHostRegister(img_fp32.data, img_fp32.elemSize() * img_fp32.total(), cudaHostRegisterPortable);
float* pi = m_input_src_device;
for (size_t i = 0; i < imgsBatch.size(); i++)
{
imgsBatch[i].convertTo(img_fp32, CV_32FC3);
checkRuntime(cudaMemcpy(pi, img_fp32.data, sizeof(float) * 3 * m_param.src_h * m_param.src_w, cudaMemcpyHostToDevice));
pi += 3 * m_param.src_h * m_param.src_w;
}
cudaHostUnregister(img_fp32.data);
cv::Mat img_fp32 = cv::Mat::zeros(imgsBatch[0].size(), CV_32FC3); // todo
float* pi = m_input_src_device;
for (size_t i = 0; i < imgsBatch.size(); i++)
{
std::vector<float> img_vec = std::vector<float>(imgsBatch[i].reshape(1, 1));
imgsBatch[i].convertTo(img_fp32, CV_32FC3);
checkRuntime(cudaMemcpy(pi, img_fp32.data, sizeof(float) * 3 * m_param.src_h * m_param.src_w, cudaMemcpyHostToDevice));
pi += 3 * m_param.src_h * m_param.src_w;
}
}
This part of the code has since been modified to be compatible with the TX2; see https://github.com/FeiYull/TensorRT-Alpha/blob/72cdd9dde2c1db3d2fea6988cf31e59317a5fc45/utils/yolo.cpp#L178
报错信息:
`terminate called after throwing an instance of 'thrust::system::system_error' what(): radix_sort: failed on 2nd step: cudaErrorHostMemoryNotRegistered: pointer does not correspond to a registered memory region Aborted (core dumped)`
解决方案:
utils/yolo.cpp 文件中,原始代码:
utils/yolo.cpp 文件中,更改为:
飞哥已经同步更新代码,再次感谢飞哥: