Below is my main inference code. When I use it to run the fp16 model, I get the correct result image.
bool SrInfer::processInput(const std::string& imagepath)
{
    const int inputH = 4320;
    const int inputW = 7680;
    if (!imagepath.empty())
    {
        std::cout << "will process an image" << std::endl;
        // read image
        cv::Mat srcimg = cv::imread(imagepath);
        if (srcimg.empty())
        {
            std::cerr << "read image failed" << std::endl;
            return false;
        }
        srcimg.convertTo(srcimg, CV_32F, 1.0 / 255.0);
        // preprocess: fill the fp16 (half) input host buffer
        half* hostDataBuffer = static_cast<half*>(mBuffers->getHostBuffer("input"));
        int channels = 3;
        int height = inputH;
        int width = inputW;
        // HWC (OpenCV layout) -> CHW (network input), float -> half
        int channelSize = height * width;
        for (int h = 0; h < height; ++h) {
            for (int w = 0; w < width; ++w) {
                for (int c = 0; c < channels; ++c) {
                    int dstIdx = c * channelSize + h * width + w;
                    float normalized_pixel = srcimg.ptr<float>(h)[w * channels + c];
                    hostDataBuffer[dstIdx] = __float2half(normalized_pixel);
                }
            }
        }
    }
    return true;
}
bool SrInfer::postProcess(const std::string& outputpath)
{
    // read the output tensor from the host output buffer, post-process it, then save the result image
    half* hostResultBuffer = static_cast<half*>(mBuffers->getHostBuffer("output"));
    std::cout << " the output size is " << mBuffers->size("output") << std::endl;
    // CHW -> HWC
    int channels = 3;
    int height = 4320 * 2;
    int width = 7680 * 2;
    float* data_fp32 = new float[channels * height * width];
    int planeSize = height * width;
    for (int h = 0; h < height; ++h) {
        for (int w = 0; w < width; ++w) {
            for (int c = 0; c < channels; ++c) {
                // indices into the fp16 (CHW) and fp32 (HWC) arrays
                int index_fp16 = c * planeSize + h * width + w;
                int index_fp32 = h * width * channels + w * channels + c;
                // convert and assign
                data_fp32[index_fp32] = __half2float(hostResultBuffer[index_fp16]);
            }
        }
    }
    cv::Mat image(height, width, CV_32FC3, data_fp32);
    // image.convertTo(image, CV_8UC3, 255.0);
    image = image * 255.0;
    image.convertTo(image, CV_8UC3);
    cv::imwrite(outputpath, image);
    delete[] data_fp32;
    return true;
}
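Side note: the CHW-to-HWC loop above could also be done with OpenCV plane handling. A rough sketch, assuming an OpenCV 4.x build with CV_16F support and reusing the variables from postProcess (hostResultBuffer, channels, height, width, planeSize, outputpath):
    std::vector<cv::Mat> planes(channels);
    for (int c = 0; c < channels; ++c)
    {
        // wrap one fp16 plane of the output buffer without copying, then convert it to fp32
        cv::Mat halfPlane(height, width, CV_16F, hostResultBuffer + c * planeSize);
        halfPlane.convertTo(planes[c], CV_32F);
    }
    cv::Mat hwc;
    cv::merge(planes, hwc);              // interleave the planes into an HWC image
    hwc.convertTo(hwc, CV_8UC3, 255.0);  // scale to [0, 255] with saturation
    cv::imwrite(outputpath, hwc);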
bool SrInfer::srInfer(const std::string& inputimage, const std::string& outputimage)
{
    // bind the device buffers to the engine's I/O tensors
    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
    {
        auto const name = mEngine->getIOTensorName(i);
        mContext->setTensorAddress(name, mBuffers->getDeviceBuffer(name));
    }
    // Read the input data into the managed buffers
    // ASSERT(mParams.inputTensorNames.size() == 1); // only one input for now
    if (!processInput(inputimage))
    {
        return false;
    }
    mBuffers->copyInputToDevice();
    auto time_start = std::chrono::steady_clock::now();
    bool status = mContext->executeV2(mBuffers->getDeviceBindings().data());
    auto time_end = std::chrono::steady_clock::now();
    std::cout << "infer time is " << std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count() << "ms" << std::endl;
    if (!status)
    {
        return false;
    }
    // Memcpy from device output buffers to host output buffers
    mBuffers->copyOutputToHost();
    if (!postProcess(outputimage))
    {
        return false;
    }
    return true;
}
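One more note on srInfer: since the device addresses are already registered via setTensorAddress, the stream-based API could be used instead of passing the binding array to executeV2. A sketch, assuming TensorRT 10's IExecutionContext::enqueueV3 and the CUDA runtime headers:
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    // uses the addresses set earlier via setTensorAddress()
    bool status = mContext->enqueueV3(stream);
    // wait for completion so the timing and copyOutputToHost() see finished results
    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream);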
Maybe
half* hostResultBuffer = static_cast<half*>(mBuffers->getHostBuffer("output"));
in your postProcess function is not right for the int8 case. You should check your output tensor's data type.
If you provide the layer info, that would be more helpful.
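Something like this could be used for the check (a minimal sketch, assuming mEngine is your deserialized nvinfer1::ICudaEngine and the output tensor is named "output"; how each case is handled depends on how your buffer class allocates host memory):
    nvinfer1::DataType outType = mEngine->getTensorDataType("output");
    switch (outType)
    {
    case nvinfer1::DataType::kHALF:
        // the current cast to half* is fine
        break;
    case nvinfer1::DataType::kFLOAT:
        // read the buffer as float* instead; no __half2float needed
        break;
    case nvinfer1::DataType::kINT8:
        // read the buffer as int8_t* and apply the output tensor's dequantization scale
        break;
    default:
        break;
    }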
Thanks a lot, I thought the input type of the int8_mix_fp16 model was the same as the fp16 model 🤣
thanks again
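For anyone else hitting this, a small sketch of the input-side check (assuming the same mEngine/mBuffers members, with dstIdx and normalized_pixel as computed in the processInput loop above):
    // query what the engine actually expects for the tensor named "input"
    nvinfer1::DataType inType = mEngine->getTensorDataType("input");
    void* hostInput = mBuffers->getHostBuffer("input");
    // inside the HWC -> CHW loop, write the element type the engine expects
    if (inType == nvinfer1::DataType::kHALF)
        static_cast<half*>(hostInput)[dstIdx] = __float2half(normalized_pixel);
    else if (inType == nvinfer1::DataType::kFLOAT)
        static_cast<float*>(hostInput)[dstIdx] = normalized_pixel;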
Description
I converted an onnx_fp32 etds model to both an fp16 model and a mix_ptq model (int8 and fp16, already calibrated), and I tried to run inference with both models through the Python and C++ APIs.
What confuses me is that both models are run with the same C++ code, but they give different results in C++.
Please help me analyze this 🙏
Environment
TensorRT Version: 10.0.1
NVIDIA GPU: RTX4090
NVIDIA Driver Version: 12.0
CUDA Version: 12.0
CUDNN Version: 8.2.0
Operating System: Linux interactive11554 5.11.0-27-generic #29~20.04.1-Ubuntu SMP Wed Aug 11 15:58:17 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
Python Version (if applicable): 3.8.19
Tensorflow Version (if applicable):
PyTorch Version (if applicable):
Baremetal or Container (if so, version):
Relevant Files
Model link:
Steps To Reproduce
Commands or scripts:
Have you tried the latest release?:
Can this model run on other frameworks? For example run ONNX model with ONNXRuntime (polygraphy run <model.onnx> --onnxrt):