Closed MolianWH closed 3 years ago
It seems like you are having a problem on how to use PyTorch rather than a problem with the model. I am less familiar with the C++ API so I can't really help you.
// Composite the predicted foreground over a white background and copy it into a cv::Mat.
// A CV_8UC3 Mat stores interleaved (H, W, C) bytes in host memory, so the (N, C, H, W)
// result is converted to uint8, reordered, made contiguous and moved to the CPU first.
auto res_tensor = (pha * fgr + (1 - pha) * torch::ones_like(fgr))
                      .mul(255)
                      .clamp(0, 255)
                      .to(torch::kU8)
                      .permute({0, 2, 3, 1})[0]  // NCHW -> HWC, first batch element
                      .contiguous()              // permute only changes strides; materialise the layout
                      .cpu();                    // std::memcpy needs a host pointer
cv::Mat res(static_cast<int>(res_tensor.size(0)), static_cast<int>(res_tensor.size(1)), CV_8UC3);
// Byte count comes from the tensor itself rather than from sizeof() of a dtype enum value.
std::memcpy(res.data, res_tensor.data_ptr(), res_tensor.numel() * res_tensor.element_size());
// NOTE(review): res keeps the model's channel order (RGB if the inputs were RGB);
// apply cv::cvtColor(res, res, cv::COLOR_RGB2BGR) before imshow/imwrite in that case.
Thanks for your great contributions. I referred to model_usage for C++, but I don't know how to convert the results and display them.
I also referred to inference_webcam.py and took inspiration from this code:
# Run the model and keep only its first two outputs (alpha and foreground).
outputs = model(src, bgr)
pha, fgr = outputs[:2]
# Composite the foreground over an all-ones (white) background.
white = torch.ones_like(fgr)
composite = pha * fgr + (1 - pha) * white
# Scale to 8-bit, move to host memory, reorder to channels-last and take the first batch item.
frame = composite.mul(255).byte().cpu().permute(0, 2, 3, 1).numpy()[0]
# Swap RGB -> BGR before handing the frame to the displayer.
res = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
key = dsp.step(res)
I need to port it to C++, but I still have some questions.
auto outputs = model.forward({src, bgr}).toTuple()->elements(); auto pha = outputs[0].toTensor(); auto fgr = outputs[1].toTensor(); // the fllowing code is error, but I have no idea. auto res_tensor = (pha * fgr + (1-pha) * torch::ones_like(fgr)).mul(255).cpu(); Mat res(res_tensor.size(2), res_tensor.size(3), CV_8UC3, (void*) res_tensor.data_ptr<uint8_t>()); cvtColor(res, res, COLOR_RGB2BGR); imshow("matting", res);
Would you please show me the code so I can study it? Thanks.
I am a C++ beginner and I also want to display the image from C++ code. How can this be done? I hope you can share your code so I can learn from it.
https://github.com/PeterL1n/BackgroundMattingV2/issues/54#issuecomment-813871636 Have you managed to display it in C++? If so, how did you do it? Thanks!
@PeterL1n I ran it in C++, but the fgr image I get is wrong:
my env: cuda101 libtorch 1.7.1_debug_cu101 python3.6 vs2019 image_size:1920*1080 model: torchscript_resnet101_fp32.pth
code:
auto pha = outputs[0].toTensor();
auto fgr = outputs[1].toTensor();
auto res_tensor = fgr; // pha* fgr + (1 - pha) * nbg_tensor;
// permute() only rewrites strides; the bytes stay in planar (C, H, W) order. Without
// .contiguous() the Mat below reads that planar buffer as interleaved pixels, which shows
// up as a 3x3 grid of small grey copies of the image.
res_tensor = res_tensor.mul(255).clamp(0, 255).to(torch::kU8).permute({ 0, 2, 3, 1 })[0].contiguous().cpu();
// res borrows res_tensor's buffer (no copy), so res_tensor must outlive res.
cv::Mat res(static_cast<int>(res_tensor.size(0)), static_cast<int>(res_tensor.size(1)), CV_8UC3, res_tensor.data_ptr());
cv::cvtColor(res, res, cv::COLOR_RGB2BGR);
cv::imwrite("res.png", res);
It shows 9 images of 640×360 each. What happened?
Thanks for your great contributions. I referred to model_usage for C++, but I don't know how to convert the results and display them. I also referred to inference_webcam.py and took inspiration from this code:
# Run the model and keep only its first two outputs (alpha and foreground).
outputs = model(src, bgr)
pha, fgr = outputs[:2]
# Composite the foreground over an all-ones (white) background.
white = torch.ones_like(fgr)
composite = pha * fgr + (1 - pha) * white
# Scale to 8-bit, move to host memory, reorder to channels-last and take the first batch item.
frame = composite.mul(255).byte().cpu().permute(0, 2, 3, 1).numpy()[0]
# Swap RGB -> BGR before handing the frame to the displayer.
res = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
key = dsp.step(res)
I need to port it to C++, but I still have some questions.
auto outputs = model.forward({src, bgr}).toTuple()->elements(); auto pha = outputs[0].toTensor(); auto fgr = outputs[1].toTensor(); // the fllowing code is error, but I have no idea. auto res_tensor = (pha * fgr + (1-pha) * torch::ones_like(fgr)).mul(255).cpu(); Mat res(res_tensor.size(2), res_tensor.size(3), CV_8UC3, (void*) res_tensor.data_ptr<uint8_t>()); cvtColor(res, res, COLOR_RGB2BGR); imshow("matting", res);
Would you please show me the code so I can study it? Thanks.
I am a C++ beginner and I also want to display the image from C++ code. How can this be done? I hope you can share your code so I can learn from it.
Have you solved it? I ran it successfully, but the image turned out to be wrong.
And is your result image right?
If it is, what did you do to get it right?
How can I improve the FPS in C++? CUDA is being used, but the GPU utilization is low.
@luoww1992
This is pretty weird. My intuition is that the torch tensor has a different memory layout than cv Mat, so it is reading the 3 channels wrong and that's why the image is divided to 3 parts on height and width. Alpha won't have this problem because it only has 1 channel.
Maybe try calling .contiguous() after permute, and try playing around with the permute order. I don't know.
@PeterL1n this is my code:
#include <torch/torch.h>
#include <torch/script.h>
#include <ATen/ATen.h>
#include <Windows.h>
#include <time.h>
#include <iostream>
#include <memory>
#include <opencv2/highgui.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
int main()
{
printf("Hello World!\n");
LoadLibraryA("torch_cuda.dll");
auto hasCuda = torch::cuda::is_available();
auto hasCudnn = torch::cuda::cudnn_is_available();
auto hasCount = torch::cuda::device_count();
std::cout << "Has cuda? " << hasCuda << " - Has cudnn? " << hasCudnn << std::endl;
std::cout << "Device count : " << hasCount << std::endl;
std::cout << "start load model......\n";
auto device = torch::Device("cuda");
auto precision = torch::kFloat32;
auto model = torch::jit::load("./torchscript_resnet101_fp32.pth");
model.setattr("backbone_scale", 0.25);
model.setattr("refine_mode", "sampling");
model.setattr("refine_sample_pixels", 80000);
model.to(device);
std::cout << "model load over\n";
std::cout << "start load image......\n";
cv::Mat image1 = cv::imread("fgr.png");
std::cout << "fgr " << image1.rows << " " << image1.cols << " " << image1.channels() << std::endl;
torch::Tensor fgr_tensor = torch::from_blob(image1.data, { 1, image1.rows, image1.cols, 3 }, torch::kByte);//
fgr_tensor = fgr_tensor.permute({ 0, 3, 1, 2 });
fgr_tensor = fgr_tensor.toType(precision);
fgr_tensor = fgr_tensor.div(255);
fgr_tensor = fgr_tensor.to(device);
cv::Mat image2 = cv::imread("bgr.png");
std::cout << "bgr " << image2.rows << " " << image2.cols << " " << image2.channels() << std::endl;
torch::Tensor bgr_tensor = torch::from_blob(image2.data, { 1, image2.rows, image2.cols, 3 }, torch::kByte);//
bgr_tensor = bgr_tensor.permute({ 0, 3, 1, 2 });
bgr_tensor = bgr_tensor.toType(precision);
bgr_tensor = bgr_tensor.div(255);
bgr_tensor = bgr_tensor.to(device);
cv::Mat image3 = cv::imread("newbg.png");
std::cout << "newbg " << image3.rows << " " << image3.cols << " " << image3.channels() << std::endl;
torch::Tensor nbg_tensor = torch::from_blob(image3.data, { 1, image3.rows, image3.cols, 3 }, torch::kByte);//
nbg_tensor = nbg_tensor.permute({ 0, 3, 1, 2 });
nbg_tensor = nbg_tensor.toType(precision);
nbg_tensor = nbg_tensor.div(255);
nbg_tensor = nbg_tensor.to(device);
auto a = nbg_tensor.sizes();
std::cout << "nbg_tensor size " << a << std::endl;
std::cout << "start inference......\n";
clock_t start = clock();
for (int i = 0; i < 10; i++) {
clock_t start1 = clock();
auto outputs = model.forward({ fgr_tensor, bgr_tensor }).toTuple()->elements();
auto pha = outputs[0].toTensor();
auto fgr = outputs[1].toTensor();
auto pha_size = pha.sizes();
std::cout << "pha_size size " << pha_size << std::endl;
auto fgr_size = fgr.sizes();
std::cout << "fgr_size size " << fgr_size << std::endl;
//auto res_tensor = fgr;
auto res_tensor = pha* fgr + (1 - pha) * nbg_tensor;
res_tensor = res_tensor.mul(255).clamp(0, 255).permute({ 0, 2, 3, 1 })[0].to(torch::kU8).cpu();
cv::Mat res(res_tensor.size(0), res_tensor.size(1), CV_8UC3, res_tensor.data_ptr());
cv::cvtColor(res, res, cv::COLOR_RGB2BGR);
cv::imwrite("res.png", res);
clock_t end1 = clock();
std::cout << "time epoch:" << (end1 - start1) << std::endl;
}
clock_t end = clock();
std::cout << "time:" << (end - start) << std::endl;
};
If I call .contiguous(), will the size of the result image change? In Python, inference takes 40 ms, while in C++ the whole inference takes much longer. How can I improve the inference FPS?
@PeterL1n here is my code: https://github.com/PeterL1n/BackgroundMattingV2/issues/54#issuecomment-849594672 What do you mean? Also, I noticed you are going to release a new, better model — when will it be available? Thank you!
@luoww1992 Unfortunately, I do not have time to debug the code in C++. You are on your own.
A Python-to-C++ port, following inference_webcam.py:
#include <torch/script.h>
#include <torch/csrc/api/include/torch/cuda.h>
#include <QImage>
int main(int argc, char *argv[])
{
std::cout << "cuda :" << torch::cuda::is_available() << std::endl;
std::cout << "cudnn:" << torch::cuda::cudnn_is_available() << std::endl;
//! Load model
auto device = torch::Device("cuda");
auto precision = torch::kFloat32;
auto model = torch::jit::load(R"(E:\JetBrains\PyCharm 2018.3.5\works\bgmatt\model\TorchScript\torchscript_mobilenetv2_fp32.pth)");
model.setattr("backbone_scale", 0.25);
model.setattr("refine_mode", "sampling");
model.setattr("refine_sample_pixels", 80000);
model.to(device);
//! Load image
QImage imgSrc(R"(E:\Microsoft Visual Studio\2017\Enterprise\works\QtBgMatt\x64\Release\input_img\src\src1.png)");
QImage imgBg(R"(E:\Microsoft Visual Studio\2017\Enterprise\works\QtBgMatt\x64\Release\input_img\bg\bg1.png)");
//! Convert BGRA to RGB
imgSrc = imgSrc.convertToFormat(QImage::Format_RGB888);
imgBg = imgBg.convertToFormat(QImage::Format_RGB888);
auto tensorSrc = torch::from_blob(imgSrc.bits(), { imgSrc.height(),imgSrc.width(),3 }, torch::kByte);
tensorSrc = tensorSrc.to(device);
tensorSrc = tensorSrc.permute({ 2,0,1 }).contiguous();
auto tmpSrc = tensorSrc.to(precision).div(255);
tmpSrc.unsqueeze_(0);
tmpSrc = tmpSrc.to(precision);
auto tensorBg = torch::from_blob(imgBg.bits(), { imgBg.height(),imgBg.width(),3 }, torch::kByte);
tensorBg = tensorBg.to(device);
tensorBg = tensorBg.permute({ 2,0,1 }).contiguous();
auto tmpBg = tensorBg.to(precision).div(255);
tmpBg.unsqueeze_(0);
tmpBg = tmpBg.to(precision);
//! Inference
auto start = std::chrono::high_resolution_clock::now();
//torch::NoGradGuard no_grad;
auto outputs = model.forward({ tmpSrc, tmpBg }).toTuple()->elements();
std::cout << "time(ms):" << static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count()) << std::endl;
auto pha = outputs[0].toTensor();
auto fgr = outputs[1].toTensor();
auto pha_size = pha.sizes();
std::cout << "pha_size size " << pha_size << std::endl;
auto fgr_size = fgr.sizes();
std::cout << "fgr_size size " << fgr_size << std::endl;
auto tgt_bgr = torch::tensor({ 120.f / 255, 255.f / 255, 155.f / 255 }).toType(precision).to(device).view({ 1, 3, 1, 1 });
std::cout << "tgt_bgr size " << tgt_bgr.sizes() << std::endl;
auto res_tensor = pha * fgr + (1 - pha) * tgt_bgr;
res_tensor = res_tensor.mul(255).to(torch::kUInt8).cpu().permute({ 0,2,3,1 });
res_tensor.squeeze_(0);
res_tensor = res_tensor.contiguous();
std::cout << "res_tensor size " << res_tensor.sizes() << std::endl;
QImage imgRes(static_cast<uchar *>(res_tensor.data_ptr()), res_tensor.size(1), res_tensor.size(0), QImage::Format_RGB888);
std::cout << "Save: " << imgRes.save("Res.png") << std::endl;
}
Thanks for your great contributions. I referred to model_usage for C++, but I don't know how to convert the results and display them.
I also referred to inference_webcam.py and took inspiration from its code.
I need to port it to C++, but I still have some questions.
Would you please show me the code so I can study it? Thanks.