Closed LukeAI closed 3 years ago
add #include \<chrono>
@LukeAI You can comment out those lines; they are only there to measure timing. As far as I know, you will only run into this issue if you are using some older versions.
`#include "ScaledYOLOv4.h"
ScaledYOLOv4::ScaledYOLOv4(const std::string &config_file) {
YAML::Node root = YAML::LoadFile(config_file);
YAML::Node config = root["ScaledYOLOv4"];
onnx_file = config["onnx_file"].as
// Destructor is compiler-generated (defaulted); it performs no explicit cleanup of its own.
ScaledYOLOv4::~ScaledYOLOv4() = default;
void ScaledYOLOv4::LoadEngine() { // create and load engine std::fstream existEngine; existEngine.open(engine_file, std::ios::in); if (existEngine) { readTrtFile(engine_file, engine); assert(engine != nullptr); } else { onnxToTRTModel(onnx_file, engine_file, engine, BATCH_SIZE); assert(engine != nullptr); } }
bool ScaledYOLOv4::InferenceFolder(const std::string &folder_name) {
std::vector
//get buffers
assert(engine->getNbBindings() == 2);
void *buffers[2];
std::vector<int64_t> bufferSize;
int nbBindings = engine->getNbBindings();
bufferSize.resize(nbBindings);
for (int i = 0; i < nbBindings; ++i) {
nvinfer1::Dims dims = engine->getBindingDimensions(i);
nvinfer1::DataType dtype = engine->getBindingDataType(i);
int64_t totalSize = volume(dims) * 1 * getElementSize(dtype);
bufferSize[i] = totalSize;
std::cout << "binding" << i << ": " << totalSize << std::endl;
cudaMalloc(&buffers[i], totalSize);
}
//get stream
cudaStream_t stream;
cudaStreamCreate(&stream);
int outSize = bufferSize[1] / sizeof(float) / BATCH_SIZE;
EngineInference(sample_images, outSize, buffers, bufferSize, stream);
// release the stream and the buffers
cudaStreamDestroy(stream);
cudaFree(buffers[0]);
cudaFree(buffers[1]);
// destroy the engine
context->destroy();
engine->destroy();
}
void ScaledYOLOv4::EngineInference(const std::vector
// do inference
std::cout << "execute" << std::endl;
// auto t_start = std::chrono::high_resolution_clock::now();
context->execute(BATCH_SIZE, buffers);
// auto t_end = std::chrono::high_resolution_clock::now();
// float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count();
// std::cout << "Inference take: " << total_inf << " ms." << std::endl;
// total_time += total_inf;
std::cout << "execute success" << std::endl;
std::cout << "device2host" << std::endl;
std::cout << "post process" << std::endl;
// auto r_start = std::chrono::high_resolution_clock::now();
auto *out = new float[outSize * BATCH_SIZE];
cudaMemcpyAsync(out, buffers[1], bufferSize[1], cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
auto boxes = postProcess(vec_Mat, out, outSize);
// auto r_end = std::chrono::high_resolution_clock::now();
// float total_res = std::chrono::duration<float, std::milli>(r_end - r_start).count();
// std::cout << "Post process take: " << total_res << " ms." << std::endl;
// total_time += total_res;
for (int i = 0; i < (int)vec_Mat.size(); i++)
{
auto org_img = vec_Mat[i];
if (!org_img.data)
continue;
auto rects = boxes[i];
// cv::cvtColor(org_img, org_img, cv::COLOR_BGR2RGB);
for(const auto &rect : rects)
{
char t[256];
sprintf(t, "%.2f", rect.prob);
std::string name = detect_labels[rect.classes] + "-" + t;
cv::putText(org_img, name, cv::Point(rect.x - rect.w / 2, rect.y - rect.h / 2 - 5), cv::FONT_HERSHEY_COMPLEX, 0.7, class_colors[rect.classes], 2);
cv::Rect rst(rect.x - rect.w / 2, rect.y - rect.h / 2, rect.w, rect.h);
cv::rectangle(org_img, rst, class_colors[rect.classes], 2, cv::LINE_8, 0);
}
int pos = vec_name[i].find_last_of(".");
std::string rst_name = vecname[i].insert(pos, "");
std::cout << rst_name << std::endl;
cv::imwrite(rst_name, org_img);
}
vec_Mat = std::vector
void ScaledYOLOv4::GenerateReferMatrix() {
refer_matrix = cv::Mat(refer_rows, refer_cols, CV_32FC1);
int position = 0;
for (int n = 0; n < (int)grids.size(); n++)
{
for (int c = 0; c < grids[n][0]; c++)
{
std::vector
std::vector
//HWC TO CHW
int channelLength = IMAGE_WIDTH * IMAGE_HEIGHT;
std::vector<cv::Mat> split_img = {
cv::Mat(IMAGE_WIDTH, IMAGE_HEIGHT, CV_32FC1, data + channelLength * (index + 2)),
cv::Mat(IMAGE_WIDTH, IMAGE_HEIGHT, CV_32FC1, data + channelLength * (index + 1)),
cv::Mat(IMAGE_WIDTH, IMAGE_HEIGHT, CV_32FC1, data + channelLength * index)
};
index += 3;
cv::split(flt_img, split_img);
}
return result;
}
std::vector<std::vector
void ScaledYOLOv4::NmsDetect(std::vector
for (int i = 0; i < (int)detections.size(); i++)
for (int j = i + 1; j < (int)detections.size(); j++)
{
if (detections[i].classes == detections[j].classes)
{
float iou = IOUCalculate(detections[i], detections[j]);
if (iou > nms_threshold)
detections[j].prob = 0;
}
}
detections.erase(std::remove_if(detections.begin(), detections.end(), [](const DetectRes &det)
{ return det.prob == 0; }), detections.end());
}
float ScaledYOLOv4::IOUCalculate(const ScaledYOLOv4::DetectRes &det_a, const ScaledYOLOv4::DetectRes &det_b) { cv::Point2f center_a(det_a.x, det_a.y); cv::Point2f center_b(det_b.x, det_b.y); cv::Point2f left_up(std::min(det_a.x - det_a.w / 2, det_b.x - det_b.w / 2), std::min(det_a.y - det_a.h / 2, det_b.y - det_b.h / 2)); cv::Point2f right_down(std::max(det_a.x + det_a.w / 2, det_b.x + det_b.w / 2), std::max(det_a.y + det_a.h / 2, det_b.y + det_b.h / 2)); float distance_d = (center_a - center_b).x (center_a - center_b).x + (center_a - center_b).y (center_a - center_b).y; float distance_c = (left_up - right_down).x (left_up - right_down).x + (left_up - right_down).y (left_up - right_down).y; float inter_l = det_a.x - det_a.w / 2 > det_b.x - det_b.w / 2 ? det_a.x - det_a.w / 2 : det_b.x - det_b.w / 2; float inter_t = det_a.y - det_a.h / 2 > det_b.y - det_b.h / 2 ? det_a.y - det_a.h / 2 : det_b.y - det_b.h / 2; float inter_r = det_a.x + det_a.w / 2 < det_b.x + det_b.w / 2 ? det_a.x + det_a.w / 2 : det_b.x + det_b.w / 2; float inter_b = det_a.y + det_a.h / 2 < det_b.y + det_b.h / 2 ? det_a.y + det_a.h / 2 : det_b.y + det_b.h / 2; if (inter_b < inter_t || inter_r < inter_l) return 0; float inter_area = (inter_b - inter_t) (inter_r - inter_l); float union_area = det_a.w det_a.h + det_b.w * det_b.h - inter_area; if (union_area == 0) return 0; else return inter_area / union_area - distance_d / distance_c; } `
OK, thanks for the reply! I just added the include line to my local header, but I thought it might be worth adding upstream too, as I guess everybody will face this?
Building ScaledYOLOv4 doesn't work unless I add
#include <chrono>
to ScaledYOLOv4.h. Without it, I get these complaints: