linghu8812 / tensorrt_inference

706 stars 208 forks source link

missing #include <chrono> #29

Closed LukeAI closed 3 years ago

LukeAI commented 3 years ago

Building ScaledYolov4 doesn't work unless I add #include <chrono> to ScaleYOLOv4.h

I get these complaints:

Scanning dependencies of target ScaledYOLOv4_trt
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:117:37: error: ‘std::chrono’ has not been declared
  117 |             auto t_start_pre = std::chrono::high_resolution_clock::now();
      |                                     ^~~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:120:35: error: ‘std::chrono’ has not been declared
  120 |             auto t_end_pre = std::chrono::high_resolution_clock::now();
      |                                   ^~~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:121:36: error: ‘std::chrono’ has not been declared
  121 |             float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count();
      |                                    ^~~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:121:53: error: expected primary-expression before ‘float’
  121 |             float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count();
      |                                                     ^~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:135:33: error: ‘std::chrono’ has not been declared
  135 |             auto t_start = std::chrono::high_resolution_clock::now();
      |                                 ^~~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:137:31: error: ‘std::chrono’ has not been declared
  137 |             auto t_end = std::chrono::high_resolution_clock::now();
      |                               ^~~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:138:36: error: ‘std::chrono’ has not been declared
  138 |             float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count();
      |                                    ^~~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:138:53: error: expected primary-expression before ‘float’
  138 |             float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count();
      |                                                     ^~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:144:33: error: ‘std::chrono’ has not been declared
  144 |             auto r_start = std::chrono::high_resolution_clock::now();
  149 |             auto r_end = std::chrono::high_resolution_clock::now();
      |                               ^~~~~~
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp: In member function ‘bool ScaledYOLOv4::InferenceFolder(const string&)’:
/home/luke/projects/tensorrt_inference/ScaledYOLOv4/ScaledYOLOv4.cpp:94:1: warning: control reaches end of non-void function [-Wreturn-type]
   94 | }
      | ^
CMakeFiles/ScaledYOLOv4_trt.dir/build.make:94: recipe for target 'CMakeFiles/ScaledYOLOv4_trt.dir/ScaledYOLOv4.cpp.o' failed
make[2]: *** [CMakeFiles/ScaledYOLOv4_trt.dir/ScaledYOLOv4.cpp.o] Error 1
CMakeFiles/Makefile2:94: recipe for target 'CMakeFiles/ScaledYOLOv4_trt.dir/all' failed
make[1]: *** [CMakeFiles/ScaledYOLOv4_trt.dir/all] Error 2
Makefile:102: recipe for target 'all' failed
make: *** [all] Error 2
IGnoredBird commented 3 years ago

add #include \<chrono>

bobbilichandu commented 3 years ago

@LukeAI You can comment those lines. Those are just to calculate time. If you are using some older versions, you will face this issue afaik.

`#include "ScaledYOLOv4.h"

include "yaml-cpp/yaml.h"

include "common.hpp"

ScaledYOLOv4::ScaledYOLOv4(const std::string &config_file) { YAML::Node root = YAML::LoadFile(config_file); YAML::Node config = root["ScaledYOLOv4"]; onnx_file = config["onnx_file"].as(); engine_file = config["engine_file"].as(); labels_file = config["labels_file"].as(); BATCH_SIZE = config["BATCH_SIZE"].as(); INPUT_CHANNEL = config["INPUT_CHANNEL"].as(); IMAGE_WIDTH = config["IMAGE_WIDTH"].as(); IMAGE_HEIGHT = config["IMAGE_HEIGHT"].as(); obj_threshold = config["obj_threshold"].as(); nms_threshold = config["nms_threshold"].as(); strides = config["strides"].as<std::vector>(); num_anchors = config["num_anchors"].as<std::vector>(); assert(strides.size() == num_anchors.size()); anchors = config["anchors"].as<std::vector<std::vector>>(); detect_labels = readCOCOLabel(labels_file); CATEGORY = detect_labels.size(); int index = 0; for (const int &stride : strides) { grids.push_back({num_anchors[index], int(IMAGE_WIDTH / stride), int(IMAGE_HEIGHT / stride)}); } refer_rows = 0; refer_cols = 6; for (const std::vector &grid : grids) { refer_rows += std::accumulate(grid.begin(), grid.end(), 1, std::multiplies()); } GenerateReferMatrix(); class_colors.resize(CATEGORY); srand((int) time(nullptr)); for (cv::Scalar &class_color : class_colors) class_color = cv::Scalar(rand() % 255, rand() % 255, rand() % 255); // std::cout << "refer matrix: " <<refer_matrix << std::endl; }

ScaledYOLOv4::~ScaledYOLOv4() = default;

void ScaledYOLOv4::LoadEngine() { // create and load engine std::fstream existEngine; existEngine.open(engine_file, std::ios::in); if (existEngine) { readTrtFile(engine_file, engine); assert(engine != nullptr); } else { onnxToTRTModel(onnx_file, engine_file, engine, BATCH_SIZE); assert(engine != nullptr); } }

bool ScaledYOLOv4::InferenceFolder(const std::string &folder_name) { std::vector sample_images = readFolder(folder_name); //get context assert(engine != nullptr); context = engine->createExecutionContext(); assert(context != nullptr);

//get buffers
assert(engine->getNbBindings() == 2);
void *buffers[2];
std::vector<int64_t> bufferSize;
int nbBindings = engine->getNbBindings();
bufferSize.resize(nbBindings);

for (int i = 0; i < nbBindings; ++i) {
    nvinfer1::Dims dims = engine->getBindingDimensions(i);
    nvinfer1::DataType dtype = engine->getBindingDataType(i);
    int64_t totalSize = volume(dims) * 1 * getElementSize(dtype);
    bufferSize[i] = totalSize;
    std::cout << "binding" << i << ": " << totalSize << std::endl;
    cudaMalloc(&buffers[i], totalSize);
}

//get stream
cudaStream_t stream;
cudaStreamCreate(&stream);

int outSize = bufferSize[1] / sizeof(float) / BATCH_SIZE;

EngineInference(sample_images, outSize, buffers, bufferSize, stream);

// release the stream and the buffers
cudaStreamDestroy(stream);
cudaFree(buffers[0]);
cudaFree(buffers[1]);

// destroy the engine
context->destroy();
engine->destroy();

}

void ScaledYOLOv4::EngineInference(const std::vector &image_list, const int &outSize, void **buffers, const std::vector &bufferSize, cudaStream_t stream) { int index = 0; int batch_id = 0; std::vector vec_Mat(BATCH_SIZE); std::vector vec_name(BATCH_SIZE); float total_time = 0; for (const std::string &image_name : image_list) { index++; std::cout << "Processing: " << image_name << std::endl; cv::Mat src_img = cv::imread(image_name); if (src_img.data) { // cv::cvtColor(src_img, src_img, cv::COLOR_BGR2RGB); vec_Mat[batch_id] = src_img.clone(); vec_name[batch_id] = image_name; batch_id++; } if (batch_id == BATCH_SIZE or index == image_list.size()) { // auto t_start_pre = std::chrono::high_resolution_clock::now(); std::cout << "prepareImage" << std::endl; std::vectorcurInput = prepareImage(vec_Mat); // auto t_end_pre = std::chrono::high_resolution_clock::now(); // float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count(); // std::cout << "prepare image take: " << total_pre << " ms." << std::endl; // total_time += total_pre; batch_id = 0; if (!curInput.data()) { std::cout << "prepare images ERROR!" << std::endl; continue; } // DMA the input to the GPU, execute the batch asynchronously, and DMA it back: std::cout << "host2device" << std::endl; cudaMemcpyAsync(buffers[0], curInput.data(), bufferSize[0], cudaMemcpyHostToDevice, stream);

        // do inference
        std::cout << "execute" << std::endl;
        // auto t_start = std::chrono::high_resolution_clock::now();
        context->execute(BATCH_SIZE, buffers);
        // auto t_end = std::chrono::high_resolution_clock::now();
        // float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count();
        // std::cout << "Inference take: " << total_inf << " ms." << std::endl;
        // total_time += total_inf;
        std::cout << "execute success" << std::endl;
        std::cout << "device2host" << std::endl;
        std::cout << "post process" << std::endl;
        // auto r_start = std::chrono::high_resolution_clock::now();
        auto *out = new float[outSize * BATCH_SIZE];
        cudaMemcpyAsync(out, buffers[1], bufferSize[1], cudaMemcpyDeviceToHost, stream);
        cudaStreamSynchronize(stream);
        auto boxes = postProcess(vec_Mat, out, outSize);
        // auto r_end = std::chrono::high_resolution_clock::now();
        // float total_res = std::chrono::duration<float, std::milli>(r_end - r_start).count();
        // std::cout << "Post process take: " << total_res << " ms." << std::endl;
        // total_time += total_res;
        for (int i = 0; i < (int)vec_Mat.size(); i++)
        {
            auto org_img = vec_Mat[i];
            if (!org_img.data)
                continue;
            auto rects = boxes[i];

// cv::cvtColor(org_img, org_img, cv::COLOR_BGR2RGB); for(const auto &rect : rects) { char t[256]; sprintf(t, "%.2f", rect.prob); std::string name = detect_labels[rect.classes] + "-" + t; cv::putText(org_img, name, cv::Point(rect.x - rect.w / 2, rect.y - rect.h / 2 - 5), cv::FONT_HERSHEY_COMPLEX, 0.7, class_colors[rect.classes], 2); cv::Rect rst(rect.x - rect.w / 2, rect.y - rect.h / 2, rect.w, rect.h); cv::rectangle(org_img, rst, class_colors[rect.classes], 2, cv::LINE_8, 0); } int pos = vec_name[i].find_last_of("."); std::string rst_name = vecname[i].insert(pos, ""); std::cout << rst_name << std::endl; cv::imwrite(rst_name, org_img); } vec_Mat = std::vector(BATCH_SIZE); delete[] out; } } // std::cout << "Average processing time is " << total_time / image_list.size() << "ms" << std::endl; }

void ScaledYOLOv4::GenerateReferMatrix() { refer_matrix = cv::Mat(refer_rows, refer_cols, CV_32FC1); int position = 0; for (int n = 0; n < (int)grids.size(); n++) { for (int c = 0; c < grids[n][0]; c++) { std::vector anchor = anchors[n grids[n][0] + c]; for (int h = 0; h < grids[n][1]; h++) for (int w = 0; w < grids[n][2]; w++) { float row = refer_matrix.ptr(position); row[0] = w; row[1] = grids[n][1]; row[2] = h; row[3] = grids[n][2]; row[4] = anchor[0]; row[5] = anchor[1]; position++; } } } }

std::vector ScaledYOLOv4::prepareImage(std::vector &vec_img) { std::vector result(BATCH_SIZE IMAGE_WIDTH IMAGE_HEIGHT INPUT_CHANNEL); float data = result.data(); int index = 0; for (const cv::Mat &src_img : vec_img) { if (!src_img.data) continue; float ratio = float(IMAGE_WIDTH) / float(src_img.cols) < float(IMAGE_HEIGHT) / float(src_img.rows) ? float(IMAGE_WIDTH) / float(src_img.cols) : float(IMAGE_HEIGHT) / float(src_img.rows); cv::Mat flt_img = cv::Mat::zeros(cv::Size(IMAGE_WIDTH, IMAGE_HEIGHT), CV_8UC3); cv::Mat rsz_img; cv::resize(src_img, rsz_img, cv::Size(), ratio, ratio); rsz_img.copyTo(flt_img(cv::Rect(0, 0, rsz_img.cols, rsz_img.rows))); flt_img.convertTo(flt_img, CV_32FC3, 1.0 / 255);

    //HWC TO CHW
    int channelLength = IMAGE_WIDTH * IMAGE_HEIGHT;
    std::vector<cv::Mat> split_img = {
            cv::Mat(IMAGE_WIDTH, IMAGE_HEIGHT, CV_32FC1, data + channelLength * (index + 2)),
            cv::Mat(IMAGE_WIDTH, IMAGE_HEIGHT, CV_32FC1, data + channelLength * (index + 1)),
            cv::Mat(IMAGE_WIDTH, IMAGE_HEIGHT, CV_32FC1, data + channelLength * index)
    };
    index += 3;
    cv::split(flt_img, split_img);
}
return result;

}

std::vector<std::vector> ScaledYOLOv4::postProcess(const std::vector &vec_Mat, float output, const int &outSize) { std::vector<std::vector> vec_result; // std::cout << "vec result" << vec_result.size() << std::endl; int index = 0; for (const cv::Mat &src_img : vec_Mat) { std::vector result; float ratio = float(src_img.cols) / float(IMAGE_WIDTH) > float(src_img.rows) / float(IMAGE_HEIGHT) ? float(src_img.cols) / float(IMAGE_WIDTH) : float(src_img.rows) / float(IMAGE_HEIGHT); float out = output + index outSize; cv::Mat result_matrix = cv::Mat(refer_rows, CATEGORY + 5, CV_32FC1, out); for (int row_num = 0; row_num < refer_rows; row_num++) { DetectRes box; auto row = result_matrix.ptr(row_num); auto max_pos = std::max_element(row + 5, row + CATEGORY + 5); box.prob = row[4] row[max_pos - row]; if (box.prob < obj_threshold) continue; box.classes = max_pos - row - 5; auto anchor = refer_matrix.ptr(row_num); box.x = (row[0] 2 - 0.5 + anchor[0]) / anchor[1] IMAGE_WIDTH ratio; box.y = (row[1] 2 - 0.5 + anchor[2]) / anchor[3] IMAGE_HEIGHT ratio; box.w = pow(row[2] 2, 2) anchor[4] ratio; box.h = pow(row[3] 2, 2) anchor[5] ratio; result.push_back(box); // std::cout << "dets: " << box.x << " " << box.y << " " << box.w << " " << box.h << " " << box.classes << " " << box.prob << " " << std::endl; } NmsDetect(result); for(int idx = 0; idx < (int)result.size(); idx++){ DetectRes box = result[idx]; std::cout << "dets: " << box.x << " " << box.y << " " << box.w << " " << box.h << " " << box.prob << " " << box.classes << " " << std::endl; } vec_result.push_back(result); index++; } return vec_result; }

void ScaledYOLOv4::NmsDetect(std::vector &detections) { sort(detections.begin(), detections.end(), [=](const DetectRes &left, const DetectRes &right) { return left.prob > right.prob; });

for (int i = 0; i < (int)detections.size(); i++)
    for (int j = i + 1; j < (int)detections.size(); j++)
    {
        if (detections[i].classes == detections[j].classes)
        {
            float iou = IOUCalculate(detections[i], detections[j]);
            if (iou > nms_threshold)
                detections[j].prob = 0;
        }
    }

detections.erase(std::remove_if(detections.begin(), detections.end(), [](const DetectRes &det)
{ return det.prob == 0; }), detections.end());

}

float ScaledYOLOv4::IOUCalculate(const ScaledYOLOv4::DetectRes &det_a, const ScaledYOLOv4::DetectRes &det_b) { cv::Point2f center_a(det_a.x, det_a.y); cv::Point2f center_b(det_b.x, det_b.y); cv::Point2f left_up(std::min(det_a.x - det_a.w / 2, det_b.x - det_b.w / 2), std::min(det_a.y - det_a.h / 2, det_b.y - det_b.h / 2)); cv::Point2f right_down(std::max(det_a.x + det_a.w / 2, det_b.x + det_b.w / 2), std::max(det_a.y + det_a.h / 2, det_b.y + det_b.h / 2)); float distance_d = (center_a - center_b).x (center_a - center_b).x + (center_a - center_b).y (center_a - center_b).y; float distance_c = (left_up - right_down).x (left_up - right_down).x + (left_up - right_down).y (left_up - right_down).y; float inter_l = det_a.x - det_a.w / 2 > det_b.x - det_b.w / 2 ? det_a.x - det_a.w / 2 : det_b.x - det_b.w / 2; float inter_t = det_a.y - det_a.h / 2 > det_b.y - det_b.h / 2 ? det_a.y - det_a.h / 2 : det_b.y - det_b.h / 2; float inter_r = det_a.x + det_a.w / 2 < det_b.x + det_b.w / 2 ? det_a.x + det_a.w / 2 : det_b.x + det_b.w / 2; float inter_b = det_a.y + det_a.h / 2 < det_b.y + det_b.h / 2 ? det_a.y + det_a.h / 2 : det_b.y + det_b.h / 2; if (inter_b < inter_t || inter_r < inter_l) return 0; float inter_area = (inter_b - inter_t) (inter_r - inter_l); float union_area = det_a.w det_a.h + det_b.w * det_b.h - inter_area; if (union_area == 0) return 0; else return inter_area / union_area - distance_d / distance_c; } `

LukeAI commented 3 years ago

ok thanks for reply! I just added the include line in my local header but I thought it might be worth adding upstream too as I guess everybody will face this?