Open interstellar-space opened 5 months ago
solved?
@xjock no... Here is my code, but it cannot draw any lane lines
#include <iostream>
#include <fstream>
#include <NvInfer.h>
#include <memory>
#include <NvOnnxParser.h>
#include <vector>
#include <cuda_runtime_api.h>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudawarping.hpp>
#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>
#include <algorithm>
#include <numeric>
struct Detection
{
float background;
float foreground;
float start_y;
float start_x;
float theta;
float length;
float lane_x_coordinates[72];
};
// utilities ----------------------------------------------------------------------------------------------------------
// class to log errors, warnings, and other information during the build and inference phases
class Logger : public nvinfer1::ILogger
{
public:
void log(Severity severity, const char *msg) noexcept override
{
// remove this 'if' if you need more logged info
if ((severity == Severity::kERROR) || (severity == Severity::kINTERNAL_ERROR))
{
std::cout << msg << "\n";
}
}
} gLogger;
// destroy TensorRT objects if something goes wrong
struct TRTDestroy
{
template <class T>
void operator()(T *obj) const
{
if (obj)
{
obj->destroy();
}
}
};
template <class T>
using TRTUniquePtr = std::unique_ptr<T, TRTDestroy>;
// calculate size of tensor
size_t getSizeByDim(const nvinfer1::Dims &dims)
{
size_t size = 1;
for (size_t i = 0; i < dims.nbDims; ++i)
{
size *= dims.d[i];
}
return size;
}
// preprocessing stage ------------------------------------------------------------------------------------------------
void preprocessImage(cv::cuda::GpuMat &gpu_frame, float *gpu_input, const nvinfer1::Dims &dims)
{
auto input_width = dims.d[2];
auto input_height = dims.d[1];
auto channels = dims.d[0];
auto input_size = cv::Size(input_width, input_height);
// resize
cv::cuda::GpuMat resized;
cv::cuda::resize(gpu_frame, resized, input_size, 0, 0, cv::INTER_NEAREST);
// normalize
cv::cuda::GpuMat flt_image;
resized.convertTo(flt_image, CV_32FC3, 1.f / 255.f);
cv::cuda::subtract(flt_image, cv::Scalar(0.485f, 0.456f, 0.406f), flt_image, cv::noArray(), -1);
cv::cuda::divide(flt_image, cv::Scalar(0.229f, 0.224f, 0.225f), flt_image, 1, -1);
// to tensor
std::vector<cv::cuda::GpuMat> chw;
for (size_t i = 0; i < channels; ++i)
{
chw.emplace_back(cv::cuda::GpuMat(input_size, CV_32FC1, gpu_input + i * input_width * input_height));
}
cv::cuda::split(flt_image, chw);
}
std::vector<std::vector<float>> softmax(const std::vector<std::vector<float>> &x)
{
std::vector<std::vector<float>> y(x.size(), std::vector<float>(x[0].size()));
// Assume that the softmax is performed along the last axis (columns).
for (size_t i = 0; i < x.size(); ++i)
{
float maxVal = *std::max_element(x[i].begin(), x[i].end());
std::vector<float> expVec(x[i].size());
float sum = 0.0f;
for (size_t j = 0; j < x[i].size(); ++j)
{
expVec[j] = exp(x[i][j] - maxVal);
sum += expVec[j];
}
for (size_t j = 0; j < y[i].size(); ++j)
{
y[i][j] = expVec[j] / sum;
}
}
return y;
}
bool Lane_IOU(const std::vector<float> &parent_box, const std::vector<float> &compared_box, float threshold)
{
int n_offsets = 72;
int n_strips = n_offsets - 1;
int start_a = static_cast<int>(parent_box[2] * n_strips + 0.5);
int start_b = static_cast<int>(compared_box[2] * n_strips + 0.5);
int start = std::max(start_a, start_b);
int end_a = start_a + static_cast<int>(parent_box[4] - 1 + 0.5 - ((parent_box[4] - 1) < 0));
int end_b = start_b + static_cast<int>(compared_box[4] - 1 + 0.5 - ((compared_box[4] - 1) < 0));
int end = std::min({end_a, end_b, 71});
if ((end - start) < 0)
{
return false;
}
float dist = 0.0f;
for (int i = 5 + start; i <= 5 + end; ++i)
{
if (parent_box[i] < compared_box[i])
{
dist += compared_box[i] - parent_box[i];
}
else
{
dist += parent_box[i] - compared_box[i];
}
}
return dist < (threshold * (end - start + 1));
}
std::vector<int> Lane_nms(const std::vector<std::vector<float>> &proposals, const std::vector<float> &scores, float overlap = 50, int top_k = 4)
{
std::vector<int> keep_index;
std::vector<size_t> indices(scores.size());
std::iota(indices.begin(), indices.end(), 0);
// Sort indices based on corresponding scores in descending order
std::sort(indices.begin(), indices.end(), [&scores](int a, int b)
{ return scores[a] > scores[b]; });
std::vector<int> r_filters(scores.size(), 0);
for (size_t i = 0; i < indices.size(); ++i)
{
size_t index = indices[i];
if (r_filters[index] == 1) // Ensure we check r_filters for the right index
{
continue;
}
keep_index.push_back(index);
if (static_cast<int>(keep_index.size()) > top_k) // We cast size to int to compare with top_k
{
break;
}
if (i == indices.size() - 1) // If it's the last index, break out of the loop
{
break;
}
// Iterate over the rest of the proposals from this point on
for (size_t j = i + 1; j < indices.size(); ++j)
{
size_t sub_index = indices[j];
if (!r_filters[sub_index]) // Check if not already filtered
{
if (Lane_IOU(proposals[index], proposals[sub_index], overlap))
{
r_filters[sub_index] = 1;
}
}
}
}
// Resize to remove any excess elements in case fewer than top_k were kept
keep_index.resize(std::min(top_k, static_cast<int>(keep_index.size())));
return keep_index;
}
std::vector<std::vector<cv::Point2f>> predictions_to_pred(const std::vector<std::vector<float>> &predictions,
const std::vector<float> &prior_ys,
int n_strips, int ori_img_w, int ori_img_h, int img_w, int img_h, int cut_height)
{
std::vector<std::vector<cv::Point2f>> lanes;
for (const auto &lane : predictions)
{
std::vector<float> lane_xs(lane.begin() + 6, lane.end()); // normalized value
int start = std::min(std::max(0, static_cast<int>(round(lane[2] * n_strips))), n_strips);
int length = static_cast<int>(round(lane[5]));
int end = start + length - 1;
end = std::min(end, static_cast<int>(prior_ys.size()) - 1);
// Extend prediction until x is outside the image
std::vector<bool> mask(start, false);
for (int i = start - 1; i >= 0; --i)
{
if (lane_xs[i] < 0.0f || lane_xs[i] > 1.0f)
{
mask[i] = true;
}
else if (i < start - 1 && mask[i + 1])
{
mask[i] = true;
}
}
std::fill(lane_xs.begin() + end + 1, lane_xs.end(), -2.0f);
for (int i = 0; i < start; ++i)
{
if (mask[i])
{
lane_xs[i] = -2.0f;
}
}
std::vector<float> lane_ys;
for (size_t i = 0; i < lane_xs.size(); ++i)
{
if (lane_xs[i] >= 0)
{
lane_ys.push_back(prior_ys[i]);
}
}
lane_xs.erase(std::remove_if(lane_xs.begin(), lane_xs.end(),
[](float x)
{ return x < 0; }),
lane_xs.end());
if (lane_xs.size() <= 1)
{
continue;
}
std::reverse(lane_xs.begin(), lane_xs.end());
std::reverse(lane_ys.begin(), lane_ys.end());
auto scale_x = static_cast<float>(ori_img_w) / img_w;
auto scale_y = static_cast<float>(ori_img_h) / (img_h - cut_height);
for (size_t i = 0; i < lane_xs.size(); ++i)
{
lane_xs[i] = lane_xs[i] * scale_x;
lane_ys[i] = (lane_ys[i] * (img_h - cut_height) + cut_height) * scale_y;
}
std::vector<cv::Point2f> points;
for (size_t i = 0; i < lane_xs.size(); ++i)
{
points.emplace_back(lane_xs[i], lane_ys[i]);
}
std::cout << "lane_xs: ";
for (const auto &x : lane_xs)
{
std::cout << x << " ";
}
std::cout << "\nlane_ys: ";
for (const auto &y : lane_ys)
{
std::cout << y << " ";
}
std::cout << "\n";
lanes.push_back(points);
}
std::cout << "lanes: " << lanes.size() << "\n";
return lanes;
}
cv::Mat imshow_lanes(cv::Mat &img, const std::vector<std::vector<cv::Point2f>> &lanes, int width = 4)
{
std::vector<std::vector<cv::Point>> lanes_xys;
for (const auto &lane : lanes)
{
std::vector<cv::Point> xys;
for (const auto &point : lane)
{
if (point.x <= 0.0f || point.y <= 0.0f)
{
continue;
}
int x = static_cast<int>(point.x);
int y = static_cast<int>(point.y);
xys.emplace_back(x, y);
}
if (!xys.empty())
{
lanes_xys.push_back(xys);
}
}
std::cout << "lanes_xys: " << lanes_xys.size() << "\n";
std::sort(lanes_xys.begin(), lanes_xys.end(),
[](const std::vector<cv::Point> &a, const std::vector<cv::Point> &b)
{ return a[0].x < b[0].x; });
std::vector<cv::Scalar> COLORS = {cv::Scalar(255, 0, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255), cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 255)};
for (size_t idx = 0; idx < lanes_xys.size(); ++idx)
{
const auto &xys = lanes_xys[idx];
for (size_t i = 1; i < xys.size(); ++i)
{
cv::line(img, xys[i - 1], xys[i], COLORS[idx % COLORS.size()], width);
}
}
cv::imwrite("test.jpg", img);
// cv::imshow("Lanes", img);
// cv::waitKey(0);
return img;
}
// post-processing stage ----------------------------------------------------------------------------------------------
void postprocessResults(cv::Mat &frame, float *gpu_output, const nvinfer1::Dims &dims, int batch_size)
{
float conf_threshold = 0.4f;
// copy results from GPU to CPU
std::vector<float> cpu_output(getSizeByDim(dims) * batch_size);
cudaMemcpy(cpu_output.data(), gpu_output, cpu_output.size() * sizeof(float), cudaMemcpyDeviceToHost);
for (int i = 0; i < cpu_output.size(); ++i)
{
std::cout << cpu_output[i] << " ";
if (!((i + 1) % 78))
{
std::cout << "\n\n";
}
}
std::cout << "------------------------\n";
std::vector<std::vector<float>> detections(cpu_output.size() / 78, std::vector<float>(2));
for (size_t i = 0; i < cpu_output.size() / 78; ++i)
{
detections[i][0] = cpu_output[i * 78];
detections[i][1] = cpu_output[(i + 1) * 78];
}
const auto xyscores = softmax(detections);
for (const auto &score : xyscores)
{
for (const auto &val : score)
{
std::cout << val << " ";
}
std::cout << "\n";
}
std::cout << "------------------------\n";
std::vector<float> scores;
std::vector<std::vector<float>> predictions(cpu_output.size() / 78, std::vector<float>(78));
for (int i = 0; i < xyscores.size(); ++i)
{
scores.emplace_back(xyscores[i][1]);
if (xyscores[i][1] >= conf_threshold)
{
std::copy(cpu_output.begin() + i * 78, cpu_output.begin() + (i + 1) * 78, predictions[i].begin());
}
}
std::cout << "predictions: " << predictions.size() << "\n";
int n_offsets = 72;
int n_strips = n_offsets - 1;
int img_w = 800;
int img_h = 320;
int ori_img_w = 1280;
int ori_img_h = 720;
int cut_height = 160;
std::vector<std::vector<float>> nms_predictions;
for (auto &prediction : predictions)
{
prediction[5] = std::round(prediction[5] * n_strips);
std::vector<float> predict(sizeof(Detection) - sizeof(float));
for (size_t j = 0; j < 4; ++j)
{
predict[j] = prediction[j];
}
for (size_t j = 5; j < prediction.size(); ++j)
{
predict[j - 1] = prediction[j];
}
nms_predictions.emplace_back(predict);
}
std::cout << "nms_predictions: " << nms_predictions.size() << "\n";
for (auto &nms_prediction : nms_predictions)
{
nms_prediction[4] *= n_strips;
for (size_t i = 5; i < nms_prediction.size(); ++i)
{
nms_prediction[i] *= ori_img_w - 1;
}
}
auto keep = Lane_nms(nms_predictions, scores, 50, 5);
std::cout << "keep: " << keep.size() << "\n";
std::vector<float> prior_ys(n_offsets);
for (int i = 0; i < n_offsets; ++i)
{
prior_ys[i] = 1.0f - static_cast<float>(i) / n_strips;
}
auto lanes = predictions_to_pred(predictions, prior_ys, n_strips, ori_img_w, ori_img_h, img_w, img_h, cut_height);
imshow_lanes(frame, lanes);
}
// initialize TensorRT engine and parse ONNX model --------------------------------------------------------------------
void parseOnnxModel(const std::string &model_path, TRTUniquePtr<nvinfer1::ICudaEngine> &engine,
TRTUniquePtr<nvinfer1::IExecutionContext> &context)
{
TRTUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(gLogger)};
TRTUniquePtr<nvinfer1::INetworkDefinition> network{builder->createNetworkV2(1)};
TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, gLogger)};
TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
// parse ONNX
if (!parser->parseFromFile(model_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)))
{
std::cerr << "ERROR: could not parse the model.\n";
return;
}
// allow TensorRT to use up to 1GB of GPU memory for tactic selection.
config->setMaxWorkspaceSize(1ULL << 30);
// use FP16 mode if possible
if (builder->platformHasFastFp16())
{
std::cout << "fp16\n";
config->setFlag(nvinfer1::BuilderFlag::kFP16);
}
// we have only one image in batch
builder->setMaxBatchSize(1);
// generate TensorRT engine optimized for the target platform
engine.reset(builder->buildEngineWithConfig(*network, *config));
context.reset(engine->createExecutionContext());
}
// initialize TensorRT engine from serialized model --------------------------------------------------------------------
void loadTrtEngine(const std::string &engine_path, TRTUniquePtr<nvinfer1::IRuntime> &runtime,
TRTUniquePtr<nvinfer1::ICudaEngine> &engine,
TRTUniquePtr<nvinfer1::IExecutionContext> &context)
{
std::ifstream engine_file(engine_path, std::ios::binary);
if (!engine_file)
{
std::cerr << "ERROR: could not open the engine file.\n";
return;
}
// 计算文件大小
engine_file.seekg(0, engine_file.end);
size_t file_size = engine_file.tellg();
engine_file.seekg(0, engine_file.beg);
// 加载文件内容到内存中
std::vector<char> trt_model_stream(file_size);
engine_file.read(trt_model_stream.data(), file_size);
engine_file.close();
// 创建runtime
runtime.reset(nvinfer1::createInferRuntime(gLogger));
// 反序列化计划文件并创建引擎
engine.reset(runtime->deserializeCudaEngine(trt_model_stream.data(), file_size, nullptr));
// 创建上下文
context.reset(engine->createExecutionContext());
}
// main pipeline ------------------------------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
if (argc < 3)
{
std::cerr << "usage: " << argv[0] << " model.onnx image.jpg\n";
return -1;
}
std::string model_path(argv[1]);
std::string image_path(argv[2]);
int batch_size = 1;
// initialize TensorRT engine and parse ONNX model
TRTUniquePtr<nvinfer1::IRuntime> runtime{nullptr};
TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
// parseOnnxModel(model_path, engine, context);
loadTrtEngine(model_path, runtime, engine, context);
// get sizes of input and output and allocate memory required for input data and for output data
std::vector<nvinfer1::Dims> input_dims; // we expect only one input
std::vector<nvinfer1::Dims> output_dims; // and one output
std::vector<void *> buffers(engine->getNbBindings()); // buffers for input and output data
for (size_t i = 0; i < engine->getNbBindings(); ++i)
{
auto binding_size = getSizeByDim(engine->getBindingDimensions(i)) * batch_size * sizeof(float);
cudaMalloc(&buffers[i], binding_size);
if (engine->bindingIsInput(i))
{
input_dims.emplace_back(engine->getBindingDimensions(i));
}
else
{
output_dims.emplace_back(engine->getBindingDimensions(i));
}
}
if (input_dims.empty() || output_dims.empty())
{
std::cerr << "Expect at least one input and one output for network\n";
return -1;
}
// read input image
cv::Mat frame = cv::imread(image_path);
if (frame.empty())
{
std::cerr << "Input image " << image_path << " load failed\n";
return -1;
}
cv::cuda::GpuMat gpu_frame;
// upload image to GPU
gpu_frame.upload(frame);
// preprocess input data
preprocessImage(gpu_frame, (float *)buffers[0], input_dims[0]);
// inference
context->enqueue(batch_size, buffers.data(), 0, nullptr);
// postprocess results
postprocessResults(frame, (float *)buffers[1], output_dims[0], batch_size);
for (void *buf : buffers)
{
cudaFree(buf);
}
return 0;
}
I wrote some C++ code, but the inference results are different from Python. Can you help me take a look?