cyrusbehr / tensorrt-cpp-api

TensorRT C++ API Tutorial
MIT License
577 stars 72 forks source link

How to build for older version of Tensorrt on ARM device AGX Xavier #17

Closed BADAL244 closed 1 year ago

BADAL244 commented 1 year ago

ccache: not found -- Found OpenCV: /home/badal/Documents/opencv (found suitable version "4.6.0", minimum required is "4.6.0") -- Configuring done (0.7s) -- Generating done (0.2s) -- Build files have been written to: /home/badal/tensorrt-cpp-api/build badal@badal-desktop:~/tensorrt-cpp-api/build$ make -j8 [ 25%] Building CXX object CMakeFiles/tensorrt_cpp_api.dir/src/engine.cpp.o /home/badal/tensorrt-cpp-api/src/engine.cpp: In member function ‘bool Engine::loadNetwork()’: /home/badal/tensorrt-cpp-api/src/engine.cpp:250:32: error: ‘class nvinfer1::ICudaEngine’ has no member named ‘getNbIOTensors’ 250 | m_buffers.resize(m_engine->getNbIOTensors()); | ^~~~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp:258:35: error: ‘class nvinfer1::ICudaEngine’ has no member named ‘getNbIOTensors’ 258 | for (int i = 0; i < m_engine->getNbIOTensors(); ++i) { | ^~~~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp:259:43: error: ‘class nvinfer1::ICudaEngine’ has no member named ‘getIOTensorName’ 259 | const auto tensorName = m_engine->getIOTensorName(i); | ^~~~~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp:261:43: error: ‘class nvinfer1::ICudaEngine’ has no member named ‘getTensorIOMode’ 261 | const auto tensorType = m_engine->getTensorIOMode(tensorName); | ^~~~~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp:262:44: error: ‘class nvinfer1::ICudaEngine’ has no member named ‘getTensorShape’ 262 | const auto tensorShape = m_engine->getTensorShape(tensorName); | ^~~~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp:263:27: error: ‘TensorIOMode’ has not been declared 263 | if (tensorType == TensorIOMode::kINPUT) { | ^~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp:270:34: error: ‘TensorIOMode’ has not been declared 270 | } else if (tensorType == TensorIOMode::kOUTPUT) { | ^~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp: In member function ‘bool Engine::runInference(const std::vector<std::vector >&, std::vector<std::vector<std::vector > >&)’: /home/badal/tensorrt-cpp-api/src/engine.cpp:352:20: 
error: ‘class nvinfer1::IExecutionContext’ has no member named ‘setInputShape’; did you mean ‘setInputShapeBinding’? 352 | m_context->setInputShape(m_IOTensorNames[i].c_str(), inputDims); // Define the batch size | ^~~~~ | setInputShapeBinding /home/badal/tensorrt-cpp-api/src/engine.cpp:374:34: error: ‘class nvinfer1::IExecutionContext’ has no member named ‘setTensorAddress’ 374 | bool status = m_context->setTensorAddress(m_IOTensorNames[i].c_str(), m_buffers[i]); | ^~~~ /home/badal/tensorrt-cpp-api/src/engine.cpp:381:30: error: ‘class nvinfer1::IExecutionContext’ has no member named ‘enqueueV3’; did you mean ‘enqueueV2’? 381 | bool status = m_context->enqueueV3(inferenceCudaStream); | ^~~~~ | enqueueV2 make[2]: [CMakeFiles/tensorrt_cpp_api.dir/build.make:76: CMakeFiles/tensorrt_cpp_api.dir/src/engine.cpp.o] Error 1 make[1]: [CMakeFiles/Makefile2:85: CMakeFiles/tensorrt_cpp_api.dir/all] Error 2 make: *** [Makefile:91: all] Error 2

ii graphsurgeon-tf 8.4.1-1+cuda11.4 arm64 GraphSurgeon for TensorRT package ii libnvinfer-bin 8.4.1-1+cuda11.4 arm64 TensorRT binaries ii libnvinfer-dev 8.4.1-1+cuda11.4 arm64 TensorRT development libraries and headers ii libnvinfer-plugin-dev 8.4.1-1+cuda11.4 arm64 TensorRT plugin libraries ii libnvinfer-plugin8 8.4.1-1+cuda11.4 arm64 TensorRT plugin libraries ii libnvinfer-samples 8.4.1-1+cuda11.4 all TensorRT samples ii libnvinfer8 8.4.1-1+cuda11.4 arm64 TensorRT runtime libraries ii libnvonnxparsers-dev 8.4.1-1+cuda11.4 arm64 TensorRT ONNX libraries ii libnvonnxparsers8 8.4.1-1+cuda11.4 arm64 TensorRT ONNX libraries ii libnvparsers-dev 8.4.1-1+cuda11.4 arm64 TensorRT parsers libraries ii libnvparsers8 8.4.1-1+cuda11.4 arm64 TensorRT parsers libraries ii nvidia-tensorrt 5.0.2-b231 arm64 NVIDIA TensorRT Meta Package ii nvidia-tensorrt-dev 5.0.2-b231 arm64 NVIDIA TensorRT dev Meta Package ii python3-libnvinfer 8.4.1-1+cuda11.4 arm64 Python 3 bindings for TensorRT ii python3-libnvinfer-dev 8.4.1-1+cuda11.4 arm64 Python 3 development package for TensorRT ii tensorrt 8.4.1.5-1+cuda11.4 arm64 Meta package for TensorRT ii uff-converter-tf 8.4.1-1+cuda11.4 arm64 UFF converter for TensorRT package

cyrusbehr commented 1 year ago

You can go through the git history for this project and check out the commit from before I upgraded to the latest TensorRT API.

cyrusbehr commented 1 year ago

Hi @BADAL244 I upgraded to TensorRT API 8.6 in V3.0, so please try release 2.2 and see if that works for you:

image

BADAL244 commented 1 year ago

Hey, thanks for your update. I am currently working on converting a semantic segmentation model to TensorRT, but I am running into a problem. Could you please go through my code snippet and help me figure out what I am doing wrong?

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <opencv2/opencv.hpp>

using namespace nvinfer1;

class Logger : public nvinfer1::ILogger { void log(Severity severity, const char* msg) noexcept override { // Customize the logging behavior based on severity levels if (severity == Severity::kERROR) std::cerr << "Error: " << msg << std::endl; else if (severity == Severity::kWARNING) std::cout << "Warning: " << msg << std::endl; else std::cout << msg << std::endl; } } gLogger;

int main() { // Initialize TensorRT components // Load the TensorRT engine from the .plan file (previously optimized) const std::string engineFilePath = "/home/badal/tensorrt-cpp-api/build/cityscapes_fan_large_hybrid_224.engine.NVIDIAGeForceRTX4060LaptopGPU.fp16.1.1"; std::ifstream engineFile(engineFilePath, std::ios::binary); engineFile.seekg(0, std::ios::end); const size_t engineSize = engineFile.tellg(); engineFile.seekg(0, std::ios::beg); std::vector engineData(engineSize); engineFile.read(engineData.data(), engineSize); engineFile.close();

IRuntime* runtime = createInferRuntime(gLogger);
ICudaEngine* engine = runtime->deserializeCudaEngine(engineData.data(), engineSize);

// Prepare input and output buffers
void* buffers[2];  // Assuming 1 input and 1 output
const int inputIndex = engine->getBindingIndex("input");
const int outputIndex = engine->getBindingIndex("output");
const int inputChannels = 3;  // RGB image
const int inputHeight = 224;  // Height of your input images
const int inputWidth = 224;   // Width of your input images
const int inputDataTypeSize = sizeof(float);  // Size of the data type (float in this case)

const int inputSize = inputChannels * inputHeight * inputWidth * inputDataTypeSize;
const int numClasses = 1;   // Number of segmentation classes
const int outputHeight = 224; // Height of your output mask
const int outputWidth = 224;  // Width of your output mask
const int outputDataTypeSize = sizeof(float);  // Size of the data type (int in this case)

const int outputSize = numClasses * outputHeight * outputWidth * outputDataTypeSize;
float* outputData = new float[outputSize / sizeof(float)]; // Allocate memory on the host for the output data

cudaMalloc(&buffers[inputIndex], inputSize);
cudaMalloc(&buffers[outputIndex], outputSize);

// Load and preprocess input images using OpenCV
cv::Mat inputImage = cv::imread("bielefeld_000000_001011_leftImg8bit.png");
// Preprocess inputImage if needed

// Create a CUDA stream for inference
cudaStream_t stream;
cudaStreamCreate(&stream);

// Perform inference
cudaMemcpyAsync(outputData, buffers[outputIndex], outputSize, cudaMemcpyDeviceToHost, stream);

IExecutionContext* context = engine->createExecutionContext();
cudaMemcpyAsync(buffers[inputIndex], inputImage.data, inputSize, cudaMemcpyHostToDevice, stream);
context->enqueue(1, buffers, stream, nullptr);
cudaMemcpyAsync(outputData, buffers[outputIndex], outputSize, cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);

// cv::Mat segmentationMapMat(outputHeight, outputWidth, CV_32SC1, outputData);  // Assuming 32-bit signed integer data type
// cv::Mat coloredMap;
// cv::applyColorMap(segmentationMapMat, coloredMap, cv::COLORMAP_JET);
cv::Mat segmentationMapMat(outputHeight, outputWidth, CV_32SC1, outputData); // Assuming 32-bit signed integer data type

// Collect unique pixel values

// Define the class colors as cv::Scalar values (BGR format)
std::vector<cv::Scalar> classColors = {
    cv::Scalar(0, 0, 0),        // Background (Black)
    cv::Scalar(128, 0, 0),      // Class 1 (Dark Blue)
    cv::Scalar(0, 128, 0),      // Class 2 (Dark Green)
    cv::Scalar(128, 128, 0),    // Class 3 (Dark Cyan)
    cv::Scalar(0, 0, 128),      // Class 4 (Dark Red)
    cv::Scalar(128, 0, 128),    // Class 5 (Dark Magenta)
    cv::Scalar(0, 128, 128),    // Class 6 (Dark Yellow)
    cv::Scalar(128, 128, 128),  // Class 7 (Gray)
    cv::Scalar(64, 0, 0),       // Class 8 (Brown)
    cv::Scalar(192, 0, 0),      // Class 9 (Light Blue)
    cv::Scalar(0, 64, 0),       // Class 10 (Light Green)
    cv::Scalar(192, 64, 0),     // Class 11 (Light Cyan)
    cv::Scalar(0, 0, 64),       // Class 12 (Light Red)
    cv::Scalar(192, 0, 64),     // Class 13 (Light Magenta)
    cv::Scalar(0, 64, 64),      // Class 14 (Light Yellow)
    cv::Scalar(192, 192, 192),  // Class 15 (Light Gray)
    cv::Scalar(96, 0, 0),       // Class 16 (Dark Brown)
    cv::Scalar(32, 0, 0),       // Class 17 (Darker Brown)
    cv::Scalar(255, 255, 255),  // Class 18 (White)
    // Class 19 and more...
};

std::vector<cv::Vec3b> classColorsVec3b;
for (const auto& scalarColor : classColors) {
    cv::Vec3b vec3bColor(scalarColor[0], scalarColor[1], scalarColor[2]);
    classColorsVec3b.push_back(vec3bColor);
}

// Convert class indices to a grayscale image
cv::Mat grayscaleMap;
segmentationMapMat.convertTo(grayscaleMap, CV_8U);

std::set<uchar> uniquePixelValues;
for (int y = 0; y < outputHeight; ++y) {
    for (int x = 0; x < outputWidth; ++x) {
        uchar pixelValue = grayscaleMap.at<uchar>(y, x);
        uniquePixelValues.insert(pixelValue);
    }
}

// Print the unique pixel values
std::cout << "Unique Pixel Values in Grayscale Map:" << std::endl;
for (const uchar value : uniquePixelValues) {
    std::cout << static_cast<int>(value) << std::endl;
}

// Apply colormap for visualization using the custom class colors
cv::Mat coloredMap(outputHeight, outputWidth, CV_8UC3);

for (int y = 0; y < outputHeight; ++y) {
    for (int x = 0; x < outputWidth; ++x) {
        int classIdx = static_cast<int>(grayscaleMap.at<uchar>(y, x));
        if (classIdx >= 0 && static_cast<size_t>(classIdx) < classColorsVec3b.size()) {
            coloredMap.at<cv::Vec3b>(y, x) = classColorsVec3b[classIdx];
        }
    }
}

cv::Mat resizedColoredMap;
cv::resize(coloredMap, resizedColoredMap, cv::Size(1024, 1024));

// Display the visualization using OpenCV
cv::imshow("Segmentation Map", resizedColoredMap);
cv::waitKey(0); 

cv::Mat grayscaleImage(outputHeight, outputWidth, CV_8UC3);
cv::applyColorMap(grayscaleMap, grayscaleImage, cv::COLORMAP_JET);

// Resize the grayscaleImage to 1024x1024
cv::Mat resizedGrayscaleImage;
cv::resize(grayscaleImage, resizedGrayscaleImage, cv::Size(1024, 1024));

// Display the resized grayscale visualization using OpenCV
cv::imshow("Segmentation Map (Grayscale)", resizedGrayscaleImage);
cv::waitKey(0);

// Clean up
context->destroy();
engine->destroy();
runtime->destroy();
cudaFree(buffers[inputIndex]);
cudaFree(buffers[outputIndex]);
cudaStreamDestroy(stream);
delete[] outputData;

return 0;

} please see , I will pass more info in this first see , what are the possible wrong in this .

cyrusbehr commented 1 year ago

Hey I'm sorry I don't have the time to look over your code. Please try to adapt my implementation to work for you.