I want to use the DLA on my device, so I started from the sample_googlenet sample code and changed a few lines (see the code below).
I get the error below when the sample is built with TensorRT OSS cross-compiled on x86-64, but the same code runs the model on the DLA when it is compiled natively on aarch64.
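For context, samplesCommon::enableDLA is the call I changed. A rough sketch of my understanding of what it does with the TensorRT 7.x builder API (reconstructed, not copied verbatim from common.h):

#include "NvInfer.h"

// Sketch (my reconstruction) of what samplesCommon::enableDLA does:
// run supported layers on the chosen DLA core and let unsupported
// layers (e.g. the softmax "prob") fall back to the GPU.
void enableDlaSketch(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, int dlaCore)
{
    if (builder->getNbDLACores() == 0)
    {
        return; // no DLA visible to this build of the library
    }
    config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); // GPU fallback for unsupported layers
    config->setFlag(nvinfer1::BuilderFlag::kFP16);         // DLA needs FP16 (or INT8) precision
    config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
    config->setDLACore(dlaCore);
}

The parameter check that fails in the error below guards this same DeviceType enum, which is why the cross-compiled build looks suspicious to me.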
Error (built with TensorRT OSS cross-compiled on x86-64):
nvidia@nvidia:~/temp/out$ ./sample_googlenet
&&&& RUNNING TensorRT.sample_googlenet # ./sample_googlenet
[06/29/2021-21:07:43] [I] Building and running a GPU inference engine for GoogleNet
[06/29/2021-21:07:46] [E] [TRT] Parameter check failed at: ../builder/BuilderConfig.cpp::setDeviceType::178, condition: int(deviceType) >= 0 && int(deviceType) < EnumMax<DeviceType>()
[06/29/2021-21:07:46] [I] [TRT]
[06/29/2021-21:07:46] [I] [TRT] --------------- Layers running on DLA:
[06/29/2021-21:07:46] [I] [TRT]
[06/29/2021-21:07:46] [I] [TRT] --------------- Layers running on GPU:
[06/29/2021-21:07:46] [I] [TRT] conv1 + relu1, norm1, pool1, conv2 + relu2, norm2, pool2, conv3 + relu3, conv4 + relu4, conv5 + relu5, pool5, fc6 + relu6, fc7 + relu7, fc8, prob,
[06/29/2021-21:07:50] [I] [TRT] Some tactics do not have sufficient workspace memory to run. Increasing workspace size may increase performance, please check verbose output.
[06/29/2021-21:12:04] [W] [TRT] No implementation obeys reformatting-free rules, at least 2 reformatting nodes are needed, now picking the fastest path instead.
[06/29/2021-21:12:04] [I] [TRT] Detected 1 inputs and 1 output network tensors.
[06/29/2021-21:12:07] [I] Ran ./sample_googlenet with:
[06/29/2021-21:12:07] [I] Input(s): data
[06/29/2021-21:12:07] [I] Output(s): prob
&&&& PASSED TensorRT.sample_googlenet # ./sample_googlenet
Correct output (compiled natively on the aarch64 device):
nvidia@nvidia:~/workspace/bin$ ./sample_googlenet
&&&& RUNNING TensorRT.sample_googlenet # ./sample_googlenet
[06/29/2021-21:12:38] [I] Building and running a GPU inference engine for GoogleNet
[06/29/2021-21:12:40] [W] [TRT] Default DLA is enabled but layer prob is not supported on DLA, falling back to GPU.
[06/29/2021-21:12:42] [I] [TRT]
[06/29/2021-21:12:42] [I] [TRT] --------------- Layers running on DLA:
[06/29/2021-21:12:42] [I] [TRT] {conv1,relu1,norm1,pool1,conv2,relu2,norm2,pool2,conv3,relu3,conv4,relu4,conv5,relu5,pool5,fc6,relu6,fc7,relu7,fc8},
[06/29/2021-21:12:42] [I] [TRT] --------------- Layers running on GPU:
[06/29/2021-21:12:42] [I] [TRT] prob,
[06/29/2021-21:12:52] [W] [TRT] No implementation obeys reformatting-free rules, at least 1 reformatting nodes are needed, now picking the fastest path instead.
[06/29/2021-21:12:52] [I] [TRT] Detected 1 inputs and 1 output network tensors.
[06/29/2021-21:12:55] [I] Ran ./sample_googlenet with:
[06/29/2021-21:12:55] [I] Input(s): data
[06/29/2021-21:12:55] [I] Output(s): prob
&&&& PASSED TensorRT.sample_googlenet # ./sample_googlenet
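Note that the failing x86-64 build still prints PASSED, but nothing is placed on the DLA. As a diagnostic (my own suggestion, using only the stock nvinfer1::IBuilder API), logging the DLA core count before enabling DLA would show whether the cross-compiled binary is pairing mismatched headers and libraries:

#include "NvInfer.h"
#include <iostream>

// Hypothetical helper: report how many DLA cores the builder can see.
// Xavier and Xavier NX should report 2; if the cross-compiled binary
// reports 0, it is probably loading a mismatched libnvinfer.
void logDlaCores(nvinfer1::IBuilder& builder)
{
    std::cout << "DLA cores visible to builder: " << builder.getNbDLACores() << std::endl;
}

The full modified sampleGoogleNet.cpp follows.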
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//!
//! sampleGoogleNet.cpp
//! This file contains the implementation of the GoogleNet sample. It creates the network using
//! the GoogleNet caffe model.
//! It can be run with the following command line:
//! Command: ./sample_googlenet [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
//!

#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"

#include "NvCaffeParser.h"
#include "NvInfer.h"
#include <cuda_runtime_api.h>

// (the system header names below were garbled in the original post;
// these are the ones the stock sample uses)
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>

const std::string gSampleName = "TensorRT.sample_googlenet";
//!
//! \brief The SampleGoogleNet class implements the GoogleNet sample
//!
//! \details It creates the network using a caffe model
//!
class SampleGoogleNet
{
    template <typename T>
    using SampleUniquePtr = std::unique_ptr<T, samplesCommon::InferDeleter>;

public:
    SampleGoogleNet(const samplesCommon::CaffeSampleParams& params)
        : mParams(params)
    {
    }

    //!
    //! \brief Builds the network engine
    //!
    bool build();

    //!
    //! \brief Runs the TensorRT inference engine for this sample
    //!
    bool infer();

    //!
    //! \brief Used to clean up any state created in the sample class
    //!
    bool teardown();

    samplesCommon::CaffeSampleParams mParams; //!< The parameters for the sample

private:
    //!
    //! \brief Parses a Caffe model for GoogleNet and creates a TensorRT network
    //!
    void constructNetwork(SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser,
        SampleUniquePtr<nvinfer1::INetworkDefinition>& network);

    std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network
};
//!
//! \brief Creates the network, configures the builder and creates the network engine
//!
//! \details This function creates the GoogleNet network by parsing the caffe model and builds
//! the engine that will be used to run GoogleNet (mEngine)
//!
//! \return Returns true if the engine was created successfully and false otherwise
//!
bool SampleGoogleNet::build()
{
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }

    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetwork());
    if (!network)
    {
        return false;
    }

    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }

    auto parser = SampleUniquePtr<nvcaffeparser1::ICaffeParser>(nvcaffeparser1::createCaffeParser());
    if (!parser)
    {
        return false;
    }

    constructNetwork(parser, network);
    builder->setMaxBatchSize(mParams.batchSize);
    config->setMaxWorkspaceSize(16_MiB);

    // My change: the DLA core index is hard-coded to 1 for testing (Xavier and
    // Xavier NX each expose DLA cores 0 and 1); the stock sample passes
    // mParams.dlaCore from --useDLACore instead.
    // samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
    samplesCommon::enableDLA(builder.get(), config.get(), 1);

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }

    return true;
}
//!
//! \brief Uses a caffe parser to create the googlenet Network and marks the
//! output layers
//!
//! \param network Pointer to the network that will be populated with the googlenet network
//!
//! \param parser Pointer to the caffe parser used to populate the network
//!
void SampleGoogleNet::constructNetwork(SampleUniquePtr<nvcaffeparser1::ICaffeParser>& parser,
    SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
{
    const nvcaffeparser1::IBlobNameToTensor* blobNameToTensor = parser->parse(
        mParams.prototxtFileName.c_str(), mParams.weightsFileName.c_str(), *network, nvinfer1::DataType::kFLOAT);

    for (auto& s : mParams.outputTensorNames)
    {
        network->markOutput(*blobNameToTensor->find(s.c_str()));
    }
}
//!
//! \brief Runs the TensorRT inference engine for this sample
//!
//! \details This function is the main execution function of the sample. It allocates the buffer,
//! sets inputs and executes the engine.
//!
bool SampleGoogleNet::infer()
{
    // Create RAII buffer manager object
    samplesCommon::BufferManager buffers(mEngine, mParams.batchSize);

    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }

    // Fetch host buffers and set host input buffers to all zeros
    for (auto& input : mParams.inputTensorNames)
    {
        const auto bufferSize = buffers.size(input);
        if (bufferSize == samplesCommon::BufferManager::kINVALID_SIZE_VALUE)
        {
            sample::gLogError << "input tensor missing: " << input << "\n";
            return false; // was EXIT_FAILURE, which converts to true in a bool function
        }
        memset(buffers.getHostBuffer(input), 0, bufferSize);
    }

    // Memcpy from host input buffers to device input buffers
    buffers.copyInputToDevice();

    bool status = context->execute(mParams.batchSize, buffers.getDeviceBindings().data());
    if (!status)
    {
        return false;
    }

    // Memcpy from device output buffers to host output buffers
    buffers.copyOutputToHost();
    return true;
}
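// ---------------------------------------------------------------------------
// Not part of the original sample: a hypothetical helper using the same
// samplesCommon::BufferManager API, showing how the "prob" output could be
// read back after buffers.copyOutputToHost() (the sample itself never
// inspects the result). Single-batch handling only, for brevity.
// ---------------------------------------------------------------------------
int topClass(samplesCommon::BufferManager& buffers)
{
    const float* prob = static_cast<const float*>(buffers.getHostBuffer("prob"));
    const int numClasses = static_cast<int>(buffers.size("prob") / sizeof(float));
    int best = 0;
    for (int i = 1; i < numClasses; ++i)
    {
        if (prob[i] > prob[best])
        {
            best = i;
        }
    }
    return best;
}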
//!
//! \brief Used to clean up any state created in the sample class
//!
bool SampleGoogleNet::teardown()
{
    //! Clean up the libprotobuf files as the parsing is complete
    //! \note It is not safe to use any other part of the protocol buffers library after
    //! ShutdownProtobufLibrary() has been called.
    nvcaffeparser1::shutdownProtobufLibrary();
    return true;
}
//!
//! \brief Initializes members of the params struct using the command line args
//!
samplesCommon::CaffeSampleParams initializeSampleParams(const samplesCommon::Args& args)
{
    samplesCommon::CaffeSampleParams params;
    if (args.dataDirs.empty())
    {
        params.dataDirs.push_back("/home/nvidia/workspace/model");
        params.dataDirs.push_back("data/googlenet/");
        params.dataDirs.push_back("data/samples/googlenet/");
    }
    else
    {
        params.dataDirs = args.dataDirs;
    }

    // The posted function was truncated here; the stock sample fills in the
    // remaining fields roughly as follows before returning ("data" and "prob"
    // match the Input(s)/Output(s) lines in the logs above).
    params.prototxtFileName = "googlenet.prototxt";
    params.weightsFileName = "googlenet.caffemodel";
    params.inputTensorNames.push_back("data");
    params.outputTensorNames.push_back("prob");
    params.batchSize = 4;
    params.dlaCore = args.useDLACore;

    return params;
}
//!
//! \brief Prints the help information for running this sample
//!
void printHelpInfo()
{
    std::cout << "Usage: ./sample_googlenet [-h or --help] [-d or --datadir=<path to data directory>] "
                 "[--useDLACore=<int>]\n";
    std::cout << "--help          Display help information\n";
    std::cout << "--datadir       Specify path to a data directory, overriding the default. This option can be used "
                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
                 "data/samples/googlenet/ and data/googlenet/"
              << std::endl;
    std::cout << "--useDLACore=N  Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
                 "where n is the number of DLA engines on the platform."
              << std::endl;
}
int main(int argc, char** argv)
{
    samplesCommon::Args args;
    bool argsOK = samplesCommon::parseArgs(args, argc, argv);
    if (!argsOK)
    {
        sample::gLogError << "Invalid arguments" << std::endl;
        printHelpInfo();
        return EXIT_FAILURE;
    }

    // (the posted main() was truncated after the argument check; the remainder
    // follows the stock sample, matching the RUNNING/PASSED lines in the logs)
    if (args.help)
    {
        printHelpInfo();
        return EXIT_SUCCESS;
    }

    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
    sample::gLogger.reportTestStart(sampleTest);

    SampleGoogleNet sample(initializeSampleParams(args));

    sample::gLogInfo << "Building and running a GPU inference engine for GoogleNet" << std::endl;

    if (!sample.build() || !sample.infer() || !sample.teardown())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    return sample::gLogger.reportPass(sampleTest);
}
Environment
TensorRT Version: 7.1.3
CUDA Version: 10.2
Operating System: Ubuntu 18.04
Platform: Jetson Xavier / Xavier NX, JetPack 4.5