onnx / onnx-tensorrt

ONNX-TensorRT: TensorRT backend for ONNX
Apache License 2.0

ONNX to TensorRT 7.1: network has dynamic or shape inputs, but no optimization profile has been defined (with multiple inputs and outputs) #524

Closed · vechee closed this issue 3 years ago

vechee commented 4 years ago

I have converted a TensorFlow model from .pb to ONNX. When converting the ONNX model to TensorRT, it throws: network has dynamic or shape inputs, but no optimization profile has been defined.

I then looked at sampleDynamicReshape.cpp for reference.

However, I don't know how to handle multiple inputs and outputs.

For example, my network takes two input images, named img_one and img_two, and produces two outputs, out_h and out_g.

How should I set this up?

Any references or suggestions would be appreciated.

I am new to TensorRT.
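For reference, in the TensorRT 7 C++ API a network with dynamic input shapes can only be built after at least one optimization profile has been added to the builder config, and every profile must provide kMIN/kOPT/kMAX dimensions for every dynamic input, no matter how many inputs there are. Below is a minimal sketch for a two-input network; the input names img_one/img_two and the 1x3xHxW shape ranges are assumptions based on the description above, not taken from a real model:

```cpp
#include <NvInfer.h>

// Sketch: one optimization profile that covers both dynamic inputs.
// Names and shape ranges are illustrative only.
void addProfileForBothInputs(nvinfer1::IBuilder& builder, nvinfer1::IBuilderConfig& config)
{
    nvinfer1::IOptimizationProfile* profile = builder.createOptimizationProfile();
    for (const char* name : {"img_one", "img_two"})
    {
        profile->setDimensions(name, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{1, 3, 1, 1});
        profile->setDimensions(name, nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{1, 3, 224, 224});
        profile->setDimensions(name, nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{1, 3, 448, 448});
    }
    config.addOptimizationProfile(profile);
}
```

The outputs (out_h, out_g) need no profile entries; profiles only describe inputs.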

vechee commented 4 years ago

For example, I rewrote the code as follows. It doesn't work: I still get the error network has dynamic or shape inputs, but no optimization profile has been defined. How can I fix this?

```cpp
void mynet::build()
{
    auto builder = makeUnique(nvinfer1::createInferBuilder(gLogger.getTRTLogger()));

    // This function will also set mPredictionInputDims and mPredictionOutputDims,
    // so it needs to be called before building the preprocessor.
    buildPredictionEngine(builder);
    buildPreprocessorEngine(builder);
}

void mynet::prepare()
{
    mPreprocessorContext = makeUnique(mPreprocessorEngine->createExecutionContext());
    mPredictionContext = makeUnique(mPredictionEngine->createExecutionContext());
    // Since input dimensions are not known ahead of time, we only allocate the output buffer and preprocessor output
    // buffer.
    mPredictionInput_img.resize(mPredictionInputDims_img);
    mPredictionInput_roi.resize(mPredictionInputDims_roi);
    mOutput_hg.hostBuffer.resize(mPredictionOutputDims_hg);
    mOutput_hg.deviceBuffer.resize(mPredictionOutputDims_hg);
    mOutput_bl.hostBuffer.resize(mPredictionOutputDims_bl);
    mOutput_bl.deviceBuffer.resize(mPredictionOutputDims_bl);
}

void mynet::buildPreprocessorEngine(const hgnetUniquePtr<nvinfer1::IBuilder>& builder)
{
    // Create the preprocessor engine using a network that supports full dimensions (createNetworkV2).
    auto preprocessorNetwork = makeUnique( builder->createNetworkV2(1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));

    auto input_img = preprocessorNetwork->addInput("img_in", nvinfer1::DataType::kFLOAT, Dims4{ 1, 3, -1, -1 });
    auto resizeLayer1 = preprocessorNetwork->addResize(*input_img);
    resizeLayer1->setOutputDimensions(mPredictionInputDims_img);
    preprocessorNetwork->markOutput(*resizeLayer1->getOutput(0));

    auto input_roi = preprocessorNetwork->addInput("roi_in", nvinfer1::DataType::kFLOAT, Dims4{ 1, 3, -1, -1 });
    auto resizeLayer2 = preprocessorNetwork->addResize(*input_roi);
    resizeLayer2->setOutputDimensions(mPredictionInputDims_roi);
    preprocessorNetwork->markOutput(*resizeLayer2->getOutput(0));

    // Finally, configure and build the preprocessor engine.
    auto preprocessorConfig = makeUnique(builder->createBuilderConfig());

    // Create an optimization profile so that we can specify a range of input dimensions.
    auto profile1 = builder->createOptimizationProfile();

    profile1->setDimensions(input_img->getName(), OptProfileSelector::kMIN, Dims4{ 1, 3, 1, 1 });
    profile1->setDimensions(input_img->getName(), OptProfileSelector::kOPT, Dims4{ 1, 3, 224, 224 });
    profile1->setDimensions(input_img->getName(), OptProfileSelector::kMAX, Dims4{ 1, 3, 448, 448 });
    preprocessorConfig->addOptimizationProfile(profile1);
    auto profile2 = builder->createOptimizationProfile();
    profile2->setDimensions(input_roi->getName(), OptProfileSelector::kMIN, Dims4{ 1, 3, 1, 1 });
    profile2->setDimensions(input_roi->getName(), OptProfileSelector::kOPT, Dims4{ 1, 3, 224, 224});
    profile2->setDimensions(input_roi->getName(), OptProfileSelector::kMAX, Dims4{ 1, 3, 448, 448});
    preprocessorConfig->addOptimizationProfile(profile2);

    mPreprocessorEngine = makeUnique(builder->buildEngineWithConfig(*preprocessorNetwork, *preprocessorConfig));
}

void mynet::buildPredictionEngine(const hgnetUniquePtr<nvinfer1::IBuilder>& builder)
{
    // Create a network using the parser.
    const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = makeUnique(builder->createNetworkV2(explicitBatch));
    auto parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger());
    bool parsingSuccess = parser->parseFromFile( locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), static_cast<int>(gLogger.getReportableSeverity()));
    if (!parsingSuccess)
    {
        throw std::runtime_error{ "Failed to parse model" };
    }
    /*
    // Attach a softmax layer to the end of the network.
    auto softmax = network->addSoftMax(*network->getOutput(0));
    // Set softmax axis to 1 since network output has shape [1, 10] in full dims mode
    softmax->setAxes(1 << 1);
    network->unmarkOutput(*network->getOutput(0));
    network->markOutput(*softmax->getOutput(0));
    */
    // Get information about the inputs/outputs directly from the model.
    mPredictionInputDims_img = network->getInput(0)->getDimensions();
    mPredictionInputDims_roi = network->getInput(1)->getDimensions();
    mPredictionOutputDims_hg = network->getOutput(0)->getDimensions();
    mPredictionOutputDims_bl = network->getOutput(1)->getDimensions();

    // Create a builder config
    auto config = makeUnique(builder->createBuilderConfig());
    config->setMaxWorkspaceSize(16_MiB);
    if (mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
    if (mParams.int8)
    {
        config->setFlag(BuilderFlag::kINT8);
        hgnetCommon::setAllTensorScales(network.get(), 127.0f, 127.0f);
    }
    // Build the prediction engine.
    mPredictionEngine = makeUnique(builder->buildEngineWithConfig(*network, *config));
}

bool mynet::infer(cv::Mat img_one, cv::Mat img_two)
{

    processInput(img_one, img_two);

    mInput_img.deviceBuffer.resize(Dims4{ 1, 3, 224, 224 });
    CHECK(cudaMemcpy(mInput_img.deviceBuffer.data(), mInput_img.hostBuffer.data(), mInput_img.hostBuffer.nbBytes(), cudaMemcpyHostToDevice));
    mInput_roi.deviceBuffer.resize(Dims4{ 1, 3, 224, 224 });
    CHECK(cudaMemcpy(mInput_roi.deviceBuffer.data(), mInput_roi.hostBuffer.data(), mInput_roi.hostBuffer.nbBytes(), cudaMemcpyHostToDevice));

    // Set the input sizes for the preprocessor. Binding indices follow network
    // definition order: 0 = img_in, 1 = resized img, 2 = roi_in, 3 = resized roi.
    mPreprocessorContext->setBindingDimensions(0, Dims4{ 1, 3, 224, 224 });
    mPreprocessorContext->setBindingDimensions(2, Dims4{ 1, 3, 224, 224 });
    // We can only run inference once all dynamic input shapes have been specified.
    if (!mPreprocessorContext->allInputDimensionsSpecified())
    {
        return false;
    }

    // Run the preprocessor to resize both inputs to the prediction input shape.
    // executeV2 expects one pointer per engine binding, in binding order.
    std::vector<void*> preprocessorBindings = { mInput_img.deviceBuffer.data(), mPredictionInput_img.data(),
                                                mInput_roi.deviceBuffer.data(), mPredictionInput_roi.data() };
    // For engines using full dims, we can use executeV2, which does not take a separate batch size parameter.
    bool status = mPreprocessorContext->executeV2(preprocessorBindings.data());
    if (!status)
    {
        return false;
    }

    // Next, run the model to generate a prediction.
    std::vector<void*> predictionBindings = { mPredictionInput_img.data(), mPredictionInput_roi.data(),
                                              mOutput_hg.deviceBuffer.data(), mOutput_bl.deviceBuffer.data() };
    status = mPredictionContext->executeV2(predictionBindings.data());
    if (!status)
    {
        return false;
    }

    // Copy the outputs back to the host and verify the output.
    CHECK(cudaMemcpy(mOutput_hg.hostBuffer.data(), mOutput_hg.deviceBuffer.data(), mOutput_hg.deviceBuffer.nbBytes(),
        cudaMemcpyDeviceToHost));
    CHECK(cudaMemcpy(mOutput_bl.hostBuffer.data(), mOutput_bl.deviceBuffer.data(), mOutput_bl.deviceBuffer.nbBytes(),
        cudaMemcpyDeviceToHost));

    return true;
}

bool mynet::processInput(cv::Mat img_one, cv::Mat img_two)
{
    // Normalize and copy to the host buffers.
    mInput_img.hostBuffer.resize(Dims4{ 1, 3, 224, 224 });
    float* hostInputBuffer_img = static_cast<float*>(mInput_img.hostBuffer.data());
    mInput_roi.hostBuffer.resize(Dims4{ 1, 3, 224, 224 });
    float* hostInputBuffer_roi = static_cast<float*>(mInput_roi.hostBuffer.data());

    /* do something */
    return true;
}

```
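One thing that stands out in buildPreprocessorEngine above: profile1 sets shapes only for img_in and profile2 only for roi_in, but each TensorRT optimization profile must supply kMIN/kOPT/kMAX shapes for every dynamic input of the network, so neither profile is complete on its own. A merged version might look like this (a sketch reusing the names from the code above):

```cpp
// Sketch: a single profile that covers both preprocessor inputs.
auto profile = builder->createOptimizationProfile();
for (const char* name : { input_img->getName(), input_roi->getName() })
{
    profile->setDimensions(name, OptProfileSelector::kMIN, Dims4{ 1, 3, 1, 1 });
    profile->setDimensions(name, OptProfileSelector::kOPT, Dims4{ 1, 3, 224, 224 });
    profile->setDimensions(name, OptProfileSelector::kMAX, Dims4{ 1, 3, 448, 448 });
}
preprocessorConfig->addOptimizationProfile(profile);
```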

bitwangdan commented 3 years ago

Hi @vechee, have you solved this problem?

kevinch-nv commented 3 years ago

It looks like you are creating two engines: one for resizing the image and the other for running inference on the resized data.

Which engine is throwing the error? It looks like you are creating the profiles correctly for the preprocessing engine - does the prediction engine also have dynamic inputs?
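For what it's worth, the exact message "network has dynamic or shape inputs, but no optimization profile has been defined" is what TensorRT reports when a build runs with zero profiles in the config. In the code above, buildPredictionEngine parses the ONNX model, whose inputs are presumably still dynamic after the TensorFlow export, but it never adds a profile to its config, so that build would fail this way regardless of the preprocessor. A sketch of the missing piece (the fixed 1x3x224x224 shape is an assumption, chosen because the preprocessor resizes everything to that size):

```cpp
// Sketch: give the prediction engine its own profile before buildEngineWithConfig.
auto predictionProfile = builder->createOptimizationProfile();
for (int i = 0; i < network->getNbInputs(); ++i)
{
    const char* name = network->getInput(i)->getName(); // e.g. "img_one", "img_two"
    predictionProfile->setDimensions(name, OptProfileSelector::kMIN, Dims4{ 1, 3, 224, 224 });
    predictionProfile->setDimensions(name, OptProfileSelector::kOPT, Dims4{ 1, 3, 224, 224 });
    predictionProfile->setDimensions(name, OptProfileSelector::kMAX, Dims4{ 1, 3, 224, 224 });
}
config->addOptimizationProfile(predictionProfile);
```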

kevinch-nv commented 3 years ago

Closing due to inactivity in this thread. If you are still having trouble, feel free to open a new issue with a repro on the latest version of TRT.