Closed ylmzkaan closed 3 years ago
Maybe outputIndex2 is 1, not 2 — you need to check it.
I have an assert that checks that.
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.
I am trying to add a second head to yolov5s, and I get a "cuda failure: 1" error inside the context.enqueue() function. Here are the steps I took to add the second output to the network; maybe someone can tell me what is wrong with them.
The new head takes the output of bottleneck_csp9 as input, applies a couple of convolutional layers, and finally produces a fully connected layer with 7 neurons. Inside the createEngine_s() function, I created this new head and added its 7-neuron output to the outputs of the network as follows (det3 is the 7-neuron fully connected layer):
det3->getOutput(0)->setName(WHOLESCENE_OUTPUT_BLOB_NAME); network->markOutput(*det3->getOutput(0));
static float wholescene[BATCH_SIZE * OUTPUT_SIZE_WHOLESCENE];
where OUTPUT_SIZE_WHOLESCENE is 7. I then sized the buffer array for three bindings and allocated the new output buffer:

void* buffers[3];
const int outputIndex2 = engine->getBindingIndex(WHOLESCENE_OUTPUT_BLOB_NAME);
CHECK(cudaMalloc(&buffers[outputIndex2], BATCH_SIZE * OUTPUT_SIZE_WHOLESCENE * sizeof(float)));
Then I start the inference with this edited doInference() function:
include
include
include "cuda_runtime_api.h"
include "logging.h"
include "common.hpp"
define USE_FP16 // comment out this if want to use FP32
define DEVICE 0 // GPU id
define NMS_THRESH 0.65
define CONF_THRESH 0.4
define BATCH_SIZE 1
define N_CLASS_WHOLESCENE 7
define NET s // s m l x
define NETSTRUCT(str) createEngine_##str
define CREATENET(net) NETSTRUCT(net)
define STR1(x) #x
define STR2(x) STR1(x)
// stuff we know about the network and the input/output blobs static const int INPUT_H = Yolo::INPUT_H; static const int INPUT_W = Yolo::INPUT_W; static const int CLASS_NUM = Yolo::CLASS_NUM; static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1 static const int OUTPUT_SIZE_WHOLESCENE = N_CLASS_WHOLESCENE + 1; const char INPUT_BLOB_NAME = "data"; const char OUTPUT_BLOB_NAME = "prob"; const char WHOLESCENE_OUTPUT_BLOB_NAME = "wholescene_output"; static Logger gLogger;
// NOTE(review): this snippet was mangled by markdown — the '*' on the pointer
// types (ICudaEngine*, IBuilder*, IBuilderConfig*, INetworkDefinition*) was
// stripped, the '#' was stripped from the #ifdef USE_FP16 / #endif pair, and
// the entire network-construction body between them (including the new det3
// head and markOutput call described in the prose) was elided by the poster.
// Cannot be reconstructed from what is shown here.
// Creat the engine using only the API and not any parser. ICudaEngine createEngine_s(unsigned int maxBatchSize, IBuilder builder, IBuilderConfig config, DataType dt) { INetworkDefinition network = builder->createNetworkV2(0U);
ifdef USE_FP16
endif
}
// NOTE(review): mangled snippet — the signature is presumably
// APIToModel(unsigned int, IHostMemory**) (an out-parameter, since main()
// passes &modelStream), and IBuilder* lost its '*'. The body that builds,
// serializes, and destroys the engine was elided by the poster.
void APIToModel(unsigned int maxBatchSize, IHostMemory* modelStream) { // Create builder IBuilder builder = createInferBuilder(gLogger); IBuilderConfig* config = builder->createBuilderConfig();
}
// DMA input batch data to device, infer on the batch asynchronously, and DMA
// both outputs back to host.
// NOTE(review): '*' characters (pointer declarators and multiplications) were
// stripped by markdown formatting — restored. More importantly: the buffer
// slots are hard-coded as 0 = input, 1 = detection output, 2 = whole-scene
// output, but the allocation code uses engine->getBindingIndex() to place the
// buffers. If getBindingIndex(WHOLESCENE_OUTPUT_BLOB_NAME) returns 1 rather
// than 2 (as suggested in this thread), the buffers here are swapped relative
// to their allocated sizes and enqueue() reads/writes out of bounds — the
// likely cause of the reported "cuda failure: 1". Look the binding indices up
// (or pass them in) instead of hard-coding 1 and 2.
void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* input, float* output, float* wholescene, int batchSize) {
    CHECK(cudaMemcpyAsync(buffers[0], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
    context.enqueue(batchSize, buffers, stream, nullptr);
    CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    CHECK(cudaMemcpyAsync(wholescene, buffers[2], batchSize * OUTPUT_SIZE_WHOLESCENE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);
}
int main(int argc, char* argv) { cudaSetDevice(DEVICE); // create a model using the API directly and serialize it to a stream char trtModelStream{ nullptr }; size_t size{ 0 }; std::string engine_name = STR2(NET); engine_name = "yolov5" + engine_name + ".engine"; if (argc == 2 && std::string(argv[1]) == "-s") { IHostMemory modelStream{ nullptr }; APIToModel(BATCH_SIZE, &modelStream); assert(modelStream != nullptr); std::ofstream p(engine_name, std::ios::binary); if (!p) { std::cerr << "could not open plan output file" << std::endl; return -1; } p.write(reinterpret_cast<const char>(modelStream->data()), modelStream->size()); modelStream->destroy(); return 0; } else if (argc == 3 && std::string(argv[1]) == "-d") { std::ifstream file(engine_name, std::ios::binary); if (file.good()) { file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); trtModelStream = new char[size]; assert(trtModelStream); file.read(trtModelStream, size); file.close(); } } else { std::cerr << "arguments not right!" << std::endl; std::cerr << "./yolov5 -s // serialize model to plan file" << std::endl; std::cerr << "./yolov5 -d ../samples // deserialize plan file and run inference" << std::endl; return -1; }
}