[Bug] mmseg batch inference by C++ SDK

Checklist

[X] 1. I have searched related issues but cannot get the expected help.
[X] 2. I have read the FAQ documentation but cannot get the expected help.
[X] 3. The bug has not been fixed in the latest version.

Describe the bug

I want to use the C++ SDK for mmseg model batch inference, but I can't find any demo code for batch segmentation. I tried the following code for batch inference, but I only get one output.

// Copyright (c) OpenMMLab. All rights reserved.

#include <fstream>
#include <numeric>
#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <random>
#include <string>
#include <vector>
#include <chrono>
#include <iostream>

#include "mmdeploy/segmentor.h"

using namespace std;

vector<cv::Vec3b> gen_palette(int num_classes) {
    std::mt19937 gen;
    std::uniform_int_distribution<ushort> uniform_dist(0, 255);

    vector<cv::Vec3b> palette;
    palette.reserve(num_classes);
    for (auto i = 0; i < num_classes; ++i) {
        palette.emplace_back(uniform_dist(gen), uniform_dist(gen),
                             uniform_dist(gen));
    }
    return palette;
}

void batch_infer(mmdeploy_segmentor_t& segmentor,
           std::vector<cv::Mat> images,
           std::ofstream& ofs) {
    int status{};
    auto start = std::chrono::steady_clock::now();
    std::vector<mmdeploy_mat_t> mats;
    for (auto it = images.begin(); it != images.end(); it++) {
        cv::Mat img;
        (*it).copyTo(img);
        mmdeploy_mat_t mat{img.data,
                           img.rows,
                           img.cols,
                           3,
                           MMDEPLOY_PIXEL_FORMAT_BGR,
                           MMDEPLOY_DATA_TYPE_UINT8};
        mats.push_back(mat);
    }

    // 推理
    auto predictStart = std::chrono::steady_clock::now();
    mmdeploy_segmentation_t* results[6] = {};
    //std::vector<mmdeploy_segmentation_t> results;
    status = mmdeploy_segmentor_apply(segmentor, mats.data(), mats.size(), results);
    if (status != MMDEPLOY_SUCCESS) {
        fprintf(stderr, "failed to apply segmentor, code: %d\n", (int)status);
        return;
    }
    auto postprocessStart = std::chrono::steady_clock::now();

    // 结果后处理
    auto palette = gen_palette(results[0]->classes + 1);

    mmdeploy_segmentation_t* res = results[0];
    for (int i = 0; i < mats.size(); i++) {
        res = results[i];
        cv::Mat color_mask = cv::Mat::zeros(res->height, res->width, CV_8UC3);
        int pos = 0;
        int total = color_mask.rows * color_mask.cols;
        std::vector<int> idxs(res->classes);
        for (auto iter = color_mask.begin<cv::Vec3b>();
             iter != color_mask.end<cv::Vec3b>(); ++iter) {
            // output mask
            if (res->mask) {
                *iter = palette[res->mask[pos++]];
            }
            // output score
            if (res->score) {
                std::iota(idxs.begin(), idxs.end(), 0);
                auto k =
                    std::max_element(idxs.begin(), idxs.end(),
                                     [&](int i, int j) {
                                         return res->score[i * total + pos] <
                                                res->score[j * total + pos];
                                     }) -
                    idxs.begin();
                *iter = palette[k];
                pos += 1;
            }
        }
        cv::Mat img = images[i];
        img = img * 0.5 + color_mask * 0.5;
        cv::imwrite("output_segmentation_" + std::to_string(i) + ".png", img);
    }

    auto end = std::chrono::steady_clock::now();

    double inputTime =
        std::chrono::duration<double, std::milli>(predictStart - start).count();
    double predictTime = std::chrono::duration<double, std::milli>(
                             postprocessStart - predictStart)
                             .count();
    double postprocessTime =
        std::chrono::duration<double, std::milli>(end - postprocessStart)
            .count();
    double allTime =
        std::chrono::duration<double, std::milli>(end - start).count();
    ofs << "all_time: " << allTime << " input_time: " << inputTime
        << " infer_time: " << predictTime
        << " postprocess_time: " << postprocessTime << std::endl;

    mmdeploy_segmentor_release_result(*results, mats.size());
}

int main(int argc, char* argv[]) {
    /*if (argc != 4) {
      fprintf(stderr,
              "usage:\n  image_segmentation device_name model_path
    image_path\n"); return 1;
    }
    auto device_name = argv[1];
    auto model_path = argv[2];
    auto image_path = argv[3];*/
    auto device_name = "cuda";
    /*auto model_path =
        "E:\\openmmlab\\mmdeploy_models\\mmseg\\trt\\stdc2_in1k-pre_4xb12-80k_"
        "medicine_board_with_ok-dynamic-512x1024-2048x2048";
    auto image_path = "E:\\openmmlab\\mmsegmentation\\demo\\0172_leftImg8bit.png";*/
    auto model_path =
        "E:\\openmmlab\\mmdeploy_models\\mmseg\\trt\\stdc2_in1k-pre_4xb12-800e_"
        "cityscapes_pills-128x128_5_not_keep_ratio";
    auto image_path_1 = "images\\0001_leftImg8bit.png";
    auto image_path_2 = "images\\0002_leftImg8bit.png";
    auto image_path_3 = "images\\0003_leftImg8bit.png";
    auto image_path_4 = "images\\0004_leftImg8bit.png";
    auto image_path_5 = "images\\0005_leftImg8bit.png";
    auto image_path_6 = "images\\0006_leftImg8bit.png";

    cv::Mat img_1 = cv::imread(image_path_1);
    cv::Mat img_2 = cv::imread(image_path_2);
    cv::Mat img_3 = cv::imread(image_path_3);
    cv::Mat img_4 = cv::imread(image_path_4);
    cv::Mat img_5 = cv::imread(image_path_5);
    cv::Mat img_6 = cv::imread(image_path_6);
    if (!img_1.data || 
        !img_2.data || 
        !img_3.data || 
        !img_4.data ||
        !img_5.data ||
        !img_6.data) {
        fprintf(stderr, "failed to load image: %s\n", image_path_1);
        return 1;
    }

    cv::resize(img_1, img_1, cv::Size(128, 128));
    cv::resize(img_2, img_2, cv::Size(128, 128));
    cv::resize(img_3, img_3, cv::Size(128, 128));
    cv::resize(img_4, img_4, cv::Size(128, 128));
    cv::resize(img_5, img_5, cv::Size(128, 128));
    cv::resize(img_6, img_6, cv::Size(128, 128));

    std::vector<cv::Mat> images;
    images.push_back(img_1);
    images.push_back(img_2);
    images.push_back(img_3);
    images.push_back(img_4);
    images.push_back(img_5);
    images.push_back(img_6);

    // 创建推理器
    mmdeploy_segmentor_t segmentor{};
    int status{};
    status = mmdeploy_segmentor_create_by_path(model_path, device_name, 0,
                                               &segmentor);
    if (status != MMDEPLOY_SUCCESS) {
        fprintf(stderr, "failed to create segmentor, code: %d\n", (int)status);
        return 1;
    }

    std::ofstream ofs;
    ofs.open("time.txt", std::ios::out);
    if (!ofs.is_open()) {
        std::cout << "Open file failed. \"time.txt\"" << std::endl;
    }
    int i = 1;
    while (i < 10000) {
        ofs << "[" << i << "/10000]: ";
        batch_infer(segmentor, images, ofs);

        //std::cout << "infer time: " << predictTime << std::endl;
        if (i % 100 == 0) {
            std::cout << "[" << i << "/10000]"
                      << std::endl;
        }
        i++;
    }

    ofs.close();
    cv::destroyAllWindows();
    mmdeploy_segmentor_destroy(segmentor);

    return 0;
}

AE06D231-5C28-4b1c-A34F-6A819F752908

Reproduction

Environment

10/19 15:33:37 - mmengine - INFO - **********Environmental information**********
10/19 15:33:51 - mmengine - INFO - sys.platform: win32
10/19 15:33:51 - mmengine - INFO - Python: 3.8.17 (default, Jul  5 2023, 20:44:21) [MSC v.1916 64 bit (AMD64)]
10/19 15:33:51 - mmengine - INFO - CUDA available: True
10/19 15:33:51 - mmengine - INFO - numpy_random_seed: 2147483648
10/19 15:33:51 - mmengine - INFO - GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU
10/19 15:33:51 - mmengine - INFO - CUDA_HOME: D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3
10/19 15:33:51 - mmengine - INFO - NVCC: Cuda compilation tools, release 11.3, V11.3.109
10/19 15:33:51 - mmengine - INFO - MSVC: Microsoft (R) C/C++ Optimizing Compiler Version 19.29.30151 for x64
10/19 15:33:51 - mmengine - INFO - GCC: n/a
10/19 15:33:51 - mmengine - INFO - PyTorch: 1.12.0
10/19 15:33:51 - mmengine - INFO - PyTorch compiling details: PyTorch built with:
  - C++ Version: 199711
  - MSVC 192829337
  - Intel(R) Math Kernel Library Version 2020.0.2 Product Build 20200624 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
  - OpenMP 2019
  - LAPACK is enabled (usually provided by MKL)
  - CPU capability usage: AVX2
  - CUDA Runtime 11.3
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
  - CuDNN 8.3.2  (built against CUDA 11.5)
  - Magma 2.5.4
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=C:/cb/pytorch_1000000000000/work/tmp_bin/sccache-cl.exe, CXX_FLAGS=/DWIN32 /D_WINDOWS /GR /EHsc /w /bigobj -DUSE_PTHREADPOOL -openmp:experimental -IC:/cb/pytorch_1000000000000/work/mkl/include -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=OFF, USE_OPENMP=ON, USE_ROCM=OFF,

10/19 15:33:51 - mmengine - INFO - TorchVision: 0.13.0
10/19 15:33:51 - mmengine - INFO - OpenCV: 4.8.0
10/19 15:33:51 - mmengine - INFO - MMEngine: 0.8.4
10/19 15:33:51 - mmengine - INFO - MMCV: 2.0.1
10/19 15:33:51 - mmengine - INFO - MMCV Compiler: MSVC 192930148
10/19 15:33:51 - mmengine - INFO - MMCV CUDA Compiler: 11.3
10/19 15:33:51 - mmengine - INFO - MMDeploy: 1.2.0+d7d0ca7
10/19 15:33:51 - mmengine - INFO -

10/19 15:33:51 - mmengine - INFO - **********Backend information**********
10/19 15:33:52 - mmengine - INFO - tensorrt:    8.4.0.6
10/19 15:33:52 - mmengine - INFO - tensorrt custom ops: Available
10/19 15:33:52 - mmengine - INFO - ONNXRuntime: 1.15.1
10/19 15:33:52 - mmengine - INFO - ONNXRuntime-gpu:     1.15.1
10/19 15:33:52 - mmengine - INFO - ONNXRuntime custom ops:      Available
10/19 15:33:52 - mmengine - INFO - pplnn:       None
10/19 15:33:52 - mmengine - INFO - ncnn:        None
10/19 15:33:53 - mmengine - INFO - snpe:        None
10/19 15:33:53 - mmengine - INFO - openvino:    None
10/19 15:33:53 - mmengine - INFO - torchscript: 1.12.0
10/19 15:33:53 - mmengine - INFO - torchscript custom ops:      NotAvailable
10/19 15:33:53 - mmengine - INFO - rknn-toolkit:        None
10/19 15:33:53 - mmengine - INFO - rknn-toolkit2:       None
10/19 15:33:53 - mmengine - INFO - ascend:      None
10/19 15:33:53 - mmengine - INFO - coreml:      None
10/19 15:33:53 - mmengine - INFO - tvm: None
10/19 15:33:53 - mmengine - INFO - vacc:        None
10/19 15:33:53 - mmengine - INFO -

10/19 15:33:53 - mmengine - INFO - **********Codebase information**********
10/19 15:33:53 - mmengine - INFO - mmdet:       3.1.0
10/19 15:33:53 - mmengine - INFO - mmseg:       1.1.1
10/19 15:33:53 - mmengine - INFO - mmpretrain:  1.0.2
10/19 15:33:53 - mmengine - INFO - mmocr:       None
10/19 15:33:53 - mmengine - INFO - mmagic:      None
10/19 15:33:53 - mmengine - INFO - mmdet3d:     None
10/19 15:33:53 - mmengine - INFO - mmpose:      None
10/19 15:33:53 - mmengine - INFO - mmrotate:    1.0.0rc1
10/19 15:33:53 - mmengine - INFO - mmaction:    None
10/19 15:33:53 - mmengine - INFO - mmrazor:     None
10/19 15:33:53 - mmengine - INFO - mmyolo:      0.6.0

Error traceback

No response

open-mmlab / mmdeploy

[Bug] mmseg batch inference by C++ SDK #2505

Checklist

Describe the bug

Reproduction

Environment

Error traceback