open-mmlab / mmrazor

OpenMMLab Model Compression Toolbox and Benchmark.
https://mmrazor.readthedocs.io/en/latest/
Apache License 2.0

Quantizing rtmdet to int8 fails #612

Open AbhiM98 opened 9 months ago

AbhiM98 commented 9 months ago

Describe the bug

I am trying to use mmrazor to quantize the model, but it fails with the following error:

Traceback (most recent call last):
  File "tools/ptq.py", line 74, in <module>
    main()
  File "tools/ptq.py", line 67, in main
    runner = Runner.from_cfg(cfg)
  File "/home/ubuntu/.pyenv/versions/mmdeploy/lib/python3.8/site-packages/mmengine/runner/runner.py", line 445, in from_cfg
    runner = cls(
  File "/home/ubuntu/.pyenv/versions/mmdeploy/lib/python3.8/site-packages/mmengine/runner/runner.py", line 412, in __init__
    self.model = self.build_model(model)
  File "/home/ubuntu/.pyenv/versions/mmdeploy/lib/python3.8/site-packages/mmengine/runner/runner.py", line 819, in build_model
    model = MODELS.build(model)
  File "/home/ubuntu/.pyenv/versions/mmdeploy/lib/python3.8/site-packages/mmengine/registry/registry.py", line 570, in build
    return self.build_func(cfg, *args, **kwargs, registry=self)
  File "/home/ubuntu/.pyenv/versions/mmdeploy/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 232, in build_model_from_cfg
    return build_from_cfg(cfg, registry, default_args)
  File "/home/ubuntu/.pyenv/versions/mmdeploy/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
    obj = obj_cls(**args)  # type: ignore
  File "/home/ubuntu/mmrazor/mmrazor/models/algorithms/quantization/mm_architecture.py", line 90, in __init__
    self.qmodels = self._build_qmodels(self.architecture)
  File "/home/ubuntu/mmrazor/mmrazor/models/algorithms/quantization/mm_architecture.py", line 297, in _build_qmodels
    observed_module = self.quantizer.prepare(
  File "/home/ubuntu/mmrazor/mmrazor/models/quantizers/native_quantizer.py", line 231, in prepare
    traced_graph = self.tracer.trace(model, concrete_args=concrete_args)
  File "/home/ubuntu/mmrazor/mmrazor/models/task_modules/tracer/fx/custom_tracer.py", line 421, in trace
    'output', (self.create_arg(fn(*args)), ), {},
  File "/home/ubuntu/mmdetection/mmdet/models/detectors/base.py", line 100, in forward
    return self.predict(inputs, data_samples)
  File "/home/ubuntu/mmdetection/mmdet/models/detectors/single_stage.py", line 110, in predict
    results_list = self.bbox_head.predict(
  File "/home/ubuntu/mmdetection/mmdet/models/dense_heads/base_dense_head.py", line 207, in predict
    predictions = self.predict_by_feat(
  File "/home/ubuntu/mmdeploy/mmdeploy/codebase/mmdet/models/dense_heads/rtmdet_ins_head.py", line 60, in rtmdet_ins_head__predict_by_feat
    mlvl_priors = self.prior_generator.grid_priors(
  File "/home/ubuntu/mmdetection/mmdet/models/task_modules/prior_generators/point_generator.py", line 164, in grid_priors
    priors = self.single_level_grid_priors(
  File "/home/ubuntu/mmdeploy/mmdeploy/codebase/mmdet/models/task_modules/prior_generators/point_generator.py", line 48, in mlvl_point_generator__single_level_grid_priors__tensorrt
    shift_x = (torch.arange(0, feat_w, device=device) + self.offset) * stride_w
TypeError: arange() received an invalid combination of arguments - got (int, Proxy, device=Attribute), but expected one of:
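For context, the crash happens while mmrazor's CustomTracer is symbolically tracing the model: inside the mmdeploy TensorRT rewrite of single_level_grid_priors, feat_w is a torch.fx Proxy rather than an int, and torch.arange() does not accept a Proxy for its end argument. Below is a minimal, self-contained sketch (independent of mmrazor/mmdeploy; the module and names are made up for illustration) that should reproduce the same class of error:

import torch
from torch import fx


class TinyPriorGenerator(torch.nn.Module):
    """Illustrative module mimicking the grid-prior pattern from the traceback."""

    def forward(self, feat):
        feat_w = feat.size(2)  # becomes an fx Proxy during symbolic tracing
        stride_w, offset = 8, 0.5
        # Same pattern as single_level_grid_priors: arange over a traced width.
        shift_x = (torch.arange(0, feat_w, device=feat.device) + offset) * stride_w
        return shift_x


# Expected to fail with roughly:
#   TypeError: arange() received an invalid combination of arguments
#   - got (int, Proxy, device=Attribute), ...
fx.symbolic_trace(TinyPriorGenerator())

As I understand it, this is why shape-dependent post-processing such as predict_by_feat / loss_by_feat is meant to be listed in skipped_methods, so the tracer does not descend into it.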

To Reproduce

The command executed:

python tools/ptq.py configs/quantization/ptq/base/ptq_tensorrt_rtmdet_s_8xb8-300e_coco_calib32xb32.py

ptq_tensorrt_rtmdet_s_8xb8-300e_coco_calib32xb32.py is as follows:

_base_ = [
    '../../../../rtm-det-config/config.py',
    '../../deploy_cfgs/mmdet/detection_tensorrt-int8-explicit_dynamic-rtmdet.py'  # noqa: E501
]
_base_.val_dataloader.batch_size = 32
test_cfg = dict(
    type='mmrazor.PTQLoop',
    calibrate_dataloader=_base_.val_dataloader,
    calibrate_steps=32,
)
float_checkpoint = '/home/ubuntu/mmrazor/epoch_300.pth'#'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet-ins_s_8xb32-300e_coco/rtmdet-ins_s_8xb32-300e_coco_20221121_212604-fdc5d7ec.pth'  # noqa: E501
global_qconfig = dict(
    w_observer=dict(type='mmrazor.PerChannelMinMaxObserver'),
    a_observer=dict(type='mmrazor.MovingAverageMinMaxObserver'),
    w_fake_quant=dict(type='mmrazor.FakeQuantize'),
    a_fake_quant=dict(type='mmrazor.FakeQuantize'),
    w_qscheme=dict(
        qdtype='qint8', bit=8, is_symmetry=True, is_symmetric_range=True),
    a_qscheme=dict(
        qdtype='qint8', bit=8, is_symmetry=True, averaging_constant=0.1),
)
model = dict(
    _delete_=True,
    type='mmrazor.MMArchitectureQuant',
    data_preprocessor=dict(
        type='mmdet.DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='mmdet.BatchSyncRandomResize',
                random_size_range=(640, 640),
                size_divisor=32,
                interval=10)
        ]),
    architecture=_base_.model,
    deploy_cfg=_base_.deploy_cfg, 
    float_checkpoint=float_checkpoint,
    quantizer=dict(
        type='mmrazor.TensorRTQuantizer',
        global_qconfig=global_qconfig,
        tracer=dict(
            type='mmrazor.CustomTracer',
            skipped_methods=[
                'mmdet.models.dense_heads.rtmdet_ins_head.RTMDetHead.predict_by_feat',  # noqa: E501
                'mmdet.models.dense_heads.rtmdet_ins_head.RTMDetHead.loss_by_feat',
            ])),
    # test_cfg=dict(
    #     # type='mmdet.test_cfg',
    #     mask_thr_binary=0.5,
    #     max_per_img=100,
    #     min_bbox_size=16,
    #     nms=dict(iou_threshold=0.65, type='mmdeploy.nms'),
    #     nms_pre=900,
    #     score_thr=0.2),
)

model_wrapper_cfg = dict(
    type='mmrazor.MMArchitectureQuantDDP',
    broadcast_buffers=False,
    find_unused_parameters=True)

custom_hooks = []
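One thing I noticed but have not verified: even though predict_by_feat is listed in skipped_methods, the traceback still descends into it. In mmdet, the head defined in mmdet/models/dense_heads/rtmdet_ins_head.py is RTMDetInsHead (RTMDetHead lives in rtmdet_head.py), so the paths above may point at the wrong class. A possible variant to try (untested guess on my part):

            skipped_methods=[
                'mmdet.models.dense_heads.rtmdet_ins_head.RTMDetInsHead.predict_by_feat',  # noqa: E501
                'mmdet.models.dense_heads.rtmdet_ins_head.RTMDetInsHead.loss_by_feat',  # noqa: E501
            ]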

detection_tensorrt-int8-explicit_dynamic-rtmdet.py is as follows:

deploy_cfg = dict(
    onnx_config=dict(
        type='onnx',
        export_params=True,
        keep_initializers_as_inputs=False,
        opset_version=11,
        save_file='end2end.onnx',
        input_names=['input'],
        output_names=['dets', 'labels', 'masks'],
        input_shape=None,
        optimize=True,
        # dynamic_axes=dict(
        #     input=dict({
        #         0: 'batch',
        #         2: 'height',
        #         3: 'width'
        #     }),
        #     dets=dict({
        #         0: 'batch',
        #         1: 'num_dets'
        #     }),
        #     labels=dict({
        #         0: 'batch',
        #         1: 'num_dets'
        #     }))
    ),
    # calib_config=dict(create_calib=True, calib_file='calib_data.h5'),

    codebase_config=dict(
        type='mmdet',
        task='InstanceSegmentation',
        model_type='end2end',
        post_processing=dict(
            score_threshold=0.05,
            confidence_threshold=0.005,
            iou_threshold=0.5,
            max_output_boxes_per_class=200,
            pre_top_k=1000,
            keep_top_k=100,
            background_label_id=-1),
        export_postprocess_mask=True),
    backend_config=dict(
        type='tensorrt',
        common_config=dict(
            fp16_mode=True,
            max_workspace_size=1<<30,
            int8_mode=True,
            explicit_quant_mode=False),
        model_inputs=[
            dict(
                input_shapes=dict(
                    input=dict(
                        min_shape=[1, 3, 640, 640],
                        opt_shape=[1, 3, 640, 640],
                        max_shape=[1, 3, 640, 640])))
        ]),
    function_record_to_pop=[
        'mmdet.models.detectors.single_stage.SingleStageDetector.forward',
        'mmdet.models.detectors.two_stage.TwoStageDetector.forward',
        'mmdet.models.detectors.single_stage_instance_seg.SingleStageInstanceSegmentor.forward',  # noqa: E501
        'torch.cat'
    ]
)
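In case it helps with reproduction: the PTQ config above pulls this file in via _base_ and passes it to MMArchitectureQuant as deploy_cfg, which, as far as I can tell, is what puts the mmdeploy TensorRT rewrites (e.g. rtmdet_ins_head__predict_by_feat in the traceback) in effect during tracing. A quick way to inspect the merged config before launching tools/ptq.py (just a sketch; the file path is the one used in the command above):

# Sketch: inspect the merged config that Runner/MMArchitectureQuant will see.
from mmengine.config import Config

cfg = Config.fromfile(
    'configs/quantization/ptq/base/ptq_tensorrt_rtmdet_s_8xb8-300e_coco_calib32xb32.py')
print(cfg.model.type)                               # mmrazor.MMArchitectureQuant
print(cfg.model.quantizer.tracer.skipped_methods)   # methods the tracer should skip
print(cfg.deploy_cfg.backend_config.common_config)  # fp16/int8 flags for TensorRT
print(cfg.deploy_cfg.backend_config.model_inputs)   # static 1x3x640x640 input shapes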

Additional context

Also, are there proper instructions I can refer to for this, and for QAT as well? [here]

jslok commented 7 months ago

Getting the same error. Were you able to solve it?

shwu-nyunai commented 3 months ago

+1