NVIDIA / TensorRT

NVIDIA® TensorRT™ is an SDK for high-performance deep learning inference on NVIDIA GPUs. This repository contains the open source components of TensorRT.
https://developer.nvidia.com/tensorrt
Apache License 2.0
10.74k stars 2.13k forks source link

INT8 Quantization of a custom model failed #4037

Open faizan1234567 opened 3 months ago

faizan1234567 commented 3 months ago

Description

Environment

TensorRT Version: 8.5

NVIDIA GPU: Jetson Orin Nano developer kit 8gb

NVIDIA Driver Version:

CUDA Version:11.4

CUDNN Version:8.6

Operating System:

Python Version (if applicable): 3.8.10

Tensorflow Version (if applicable):

PyTorch Version (if applicable): 2.1.0a0+41361538.nv23.06

my implementation ` import warnings warnings.filterwarnings("ignore") import torch import torch.nn as nn import torch.optim as optim import torch.utils.data as data import torchvision.transforms as transforms from torchvision import models, datasets

import pytorch_quantization from pytorch_quantization import nn as quant_nn from pytorch_quantization import quant_modules from pytorch_quantization import calib from tqdm import tqdm

print(pytorch_quantization.version) import tensorrt as trt import numpy as np import tarfile import shutil import logging import yaml

from TarDAL.config import ConfigDict, from_dict from pathlib import Path import os from datetime import datetime import onnx

from pipeline.fuse import Fuse

from TarDAL.module.fuse.generator import Generator

from run_trt_inference import cDataset from torch.utils.data import Dataset, DataLoader

# Configure a module-level logger that emits timestamped messages through a
# stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

stream_handler = logging.StreamHandler()
# Date format needs a colon between minutes and seconds ('%H:%M:%S').
formatter = logging.Formatter(
    fmt="%(asctime)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

def load_tardal_weights(model, ckpt):
    """Load trained PyTorch weights into ``model`` and return it.

    Args:
        model: object exposing ``load_state_dict``.
        ckpt: mutable mapping of checkpoint entries. A ``'use_eval'`` entry,
            if present, is removed **in place** before loading, so the
            caller's mapping is modified.

    Returns:
        The same ``model`` object that was passed in.
    """
    # Drop the 'use_eval' entry (presumably checkpoint metadata rather than a
    # weight tensor -- TODO confirm) before handing the mapping to the model.
    ckpt.pop('use_eval', None)
    model.load_state_dict(ckpt)
    return model

def load_tardal(weights, cfg):
    """Build the TarDAL generator and load its trained weights for ONNX conversion.

    Args:
        weights: checkpoint location handed directly to ``torch.load``.
        cfg: path (``str`` or ``Path``) to a YAML config file, or an
            already-built config object exposing ``fuse.dim`` and
            ``fuse.depth``.

    Returns:
        A ``(model, ckpt)`` tuple: the generator switched to eval mode, and
        the checkpoint mapping after it has been passed through
        ``load_tardal_weights``.
    """
    # Init config
    logger.info("Initializing Configuration settings! \n")
    if isinstance(cfg, (str, Path)):
        # Use a context manager so the config file handle is always closed.
        with Path(cfg).open('r') as cfg_file:
            config = yaml.safe_load(cfg_file)
        config = from_dict(config)  # convert dict to object
    else:
        config = cfg

    # Init model
    logger.info("Initializing model \n")
    f_dim, f_depth = config.fuse.dim, config.fuse.depth
    model = Generator(dim=f_dim, depth=f_depth)

    # Load weights
    logger.info("Loading model weights to the model \n")
    # Without CUDA, keep every storage where it was deserialized; with CUDA,
    # let torch.load use its default placement.
    if torch.cuda.is_available():
        map_location = None
    else:
        map_location = lambda storage, loc: storage
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    ckpt = torch.load(weights, map_location=map_location)
    model = load_tardal_weights(model, ckpt)
    model.eval()
    return model, ckpt

def compute_amax(model, **kwargs):
    """Load collected calibration results into the model's quantizers.

    Walks ``model.named_modules()`` and, for every
    ``quant_nn.TensorQuantizer`` that has a calibrator attached, calls
    ``load_calib_amax``. Finally moves the model to CUDA.

    Args:
        model: module exposing ``named_modules()`` and ``cuda()``.
        **kwargs: forwarded to ``load_calib_amax`` for calibrators that are
            not ``calib.MaxCalibrator`` instances.
    """
    # Load calib result
    for _, module in model.named_modules():
        if not isinstance(module, quant_nn.TensorQuantizer):
            continue
        if module._calibrator is None:
            continue
        if isinstance(module._calibrator, calib.MaxCalibrator):
            # Max calibrators are loaded without any extra arguments.
            module.load_calib_amax()
        else:
            module.load_calib_amax(**kwargs)
    model.cuda()

def collect_stats(model, data_loader, num_batches):
    """Feed data to the network and collect calibration statistics.

    Quantizers with a calibrator are switched from quantizing to calibrating
    for the duration of the pass (quantizers without one are disabled), then
    everything is switched back afterwards.

    Args:
        model: callable module exposing ``named_modules()``.
        data_loader: iterable yielding ``(ir, vi, image_name)`` triples; ``ir``
            and ``vi`` are concatenated along dim 1 to form the model input.
        num_batches: maximum number of batches to feed. At most this many
            batches are consumed (the previous post-forward ``i >= num_batches``
            check let one extra batch through).
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from itertools import islice

    # Enable calibrators
    for _, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.disable_quant()
                module.enable_calib()
            else:
                module.disable()

    # Feed data to the network for collecting stats
    batches = islice(data_loader, num_batches)
    for ir, vi, _image_name in tqdm(batches, total=num_batches):
        image = torch.cat((ir, vi), dim=1)
        model(image.cuda())

    # Disable calibrators
    for _, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.enable_quant()
                module.disable_calib()
            else:
                module.enable()

if __name__ == "__main__":
    # Script entry point: calibrate the TarDAL generator with max calibration
    # (PTQ), save the calibrated weights, and export a fake-quantized ONNX file.

    # Patch in pytorch_quantization's quantized modules before the model is built.
    quant_modules.initialize()

    # Inputs and calibration data.
    weights = "TarDAL/weights/v1/tardal-dt.pth"
    cfg = "TarDAL/config/default.yaml"
    data_path = "images"
    homography_path = "camera_data/homography.npz"
    image_shape = (640, 640, 1)
    transformation = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(size=image_shape[:2]),
        transforms.ToTensor(),
    ])

    dataset = cDataset(data_path, transforms=transformation, homography_mat=homography_path)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False)

    q_model, ckpt = load_tardal(weights=weights, cfg=cfg)
    q_model = q_model.cuda()

    # Strip a leading 'module.' from checkpoint keys. Match the full prefix
    # including the dot so keys that merely start with "module" are untouched.
    prefix = "module."
    modified_state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): val
        for key, val in ckpt.items()
    }

    # Load the pre-trained checkpoint
    q_model.load_state_dict(modified_state_dict)

    # Calibrate the model using max calibration technique.
    with torch.no_grad():
        collect_stats(q_model, data_loader, num_batches=16)
        compute_amax(q_model, method="max")

    # Save the PTQ model
    torch.save(q_model.state_dict(), "TarDAL/weights/tardal-dt-ptq.pth")

    # Convert to ONNX.
    # Set the static member of TensorQuantizer so PyTorch's own fake
    # quantization functions are used during export.
    quant_nn.TensorQuantizer.use_fb_fake_quant = True

    # Exporting to ONNX
    dummy_input = torch.randn(1, 2, 640, 640, device='cuda')
    input_names = ["actual_inputs"]
    output_names = ["fused_output"]
    # NOTE(review): constant folding is disabled here; confirm this is what the
    # TensorRT Q/DQ importer expects for this model.
    torch.onnx.export(
        q_model,
        dummy_input,
        "onnx_files/tardal-dt-ptq.onnx",
        verbose=False,
        opset_version=13,
        do_constant_folding=False,
        # These names were defined but never passed to the exporter.
        input_names=input_names,
        output_names=output_names,
    )
# now build the file using int8

`

I am then using the following command to build the engine:

trtexec --onnx=onnx_files/tardal-dt-ptq.onnx --int8 --saveEngine=tensorrt_files/tardal-dt-ptq.trt

but it gives the following error:

[E] Error[2]: [optimizer.cpp::filterQDQFormats::4421] Error Code 2: Internal Error (Assertion !n->candidateRequirements.empty() failed. All of the candidates were removed, which points to the node being incorrectly marked as an int8 node.) [07/30/2024-17:21:39] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) [07/30/2024-17:21:39] [E] Engine could not be created from network [07/30/2024-17:21:39] [E] Building engine failed [07/30/2024-17:21:39] [E] Failed to create engine from model or file. [07/30/2024-17:21:39] [E] Engine set up failed

lix19937 commented 3 months ago

Try to use trt 9.0.

faizan1234567 commented 3 months ago

@lix19937 I will try it and update you.