Dear author, thanks for the amazing work. I adapted the code in the quantize folder to quantize my custom model (a BEVFormer encoder). I still use ResNet and FPN as the backbone and neck. However, when I run it, the weights are still in fp32. Can you give me some suggestions? Here is the source code:
Driver code
from ptq_bev import *  # provides quantize, quantize_net, test_model used below
import argparse
import copy
import os
import sys
import time

import torch
import torch.quantization

from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn, get_model_complexity_info
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint
from mmdet.apis import set_random_seed
from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_model

sys.path.insert(0, 'workspace/source/Mapless')
def main():
    quantize.initialize()

    def str2bool(v):
        # argparse's type=bool treats any non-empty string as True,
        # so parse boolean command-line values explicitly
        return str(v).lower() in ('1', 'true', 'yes')

    parser = argparse.ArgumentParser()
    parser.add_argument("--calibrate_batch", type=int, default=200, help="calibrate batch")
    parser.add_argument("--deterministic", type=str2bool, default=True, help="deterministic")
    parser.add_argument('--test_int8', type=str2bool, default=False, help='test int8 or not')
    parser.add_argument('--test_fp32', type=str2bool, default=False, help='test fp32 or not')
    parser.add_argument("--seed", type=int, default=0, help="seed")
    parser.add_argument('--show', action='store_true', help='show results')
    parser.add_argument('--show-dir', default='work_dirs/lanesegnet_quantize/show',
                        help='directory where results will be saved')
    parser.add_argument('--out', help='output result file in pickle format')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument(
        '--eval',
        type=str,
        default='bbox',
        help='evaluation metrics, which depend on the dataset, e.g., "mAP", '
        '"segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
    parser.add_argument(
        '--eval-options',
        nargs='+',
        action=DictAction,
        help='custom options for evaluation; key-value pairs in xxx=yyy '
        'format will be kwargs for the dataset.evaluate() function')
    parser.add_argument(
        '--format-only',
        action='store_true',
        help='Format the output results without performing evaluation. It is '
        'useful when you want to format the results to a specific format and '
        'submit them to the test server')
    parser.add_argument('--samples', type=int, default=2000, help='samples to benchmark')
    parser.add_argument(
        '--out-dir', default='work_dirs/lanesegnet_quantize',
        help='directory where results will be saved')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn; this will slightly increase '
        'the inference speed')
    args = parser.parse_args()
    # Load the configuration and set up paths
    save_path = 'work_dirs/lanesegnet_quantize/quantized_backbone.pth'
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    config_path = 'plugin/LaneSegNet/configs/lanesegnet_r18_1x1_1e_olv2_subset_A.py'
    checkpoint_path = 'work_dirs/lanesegnet_debug/epoch_24.pth'
    cfg = Config.fromfile(config_path)
    cuda_device = torch.device("cuda:0")
    cpu_device = torch.device("cpu:0")
    if args.seed is not None:
        set_random_seed(args.seed, deterministic=args.deterministic)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # Build the test dataset and dataloader
    dataset = build_dataset(cfg.data.test)
    distributed = False
    samples_per_gpu = 1  # enough
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False,
    )

    # Build the model and load the fp32 checkpoint
    model_fp32 = build_model(cfg.model)
    checkpoint = load_checkpoint(model_fp32, checkpoint_path, map_location='cpu')
    # print(model_fp32.bev_constructor)
    # print_nn_layers_type(model_fp32)
    model_int8 = copy.deepcopy(model_fp32)
    # model.train()
    # fused_model.train()

    if args.test_fp32:
        print("######## fp32 #########")
        model_fp32.to(cuda_device)
        model_fp32 = fuse_conv_bn(model_fp32)
        model_fp32 = MMDataParallel(model_fp32, device_ids=[0])
        model_fp32.eval()
        # test_model(cfg, args, model_fp32, checkpoint, data_loader, dataset)

    model_int8.to(cpu_device)
    model_int8 = quantize_net(model_int8)  # how to make it work
    model_int8 = fuse_conv_bn(model_int8)
    # print(model_int8.bev_constructor)
    model_int8 = MMDataParallel(model_int8, device_ids=[0])
    # print_nn_layers_type(model_int8)
    model_int8.eval()

    # calibrate
    print("start calibrate")
    quantize.set_quantizer_fast(model_int8)
    quantize.calibrate_model(model_int8, data_loader, 0, None, args.calibrate_batch)
    model_int8 = torch.quantization.convert(model_int8, inplace=True)
    torch.save(model_int8, save_path)

    if args.test_int8:
        print("######## int8 #########")
        model_int8.to(cuda_device)
        test_model(cfg, args, model_int8, checkpoint, data_loader, dataset)


if __name__ == '__main__':
    main()
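For reference, this is roughly how I inspect what ends up inside model_int8 before saving (just a sketch; it assumes the quantize utilities wrap NVIDIA's pytorch_quantization package, so TensorQuantizer is the fake-quant module class):

# Sketch: summarize which "quantized" modules were actually inserted.
# Assumes the quantize utilities are built on NVIDIA's pytorch_quantization.
from pytorch_quantization.nn import TensorQuantizer


def print_quant_summary(model):
    num_quantizers, num_modules = 0, 0
    for name, module in model.named_modules():
        num_modules += 1
        if isinstance(module, TensorQuantizer):
            # Fake-quant nodes only simulate int8 in the forward pass;
            # the wrapped conv/linear weights themselves stay fp32.
            num_quantizers += 1
    print(f'{num_quantizers} TensorQuantizer modules out of {num_modules} total')


# e.g. print_quant_summary(model_int8) right before torch.save(...)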
After quantizing, I printed out part of the config:
I think I got something wrong, because what I see are the fake TensorQuantizer modules used for quantization-aware training. That would explain why the weights are still in fp32 when I save my quantized checkpoint.
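To double-check, this is roughly how I look at the dtypes in the saved file (again just a sketch; it loads the checkpoint written by the script above):

# Sketch: verify the dtypes of the weights that were actually saved.
import torch

ckpt = torch.load('work_dirs/lanesegnet_quantize/quantized_backbone.pth',
                  map_location='cpu')
# torch.save() above stored the whole module, so pull out its state_dict;
# otherwise treat the loaded object as a plain state_dict.
state_dict = ckpt.state_dict() if hasattr(ckpt, 'state_dict') else ckpt

for name, tensor in state_dict.items():
    if name.endswith('weight'):
        print(name, tensor.dtype)  # every entry still comes out as torch.float32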