baaivision / EVA

EVA Series: Visual Representation Fantasies from BAAI
MIT License
2.3k stars 167 forks source link

use torch.jit.trace export pytorch 2 torchscript fail. #131

Closed xinlin-xiao closed 11 months ago

xinlin-xiao commented 11 months ago

I rewrite EVA/EVA-master-project/EVA-02/det/tools/deploy/export_model.py to use LazyConfig to read EVA/EVA-master-project/EVA-02/det/projects/ViTDet/configs/eva2_o365_to_coco/eva2_o365_to_coco_cascade_mask_rcnn_vitdet_l_8attn_1536_lrd0p8.py

The export_model.py after change:

# Copyright (c) Facebook, Inc. and its affiliates.
import argparse
import os
from typing import Dict, List, Tuple
import torch
from torch import Tensor, nn

import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader, detection_utils
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import (
    STABLE_ONNX_OPSET_VERSION,
    TracingAdapter,
    dump_torchscript_IR,
    scripting_with_instances,
)
from detectron2.modeling import GeneralizedRCNN, RetinaNet, build_model
from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.projects.point_rend import add_pointrend_config
from detectron2.structures import Boxes
from detectron2.utils.env import TORCH_VERSION
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import setup_logger

from detectron2.config import LazyConfig, instantiate
from detectron2.engine import (
    AMPTrainer,
    SimpleTrainer,
    default_argument_parser,
    default_setup,
    default_writers,
    hooks,#导入hooks模块
    launch,
)

def setup_cfg(args):
    """Load a LazyConfig (python-file) config and prepare it for export.

    Applies command-line overrides, materializes the lazily-declared
    DATASETS/MODEL entries this script reads directly, then runs the
    standard detectron2 setup (logging, output dir, environment info).
    """
    # Lazy config instead of the legacy yacs CfgNode path.
    cfg = LazyConfig.load(args.config_file)
    # Apply "key=value" overrides supplied on the command line
    # (e.g. learning rate, batch size).
    cfg = LazyConfig.apply_overrides(cfg, args.opts)

    # Resolve the yacs-style entries declared in the config file;
    # instantiate() passes plain Python values through unchanged.
    cfg.DATASETS.TRAIN = instantiate(cfg.DATASETS.TRAIN)
    cfg.DATASETS.TEST = instantiate(cfg.DATASETS.TEST)
    cfg.MODEL.WEIGHTS = instantiate(cfg.MODEL.WEIGHTS)
    cfg.MODEL.META_ARCHITECTURE = instantiate(cfg.MODEL.META_ARCHITECTURE)
    cfg.MODEL.DEVICE = instantiate(cfg.MODEL.DEVICE)
    cfg.MODEL.PIXEL_MEAN = instantiate(cfg.MODEL.PIXEL_MEAN)

    # Standard detectron2 bootstrapping (loggers, seeding, etc.).
    default_setup(cfg, args)
    return cfg

def export_caffe2_tracing(cfg, torch_model, inputs):
    """Export via the Caffe2Tracer path.

    Depending on the module-level ``args.format``, writes a caffe2
    protobuf plus a graph visualization, an ONNX file, or a TorchScript
    module into ``args.output``. Returns the caffe2 model only for the
    "caffe2" format; otherwise returns None.
    """
    from detectron2.export import Caffe2Tracer

    tracer = Caffe2Tracer(cfg, torch_model, inputs)
    fmt = args.format  # NOTE: relies on the module-level `args`
    if fmt == "caffe2":
        c2_model = tracer.export_caffe2()
        c2_model.save_protobuf(args.output)
        # Also dump a visualization of the exported graph.
        c2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=inputs)
        return c2_model
    if fmt == "onnx":
        import onnx

        onnx.save(tracer.export_onnx(), os.path.join(args.output, "model.onnx"))
    elif fmt == "torchscript":
        scripted = tracer.export_torchscript()
        with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
            torch.jit.save(scripted, f)
        dump_torchscript_IR(scripted, args.output)

# experimental. API not yet final
def export_scripting(torch_model):
    """Export ``torch_model`` with torch.jit.script and save it to args.output.

    The model is wrapped in an adapter whose forward() returns plain dicts
    of tensors instead of Instances objects, which are not deployable.
    Reads the module-level ``args``; requires ``args.format == "torchscript"``.
    Returns None — no Python-side evaluation wrapper is provided yet.
    """
    assert TORCH_VERSION >= (1, 8)
    # Type schema of the Instances fields the scripted model may emit;
    # scripting_with_instances needs this to compile the output type.
    fields = {
        "proposal_boxes": Boxes,
        "objectness_logits": Tensor,
        "pred_boxes": Boxes,
        "scores": Tensor,
        "pred_classes": Tensor,
        "pred_masks": Tensor,
        "pred_keypoints": torch.Tensor,
        "pred_keypoint_heatmaps": torch.Tensor,
    }
    assert args.format == "torchscript", "Scripting only supports torchscript format."

    class ScriptableAdapterBase(nn.Module):
        # Use this adapter to workaround https://github.com/pytorch/pytorch/issues/46944
        # by not returning Instances but dicts. Otherwise the exported model is not deployable
        def __init__(self):
            super().__init__()
            self.model = torch_model
            self.eval()

    if isinstance(torch_model, GeneralizedRCNN):

        class ScriptableAdapter(ScriptableAdapterBase):
            # GeneralizedRCNN: skip postprocessing so raw Instances fields come back.
            def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
                instances = self.model.inference(inputs, do_postprocess=False)
                return [i.get_fields() for i in instances]

    else:

        class ScriptableAdapter(ScriptableAdapterBase):
            # Other meta-architectures: call the model directly.
            def forward(self, inputs: Tuple[Dict[str, torch.Tensor]]) -> List[Dict[str, Tensor]]:
                instances = self.model(inputs)
                return [i.get_fields() for i in instances]

    ts_model = scripting_with_instances(ScriptableAdapter(), fields)
    with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
        torch.jit.save(ts_model, f)
    dump_torchscript_IR(ts_model, args.output)
    # TODO inference in Python now missing postprocessing glue code
    return None

# experimental. API not yet final
def export_tracing(torch_model, inputs):
    """Export the model with torch.jit.trace (or torch.onnx.export) and save it.

    Only the "image" tensor of the first input is traced; height/width keys
    are dropped. Reads the module-level ``args`` and ``logger``. Returns an
    evaluation wrapper usable by inference_on_dataset when exporting a
    GeneralizedRCNN/RetinaNet to torchscript, otherwise None.
    """
    assert TORCH_VERSION >= (1, 8)
    image = inputs[0]["image"]
    inputs = [{"image": image}]  # remove other unused keys

    if isinstance(torch_model, GeneralizedRCNN):

        def inference(model, inputs):
            # use do_postprocess=False so it returns ROI mask
            inst = model.inference(inputs, do_postprocess=False)[0]
            return [{"instances": inst}]

    else:
        inference = None  # assume that we just call the model directly

    # TracingAdapter flattens dict/Instances inputs and outputs into plain
    # tensors so torch.jit.trace / torch.onnx.export can handle them.
    traceable_model = TracingAdapter(torch_model, inputs, inference)

    if args.format == "torchscript":
        ts_model = torch.jit.trace(traceable_model, (image,))
        with PathManager.open(os.path.join(args.output, "model.ts"), "wb") as f:
            torch.jit.save(ts_model, f)
        dump_torchscript_IR(ts_model, args.output)
    elif args.format == "onnx":
        with PathManager.open(os.path.join(args.output, "model.onnx"), "wb") as f:
            torch.onnx.export(traceable_model, (image,), f, opset_version=STABLE_ONNX_OPSET_VERSION)
    logger.info("Inputs schema: " + str(traceable_model.inputs_schema))
    logger.info("Outputs schema: " + str(traceable_model.outputs_schema))

    if args.format != "torchscript":
        return None
    if not isinstance(torch_model, (GeneralizedRCNN, RetinaNet)):
        return None

    def eval_wrapper(inputs):
        """
        The exported model does not contain the final resize step, which is typically
        unused in deployment but needed for evaluation. We add it manually here.
        """
        input = inputs[0]
        # Re-assemble the flattened trace outputs into Instances, then resize
        # them back to the original image resolution for evaluation.
        instances = traceable_model.outputs_schema(ts_model(input["image"]))[0]["instances"]
        postprocessed = detector_postprocess(instances, input["height"], input["width"])
        return [{"instances": postprocessed}]

    return eval_wrapper

def get_sample_inputs(args):
    """Build a one-image input batch for tracing/export.

    Without --sample-image, pulls the first batch from the test dataloader
    defined by the module-level LazyConfig ``cfg``. With --sample-image,
    preprocesses that image the same way DefaultPredictor does.
    """
    if args.sample_image is not None:
        # NOTE(review): this branch reads yacs-style keys (cfg.INPUT.*)
        # that a LazyConfig may not define — confirm before relying on
        # --sample-image with a lazy config.
        original_image = detection_utils.read_image(args.sample_image, format=cfg.INPUT.FORMAT)
        height, width = original_image.shape[:2]
        # Same resize augmentation as DefaultPredictor.
        aug = T.ResizeShortestEdge(
            [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
        )
        resized = aug.get_transform(original_image).apply_image(original_image)
        # HWC uint8 -> CHW float32 tensor.
        tensor = torch.as_tensor(resized.astype("float32").transpose(2, 0, 1))
        return [{"image": tensor, "height": height, "width": width}]

    # No sample image given: take the first batch from the test loader.
    loader = instantiate(cfg.dataloader.test)
    return next(iter(loader))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Export a model for deployment.")
    parser.add_argument(
        "--format",
        choices=["caffe2", "onnx", "torchscript"],
        help="output format",
        default="torchscript",
    )
    parser.add_argument(
        "--export-method",
        choices=["caffe2_tracing", "tracing", "scripting"],
        help="Method to export models",
        default="tracing",
    )
    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
    parser.add_argument("--sample-image", default=None, type=str, help="sample image for input")
    parser.add_argument("--run-eval", action="store_true")
    parser.add_argument("--output", help="output directory for the converted model")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    logger = setup_logger()
    logger.info("Command line arguments: " + str(args))
    PathManager.mkdirs(args.output)
    # Disable re-specialization on new shapes. Otherwise --run-eval will be slow
    torch._C._jit_set_bailout_depth(1)

    cfg = setup_cfg(args)

    # Build the model from the LazyConfig and load the checkpoint weights.
    torch_model = instantiate(cfg.model)
    torch_model.to(cfg.train.device)  # move to the configured device (usually GPU)
    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)
    torch_model.eval()

    # get sample data
    sample_inputs = get_sample_inputs(args)

    # convert and save model
    if args.export_method == "caffe2_tracing":
        exported_model = export_caffe2_tracing(cfg, torch_model, sample_inputs)
    elif args.export_method == "scripting":
        exported_model = export_scripting(torch_model)
    elif args.export_method == "tracing":
        exported_model = export_tracing(torch_model, sample_inputs)

    # run evaluation with the converted model
    if args.run_eval:
        assert exported_model is not None, (
            "Python inference is not yet implemented for "
            f"export_method={args.export_method}, format={args.format}."
        )
        logger.info("Running evaluation ... this takes a long time if you export to CPU.")
        # BUG FIX: cfg.DATASETS.TEST may be a plain string (as in the lazy
        # config used here); indexing a string with [0] would yield only its
        # first character, so the evaluator would look up dataset "m".
        # Accept both a single dataset name and a tuple of names.
        test_datasets = cfg.DATASETS.TEST
        dataset = test_datasets if isinstance(test_datasets, str) else test_datasets[0]
        data_loader = instantiate(cfg.dataloader.test)
        # NOTE: hard-coded evaluator. change to the evaluator for your dataset
        evaluator = COCOEvaluator(dataset, output_dir=args.output)
        metrics = inference_on_dataset(exported_model, data_loader, evaluator)
        print_csv_format(metrics)
    logger.info("Success.")

The eva2_o365_to_coco/eva2_o365_to_coco_cascade_mask_rcnn_vitdet_l_8attn_1536_lrd0p8.py is:


# EVA-02-L cascade Mask R-CNN (ViTDet) at 1536px with 8 global-attention
# blocks, fine-tuned from O365 weights on a custom 107-class dataset.
from functools import partial  # BUG FIX: `partial` is used below but was never imported

from ..common.coco_loader_lsj_1536 import dataloader
from .cascade_mask_rcnn_vitdet_b_100ep import (
    lr_multiplier,
    model,
    train,
    optimizer,
    get_vit_lr_decay_rate,
)

from detectron2.config import LazyCall as L
from fvcore.common.param_scheduler import *
from detectron2.solver import WarmupParamScheduler

# yacs-style keys consumed by the export script (setup_cfg / __main__).
# BUG FIX: TEST is a 1-tuple so that `cfg.DATASETS.TEST[0]` yields the
# dataset name; indexing a plain string would return its first character.
DATASETS = dict(TRAIN="mydata_train", TEST=("mydata_val",))
MODEL = dict(
    WEIGHTS="/mnt/data1/download_new/EVA/models-EVA-02/model-select/eva02_L_coco_det_sys_o365.pth",
    META_ARCHITECTURE="GeneralizedRCNN",
    DEVICE="cuda",
    PIXEL_MEAN=[123.675, 116.28, 103.53],
)

# EVA-02-L backbone hyper-parameters.
model.backbone.net.img_size = 1536
model.backbone.square_pad = 1536
model.backbone.net.patch_size = 16
model.backbone.net.window_size = 16
model.backbone.net.embed_dim = 1024
model.backbone.net.depth = 24
model.backbone.net.num_heads = 16
model.backbone.net.mlp_ratio = 4 * 2 / 3
model.backbone.net.use_act_checkpoint = True
model.backbone.net.drop_path_rate = 0.3
# Number of classes in the custom dataset.
model.roi_heads.num_classes = 107

# 2, 5, 8, 11, 14, 17, 20, 23 for global attention
model.backbone.net.window_block_indexes = (
    list(range(0, 2)) + list(range(3, 5)) + list(range(6, 8)) + list(range(9, 11)) + list(range(12, 14)) + list(range(15, 17)) + list(range(18, 20)) + list(range(21, 23))
)

optimizer.lr = 4e-5
# Layer-wise LR decay across the 24 transformer blocks.
optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.8, num_layers=24)
optimizer.params.overrides = {}
optimizer.params.weight_decay_norm = None
# Maximum number of training iterations.
train.max_iter = 30000
# Exponential moving average of model weights.
train.model_ema.enabled = True
train.model_ema.device = "cuda"
train.model_ema.decay = 0.9999

# Flat cosine schedule (start == end) with a short warmup.
lr_multiplier = L(WarmupParamScheduler)(
    scheduler=L(CosineParamScheduler)(
        start_value=1,
        end_value=1,
    ),
    warmup_length=0.01,
    warmup_factor=0.001,
)

dataloader.test.num_workers = 0
dataloader.train.total_batch_size = 4
# Point train/test/evaluator at the registered custom datasets.
dataloader.test.dataset.names = "mydata_val"
dataloader.train.dataset.names = "mydata_train"
dataloader.evaluator.dataset_name = "mydata_val"

And,I use the :

./export_model.py --config-file /mnt/data1/download_new/EVA/EVA-master-project/EVA-02/det/projects/ViTDet/configs/eva2_o365_to_coco/eva2_o365_to_coco_cascade_mask_rcnn_vitdet_l_8attn_1536_lrd0p8.py --output ./output --export-method tracing --format torchscript

to export pytorch 2 torchscript.

It returns an error:

  File "./export_model.py", line 264, in <module>
    exported_model = export_tracing(torch_model, sample_inputs)
  File "./export_model.py", line 161, in export_tracing
    ts_model = torch.jit.trace(traceable_model, (image,))
  File "/usr/local/lib/python3.8/dist-packages/torch/jit/_trace.py", line 794, in trace
    return trace_module(
  File "/usr/local/lib/python3.8/dist-packages/torch/jit/_trace.py", line 1056, in trace_module
    module._c._create_method_from_trace(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1488, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/mnt/data1/download_new/EVA/EVA-master-project/EVA-02/det/detectron2/export/flatten.py", line 294, in forward
    outputs = self.inference_func(self.model, *inputs_orig_format)
  File "./export_model.py", line 152, in inference
    inst = model.inference(inputs, do_postprocess=False)[0]
  File "/mnt/data1/download_new/EVA/EVA-master-project/EVA-02/det/detectron2/modeling/meta_arch/rcnn.py", line 228, in inference
    features = self.backbone(images.tensor)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1488, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/mnt/data1/download_new/EVA/EVA-master-project/EVA-02/det/detectron2/modeling/backbone/vit.py", line 564, in forward
    bottom_up_features = self.net(x)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1488, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/mnt/data1/download_new/EVA/EVA-master-project/EVA-02/det/detectron2/modeling/backbone/vit.py", line 432, in forward
    x = blk(x)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1488, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/fairscale/nn/checkpoint/checkpoint_activations.py", line 171, in _checkpointed_forward
    return original_forward(module, *args, **kwargs)
  File "/mnt/data1/download_new/EVA/EVA-master-project/EVA-02/det/detectron2/modeling/backbone/vit.py", line 276, in forward
    x = self.attn(x)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1488, in _slow_forward
    result = self.forward(*input, **kwargs)
  File "/mnt/data1/download_new/EVA/EVA-master-project/EVA-02/det/detectron2/modeling/backbone/vit.py", line 126, in forward
    x = xops.memory_efficient_attention(q, k, v)
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/__init__.py", line 192, in memory_efficient_attention
    return _memory_efficient_attention(
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/__init__.py", line 290, in _memory_efficient_attention
    return _memory_efficient_attention_forward(
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/__init__.py", line 310, in _memory_efficient_attention_forward
    out, *_ = op.apply(inp, needs_gradient=False)
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/cutlass.py", line 175, in apply
    out, lse, rng_seed, rng_offset = cls.OPERATOR(
  File "/usr/local/lib/python3.8/dist-packages/torch/_ops.py", line 502, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: unsupported output type: int, from operator: xformers::efficient_attention_forward_cutlass

I would be very grateful if you could give me some suggestion to fix it !

deyiluobo commented 11 months ago

Hi @xinlin-xiao, I have encountered a similar problem recently, but I think it is an error in the xformers module. You can try setting the xattn flag used in the ViT backbone to False.

xinlin-xiao commented 11 months ago

@deyiluobo Hi, thanks. I replaced `x = xops.memory_efficient_attention(q, k, v)` with the equivalent PyTorch code from https://facebookresearch.github.io/xformers/components/ops.html, and it solved the problem.

woshidengweimo commented 11 months ago

tOoZzLOznk Have you met this import error?

xinlin-xiao commented 11 months ago

奥兹洛兹克 你遇到过这个导入错误吗?

maybe you can not use "Caffe2Tracer",you should not import it

woshidengweimo commented 11 months ago

转模型时好像需要导入呀

发自我的iPhone

------------------ Original ------------------ From: xinlin-xiao @.> Date: Wed,Dec 20,2023 8:21 PM To: baaivision/EVA @.> Cc: woshidengweimo @.>, Comment @.> Subject: Re: [baaivision/EVA] use torch.jit.trace export pytorch 2 torchscript fail. (Issue #131)

xinlin-xiao commented 11 months ago

你看看转onnx和torchscript不需要你那个包 ,转其它的应该啥环境你没装,搜一下装一下环境就可以

---Original--- From: @.> Date: Wed, Dec 20, 2023 20:24 PM To: @.>; Cc: @.>;"State @.>; Subject: Re: [baaivision/EVA] use torch.jit.trace export pytorch 2 torchscript fail. (Issue #131)

转模型时好像需要导入呀

发自我的iPhone

------------------ Original ------------------ From: xinlin-xiao @.&gt; Date: Wed,Dec 20,2023 8:21 PM To: baaivision/EVA @.&gt; Cc: woshidengweimo @.&gt;, Comment @.&gt; Subject: Re: [baaivision/EVA] use torch.jit.trace export pytorch 2 torchscript fail. (Issue #131) — Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you modified the open/close state.Message ID: @.***>

woshidengweimo commented 11 months ago

我用“tracing”,而不是“caffe2_tracing”。转成功了onnx。后面你有做onnx到trt吗?

xinlin-xiao commented 11 months ago

你是得看用的哪个模型,onnx转trt可能有些算子trt不支持

---Original--- From: @.> Date: Thu, Dec 21, 2023 11:58 AM To: @.>; Cc: @.>;"State @.>; Subject: Re: [baaivision/EVA] use torch.jit.trace export pytorch 2 torchscript fail. (Issue #131)

我用“tracing”,而不是“caffe2_tracing”。转成功了onnx。后面你有做onnx到trt吗?

— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you modified the open/close state.Message ID: @.***>