apple / coremltools

Core ML tools contain supporting tools for Core ML model conversion, editing, and validation.
https://coremltools.readme.io
BSD 3-Clause "New" or "Revised" License

RuntimeError: PyTorch convert function for op 'torchvision::roi_align' not implemented. #1793

Open ivyas21 opened 1 year ago

ivyas21 commented 1 year ago

When converting a traced torchvision detection model, ct.convert fails with RuntimeError: PyTorch convert function for op 'torchvision::roi_align' not implemented.

Stack Trace

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_998/3386583322.py in <module>
      5     traced_model = torch.jit.trace(model_to_trace, example_image_pt).eval()
      6 
----> 7 detector_mlmodel = ct.convert(traced_model, inputs=[ct.ImageType(shape=(1, 3, 224, 224))])
      8 detector_mlmodel.save("segmenter.mlmodel")

/opt/conda/lib/python3.7/site-packages/coremltools/converters/_converters_entry.py in convert(model, source, inputs, outputs, classifier_config, minimum_deployment_target, convert_to, compute_precision, skip_model_load, compute_units, package_dir, debug)
    454         package_dir=package_dir,
    455         debug=debug,
--> 456         specification_version=specification_version,
    457     )
    458 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in mil_convert(model, convert_from, convert_to, compute_units, **kwargs)
    185         See `coremltools.converters.convert`
    186     """
--> 187     return _mil_convert(model, convert_from, convert_to, ConverterRegistry, MLModel, compute_units, **kwargs)
    188 
    189 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in _mil_convert(model, convert_from, convert_to, registry, modelClass, compute_units, **kwargs)
    214                             convert_to,
    215                             registry,
--> 216                             **kwargs
    217                          )
    218 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in mil_convert_to_proto(model, convert_from, convert_to, converter_registry, **kwargs)
    279     frontend_converter = frontend_converter_type()
    280 
--> 281     prog = frontend_converter(model, **kwargs)
    282 
    283     if convert_to.lower() != "neuralnetwork":

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in __call__(self, *args, **kwargs)
    107         from .frontend.torch import load
    108 
--> 109         return load(*args, **kwargs)
    110 
    111 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/load.py in load(model_spec, inputs, specification_version, debug, outputs, cut_at_symbols, **kwargs)
     55     inputs = _convert_to_torch_inputtype(inputs)
     56     converter = TorchConverter(torchscript, inputs, outputs, cut_at_symbols, specification_version)
---> 57     return _perform_torch_convert(converter, debug)
     58 
     59 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/load.py in _perform_torch_convert(converter, debug)
    102             print("the following model ops are MISSING:")
    103             print("\n".join(["  " + str(x) for x in sorted(missing)]))
--> 104         raise e
    105 
    106     return prog

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/load.py in _perform_torch_convert(converter, debug)
     94 def _perform_torch_convert(converter, debug):
     95     try:
---> 96         prog = converter.convert()
     97     except RuntimeError as e:
     98         if debug and "convert function" in str(e):

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/converter.py in convert(self)
    279 
    280             # Add the rest of the operations
--> 281             convert_nodes(self.context, self.graph)
    282 
    283             graph_outputs = [self.context[name] for name in self.graph.outputs]

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/ops.py in convert_nodes(context, graph)
     83         if add_op is None:
     84             raise RuntimeError(
---> 85                 "PyTorch convert function for op '{}' not implemented.".format(node.kind)
     86             )
     87 

RuntimeError: PyTorch convert function for op 'torchvision::roi_align' not implemented.
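
Note: judging from _perform_torch_convert in the trace above, passing debug=True to ct.convert should print the full list of missing ops ("the following model ops are MISSING:") rather than stopping at the first one. A minimal sketch, reusing the traced model from the steps below:

# debug=True asks the torch frontend to enumerate every op without a
# convert function before re-raising the error.
detector_mlmodel = ct.convert(
    traced_model,
    inputs=[ct.ImageType(shape=(1, 3, 224, 224))],
    debug=True,
)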

Steps To Reproduce

import coremltools as ct
import torch, torchvision
from torchvision.transforms import functional as F, InterpolationMode, transforms as T
import requests
from PIL import Image
import numpy as np
from typing import Dict, Tuple, Optional

# Image conversion helpers (unused below; the script uses the equivalent torchvision transforms directly):
class PILToTensor(torch.nn.Module):
    def forward(
        self, image: torch.Tensor, target: Optional[Dict[str, torch.Tensor]] = None
    ) -> Tuple[torch.Tensor, Optional[Dict[str, torch.Tensor]]]:
        image = F.pil_to_tensor(image)
        return image, target

class ConvertImageDtype(torch.nn.Module):
    def __init__(self, dtype: torch.dtype) -> None:
        super().__init__()
        self.dtype = dtype

    def forward(
        self, image: torch.Tensor, target: Optional[Dict[str, torch.Tensor]] = None
    ) -> Tuple[torch.Tensor, Optional[Dict[str, torch.Tensor]]]:
        image = F.convert_image_dtype(image, self.dtype)
        return image, target

# Load the torchvision model
detector_model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
detector_model = detector_model.eval()

# Get a sample image
toTensor = T.PILToTensor()
toFloatTensor = T.ConvertImageDtype(torch.float)
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
example_image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

example_image_np = np.array(example_image)
example_image_pt = toFloatTensor(toTensor(example_image))
example_image_pt = example_image_pt.unsqueeze(0)

# Run the sample through the model to demonstrate the model works
y = detector_model(example_image_pt)

# Make an adapter that converts the model's dict outputs to a tuple
class FasterRCNN_MobileNetV3_AdapterModel(torch.nn.Module):
    """This adapter is only here to unbox the first output."""
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        result = self.model(x)
        return result[0]['boxes'], result[0]['labels'], result[0]['scores']

adapted_detector_model = FasterRCNN_MobileNetV3_AdapterModel(detector_model)

# Trace and convert the model using coremltools
model_to_trace = adapted_detector_model
with torch.inference_mode():
    out = model_to_trace(example_image_pt)
    traced_model = torch.jit.trace(model_to_trace, example_image_pt).eval()

detector_mlmodel = ct.convert(traced_model, inputs=[ct.ImageType(shape=example_image_pt.shape)])
detector_mlmodel.save("segmenter.mlmodel")
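
Before converting, it can help to confirm the trace reproduces the eager outputs (a quick sanity check; the tolerances here are arbitrary):

# Compare the traced adapter's outputs against the eager model's.
with torch.inference_mode():
    eager_out = adapted_detector_model(example_image_pt)
    traced_out = traced_model(example_image_pt)
for eager_t, traced_t in zip(eager_out, traced_out):
    torch.testing.assert_close(eager_t, traced_t, rtol=1e-4, atol=1e-4)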

System environment:

TobyRoseman commented 1 year ago

We have an unmerged pull request that adds support for roi_align (not torchvision::roi_align): #1509.
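
Since that PR registers its converter under roi_align while the traced graph emits the namespaced kind torchvision::roi_align, one conceivable shim (untested, purely illustrative) is to alias whatever roi_align converter is registered to the namespaced name before calling ct.convert:

from coremltools.converters.mil.frontend.torch.torch_op_registry import (
    _TORCH_OPS_REGISTRY,
    register_torch_op,
)

# Assumes a "roi_align" converter (e.g. one applied from PR #1509) is
# already registered under the plain name.
_roi_align_impl = _TORCH_OPS_REGISTRY["roi_align"]

@register_torch_op(torch_alias=["torchvision::roi_align"], override=True)
def roi_align(context, node):
    # Delegate to the existing converter under the alias the graph uses.
    _roi_align_impl(context, node)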

ivyas21 commented 1 year ago

@TobyRoseman,

I tried the roi_align code from PR #1509, but I'm now getting a further error; that's why I opened this as an issue. What is the best way to pursue it? Thank you!

ivyas21 commented 1 year ago

This is the error I got after adding the roi_align code from PR #1509:


/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  for i in range(dim)
/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
  for g in grid_sizes
/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  for g in grid_sizes
/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/rpn.py:73: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
  A = Ax4 // 4
/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/rpn.py:74: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
  C = AxC // A
/opt/conda/lib/python3.7/site-packages/torchvision/ops/boxes.py:156: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
/opt/conda/lib/python3.7/site-packages/torchvision/ops/boxes.py:158: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
/opt/conda/lib/python3.7/site-packages/torchvision/models/detection/transform.py:293: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  for s, s_orig in zip(new_size, original_size)
Tuple detected at graph output. This will be flattened in the converted model.
Converting PyTorch Frontend ==> MIL Ops:  66%|██████▌   | 1371/2092 [00:01<00:00, 824.08 ops/s]Saving value type of int64 into a builtin type of int32, might lose precision!
Saving value type of int64 into a builtin type of int32, might lose precision!
Saving value type of int64 into a builtin type of int32, might lose precision!
Saving value type of int64 into a builtin type of int32, might lose precision!
Saving value type of int64 into a builtin type of int32, might lose precision!
Converting PyTorch Frontend ==> MIL Ops:  70%|██████▉   | 1455/2092 [00:02<00:00, 734.20 ops/s]Saving value type of int64 into a builtin type of int32, might lose precision!
Converting PyTorch Frontend ==> MIL Ops:  80%|████████  | 1675/2092 [00:02<00:00, 916.54 ops/s]Saving value type of int64 into a builtin type of int32, might lose precision!

Converting PyTorch Frontend ==> MIL Ops:  67%|██████▋   | 2/3 [00:00<00:00, 1842.03 ops/s]

Converting PyTorch Frontend ==> MIL Ops:  90%|█████████ | 9/10 [00:00<00:00, 964.90 ops/s]
Saving value type of float64 into a builtin type of fp32, might lose precision!
Saving value type of float64 into a builtin type of fp32, might lose precision!
Saving value type of int64 into a builtin type of int32, might lose precision!
Saving value type of int64 into a builtin type of int32, might lose precision!
Converting PyTorch Frontend ==> MIL Ops:  89%|████████▊ | 1852/2092 [00:02<00:00, 1152.80 ops/s]Saving value type of int64 into a builtin type of int32, might lose precision!
Converting PyTorch Frontend ==> MIL Ops:  89%|████████▉ | 1870/2092 [00:02<00:00, 796.46 ops/s] 
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
/tmp/ipykernel_31355/3386583322.py in <module>
      5     traced_model = torch.jit.trace(model_to_trace, example_image_pt).eval()
      6 
----> 7 detector_mlmodel = ct.convert(traced_model, inputs=[ct.ImageType(shape=(1, 3, 224, 224))])
      8 detector_mlmodel.save("segmenter.mlmodel")

/opt/conda/lib/python3.7/site-packages/coremltools/converters/_converters_entry.py in convert(model, source, inputs, outputs, classifier_config, minimum_deployment_target, convert_to, compute_precision, skip_model_load, compute_units, package_dir, debug)
    454         package_dir=package_dir,
    455         debug=debug,
--> 456         specification_version=specification_version,
    457     )
    458 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in mil_convert(model, convert_from, convert_to, compute_units, **kwargs)
    185         See `coremltools.converters.convert`
    186     """
--> 187     return _mil_convert(model, convert_from, convert_to, ConverterRegistry, MLModel, compute_units, **kwargs)
    188 
    189 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in _mil_convert(model, convert_from, convert_to, registry, modelClass, compute_units, **kwargs)
    214                             convert_to,
    215                             registry,
--> 216                             **kwargs
    217                          )
    218 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in mil_convert_to_proto(model, convert_from, convert_to, converter_registry, **kwargs)
    279     frontend_converter = frontend_converter_type()
    280 
--> 281     prog = frontend_converter(model, **kwargs)
    282 
    283     if convert_to.lower() != "neuralnetwork":

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in __call__(self, *args, **kwargs)
    107         from .frontend.torch import load
    108 
--> 109         return load(*args, **kwargs)
    110 
    111 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/load.py in load(model_spec, inputs, specification_version, debug, outputs, cut_at_symbols, **kwargs)
     55     inputs = _convert_to_torch_inputtype(inputs)
     56     converter = TorchConverter(torchscript, inputs, outputs, cut_at_symbols, specification_version)
---> 57     return _perform_torch_convert(converter, debug)
     58 
     59 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/load.py in _perform_torch_convert(converter, debug)
     94 def _perform_torch_convert(converter, debug):
     95     try:
---> 96         prog = converter.convert()
     97     except RuntimeError as e:
     98         if debug and "convert function" in str(e):

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/converter.py in convert(self)
    279 
    280             # Add the rest of the operations
--> 281             convert_nodes(self.context, self.graph)
    282 
    283             graph_outputs = [self.context[name] for name in self.graph.outputs]

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/ops.py in convert_nodes(context, graph)
     87 
     88         context.prepare_for_conversion(node)
---> 89         add_op(context, node)
     90 
     91         # We've generated all the outputs the graph needs, terminate conversion.

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/ops.py in scatter(context, node)
   5228         mode = 'update'
   5229 
-> 5230     _scatter(context, inputs, mode, node.name)
   5231 
   5232 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/frontend/torch/ops.py in _scatter(context, inputs, mode, name)
   5209     if types.is_scalar(updates.sym_type):
   5210         updates = mb.fill(shape=indices.shape, value=updates.val, name=name)
-> 5211     result = mb.scatter_along_axis(data=data, indices=indices, updates=updates,axis=axis, mode=mode, name=name)
   5212     context.add(result)
   5213 

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/mil/ops/registry.py in add_op(cls, **kwargs)
    174                     op_cls_to_add = op_reg[op_type]
    175 
--> 176                 return cls._add_op(op_cls_to_add, **kwargs)
    177 
    178             setattr(Builder, op_type, add_op)

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/mil/builder.py in _add_op(cls, op_cls, **kwargs)
    180         curr_block()._insert_op_before(new_op, before_op=before_op)
    181         new_op.build_nested_blocks()
--> 182         new_op.type_value_inference()
    183         if len(new_op.outputs) == 1:
    184             return new_op.outputs[0]

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/mil/operation.py in type_value_inference(self, overwrite_output)
    251         existing _output_vars
    252         """
--> 253         output_types = self.type_inference()
    254         if not isinstance(output_types, tuple):
    255             output_types = (output_types,)

/opt/conda/lib/python3.7/site-packages/coremltools/converters/mil/mil/ops/defs/iOS15/scatter_gather.py in type_inference(self)
    431         for i in range(self.data.rank):
    432             if i != axis:
--> 433                 assert self.data.shape[i] == self.indices.shape[i], f'type_inference: axis={axis}, i={i}: {self.data.shape[i]} != {self.indices.shape[i]}'
    434 
    435         return self.data.sym_type

AssertionError: type_inference: axis=0, i=1: 256 != is452

TobyRoseman commented 1 year ago

This error looks unrelated to your previous error. I think it's an entirely separate issue.
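
For reference, the assert in scatter_along_axis.type_inference requires data and indices to have the same size on every axis except the scatter axis; in the trace above, dim 1 is 256 for data but the symbolic size is452 for indices. A standalone MIL program with shapes that satisfy the constraint (shapes invented for illustration) looks like this:

import numpy as np
from coremltools.converters.mil import Builder as mb

@mb.program(input_specs=[mb.TensorSpec(shape=(4, 256))])
def prog(data):
    # axis=0, so indices/updates must match data on every other axis:
    # (2, 256) agrees with (4, 256) on dim 1.
    indices = np.zeros((2, 256), dtype=np.int32)
    updates = np.ones((2, 256), dtype=np.float32)
    return mb.scatter_along_axis(
        data=data, indices=indices, updates=updates, axis=0, mode="update"
    )

print(prog)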