berkeley-hipie / HIPIE

[NeurIPS2023] Code release for "Hierarchical Open-vocabulary Universal Image Segmentation"
https://people.eecs.berkeley.edu/~xdwang/projects/HIPIE/
MIT License
260 stars 19 forks source link

An error occurred while running the demo #6

Open confusedgreenhand opened 1 year ago

confusedgreenhand commented 1 year ago

Hi, thanks for your great work! I would like to ask whether you have encountered the following error when running the demo (Demo-Main.ipynb):

hipie_error

RuntimeError Traceback (most recent call last) Cell In[13], line 1 ----> 1 from projects.HIPIE.demo_lib.part_segm_demo import PartSegmDemo 2 from detectron2.data.detection_utils import read_image,convert_PIL_to_numpy 3 from fairscale.nn.checkpoint import checkpoint_wrapper

File ~/PycharmProjects/HIPIE-main/projects/HIPIE/demo_lib/part_segm_demo.py:25 22 from detectron2.data.detection_utils import read_image 23 from detectron2.utils.logger import setup_logger ---> 25 from detectron2.projects.hipie import add_hipie_config 26 from projects.HIPIE.predictor import VisualizationDemo 27 from argparse import Namespace

File ~/PycharmProjects/HIPIE-main/projects/HIPIE/hipie/__init__.py:2 1 from .config import add_hipie_config ----> 2 from .hipie_img import HIPIE_IMG 3 from .data import build_detection_train_loader, build_detection_test_loader 4 from .data.objects365 import categories

File ~/PycharmProjects/HIPIE-main/projects/HIPIE/hipie/hipie_img.py:21 19 from .models.deformable_detr.deformable_transformer_dino import DeformableTransformerVLDINO 20 from .models.ddetrs import DDETRSegmUni, segmentation_postprocess ---> 21 from .models.ddetrs_dn import DDETRSegmUniDN 22 from .util.box_ops import box_cxcywh_to_xyxy, box_xyxy_to_cxcywh 23 from detectron2.utils.memory import retry_if_cuda_oom

File ~/PycharmProjects/HIPIE-main/projects/HIPIE/hipie/models/ddetrs_dn.py:22 19 from PIL import Image 21 from .maskdino.build import build_maskdino ---> 22 from .maskdino.criterion import SetCriterion as MaskDINOCriterion 23 from ..util.misc import NestedTensor, interpolate, nested_tensor_from_tensor_list, inverse_sigmoid 24 # from .dcn.deform_conv import DeformConv

File ~/PycharmProjects/HIPIE-main/projects/HIPIE/hipie/models/maskdino/criterion.py:108 101 return loss.mean(1).sum() / num_masks 104 sigmoid_ce_loss_jit = torch.jit.script( 105 sigmoid_ce_loss 106 ) # type: torch.jit.ScriptModule --> 108 token_sigmoid_binary_focal_loss_jit = torch.jit.script( 109 token_sigmoid_binary_focal_loss 110 ) 112 def calculate_uncertainty(logits): 113 """ 114 We estimate uncerainty as L1 distance between 0.0 and the logit prediction in 'logits' for the 115 foreground class in classes. (...) 122 the most uncertain locations having the highest uncertainty score. 123 """

File ~/anaconda3/envs/hipie/lib/python3.9/site-packages/torch/jit/_script.py:1343, in script(obj, optimize, _frames_up, _rcb, example_inputs) 1341 if _rcb is None: 1342 _rcb = _jit_internal.createResolutionCallbackFromClosure(obj) -> 1343 fn = torch._C._jit_script_compile( 1344 qualified_name, ast, _rcb, get_default_args(obj) 1345 ) 1346 # Forward docstrings 1347 fn.__doc__ = obj.__doc__

RuntimeError: Expected a default value of type Tensor (inferred) on parameter "alpha".Because "alpha" was not annotated with an explicit type it is assumed to be type 'Tensor'.: File "/home/hazelwang/PycharmProjects/HIPIE-main/projects/HIPIE/hipie/models/deformable_detr/segmentation.py", line 120 def token_sigmoid_binary_focal_loss(pred_logits, targets, alpha=0.25, gamma=2.0, text_mask=None, reduction=True):

    # binary version of focal loss
# copied from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/focal_loss.py
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
~~~
Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Args:
~~~~~
    inputs: A float tensor of arbitrary shape.
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            The predictions for each example.
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    targets: A float tensor with the same shape as inputs. Stores the binary
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
             classification label for each element in inputs
             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            (0 for the negative class and 1 for the positive class).
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    alpha: (optional) Weighting factor in range (0,1) to balance
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            positive vs negative examples. Default = -1 (no weighting).
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    gamma: Exponent of the modulating factor (1 - p_t) to
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           balance easy vs hard examples.
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Returns:
~~~~~~~~
    Loss tensor with the reduction option applied.
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
~~~
# pred_logits: (bs, n_anchors, max_seq_len)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# targets: (bs, n_anchors, max_seq_len)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# text_mask: (bs, max_seq_len)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert (targets.dim() == 3)
~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert (pred_logits.dim() == 3)  # batch x from x to
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

bs, n, _ = pred_logits.shape
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if text_mask is not None:
~~~~~~~~~~~~~~~~~~~~~~~~~
    assert (text_mask.dim() == 2)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    text_mask = (text_mask > 0).unsqueeze(1) # (bs, 1, max_seq_len)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    text_mask = text_mask.repeat(1, pred_logits.size(1), 1)  # copy along the image channel dimension. (bs, n_anchors, max_seq_len)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    pred_logits = torch.masked_select(pred_logits, text_mask)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    targets = torch.masked_select(targets, text_mask)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # print(pred_logits.shape)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~
    # print(targets.shape)
    ~~~~~~~~~~~~~~~~~~~~~~

p = torch.sigmoid(pred_logits)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ce_loss = F.binary_cross_entropy_with_logits(pred_logits, targets, reduction="none")
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
p_t = p * targets + (1 - p) * (1 - targets)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
loss = ce_loss * ((1 - p_t) ** gamma)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

if alpha >= 0:
~~~~~~~~~~~~~~
    alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    loss = alpha_t * loss
    ~~~~~~~~~~~~~~~~~~~~~
if reduction:
~~~~~~~~~~~~~
    return loss.sum()
    ~~~~~~~~~~~~~~~~~
else:
~~~~~
    return loss
    ~~~~~~~~~~~ <--- HERE
KKallidromitis commented 1 year ago

Hi, I am not sure exactly what is causing the error, but it is most likely an issue with PyTorch's torch.jit.script. Please make sure you are using the recommended torch version, 1.12.

Yonglild commented 1 year ago

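Hi, I also encountered the same problem. As the error message says, TorchScript assumes any parameter without an explicit type annotation is a Tensor, so the float and bool defaults are rejected. Try annotating them explicitly: `def token_sigmoid_binary_focal_loss(pred_logits, targets, alpha: float = 0.25, gamma: float = 2.0, text_mask=None, reduction: bool = True):`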