VainF / Torch-Pruning

[CVPR 2023] DepGraph: Towards Any Structural Pruning
https://arxiv.org/abs/2301.12900
MIT License
2.75k stars 337 forks source link

Linear layer hasn't been added to graph after calling build_dependency #188

Open chamecall opened 1 year ago

chamecall commented 1 year ago

Code snippet:


import torch_pruning as tp
import einops
from torch import nn
import torch

class RoiAlignDecoderLayer(nn.Module):
    def __init__(self, n_channels, n_locs, n_heads, drop_rate: float = 0.0):
        super().__init__()

        self.cross_attn = RoiAlignedAttention(n_channels, n_locs, n_heads, drop_rate=drop_rate)

        self.self_attn_norm = nn.LayerNorm(n_channels)
        self.self_attn = SelfAttention(n_channels, n_heads, attn_drop=drop_rate, proj_drop=drop_rate)

        self.ffn_norm = nn.LayerNorm(n_channels)
        self.ffn = FFN(n_channels, n_channels, 4 * n_channels, drop_rate=drop_rate)

        self.init_weights()

    def init_weights(self):
        nn.init.constant_(self.ffn.out_layer.weight, 0.0)
        nn.init.constant_(self.ffn.out_layer.bias, 0.0)

    def forward(self, queries, aligned_features):
        queries = queries + self.cross_attn(queries, aligned_features)
        queries = queries + self.self_attn(self.self_attn_norm(queries))
        queries = queries + self.ffn(self.ffn_norm(queries))
        return queries

class RoiAlignedAttention(nn.Module):
    def __init__(self, n_channels, n_locs, n_heads=8, drop_rate: float = 0.0):
        super().__init__()
        self.n_locs = n_locs
        self.n_heads = n_heads

        self.attn_weights = nn.Linear(n_channels, n_heads * n_locs)
        self.attn_dropout = nn.Dropout(drop_rate)
        self.projection = nn.Linear(n_channels, n_channels)
        self.proj_dropout = nn.Dropout(drop_rate)

    def forward(self, queries: torch.Tensor, aligned_features: torch.Tensor):
        aligned_features = einops.rearrange(aligned_features, "B N (nh Ch) n_locs -> B N nh Ch n_locs", nh=self.n_heads)
        attn_weights = self.attn_weights(queries)
        attn_weights = einops.rearrange(attn_weights, "B N (nh n_locs) -> B N nh (n_locs)", nh=self.n_heads).softmax(
            dim=-1
        )
        attn_weights = self.attn_dropout(attn_weights)

        outputs = einops.einsum(
            aligned_features,
            attn_weights,
            "B N nh Ch n_locs, B N nh n_locs -> B N nh Ch",
        )
        outputs = self.projection(outputs.flatten(-2))
        outputs = self.proj_dropout(outputs)
        return outputs

class SelfAttention(nn.Module):
    def __init__(
        self,
        dim,
        num_heads=8,
        qkv_bias=False,
        qk_norm=False,
        attn_drop=0.0,
        proj_drop=0.0,
        norm_layer=nn.LayerNorm,
    ):
        super().__init__()
        assert dim % num_heads == 0, "dim should be divisible by num_heads"
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.scale = self.head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.q_norm = norm_layer(self.head_dim) if qk_norm else nn.Identity()
        self.k_norm = norm_layer(self.head_dim) if qk_norm else nn.Identity()
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)
        q, k = self.q_norm(q), self.k_norm(k)

        # x = F.scaled_dot_product_attention(
        #    q, k, v,
        #    dropout_p=self.attn_drop.p,
        # )
        q = q * self.scale
        attn = q @ k.transpose(-2, -1)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)
        x = attn @ v

        x = x.transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

class FFN(nn.Module):
    def __init__(self, in_c: int, out_c: int, mid_c: int, drop_rate: float = 0.0):
        super().__init__()
        self.in_layer = nn.Linear(in_c, mid_c)
        self.act = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(drop_rate, inplace=True)
        self.out_layer = nn.Linear(mid_c, out_c)

    def forward(self, x):
        x = self.in_layer(x)
        x = self.dropout(x)
        x = self.act(x)
        x = self.out_layer(x)
        return x

class SelfAttentionPruner(tp.function.BasePruningFunc):
    TARGET_MODULES = SelfAttention

    def check(self, layer, idxs, to_output):
        super().check(layer, idxs, to_output)
        assert (
            layer.dim - len(idxs)
        ) % layer.num_heads == 0, "dim (%d) of MultiheadAttention after pruning must divide evenly by `num_heads` (%d)" % (
            layer.embed_dim,
            layer.num_heads,
        )

    def prune_out_channels(self, layer, idxs: list) -> nn.Module:
        dim = self.get_out_channels(layer)

        keep_idxs = list(set(range(dim)) - set(idxs))
        keep_idxs.sort()

        pruning_idxs_repeated = idxs + [i + dim for i in idxs] + [i + 2 * dim for i in idxs]

        keep_idxs_3x_repeated = list(set(range(3 * dim)) - set(pruning_idxs_repeated))

        layer.qkv.weight = self._prune_parameter_and_grad(layer.qkv.weight, keep_idxs, 1)
        layer.qkv.weight = self._prune_parameter_and_grad(layer.qkv.weight, keep_idxs_3x_repeated, 0)
        layer.qkv.in_features = len(keep_idxs)
        layer.qkv.out_features = len(keep_idxs_3x_repeated)

        layer.proj.weight = self._prune_parameter_and_grad(layer.proj.weight, keep_idxs, 1)
        layer.proj.weight = self._prune_parameter_and_grad(layer.proj.weight, keep_idxs, 0)
        layer.proj.bias = self._prune_parameter_and_grad(layer.proj.bias, keep_idxs, 0)

        layer.proj.in_features = layer.proj.out_features = len(keep_idxs)
        assert (layer.head_dim - len(idxs)) % layer.num_heads == 0
        layer.head_dim = len(keep_idxs) // layer.num_heads
        print("PRUNING SelfAttention...")
        return layer

    prune_in_channels = prune_out_channels

    def get_out_channels(self, layer):
        return layer.head_dim * layer.num_heads

    def get_in_channels(self, layer):
        return self.get_out_channels(layer)

class RoiAlignedAttentionPruner(tp.function.BasePruningFunc):
    TARGET_MODULES = RoiAlignedAttention

    def check(self, layer, idxs, to_output):
        super().check(layer, idxs, to_output)

    def prune_out_channels(self, layer, idxs: list) -> nn.Module:
        dim = self.get_out_channels(layer)

        keep_idxs = list(set(range(dim)) - set(idxs))
        keep_idxs.sort()
        layer.attn_weights.weight = self._prune_parameter_and_grad(layer.attn_weights.weight, keep_idxs, 1)
        layer.attn_weights.in_features = len(keep_idxs)

        layer.projection.weight = self._prune_parameter_and_grad(layer.projection.weight, keep_idxs, 1)
        layer.projection.weight = self._prune_parameter_and_grad(layer.projection.weight, keep_idxs, 0)
        layer.projection.bias = self._prune_parameter_and_grad(layer.projection.bias, keep_idxs, 0)
        layer.projection.in_features = layer.projection.out_features = len(keep_idxs)

        print("PRUNING RoiAlignedAttention...")
        return layer

    prune_in_channels = prune_out_channels

    def get_out_channels(self, layer):
        return layer.attn_weights.in_features

    def get_in_channels(self, layer):
        return self.get_out_channels(layer)

radl = RoiAlignDecoderLayer(64, 75, 8, 0.1)
queries = torch.randn(*[1, 64, 64])
aligned_features = torch.randn(*[1, 64, 64, 75])
example_inputs = [queries, aligned_features]
print(radl)

customized_pruners = {SelfAttention: SelfAttentionPruner(), RoiAlignedAttention: RoiAlignedAttentionPruner()}
DG = tp.DependencyGraph().build_dependency(radl, example_inputs=example_inputs, customized_pruners=customized_pruners)

for g in list(DG.get_all_groups()):
    print(g)

pruning_idxs = pruning_idxs = [2, 6, 9, 10, 12, 14, 17, 20]
pruning_group = DG.get_pruning_group(
    radl.ffn.out_layer,
    tp.prune_linear_in_channels,
    idxs=pruning_idxs,
)

if DG.check_pruning_group(pruning_group):
    pruning_group.prune()

print(radl)

I have 3 stacked cross-attention, self-attention and ffn layers. For the first two I defined custom pruners (without them the issue is the same). After building a graph we can see that ffn.in_layer in not present in the result group. As a result after pruning some of the layers (prune_in_channels in ffn.out_layer) we can see that ffn.in_layer weight shape hasn't changed and we'll get error if we feed some data in the model cause ffn.in_layer.out_features and ffn.out_layer.in_features are not consistent.

chamecall commented 1 year ago

output snippet:

RoiAlignDecoderLayer(
  (cross_attn): RoiAlignedAttention(
    (attn_weights): Linear(in_features=64, out_features=600, bias=True)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (projection): Linear(in_features=64, out_features=64, bias=True)
    (proj_dropout): Dropout(p=0.1, inplace=False)
  )
  (self_attn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  (self_attn): SelfAttention(
    (qkv): Linear(in_features=64, out_features=192, bias=False)
    (q_norm): Identity()
    (k_norm): Identity()
    (attn_drop): Dropout(p=0.1, inplace=False)
    (proj): Linear(in_features=64, out_features=64, bias=True)
    (proj_drop): Dropout(p=0.1, inplace=False)
  )
  (ffn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  (ffn): FFN(
    (in_layer): Linear(in_features=64, out_features=256, bias=True)
    (act): ReLU(inplace=True)
    (dropout): Dropout(p=0.1, inplace=True)
    (out_layer): Linear(in_features=256, out_features=64, bias=True)
  )
)

--------------------------------
          Pruning Group
--------------------------------
[0] prune_out_channels on ffn.out_layer (Linear(in_features=256, out_features=64, bias=True)) => prune_out_channels on ffn.out_layer (Linear(in_features=256, out_features=64, bias=True)), #idxs=64
[1] prune_out_channels on ffn.out_layer (Linear(in_features=256, out_features=64, bias=True)) => prune_out_channels on _ElementWiseOp_0(AddBackward0), #idxs=64
[2] prune_out_channels on _ElementWiseOp_0(AddBackward0) => prune_out_channels on _ElementWiseOp_1(AddBackward0), #idxs=64
[3] prune_out_channels on _ElementWiseOp_1(AddBackward0) => prune_out_channels on _ElementWiseOp_10(AddBackward0), #idxs=64
[4] prune_out_channels on _ElementWiseOp_1(AddBackward0) => prune_out_channels on self_attn (SelfAttention(
  (qkv): Linear(in_features=64, out_features=192, bias=False)
  (q_norm): Identity()
  (k_norm): Identity()
  (attn_drop): Dropout(p=0.1, inplace=False)
  (proj): Linear(in_features=64, out_features=64, bias=True)
  (proj_drop): Dropout(p=0.1, inplace=False)
)), #idxs=64
[5] prune_out_channels on _ElementWiseOp_1(AddBackward0) => prune_out_channels on ffn_norm (LayerNorm((64,), eps=1e-05, elementwise_affine=True)), #idxs=64
[6] prune_out_channels on ffn_norm (LayerNorm((64,), eps=1e-05, elementwise_affine=True)) => prune_out_channels on _Reshape_8(), #idxs=64
[7] prune_out_channels on _Reshape_8() => prune_out_channels on _ElementWiseOp_7(AddmmBackward0), #idxs=64
[8] prune_out_channels on _ElementWiseOp_7(AddmmBackward0) => prune_out_channels on _ElementWiseOp_9(TBackward0), #idxs=64
[9] prune_out_channels on _ElementWiseOp_7(AddmmBackward0) => prune_out_channels on _ElementWiseOp_6(torch::autograd::CopySlices), #idxs=64
[10] prune_out_channels on _ElementWiseOp_6(torch::autograd::CopySlices) => prune_out_channels on _ElementWiseOp_5(AsStridedBackward0), #idxs=64
[11] prune_out_channels on _ElementWiseOp_5(AsStridedBackward0) => prune_out_channels on _Reshape_3(), #idxs=64
[12] prune_out_channels on _Reshape_3() => prune_out_channels on _ElementWiseOp_2(AddmmBackward0), #idxs=64
[13] prune_out_channels on _ElementWiseOp_2(AddmmBackward0) => prune_out_channels on _ElementWiseOp_4(TBackward0), #idxs=64
[14] prune_out_channels on _ElementWiseOp_2(AddmmBackward0) => prune_in_channels on ffn.out_layer (Linear(in_features=256, out_features=64, bias=True)), #idxs=64
[15] prune_out_channels on self_attn (SelfAttention(
  (qkv): Linear(in_features=64, out_features=192, bias=False)
  (q_norm): Identity()
  (k_norm): Identity()
  (attn_drop): Dropout(p=0.1, inplace=False)
  (proj): Linear(in_features=64, out_features=64, bias=True)
  (proj_drop): Dropout(p=0.1, inplace=False)
)) => prune_out_channels on _ElementWiseOp_11(AddmmBackward0), #idxs=64
[16] prune_out_channels on _ElementWiseOp_11(AddmmBackward0) => prune_out_channels on _Reshape_12(), #idxs=64
[17] prune_out_channels on _ElementWiseOp_11(AddmmBackward0) => prune_out_channels on _ElementWiseOp_13(TBackward0), #idxs=64
[18] prune_out_channels on _Reshape_12() => prune_out_channels on _Reshape_14(), #idxs=64
[19] prune_out_channels on _Reshape_14() => prune_out_channels on _ElementWiseOp_15(CloneBackward0), #idxs=64
[20] prune_out_channels on _ElementWiseOp_15(CloneBackward0) => prune_out_channels on _ElementWiseOp_16(TransposeBackward0), #idxs=64
[21] prune_out_channels on _ElementWiseOp_16(TransposeBackward0) => prune_out_channels on _Reshape_17(), #idxs=64
[22] prune_out_channels on _Reshape_17() => prune_out_channels on _ElementWiseOp_18(BmmBackward0), #idxs=64
[23] prune_out_channels on _ElementWiseOp_18(BmmBackward0) => prune_out_channels on _Reshape_19(), #idxs=64
[24] prune_out_channels on _ElementWiseOp_18(BmmBackward0) => prune_out_channels on _Reshape_20(), #idxs=64
[25] prune_out_channels on _Reshape_20() => prune_out_channels on _ElementWiseOp_21(ExpandBackward0), #idxs=64
[26] prune_out_channels on _ElementWiseOp_21(ExpandBackward0) => prune_out_channels on _ElementWiseOp_22(UnbindBackward0), #idxs=64
[27] prune_out_channels on _ElementWiseOp_22(UnbindBackward0) => prune_out_channels on _ElementWiseOp_23(PermuteBackward0), #idxs=64
[28] prune_out_channels on _ElementWiseOp_22(UnbindBackward0) => prune_out_channels on _ElementWiseOp_55(TransposeBackward0), #idxs=64
[29] prune_out_channels on _ElementWiseOp_22(UnbindBackward0) => prune_out_channels on _ElementWiseOp_57(MulBackward0), #idxs=64
[30] prune_out_channels on _ElementWiseOp_57(MulBackward0) => prune_out_channels on _ElementWiseOp_56(ExpandBackward0), #idxs=64
[31] prune_out_channels on _ElementWiseOp_56(ExpandBackward0) => prune_out_channels on _Reshape_52(), #idxs=64
[32] prune_out_channels on _Reshape_52() => prune_out_channels on _ElementWiseOp_51(BmmBackward0), #idxs=64
[33] prune_out_channels on _ElementWiseOp_51(BmmBackward0) => prune_out_channels on _Reshape_53(), #idxs=64
[34] prune_out_channels on _ElementWiseOp_51(BmmBackward0) => prune_out_channels on _Reshape_50(), #idxs=64
[35] prune_out_channels on _Reshape_50() => prune_out_channels on _ElementWiseOp_49(SoftmaxBackward0), #idxs=64
[36] prune_out_channels on _ElementWiseOp_49(SoftmaxBackward0) => prune_out_channels on _ElementWiseOp_48(ExpandBackward0), #idxs=64
[37] prune_out_channels on _Reshape_53() => prune_out_channels on _ElementWiseOp_54(ExpandBackward0), #idxs=64
[38] prune_out_channels on _ElementWiseOp_23(PermuteBackward0) => prune_out_channels on _Reshape_24(), #idxs=64
[39] prune_out_channels on _Reshape_24() => prune_out_channels on _Reshape_25(), #idxs=64
[40] prune_out_channels on _Reshape_25() => prune_out_channels on _ElementWiseOp_26(MmBackward0), #idxs=64
[41] prune_out_channels on _ElementWiseOp_26(MmBackward0) => prune_out_channels on _Reshape_27(), #idxs=64
[42] prune_out_channels on _ElementWiseOp_26(MmBackward0) => prune_out_channels on _ElementWiseOp_28(TBackward0), #idxs=64
[43] prune_out_channels on _Reshape_27() => prune_out_channels on self_attn_norm (LayerNorm((64,), eps=1e-05, elementwise_affine=True)), #idxs=64
[44] prune_out_channels on _ElementWiseOp_10(AddBackward0) => prune_out_channels on cross_attn (RoiAlignedAttention(
  (attn_weights): Linear(in_features=64, out_features=600, bias=True)
  (attn_dropout): Dropout(p=0.1, inplace=False)
  (projection): Linear(in_features=64, out_features=64, bias=True)
  (proj_dropout): Dropout(p=0.1, inplace=False)
)), #idxs=64
[45] prune_out_channels on cross_attn (RoiAlignedAttention(
  (attn_weights): Linear(in_features=64, out_features=600, bias=True)
  (attn_dropout): Dropout(p=0.1, inplace=False)
  (projection): Linear(in_features=64, out_features=64, bias=True)
  (proj_dropout): Dropout(p=0.1, inplace=False)
)) => prune_out_channels on _ElementWiseOp_29(AddmmBackward0), #idxs=64
[46] prune_out_channels on _ElementWiseOp_29(AddmmBackward0) => prune_out_channels on _Reshape_30(), #idxs=64
[47] prune_out_channels on _ElementWiseOp_29(AddmmBackward0) => prune_out_channels on _ElementWiseOp_31(TBackward0), #idxs=64
[48] prune_out_channels on _Reshape_30() => prune_out_channels on _Reshape_32(), #idxs=64
[49] prune_out_channels on _Reshape_32() => prune_out_channels on _Reshape_33(), #idxs=64
[50] prune_out_channels on _Reshape_33() => prune_out_channels on _ElementWiseOp_34(PermuteBackward0), #idxs=64
[51] prune_out_channels on _ElementWiseOp_34(PermuteBackward0) => prune_out_channels on _Reshape_35(), #idxs=64
[52] prune_out_channels on _Reshape_35() => prune_out_channels on _ElementWiseOp_36(BmmBackward0), #idxs=64
[53] prune_out_channels on _ElementWiseOp_36(BmmBackward0) => prune_out_channels on _Reshape_37(), #idxs=64
[54] prune_out_channels on _Reshape_37() => prune_out_channels on _ElementWiseOp_38(PermuteBackward0), #idxs=64
[55] prune_out_channels on _ElementWiseOp_38(PermuteBackward0) => prune_out_channels on _ElementWiseOp_39(PermuteBackward0), #idxs=64
[56] prune_out_channels on _ElementWiseOp_39(PermuteBackward0) => prune_out_channels on _ElementWiseOp_40(UnsqueezeBackward0), #idxs=64
[57] prune_out_channels on _ElementWiseOp_40(UnsqueezeBackward0) => prune_out_channels on _ElementWiseOp_41(SoftmaxBackward0), #idxs=64
[58] prune_out_channels on _ElementWiseOp_41(SoftmaxBackward0) => prune_out_channels on _Reshape_42(), #idxs=64
[59] prune_out_channels on _Reshape_42() => prune_out_channels on _ElementWiseOp_43(PermuteBackward0), #idxs=64
[60] prune_out_channels on _ElementWiseOp_43(PermuteBackward0) => prune_out_channels on _Reshape_44(), #idxs=64
[61] prune_out_channels on _Reshape_44() => prune_out_channels on _Reshape_45(), #idxs=64
[62] prune_out_channels on _Reshape_45() => prune_out_channels on _ElementWiseOp_46(AddmmBackward0), #idxs=64
[63] prune_out_channels on _ElementWiseOp_46(AddmmBackward0) => prune_out_channels on _ElementWiseOp_47(TBackward0), #idxs=64
--------------------------------

PRUNING SelfAttention...
PRUNING RoiAlignedAttention...
RoiAlignDecoderLayer(
  (cross_attn): RoiAlignedAttention(
    (attn_weights): Linear(in_features=56, out_features=600, bias=True)
    (attn_dropout): Dropout(p=0.1, inplace=False)
    (projection): Linear(in_features=56, out_features=56, bias=True)
    (proj_dropout): Dropout(p=0.1, inplace=False)
  )
  (self_attn_norm): LayerNorm((56,), eps=1e-05, elementwise_affine=True)
  (self_attn): SelfAttention(
    (qkv): Linear(in_features=56, out_features=168, bias=False)
    (q_norm): Identity()
    (k_norm): Identity()
    (attn_drop): Dropout(p=0.1, inplace=False)
    (proj): Linear(in_features=56, out_features=56, bias=True)
    (proj_drop): Dropout(p=0.1, inplace=False)
  )
  (ffn_norm): LayerNorm((56,), eps=1e-05, elementwise_affine=True)
  (ffn): FFN(
    (in_layer): Linear(in_features=64, out_features=256, bias=True)
    (act): ReLU(inplace=True)
    (dropout): Dropout(p=0.1, inplace=True)
    (out_layer): Linear(in_features=248, out_features=56, bias=True)
  )
)
chamecall commented 1 year ago

I tried to trace a node for the layer and it's appeared that during handling preprocessing_stack in _trace_computational_graph method grad_fn for the layer (ffn.in_layer) hasn't even been placed there

chamecall commented 1 year ago

any ideas? is it bug in the lib?

chamecall commented 1 year ago

@VainF I tried to wrap all my potentially problematic modules into customized wrappers and now I constantly face the error about pruning concat, maybe you've already faced the issue?

Traceback (most recent call last): File "/home/algernone/git-reps/detection-utils/train2-detr-pruning.py", line 750, in <module> pl_model = PLModel(cfg) File "/home/algernone/git-reps/detection-utils/train2-detr-pruning.py", line 615, in __init__ self.detector.prune() File "/home/algernone/git-reps/structured-pruning/src/structured_pruning/pruner.py", line 267, in prune pruning_group.prune() File "/home/algernone/.pyenv/versions/python3.9/lib/python3.9/site-packages/torch_pruning/dependency.py", line 180, in prune dep(idxs) File "/home/algernone/.pyenv/versions/python3.9/lib/python3.9/site-packages/torch_pruning/dependency.py", line 109, in __call__ result = self.handler( File "/home/algernone/.pyenv/versions/python3.9/lib/python3.9/site-packages/torch_pruning/ops.py", line 85, in prune_out_channels offsets.append(offsets[i] + concat_sizes[i]) TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

it happens here:

class ConcatPruner(DummyPruner):
    def prune_out_channels(self, layer, idxs):
        if layer.concat_sizes is None:
            return
        new_concat_sizes = layer.concat_sizes.copy()
        concat_sizes = layer.concat_sizes
        offsets = [0]
        for i in range(len(concat_sizes)):
            offsets.append(offsets[i] + concat_sizes[i]) # line 85
        for idx in idxs:  # find the ID of the concat
            for i in range(len(offsets) - 1):
                if idx >= offsets[i] and idx < offsets[i + 1]:
                    concat_sizes[i] -= 1
                    break
            new_concat_sizes[i] -= 1
        layer.concat_sizes = new_concat_sizes
        offsets = [0]
        for i in range(len(new_concat_sizes)):
            offsets.append(offsets[i] + new_concat_sizes[i])
        self.offsets = offsets

    prune_in_channels = prune_out_channels

my pruning group that caused the error looks like this:

--------------------------------
          Pruning Group
--------------------------------
[0] prune_in_channels on rpn (RPN(
  (object_logits_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=1, bias=True)
    )
  )
  (box_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=4, bias=True)
    )
  )
)) => prune_in_channels on rpn (RPN(
  (object_logits_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=1, bias=True)
    )
  )
  (box_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=4, bias=True)
    )
  )
)), #idxs=1
[1] prune_in_channels on rpn (RPN(
  (object_logits_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=1, bias=True)
    )
  )
  (box_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=4, bias=True)
    )
  )
)) => prune_out_channels on _Reshape_306(), #idxs=1
[2] prune_in_channels on rpn (RPN(
  (object_logits_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=1, bias=True)
    )
  )
  (box_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=4, bias=True)
    )
  )
)) => prune_out_channels on _Reshape_307(), #idxs=1
[3] prune_in_channels on rpn (RPN(
  (object_logits_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=1, bias=True)
    )
  )
  (box_heads): ModuleList(
    (0-2): 3 x FFN(
      (in_layer): Linear(in_features=64, out_features=63, bias=True)
      (act): ReLU(inplace=True)
      (dropout): Dropout(p=0.0, inplace=True)
      (out_layer): Linear(in_features=63, out_features=4, bias=True)
    )
  )
)) => prune_out_channels on _Reshape_308(), #idxs=1
[4] prune_out_channels on _Reshape_308() => prune_out_channels on _ElementWiseOp_309(PermuteBackward0), #idxs=1
[5] prune_out_channels on _ElementWiseOp_309(PermuteBackward0) => prune_out_channels on _Reshape_310(), #idxs=1
[6] prune_out_channels on _Reshape_310() => prune_out_channels on _Reshape_311(), #idxs=1
[7] prune_out_channels on _Reshape_311() => prune_out_channels on _ElementWiseOp_312(AddmmBackward0), #idxs=1
[8] prune_out_channels on _ElementWiseOp_312(AddmmBackward0) => prune_out_channels on _Reshape_313(), #idxs=1
[9] prune_out_channels on _ElementWiseOp_312(AddmmBackward0) => prune_out_channels on _ElementWiseOp_314(TBackward0), #idxs=1
[10] prune_out_channels on _Reshape_313() => prune_out_channels on _ElementWiseOp_315(ReluBackward0), #idxs=1
[11] prune_out_channels on _ElementWiseOp_315(ReluBackward0) => prune_out_channels on _ElementWiseOp_316(AddBackward0), #idxs=1
[12] prune_out_channels on _ElementWiseOp_316(AddBackward0) => prune_out_channels on _Reshape_317(), #idxs=1
[13] prune_out_channels on _Reshape_317() => prune_out_channels on _ElementWiseOp_318(MmBackward0), #idxs=1
[14] prune_out_channels on _ElementWiseOp_318(MmBackward0) => prune_out_channels on _Reshape_319(), #idxs=1
[15] prune_out_channels on _ElementWiseOp_318(MmBackward0) => prune_out_channels on _ElementWiseOp_320(TBackward0), #idxs=1
[16] prune_out_channels on _Reshape_319() => prune_out_channels on _Reshape_321(), #idxs=1
[17] prune_out_channels on _Reshape_321() => prune_out_channels on _ElementWiseOp_322(PermuteBackward0), #idxs=1
[18] prune_out_channels on _Reshape_321() => prune_out_channels on _Reshape_373(), #idxs=1
[19] prune_out_channels on _Reshape_373() => prune_out_channels on _ElementWiseOp_372(MmBackward0), #idxs=1
[20] prune_out_channels on _ElementWiseOp_372(MmBackward0) => prune_out_channels on _ElementWiseOp_374(TBackward0), #idxs=1
[21] prune_out_channels on _ElementWiseOp_372(MmBackward0) => prune_out_channels on _Reshape_371(), #idxs=1
[22] prune_out_channels on _Reshape_371() => prune_out_channels on _ElementWiseOp_370(AddBackward0), #idxs=1
[23] prune_out_channels on _ElementWiseOp_370(AddBackward0) => prune_out_channels on _ElementWiseOp_369(ReluBackward0), #idxs=1
[24] prune_out_channels on _ElementWiseOp_369(ReluBackward0) => prune_out_channels on _Reshape_367(), #idxs=1
[25] prune_out_channels on _Reshape_367() => prune_out_channels on _ElementWiseOp_366(AddmmBackward0), #idxs=1
[26] prune_out_channels on _ElementWiseOp_366(AddmmBackward0) => prune_out_channels on _ElementWiseOp_368(TBackward0), #idxs=1
[27] prune_out_channels on _ElementWiseOp_366(AddmmBackward0) => prune_out_channels on _Reshape_365(), #idxs=1
[28] prune_out_channels on _Reshape_365() => prune_out_channels on _Reshape_364(), #idxs=1
[29] prune_out_channels on _Reshape_364() => prune_out_channels on _ElementWiseOp_363(PermuteBackward0), #idxs=1
[30] prune_out_channels on _ElementWiseOp_363(PermuteBackward0) => prune_out_channels on _Reshape_362(), #idxs=1
[31] prune_out_channels on _Reshape_362() => prune_out_channels on _ConcatOp_359(None), #idxs=1
[32] prune_out_channels on _ConcatOp_359(None) => prune_out_channels on _Reshape_360(), #idxs=1
[33] prune_out_channels on _ConcatOp_359(None) => prune_out_channels on _Reshape_361(), #idxs=1
[34] prune_out_channels on _ConcatOp_359(None) => prune_out_channels on _ElementWiseOp_358(SliceBackward0), #idxs=1
[35] prune_out_channels on _ConcatOp_359(None) => prune_out_channels on _ElementWiseOp_400(SliceBackward0), #idxs=1
[36] prune_out_channels on _ElementWiseOp_400(SliceBackward0) => prune_out_channels on _ElementWiseOp_399(MulBackward0), #idxs=1
[37] prune_out_channels on _ElementWiseOp_399(MulBackward0) => prune_out_channels on _ElementWiseOp_355(AddBackward0), #idxs=1
[38] prune_out_channels on _ElementWiseOp_355(AddBackward0) => prune_out_channels on _ConcatOp_354(None), #idxs=1
[39] prune_out_channels on _ConcatOp_354(None) => prune_out_channels on _ElementWiseOp_356(MulBackward0), #idxs=1
[40] prune_out_channels on _ElementWiseOp_356(MulBackward0) => prune_out_channels on _ElementWiseOp_357(ExpBackward0), #idxs=1
[41] prune_out_channels on _Reshape_361() => prune_out_channels on _ElementWiseOp_375(PermuteBackward0), #idxs=1
[42] prune_out_channels on _ElementWiseOp_375(PermuteBackward0) => prune_out_channels on _Reshape_376(), #idxs=1
[43] prune_out_channels on _Reshape_376() => prune_out_channels on _Reshape_377(), #idxs=1
[44] prune_out_channels on _Reshape_377() => prune_out_channels on _ElementWiseOp_378(AddmmBackward0), #idxs=1
[45] prune_out_channels on _ElementWiseOp_378(AddmmBackward0) => prune_out_channels on _Reshape_379(), #idxs=1
[46] prune_out_channels on _ElementWiseOp_378(AddmmBackward0) => prune_out_channels on _ElementWiseOp_380(TBackward0), #idxs=1
[47] prune_out_channels on _Reshape_379() => prune_out_channels on _ElementWiseOp_381(ReluBackward0), #idxs=1
[48] prune_out_channels on _ElementWiseOp_381(ReluBackward0) => prune_out_channels on _ElementWiseOp_382(AddBackward0), #idxs=1
[49] prune_out_channels on _ElementWiseOp_382(AddBackward0) => prune_out_channels on _Reshape_383(), #idxs=1
[50] prune_out_channels on _Reshape_383() => prune_out_channels on _ElementWiseOp_384(MmBackward0), #idxs=1
[51] prune_out_channels on _ElementWiseOp_384(MmBackward0) => prune_out_channels on _Reshape_385(), #idxs=1
[52] prune_out_channels on _ElementWiseOp_384(MmBackward0) => prune_out_channels on _ElementWiseOp_386(TBackward0), #idxs=1
[53] prune_out_channels on _Reshape_385() => prune_out_channels on _Reshape_336(), #idxs=1
[54] prune_out_channels on _Reshape_336() => prune_out_channels on _ElementWiseOp_337(PermuteBackward0), #idxs=1
[55] prune_out_channels on _Reshape_336() => prune_out_channels on _Reshape_334(), #idxs=1
[56] prune_out_channels on _Reshape_334() => prune_out_channels on _ElementWiseOp_333(MmBackward0), #idxs=1
[57] prune_out_channels on _ElementWiseOp_333(MmBackward0) => prune_out_channels on _ElementWiseOp_335(TBackward0), #idxs=1
[58] prune_out_channels on _ElementWiseOp_333(MmBackward0) => prune_out_channels on _Reshape_332(), #idxs=1
[59] prune_out_channels on _Reshape_332() => prune_out_channels on _ElementWiseOp_331(AddBackward0), #idxs=1
[60] prune_out_channels on _ElementWiseOp_331(AddBackward0) => prune_out_channels on _ElementWiseOp_330(ReluBackward0), #idxs=1
[61] prune_out_channels on _ElementWiseOp_330(ReluBackward0) => prune_out_channels on _Reshape_328(), #idxs=1
[62] prune_out_channels on _Reshape_328() => prune_out_channels on _ElementWiseOp_327(AddmmBackward0), #idxs=1
[63] prune_out_channels on _ElementWiseOp_327(AddmmBackward0) => prune_out_channels on _ElementWiseOp_329(TBackward0), #idxs=1
[64] prune_out_channels on _ElementWiseOp_327(AddmmBackward0) => prune_out_channels on _Reshape_326(), #idxs=1
[65] prune_out_channels on _Reshape_326() => prune_out_channels on _Reshape_325(), #idxs=1
[66] prune_out_channels on _Reshape_325() => prune_out_channels on _ElementWiseOp_324(PermuteBackward0), #idxs=1
[67] prune_out_channels on _ElementWiseOp_337(PermuteBackward0) => prune_out_channels on _Reshape_338(), #idxs=1
[68] prune_out_channels on _Reshape_338() => prune_out_channels on encoder.neck.layer_blocks.1.0 (Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))), #idxs=1
[69] prune_out_channels on encoder.neck.layer_blocks.1.0 (Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))) => prune_out_channels on _ElementWiseOp_234(torch::autograd::CppNode<vision::ops::(anonymous namespace)::ROIAlignFunction>), #idxs=1
[70] prune_out_channels on _ElementWiseOp_234(torch::autograd::CppNode<vision::ops::(anonymous namespace)::ROIAlignFunction>) => prune_out_channels on _Reshape_233(), #idxs=1
[71] prune_out_channels on _Reshape_233() => prune_out_channels on _ElementWiseOp_232(PermuteBackward0), #idxs=1
[72] prune_out_channels on _ElementWiseOp_232(PermuteBackward0) => prune_out_channels on _Reshape_183(), #idxs=1
[73] prune_out_channels on _Reshape_183() => prune_out_channels on _ConcatOp_181([0, 64, 128, 192]), #idxs=3
[74] prune_out_channels on _ConcatOp_181([0, 64, 128, 192]) => prune_out_channels on _Reshape_180(), #idxs=3
[75] prune_out_channels on _ConcatOp_181([0, 64, 128, 192]) => prune_out_channels on _Reshape_256(), #idxs=3
[76] prune_out_channels on _ConcatOp_181([0, 64, 128, 192]) => prune_out_channels on _Reshape_271(), #idxs=3
[77] prune_out_channels on _Reshape_271() => prune_out_channels on _ElementWiseOp_270(PermuteBackward0), #idxs=3
[78] prune_out_channels on _ElementWiseOp_270(PermuteBackward0) => prune_out_channels on _Reshape_269(), #idxs=3
[79] prune_out_channels on _Reshape_269() => prune_out_channels on _ElementWiseOp_268(PermuteBackward0), #idxs=3
[80] prune_out_channels on _ElementWiseOp_268(PermuteBackward0) => prune_out_channels on _ElementWiseOp_267(PermuteBackward0), #idxs=3
[81] prune_out_channels on _ElementWiseOp_267(PermuteBackward0) => prune_out_channels on _Reshape_51(), #idxs=3
[82] prune_out_channels on _Reshape_51() => prune_out_channels on _ElementWiseOp_50(BmmBackward0), #idxs=3
[83] prune_out_channels on _ElementWiseOp_50(BmmBackward0) => prune_out_channels on _Reshape_52(), #idxs=3
[84] prune_out_channels on _ElementWiseOp_50(BmmBackward0) => prune_out_channels on _Reshape_49(), #idxs=3
[85] prune_out_channels on _Reshape_49() => prune_out_channels on _ElementWiseOp_48(PermuteBackward0), #idxs=3
[86] prune_out_channels on _ElementWiseOp_48(PermuteBackward0) => prune_out_channels on _Reshape_47(), #idxs=3
[87] prune_out_channels on _Reshape_47() => prune_out_channels on _Reshape_46(), #idxs=3
[88] prune_out_channels on _Reshape_46() => prune_out_channels on _Reshape_44(), #idxs=3
[89] prune_out_channels on _Reshape_44() => prune_out_channels on _ElementWiseOp_43(AddmmBackward0), #idxs=3
[90] prune_out_channels on _ElementWiseOp_43(AddmmBackward0) => prune_out_channels on _ElementWiseOp_45(TBackward0), #idxs=3
[91] prune_out_channels on _ElementWiseOp_43(AddmmBackward0) => prune_out_channels on _Reshape_42(), #idxs=3
[92] prune_out_channels on _Reshape_42() => prune_out_channels on _ElementWiseOp_20(AddBackward0), #idxs=3
[93] prune_out_channels on _ElementWiseOp_20(AddBackward0) => prune_out_channels on _ElementWiseOp_41(AddBackward0), #idxs=3
[94] prune_out_channels on _ElementWiseOp_20(AddBackward0) => prune_out_channels on _ElementWiseOp_9(AddBackward0), #idxs=3
[95] prune_out_channels on _ElementWiseOp_20(AddBackward0) => prune_out_channels on _ElementWiseOp_40(NativeLayerNormBackward0), #idxs=3
[96] prune_out_channels on _ElementWiseOp_40(NativeLayerNormBackward0) => prune_out_channels on _Reshape_38(), #idxs=3
[97] prune_out_channels on _Reshape_38() => prune_out_channels on _ElementWiseOp_37(MmBackward0), #idxs=3
[98] prune_out_channels on _ElementWiseOp_37(MmBackward0) => prune_out_channels on _ElementWiseOp_39(TBackward0), #idxs=3
[99] prune_out_channels on _ElementWiseOp_37(MmBackward0) => prune_out_channels on _Reshape_36(), #idxs=3
[100] prune_out_channels on _Reshape_36() => prune_out_channels on _Reshape_35(), #idxs=3
[101] prune_out_channels on _Reshape_35() => prune_out_channels on _ElementWiseOp_34(PermuteBackward0), #idxs=3
[102] prune_out_channels on _ElementWiseOp_34(PermuteBackward0) => prune_out_channels on _ElementWiseOp_33(UnbindBackward0), #idxs=3
[103] prune_out_channels on _ElementWiseOp_33(UnbindBackward0) => prune_out_channels on _ElementWiseOp_32(ExpandBackward0), #idxs=3
[104] prune_out_channels on _ElementWiseOp_33(UnbindBackward0) => prune_out_channels on _ElementWiseOp_279(TransposeBackward0), #idxs=3
[105] prune_out_channels on _ElementWiseOp_33(UnbindBackward0) => prune_out_channels on _ElementWiseOp_281(MulBackward0), #idxs=3
[106] prune_out_channels on _ElementWiseOp_281(MulBackward0) => prune_out_channels on _ElementWiseOp_280(ExpandBackward0), #idxs=3
[107] prune_out_channels on _ElementWiseOp_280(ExpandBackward0) => prune_out_channels on _Reshape_276(), #idxs=3
[108] prune_out_channels on _Reshape_276() => prune_out_channels on _ElementWiseOp_275(BmmBackward0), #idxs=3
[109] prune_out_channels on _ElementWiseOp_275(BmmBackward0) => prune_out_channels on _Reshape_277(), #idxs=3
[110] prune_out_channels on _ElementWiseOp_275(BmmBackward0) => prune_out_channels on _Reshape_274(), #idxs=3
[111] prune_out_channels on _Reshape_274() => prune_out_channels on _ElementWiseOp_273(SoftmaxBackward0), #idxs=3
[112] prune_out_channels on _ElementWiseOp_273(SoftmaxBackward0) => prune_out_channels on _ElementWiseOp_272(ExpandBackward0), #idxs=3
[113] prune_out_channels on _ElementWiseOp_272(ExpandBackward0) => prune_out_channels on _Reshape_30(), #idxs=3
[114] prune_out_channels on _Reshape_30() => prune_out_channels on _ElementWiseOp_29(BmmBackward0), #idxs=3
[115] prune_out_channels on _ElementWiseOp_29(BmmBackward0) => prune_out_channels on _Reshape_31(), #idxs=3
[116] prune_out_channels on _ElementWiseOp_29(BmmBackward0) => prune_out_channels on _Reshape_28(), #idxs=3
[117] prune_out_channels on _Reshape_28() => prune_out_channels on _ElementWiseOp_27(TransposeBackward0), #idxs=3
[118] prune_out_channels on _ElementWiseOp_27(TransposeBackward0) => prune_out_channels on _ElementWiseOp_26(CloneBackward0), #idxs=3
[119] prune_out_channels on _ElementWiseOp_26(CloneBackward0) => prune_out_channels on _Reshape_25(), #idxs=3
[120] prune_out_channels on _Reshape_25() => prune_out_channels on _Reshape_23(), #idxs=3
[121] prune_out_channels on _Reshape_23() => prune_out_channels on _ElementWiseOp_22(AddmmBackward0), #idxs=3
[122] prune_out_channels on _ElementWiseOp_22(AddmmBackward0) => prune_out_channels on _ElementWiseOp_24(TBackward0), #idxs=3
[123] prune_out_channels on _ElementWiseOp_22(AddmmBackward0) => prune_out_channels on _Reshape_21(), #idxs=3
[124] prune_out_channels on _ElementWiseOp_9(AddBackward0) => prune_out_channels on decoder (RoiAlignDecoder(
  (box_pos_enc): BoxPositionalEncoder(
    (projection): Linear(in_features=512, out_features=64, bias=True)
  )
  (align): RoiAlign()
  (layers): ModuleList(
    (0-2): 3 x RoiAlignDecoderLayer(
      (cross_attn): RoiAlignedAttention(
        (attn_weights): Linear(in_features=64, out_features=600, bias=True)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (projection): Linear(in_features=64, out_features=64, bias=True)
        (proj_dropout): Dropout(p=0.1, inplace=False)
      )
      (self_attn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (self_attn): SelfAttention(
        (qkv): Linear(in_features=64, out_features=192, bias=False)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.1, inplace=False)
        (proj): Linear(in_features=64, out_features=64, bias=True)
        (proj_drop): Dropout(p=0.1, inplace=False)
      )
      (ffn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (ffn): FFN(
        (in_layer): Linear(in_features=64, out_features=250, bias=True)
        (act): ReLU(inplace=True)
        (dropout): Dropout(p=0.1, inplace=True)
        (out_layer): Linear(in_features=250, out_features=64, bias=True)
      )
    )
  )
)), #idxs=3
[125] prune_out_channels on _ElementWiseOp_9(AddBackward0) => prune_out_channels on _ElementWiseOp_19(NativeLayerNormBackward0), #idxs=3
[126] prune_out_channels on _ElementWiseOp_19(NativeLayerNormBackward0) => prune_out_channels on _Reshape_17(), #idxs=3
[127] prune_out_channels on _Reshape_17() => prune_out_channels on _ElementWiseOp_16(AddmmBackward0), #idxs=3
[128] prune_out_channels on _ElementWiseOp_16(AddmmBackward0) => prune_out_channels on _ElementWiseOp_18(TBackward0), #idxs=3
[129] prune_out_channels on _ElementWiseOp_16(AddmmBackward0) => prune_out_channels on _ElementWiseOp_15(torch::autograd::CopySlices), #idxs=3
[130] prune_out_channels on _ElementWiseOp_15(torch::autograd::CopySlices) => prune_out_channels on _ElementWiseOp_14(AsStridedBackward0), #idxs=3
[131] prune_out_channels on _ElementWiseOp_14(AsStridedBackward0) => prune_out_channels on _Reshape_12(), #idxs=3
[132] prune_out_channels on _Reshape_12() => prune_out_channels on _ElementWiseOp_11(AddmmBackward0), #idxs=3
[133] prune_out_channels on _ElementWiseOp_11(AddmmBackward0) => prune_out_channels on _ElementWiseOp_13(TBackward0), #idxs=3
[134] prune_out_channels on _ElementWiseOp_11(AddmmBackward0) => prune_out_channels on _Reshape_10(), #idxs=3
[135] prune_out_channels on decoder (RoiAlignDecoder(
  (box_pos_enc): BoxPositionalEncoder(
    (projection): Linear(in_features=512, out_features=64, bias=True)
  )
  (align): RoiAlign()
  (layers): ModuleList(
    (0-2): 3 x RoiAlignDecoderLayer(
      (cross_attn): RoiAlignedAttention(
        (attn_weights): Linear(in_features=64, out_features=600, bias=True)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (projection): Linear(in_features=64, out_features=64, bias=True)
        (proj_dropout): Dropout(p=0.1, inplace=False)
      )
      (self_attn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (self_attn): SelfAttention(
        (qkv): Linear(in_features=64, out_features=192, bias=False)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.1, inplace=False)
        (proj): Linear(in_features=64, out_features=64, bias=True)
        (proj_drop): Dropout(p=0.1, inplace=False)
      )
      (ffn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (ffn): FFN(
        (in_layer): Linear(in_features=64, out_features=250, bias=True)
        (act): ReLU(inplace=True)
        (dropout): Dropout(p=0.1, inplace=True)
        (out_layer): Linear(in_features=250, out_features=64, bias=True)
      )
    )
  )
)) => prune_out_channels on _Reshape_7(), #idxs=3
[136] prune_out_channels on decoder (RoiAlignDecoder(
  (box_pos_enc): BoxPositionalEncoder(
    (projection): Linear(in_features=512, out_features=64, bias=True)
  )
  (align): RoiAlign()
  (layers): ModuleList(
    (0-2): 3 x RoiAlignDecoderLayer(
      (cross_attn): RoiAlignedAttention(
        (attn_weights): Linear(in_features=64, out_features=600, bias=True)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (projection): Linear(in_features=64, out_features=64, bias=True)
        (proj_dropout): Dropout(p=0.1, inplace=False)
      )
      (self_attn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (self_attn): SelfAttention(
        (qkv): Linear(in_features=64, out_features=192, bias=False)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.1, inplace=False)
        (proj): Linear(in_features=64, out_features=64, bias=True)
        (proj_drop): Dropout(p=0.1, inplace=False)
      )
      (ffn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (ffn): FFN(
        (in_layer): Linear(in_features=64, out_features=250, bias=True)
        (act): ReLU(inplace=True)
        (dropout): Dropout(p=0.1, inplace=True)
        (out_layer): Linear(in_features=250, out_features=64, bias=True)
      )
    )
  )
)) => prune_out_channels on _Reshape_289(), #idxs=3
[137] prune_out_channels on decoder (RoiAlignDecoder(
  (box_pos_enc): BoxPositionalEncoder(
    (projection): Linear(in_features=512, out_features=64, bias=True)
  )
  (align): RoiAlign()
  (layers): ModuleList(
    (0-2): 3 x RoiAlignDecoderLayer(
      (cross_attn): RoiAlignedAttention(
        (attn_weights): Linear(in_features=64, out_features=600, bias=True)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (projection): Linear(in_features=64, out_features=64, bias=True)
        (proj_dropout): Dropout(p=0.1, inplace=False)
      )
      (self_attn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (self_attn): SelfAttention(
        (qkv): Linear(in_features=64, out_features=192, bias=False)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.1, inplace=False)
        (proj): Linear(in_features=64, out_features=64, bias=True)
        (proj_drop): Dropout(p=0.1, inplace=False)
      )
      (ffn_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (ffn): FFN(
        (in_layer): Linear(in_features=64, out_features=250, bias=True)
        (act): ReLU(inplace=True)
        (dropout): Dropout(p=0.1, inplace=True)
        (out_layer): Linear(in_features=250, out_features=64, bias=True)
      )
    )
  )
)) => prune_out_channels on _Reshape_302(), #idxs=3
[138] prune_out_channels on _Reshape_302() => prune_out_channels on _ElementWiseOp_301(AddmmBackward0), #idxs=3
[139] prune_out_channels on _ElementWiseOp_301(AddmmBackward0) => prune_out_channels on _ElementWiseOp_303(TBackward0), #idxs=3
[140] prune_out_channels on _ElementWiseOp_301(AddmmBackward0) => prune_out_channels on _ElementWiseOp_300(torch::autograd::CopySlices), #idxs=3
[141] prune_out_channels on _ElementWiseOp_300(torch::autograd::CopySlices) => prune_out_channels on _ElementWiseOp_299(AsStridedBackward0), #idxs=3
[142] prune_out_channels on _ElementWiseOp_299(AsStridedBackward0) => prune_out_channels on _Reshape_297(), #idxs=3
[143] prune_out_channels on _Reshape_297() => prune_out_channels on _ElementWiseOp_296(AddmmBackward0), #idxs=3
[144] prune_out_channels on _ElementWiseOp_296(AddmmBackward0) => prune_out_channels on _ElementWiseOp_298(TBackward0), #idxs=3
[145] prune_out_channels on _ElementWiseOp_296(AddmmBackward0) => prune_in_channels on heads.boxes_xywh.ffn.out_layer (Linear(in_features=64, out_features=4, bias=True)), #idxs=3
[146] prune_out_channels on _Reshape_289() => prune_out_channels on _ElementWiseOp_288(AddmmBackward0), #idxs=3
[147] prune_out_channels on _ElementWiseOp_288(AddmmBackward0) => prune_out_channels on _ElementWiseOp_290(TBackward0), #idxs=3
[148] prune_out_channels on _ElementWiseOp_288(AddmmBackward0) => prune_out_channels on _ElementWiseOp_287(torch::autograd::CopySlices), #idxs=3
[149] prune_out_channels on _ElementWiseOp_287(torch::autograd::CopySlices) => prune_out_channels on _ElementWiseOp_286(AsStridedBackward0), #idxs=3
[150] prune_out_channels on _ElementWiseOp_286(AsStridedBackward0) => prune_out_channels on _Reshape_284(), #idxs=3
[151] prune_out_channels on _Reshape_284() => prune_out_channels on _ElementWiseOp_283(AddmmBackward0), #idxs=3
[152] prune_out_channels on _ElementWiseOp_283(AddmmBackward0) => prune_out_channels on _ElementWiseOp_285(TBackward0), #idxs=3
[153] prune_out_channels on _ElementWiseOp_283(AddmmBackward0) => prune_in_channels on heads.cls_logits.ffn.out_layer (Linear(in_features=64, out_features=10, bias=True)), #idxs=3
[154] prune_out_channels on _Reshape_7() => prune_out_channels on _ElementWiseOp_6(AddmmBackward0), #idxs=3
[155] prune_out_channels on _ElementWiseOp_6(AddmmBackward0) => prune_out_channels on _ElementWiseOp_8(TBackward0), #idxs=3
[156] prune_out_channels on _ElementWiseOp_6(AddmmBackward0) => prune_out_channels on _ElementWiseOp_5(torch::autograd::CopySlices), #idxs=3
[157] prune_out_channels on _ElementWiseOp_5(torch::autograd::CopySlices) => prune_out_channels on _ElementWiseOp_4(AsStridedBackward0), #idxs=3
[158] prune_out_channels on _ElementWiseOp_4(AsStridedBackward0) => prune_out_channels on _Reshape_2(), #idxs=3
[159] prune_out_channels on _Reshape_2() => prune_out_channels on _ElementWiseOp_1(AddmmBackward0), #idxs=3
[160] prune_out_channels on _ElementWiseOp_1(AddmmBackward0) => prune_out_channels on _ElementWiseOp_3(TBackward0), #idxs=3
[161] prune_out_channels on _ElementWiseOp_1(AddmmBackward0) => prune_in_channels on object_head.out_layer (Linear(in_features=64, out_features=1, bias=True)), #idxs=3
[162] prune_out_channels on _Reshape_277() => prune_out_channels on _ElementWiseOp_278(ExpandBackward0), #idxs=3
[163] prune_out_channels on _ElementWiseOp_41(AddBackward0) => prune_out_channels on _ElementWiseOp_64(AddBackward0), #idxs=3
[164] prune_out_channels on _ElementWiseOp_41(AddBackward0) => prune_out_channels on _Reshape_65(), #idxs=3
[165] prune_out_channels on _ElementWiseOp_41(AddBackward0) => prune_out_channels on _Reshape_62(), #idxs=3
[166] prune_out_channels on _Reshape_62() => prune_out_channels on _ElementWiseOp_61(AddmmBackward0), #idxs=3
[167] prune_out_channels on _ElementWiseOp_61(AddmmBackward0) => prune_out_channels on _ElementWiseOp_63(TBackward0), #idxs=3
[168] prune_out_channels on _ElementWiseOp_61(AddmmBackward0) => prune_out_channels on _Reshape_60(), #idxs=3
[169] prune_out_channels on _Reshape_60() => prune_out_channels on _Reshape_59(), #idxs=3
[170] prune_out_channels on _Reshape_59() => prune_out_channels on _ElementWiseOp_58(PermuteBackward0), #idxs=3
[171] prune_out_channels on _ElementWiseOp_58(PermuteBackward0) => prune_out_channels on _Reshape_57(), #idxs=3
[172] prune_out_channels on _Reshape_57() => prune_out_channels on _ElementWiseOp_56(SoftmaxBackward0), #idxs=3
[173] prune_out_channels on _ElementWiseOp_56(SoftmaxBackward0) => prune_out_channels on _ElementWiseOp_55(UnsqueezeBackward0), #idxs=3
[174] prune_out_channels on _ElementWiseOp_55(UnsqueezeBackward0) => prune_out_channels on _ElementWiseOp_54(PermuteBackward0), #idxs=3
[175] prune_out_channels on _ElementWiseOp_54(PermuteBackward0) => prune_out_channels on _ElementWiseOp_53(PermuteBackward0), #idxs=3
[176] prune_out_channels on _Reshape_65() => prune_out_channels on _ElementWiseOp_66(AddmmBackward0), #idxs=3
[177] prune_out_channels on _ElementWiseOp_66(AddmmBackward0) => prune_out_channels on _Reshape_67(), #idxs=3
[178] prune_out_channels on _ElementWiseOp_66(AddmmBackward0) => prune_out_channels on _ElementWiseOp_68(TBackward0), #idxs=3
[179] prune_out_channels on _Reshape_67() => prune_out_channels on _ElementWiseOp_69(AsStridedBackward0), #idxs=3
[180] prune_out_channels on _ElementWiseOp_69(AsStridedBackward0) => prune_out_channels on _ElementWiseOp_70(torch::autograd::CopySlices), #idxs=3
[181] prune_out_channels on _ElementWiseOp_70(torch::autograd::CopySlices) => prune_out_channels on _ElementWiseOp_71(AddmmBackward0), #idxs=3
[182] prune_out_channels on _ElementWiseOp_71(AddmmBackward0) => prune_out_channels on _Reshape_72(), #idxs=3
[183] prune_out_channels on _ElementWiseOp_71(AddmmBackward0) => prune_out_channels on _ElementWiseOp_73(TBackward0), #idxs=3
[184] prune_out_channels on _Reshape_72() => prune_out_channels on _ElementWiseOp_74(NativeLayerNormBackward0), #idxs=3
[185] prune_out_channels on _ElementWiseOp_64(AddBackward0) => prune_out_channels on _ElementWiseOp_75(AddBackward0), #idxs=3
[186] prune_out_channels on _ElementWiseOp_64(AddBackward0) => prune_out_channels on _Reshape_76(), #idxs=3
[187] prune_out_channels on _Reshape_76() => prune_out_channels on _ElementWiseOp_77(AddmmBackward0), #idxs=3
[188] prune_out_channels on _ElementWiseOp_77(AddmmBackward0) => prune_out_channels on _Reshape_78(), #idxs=3
[189] prune_out_channels on _ElementWiseOp_77(AddmmBackward0) => prune_out_channels on _ElementWiseOp_79(TBackward0), #idxs=3
[190] prune_out_channels on _Reshape_78() => prune_out_channels on _Reshape_80(), #idxs=3
[191] prune_out_channels on _Reshape_80() => prune_out_channels on _ElementWiseOp_81(CloneBackward0), #idxs=3
[192] prune_out_channels on _ElementWiseOp_81(CloneBackward0) => prune_out_channels on _ElementWiseOp_82(TransposeBackward0), #idxs=3
[193] prune_out_channels on _ElementWiseOp_82(TransposeBackward0) => prune_out_channels on _Reshape_83(), #idxs=3
[194] prune_out_channels on _Reshape_83() => prune_out_channels on _ElementWiseOp_84(BmmBackward0), #idxs=3
[195] prune_out_channels on _ElementWiseOp_84(BmmBackward0) => prune_out_channels on _Reshape_85(), #idxs=3
[196] prune_out_channels on _ElementWiseOp_84(BmmBackward0) => prune_out_channels on _Reshape_86(), #idxs=3
[197] prune_out_channels on _Reshape_86() => prune_out_channels on _ElementWiseOp_87(ExpandBackward0), #idxs=3
[198] prune_out_channels on _ElementWiseOp_87(ExpandBackward0) => prune_out_channels on _ElementWiseOp_88(UnbindBackward0), #idxs=3
[199] prune_out_channels on _ElementWiseOp_88(UnbindBackward0) => prune_out_channels on _ElementWiseOp_89(PermuteBackward0), #idxs=3
[200] prune_out_channels on _ElementWiseOp_88(UnbindBackward0) => prune_out_channels on _ElementWiseOp_264(TransposeBackward0), #idxs=3
[201] prune_out_channels on _ElementWiseOp_88(UnbindBackward0) => prune_out_channels on _ElementWiseOp_266(MulBackward0), #idxs=3
[202] prune_out_channels on _ElementWiseOp_266(MulBackward0) => prune_out_channels on _ElementWiseOp_265(ExpandBackward0), #idxs=3
[203] prune_out_channels on _ElementWiseOp_265(ExpandBackward0) => prune_out_channels on _Reshape_261(), #idxs=3
[204] prune_out_channels on _Reshape_261() => prune_out_channels on _ElementWiseOp_260(BmmBackward0), #idxs=3
[205] prune_out_channels on _ElementWiseOp_260(BmmBackward0) => prune_out_channels on _Reshape_262(), #idxs=3
[206] prune_out_channels on _ElementWiseOp_260(BmmBackward0) => prune_out_channels on _Reshape_259(), #idxs=3
[207] prune_out_channels on _Reshape_259() => prune_out_channels on _ElementWiseOp_258(SoftmaxBackward0), #idxs=3
[208] prune_out_channels on _ElementWiseOp_258(SoftmaxBackward0) => prune_out_channels on _ElementWiseOp_257(ExpandBackward0), #idxs=3
[209] prune_out_channels on _Reshape_262() => prune_out_channels on _ElementWiseOp_263(ExpandBackward0), #idxs=3
[210] prune_out_channels on _ElementWiseOp_89(PermuteBackward0) => prune_out_channels on _Reshape_90(), #idxs=3
[211] prune_out_channels on _Reshape_90() => prune_out_channels on _Reshape_91(), #idxs=3
[212] prune_out_channels on _Reshape_91() => prune_out_channels on _ElementWiseOp_92(MmBackward0), #idxs=3
[213] prune_out_channels on _ElementWiseOp_92(MmBackward0) => prune_out_channels on _Reshape_93(), #idxs=3
[214] prune_out_channels on _ElementWiseOp_92(MmBackward0) => prune_out_channels on _ElementWiseOp_94(TBackward0), #idxs=3
[215] prune_out_channels on _Reshape_93() => prune_out_channels on _ElementWiseOp_95(NativeLayerNormBackward0), #idxs=3
[216] prune_out_channels on _ElementWiseOp_75(AddBackward0) => prune_out_channels on _ElementWiseOp_96(AddBackward0), #idxs=3
[217] prune_out_channels on _ElementWiseOp_75(AddBackward0) => prune_out_channels on _Reshape_97(), #idxs=3
[218] prune_out_channels on _Reshape_97() => prune_out_channels on _ElementWiseOp_98(AddmmBackward0), #idxs=3
[219] prune_out_channels on _ElementWiseOp_98(AddmmBackward0) => prune_out_channels on _Reshape_99(), #idxs=3
[220] prune_out_channels on _ElementWiseOp_98(AddmmBackward0) => prune_out_channels on _ElementWiseOp_100(TBackward0), #idxs=3
[221] prune_out_channels on _Reshape_99() => prune_out_channels on _Reshape_101(), #idxs=3
[222] prune_out_channels on _Reshape_101() => prune_out_channels on _Reshape_102(), #idxs=3
[223] prune_out_channels on _Reshape_102() => prune_out_channels on _ElementWiseOp_103(PermuteBackward0), #idxs=3
[224] prune_out_channels on _ElementWiseOp_103(PermuteBackward0) => prune_out_channels on _Reshape_104(), #idxs=3
[225] prune_out_channels on _Reshape_104() => prune_out_channels on _ElementWiseOp_105(BmmBackward0), #idxs=3
[226] prune_out_channels on _ElementWiseOp_105(BmmBackward0) => prune_out_channels on _Reshape_106(), #idxs=3
[227] prune_out_channels on _ElementWiseOp_105(BmmBackward0) => prune_out_channels on _Reshape_107(), #idxs=3
[228] prune_out_channels on _Reshape_107() => prune_out_channels on _ElementWiseOp_108(PermuteBackward0), #idxs=3
[229] prune_out_channels on _ElementWiseOp_108(PermuteBackward0) => prune_out_channels on _ElementWiseOp_109(PermuteBackward0), #idxs=3
[230] prune_out_channels on _ElementWiseOp_109(PermuteBackward0) => prune_out_channels on _ElementWiseOp_110(UnsqueezeBackward0), #idxs=3
[231] prune_out_channels on _ElementWiseOp_110(UnsqueezeBackward0) => prune_out_channels on _ElementWiseOp_111(SoftmaxBackward0), #idxs=3
[232] prune_out_channels on _ElementWiseOp_111(SoftmaxBackward0) => prune_out_channels on _Reshape_112(), #idxs=3
[233] prune_out_channels on _Reshape_112() => prune_out_channels on _ElementWiseOp_113(PermuteBackward0), #idxs=3
[234] prune_out_channels on _ElementWiseOp_113(PermuteBackward0) => prune_out_channels on _Reshape_114(), #idxs=3
[235] prune_out_channels on _Reshape_114() => prune_out_channels on _Reshape_115(), #idxs=3
[236] prune_out_channels on _Reshape_115() => prune_out_channels on _ElementWiseOp_116(AddmmBackward0), #idxs=3
[237] prune_out_channels on _ElementWiseOp_116(AddmmBackward0) => prune_out_channels on _Reshape_117(), #idxs=3
[238] prune_out_channels on _ElementWiseOp_116(AddmmBackward0) => prune_out_channels on _ElementWiseOp_118(TBackward0), #idxs=3
[239] prune_out_channels on _ElementWiseOp_96(AddBackward0) => prune_out_channels on _ElementWiseOp_119(AddBackward0), #idxs=3
[240] prune_out_channels on _ElementWiseOp_96(AddBackward0) => prune_out_channels on _Reshape_120(), #idxs=3
[241] prune_out_channels on _Reshape_120() => prune_out_channels on _ElementWiseOp_121(AddmmBackward0), #idxs=3
[242] prune_out_channels on _ElementWiseOp_121(AddmmBackward0) => prune_out_channels on _Reshape_122(), #idxs=3
[243] prune_out_channels on _ElementWiseOp_121(AddmmBackward0) => prune_out_channels on _ElementWiseOp_123(TBackward0), #idxs=3
[244] prune_out_channels on _Reshape_122() => prune_out_channels on _ElementWiseOp_124(AsStridedBackward0), #idxs=3
[245] prune_out_channels on _ElementWiseOp_124(AsStridedBackward0) => prune_out_channels on _ElementWiseOp_125(torch::autograd::CopySlices), #idxs=3
[246] prune_out_channels on _ElementWiseOp_125(torch::autograd::CopySlices) => prune_out_channels on _ElementWiseOp_126(AddmmBackward0), #idxs=3
[247] prune_out_channels on _ElementWiseOp_126(AddmmBackward0) => prune_out_channels on _Reshape_127(), #idxs=3
[248] prune_out_channels on _ElementWiseOp_126(AddmmBackward0) => prune_out_channels on _ElementWiseOp_128(TBackward0), #idxs=3
[249] prune_out_channels on _Reshape_127() => prune_out_channels on _ElementWiseOp_129(NativeLayerNormBackward0), #idxs=3
[250] prune_out_channels on _ElementWiseOp_119(AddBackward0) => prune_out_channels on _ElementWiseOp_130(AddBackward0), #idxs=3
[251] prune_out_channels on _ElementWiseOp_119(AddBackward0) => prune_out_channels on _Reshape_131(), #idxs=3
[252] prune_out_channels on _Reshape_131() => prune_out_channels on _ElementWiseOp_132(AddmmBackward0), #idxs=3
[253] prune_out_channels on _ElementWiseOp_132(AddmmBackward0) => prune_out_channels on _Reshape_133(), #idxs=3
[254] prune_out_channels on _ElementWiseOp_132(AddmmBackward0) => prune_out_channels on _ElementWiseOp_134(TBackward0), #idxs=3
[255] prune_out_channels on _Reshape_133() => prune_out_channels on _Reshape_135(), #idxs=3
[256] prune_out_channels on _Reshape_135() => prune_out_channels on _ElementWiseOp_136(CloneBackward0), #idxs=3
[257] prune_out_channels on _ElementWiseOp_136(CloneBackward0) => prune_out_channels on _ElementWiseOp_137(TransposeBackward0), #idxs=3
[258] prune_out_channels on _ElementWiseOp_137(TransposeBackward0) => prune_out_channels on _Reshape_138(), #idxs=3
[259] prune_out_channels on _Reshape_138() => prune_out_channels on _ElementWiseOp_139(BmmBackward0), #idxs=3
[260] prune_out_channels on _ElementWiseOp_139(BmmBackward0) => prune_out_channels on _Reshape_140(), #idxs=3
[261] prune_out_channels on _ElementWiseOp_139(BmmBackward0) => prune_out_channels on _Reshape_141(), #idxs=3
[262] prune_out_channels on _Reshape_141() => prune_out_channels on _ElementWiseOp_142(ExpandBackward0), #idxs=3
[263] prune_out_channels on _ElementWiseOp_142(ExpandBackward0) => prune_out_channels on _ElementWiseOp_143(UnbindBackward0), #idxs=3
[264] prune_out_channels on _ElementWiseOp_143(UnbindBackward0) => prune_out_channels on _ElementWiseOp_144(PermuteBackward0), #idxs=3
[265] prune_out_channels on _ElementWiseOp_143(UnbindBackward0) => prune_out_channels on _ElementWiseOp_249(TransposeBackward0), #idxs=3
[266] prune_out_channels on _ElementWiseOp_143(UnbindBackward0) => prune_out_channels on _ElementWiseOp_251(MulBackward0), #idxs=3
[267] prune_out_channels on _ElementWiseOp_251(MulBackward0) => prune_out_channels on _ElementWiseOp_250(ExpandBackward0), #idxs=3
[268] prune_out_channels on _ElementWiseOp_250(ExpandBackward0) => prune_out_channels on _Reshape_246(), #idxs=3
[269] prune_out_channels on _Reshape_246() => prune_out_channels on _ElementWiseOp_245(BmmBackward0), #idxs=3
[270] prune_out_channels on _ElementWiseOp_245(BmmBackward0) => prune_out_channels on _Reshape_247(), #idxs=3
[271] prune_out_channels on _ElementWiseOp_245(BmmBackward0) => prune_out_channels on _Reshape_244(), #idxs=3
[272] prune_out_channels on _Reshape_244() => prune_out_channels on _ElementWiseOp_243(SoftmaxBackward0), #idxs=3
[273] prune_out_channels on _ElementWiseOp_243(SoftmaxBackward0) => prune_out_channels on _ElementWiseOp_242(ExpandBackward0), #idxs=3
[274] prune_out_channels on _Reshape_247() => prune_out_channels on _ElementWiseOp_248(ExpandBackward0), #idxs=3
[275] prune_out_channels on _ElementWiseOp_144(PermuteBackward0) => prune_out_channels on _Reshape_145(), #idxs=3
[276] prune_out_channels on _Reshape_145() => prune_out_channels on _Reshape_146(), #idxs=3
[277] prune_out_channels on _Reshape_146() => prune_out_channels on _ElementWiseOp_147(MmBackward0), #idxs=3
[278] prune_out_channels on _ElementWiseOp_147(MmBackward0) => prune_out_channels on _Reshape_148(), #idxs=3
[279] prune_out_channels on _ElementWiseOp_147(MmBackward0) => prune_out_channels on _ElementWiseOp_149(TBackward0), #idxs=3
[280] prune_out_channels on _Reshape_148() => prune_out_channels on _ElementWiseOp_150(NativeLayerNormBackward0), #idxs=3
[281] prune_out_channels on _ElementWiseOp_130(AddBackward0) => prune_out_channels on _Reshape_151(), #idxs=3
[282] prune_out_channels on _ElementWiseOp_130(AddBackward0) => prune_out_channels on _Reshape_152(), #idxs=3
[283] prune_out_channels on _Reshape_152() => prune_out_channels on _ElementWiseOp_153(AddmmBackward0), #idxs=3
[284] prune_out_channels on _ElementWiseOp_153(AddmmBackward0) => prune_out_channels on _Reshape_154(), #idxs=3
[285] prune_out_channels on _ElementWiseOp_153(AddmmBackward0) => prune_out_channels on _ElementWiseOp_155(TBackward0), #idxs=3
[286] prune_out_channels on _Reshape_154() => prune_out_channels on _Reshape_156(), #idxs=3
[287] prune_out_channels on _Reshape_156() => prune_out_channels on _Reshape_157(), #idxs=3
[288] prune_out_channels on _Reshape_157() => prune_out_channels on _ElementWiseOp_158(PermuteBackward0), #idxs=3
[289] prune_out_channels on _ElementWiseOp_158(PermuteBackward0) => prune_out_channels on _Reshape_159(), #idxs=3
[290] prune_out_channels on _Reshape_159() => prune_out_channels on _ElementWiseOp_160(BmmBackward0), #idxs=3
[291] prune_out_channels on _ElementWiseOp_160(BmmBackward0) => prune_out_channels on _Reshape_161(), #idxs=3
[292] prune_out_channels on _ElementWiseOp_160(BmmBackward0) => prune_out_channels on _Reshape_162(), #idxs=3
[293] prune_out_channels on _Reshape_162() => prune_out_channels on _ElementWiseOp_163(PermuteBackward0), #idxs=3
[294] prune_out_channels on _ElementWiseOp_163(PermuteBackward0) => prune_out_channels on _ElementWiseOp_164(PermuteBackward0), #idxs=3
[295] prune_out_channels on _ElementWiseOp_164(PermuteBackward0) => prune_out_channels on _ElementWiseOp_165(UnsqueezeBackward0), #idxs=3
[296] prune_out_channels on _ElementWiseOp_165(UnsqueezeBackward0) => prune_out_channels on _ElementWiseOp_166(SoftmaxBackward0), #idxs=3
[297] prune_out_channels on _ElementWiseOp_166(SoftmaxBackward0) => prune_out_channels on _Reshape_167(), #idxs=3
[298] prune_out_channels on _Reshape_167() => prune_out_channels on _ElementWiseOp_168(PermuteBackward0), #idxs=3
[299] prune_out_channels on _ElementWiseOp_168(PermuteBackward0) => prune_out_channels on _Reshape_169(), #idxs=3
[300] prune_out_channels on _Reshape_169() => prune_out_channels on _Reshape_170(), #idxs=3
[301] prune_out_channels on _Reshape_170() => prune_out_channels on _ElementWiseOp_171(AddmmBackward0), #idxs=3
[302] prune_out_channels on _ElementWiseOp_171(AddmmBackward0) => prune_out_channels on _Reshape_172(), #idxs=3
[303] prune_out_channels on _ElementWiseOp_171(AddmmBackward0) => prune_out_channels on _ElementWiseOp_173(TBackward0), #idxs=3
[304] prune_out_channels on _Reshape_151() => prune_out_channels on _ElementWiseOp_174(AddmmBackward0), #idxs=3
[305] prune_out_channels on _ElementWiseOp_174(AddmmBackward0) => prune_out_channels on _ElementWiseOp_175(TBackward0), #idxs=3
[306] prune_out_channels on _Reshape_161() => prune_out_channels on _ElementWiseOp_176(PermuteBackward0), #idxs=3
[307] prune_out_channels on _ElementWiseOp_176(PermuteBackward0) => prune_out_channels on _ElementWiseOp_177(PermuteBackward0), #idxs=3
[308] prune_out_channels on _ElementWiseOp_177(PermuteBackward0) => prune_out_channels on _Reshape_178(), #idxs=3
[309] prune_out_channels on _Reshape_178() => prune_out_channels on _ElementWiseOp_179(PermuteBackward0), #idxs=3
[310] prune_out_channels on _Reshape_106() => prune_out_channels on _ElementWiseOp_252(PermuteBackward0), #idxs=3
[311] prune_out_channels on _ElementWiseOp_252(PermuteBackward0) => prune_out_channels on _ElementWiseOp_253(PermuteBackward0), #idxs=3
[312] prune_out_channels on _ElementWiseOp_253(PermuteBackward0) => prune_out_channels on _Reshape_254(), #idxs=3
[313] prune_out_channels on _Reshape_254() => prune_out_channels on _ElementWiseOp_255(PermuteBackward0), #idxs=3
[314] prune_out_channels on _Reshape_360() => prune_out_channels on _ElementWiseOp_387(PermuteBackward0), #idxs=1
[315] prune_out_channels on _ElementWiseOp_387(PermuteBackward0) => prune_out_channels on _Reshape_388(), #idxs=1
[316] prune_out_channels on _Reshape_388() => prune_out_channels on _Reshape_389(), #idxs=1
[317] prune_out_channels on _Reshape_389() => prune_out_channels on _ElementWiseOp_390(AddmmBackward0), #idxs=1
[318] prune_out_channels on _ElementWiseOp_390(AddmmBackward0) => prune_out_channels on _Reshape_391(), #idxs=1
[319] prune_out_channels on _ElementWiseOp_390(AddmmBackward0) => prune_out_channels on _ElementWiseOp_392(TBackward0), #idxs=1
[320] prune_out_channels on _Reshape_391() => prune_out_channels on _ElementWiseOp_393(ReluBackward0), #idxs=1
[321] prune_out_channels on _ElementWiseOp_393(ReluBackward0) => prune_out_channels on _ElementWiseOp_394(AddBackward0), #idxs=1
[322] prune_out_channels on _ElementWiseOp_394(AddBackward0) => prune_out_channels on _Reshape_395(), #idxs=1
[323] prune_out_channels on _Reshape_395() => prune_out_channels on _ElementWiseOp_396(MmBackward0), #idxs=1
[324] prune_out_channels on _ElementWiseOp_396(MmBackward0) => prune_out_channels on _Reshape_397(), #idxs=1
[325] prune_out_channels on _ElementWiseOp_396(MmBackward0) => prune_out_channels on _ElementWiseOp_398(TBackward0), #idxs=1
[326] prune_out_channels on _Reshape_397() => prune_out_channels on _Reshape_351(), #idxs=1
[327] prune_out_channels on _Reshape_351() => prune_out_channels on _ElementWiseOp_352(PermuteBackward0), #idxs=1
[328] prune_out_channels on _Reshape_351() => prune_out_channels on _Reshape_349(), #idxs=1
[329] prune_out_channels on _Reshape_349() => prune_out_channels on _ElementWiseOp_348(MmBackward0), #idxs=1
[330] prune_out_channels on _ElementWiseOp_348(MmBackward0) => prune_out_channels on _ElementWiseOp_350(TBackward0), #idxs=1
[331] prune_out_channels on _ElementWiseOp_348(MmBackward0) => prune_out_channels on _Reshape_347(), #idxs=1
[332] prune_out_channels on _Reshape_347() => prune_out_channels on _ElementWiseOp_346(AddBackward0), #idxs=1
[333] prune_out_channels on _ElementWiseOp_346(AddBackward0) => prune_out_channels on _ElementWiseOp_345(ReluBackward0), #idxs=1
[334] prune_out_channels on _ElementWiseOp_345(ReluBackward0) => prune_out_channels on _Reshape_343(), #idxs=1
[335] prune_out_channels on _Reshape_343() => prune_out_channels on _ElementWiseOp_342(AddmmBackward0), #idxs=1
[336] prune_out_channels on _ElementWiseOp_342(AddmmBackward0) => prune_out_channels on _ElementWiseOp_344(TBackward0), #idxs=1
[337] prune_out_channels on _ElementWiseOp_342(AddmmBackward0) => prune_out_channels on _Reshape_341(), #idxs=1
[338] prune_out_channels on _Reshape_341() => prune_out_channels on _Reshape_340(), #idxs=1
[339] prune_out_channels on _Reshape_340() => prune_out_channels on _ElementWiseOp_339(PermuteBackward0), #idxs=1
[340] prune_out_channels on _ElementWiseOp_352(PermuteBackward0) => prune_out_channels on _Reshape_353(), #idxs=1
[341] prune_out_channels on _Reshape_353() => prune_out_channels on encoder.neck.layer_blocks.0.0 (Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))), #idxs=1
[342] prune_out_channels on encoder.neck.layer_blocks.0.0 (Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))) => prune_out_channels on _ElementWiseOp_239(torch::autograd::CppNode<vision::ops::(anonymous namespace)::ROIAlignFunction>), #idxs=1
[343] prune_out_channels on _ElementWiseOp_239(torch::autograd::CppNode<vision::ops::(anonymous namespace)::ROIAlignFunction>) => prune_out_channels on _Reshape_238(), #idxs=1
[344] prune_out_channels on _Reshape_238() => prune_out_channels on _ElementWiseOp_237(PermuteBackward0), #idxs=1
[345] prune_out_channels on _ElementWiseOp_237(PermuteBackward0) => prune_out_channels on _Reshape_182(), #idxs=1
[346] prune_out_channels on _ElementWiseOp_322(PermuteBackward0) => prune_out_channels on _Reshape_323(), #idxs=1
[347] prune_out_channels on _Reshape_323() => prune_out_channels on encoder.neck.layer_blocks.2.0 (Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))), #idxs=1
[348] prune_out_channels on encoder.neck.layer_blocks.2.0 (Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))) => prune_out_channels on _ElementWiseOp_187(torch::autograd::CppNode<vision::ops::(anonymous namespace)::ROIAlignFunction>), #idxs=1
[349] prune_out_channels on _ElementWiseOp_187(torch::autograd::CppNode<vision::ops::(anonymous namespace)::ROIAlignFunction>) => prune_out_channels on _Reshape_186(), #idxs=1
[350] prune_out_channels on _Reshape_186() => prune_out_channels on _ElementWiseOp_185(PermuteBackward0), #idxs=1
[351] prune_out_channels on _ElementWiseOp_185(PermuteBackward0) => prune_out_channels on _Reshape_184(), #idxs=1
--------------------------------