codedddddifficult opened this issue 1 year ago
Many thanks for taking the time to reproduce our work!
This issue might be due to the shape of the input tensor. It would be great if you could provide us with the input image size and the shape `B, L, D` in `deiqt.py`:
# Start from line 149
def forward(self, x, ref):
    B, L, D = x.shape
`B` is 1, `L` is 196, and `D` is 384; the input is randomly generated by `torch.randn(1, 3, 224, 224)`. I have also tried `torch.randn(16, 3, 224, 224)`, and it does not work either. Other settings are consistent with your paper.
I don't think the code runs properly when `juery_nums` is set to 6; this leads to a shape mismatch.
Many thanks for the provided information!
I will look into it tomorrow. This might be related to unexpected behavior during shape manipulation.
Would you mind posting the `deiqt.py` you are currently using here?
This is the `deiqt.py` I am currently using. Many thanks for your reply! (My first paste lost its code formatting, so here is a cleaned-up repost.)
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import DropPath, trunc_normal_
from timm.models.vision_transformer import Mlp

# from torchinfo import summary
from torchvision.transforms.functional import resize


class PatchEmbed(nn.Module):
    """2D Image to Patch Embedding"""

    def __init__(
        self,
        patch_size=16,
        in_chans=3,
        embed_dim=768,
        norm_layer=None,
        flatten=True,
    ):
        super().__init__()
        self.flatten = flatten
        self.proj = nn.Conv2d(
            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
        )
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        x = self.proj(x)
        if self.flatten:
            x = x.flatten(2).transpose(1, 2)  # BCHW -> BNC
        x = self.norm(x)
        return x


class Attention(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    def __init__(
        self,
        dim,
        num_heads=8,
        qkv_bias=False,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
    ):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim**-0.5
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = (
            self.qkv(x)
            .reshape(B, N, 3, self.num_heads, C // self.num_heads)
            .permute(2, 0, 3, 1, 4)
        )
        q, k, v = qkv[0], qkv[1], qkv[2]
        q = q * self.scale
        attn = q @ k.transpose(-2, -1)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class Block(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

    def forward(self, x):
        x = x + self.drop_path(self.attn(self.norm1(x)))
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x


class Experts_MOS(nn.Module):
    def __init__(
        self,
        embed_dim=768,
        juery_nums=6,
    ):
        super().__init__()
        self.juery = juery_nums
        bunch_layer = nn.TransformerDecoderLayer(
            d_model=embed_dim,
            dropout=0.0,
            nhead=6,
            # activation=F.gelu,
            activation='gelu',
            # batch_first=True,
            dim_feedforward=(embed_dim * 4),
            # norm_first=True,
        )
        self.bunch_decoder = nn.TransformerDecoder(bunch_layer, num_layers=1)
        self.bunch_embedding = nn.Parameter(torch.randn(1, self.juery, embed_dim))
        self.heads = nn.Linear(embed_dim, 1, bias=False)
        trunc_normal_(self.bunch_embedding, std=0.02)

    def forward(self, x, ref):
        B, L, D = x.shape
        bunch_embedding = self.bunch_embedding.expand(B, -1, -1)
        ref = ref.view(B, 1, -1)
        ref = ref.expand(B, self.juery, -1)
        output_embedding = bunch_embedding + ref
        x = self.bunch_decoder(output_embedding, x)
        # x = self.bunch_decoder(x)
        x = self.heads(x)
        x = x.view(B, -1).mean(dim=1)
        return x.view(B, 1)


class Layer_scale_init_Block(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    # with slight modifications
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
        self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)

    def forward(self, x):
        x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
        x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
        return x


class Layer_scale_init_Block_paralx2(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    # with slight modifications
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.norm11 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.attn1 = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.norm21 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.mlp1 = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
        self.gamma_1_1 = nn.Parameter(
            init_values * torch.ones((dim)), requires_grad=True
        )
        self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
        self.gamma_2_1 = nn.Parameter(
            init_values * torch.ones((dim)), requires_grad=True
        )

    def forward(self, x):
        x = (
            x
            + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
            + self.drop_path(self.gamma_1_1 * self.attn1(self.norm11(x)))
        )
        x = (
            x
            + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
            + self.drop_path(self.gamma_2_1 * self.mlp1(self.norm21(x)))
        )
        return x


class Block_paralx2(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    # with slight modifications
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.norm11 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.attn1 = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.norm21 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.mlp1 = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

    def forward(self, x):
        x = (
            x
            + self.drop_path(self.attn(self.norm1(x)))
            + self.drop_path(self.attn1(self.norm11(x)))
        )
        x = (
            x
            + self.drop_path(self.mlp(self.norm2(x)))
            + self.drop_path(self.mlp1(self.norm21(x)))
        )
        return x


class deiqt_models(nn.Module):
    """Vision Transformer with LayerScale (https://arxiv.org/abs/2103.17239) support"""

    def __init__(
        self,
        patch_size=16,
        in_chans=3,
        num_classes=1,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,
        norm_layer=nn.LayerNorm,
        global_pool=None,
        block_layers=Block,
        Patch_layer=PatchEmbed,
        act_layer=nn.GELU,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_scale=1e-4,
    ):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim
        self.patch_embed = Patch_layer(
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
        num_patches = 196
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
        dpr = [drop_path_rate for i in range(depth)]
        self.blocks = nn.ModuleList(
            [
                block_layers(
                    dim=embed_dim,
                    num_heads=num_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=0.0,
                    attn_drop=attn_drop_rate,
                    drop_path=dpr[i],
                    norm_layer=norm_layer,
                    act_layer=act_layer,
                    Attention_block=Attention_block,
                    Mlp_block=Mlp_block,
                    init_values=init_scale,
                )
                for i in range(depth)
            ]
        )
        self.norm = norm_layer(embed_dim)
        self.feature_info = [dict(num_chs=embed_dim, reduction=0, module="head")]
        # self.head = (
        #     nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
        # )
        self.head = Experts_MOS(embed_dim=384, juery_nums=6)
        trunc_normal_(self.pos_embed, std=0.02)
        trunc_normal_(self.cls_token, std=0.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {"pos_embed", "cls_token"}

    def get_classifier(self):
        return self.head

    def get_num_layers(self):
        return len(self.blocks)

    def reset_classifier(self, num_classes, global_pool=""):
        self.num_classes = num_classes
        self.head = (
            nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
        )

    def forward_features(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = x + self.pos_embed
        x = torch.cat((cls_tokens, x), dim=1)
        for i, blk in enumerate(self.blocks):
            x = blk(x)
        x = self.norm(x)
        return x[:, 0], x[:, 1:, :]

    def forward(self, x):
        ref, x = self.forward_features(x)
        x = self.head(x, ref)
        return x


def build_deiqt(
    patch_size=16,
    embed_dim=384,
    depth=12,
    num_heads=6,
    mlp_ratio=4,
    qkv_bias=True,
    norm_layer=partial(nn.LayerNorm, eps=1e-6),
    block_layers=Layer_scale_init_Block,
    pretrained=False,
    pretrained_model_path="",
    infer=False,
    infer_model_path="",
):
    model = deiqt_models(
        patch_size=patch_size,
        embed_dim=embed_dim,
        depth=depth,
        num_heads=num_heads,
        mlp_ratio=mlp_ratio,
        qkv_bias=qkv_bias,
        norm_layer=norm_layer,
        block_layers=block_layers,
    )
    if pretrained:
        assert pretrained_model_path != ""
        checkpoint = torch.load(pretrained_model_path, map_location="cpu")
        state_dict = checkpoint["model"]
        del state_dict["head.weight"]
        del state_dict["head.bias"]
        model.load_state_dict(state_dict, strict=False)
        del checkpoint
        torch.cuda.empty_cache()
    elif infer:
        assert infer_model_path != ""
        checkpoint = torch.load(infer_model_path, map_location="cpu")
        state_dict = checkpoint["model"]
        model.load_state_dict(state_dict, strict=True)
        del checkpoint
        torch.cuda.empty_cache()
    return model


if __name__ == "__main__":
    model = build_deiqt(
        pretrained=True,
        pretrained_model_path="/opt/data/private/python_project_3090TI/DEIQT-main/deit_3_small_224_1k.pth",
    )
    input1 = torch.randn(1, 3, 224, 224)
    output = model(input1)
    print(output)
    # summary(model, input_data=[input1], device=torch.device("cpu"))
Hi, sorry for the late reply.
It looks like the mismatch is caused by commenting out these lines:
# batch_first=True,
dim_feedforward=(embed_dim * 4),
# norm_first=True,
Currently, our tensors follow the (batch, seq, feature) pattern, so commenting out `batch_first=True` makes the decoder fall back to the default (seq, batch, feature) layout, which may cause the problem.
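For context, here is a minimal sketch (not the repository code; shapes taken from this thread: 6 queries, 196 patch tokens, embed_dim 384) of how `batch_first` changes what `nn.TransformerDecoder` expects. It assumes PyTorch >= 1.9, where the argument was introduced:

```python
import torch
import torch.nn as nn

# With batch_first=True the decoder consumes (batch, seq, feature) directly;
# without it, the same tensors are read as (seq, batch, feature) and the
# shapes inside multi-head attention no longer line up.
embed_dim = 384
layer = nn.TransformerDecoderLayer(
    d_model=embed_dim, nhead=6, dim_feedforward=embed_dim * 4, batch_first=True
)
decoder = nn.TransformerDecoder(layer, num_layers=1)

queries = torch.randn(1, 6, embed_dim)   # (batch, juery_nums, feature)
memory = torch.randn(1, 196, embed_dim)  # (batch, patch tokens, feature)
print(decoder(queries, memory).shape)    # torch.Size([1, 6, 384])
```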
Thank you for your reply. My torch version can't set this parameter, but I solved the bug by changing the shape of the input. However, I still ran into problems at the training stage: the code you posted is for distributed training, which I don't know much about, so I deleted the distributed-training parts and put your network-modeling code into my own training framework. The model trains very poorly, and I can't pin down the exact problem. I was wondering if you have code for single-GPU training; if you do, please post it to the project. I am very interested in your work and would love to reproduce the results of the paper. Thanks for your reply!
```python
output_embedding = bunch_embedding + ref
output_embedding = output_embedding.view(6, B, -1)
```
This is how I solved the previous error; if anyone has the same problem, you can use this code to solve it.
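A note on that workaround: `.view(6, B, -1)` reinterprets memory order rather than swapping the batch and query axes, so each query may no longer stay paired with its batch item. On a PyTorch version without `batch_first`, a `transpose`-based conversion is likely safer. A hedged, self-contained sketch of the idea (shapes assumed from the thread):

```python
import torch
import torch.nn as nn

# Sketch for pre-1.9 PyTorch (no batch_first argument): transpose to
# (seq, batch, feature) before the decoder and back afterwards. Unlike
# .view(6, B, -1), transpose swaps the axes instead of reinterpreting
# the underlying memory layout.
B, juery, L, D = 2, 6, 196, 384
layer = nn.TransformerDecoderLayer(d_model=D, nhead=6, dim_feedforward=D * 4)
decoder = nn.TransformerDecoder(layer, num_layers=1)

output_embedding = torch.randn(B, juery, D).transpose(0, 1)  # (juery, B, D)
memory = torch.randn(B, L, D).transpose(0, 1)                # (L, B, D)
out = decoder(output_embedding, memory).transpose(0, 1)      # (B, juery, D)
print(out.shape)  # torch.Size([2, 6, 384])
```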
Hi,
You can use `conda` to create a Python environment and install a newer version of PyTorch in order to reproduce this project smoothly.
Also, the DDP method works for single-GPU training. Alter the parameters of the training bash script like:
CUDA_VISIBLE_DEVICES=[target_gpuid] OMP_NUM_THREADS=1 torchrun --nnodes 1 --nproc_per_node 1 --master_port 26500 main.py \
--cfg [CONFIG_PATH] \
--data-path [YOUR_DATA_PATH] \
--output [LOG_PATH] \
--tag [REMARK_TAG] \
--repeat \
--rnum [TARGET_REPEAT_NUM]
There, use `CUDA_VISIBLE_DEVICES` to select the GPU; if there is only one GPU in the server, just set `CUDA_VISIBLE_DEVICES=0`. And `--nproc_per_node 1` means using one GPU.
Thanks for your reply. I get this error when running with the command-line parameters:
RuntimeError: Found dtype Float but expected Half
It seems the loss dtype error is caused by `criterion = torch.nn.SmoothL1Loss()`. If I use `criterion = torch.nn.L1Loss()` instead, the code runs, but at runtime it reports `nan` or `inf`, and the SRCC and PLCC are 0. Since it is run from the terminal via the command line, I cannot debug it. Do you know how to fix this error?
Hi,
May I ask which GPU and which platform (Linux distro or Windows) you are using? This may be caused by FP16.
Linux (Ubuntu 18.04) and an NVIDIA GeForce RTX 3090 GPU.
You may change this in the `config.py` file, starting from line 138, and see if it works:
# Enable Pytorch automatic mixed precision (amp).
_C.AMP_ENABLE = False
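If you later want to keep AMP enabled, one common pattern is to compute the loss in fp32 outside the autocast region. This is a generic `torch.cuda.amp` sketch (not the repository's training loop; names are illustrative) of that idea:

```python
import torch

# Assumes a CUDA device. Under autocast the model output is fp16; casting
# it to fp32 before a loss like SmoothL1Loss avoids Float/Half mismatches.
model = torch.nn.Linear(8, 1).cuda()
criterion = torch.nn.SmoothL1Loss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()

images = torch.randn(4, 8, device="cuda")
targets = torch.randn(4, 1, device="cuda")

with torch.cuda.amp.autocast():
    preds = model(images)
loss = criterion(preds.float(), targets)  # fp32 loss
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
```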
Thank you very much, it works, but the results look wrong. Why is that?
[2023-09-15 03:59:47 production](main.py 405): INFO Train: [8/9][10/145] eta 0:00:22 lr 0.000020 wd 0.0500 time 0.1241 (0.1698) loss 13.9743 (15.3007) grad_norm 11.7249 (9.0613) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:48 production](main.py 405): INFO Train: [8/9][20/145] eta 0:00:18 lr 0.000020 wd 0.0500 time 0.1336 (0.1490) loss 14.7644 (15.6272) grad_norm 15.8456 (10.5005) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:50 production](main.py 405): INFO Train: [8/9][30/145] eta 0:00:17 lr 0.000020 wd 0.0500 time 0.1457 (0.1554) loss 12.4224 (15.2793) grad_norm 2.5085 (12.6704) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:51 production](main.py 405): INFO Train: [8/9][40/145] eta 0:00:16 lr 0.000020 wd 0.0500 time 0.1340 (0.1543) loss 16.7534 (15.2032) grad_norm 118.8225 (15.4571) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:53 production](main.py 405): INFO Train: [8/9][50/145] eta 0:00:14 lr 0.000020 wd 0.0500 time 0.1230 (0.1486) loss 14.9455 (15.2179) grad_norm 4.3450 (14.8898) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:54 production](main.py 405): INFO Train: [8/9][60/145] eta 0:00:12 lr 0.000020 wd 0.0500 time 0.1248 (0.1464) loss 13.4326 (15.3112) grad_norm 3.5536 (25.9888) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:55 production](main.py 405): INFO Train: [8/9][70/145] eta 0:00:10 lr 0.000020 wd 0.0500 time 0.1276 (0.1436) loss 14.5125 (15.3156) grad_norm 6.1413 (25.5014) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:56 production](main.py 405): INFO Train: [8/9][80/145] eta 0:00:09 lr 0.000020 wd 0.0500 time 0.1313 (0.1419) loss 12.2901 (15.3235) grad_norm 10.6931 (23.4222) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:58 production](main.py 405): INFO Train: [8/9][90/145] eta 0:00:07 lr 0.000020 wd 0.0500 time 0.1294 (0.1412) loss 14.9130 (15.3589) grad_norm 6.2556 (21.6458) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:59 production](main.py 405): INFO Train: [8/9][100/145] eta 0:00:06 lr 0.000020 wd 0.0500 time 0.1326 (0.1404) loss 15.9189 (15.3879) grad_norm 5.4959 (20.1029) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:00 production](main.py 405): INFO Train: [8/9][110/145] eta 0:00:04 lr 0.000020 wd 0.0500 time 0.1301 (0.1393) loss 16.8583 (15.4616) grad_norm 5.1588 (19.0928) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:02 production](main.py 405): INFO Train: [8/9][120/145] eta 0:00:03 lr 0.000020 wd 0.0500 time 0.1325 (0.1388) loss 13.2129 (15.4145) grad_norm 11.1597 (18.6324) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:03 production](main.py 405): INFO Train: [8/9][130/145] eta 0:00:02 lr 0.000020 wd 0.0500 time 0.1392 (0.1385) loss 20.8696 (15.4988) grad_norm 10.4850 (18.2855) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:05 production](main.py 405): INFO Train: [8/9][140/145] eta 0:00:00 lr 0.000020 wd 0.0500 time 0.1271 (0.1393) loss 15.7048 (15.4331) grad_norm 5.4928 (17.9841) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:05 production](main.py 430): INFO EPOCH 8 training takes 0:00:20
[2023-09-15 04:00:05 production](main.py 432): INFO EPOCH 8 training SRCC: 0.2147427648305893
[2023-09-15 04:00:06 production](main.py 474): INFO Test: [0/37] Time 0.472 (0.472) Loss 18.0407 (18.0407) Mem 6032MB
[2023-09-15 04:00:06 production](main.py 474): INFO Test: [10/37] Time 0.050 (0.086) Loss 17.9505 (15.1827) Mem 6032MB
[2023-09-15 04:00:07 production](main.py 474): INFO Test: [20/37] Time 0.047 (0.068) Loss 14.6563 (14.6139) Mem 6032MB
[2023-09-15 04:00:07 production](main.py 474): INFO Test: [30/37] Time 0.044 (0.061) Loss 11.5299 (14.8856) Mem 6032MB
[2023-09-15 04:00:08 production](main.py 515): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-15 04:00:08 production](main.py 521): INFO * SRCC@ 0.000000 PLCC@ 0.000000 KLCC@ 0.000000 MSE@ 0.000000
[2023-09-15 04:00:08 production](main.py 291): INFO SRCC, PLCC, KLCC and MSE of the network on the 2320 test images: 0.000000, 0.000000, 0.000000, 0.000000
[2023-09-15 04:00:08 production](main.py 302): INFO Max PLCC: 0.000000 Max SRCC: 0.000000 Max KLCC: 0.000000 Min MSE: 0.000000
[2023-09-15 04:00:08 production](main.py 313): INFO Training time 0:03:05
Also, I have another question: can I run this file without passing parameters on the command line in the terminal? Can I add the parameters `CUDA_VISIBLE_DEVICES=0 OMP_NUM_THREADS=1 torchrun --nnodes 1 --nproc_per_node 1 --master_port 26500` to the `config.py` file and run it directly?
Hi,
You may refer to this issue: https://github.com/narthchin/DEIQT/issues/2#issuecomment-1585726378. This may be caused by a deprecated version of TorchMetrics.
Thank you for your answer.
I replaced `train_srcc = torchmetrics.functional.spearman_corrcoef(pred_scores, gt_scores).item()` with `train_srcc = stats.spearmanr(pred_scores, gt_scores)[0]`, and replaced
```python
test_srcc = torchmetrics.functional.spearman_corrcoef(final_preds, final_grotruth).item()
test_plcc = torchmetrics.functional.pearson_corrcoef(final_preds, final_grotruth).item()
test_klcc = torchmetrics.functional.kendall_rank_corrcoef(final_preds, final_grotruth).item()
meanse = torchmetrics.functional.mean_squared_error(final_grotruth, final_preds).item()
```
with
```python
test_plcc = stats.pearsonr(final_preds, final_grotruth)[0]
test_srcc = stats.spearmanr(final_preds, final_grotruth)[0]
test_klcc = stats.kendalltau(final_preds, final_grotruth)[0]
meanse = np.sqrt(((final_grotruth - final_preds) ** 2).mean())
```
It still gives abnormal results, so it seems the problem is not caused by the TorchMetrics version.
livec
local rank 0 / global rank 0 successfully build train dataset
local rank 0 / global rank 0 successfully build val dataset
All checkpoints founded in output/production/default: []
[2023-09-25 13:58:34 production](main.py 215): INFO no checkpoint found in output/production/default, ignoring auto resume
[2023-09-25 13:58:34 production](main.py 241): INFO Start training
[2023-09-25 13:58:35 production](main.py 405): INFO Train: [0/9][0/145] eta 0:02:31 lr 0.000000 wd 0.0500 time 1.0420 (1.0420) loss 51.5245 (51.5245) grad_norm 18.3894 (18.3894) loss_scale 65536.0000 (65536.0000) mem 5746MB
[2023-09-25 13:58:36 production](main.py 405): INFO Train: [0/9][10/145] eta 0:00:30 lr 0.000005 wd 0.0500 time 0.1234 (0.2238) loss 54.2166 (54.3862) grad_norm 18.0205 (18.2184) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:38 production](main.py 405): INFO Train: [0/9][20/145] eta 0:00:21 lr 0.000009 wd 0.0500 time 0.1198 (0.1759) loss 54.4660 (54.5129) grad_norm 19.0406 (18.5653) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:39 production](main.py 405): INFO Train: [0/9][30/145] eta 0:00:18 lr 0.000014 wd 0.0500 time 0.1204 (0.1583) loss 50.3433 (53.5920) grad_norm 20.6562 (19.2838) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:40 production](main.py 405): INFO Train: [0/9][40/145] eta 0:00:15 lr 0.000019 wd 0.0500 time 0.1171 (0.1498) loss 50.4588 (52.8594) grad_norm 20.7780 (19.8638) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:41 production](main.py 405): INFO Train: [0/9][50/145] eta 0:00:13 lr 0.000023 wd 0.0500 time 0.1196 (0.1443) loss 48.3381 (52.0807) grad_norm 19.5036 (20.0408) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:43 production](main.py 405): INFO Train: [0/9][60/145] eta 0:00:11 lr 0.000028 wd 0.0500 time 0.1218 (0.1406) loss 51.5196 (51.4429) grad_norm 19.7387 (19.9043) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:44 production](main.py 405): INFO Train: [0/9][70/145] eta 0:00:10 lr 0.000032 wd 0.0500 time 0.1225 (0.1381) loss 48.8798 (50.9413) grad_norm 19.3804 (19.7687) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:45 production](main.py 405): INFO Train: [0/9][80/145] eta 0:00:08 lr 0.000037 wd 0.0500 time 0.1199 (0.1360) loss 45.2658 (50.4357) grad_norm 19.3119 (19.6613) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:46 production](main.py 405): INFO Train: [0/9][90/145] eta 0:00:07 lr 0.000042 wd 0.0500 time 0.1192 (0.1343) loss 48.2108 (49.9828) grad_norm 19.0532 (19.5866) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:48 production](main.py 405): INFO Train: [0/9][100/145] eta 0:00:06 lr 0.000046 wd 0.0500 time 0.1212 (0.1349) loss 49.2067 (49.6316) grad_norm 17.8389 (19.5048) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:49 production](main.py 405): INFO Train: [0/9][110/145] eta 0:00:04 lr 0.000051 wd 0.0500 time 0.1211 (0.1337) loss 49.2202 (49.4307) grad_norm 18.1328 (19.4249) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:50 production](main.py 405): INFO Train: [0/9][120/145] eta 0:00:03 lr 0.000055 wd 0.0500 time 0.1221 (0.1327) loss 47.8627 (49.1650) grad_norm 17.8832 (19.3906) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:51 production](main.py 405): INFO Train: [0/9][130/145] eta 0:00:01 lr 0.000060 wd 0.0500 time 0.1261 (0.1320) loss 43.5014 (48.9742) grad_norm 18.9191 (19.3701) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:52 production](main.py 405): INFO Train: [0/9][140/145] eta 0:00:00 lr 0.000065 wd 0.0500 time 0.1122 (0.1310) loss 45.5670 (48.7704) grad_norm 18.5525 (19.3308) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:53 production](main.py 428): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:58:53 production](main.py 431): INFO EPOCH 0 training takes 0:00:19
[2023-09-25 13:58:53 production](main.py 433): INFO EPOCH 0 training SRCC: 0.0
[2023-09-25 13:58:54 production](main.py 475): INFO Test: [0/37] Time 0.560 (0.560) Loss 56.0027 (56.0027) Mem 6032MB
[2023-09-25 13:58:54 production](main.py 475): INFO Test: [10/37] Time 0.045 (0.086) Loss 48.4336 (44.6171) Mem 6032MB
[2023-09-25 13:58:54 production](main.py 475): INFO Test: [20/37] Time 0.030 (0.060) Loss 49.7623 (43.4446) Mem 6032MB
[2023-09-25 13:58:55 production](main.py 475): INFO Test: [30/37] Time 0.035 (0.051) Loss 44.4684 (43.8490) Mem 6032MB
[2023-09-25 13:58:55 production](main.py 520): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:58:55 production](main.py 526): INFO * SRCC@ 0.000000 PLCC@ 0.000000 KLCC@ 0.000000 MSE@ 0.000000
[2023-09-25 13:58:55 production](main.py 291): INFO SRCC, PLCC, KLCC and MSE of the network on the 2320 test images: 0.000000, 0.000000, 0.000000, 0.000000
[2023-09-25 13:58:55 production](main.py 302): INFO Max PLCC: 0.000000 Max SRCC: 0.000000 Max KLCC: 0.000000 Min MSE: 0.000000
[2023-09-25 13:58:56 production](main.py 405): INFO Train: [1/9][0/145] eta 0:01:29 lr 0.000067 wd 0.0500 time 0.6181 (0.6181) loss 41.7851 (41.7851) grad_norm 19.1839 (19.1839) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:57 production](main.py 405): INFO Train: [1/9][10/145] eta 0:00:22 lr 0.000071 wd 0.0500 time 0.1165 (0.1661) loss 45.5195 (44.6139) grad_norm 17.9857 (18.5262) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:58 production](main.py 405): INFO Train: [1/9][20/145] eta 0:00:17 lr 0.000076 wd 0.0500 time 0.1169 (0.1427) loss 44.6802 (45.0129) grad_norm 17.6853 (18.6308) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:59 production](main.py 405): INFO Train: [1/9][30/145] eta 0:00:15 lr 0.000081 wd 0.0500 time 0.1239 (0.1354) loss 46.5246 (44.5022) grad_norm 19.2854 (18.5435) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:00 production](main.py 405): INFO Train: [1/9][40/145] eta 0:00:13 lr 0.000085 wd 0.0500 time 0.1233 (0.1320) loss 45.5295 (44.7515) grad_norm 18.7031 (18.6315) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:02 production](main.py 405): INFO Train: [1/9][50/145] eta 0:00:12 lr 0.000090 wd 0.0500 time 0.1226 (0.1304) loss 45.4512 (44.4446) grad_norm 18.2945 (18.5039) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:03 production](main.py 405): INFO Train: [1/9][60/145] eta 0:00:11 lr 0.000094 wd 0.0500 time 0.1187 (0.1325) loss 42.6684 (44.3864) grad_norm 18.2046 (18.4989) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:04 production](main.py 405): INFO Train: [1/9][70/145] eta 0:00:09 lr 0.000099 wd 0.0500 time 0.1208 (0.1311) loss 44.0814 (44.2235) grad_norm 19.7774 (18.5609) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:06 production](main.py 405): INFO Train: [1/9][80/145] eta 0:00:08 lr 0.000104 wd 0.0500 time 0.1407 (0.1300) loss 35.6219 (44.0901) grad_norm 17.7082 (18.5313) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:07 production](main.py 405): INFO Train: [1/9][90/145] eta 0:00:07 lr 0.000108 wd 0.0500 time 0.1147 (0.1291) loss 40.7175 (43.9106) grad_norm 17.9483 (18.4705) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:08 production](main.py 405): INFO Train: [1/9][100/145] eta 0:00:05 lr 0.000113 wd 0.0500 time 0.1173 (0.1278) loss 42.0859 (43.7717) grad_norm 20.2704 (18.4737) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:09 production](main.py 405): INFO Train: [1/9][110/145] eta 0:00:04 lr 0.000117 wd 0.0500 time 0.1215 (0.1274) loss 44.3554 (43.6578) grad_norm 17.1600 (18.4896) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:10 production](main.py 405): INFO Train: [1/9][120/145] eta 0:00:03 lr 0.000122 wd 0.0500 time 0.1194 (0.1271) loss 39.6943 (43.4319) grad_norm 17.8580 (18.4773) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:12 production](main.py 405): INFO Train: [1/9][130/145] eta 0:00:01 lr 0.000127 wd 0.0500 time 0.1161 (0.1266) loss 40.0683 (43.2875) grad_norm 19.8450 (18.4690) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:13 production](main.py 405): INFO Train: [1/9][140/145] eta 0:00:00 lr 0.000131 wd 0.0500 time 0.1154 (0.1258) loss 39.8664 (43.1566) grad_norm 18.7870 (18.5023) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:13 production](main.py 428): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:59:13 production](main.py 431): INFO EPOCH 1 training takes 0:00:18
[2023-09-25 13:59:13 production](main.py 433): INFO EPOCH 1 training SRCC: 0.0
[2023-09-25 13:59:14 production](main.py 475): INFO Test: [0/37] Time 0.453 (0.453) Loss 49.5697 (49.5697) Mem 6032MB
[2023-09-25 13:59:14 production](main.py 475): INFO Test: [10/37] Time 0.030 (0.074) Loss 42.0031 (38.8118) Mem 6032MB
[2023-09-25 13:59:14 production](main.py 475): INFO Test: [20/37] Time 0.056 (0.056) Loss 43.3311 (37.5637) Mem 6032MB
[2023-09-25 13:59:15 production](main.py 475): INFO Test: [30/37] Time 0.030 (0.048) Loss 38.0376 (37.8997) Mem 6032MB
[2023-09-25 13:59:15 production](main.py 520): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:59:15 production](main.py 526): INFO * SRCC@ 0.000000 PLCC@ 0.000000 KLCC@ 0.000000 MSE@ 0.000000
[2023-09-25 13:59:15 production](main.py 291): INFO SRCC, PLCC, KLCC and MSE of the network on the 2320 test images: 0.000000, 0.000000, 0.000000, 0.000000
[2023-09-25 13:59:15 production](main.py 302): INFO Max PLCC: 0.000000 Max SRCC: 0.000000 Max KLCC: 0.000000 Min MSE: 0.000000
[2023-09-25 13:59:16 production](main.py 405): INFO Train: [2/9][0/145] eta 0:01:14 lr 0.000133 wd 0.0500 time 0.5143 (0.5143) loss 39.7050 (39.7050) grad_norm 18.0310 (18.0310) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:17 production](main.py 405): INFO Train: [2/9][10/145] eta 0:00:23 lr 0.000138 wd 0.0500 time 0.1215 (0.1739) loss 37.5351 (38.9642) grad_norm 18.7536 (18.3816) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:18 production](main.py 405): INFO Train: [2/9][20/145] eta 0:00:18 lr 0.000143 wd 0.0500 time 0.1245 (0.1497) loss 40.3877 (38.5386) grad_norm 20.1349 (18.4151) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:19 production](main.py 405): INFO Train: [2/9][30/145] eta 0:00:16 lr 0.000147 wd 0.0500 time 0.1256 (0.1418) loss 40.1826 (38.4531) grad_norm 19.1897 (18.3674) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:21 production](main.py 405): INFO Train: [2/9][40/145] eta 0:00:14 lr 0.000152 wd 0.0500 time 0.1186 (0.1366) loss 37.4121 (38.1109) grad_norm 17.0464 (18.3071) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:22 production](main.py 405): INFO Train: [2/9][50/145] eta 0:00:12 lr 0.000156 wd 0.0500 time 0.1197 (0.1341) loss 32.4053 (37.7854) grad_norm 15.8273 (18.2193) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:23 production](main.py 405): INFO Train: [2/9][60/145] eta 0:00:11 lr 0.000161 wd 0.0500 time 0.1247 (0.1331) loss 38.7977 (37.5562) grad_norm 19.2067 (18.2199) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:25 production](main.py 405): INFO Train: [2/9][70/145] eta 0:00:10 lr 0.000166 wd 0.0500 time 0.1348 (0.1363) loss 33.2279 (37.4895) grad_norm 15.7284 (18.1450) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:26 production](main.py 405): INFO Train: [2/9][80/145] eta 0:00:08 lr 0.000170 wd 0.0500 time 0.1240 (0.1350) loss 33.5445 (37.2308) grad_norm 19.4514 (18.1625) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:27 production](main.py 405): INFO Train: [2/9][90/145] eta 0:00:07 lr 0.000175 wd 0.0500 time 0.1302 (0.1341) loss 33.1387 (36.8237) grad_norm 16.2680 (18.0534) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:29 production](main.py 405): INFO Train: [2/9][100/145] eta 0:00:06 lr 0.000179 wd 0.0500 time 0.1296 (0.1333) loss 32.9727 (36.4948) grad_norm 15.6902 (18.0132) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:30 production](main.py 405): INFO Train: [2/9][110/145] eta 0:00:04 lr 0.000184 wd 0.0500 time 0.1262 (0.1340) loss 33.5445 (36.0466) grad_norm 18.3916 (17.9320) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:31 production](main.py 405): INFO Train: [2/9][120/145] eta 0:00:03 lr 0.000189 wd 0.0500 time 0.1282 (0.1336) loss 30.0741 (35.8019) grad_norm 15.9190 (17.9332) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:32 production](main.py 405): INFO Train: [2/9][130/145] eta 0:00:01 lr 0.000193 wd 0.0500 time 0.1263 (0.1330) loss 28.0526 (35.3896) grad_norm 17.9300 (17.8820) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:34 production](main.py 405): INFO Train: [2/9][140/145] eta 0:00:00 lr 0.000198 wd 0.0500 time 0.1388 (0.1346) loss 29.3030 (34.9654) grad_norm 13.7097 (17.7245) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:35 production](main.py 428): WARNING Array contains NaN or infs. Resetting cc relation to zero...
@codedddddifficult I also encountered this problem. Have you solved it?
Thank you very much for your work on this paper. However, I ran into a problem when reproducing deiqt.py. How can I solve this error, which keeps appearing during the network's forward pass?
x = self.bunch_decoder(output_embedding, x)  # line where the error is raised
RuntimeError: shape '[-1, 36, 64]' is invalid for input of size 75264
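For anyone hitting the same trace, a plausible reading that matches the `batch_first` diagnosis earlier in the thread: without `batch_first=True` the decoder reads tensors as `(seq, batch, feature)`, so the `(1, 6, 384)` queries are taken as batch size 6, and cross-attention then tries to view the `(1, 196, 384)` memory (1 × 196 × 384 = 75264 elements) as `(-1, 6 batch × 6 heads = 36, 64)`, which does not divide evenly. A minimal sketch (the exact error wording may vary across PyTorch versions):

```python
import torch
import torch.nn as nn

# batch_first defaults to False, so the decoder expects (seq, batch, feature).
layer = nn.TransformerDecoderLayer(d_model=384, nhead=6)
decoder = nn.TransformerDecoder(layer, num_layers=1)

tgt = torch.randn(1, 6, 384)       # intended as (batch, queries, feature)
memory = torch.randn(1, 196, 384)  # intended as (batch, patches, feature)
decoder(tgt, memory)  # raises a reshape error like:
                      # shape '[-1, 36, 64]' is invalid for input of size 75264
```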