codedddddifficult opened this issue 1 year ago
Many thanks for taking the time to reproduce our work!
This issue might be due to the shape of the input tensor. It would be great if you could provide us with the input image size and the shape `B, L, D` in `deiqt.py`:
# Start from line 149
def forward(self, x, ref):
    B, L, D = x.shape
`B` is 1, `L` is 196, and `D` is 384; the input is randomly generated by `torch.randn(1, 3, 224, 224)`. I have also tried `torch.randn(16, 3, 224, 224)`, and it does not work either. Other settings are consistent with your paper.
I don't think the code runs properly when `juery_nums` is set to 6; this leads to a shape mismatch.
Many thanks for the provided information!
I will look into it tomorrow. This might be related to unexpected behavior during shape manipulation.
Would you mind posting the `deiqt.py` you are currently using here?
This is the `deiqt.py` I am currently using. Many thanks for your reply! (My first paste lost its code formatting, so here is a cleaned-up repost.)
from functools import partial

import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import DropPath, trunc_normal_
from timm.models.vision_transformer import Mlp

# from torchinfo import summary
from torchvision.transforms.functional import resize


class PatchEmbed(nn.Module):
    """2D Image to Patch Embedding"""

    def __init__(
        self,
        patch_size=16,
        in_chans=3,
        embed_dim=768,
        norm_layer=None,
        flatten=True,
    ):
        super().__init__()
        self.flatten = flatten
        self.proj = nn.Conv2d(
            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
        )
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        x = self.proj(x)
        if self.flatten:
            x = x.flatten(2).transpose(1, 2)  # BCHW -> BNC
        x = self.norm(x)
        return x


class Attention(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    def __init__(
        self,
        dim,
        num_heads=8,
        qkv_bias=False,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
    ):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim**-0.5
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = (
            self.qkv(x)
            .reshape(B, N, 3, self.num_heads, C // self.num_heads)
            .permute(2, 0, 3, 1, 4)
        )
        q, k, v = qkv[0], qkv[1], qkv[2]
        q = q * self.scale
        attn = q @ k.transpose(-2, -1)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class Block(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

    def forward(self, x):
        x = x + self.drop_path(self.attn(self.norm1(x)))
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x


class Experts_MOS(nn.Module):
    def __init__(
        self,
        embed_dim=768,
        juery_nums=6,
    ):
        super().__init__()
        self.juery = juery_nums
        bunch_layer = nn.TransformerDecoderLayer(
            d_model=embed_dim,
            dropout=0.0,
            nhead=6,
            # activation=F.gelu,
            activation='gelu',
            # batch_first=True,
            dim_feedforward=(embed_dim * 4),
            # norm_first=True,
        )
        self.bunch_decoder = nn.TransformerDecoder(bunch_layer, num_layers=1)
        self.bunch_embedding = nn.Parameter(torch.randn(1, self.juery, embed_dim))
        self.heads = nn.Linear(embed_dim, 1, bias=False)
        trunc_normal_(self.bunch_embedding, std=0.02)

    def forward(self, x, ref):
        B, L, D = x.shape
        bunch_embedding = self.bunch_embedding.expand(B, -1, -1)
        ref = ref.view(B, 1, -1)
        ref = ref.expand(B, self.juery, -1)
        output_embedding = bunch_embedding + ref
        x = self.bunch_decoder(output_embedding, x)
        # x = self.bunch_decoder(x)
        x = self.heads(x)
        x = x.view(B, -1).mean(dim=1)
        return x.view(B, 1)


class Layer_scale_init_Block(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    # with slight modifications
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
        self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)

    def forward(self, x):
        x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
        x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
        return x


class Layer_scale_init_Block_paralx2(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    # with slight modifications
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.norm11 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.attn1 = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.norm21 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.mlp1 = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
        self.gamma_1_1 = nn.Parameter(
            init_values * torch.ones((dim)), requires_grad=True
        )
        self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
        self.gamma_2_1 = nn.Parameter(
            init_values * torch.ones((dim)), requires_grad=True
        )

    def forward(self, x):
        x = (
            x
            + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
            + self.drop_path(self.gamma_1_1 * self.attn1(self.norm11(x)))
        )
        x = (
            x
            + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
            + self.drop_path(self.gamma_2_1 * self.mlp1(self.norm21(x)))
        )
        return x


class Block_paralx2(nn.Module):
    # taken from https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    # with slight modifications
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_values=1e-4,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.norm11 = norm_layer(dim)
        self.attn = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.attn1 = Attention_block(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.norm21 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
        self.mlp1 = Mlp_block(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

    def forward(self, x):
        x = (
            x
            + self.drop_path(self.attn(self.norm1(x)))
            + self.drop_path(self.attn1(self.norm11(x)))
        )
        x = (
            x
            + self.drop_path(self.mlp(self.norm2(x)))
            + self.drop_path(self.mlp1(self.norm21(x)))
        )
        return x


class deiqt_models(nn.Module):
    """Vision Transformer with LayerScale (https://arxiv.org/abs/2103.17239) support"""

    def __init__(
        self,
        patch_size=16,
        in_chans=3,
        num_classes=1,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,
        norm_layer=nn.LayerNorm,
        global_pool=None,
        block_layers=Block,
        Patch_layer=PatchEmbed,
        act_layer=nn.GELU,
        Attention_block=Attention,
        Mlp_block=Mlp,
        init_scale=1e-4,
    ):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim
        self.patch_embed = Patch_layer(
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
        num_patches = 196
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
        dpr = [drop_path_rate for i in range(depth)]
        self.blocks = nn.ModuleList(
            [
                block_layers(
                    dim=embed_dim,
                    num_heads=num_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=0.0,
                    attn_drop=attn_drop_rate,
                    drop_path=dpr[i],
                    norm_layer=norm_layer,
                    act_layer=act_layer,
                    Attention_block=Attention_block,
                    Mlp_block=Mlp_block,
                    init_values=init_scale,
                )
                for i in range(depth)
            ]
        )
        self.norm = norm_layer(embed_dim)
        self.feature_info = [dict(num_chs=embed_dim, reduction=0, module="head")]
        # self.head = (
        #     nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
        # )
        self.head = Experts_MOS(embed_dim=384, juery_nums=6)
        trunc_normal_(self.pos_embed, std=0.02)
        trunc_normal_(self.cls_token, std=0.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {"pos_embed", "cls_token"}

    def get_classifier(self):
        return self.head

    def get_num_layers(self):
        return len(self.blocks)

    def reset_classifier(self, num_classes, global_pool=""):
        self.num_classes = num_classes
        self.head = (
            nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
        )

    def forward_features(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = x + self.pos_embed
        x = torch.cat((cls_tokens, x), dim=1)
        for i, blk in enumerate(self.blocks):
            x = blk(x)
        x = self.norm(x)
        return x[:, 0], x[:, 1:, :]

    def forward(self, x):
        ref, x = self.forward_features(x)
        x = self.head(x, ref)
        return x


def build_deiqt(
    patch_size=16,
    embed_dim=384,
    depth=12,
    num_heads=6,
    mlp_ratio=4,
    qkv_bias=True,
    norm_layer=partial(nn.LayerNorm, eps=1e-6),
    block_layers=Layer_scale_init_Block,
    pretrained=False,
    pretrained_model_path="",
    infer=False,
    infer_model_path="",
):
    model = deiqt_models(
        patch_size=patch_size,
        embed_dim=embed_dim,
        depth=depth,
        num_heads=num_heads,
        mlp_ratio=mlp_ratio,
        qkv_bias=qkv_bias,
        norm_layer=norm_layer,
        block_layers=block_layers,
    )
    if pretrained:
        assert pretrained_model_path != ""
        checkpoint = torch.load(pretrained_model_path, map_location="cpu")
        state_dict = checkpoint["model"]
        del state_dict["head.weight"]
        del state_dict["head.bias"]
        model.load_state_dict(state_dict, strict=False)
        del checkpoint
        torch.cuda.empty_cache()
    elif infer:
        assert infer_model_path != ""
        checkpoint = torch.load(infer_model_path, map_location="cpu")
        state_dict = checkpoint["model"]
        model.load_state_dict(state_dict, strict=True)
        del checkpoint
        torch.cuda.empty_cache()
    return model


if __name__ == "__main__":
    model = build_deiqt(
        pretrained=True,
        pretrained_model_path="/opt/data/private/python_project_3090TI/DEIQT-main/deit_3_small_224_1k.pth",
    )
    input1 = torch.randn(1, 3, 224, 224)
    output = model(input1)
    print(output)
    # summary(model, input_data=[input1], device=torch.device("cpu"))
Hi, sorry for the late reply.
It looks like the mismatch is caused by commenting out these lines:
# batch_first=True,
dim_feedforward=(embed_dim * 4),
# norm_first=True,
Currently, our tensors follow the (batch, seq, feature) pattern, so commenting out `batch_first=True` makes the decoder fall back to the default (seq, batch, feature) layout, which may cause the problem.
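For context, here is a minimal sketch (not the repository code; shapes taken from this thread: 6 queries, 196 patch tokens, embed_dim 384) of how `batch_first` changes what `nn.TransformerDecoder` expects. It assumes PyTorch >= 1.9, where the argument was introduced:

```python
import torch
import torch.nn as nn

# With batch_first=True the decoder consumes (batch, seq, feature) directly;
# without it, the same tensors are read as (seq, batch, feature) and the
# shapes inside multi-head attention no longer line up.
embed_dim = 384
layer = nn.TransformerDecoderLayer(
    d_model=embed_dim, nhead=6, dim_feedforward=embed_dim * 4, batch_first=True
)
decoder = nn.TransformerDecoder(layer, num_layers=1)

queries = torch.randn(1, 6, embed_dim)   # (batch, juery_nums, feature)
memory = torch.randn(1, 196, embed_dim)  # (batch, patch tokens, feature)
print(decoder(queries, memory).shape)    # torch.Size([1, 6, 384])
```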
Thank you for your reply. My torch version can't set this parameter, but I solved the bug by changing the shape of the input. However, I still ran into problems at the training stage: the code you posted is for distributed training, which I don't know much about, so I deleted the distributed-training parts and put your network-modeling code into my own training framework. The model trains very poorly, and I can't pin down the exact problem. I was wondering if you have code for single-GPU training; if you do, please post it to the project. I am very interested in your work and would love to reproduce the results of the paper. Thanks for your reply!
```python
output_embedding = bunch_embedding + ref
output_embedding = output_embedding.view(6, B, -1)
```
This is how I solved the previous error; if anyone has the same problem, you can use this code to solve it.
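A note on that workaround: `.view(6, B, -1)` reinterprets memory order rather than swapping the batch and query axes, so each query may no longer stay paired with its batch item. On a PyTorch version without `batch_first`, a `transpose`-based conversion is likely safer. A hedged, self-contained sketch of the idea (shapes assumed from the thread):

```python
import torch
import torch.nn as nn

# Sketch for pre-1.9 PyTorch (no batch_first argument): transpose to
# (seq, batch, feature) before the decoder and back afterwards. Unlike
# .view(6, B, -1), transpose swaps the axes instead of reinterpreting
# the underlying memory layout.
B, juery, L, D = 2, 6, 196, 384
layer = nn.TransformerDecoderLayer(d_model=D, nhead=6, dim_feedforward=D * 4)
decoder = nn.TransformerDecoder(layer, num_layers=1)

output_embedding = torch.randn(B, juery, D).transpose(0, 1)  # (juery, B, D)
memory = torch.randn(B, L, D).transpose(0, 1)                # (L, B, D)
out = decoder(output_embedding, memory).transpose(0, 1)      # (B, juery, D)
print(out.shape)  # torch.Size([2, 6, 384])
```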
Hi,
You can use `conda` to create a Python environment and install a newer version of PyTorch in order to reproduce this project smoothly.
Also, the DDP method works for single-GPU training. Alter the parameters of the training bash script like:
CUDA_VISIBLE_DEVICES=[target_gpuid] OMP_NUM_THREADS=1 torchrun --nnodes 1 --nproc_per_node 1 --master_port 26500 main.py \
--cfg [CONFIG_PATH] \
--data-path [YOUR_DATA_PATH] \
--output [LOG_PATH] \
--tag [REMARK_TAG] \
--repeat \
--rnum [TARGET_REPEAT_NUM]
There, use `CUDA_VISIBLE_DEVICES` to select the GPU; if there is only one GPU in the server, just set `CUDA_VISIBLE_DEVICES=0`. And `--nproc_per_node 1` means using one GPU.
Thanks for your reply. I get this error when running with the command-line parameters:
RuntimeError: Found dtype Float but expected Half
It seems the loss dtype error is caused by `criterion = torch.nn.SmoothL1Loss()`. If I use `criterion = torch.nn.L1Loss()` instead, the code runs, but at runtime it reports `nan` or `inf`, and the SRCC and PLCC are 0. Since it is run from the terminal via the command line, I cannot debug it. Do you know how to fix this error?
Hi,
May I ask which GPU and which platform (Linux distro or Windows) you are using? This may be caused by FP16.
Linux (Ubuntu 18.04) and an NVIDIA GeForce RTX 3090 GPU.
You may change this in the `config.py` file, starting from line 138, and see if it works:
# Enable Pytorch automatic mixed precision (amp).
_C.AMP_ENABLE = False
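If you later want to keep AMP enabled, one common pattern is to compute the loss in fp32 outside the autocast region. This is a generic `torch.cuda.amp` sketch (not the repository's training loop; names are illustrative) of that idea:

```python
import torch

# Assumes a CUDA device. Under autocast the model output is fp16; casting
# it to fp32 before a loss like SmoothL1Loss avoids Float/Half mismatches.
model = torch.nn.Linear(8, 1).cuda()
criterion = torch.nn.SmoothL1Loss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scaler = torch.cuda.amp.GradScaler()

images = torch.randn(4, 8, device="cuda")
targets = torch.randn(4, 1, device="cuda")

with torch.cuda.amp.autocast():
    preds = model(images)
loss = criterion(preds.float(), targets)  # fp32 loss
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
```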
Thank you very much, it works, but the results look wrong. Why is that?
[2023-09-15 03:59:47 production](main.py 405): INFO Train: [8/9][10/145] eta 0:00:22 lr 0.000020 wd 0.0500 time 0.1241 (0.1698) loss 13.9743 (15.3007) grad_norm 11.7249 (9.0613) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:48 production](main.py 405): INFO Train: [8/9][20/145] eta 0:00:18 lr 0.000020 wd 0.0500 time 0.1336 (0.1490) loss 14.7644 (15.6272) grad_norm 15.8456 (10.5005) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:50 production](main.py 405): INFO Train: [8/9][30/145] eta 0:00:17 lr 0.000020 wd 0.0500 time 0.1457 (0.1554) loss 12.4224 (15.2793) grad_norm 2.5085 (12.6704) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:51 production](main.py 405): INFO Train: [8/9][40/145] eta 0:00:16 lr 0.000020 wd 0.0500 time 0.1340 (0.1543) loss 16.7534 (15.2032) grad_norm 118.8225 (15.4571) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:53 production](main.py 405): INFO Train: [8/9][50/145] eta 0:00:14 lr 0.000020 wd 0.0500 time 0.1230 (0.1486) loss 14.9455 (15.2179) grad_norm 4.3450 (14.8898) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:54 production](main.py 405): INFO Train: [8/9][60/145] eta 0:00:12 lr 0.000020 wd 0.0500 time 0.1248 (0.1464) loss 13.4326 (15.3112) grad_norm 3.5536 (25.9888) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:55 production](main.py 405): INFO Train: [8/9][70/145] eta 0:00:10 lr 0.000020 wd 0.0500 time 0.1276 (0.1436) loss 14.5125 (15.3156) grad_norm 6.1413 (25.5014) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:56 production](main.py 405): INFO Train: [8/9][80/145] eta 0:00:09 lr 0.000020 wd 0.0500 time 0.1313 (0.1419) loss 12.2901 (15.3235) grad_norm 10.6931 (23.4222) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:58 production](main.py 405): INFO Train: [8/9][90/145] eta 0:00:07 lr 0.000020 wd 0.0500 time 0.1294 (0.1412) loss 14.9130 (15.3589) grad_norm 6.2556 (21.6458) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 03:59:59 production](main.py 405): INFO Train: [8/9][100/145] eta 0:00:06 lr 0.000020 wd 0.0500 time 0.1326 (0.1404) loss 15.9189 (15.3879) grad_norm 5.4959 (20.1029) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:00 production](main.py 405): INFO Train: [8/9][110/145] eta 0:00:04 lr 0.000020 wd 0.0500 time 0.1301 (0.1393) loss 16.8583 (15.4616) grad_norm 5.1588 (19.0928) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:02 production](main.py 405): INFO Train: [8/9][120/145] eta 0:00:03 lr 0.000020 wd 0.0500 time 0.1325 (0.1388) loss 13.2129 (15.4145) grad_norm 11.1597 (18.6324) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:03 production](main.py 405): INFO Train: [8/9][130/145] eta 0:00:02 lr 0.000020 wd 0.0500 time 0.1392 (0.1385) loss 20.8696 (15.4988) grad_norm 10.4850 (18.2855) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:05 production](main.py 405): INFO Train: [8/9][140/145] eta 0:00:00 lr 0.000020 wd 0.0500 time 0.1271 (0.1393) loss 15.7048 (15.4331) grad_norm 5.4928 (17.9841) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-15 04:00:05 production](main.py 430): INFO EPOCH 8 training takes 0:00:20
[2023-09-15 04:00:05 production](main.py 432): INFO EPOCH 8 training SRCC: 0.2147427648305893
[2023-09-15 04:00:06 production](main.py 474): INFO Test: [0/37] Time 0.472 (0.472) Loss 18.0407 (18.0407) Mem 6032MB
[2023-09-15 04:00:06 production](main.py 474): INFO Test: [10/37] Time 0.050 (0.086) Loss 17.9505 (15.1827) Mem 6032MB
[2023-09-15 04:00:07 production](main.py 474): INFO Test: [20/37] Time 0.047 (0.068) Loss 14.6563 (14.6139) Mem 6032MB
[2023-09-15 04:00:07 production](main.py 474): INFO Test: [30/37] Time 0.044 (0.061) Loss 11.5299 (14.8856) Mem 6032MB
[2023-09-15 04:00:08 production](main.py 515): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-15 04:00:08 production](main.py 521): INFO * SRCC@ 0.000000 PLCC@ 0.000000 KLCC@ 0.000000 MSE@ 0.000000
[2023-09-15 04:00:08 production](main.py 291): INFO SRCC, PLCC, KLCC and MSE of the network on the 2320 test images: 0.000000, 0.000000, 0.000000, 0.000000
[2023-09-15 04:00:08 production](main.py 302): INFO Max PLCC: 0.000000 Max SRCC: 0.000000 Max KLCC: 0.000000 Min MSE: 0.000000
[2023-09-15 04:00:08 production](main.py 313): INFO Training time 0:03:05
Also, I have another question: can I run this file without passing parameters on the command line in the terminal? Can I add the parameters `CUDA_VISIBLE_DEVICES=0 OMP_NUM_THREADS=1 torchrun --nnodes 1 --nproc_per_node 1 --master_port 26500` to the `config.py` file and run it directly?
Hi,
You may refer to this issue: https://github.com/narthchin/DEIQT/issues/2#issuecomment-1585726378. This may be caused by a deprecated version of TorchMetrics.
Thank you for your answer.
I replaced `train_srcc = torchmetrics.functional.spearman_corrcoef(pred_scores, gt_scores).item()` with `train_srcc = stats.spearmanr(pred_scores, gt_scores)[0]`, and replaced
```python
test_srcc = torchmetrics.functional.spearman_corrcoef(final_preds, final_grotruth).item()
test_plcc = torchmetrics.functional.pearson_corrcoef(final_preds, final_grotruth).item()
test_klcc = torchmetrics.functional.kendall_rank_corrcoef(final_preds, final_grotruth).item()
meanse = torchmetrics.functional.mean_squared_error(final_grotruth, final_preds).item()
```
with
```python
test_plcc = stats.pearsonr(final_preds, final_grotruth)[0]
test_srcc = stats.spearmanr(final_preds, final_grotruth)[0]
test_klcc = stats.kendalltau(final_preds, final_grotruth)[0]
meanse = np.sqrt(((final_grotruth - final_preds) ** 2).mean())
```
It still gives abnormal results, so it seems the problem is not caused by the TorchMetrics version.
livec
local rank 0 / global rank 0 successfully build train dataset
local rank 0 / global rank 0 successfully build val dataset
All checkpoints founded in output/production/default: []
[2023-09-25 13:58:34 production](main.py 215): INFO no checkpoint found in output/production/default, ignoring auto resume
[2023-09-25 13:58:34 production](main.py 241): INFO Start training
[2023-09-25 13:58:35 production](main.py 405): INFO Train: [0/9][0/145] eta 0:02:31 lr 0.000000 wd 0.0500 time 1.0420 (1.0420) loss 51.5245 (51.5245) grad_norm 18.3894 (18.3894) loss_scale 65536.0000 (65536.0000) mem 5746MB
[2023-09-25 13:58:36 production](main.py 405): INFO Train: [0/9][10/145] eta 0:00:30 lr 0.000005 wd 0.0500 time 0.1234 (0.2238) loss 54.2166 (54.3862) grad_norm 18.0205 (18.2184) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:38 production](main.py 405): INFO Train: [0/9][20/145] eta 0:00:21 lr 0.000009 wd 0.0500 time 0.1198 (0.1759) loss 54.4660 (54.5129) grad_norm 19.0406 (18.5653) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:39 production](main.py 405): INFO Train: [0/9][30/145] eta 0:00:18 lr 0.000014 wd 0.0500 time 0.1204 (0.1583) loss 50.3433 (53.5920) grad_norm 20.6562 (19.2838) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:40 production](main.py 405): INFO Train: [0/9][40/145] eta 0:00:15 lr 0.000019 wd 0.0500 time 0.1171 (0.1498) loss 50.4588 (52.8594) grad_norm 20.7780 (19.8638) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:41 production](main.py 405): INFO Train: [0/9][50/145] eta 0:00:13 lr 0.000023 wd 0.0500 time 0.1196 (0.1443) loss 48.3381 (52.0807) grad_norm 19.5036 (20.0408) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:43 production](main.py 405): INFO Train: [0/9][60/145] eta 0:00:11 lr 0.000028 wd 0.0500 time 0.1218 (0.1406) loss 51.5196 (51.4429) grad_norm 19.7387 (19.9043) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:44 production](main.py 405): INFO Train: [0/9][70/145] eta 0:00:10 lr 0.000032 wd 0.0500 time 0.1225 (0.1381) loss 48.8798 (50.9413) grad_norm 19.3804 (19.7687) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:45 production](main.py 405): INFO Train: [0/9][80/145] eta 0:00:08 lr 0.000037 wd 0.0500 time 0.1199 (0.1360) loss 45.2658 (50.4357) grad_norm 19.3119 (19.6613) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:46 production](main.py 405): INFO Train: [0/9][90/145] eta 0:00:07 lr 0.000042 wd 0.0500 time 0.1192 (0.1343) loss 48.2108 (49.9828) grad_norm 19.0532 (19.5866) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:48 production](main.py 405): INFO Train: [0/9][100/145] eta 0:00:06 lr 0.000046 wd 0.0500 time 0.1212 (0.1349) loss 49.2067 (49.6316) grad_norm 17.8389 (19.5048) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:49 production](main.py 405): INFO Train: [0/9][110/145] eta 0:00:04 lr 0.000051 wd 0.0500 time 0.1211 (0.1337) loss 49.2202 (49.4307) grad_norm 18.1328 (19.4249) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:50 production](main.py 405): INFO Train: [0/9][120/145] eta 0:00:03 lr 0.000055 wd 0.0500 time 0.1221 (0.1327) loss 47.8627 (49.1650) grad_norm 17.8832 (19.3906) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:51 production](main.py 405): INFO Train: [0/9][130/145] eta 0:00:01 lr 0.000060 wd 0.0500 time 0.1261 (0.1320) loss 43.5014 (48.9742) grad_norm 18.9191 (19.3701) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:52 production](main.py 405): INFO Train: [0/9][140/145] eta 0:00:00 lr 0.000065 wd 0.0500 time 0.1122 (0.1310) loss 45.5670 (48.7704) grad_norm 18.5525 (19.3308) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:53 production](main.py 428): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:58:53 production](main.py 431): INFO EPOCH 0 training takes 0:00:19
[2023-09-25 13:58:53 production](main.py 433): INFO EPOCH 0 training SRCC: 0.0
[2023-09-25 13:58:54 production](main.py 475): INFO Test: [0/37] Time 0.560 (0.560) Loss 56.0027 (56.0027) Mem 6032MB
[2023-09-25 13:58:54 production](main.py 475): INFO Test: [10/37] Time 0.045 (0.086) Loss 48.4336 (44.6171) Mem 6032MB
[2023-09-25 13:58:54 production](main.py 475): INFO Test: [20/37] Time 0.030 (0.060) Loss 49.7623 (43.4446) Mem 6032MB
[2023-09-25 13:58:55 production](main.py 475): INFO Test: [30/37] Time 0.035 (0.051) Loss 44.4684 (43.8490) Mem 6032MB
[2023-09-25 13:58:55 production](main.py 520): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:58:55 production](main.py 526): INFO * SRCC@ 0.000000 PLCC@ 0.000000 KLCC@ 0.000000 MSE@ 0.000000
[2023-09-25 13:58:55 production](main.py 291): INFO SRCC, PLCC, KLCC and MSE of the network on the 2320 test images: 0.000000, 0.000000, 0.000000, 0.000000
[2023-09-25 13:58:55 production](main.py 302): INFO Max PLCC: 0.000000 Max SRCC: 0.000000 Max KLCC: 0.000000 Min MSE: 0.000000
[2023-09-25 13:58:56 production](main.py 405): INFO Train: [1/9][0/145] eta 0:01:29 lr 0.000067 wd 0.0500 time 0.6181 (0.6181) loss 41.7851 (41.7851) grad_norm 19.1839 (19.1839) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:57 production](main.py 405): INFO Train: [1/9][10/145] eta 0:00:22 lr 0.000071 wd 0.0500 time 0.1165 (0.1661) loss 45.5195 (44.6139) grad_norm 17.9857 (18.5262) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:58 production](main.py 405): INFO Train: [1/9][20/145] eta 0:00:17 lr 0.000076 wd 0.0500 time 0.1169 (0.1427) loss 44.6802 (45.0129) grad_norm 17.6853 (18.6308) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:58:59 production](main.py 405): INFO Train: [1/9][30/145] eta 0:00:15 lr 0.000081 wd 0.0500 time 0.1239 (0.1354) loss 46.5246 (44.5022) grad_norm 19.2854 (18.5435) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:00 production](main.py 405): INFO Train: [1/9][40/145] eta 0:00:13 lr 0.000085 wd 0.0500 time 0.1233 (0.1320) loss 45.5295 (44.7515) grad_norm 18.7031 (18.6315) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:02 production](main.py 405): INFO Train: [1/9][50/145] eta 0:00:12 lr 0.000090 wd 0.0500 time 0.1226 (0.1304) loss 45.4512 (44.4446) grad_norm 18.2945 (18.5039) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:03 production](main.py 405): INFO Train: [1/9][60/145] eta 0:00:11 lr 0.000094 wd 0.0500 time 0.1187 (0.1325) loss 42.6684 (44.3864) grad_norm 18.2046 (18.4989) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:04 production](main.py 405): INFO Train: [1/9][70/145] eta 0:00:09 lr 0.000099 wd 0.0500 time 0.1208 (0.1311) loss 44.0814 (44.2235) grad_norm 19.7774 (18.5609) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:06 production](main.py 405): INFO Train: [1/9][80/145] eta 0:00:08 lr 0.000104 wd 0.0500 time 0.1407 (0.1300) loss 35.6219 (44.0901) grad_norm 17.7082 (18.5313) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:07 production](main.py 405): INFO Train: [1/9][90/145] eta 0:00:07 lr 0.000108 wd 0.0500 time 0.1147 (0.1291) loss 40.7175 (43.9106) grad_norm 17.9483 (18.4705) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:08 production](main.py 405): INFO Train: [1/9][100/145] eta 0:00:05 lr 0.000113 wd 0.0500 time 0.1173 (0.1278) loss 42.0859 (43.7717) grad_norm 20.2704 (18.4737) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:09 production](main.py 405): INFO Train: [1/9][110/145] eta 0:00:04 lr 0.000117 wd 0.0500 time 0.1215 (0.1274) loss 44.3554 (43.6578) grad_norm 17.1600 (18.4896) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:10 production](main.py 405): INFO Train: [1/9][120/145] eta 0:00:03 lr 0.000122 wd 0.0500 time 0.1194 (0.1271) loss 39.6943 (43.4319) grad_norm 17.8580 (18.4773) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:12 production](main.py 405): INFO Train: [1/9][130/145] eta 0:00:01 lr 0.000127 wd 0.0500 time 0.1161 (0.1266) loss 40.0683 (43.2875) grad_norm 19.8450 (18.4690) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:13 production](main.py 405): INFO Train: [1/9][140/145] eta 0:00:00 lr 0.000131 wd 0.0500 time 0.1154 (0.1258) loss 39.8664 (43.1566) grad_norm 18.7870 (18.5023) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:13 production](main.py 428): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:59:13 production](main.py 431): INFO EPOCH 1 training takes 0:00:18
[2023-09-25 13:59:13 production](main.py 433): INFO EPOCH 1 training SRCC: 0.0
[2023-09-25 13:59:14 production](main.py 475): INFO Test: [0/37] Time 0.453 (0.453) Loss 49.5697 (49.5697) Mem 6032MB
[2023-09-25 13:59:14 production](main.py 475): INFO Test: [10/37] Time 0.030 (0.074) Loss 42.0031 (38.8118) Mem 6032MB
[2023-09-25 13:59:14 production](main.py 475): INFO Test: [20/37] Time 0.056 (0.056) Loss 43.3311 (37.5637) Mem 6032MB
[2023-09-25 13:59:15 production](main.py 475): INFO Test: [30/37] Time 0.030 (0.048) Loss 38.0376 (37.8997) Mem 6032MB
[2023-09-25 13:59:15 production](main.py 520): WARNING Array contains NaN or infs. Resetting cc relation to zero...
[2023-09-25 13:59:15 production](main.py 526): INFO * SRCC@ 0.000000 PLCC@ 0.000000 KLCC@ 0.000000 MSE@ 0.000000
[2023-09-25 13:59:15 production](main.py 291): INFO SRCC, PLCC, KLCC and MSE of the network on the 2320 test images: 0.000000, 0.000000, 0.000000, 0.000000
[2023-09-25 13:59:15 production](main.py 302): INFO Max PLCC: 0.000000 Max SRCC: 0.000000 Max KLCC: 0.000000 Min MSE: 0.000000
[2023-09-25 13:59:16 production](main.py 405): INFO Train: [2/9][0/145] eta 0:01:14 lr 0.000133 wd 0.0500 time 0.5143 (0.5143) loss 39.7050 (39.7050) grad_norm 18.0310 (18.0310) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:17 production](main.py 405): INFO Train: [2/9][10/145] eta 0:00:23 lr 0.000138 wd 0.0500 time 0.1215 (0.1739) loss 37.5351 (38.9642) grad_norm 18.7536 (18.3816) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:18 production](main.py 405): INFO Train: [2/9][20/145] eta 0:00:18 lr 0.000143 wd 0.0500 time 0.1245 (0.1497) loss 40.3877 (38.5386) grad_norm 20.1349 (18.4151) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:19 production](main.py 405): INFO Train: [2/9][30/145] eta 0:00:16 lr 0.000147 wd 0.0500 time 0.1256 (0.1418) loss 40.1826 (38.4531) grad_norm 19.1897 (18.3674) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:21 production](main.py 405): INFO Train: [2/9][40/145] eta 0:00:14 lr 0.000152 wd 0.0500 time 0.1186 (0.1366) loss 37.4121 (38.1109) grad_norm 17.0464 (18.3071) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:22 production](main.py 405): INFO Train: [2/9][50/145] eta 0:00:12 lr 0.000156 wd 0.0500 time 0.1197 (0.1341) loss 32.4053 (37.7854) grad_norm 15.8273 (18.2193) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:23 production](main.py 405): INFO Train: [2/9][60/145] eta 0:00:11 lr 0.000161 wd 0.0500 time 0.1247 (0.1331) loss 38.7977 (37.5562) grad_norm 19.2067 (18.2199) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:25 production](main.py 405): INFO Train: [2/9][70/145] eta 0:00:10 lr 0.000166 wd 0.0500 time 0.1348 (0.1363) loss 33.2279 (37.4895) grad_norm 15.7284 (18.1450) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:26 production](main.py 405): INFO Train: [2/9][80/145] eta 0:00:08 lr 0.000170 wd 0.0500 time 0.1240 (0.1350) loss 33.5445 (37.2308) grad_norm 19.4514 (18.1625) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:27 production](main.py 405): INFO Train: [2/9][90/145] eta 0:00:07 lr 0.000175 wd 0.0500 time 0.1302 (0.1341) loss 33.1387 (36.8237) grad_norm 16.2680 (18.0534) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:29 production](main.py 405): INFO Train: [2/9][100/145] eta 0:00:06 lr 0.000179 wd 0.0500 time 0.1296 (0.1333) loss 32.9727 (36.4948) grad_norm 15.6902 (18.0132) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:30 production](main.py 405): INFO Train: [2/9][110/145] eta 0:00:04 lr 0.000184 wd 0.0500 time 0.1262 (0.1340) loss 33.5445 (36.0466) grad_norm 18.3916 (17.9320) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:31 production](main.py 405): INFO Train: [2/9][120/145] eta 0:00:03 lr 0.000189 wd 0.0500 time 0.1282 (0.1336) loss 30.0741 (35.8019) grad_norm 15.9190 (17.9332) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:32 production](main.py 405): INFO Train: [2/9][130/145] eta 0:00:01 lr 0.000193 wd 0.0500 time 0.1263 (0.1330) loss 28.0526 (35.3896) grad_norm 17.9300 (17.8820) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:34 production](main.py 405): INFO Train: [2/9][140/145] eta 0:00:00 lr 0.000198 wd 0.0500 time 0.1388 (0.1346) loss 29.3030 (34.9654) grad_norm 13.7097 (17.7245) loss_scale 65536.0000 (65536.0000) mem 6032MB
[2023-09-25 13:59:35 production](main.py 428): WARNING Array contains NaN or infs. Resetting cc relation to zero...
@codedddddifficult I also encountered this problem. Have you solved it?
Thank you very much for your work on this paper. However, I ran into a problem when reproducing deiqt.py. How can I solve this error, which keeps appearing during the network's forward pass?
x = self.bunch_decoder(output_embedding, x)  # line where the error is raised
RuntimeError: shape '[-1, 36, 64]' is invalid for input of size 75264
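For anyone hitting the same trace, a plausible reading that matches the `batch_first` diagnosis earlier in the thread: without `batch_first=True` the decoder reads tensors as `(seq, batch, feature)`, so the `(1, 6, 384)` queries are taken as batch size 6, and cross-attention then tries to view the `(1, 196, 384)` memory (1 × 196 × 384 = 75264 elements) as `(-1, 6 batch × 6 heads = 36, 64)`, which does not divide evenly. A minimal sketch (the exact error wording may vary across PyTorch versions):

```python
import torch
import torch.nn as nn

# batch_first defaults to False, so the decoder expects (seq, batch, feature).
layer = nn.TransformerDecoderLayer(d_model=384, nhead=6)
decoder = nn.TransformerDecoder(layer, num_layers=1)

tgt = torch.randn(1, 6, 384)       # intended as (batch, queries, feature)
memory = torch.randn(1, 196, 384)  # intended as (batch, patches, feature)
decoder(tgt, memory)  # raises a reshape error like:
                      # shape '[-1, 36, 64]' is invalid for input of size 75264
```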