Open XuanjiaZ opened 1 year ago
123123
111
1 2 3 43 4
123123
123123 111 222 333
123 qwe
123 ‘’‘123123 123123 123132 ’‘’
123123 123
123123
/123 123
123 qweasd asd
123: 123123 123
123123 `# data size: [224, 224] input_size: [224, 224] transform_type: parallel num_classes: -1 dataset: millionaid #potsdam #millionaid
backbone: resnet50
base_model: moco
in_chans: 3
predictor_hidden_dim: 512
out_dim: 256
base_momentum: 0.996
final_momentum: 1
temperature: 0.2
queue_size: 65536
mask_stage: 0
out_stage: 4
kernel_size: 1
in_channels: 2048
out_stride: 32
model_warmup_epoch: -1
global_branch: True
num_matches: 20
hidden_dim: 2048
online_temp: 0.1
branch_temp: 0.07
warmup_branch_temp: 0.04
warmup_branch_temp_epochs: 50
center_momentum: 0.9
class_feat_size: 2048
use_bn: True
num_layers: 3
bottleneck_dim: 256
mask_patch_size: 32 mask_ratio: 0.4 mask_color: mean
use_fremae: True
loss_weight: 1.0 reduction: none activate: sigmoid alpha: 0.2 focal_gamma: 1.0 residual: False fft_weight: 1.0 fft_focal: True fft_unmask_weight: 0.001 fft_unmask_replace: mixed encoder_in_channels: 3 unmask_weight: 0.
optimizer: adan lr: 0.0088 #0.003125 wd: 0.02 epochs: 200 #400 max_grad_norm: null
name: cosine min_lr: 0.000001 warmup_epochs: 15 #5 #15 #10 warmup_method: linear warmup_factor: 0.01 decay_epochs: 30 decay_rate: 0.1 multisteps: [] gamma: 0.1
k: 20 T: 0.07 distance_fx: euclidean
ft_classify: False segmentation: False ignore_index: 255 `
123123、】、 ’‘’python qwe qwe ‘’‘
以下是我的Config:
Config
# MMSegmentation-style fine-tuning config (one statement per line).
# It declares logging hooks, the optimizer/schedule, an 'EncoderDecoder'
# model with a 'ResNet' backbone plus 'UPerHead'/'FCNHead' heads, and the
# train/val/test data pipelines for the 'PotsdamDataset' dataset type.

# --- runtime / logging ---
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        dict(type='TensorboardLoggerHook'),
        dict(
            type='MMSegWandbHook',
            init_kwargs=dict(project='segmentation_potsdam', name='mmseg0.27.0')),
    ])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True

# --- optimizer / schedule ---
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
# NOTE(review): the pasted config had an unbalanced ')' right after
# `distributed=False,`; grad_clip is assumed to be a keyword of
# optimizer_config (the only reading that parses) -- confirm with the author.
optimizer_config = dict(
    type='Fp16OptimizerHook',
    distributed=False,
    grad_clip=dict(max_norm=5.0, norm_type=2))
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=100,
    min_lr=0.000001,
    by_epoch=False)
runner = dict(type='EpochBasedRunner', max_epochs=50)
checkpoint_config = dict(by_epoch=True, interval=10)
evaluation = dict(interval=5, metric=['mIoU', "mFscore"], pre_eval=True, by_epoch=True)

# --- model ---
norm_cfg = dict(type='BN', requires_grad=True)
# change this to path of cmid_pretrained_resnet50
checkpoint_path = r"F:\pumpkinCode\mmseg\pretrain_checkpoint\CMID_200ep_BK.pth"
model = dict(
    type='EncoderDecoder',
    pretrained=checkpoint_path,
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='UPerHead',
        in_channels=[256, 512, 1024, 2048],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        ignore_index=255,
        dropout_ratio=0.1,
        num_classes=6,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        ignore_index=255,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=6,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

# --- dataset ---
# dataset_type = 'PotsdamAllDataset'
dataset_type = "PotsdamDataset"
# data_root = '/workspace/SSL_RS/data/segmentation/potsdam'
# Raw string: the backslashes in this Windows path are literal, not escapes
# (same value as the original non-raw literal, without the invalid-escape warning).
data_root = r'F:\VisionData\Potsdam\mmseg'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    # dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='LoadAnnotationsReduceIgnoreIndex', reduce_zero_label=True, ignore_index=6),
    dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ]),
]
# val and test both read the 'val' split directories.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/train',
        ann_dir='ann_dir/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline))
123132
qweqew
qweqew
# data
# --data-path '/home/sgiit/SGIIT/zxj/datasets/Potsdam/2_Ortho_RGB_pretrain' \
size: [224, 224]
input_size: [224, 224]
transform_type: parallel
num_classes: -1
dataset: millionaid #potsdam #millionaid
# model
backbone: resnet50
base_model: moco
in_chans: 3
predictor_hidden_dim: 512
out_dim: 256
base_momentum: 0.996
final_momentum: 1
temperature: 0.2
queue_size: 65536
mask_stage: 0
out_stage: 4
kernel_size: 1
in_channels: 2048
out_stride: 32
model_warmup_epoch: -1
global_branch: True
num_matches: 20
hidden_dim: 2048
online_temp: 0.1
branch_temp: 0.07
warmup_branch_temp: 0.04
warmup_branch_temp_epochs: 50
center_momentum: 0.9
class_feat_size: 2048
use_bn: True
num_layers: 3
bottleneck_dim: 256
# Mask Parameter
mask_patch_size: 32
mask_ratio: 0.4
mask_color: mean
# freMAE
use_fremae: True
# loss Parameter
loss_weight: 1.0
reduction: none
activate: sigmoid
alpha: 0.2
focal_gamma: 1.0
residual: False
fft_weight: 1.0
fft_focal: True
fft_unmask_weight: 0.001
fft_unmask_replace: mixed
encoder_in_channels: 3
unmask_weight: 0.
# optimizer
optimizer: adan
lr: 0.0088 #0.003125
wd: 0.02
epochs: 200 #400
max_grad_norm: null
# scheduler
name: cosine
min_lr: 0.000001
warmup_epochs: 15 #5 #15 #10
warmup_method: linear
warmup_factor: 0.01
decay_epochs: 30
decay_rate: 0.1
multisteps: []
gamma: 0.1
# knn evaluation
k: 20
T: 0.07
distance_fx: euclidean
# fine-tuning classifier
ft_classify: False
segmentation: False
ignore_index: 255
#我对论文中的Ⅳ.B 节对 Potsdam 数据集 预训练 的实验进行了复现,但是效果不是很好,以下是我的配置文件:
这是我的代码 Code
import timm
import math
import torch
self.base_tau = base_tau
self.cur_tau = base_tau
self.final_tau = final_tau
else:
assert config.backbone in timm.list_models("*"), f"Backbone {config.backbone} is not supported"
if config.use_fremae:
self.online_encoder = FreMaskResNet(config.mask_stage,
config.out_stage,
config.backbone,
if self.ft_classify:
self.classifier = nn.Linear(config.in_channels, config.num_classes)
self.cls_avg_pool = nn.AdaptiveAvgPool2d(1)
if self.multi_label:
self.cls_metrics = None
elif self.segmentation:
self.bn = nn.BatchNorm2d(config.in_channels)
self.conv_seg = nn.Conv2d(config.in_channels, config.num_classes, kernel_size=1)
self.loss_seg = nn.CrossEntropyLoss(ignore_index=config.ignore_index)
self.seg_ignore_index = config.ignore_index
self.ema = MomentumUpdater(config.base_momentum, config.final_momentum)
self.ploter = PlotTensor(apply_inv=True)
nrow = 4
img_mim = img_mim[:nrow]
img_rec = img_rec[:nrow]
img = img[:nrow]
plot_args = dict(dpi=None, apply_inv=True)
mask = mask[:4].unsqueeze(1).type_as(img_rec)
mask = F.interpolate(mask, scale_factor=img_rec.size(2) / mask.size(2), mode="nearest")
#img_rec = img_rec * mask + img * (1 - mask)
img = torch.cat((img, img_mim, img_rec), dim=0)
assert self.save_name.find(".png") != -1
self.ploter.plot(
img, nrow=nrow, title_name="MIM", save_name=self.save_name, **plot_args)
@property
def momentum_pairs(self):
return [
(self.online_encoder.model, self.branch_encoder),e.
"""
batch_size = keys.shape[0]
ptr = int(self.queue_ptr) # type: ignore
assert self.queue_size % batch_size == 0 # for simplicity
pixel_loss = self.class_loss(mask_proj_1, view_proj_1.detach())
pixel_loss += self.class_loss(mask_proj_2, view_proj_2.detach())
pixel_loss /= 2
total_loss += pixel_loss
out.update(pixel_loss=pixel_loss)ool(feature).squeeze() for feature in mask_embedding_view]
with torch.no_grad():
view_avg_embedding_org = [self.avg_pool(feature).squeeze() for feature in view_embedding_org]
view_avg_embedding_view = [self.avg_pool(featu
我对论文中的Ⅳ.B 节对 Potsdam 数据集 预训练 的实验进行了复现,但是效果不是很好,不清楚是否我的配置有误,以下是我的配置文件:
我在预训练中使用了4张显卡，因此在上述配置文件中将 batch size 设置为 64/4=16，学习率设置为 0.003125（但训练过程中生成的配置文件 config.json 中的学习率为 0.003125*4=0.0125）。我不太确定上述 pt_config.yaml 中的学习率和 batch size 这两个参数是否设置正确。
123123