w1oves / Rein

[CVPR 2024] Official implementation of <Stronger, Fewer, & Superior: Harnessing Vision Foundation Models for Domain Generalized Semantic Segmentation>
https://zxwei.site/rein
GNU General Public License v3.0

About clip's configuration file #47

Open IzumiKDl opened 1 month ago

IzumiKDl commented 1 month ago

Thank you so much for your wonderful work! When training Rein+CLIP we noticed that the repo does not ship a configuration file for it, so we wrote our own based on your code and the other sample configs. We ran into one question: CLIP uses a neck module, so should the neck receive only the feature outputs of the backbone (Rein), with the learnable tokens passed directly into the head? Here is the config file we wrote:

```python
_base_ = [
    "../_base_/datasets/dg_gta_512x512.py",
    "../_base_/default_runtime.py",
    "../_base_/models/clip-L_mask2former.py",
]
model = dict(
    backbone=dict(
        type="ReinsCLIPVisionTransformer",
        reins_config=dict(
            type="LoRAReins",
            token_length=100,
            embed_dims=1024,
            num_layers=24,
            patch_size=16,
            link_token_to_query=True,
            lora_dim=16,
        ),
    ),
    decode_head=dict(
        type="ReinMask2FormerHead",
    ),
)
train_pipeline = [
    dict(type="LoadImageFromFile"),
    dict(type="LoadAnnotations"),
    dict(
        type="RandomChoiceResize",
        scales=[int(512 * x * 0.1) for x in range(5, 21)],
        resize_type="ResizeShortestEdge",
        max_size=2048,
    ),
    dict(type="RandomCrop", crop_size={{_base_.crop_size}}, cat_max_ratio=0.75),
    dict(type="RandomFlip", prob=0.5),
    dict(type="PhotoMetricDistortion"),
    dict(type="PackSegInputs"),
]
train_dataloader = dict(batch_size=4, dataset=dict(pipeline=train_pipeline))

embed_multi = dict(lr_mult=1.0, decay_mult=0.0)
optim_wrapper = dict(
    constructor="PEFTOptimWrapperConstructor",
    optimizer=dict(
        type="AdamW", lr=0.0001, weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999)
    ),
    paramwise_cfg=dict(
        custom_keys={
            "norm": dict(decay_mult=0.0),
            "query_embed": embed_multi,
            "level_embed": embed_multi,
            "learnable_tokens": embed_multi,
            "reins.scale": embed_multi,
        },
        norm_decay_mult=0.0,
    ),
)
param_scheduler = [
    dict(type="PolyLR", eta_min=0, power=0.9, begin=0, end=40000, by_epoch=False)
]

train_cfg = dict(type="IterBasedTrainLoop", max_iters=40000, val_interval=10000)
val_cfg = dict(type="ValLoop")
test_cfg = dict(type="TestLoop")
default_hooks = dict(
    timer=dict(type="IterTimerHook"),
    logger=dict(type="LoggerHook", interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type="ParamSchedulerHook"),
    checkpoint=dict(
        type="CheckpointHook", by_epoch=False, interval=4000, max_keep_ckpts=3
    ),
    sampler_seed=dict(type="DistSamplerSeedHook"),
    visualization=dict(type="SegVisualizationHook"),
)
find_unused_parameters = True
auto_scale_lr = dict(enable=False, base_batch_size=4)  # v2
```
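The data flow asked about above can be sketched as follows. This is a toy stand-in, not the actual Rein API: the function and variable names (`rein_backbone`, `neck`, `decode_head`) are hypothetical, and only the shapes mirror the config (`token_length=100`, `embed_dims=1024`). The point it illustrates is the wiring the question proposes, with `link_token_to_query=True`: the neck sees only the backbone's feature maps, while the learnable query tokens bypass the neck and go straight to the decode head.

```python
# Hypothetical sketch of the feature/token routing, NOT the real Rein code.
import numpy as np

def rein_backbone(image):
    """Stand-in for ReinsCLIPVisionTransformer: returns multi-scale
    feature maps plus the Rein query tokens (link_token_to_query=True)."""
    feats = [np.zeros((1, 1024, 32 // s, 32 // s)) for s in (1, 2, 4)]
    queries = np.zeros((100, 1024))  # token_length=100, embed_dims=1024
    return feats, queries

def neck(feats):
    """Stand-in neck: operates on the feature maps only."""
    return [f.copy() for f in feats]

def decode_head(feats, queries):
    """Stand-in for ReinMask2FormerHead: consumes the neck output and the
    query tokens directly, without the tokens passing through the neck."""
    return {"num_feats": len(feats), "num_queries": queries.shape[0]}

feats, queries = rein_backbone(np.zeros((1, 3, 512, 512)))
out = decode_head(neck(feats), queries)  # {'num_feats': 3, 'num_queries': 100}
```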

w1oves commented 1 month ago

Actually, I have provided the config for CLIP+Rein in the README: https://github.com/w1oves/Rein/releases/download/GTAV%2BCLIP-L/config.py.