Closed roman807 closed 1 week ago
In general, we should prioritise loading models from `timm` here, since it already supports the complex feature-extraction logic for almost all of its models.
If a model is not part of `timm` and we need to extract its features in a custom way, we should use the `EncoderWrapper` and add the extraction code through its `tensor_transforms` argument.
Some usage examples of the `TimmEncoder`:
# template: model name, pretrained flag, output indices and checkpoint path
# are read from environment variables through the `oc.env` resolver; the
# value after the comma is the fallback used when the variable is unset.
encoder:
  class_path: eva.vision.models.networks.encoders.TimmEncoder
  init_args:
    model_name: ${oc.env:TIMM_MODEL_NAME, vit_small_patch16_224}
    pretrained: ${oc.env:MODEL_PRETRAINED, true}
    out_indices: ${oc.env:TIMM_MODEL_OUT_INDICES, 1}
    # anchored as CHECKPOINT_PATH; no alias is used within this snippet
    checkpoint_path: &CHECKPOINT_PATH ${oc.env:CHECKPOINT_PATH, null}
    # NOTE(review): assumed to be a TimmEncoder init argument - confirm nesting
    model_arguments:
      dynamic_img_size: true
# random weights: `pretrained` is disabled and no checkpoint is given
encoder:
  class_path: eva.vision.models.networks.encoders.TimmEncoder
  init_args:
    model_name: vit_small_patch16_224
    pretrained: false
    out_indices: ${oc.env:TIMM_MODEL_OUT_INDICES, 1}
    checkpoint_path: null
    # NOTE(review): assumed to be a TimmEncoder init argument - confirm nesting
    model_arguments:
      dynamic_img_size: true
# vits16 ImageNet weights: `pretrained` is enabled and no extra checkpoint
# is given
encoder:
  class_path: eva.vision.models.networks.encoders.TimmEncoder
  init_args:
    model_name: vit_small_patch16_224
    pretrained: true
    out_indices: ${oc.env:TIMM_MODEL_OUT_INDICES, 1}
    checkpoint_path: null
    # NOTE(review): assumed to be a TimmEncoder init argument - confirm nesting
    model_arguments:
      dynamic_img_size: true
# [with pretrain] dino Facebook official weights: selected through the
# `_dino` model name with `pretrained` enabled; no checkpoint path is set
encoder:
  class_path: eva.vision.models.networks.encoders.TimmEncoder
  init_args:
    model_name: vit_small_patch16_224_dino
    pretrained: true
    out_indices: ${oc.env:TIMM_MODEL_OUT_INDICES, 1}
    # NOTE(review): assumed to be a TimmEncoder init argument - confirm nesting
    model_arguments:
      dynamic_img_size: true
# [with checkpoint] dino Facebook official weights: the plain
# vit_small_patch16_224 model with the weights given as a checkpoint URL
encoder:
  class_path: eva.vision.models.networks.encoders.TimmEncoder
  init_args:
    model_name: vit_small_patch16_224
    pretrained: true
    out_indices: ${oc.env:TIMM_MODEL_OUT_INDICES, 1}
    checkpoint_path: https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth
    # NOTE(review): assumed to be a TimmEncoder init argument - confirm nesting
    model_arguments:
      dynamic_img_size: true
# custom dino weights - example kaiko vits16: weights are given as a
# checkpoint URL; the model name is read from TIMM_MODEL_NAME and falls
# back to vit_small_patch16_224 when the variable is unset
encoder:
  class_path: eva.vision.models.networks.encoders.TimmEncoder
  init_args:
    model_name: ${oc.env:TIMM_MODEL_NAME, vit_small_patch16_224}
    pretrained: true
    out_indices: ${oc.env:TIMM_MODEL_OUT_INDICES, 1}
    checkpoint_path: https://github.com/kaiko-ai/towards_large_pathology_fms/releases/download/0.0.1/vits16.pth
    # NOTE(review): assumed to be a TimmEncoder init argument - confirm nesting
    model_arguments:
      dynamic_img_size: true
closes https://github.com/kaiko-ai/eva/issues/550