Open goldwater668 opened 2 weeks ago
Hi @goldwater668, towhee integrates image models through timm. If your model can be created through timm, use the following code:
from towhee import pipe, ops, DataCollection
# Name the two operators first so the pipeline wiring below reads as a plain
# list of stages: decode the file at 'path', then embed the decoded image with
# a timm model restored from a local checkpoint.
decode_image = ops.image_decode()
embed_image = ops.image_embedding.timm(
    model_name='model_name',
    checkpoint_path="your local weights",
)

p = (
    pipe.input('path')
    .map('path', 'img', decode_image)
    .map('img', 'vec', embed_image)
    .output('img', 'vec')
)

# Run the pipeline on one image and display the ('img', 'vec') result.
result = p('towhee.jpeg')
DataCollection(result).show()
If not, you need to implement a custom operator yourself; see https://towhee.readthedocs.io/en/latest/operator/usage.html#custom-operators
@junjiejiangjjj
https://github.com/Westlake-AI/MogaNet
This is the repository of the model. I found the following usage, which suggests that it supports timm. If you have time, could you help check whether it can be called directly?
from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models
from timm.data import create_dataset, create_loader, resolve_data_config, RealLabelsImagenet
from timm.utils import accuracy, AverageMeter, natural_key, setup_default_logging, set_jit_legacy
# Inference script: restore a trained MogaNet classifier and run it over every
# image found under `folder`.
# NOTE(review): indentation was lost when this snippet was pasted; the nesting
# of the loop body below has to be restored before the script can run.
# NOTE(review): `torch`, `os`, `Image`, `Variable`, `create_transform`,
# `checkpoint_path`, `folder`, `DEVICE` and `img_num` are not imported or
# defined in the visible snippet -- they must come from elsewhere.

# Read the saved checkpoint from disk.
checkpoint = torch.load(checkpoint_path)
# Build the "moganet_xtiny" architecture through timm's create_model, without
# pretrained weights, with a 2-class head and 3 input channels.
# NOTE(review): `mem_index` and `K` are extra keyword arguments forwarded to
# the model; their meaning is not visible here -- confirm against the model.
model = create_model(
"moganet_xtiny",
pretrained=False,
num_classes=2,
in_chans=3,
global_pool=None,
scriptable=False,
mem_index=[10],
K=1024)
# strict=True: the keys in checkpoint["state_dict"] must match the model's.
model.load_state_dict(checkpoint["state_dict"], strict=True)
# Move the model to the GPU and switch it to evaluation mode.
model = model.cuda()
model.eval()
# Preprocessing built with a 3x224x224 input size, the given per-channel
# mean/std and a crop percentage of 0.9.
transform = create_transform(input_size=(3, 224, 224),mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225),crop_pct=0.9)
# Walk `folder` bottom-up (topdown=False), following symlinks.
for root, subdirs, files in os.walk(folder, topdown=False, followlinks=True):
# Path of the current directory relative to `folder` ('' for `folder` itself).
rel_path = os.path.relpath(root, folder) if (root != folder) else ''
# The condition is the constant True, so the label is always the last
# component of rel_path; the `else` expression is never evaluated.
label = os.path.basename(rel_path) if True else rel_path.replace(os.path.sep, '_')
for f in files:
base, ext = os.path.splitext(f)
# Only files with a .png/.jpg/.jpeg extension (case-insensitive) are used.
if ext.lower() in ['.png', '.jpg', '.jpeg']:
image_path = os.path.join(root,f)
img=Image.open(image_path)
img = img.convert("RGB")
# Apply the preprocessing and add a leading batch dimension of size 1.
input_image = transform(img).unsqueeze(0)
# NOTE(review): the model was moved with .cuda() above while the input goes
# to DEVICE -- presumably both refer to the same GPU; confirm.
input_image = Variable(input_image).to(DEVICE)
out = model(input_image)
img_num+=1
classes = ('NG', 'OK')
# torch.max over dim 1 returns (values, indices); `pred` holds the indices.
# NOTE(review): presumably `pred` is meant to index into `classes` -- confirm.
_, pred = torch.max(out.data, 1)
It seems that you need to run it in the directory of this project.
git clone https://github.com/Westlake-AI/MogaNet
cd MogaNet
import models
from towhee import pipe, ops, DataCollection
# Name the two operators first so the pipeline wiring below reads as a plain
# list of stages: decode the file at 'path', then embed the decoded image with
# the 'moganet_xtiny' timm model restored from a local checkpoint.
decode_image = ops.image_decode()
embed_image = ops.image_embedding.timm(
    model_name='moganet_xtiny',
    checkpoint_path="your local weights",
)

p = (
    pipe.input('path')
    .map('path', 'img', decode_image)
    .map('img', 'vec', embed_image)
    .output('img', 'vec')
)

# Run the pipeline on one image and display the ('img', 'vec') result.
result = p('towhee.jpeg')
DataCollection(result).show()
@junjiejiangjjj I see that your pipeline only has ConvNeXt and no ConvNeXt V2. Can ConvNeXt V2 be called directly through towhee?
Hi @goldwater668, if you change the number of input channels, you also need to change it when initializing the model.
@junjiejiangjjj Thank you for your timely reply. I have modified it as follows and the verification has passed. The extracted features here have 192 dimensions. Can this be changed to 768 dimensions? If so, how? Or is it determined by the trained model?
@register
class ImageDecodeCV2_moganet(operator.PyOperator):
    """Decode an image path into the input consumed by the MogaNet operator.

    Args:
        mode: When truthy, the file is read with OpenCV, converted from BGR
            to RGB and wrapped as ``Image(..., 'RGB')``. Otherwise the path
            is handed to ``read_image`` and its result is returned as is.
    """

    def __init__(self, mode=False):
        super().__init__()
        self.mode = mode
        print("model:", mode)

    def __call__(self, image_path: str):
        """Decode ``image_path`` according to ``self.mode``.

        Raises:
            ValueError: In RGB mode, if OpenCV cannot read ``image_path``.
        """
        if not self.mode:
            return read_image(image_path)

        bgr_cv_image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if bgr_cv_image is None:
            raise ValueError(f"cv2.imread could not read image: {image_path!r}")
        rgb_cv_image = cv2.cvtColor(bgr_cv_image, cv2.COLOR_BGR2RGB)
        return Image(rgb_cv_image, 'RGB')
def read_image(image_path):
    """Load a grayscale image and pack its four quadrants into a CUDA tensor.

    The file is read as a single-channel image and resized to 220x148
    (width x height) when it has any other size. It is then cut into its
    top-left, top-right, bottom-left and bottom-right quadrants; each
    quadrant is converted to float32, divided by 255 and has its per-channel
    mean (currently all zeros) subtracted. The four quadrants are stacked as
    channels.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 ``torch.Tensor`` of shape ``(1, 4, 74, 110)`` moved to the
        GPU with ``.cuda()``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path, 0)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than on the `.shape` access below.
    if img is None:
        raise ValueError(f"cv2.imread could not read image: {image_path!r}")

    if img.shape != (148, 220):
        img = cv2.resize(img, (220, 148))
    img_h, img_w = img.shape
    half_h, half_w = img_h // 2, img_w // 2

    # Order matters: it fixes the channel order of the returned tensor.
    quadrants = (
        img[:half_h, :half_w],   # top-left
        img[:half_h, half_w:],   # top-right
        img[half_h:, :half_w],   # bottom-left
        img[half_h:, half_w:],   # bottom-right
    )
    mean = (0., 0., 0., 0.)
    channels = [
        np.array(quadrant, np.float32) / 255 - channel_mean
        for quadrant, channel_mean in zip(quadrants, mean)
    ]

    batch = torch.from_numpy(np.array(channels, np.float32)).unsqueeze(0).cuda()
    print("input:", batch.shape)
    return batch
@register
class MogaNet_model(operator.NNOperator):
    """Towhee operator that turns an image into a MogaNet feature vector.

    Args:
        device: Device the model runs on; inputs are moved to it before the
            forward pass.
        num_classes: Forwarded to ``moganet_xtiny``.
        in_channels: Forwarded to ``moganet_xtiny``.
        checkpoint_path: Checkpoint file whose ``'state_dict'`` entry is
            loaded into the model (non-strictly). When ``None`` no weights
            are loaded and a warning is emitted.
    """

    def __init__(self, device, num_classes, in_channels, checkpoint_path=None):
        super().__init__()
        self.device = device
        self.model = moganet_xtiny(num_classes=num_classes, in_channels=in_channels)
        if checkpoint_path is not None:
            # Deserialize onto the CPU so a checkpoint saved on a GPU can be
            # restored on any machine; the model is moved to `device` below.
            checkpoint = torch.load(checkpoint_path, map_location='cpu')
            # strict=False: keys that do not match the model are ignored.
            self.model.load_state_dict(checkpoint['state_dict'], strict=False)
        else:
            import warnings
            warnings.warn(
                "MogaNet_model: checkpoint_path is None, the model keeps its "
                "initial weights"
            )
        self.model.eval()
        self.model.to(device)
        # Preprocessing for non-tensor inputs; built on first use so that
        # tensor-only callers never pay for (or depend on) it.
        self.tfms = None

    def convert_img(self, img: 'towhee.types.Image'):
        """Convert an array image to a PIL RGB image (values cast to uint8)."""
        return PILImage.fromarray(img.astype('uint8'), 'RGB')

    def __call__(self, input):
        """Compute the embedding for one input.

        Args:
            input: Either a ready-made ``torch.Tensor`` batch, which is fed
                to the model unchanged, or an image (``numpy.ndarray`` or an
                object with ``convert('RGB')``), which is preprocessed with
                the model's timm transform first.

        Returns:
            A numpy array holding the features with the leading batch
            dimension squeezed out. 4-D backbone outputs are average-pooled
            to 1x1 first.
        """
        if isinstance(input, torch.Tensor):
            inputs = input
        else:
            if self.tfms is None:
                self.tfms = create_transform(**resolve_data_config({}, model=self.model))
            img = self.convert_img(input) if isinstance(input, numpy.ndarray) else input.convert('RGB')
            inputs = torch.stack([self.tfms(img)])

        # Inference only: skip autograd bookkeeping. Use the configured
        # device rather than assuming CUDA.
        with torch.no_grad():
            pre = self.model.forward_features(inputs.to(self.device))
        pre = pre.to('cpu')

        if pre.dim() == 4:
            pre = nn.AdaptiveAvgPool2d(1)(pre)
        # Flatten unconditionally so outputs that are not 4-D still yield a
        # result instead of hitting an unbound `features` name.
        features = pre.flatten(1)
        return features.squeeze(0).detach().numpy()
class Extract_features:
    """Image-path -> embedding extractor built on a towhee pipeline.

    The pipeline decodes the path with ``ImageDecodeCV2_moganet``, embeds the
    result with ``MogaNet_model`` and passes the embedding through
    ``ops.towhee.np_normalize()``.

    Args:
        device: Forwarded to ``MogaNet_model``.
        num_classes: Forwarded to ``MogaNet_model``.
        in_channels: Forwarded to ``MogaNet_model``.
        checkpoint_path: Forwarded to ``MogaNet_model``.
        mode: Forwarded to ``ImageDecodeCV2_moganet``.
    """

    def __init__(self, device, num_classes, in_channels, checkpoint_path, mode=False):
        self.image_embedding_pipe = (
            pipe.input('path')
            .map('path', 'img', ops.ImageDecodeCV2_moganet(mode=mode))
            .map('img', 'embedding', ops.MogaNet_model(device, num_classes, in_channels, checkpoint_path=checkpoint_path))
            .map('embedding', 'embedding', ops.towhee.np_normalize())
        )
        # Finalise the pipeline once here instead of calling `.output(...)`
        # on every `extract_feat` call.
        # NOTE(review): presumably each `.output()` call builds a fresh
        # runnable pipeline with its own operator instances -- confirm
        # against the towhee version in use.
        self._embedding_runner = self.image_embedding_pipe.output('embedding')

    def extract_feat(self, img_path):
        """Run the pipeline on ``img_path`` and return its 'embedding' output.

        The value is returned exactly as produced by the towhee pipeline call.
        """
        feat = self._embedding_runner(img_path)
        print("feat:", feat)
        return feat
This is determined by the model. This model has these specifications
Is there an existing issue for this?
Is your feature request related to a problem? Please describe.
If I use moganet to train a model, and then use the model for feature extraction and image retrieval, how should I apply this framework?
Describe the solution you'd like.
If I use moganet to train a model, and then use the model for feature extraction and image retrieval, how should I apply this framework?
Describe an alternate solution.
If I use moganet to train a model, and then use the model for feature extraction and image retrieval, how should I apply this framework?
Anything else? (Additional Context)
If I use moganet to train a model, and then use the model for feature extraction and image retrieval, how should I apply this framework?