Open snow-wind-001 opened 1 month ago
Why are the results different after two consecutive runs? It was right the first time; the second time, running it with the saved features_galleries, the result was completely wrong. @churnikov @chang48 @dapladoc @alexmelekhin @leoromanovich
@snow-wind-001 Hi! Could you please reformat the code so it's easier to read?
I reformatted the code. @AlekseySh
Hey, @snow-wind-001
Once again: the problem is that ii_closest changes when you run the script a second time?
The script looks okay to me:
- there is no randomness in transforms
- the model is set to eval mode inside inference_on_images
Do you have the problem when you don't cache features? It's hard to debug from my place, but you need to find the exact moment when the two runs diverge. Are the models the same? Are the datasets the same and not shuffled after reloading? Are the features the same?
PS. You don't need to reimplement ViTExtractor to be able to load weights from disk. You can just provide the path to your weights via the weights parameter. In other words, weights may be either a special name of weights stored in the cloud OR a path to your weights.
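The idea behind such a dual-purpose weights parameter can be illustrated with a tiny dispatcher. This is a sketch of the concept only, not OML's actual implementation; the registry contents and URL below are made up:

```python
from pathlib import Path

# Hypothetical registry mapping special checkpoint names to cloud locations.
PRETRAINED_REGISTRY = {"vits16_dino": "https://example.com/vits16_dino.ckpt"}

def resolve_weights(weights: str) -> str:
    """Return where to load weights from: a local file path or a cloud URL."""
    if Path(weights).is_file():
        # The user passed a path to a local checkpoint on disk.
        return str(weights)
    if weights in PRETRAINED_REGISTRY:
        # The user passed the special name of pretrained weights.
        return PRETRAINED_REGISTRY[weights]
    raise ValueError(f"Unknown weights: {weights}")
```

So passing either `weights="vits16_dino"` or `weights="./best.ckpt"` resolves to a loadable checkpoint without subclassing the extractor.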
@AlekseySh I have another question: is this code a valid way to set is_query and is_gallery in the csv file during the training process?
import torch
import pandas as pd
import random
from pathlib import Path
from torch.utils.data import DataLoader
from typing import Union, Optional
from pprint import pprint
from oml.const import PATHS_COLUMN
from oml.datasets.base import DatasetQueryGallery
from oml.inference.flat import inference_on_dataframe, inference_on_images
from oml.inference.pairs import pairwise_inference_on_images
from oml.models import ConcatSiamese, ViTExtractor
from oml.registry.transforms import get_transforms_for_pretrained
from oml.retrieval.postprocessors.pairwise import PairwiseImagesPostprocessor
from oml.utils.misc_torch import pairwise_dist
from oml.utils.io import download_checkpoint_one_of
from oml.transforms.images.torchvision import get_normalisation_resize_torch
from NumpyImageDataset import *
from PIL import Image  # correct import of Image
import os

def print_dataset_paths(dataset):
    for i in range(len(dataset)):
        data = dataset[i]
        print(f"Index {i}: Path {data['path']}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoints_path = './best.ckpt'
model = ViTExtractor(weights=checkpoints_path, arch='vits16', normalise_features=True)

transform, _ = get_transforms_for_pretrained("vits16_dino")
# Override with custom normalisation statistics
transform = get_normalisation_resize_torch(
    im_size=224,
    mean=[0.40590053, 0.40100682, 0.35496407],
    std=[0.20830545, 0.18484329, 0.19726294],
)

# Read images from two different folders to form the query and gallery sets
image_is_query_path = Path('/home/snowolf/dataset/bottle_test/is_query/1')
image_is_gallery_path = Path('/home/snowolf/dataset/bottle_test/is_gallery/1')

query_path = []
gallery_path = []
for img_file in image_is_query_path.glob('*.jpg'):  # assuming the images are .jpg files
    query_path.append(img_file)
for img_file in image_is_gallery_path.glob('*.jpg'):  # assuming the images are .jpg files
    gallery_path.append(img_file)

print("Image paths from folder 1:")
print(query_path)
print("\nImage paths from folder 2:")
print(gallery_path)

num_workers = 4
batch_size = 16
verbose = True

output_tensor = pairwise_inference_on_images(
    model=model,
    paths1=query_path,
    paths2=gallery_path,
    transform=transform,
    num_workers=num_workers,
    batch_size=batch_size,
    verbose=verbose,
)
print(output_tensor)

ii_closest = torch.argmin(output_tensor, dim=1)
print(ii_closest)
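To locate where two runs diverge, one option is to dump the computed features on the first run and compare them element by element on the second. A minimal stdlib sketch of that check (the file name features_galleries.pkl is hypothetical; in practice the features would come from the script above):

```python
import pickle
from pathlib import Path

def check_against_cache(features, cache_file="features_galleries.pkl", tol=1e-6):
    """Save features on the first run; on later runs, report the first mismatch."""
    cache = Path(cache_file)
    if not cache.exists():
        # First run: persist the features and report that we cached them.
        cache.write_bytes(pickle.dumps(features))
        return "cached"
    old = pickle.loads(cache.read_bytes())
    if len(old) != len(features):
        return f"length differs: {len(old)} != {len(features)}"
    for i, (a, b) in enumerate(zip(old, features)):
        if abs(a - b) > tol:
            # This is the exact moment the two runs diverge.
            return f"first mismatch at index {i}: {a} != {b}"
    return "identical"
```

If the features match but ii_closest still differs, the problem is after feature extraction (e.g. in how the cached gallery is reloaded or reordered).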
### I tried your method, but it actually lacks a predict method.
Traceback (most recent call last):
File "/home/snowolf/git_code/open-metric-learning/pipelines/postprocessing/pairwise_postprocessing/test_v3.py", line 94, in
I am trying to use OML to perform image recognition on objects such as bottles. I input a test image and a standard image of a known category into OML, and ultimately hope to achieve good recognition results. I hope you can provide a demo to help us implement this application. @AlekseySh
Wait a second, why do you use pairwise_inference_on_images? It's a special function for Siamese-like models, mostly used for postprocessing (re-ranking). It's the wrong function here; inference_on_images was correct.
What do you mean by recognition results? What exactly do you do?
We need to identify beverage bottles. Originally there were 172 types of bottles, which may grow to more than 600 in the future. Currently we use ViT for a bottle classification model. Although it works well, the training cost of adding new categories is very high, so we hope to use the OML project instead. My plan is to set views of every bottle from four directions as the gallery and, through training, have the network judge the distance between the input image and the gallery images to perform classification; in the future, adding new categories would only require a small amount of training plus updating the gallery, making the algorithm scalable. I saw the "PyTorch Metric Learning with our Pipelines" example and can follow the standard tutorial for adding a custom loss, but there is no validation code that sorts the gallery images by similarity to the input image. I don't know if my idea is correct; could you suggest how to call the OML algorithm for this? @AlekseySh
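The gallery-based classification idea described above can be sketched without any framework: embed the queries and gallery views with any extractor, then assign each query the label of its nearest gallery embedding. A toy sketch with made-up 2-D embeddings (real features would be e.g. 384-D ViT outputs):

```python
def squared_dist(a, b):
    """Squared Euclidean distance between two equal-length vectors."""
    return sum((x - y) ** 2 for x, y in zip(a, b))

def classify_by_nearest_gallery(query_embs, gallery_embs, gallery_labels):
    """Assign each query the label of its closest gallery embedding."""
    preds = []
    for q in query_embs:
        dists = [squared_dist(q, g) for g in gallery_embs]
        preds.append(gallery_labels[dists.index(min(dists))])
    return preds

# Toy example: two bottle types; four views per bottle would work the same way.
gallery = [[0.0, 0.0], [0.1, 0.0], [1.0, 1.0], [0.9, 1.1]]
labels = ["cola", "cola", "water", "water"]
queries = [[0.05, 0.02], [0.95, 1.0]]
print(classify_by_nearest_gallery(queries, gallery, labels))  # → ['cola', 'water']
```

Adding a new bottle type then only means appending its view embeddings and labels to the gallery, with no retraining of the classifier head.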
@snow-wind-001 As far as I understand your task -- yes, it's a metric learning problem suitable for OML.
There is validation; you probably missed the example. Take a look at the examples section in the readme or in the docs (they are hidden under spoilers, so you need to click on them). You can use the validation from there.
If you saw that validation but found it inconvenient for analyzing your outputs, you can try the reworked validation that will be published soon with OML 3.0. If you don't want to wait, just jump into the docs branch (you can use git clone and specify the branch) and run this example: https://github.com/OML-Team/open-metric-learning/tree/docs?tab=readme-ov-file#examples. With RetrievalResults you will get a better sense of the retrieval process during validation.
hey @snow-wind-001
We've just released OML 3.0, where we made working with retrieved items more transparent and simple. Take a look at the examples: https://github.com/OML-Team/open-metric-learning?tab=readme-ov-file#examples. I hope your problem will be solved automatically.