samschulter / omnilabeltools

A Python toolkit for the OmniLabel benchmark providing code for evaluation and visualization
https://www.omnilabel.org
MIT License

result.json #3

Open HEasoner opened 11 months ago

HEasoner commented 11 months ago

@samschulter Hi, thanks for sharing the toolkit. If I use GLIP-L for the experiment, how do I get the result.json for evaluation?

xiaofeng94 commented 11 months ago

Hey @HEasoner, thanks for your interest!

For the evaluation, we first load the label space for each image (see #1 for how to write a dataloader). The label space contains both free-form text descriptions and category names. For descriptions, we query GLIP with each description individually to get predictions. For category names, we concatenate k categories (20 by default) into a single text query to GLIP, as sketched below.
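
Roughly, the k category names are joined into a single caption; GLIP's create_queries_and_maps additionally builds the map from each label to its token span in that caption. A hypothetical illustration of just the concatenation step (concat_categories is not a real GLIP function):

    def concat_categories(names, sep=". "):
        # GLIP separates categories in its text query with ". ",
        # e.g. ["person", "dog", "car"] -> "person. dog. car"
        return sep.join(names)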

You may find sample evaluation code below. Note that it is incomplete and cannot run directly.

    # note: num_of_words, remove_full_stop, and create_queries_and_maps are helpers
    # from the GLIP codebase; model, tokenizer, cfg, device, cpu_device, threshold,
    # topk_per_eval, chunk_size, and class_plus are assumed to be set up already
    predictions = []
    for iidx, batch in enumerate(_iterator):
        images, targets, image_ids, *_ = batch

        images = images.to(device)
        text_queries = targets[0].get_field('inference_obj_descriptions')
        text_queries_ids = targets[0].get_field("inference_obj_description_ids")
        image_size = targets[0].size
        image_id = image_ids[0]

        des_id_start = 0
        while des_id_start < len(text_queries_ids):
            # free-form descriptions (more than two words) are queried one at a time;
            # short category names are chunked below
            if num_of_words(text_queries[des_id_start]) > 2:
                description_list = remove_full_stop([text_queries[des_id_start]])
                description_id_list = [text_queries_ids[des_id_start]]
                des_id_start += 1
            else:
                description_list = remove_full_stop(text_queries[des_id_start:des_id_start+chunk_size])
                description_id_list = text_queries_ids[des_id_start:des_id_start+chunk_size]
                des_id_start += chunk_size

            # create positive map; always use continuous labels starting from 1
            # (use the actual chunk length so the single-description case and a
            # final, possibly shorter, chunk get matching label counts)
            continue_labels = np.arange(0, len(description_list)) + class_plus
            cur_queries, positive_map_label_to_token = create_queries_and_maps(continue_labels, description_list, tokenizer, cfg=cfg)

            with torch.no_grad():
                output = model(images, captions=[cur_queries], positive_map=positive_map_label_to_token)
                output = output[0].to(cpu_device).convert(mode="xywh")
                output = output.resize(image_size)  # resize boxes back to the original image scale

            # score thresholding
            if threshold is not None:
                scores = output.get_field('scores')
                output = output[scores > threshold]
            # sort by score (descending) and keep only the top-k predictions
            if topk_per_eval is not None:
                scores = output.get_field('scores')
                _, sortIndices = scores.sort(descending=True)
                output = output[sortIndices]
                output = output[:topk_per_eval]

            # map continuous id to description id
            cont_ids_2_descript_ids = {i:v for i, v in enumerate(description_id_list)}
            pred_boxes = output.bbox
            pred_labels = output.get_field('labels') - class_plus   # continuous ids, starting from 0
            pred_scores = output.get_field('scores')

            # emit one prediction entry per box, mapping continuous ids back to description ids
            for box_idx, box in enumerate(pred_boxes):
                predictions.append({
                    "image_id": image_id,
                    "bbox": box.cpu().tolist(),
                    "description_ids": [cont_ids_2_descript_ids[pred_labels[box_idx].item()]],
                    "scores": [pred_scores[box_idx].item()],
                })
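
Once the loop finishes, dump the accumulated predictions to result.json. A minimal sketch, assuming the list built above is already in the format the toolkit's evaluator expects:

    import json

    with open("result.json", "w") as f:
        json.dump(predictions, f)
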
HEasoner commented 11 months ago

@xiaofeng94 Thank you for your answer, but I still have some doubts. Could you share the dataloader used here? I can't reproduce it with a simple one. Also, what are the values of chunk_size and class_plus in this code? I may need a little more information to reproduce it.

xiaofeng94 commented 11 months ago

Hey @HEasoner, for the dataloader, you may check the following snippet. chunk_size = 20 by default; class_plus is a hyperparameter for GLIP and equals 1 by default.

import os

import torch
import torch.utils.data as data
from PIL import Image
from maskrcnn_benchmark.structures.bounding_box import BoxList  # from the GLIP / maskrcnn_benchmark codebase

import omnilabeltools as olt


def pil_loader(path, retry=5):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    for _ in range(retry):
        try:
            with open(path, "rb") as f:
                img = Image.open(f)
                return img.convert("RGB")
        except OSError:  # retry on I/O errors instead of swallowing everything
            continue

def load_omnilabel_json(path_json: str, path_imgs: str):
    assert isinstance(path_json, str)

    ol = olt.OmniLabel(path_json)
    dataset_dicts = []
    for img_id in ol.image_ids:
        img_sample = ol.get_image_sample(img_id)
        dataset_dicts.append({
            "image_id": img_sample["id"],
            "file_name": os.path.join(path_imgs, img_sample["file_name"]),
            "inference_obj_descriptions": [od["text"] for od in img_sample["labelspace"]],
            "inference_obj_description_ids": [od["id"] for od in img_sample["labelspace"]],
        })

    return dataset_dicts

class OmniLabelDataset(data.Dataset):
    """OmniLabel inference dataset.

    Args:
        img_folder (string): Root directory containing the images.
        ann_file (string): Path to the OmniLabel json annotation file.
        transforms (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version, e.g., ``transforms.ToTensor``.
    """

    def __init__(self, img_folder, ann_file, transforms=None, **kwargs):
        self.img_folder = img_folder
        self.transforms = transforms
        self.dataset_dicts = load_omnilabel_json(ann_file, img_folder)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target, image_id), where target is a BoxList carrying
                the label-space descriptions and their ids.
        """
        data_dict = self.dataset_dicts[index]
        img_id = data_dict["image_id"]

        path = data_dict["file_name"]
        img = pil_loader(path)

        # inference only, so the BoxList starts with no boxes; it merely carries the label space
        target = BoxList(torch.Tensor(0, 4), img.size, mode="xywh").convert("xyxy")
        target.add_field("inference_obj_descriptions", data_dict["inference_obj_descriptions"])
        target.add_field("inference_obj_description_ids", data_dict["inference_obj_description_ids"])

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target, img_id

    def __len__(self):
        return len(self.dataset_dicts)

    def __repr__(self):
        fmt_str = "Dataset " + self.__class__.__name__ + "\n"
        fmt_str += "    Number of datapoints: {}\n".format(self.__len__())
        fmt_str += "    Root Location: {}\n".format(self.img_folder)
        return fmt_str
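
For completeness, a minimal (hypothetical) way to wire this dataset into the evaluation loop above, with batch size 1 and a trivial collate so that images, targets, image_ids unpack as in that snippet. In practice, transforms must convert the PIL image into whatever tensor format your GLIP setup expects before images.to(device) is called:

def trivial_collate(batch):
    # turn a list of (img, target, img_id) tuples into three parallel lists
    images, targets, image_ids = zip(*batch)
    return list(images), list(targets), list(image_ids)

dataset = OmniLabelDataset(
    img_folder="path/to/images",            # hypothetical paths
    ann_file="path/to/omnilabel_annotations.json",
    transforms=None,                        # plug in your GLIP transforms here
)
_iterator = data.DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=trivial_collate)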