[Open] liuzc1999 opened this issue 9 months ago
Our model has no object detection capability, so computing AP (Average Precision) and AR (Average Recall) is a bit involved: we have to rely on an external object detection model.
Taking KINS as an example, the steps for a fair comparison are as follows:
- Run inference with AISFormer and save its predicted visible masks.
- For each visible mask, perform amodal completion with C2F-Seg.
- Arrange the results in AISFormer's output format and use AISFormer's code to compute AP and AR.
Note that using a stronger detector in this pipeline yields better results.
Unfortunately, we have accidentally lost the code for this part. We will provide it, together with our result files, as soon as possible (within 3 weeks). In the meantime, we recommend using IoU as the evaluation metric for our model.
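For reference, a minimal sketch of a mean-IoU evaluation over the saved amodal predictions could look like the following. This is not the authors' evaluation script: the per-image .pkl layout matches the scripts further down in this thread, but the gt_annotations structure and its instance-by-instance alignment with the predictions are assumptions made only for illustration.

# Minimal mean-IoU sketch, not the authors' evaluation script.
# gt_annotations: {image_id: [gt_rle, ...]} is assumed to be aligned,
# instance by instance, with the saved per-image predictions.
import os
import pickle
import numpy as np
import pycocotools.mask as mask_utils

def mask_iou(pred, gt):
    # IoU of two binary masks of the same shape.
    inter = np.logical_and(pred, gt).sum()
    union = np.logical_or(pred, gt).sum()
    return inter / union if union > 0 else 0.0

def mean_amodal_iou(pred_dir, gt_annotations):
    ious = []
    for image_id, gt_rles in gt_annotations.items():
        with open(os.path.join(pred_dir, "{}.pkl".format(image_id)), "rb") as f:
            pred = pickle.load(f)
        for ann, gt_rle in zip(pred["annotation"], gt_rles):
            ious.append(mask_iou(mask_utils.decode(ann["amodal"]),
                                 mask_utils.decode(gt_rle)))
    return float(np.mean(ious)) if ious else 0.0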
Can you provide the code for evaluating metrics such as AP and AR?
I have a very basic implementation that runs C2F-Seg's amodal completion on the visible masks predicted by AISFormer and saves one .pkl of amodal predictions per image:
import os
import pickle

import numpy as np
import torch
from tqdm import tqdm

# `model`, `args`, `rank`, `config`, `test_loader`, `to_cuda`, `get_results`
# and `save_results` come from the surrounding C2F-Seg inference setup and
# are not shown here.
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank])
iter = 0
save_path = "/home/ubuntu/data/Aisformer_COCOA_results"
model.eval()
with torch.no_grad():
    if rank == 0:
        test_loader = tqdm(test_loader)
    for items in test_loader:
        image_id = int(items["image_id"].item())
        if not os.path.exists(os.path.join(save_path, "{}.pkl".format(image_id))):
            num_objs = items["vm_crop"].size(1)
            print(image_id, num_objs)
            if num_objs <= 16:
                if num_objs == 0:
                    # No visible masks for this image: save an empty annotation.
                    my_dict = {
                        "image_id": image_id,
                        "annotation": []
                    }
                    with open(os.path.join(save_path, "{}.pkl".format(image_id)), "wb") as tf:
                        pickle.dump(my_dict, tf)
                    continue
                items = to_cuda(items, config.device)
                pred_objs_FM = model.module.only_predict_maskgit(items, iter, 'test')
                pred_objs_FM = pred_objs_FM.cpu().numpy()
                # if num_objs == 1:
                #     pred_objs_FM = pred_objs_FM[np.newaxis, ...]
                save_results(pred_objs_FM, image_id, os.path.join(save_path, "{}.pkl".format(image_id)))
            elif num_objs <= 128:
                # Run the model on 16 objects at a time and concatenate the
                # per-chunk predictions before saving.
                chunks = [get_results(items, start, start + 16)
                          for start in range(0, num_objs, 16)]
                save_results(
                    np.concatenate(chunks, axis=0),
                    image_id,
                    os.path.join(save_path, "{}.pkl".format(image_id))
                )
            else:
                # More than 128 objects is not handled; just log the image id.
                print(image_id)
        iter += 1
        print(os.path.join(save_path, "{}.pkl".format(image_id)))
        torch.cuda.empty_cache()
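`save_results` and `get_results` are not shown in the thread. `get_results(items, start, end)` presumably slices the objects in [start, end), moves the slice to the GPU, and calls only_predict_maskgit on it, mirroring the <=16 branch above. For `save_results`, the script in the next step reads back pkl_data["annotation"][j]["amodal"] and decodes it with pycocotools.mask, so it presumably RLE-encodes each predicted amodal mask and dumps one .pkl per image. A minimal sketch under that assumption (the 0.5 threshold is also an assumption, in case the model outputs soft masks):

# Hypothetical reconstruction of save_results; the original helper was not
# posted in this thread. pred_objs_FM is assumed to hold per-object amodal
# masks in the same order as AISFormer's instances for this image.
import pickle
import numpy as np
import pycocotools.mask as mask_utils

def save_results(pred_objs_FM, image_id, file_path):
    annotation = []
    for mask in pred_objs_FM:
        # Threshold (assumption) and RLE-encode the amodal mask.
        binary = np.asfortranarray((mask > 0.5).astype(np.uint8))
        annotation.append({"amodal": mask_utils.encode(binary)})
    with open(file_path, "wb") as tf:
        pickle.dump({"image_id": image_id, "annotation": annotation}, tf)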
Next, organize these results into the same format as AISFormer's prediction files and then use AISFormer's code (https://github.com/UARK-AICV/AISFormer) to compute AP and AR.
import torch
import os
import pickle
import pycocotools.mask as mask_utils
import matplotlib.pyplot as plt
import cvbase as cvb
import cv2
from tqdm import tqdm
import itertools
import json
import numpy as np

# Per-instance predictions produced by AISFormer's test-time inference.
amodal_dict = torch.load("/home/ubuntu/data/Aisformer_kins_results/test_inference/instances_amodal_predictions.pth")
visible_dict = torch.load("/home/ubuntu/data/Aisformer_kins_results/test_inference/instances_visible_predictions.pth")

class MyEncoder(json.JSONEncoder):
    # Make numpy arrays and RLE byte strings JSON-serializable.
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, bytes):
            return str(obj, encoding='utf-8')
        return json.JSONEncoder.default(self, obj)

def mask_find_bboxs(mask):
    # Bounding boxes of connected components, sorted by area; the last row
    # (the background component) is dropped. Not used below.
    retval, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, connectivity=8)
    stats = stats[stats[:, 4].argsort()]
    return stats[:-1]

final_list = []
json_file_list = []
for i in tqdm(range(len(visible_dict))):
    image_id = visible_dict[i]["image_id"]
    visible_instances = visible_dict[i]["instances"]
    amodal_instances = amodal_dict[i]["instances"]
    # C2F-Seg amodal predictions saved per image by the previous script.
    pkl_file_name = os.path.join("/home/ubuntu/data/Aisformer_kins_results", "{}.pkl".format(image_id))
    pkl_data = pickle.load(open(pkl_file_name, "rb"))
    pkl_annotation = pkl_data["annotation"]
    amodal_instances_list = []
    for j in range(len(visible_instances)):
        amodal_mask = mask_utils.decode(pkl_annotation[j]["amodal"])
        visible_mask = mask_utils.decode(visible_instances[j]["segmentation"])
        # AISFormer's own amodal prediction (decoded but not used below).
        amodal_dict_vrsp = mask_utils.decode(amodal_dict[i]["instances"][j]["segmentation"])
        # Overlap between the visible mask and the C2F-Seg amodal mask,
        # as a fraction of the amodal area.
        area = (visible_mask * amodal_mask).sum() / amodal_mask.sum()
        amodal_instances_list.append(
            {
                "image_id": image_id,
                'category_id': visible_instances[j]["category_id"],
                'bbox': visible_instances[j]["bbox"],
                'score': visible_instances[j]["score"],
                'segmentation': pkl_annotation[j]["amodal"],
                'area': area
            }
        )
    meta = {
        "image_id": image_id,
        "instances": amodal_instances_list
    }
    final_list.append(meta)

file_path = os.path.join("/home/ubuntu/data/Aisformer_kins_results", "44k_results.pth")
torch.save(final_list, file_path, _use_new_zipfile_serialization=False)

json_amodal_results = list(itertools.chain(*[x["instances"] for x in final_list]))
file_path = os.path.join("/home/ubuntu/data/Aisformer_kins_results", "coco_instances_amodal_results.pth")
torch.save(json_amodal_results, file_path, _use_new_zipfile_serialization=False)

file_path = os.path.join("/home/ubuntu/data/Aisformer_kins_results", "coco_instances_amodal_results_.json")
with open(file_path, "w") as f:
    f.write(json.dumps(json_amodal_results, cls=MyEncoder, ensure_ascii=False))
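The exported coco_instances_amodal_results_.json follows the COCO results format, so besides running it through AISFormer's evaluator as described above, a quick sanity check with plain pycocotools is possible. The sketch below is a generic COCO-style evaluation, not the authors' script; the KINS amodal ground-truth annotation path is a placeholder you would need to point at your own file.

# Generic COCO-style AP/AR evaluation of the exported amodal results.
# The ground-truth annotation path is a placeholder, not a file from this repo.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("/path/to/kins_amodal_ground_truth.json")  # placeholder
coco_dt = coco_gt.loadRes("/home/ubuntu/data/Aisformer_kins_results/coco_instances_amodal_results_.json")

# Evaluate the amodal segmentation masks against the amodal ground truth.
coco_eval = COCOeval(coco_gt, coco_dt, iouType="segm")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()  # prints AP / AR at the standard COCO thresholds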