Open simba0626 opened 3 months ago
I wrote a function for calculating the metrics of table 8, and it looks good.
def evaluation_metrics(data_path):
    """Compute and print the Table-8 accuracy metrics from a JSONL results file.

    Each line of ``data_path`` must be a JSON object with keys:
      - "gt":     ground-truth answer (string)
      - "type":   question type ("comp", "presence", or "rural_urban")
      - "answer": model prediction (string)

    Predictions are normalized (spaces and periods removed, lowercased)
    before an exact-match comparison against the lowercased ground truth.
    Prints correct/incorrect counts, totals, and accuracy per category and
    overall. Categories with zero samples are skipped instead of raising
    ZeroDivisionError (the original guarded only rural_urban).
    """
    # tqdm is only a progress bar; degrade gracefully if it is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        def tqdm(iterable):
            return iterable

    records = []
    with open(data_path, "r") as fp:
        for line in fp:
            records.append(json.loads(line))

    # counts[key] = [correct, incorrect]; "total" aggregates all types.
    counts = {
        "comp": [0, 0],
        "presence": [0, 0],
        "rural_urban": [0, 0],
        "total": [0, 0],
    }
    for entry in tqdm(records):
        gt = entry["gt"].lower()
        answer = entry["answer"].replace(" ", "").lower().replace(".", "")
        idx = 0 if gt == answer else 1
        counts["total"][idx] += 1
        type_ = entry["type"]
        if type_ in counts:
            counts[type_][idx] += 1

    def _report(label, correct, incorrect, separator=True):
        # Guard every category against an empty denominator, not just
        # rural_urban as the original did.
        total = correct + incorrect
        if total == 0:
            return
        print(f"{label}_correct:", correct)
        print(f"{label}_incorrect:", incorrect)
        print(f"{label}_Total:", total)
        print(f"{label}_Acc:", correct / total)
        if separator:
            print("-" * 100)

    _report("presence", *counts["presence"])
    _report("comparison", *counts["comp"])
    _report("rural_urban", *counts["rural_urban"])
    # The original printed no trailing separator after the totals.
    _report("total", *counts["total"], separator=False)
I am also waiting for the metric calculation function of Table 7 and Table 9.
I wrote a function for calculating the metrics of table 8, and it looks good.
def evaluation_metrics(data_path):
    """Compute and print the Table-8 accuracy metrics from a JSONL results file.

    Each line of ``data_path`` must be a JSON object with keys "gt" (ground
    truth), "type" ("comp", "presence", or "rural_urban"), and "answer"
    (model prediction). Predictions are normalized (spaces and periods
    removed, lowercased) before exact-match comparison.

    Fixes over the pasted version: guards every category against an empty
    denominator (the original guarded only rural_urban and would raise
    ZeroDivisionError on a missing category) and makes tqdm optional.
    """
    # tqdm is only a progress bar; degrade gracefully if it is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        def tqdm(iterable):
            return iterable

    records = []
    with open(data_path, "r") as fp:
        for line in fp:
            records.append(json.loads(line))

    # counts[key] = [correct, incorrect]; "total" aggregates all types.
    counts = {
        "comp": [0, 0],
        "presence": [0, 0],
        "rural_urban": [0, 0],
        "total": [0, 0],
    }
    for entry in tqdm(records):
        gt = entry["gt"].lower()
        answer = entry["answer"].replace(" ", "").lower().replace(".", "")
        idx = 0 if gt == answer else 1
        counts["total"][idx] += 1
        type_ = entry["type"]
        if type_ in counts:
            counts[type_][idx] += 1

    def _report(label, correct, incorrect, separator=True):
        # Skip categories with no samples instead of dividing by zero.
        total = correct + incorrect
        if total == 0:
            return
        print(f"{label}_correct:", correct)
        print(f"{label}_incorrect:", incorrect)
        print(f"{label}_Total:", total)
        print(f"{label}_Acc:", correct / total)
        if separator:
            print("-" * 100)

    _report("presence", *counts["presence"])
    _report("comparison", *counts["comp"])
    _report("rural_urban", *counts["rural_urban"])
    # The original printed no trailing separator after the totals.
    _report("total", *counts["total"], separator=False)
I am also waiting for the metric calculation function of Table 7 and Table 9.
I am currently facing this issue as well. Have you implemented the metric calculations for the other tables?
Not yet
Hi author, this is nice work. When I run the evaluation code, I find that the output is a JSON file. My questions: How do I calculate the metrics in Tables 7, 8, and 9? Would you be able to provide the code for computing these metrics?
Thank you
ywsun
python geochat/eval/batch_geochat_grounding.py \ --model-path /path/to/model \ --question-file path/to/jsonl/file \ --answer-file path/to/output/jsonl/file \ --image_folder path/to/image/folder/
python geochat/eval/batch_geochat_referring.py \ --model-path /path/to/model \ --question-file path/to/jsonl/file \ --answer-file path/to/output/jsonl/file \ --image_folder path/to/image/folder/