boostcampaitech2 / object-detection-level2-cv-03

object-detection-level2-cv-03 created by GitHub Classroom
2 stars 8 forks source link

pseudo labeling을 위한 코드입니다! #7

Open lsh3163 opened 3 years ago

lsh3163 commented 3 years ago

pseudo labeling을 위한 코드입니다! 제출용 csv 파일과 train, test json을 이용해 새로운 pseudo_train.json을 생성합니다. 생성된 pseudo_train.json의 박스가 올바른지 fiftyone으로 확인이 필요합니다.

import pandas as pd
import json

# Build a COCO-style pseudo-label training file: the original train set plus
# the test images, with the model's submission predictions attached to the
# test images as annotations.
test_json_path = "test.json"
train_json_path = "cv_train1.json"
submission_file_path = "submission_latest.csv"
pseudolabel_file_path = 'pseudo_train.json'

# Each predicted box occupies six whitespace-separated fields in
# PredictionString. Field 0 is used as the category, field 1 is ignored
# (presumably the confidence score -- TODO confirm), fields 2-5 are the box.
FIELDS_PER_BOX = 6

with open(train_json_path, "r") as train_json:
    train_dict = json.load(train_json)

with open(test_json_path, "r") as test_json:
    test_dict = json.load(test_json)

# Derive the first free image / annotation id from the train data instead of
# hard-coding them (previously 4883 and 23144), so a different train file
# cannot produce colliding ids.
image_id_offset = max(
    (image["id"] for image in train_dict["images"]), default=-1
) + 1
next_annotation_id = max(
    (annotation["id"] for annotation in train_dict["annotations"]), default=-1
) + 1

# Shift the test image ids past the train image ids.
for image in test_dict["images"]:
    image["id"] += image_id_offset

pseudo_dict = {
    "info": train_dict["info"],
    "licenses": train_dict["licenses"],
    "images": train_dict["images"] + test_dict["images"],
    "categories": train_dict["categories"],
    # Copy the list so appending pseudo labels does not mutate train_dict.
    "annotations": list(train_dict["annotations"]),
}

submission_file = pd.read_csv(submission_file_path)

# NOTE(review): the row index is used as the (unshifted) test image id, which
# assumes the CSV rows are in the same order as the test image ids and that
# those ids start at 0 -- confirm against test.json.
for idx, row in submission_file.iterrows():
    prediction = row["PredictionString"]
    # An image without predictions is read by pandas as NaN, not as a string.
    if pd.isna(prediction):
        continue

    # split() without an argument ignores leading/trailing/repeated spaces,
    # so the last box is kept whether or not the string ends with a space.
    fields = str(prediction).split()
    usable = len(fields) - len(fields) % FIELDS_PER_BOX  # drop a partial box

    image_id = int(idx) + image_id_offset
    for start in range(0, usable, FIELDS_PER_BOX):
        # NOTE(review): fields 2-5 are written as-is into the COCO bbox and
        # the last two are multiplied for the area, i.e. they are treated as
        # x, y, width, height. If the submission stores xmin, ymin, xmax,
        # ymax instead, they must be converted first -- confirm the format.
        bbox = [float(value) for value in fields[start + 2:start + 6]]
        pseudo_dict["annotations"].append({
            "image_id": image_id,
            "category_id": int(fields[start]),  # COCO expects an integer id
            "area": bbox[2] * bbox[3],
            "iscrowd": 0,
            "id": next_annotation_id,
            "bbox": bbox,
        })
        next_annotation_id += 1

with open(pseudolabel_file_path, 'w', encoding='utf-8') as f:
    json.dump(pseudo_dict, f, ensure_ascii=False, indent=4)

# Notebook convenience: shows the first submission rows when run as a cell.
submission_file.head()