Closed blackjaxx closed 4 years ago
你去看看他们的标注说明,写个脚本遍历所有文件处理一下文件名就可以了。
CCPD数据集的处理可以用这个:
import os
import cv2
import json
import random
import multiprocessing
# Lookup tables used to decode the plate-index field of a CCPD file name.
# Every entry is a single character, so each table is spelled as a string and
# expanded into a list of characters.
# NOTE(review): the trailing "O" in each table looks like the dataset's
# placeholder entry -- confirm against the CCPD annotation notes.

# Index -> first plate character (province abbreviation).
provinces = list("皖沪津渝冀晋蒙辽吉黑苏浙京闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新警学O")

# Index -> second plate character (letters only; the table has no "I").
alphabets = list("ABCDEFGHJKLMNPQRSTUVWXYZO")

# Index -> remaining plate characters (same letters, then the digits 0-9).
ads = list("ABCDEFGHJKLMNPQRSTUVWXYZ0123456789O")
def process(walk):
    """Convert a batch of CCPD image files into dataset records.

    Each file name (without extension) is split on "-" into label fields.
    Field 3 holds four corner points written as "x&y" and joined by "_";
    field 4 holds the plate character indices joined by "_".

    Files whose name has fewer than 7 label fields, and files that OpenCV
    cannot decode as an image, are skipped.

    Args:
        walk: iterable of ``(root, filename)`` pairs.

    Returns:
        A list of dicts with the keys ``"image"`` (path to the file),
        ``"points"`` (the four x values divided by the image width followed
        by the four y values divided by the image height) and ``"plate"``
        (the decoded plate string).
    """
    dataset = []
    for root, filename in walk:
        name = os.path.splitext(filename)[0]
        labels = name.split("-")
        if len(labels) < 7:
            continue

        image = os.path.join(root, filename)
        pixels = cv2.imread(image, 1)
        if pixels is None:
            # cv2.imread signals an unreadable or non-image file by returning
            # None rather than raising; skip it so that one bad file does not
            # abort the whole batch with an AttributeError.
            continue
        height, width = pixels.shape[:2]

        # Rotate the corner list by two positions.
        # NOTE(review): CCPD is believed to list corners starting from the
        # bottom-right one, which would make this order top-left first --
        # confirm against the dataset's annotation notes.
        points = labels[3].split("_")
        points = [
            tuple(points[2].split("&")),
            tuple(points[3].split("&")),
            tuple(points[0].split("&")),
            tuple(points[1].split("&")),
        ]
        # Normalise to image size: all x values first, then all y values.
        points = [float(x) / width for x, _ in points] + [float(y) / height for _, y in points]

        # The first index selects from provinces, the second from alphabets,
        # and every remaining index from ads.
        plate = [int(tk) for tk in labels[4].split("_")]
        plate = "".join([provinces[plate[0]], alphabets[plate[1]]] + [ads[tk] for tk in plate[2:]])

        dataset.append({"image": image, "points": points, "plate": plate})
    return dataset
if __name__ == "__main__":
    # Gather every (directory, file name) pair found under the CCPD2019 tree.
    entries = []
    for root, _, files in os.walk("CCPD2019"):
        for filename in files:
            entries.append((root, filename))

    # Cut the list into batches of at most 10000 entries, one per pool task.
    batch_size = 10000
    batches = []
    for start in range(0, len(entries), batch_size):
        batches.append(entries[start:start + batch_size])

    # Run process() over the batches in a pool sized at twice the CPU count.
    with multiprocessing.Pool(multiprocessing.cpu_count() * 2) as pool:
        per_batch = pool.map(process, batches)

    # Flatten the per-batch results into one list of records.
    dataset = []
    for records in per_batch:
        dataset.extend(records)

    # Shuffle the records and write the whole dataset to stdout as JSON.
    random.shuffle(dataset)
    print(json.dumps(dataset))
收到,我去试试,谢谢你!!
在dataset.py文件中
def load_dataset(root, filename="dataset.json")
这个dataset.json文件如何生成?数据集参考ccpd,ccpd里面是一张张图片,它的文件名即为标签名,搞不懂如何将它转换为json文件。或者您能否提供我一份参考?谢谢。