Open ross-Hr opened 2 years ago
由于网络限制，无法从 COCO 官网抽样下载图片，所以我修改了一下代码：可以直接从完整的 train 数据中，根据作者提供的 instances_minitrain2017.json 将对应的图片抽取出来。
"""Copy the images listed in a COCO-style annotation file out of a local image directory.

Instead of downloading each image, every image id found in the annotation
JSON is mapped to a ``<12-digit zero-padded id>.jpg`` file name, and that file
is copied from ``--start_dir`` into ``--output_dir``.
"""
import os
from pycocotools.coco import COCO
import wget  # NOTE: unused now that images are copied locally; import kept as in the original
import concurrent.futures
import argparse
import pathlib
from shutil import copy

parser = argparse.ArgumentParser(description="Download COCO images")
parser.add_argument(
    "--annotation",
    type=str,
    default="",
    help="Json file containing annotations",
)
parser.add_argument(
    "--output_dir", type=str, default="", help="Output file to save images to"
)
parser.add_argument(
    "--start_dir", type=str, default="", help="all train data(20G) dir"
)
args = parser.parse_args()

annotation = args.annotation
start_dir = args.start_dir
root = pathlib.Path().absolute()
# A relative --annotation is resolved against the current working directory;
# an absolute one is used unchanged (pathlib's "/" discards the left operand).
ann_file = root / annotation
# Use the same Path object for validation and for copying, so the directory
# that was checked is the directory that is written to.
output_dir = pathlib.Path(args.output_dir)

# Explicit checks instead of `assert`: asserts are stripped under `python -O`,
# and the original `assert "<message>"` asserted a non-empty string, which is
# always true, so the extension check never rejected anything.
if not output_dir.is_dir():
    parser.error("not valid dir")
if not os.fsdecode(ann_file).endswith(".json"):
    parser.error("Only support COCO style JSON file")

# A missing annotation file surfaces as whatever exception COCO raises; the
# original try/except only re-raised it, so no wrapper is needed.
coco = COCO(os.fsdecode(ann_file))
img_ids = list(coco.imgs.keys())


def download_images(id):
    """Copy the image with the given id from ``start_dir`` into ``output_dir``.

    The source file name is the id formatted as a 12-character, zero-padded
    decimal followed by ``.jpg``.  Failures are reported on stdout and
    otherwise ignored, so one bad image does not stop the remaining copies.

    Args:
        id: Integer image id taken from the annotation file.
    """
    try:
        filename = f"{id:0>12d}.jpg"
        # Join with pathlib rather than "/" string formatting so an empty
        # --start_dir means the current directory, not the filesystem root.
        source = pathlib.Path(start_dir) / filename
        copy(source, output_dir)
    except Exception as e:
        # Deliberately broad: this is the worker boundary.  An exception that
        # escaped here would be stored in a result that is never inspected
        # below and would therefore vanish silently.
        print(f"The download exception is {e}", flush=True)


# Leaving the `with` block waits for all submitted copies to finish.
with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(download_images, img_ids)
由于网络限制，无法从 COCO 官网抽样下载图片，所以我修改了一下代码：可以直接从完整的 train 数据中，根据作者提供的 instances_minitrain2017.json 将对应的图片抽取出来。