giddyyupp / coco-minitrain

a subset of coco dataset for faster experimentation
236 stars 34 forks source link

The download exception is Remote end closed connection without response #21

Open ross-Hr opened 2 years ago

ross-Hr commented 2 years ago

由于网络限制,不能从 coco 官网抽样下载,所以我更改了下代码,可以直接从完整的 train 数据中,根据作者提供的 instances_minitrain2017.json 将图片抽取出来:

import os
from pycocotools.coco import COCO
import wget
import concurrent.futures
import argparse
import pathlib
from shutil import copy

# Command-line interface: the annotation JSON that lists the wanted images,
# the directory the images are copied into, and the directory that already
# holds the full set of train images.
parser = argparse.ArgumentParser(description="Download COCO images")
parser.add_argument(
    "--annotation",
    type=str,
    default="",
    help="Json file containing annotations",
)
parser.add_argument(
    "--output_dir", type=str, default="", help="Output file to save images to"
)
parser.add_argument(
    "--start_dir", type=str, default="", help="all train data(20G) dir"
)
args = parser.parse_args()
annotation = args.annotation
start_dir = args.start_dir

# A relative --annotation is resolved against the current working directory;
# an absolute one replaces `root` entirely (pathlib "/" semantics).
root = pathlib.Path().absolute()
ann_file = root / annotation

# Validate with parser.error() rather than `assert`: asserts are stripped
# under `python -O`, and the previous `assert "<message>"` on the extension
# check was always true, so it never rejected anything.
if not pathlib.Path(args.output_dir).is_dir():
    parser.error("not valid dir")
if not os.fsdecode(ann_file).endswith(".json"):
    parser.error("Only support COCO style JSON file")

# A missing annotation file propagates as FileNotFoundError, exactly as the
# former `try: ... except FileNotFoundError: raise` did.
coco = COCO(os.fsdecode(ann_file))
img_ids = list(coco.imgs.keys())
def download_images(id: int) -> None:
    """Copy the image for one COCO image id into the output directory.

    The file name is the id zero-padded to 12 digits plus ``.jpg``. It is
    looked up in the module-level ``start_dir`` and copied into
    ``args.output_dir``. Despite the name, nothing is downloaded: the image
    is copied from a local directory.

    Args:
        id: Integer image id taken from the annotation file.

    Returns:
        None. Failures are reported on stdout instead of being raised.
    """
    try:
        filename = f"{id:0>12d}.jpg"
        # os.path.join keeps an empty start_dir relative to the current
        # directory; the former f"{start_dir}/{filename}" pointed at the
        # filesystem root in that case.
        source = os.path.join(start_dir, filename)
        copy(source, args.output_dir)
    except Exception as e:
        # Deliberately broad: this runs in a worker thread whose result is
        # never inspected, so anything not reported here would be lost.
        print(f"The download exception is {e}", flush=True)
# Fan the per-image copies out over a thread pool. All tasks are submitted
# up front by map(); the results are intentionally not consumed, because
# download_images reports its own failures. shutdown(wait=True) blocks until
# every submitted copy has finished, exactly as leaving a `with` block would.
executor = concurrent.futures.ThreadPoolExecutor()
try:
    executor.map(download_images, img_ids)
finally:
    executor.shutdown(wait=True)