facebookresearch / d2go

D2Go is a toolkit for efficient deep learning
Apache License 2.0
826 stars 197 forks source link

TypeError: Population must be a sequence. For dicts or sets, use sorted(d). #655

Open SpirikleOfficial opened 3 months ago

SpirikleOfficial commented 3 months ago

Instructions To Reproduce the 🐛 Bug:

  1. Full runnable code or full changes you made:

PREPARE DATASET


import os
import json
import cv2
from detectron2.structures import BoxMode
import numpy as np
from detectron2.data import MetadataCatalog, DatasetCatalog

def get_document_dicts(img_dir):
    """Load ``documents_coco.json`` from ``img_dir`` and build per-image records.

    Args:
        img_dir: Directory containing ``documents_coco.json`` and the image
            files that the JSON refers to by ``file_name``.

    Returns:
        A list with one dict per entry of the JSON's ``"images"`` list, in
        file order. Each dict has the keys ``file_name`` (joined onto
        ``img_dir``), ``image_id``, ``height``, ``width`` and ``annotations``.
        Every annotation dict holds an XYXY box spanning the keypoint
        coordinates, the flat ``[x, y, v, ...]`` keypoint list and
        ``category_id`` 0.

    Raises:
        OSError: If the JSON file cannot be opened.
        KeyError: If an image or annotation lacks a field read below.
        ValueError: If an annotation has no complete keypoint triple (the
            min/max over empty coordinate lists fails).
    """
    json_file = os.path.join(img_dir, "documents_coco.json")
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # text encoding when reading it.
    with open(json_file, encoding="utf-8") as f:
        coco_data = json.load(f)

    # Group annotations by image in a single pass so that the per-image loop
    # below does not have to rescan the full annotation list for every image.
    # A file without an "annotations" key is treated as having none.
    annotations_by_image = {}
    for annotation in coco_data.get("annotations", ()):
        annotations_by_image.setdefault(annotation["image_id"], []).append(annotation)

    dataset_dicts = []
    for img in coco_data["images"]:
        annotations = []
        for annotation in annotations_by_image.get(img["id"], ()):
            flat = annotation["keypoints"]
            px = flat[0::3]
            py = flat[1::3]
            visibility = flat[2::3]

            # Re-interleave the triples; zip() drops a trailing incomplete
            # triple if the flat list length is not a multiple of three.
            keypoints = [
                value
                for triple in zip(px, py, visibility)
                for value in triple
            ]

            # NOTE(review): the box spans every keypoint regardless of its
            # visibility flag -- confirm this is intended for keypoints that
            # are stored as (0, 0, 0).
            annotations.append(
                {
                    "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "keypoints": keypoints,
                    "category_id": 0,
                }
            )

        dataset_dicts.append(
            {
                "file_name": os.path.join(img_dir, img["file_name"]),
                "image_id": img["id"],
                "height": img["height"],
                "width": img["width"],
                "annotations": annotations,
            }
        )

    return dataset_dicts

# Register one dataset per split ("docv3_train" / "docv3_val"). The loader is
# a lambda whose default argument pins the current split, so each registered
# callable reads its own "final_dataset/<split>" directory.
for d in ("train", "val"):
    DatasetCatalog.register(
        "docv3_" + d,
        lambda d=d: get_document_dicts("final_dataset/" + d),
    )
    # Attach the single class name and the evaluator type to the split.
    MetadataCatalog.get("docv3_" + d).set(
        thing_classes=["document"],
        evaluator_type="coco",
    )

# Metadata handle for the training split.
docv3_metadata = MetadataCatalog.get("docv3_train")

TRAIN MODEL


import os
from d2go.runner import GeneralizedRCNNRunner

def prepare_for_launch():
    """Create the D2Go runner and a keypoint-RCNN config for the docv3 datasets.

    Starts from the runner's default config, merges the model-zoo config
    ``keypoint_rcnn_fbnetv3a_dsmask_C4.yaml``, applies the overrides below,
    creates ``cfg.OUTPUT_DIR`` and records the keypoint names and flip map on
    the ``docv3_train`` metadata.

    Returns:
        The ``(cfg, runner)`` pair.
    """
    # NOTE(review): `model_zoo` and `MetadataCatalog` are not imported next to
    # this function; presumably they come from an earlier notebook cell.
    zoo_config = "keypoint_rcnn_fbnetv3a_dsmask_C4.yaml"
    keypoint_count = 4  # keypoints per document: tl, bl, br, tr

    runner = GeneralizedRCNNRunner()
    cfg = runner.get_default_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_config))

    # Model: EMA off, initialise from the model-zoo checkpoint, run on GPU.
    cfg.MODEL_EMA.ENABLED = False
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
    cfg.MODEL.DEVICE = "cuda"

    # Data: the splits registered as "docv3_train" / "docv3_val".
    cfg.DATASETS.TRAIN = ("docv3_train",)
    cfg.DATASETS.TEST = ("docv3_val",)
    cfg.DATALOADER.NUM_WORKERS = 2

    # Solver: 100 iterations at a constant base learning rate (no decay steps).
    cfg.SOLVER.IMS_PER_BATCH = 32
    cfg.SOLVER.BASE_LR = 0.16
    cfg.SOLVER.MAX_ITER = 100
    cfg.SOLVER.STEPS = []

    # ROI heads: a single foreground class. NUM_CLASSES is the number of
    # classes itself, not the number of classes plus one.
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    # Keypoint head and evaluation: four keypoints, one shared OKS sigma.
    cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = keypoint_count
    cfg.TEST.KEYPOINT_OKS_SIGMAS = [0.01] * keypoint_count

    # Keypoint names and the left/right pairs on the training metadata.
    MetadataCatalog.get("docv3_train").keypoint_names = ["tl", "bl", "br", "tr"]
    MetadataCatalog.get("docv3_train").keypoint_flip_map = [("tl", "tr"), ("bl", "br")]

    return cfg, runner

# Set the class list and evaluator type on both splits' metadata before
# building the config.
for d in ("train", "val"):
    MetadataCatalog.get("docv3_" + d).set(
        thing_classes=["document"],
        evaluator_type="coco",
    )

# Build config and runner, construct the model, then train from iteration 0
# (resume=False).
cfg, runner = prepare_for_launch()
model = runner.build_model(cfg)
runner.do_train(cfg, model, resume=False)
  2. What exact command you ran: after running the TRAIN MODEL code above, the following output and error were produced:
INFO:d2go.runner.default_runner:Initializing control pg
INFO:d2go.modeling.backbone.fbnet_v2:Build FBNet using unified arch_def:
trunk
- {'block_op': 'conv_k3', 'block_cfg': {'out_channels': 16, 'stride': 2}, 'stage_idx': 0, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 16, 'stride': 1, 'expansion': 1, 'less_se_channels': False}, 'stage_idx': 0, 'block_idx': 1}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 32, 'stride': 2, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 1, 'block_idx': 0}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 32, 'stride': 1, 'expansion': 2, 'less_se_channels': False}, 'stage_idx': 1, 'block_idx': 1}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 40, 'stride': 2, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 2, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 40, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 2, 'block_idx': 1}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 40, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 2, 'block_idx': 2}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 40, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 2, 'block_idx': 3}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 72, 'stride': 2, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 72, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 1}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 72, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 2}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 72, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 3}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 112, 'stride': 1, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 4}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 112, 'stride': 1, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 5}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 112, 'stride': 1, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 6}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 112, 'stride': 1, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 3, 'block_idx': 7}
WARNING:mobile_cv.arch.utils.helper:Arguments ['width_divisor', 'dw_skip_bnrelu', 'zero_last_bn_gamma'] skipped for op Conv2d
INFO:d2go.modeling.backbone.fbnet_v2:Build FBNet using unified arch_def:
rpn
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 112, 'stride': 1, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 0, 'block_idx': 0}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 112, 'stride': 1, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 0, 'block_idx': 1}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 112, 'stride': 1, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 0, 'block_idx': 2}
INFO:d2go.modeling.backbone.fbnet_v2:Build FBNet using unified arch_def:
bbox
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 128, 'stride': 2, 'expansion': 4}, 'stage_idx': 0, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 128, 'stride': 1, 'expansion': 6}, 'stage_idx': 0, 'block_idx': 1}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 128, 'stride': 1, 'expansion': 6}, 'stage_idx': 0, 'block_idx': 2}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 160, 'stride': 1, 'expansion': 6}, 'stage_idx': 0, 'block_idx': 3}
INFO:d2go.modeling.backbone.fbnet_v2:Build FBNet using unified arch_def:
kpts
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 128, 'stride': 2, 'expansion': 4}, 'stage_idx': 0, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 128, 'stride': 1, 'expansion': 6}, 'stage_idx': 0, 'block_idx': 1}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 128, 'stride': 1, 'expansion': 6}, 'stage_idx': 0, 'block_idx': 2}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 128, 'stride': -2, 'expansion': 6}, 'stage_idx': 0, 'block_idx': 3}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 64, 'stride': -2, 'expansion': 3}, 'stage_idx': 0, 'block_idx': 4}
INFO:d2go.optimizer.build:Using optimizer:
SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: True
    lr: 0.16
    maximize: False
    momentum: 0.9
    nesterov: False
    param_names: ['backbone.body.trunk0.fbnetv2_0_0.conv.weight', 'backbone.body.trunk0.fbnetv2_0_0.conv.bias', 'backbone.body.trunk0.fbnetv2_0_1.dw.conv.weight', 'backbone.body.trunk0.fbnetv2_0_1.pwl.conv.weight', 'backbone.body.trunk1.fbnetv2_1_0.pw.conv.weight', 'backbone.body.trunk1.fbnetv2_1_0.dw.conv.weight', 'backbone.body.trunk1.fbnetv2_1_0.pwl.conv.weight', 'backbone.body.trunk1.fbnetv2_1_1.pw.conv.weight', 'backbone.body.trunk1.fbnetv2_1_1.dw.conv.weight', 'backbone.body.trunk1.fbnetv2_1_1.pwl.conv.weight', 'backbone.body.trunk2.fbnetv2_2_0.pw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_0.dw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_0.pwl.conv.weight', 'backbone.body.trunk2.fbnetv2_2_1.pw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_1.dw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_1.pwl.conv.weight', 'backbone.body.trunk2.fbnetv2_2_2.pw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_2.dw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_2.pwl.conv.weight', 'backbone.body.trunk2.fbnetv2_2_3.pw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_3.dw.conv.weight', 'backbone.body.trunk2.fbnetv2_2_3.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_0.pw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_0.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_0.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_1.pw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_1.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_1.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_2.pw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_2.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_2.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_3.pw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_3.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_3.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_4.pw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_4.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_4.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_5.pw.conv.weight', 
'backbone.body.trunk3.fbnetv2_3_5.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_5.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_6.pw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_6.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_6.pwl.conv.weight', 'backbone.body.trunk3.fbnetv2_3_7.pw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_7.dw.conv.weight', 'backbone.body.trunk3.fbnetv2_3_7.pwl.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0.pw.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0.dw.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0.pwl.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1.pw.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1.dw.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1.pwl.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2.pw.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2.dw.conv.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2.pwl.conv.weight', 'proposal_generator.rpn_head.rpn_regressor.cls_logits.weight', 'proposal_generator.rpn_head.rpn_regressor.cls_logits.bias', 'proposal_generator.rpn_head.rpn_regressor.bbox_pred.weight', 'proposal_generator.rpn_head.rpn_regressor.bbox_pred.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_0.pw.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_0.dw.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_0.pwl.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_1.pw.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_1.dw.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_1.pwl.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_2.pw.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_2.dw.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_2.pwl.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_3.pw.conv.weight', 
'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_3.dw.conv.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_3.pwl.conv.weight', 'roi_heads.box_predictor.cls_score.weight', 'roi_heads.box_predictor.cls_score.bias', 'roi_heads.box_predictor.bbox_pred.weight', 'roi_heads.box_predictor.bbox_pred.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_0.pw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_0.dw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_0.pwl.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_1.pw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_1.dw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_1.pwl.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_2.pw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_2.dw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_2.pwl.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_3.pw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_3.dw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_3.pwl.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_4.pw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_4.dw.conv.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_4.pwl.conv.weight', 'roi_heads.keypoint_head.predictor.kps_score_lowres.pw.conv.weight', 'roi_heads.keypoint_head.predictor.kps_score_lowres.dw.conv.weight', 'roi_heads.keypoint_head.predictor.kps_score_lowres.pwl.conv.weight']
    weight_decay: 0.0001

Parameter Group 1
    dampening: 0
    differentiable: False
    foreach: True
    lr: 0.16
    maximize: False
    momentum: 0.9
    nesterov: False
    param_names: ['backbone.body.trunk0.fbnetv2_0_0.bn.weight', 'backbone.body.trunk0.fbnetv2_0_0.bn.bias', 'backbone.body.trunk0.fbnetv2_0_1.pwl.bn.weight', 'backbone.body.trunk0.fbnetv2_0_1.pwl.bn.bias', 'backbone.body.trunk1.fbnetv2_1_0.pw.bn.weight', 'backbone.body.trunk1.fbnetv2_1_0.pw.bn.bias', 'backbone.body.trunk1.fbnetv2_1_0.pwl.bn.weight', 'backbone.body.trunk1.fbnetv2_1_0.pwl.bn.bias', 'backbone.body.trunk1.fbnetv2_1_1.pw.bn.weight', 'backbone.body.trunk1.fbnetv2_1_1.pw.bn.bias', 'backbone.body.trunk1.fbnetv2_1_1.pwl.bn.weight', 'backbone.body.trunk1.fbnetv2_1_1.pwl.bn.bias', 'backbone.body.trunk2.fbnetv2_2_0.pw.bn.weight', 'backbone.body.trunk2.fbnetv2_2_0.pw.bn.bias', 'backbone.body.trunk2.fbnetv2_2_0.pwl.bn.weight', 'backbone.body.trunk2.fbnetv2_2_0.pwl.bn.bias', 'backbone.body.trunk2.fbnetv2_2_1.pw.bn.weight', 'backbone.body.trunk2.fbnetv2_2_1.pw.bn.bias', 'backbone.body.trunk2.fbnetv2_2_1.pwl.bn.weight', 'backbone.body.trunk2.fbnetv2_2_1.pwl.bn.bias', 'backbone.body.trunk2.fbnetv2_2_2.pw.bn.weight', 'backbone.body.trunk2.fbnetv2_2_2.pw.bn.bias', 'backbone.body.trunk2.fbnetv2_2_2.pwl.bn.weight', 'backbone.body.trunk2.fbnetv2_2_2.pwl.bn.bias', 'backbone.body.trunk2.fbnetv2_2_3.pw.bn.weight', 'backbone.body.trunk2.fbnetv2_2_3.pw.bn.bias', 'backbone.body.trunk2.fbnetv2_2_3.pwl.bn.weight', 'backbone.body.trunk2.fbnetv2_2_3.pwl.bn.bias', 'backbone.body.trunk3.fbnetv2_3_0.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_0.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_0.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_0.pwl.bn.bias', 'backbone.body.trunk3.fbnetv2_3_1.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_1.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_1.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_1.pwl.bn.bias', 'backbone.body.trunk3.fbnetv2_3_2.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_2.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_2.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_2.pwl.bn.bias', 
'backbone.body.trunk3.fbnetv2_3_3.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_3.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_3.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_3.pwl.bn.bias', 'backbone.body.trunk3.fbnetv2_3_4.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_4.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_4.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_4.pwl.bn.bias', 'backbone.body.trunk3.fbnetv2_3_5.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_5.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_5.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_5.pwl.bn.bias', 'backbone.body.trunk3.fbnetv2_3_6.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_6.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_6.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_6.pwl.bn.bias', 'backbone.body.trunk3.fbnetv2_3_7.pw.bn.weight', 'backbone.body.trunk3.fbnetv2_3_7.pw.bn.bias', 'backbone.body.trunk3.fbnetv2_3_7.pwl.bn.weight', 'backbone.body.trunk3.fbnetv2_3_7.pwl.bn.bias', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0.pw.bn.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0.pw.bn.bias', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0.pwl.bn.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0.pwl.bn.bias', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1.pw.bn.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1.pw.bn.bias', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1.pwl.bn.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1.pwl.bn.bias', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2.pw.bn.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2.pw.bn.bias', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2.pwl.bn.weight', 'proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2.pwl.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_0.pw.bn.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_0.pw.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_0.pwl.bn.weight', 
'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_0.pwl.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_1.pw.bn.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_1.pw.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_1.pwl.bn.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_1.pwl.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_2.pw.bn.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_2.pw.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_2.pwl.bn.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_2.pwl.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_3.pw.bn.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_3.pw.bn.bias', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_3.pwl.bn.weight', 'roi_heads.box_head.roi_box_conv.0.fbnetv2_0_3.pwl.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_0.pw.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_0.pw.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_0.pwl.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_0.pwl.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_1.pw.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_1.pw.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_1.pwl.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_1.pwl.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_2.pw.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_2.pw.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_2.pwl.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_2.pwl.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_3.pw.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_3.pw.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_3.pwl.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_3.pwl.bn.bias', 
'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_4.pw.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_4.pw.bn.bias', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_4.pwl.bn.weight', 'roi_heads.keypoint_head.feature_extractor.0.fbnetv2_0_4.pwl.bn.bias']
    weight_decay: 0.0
)
INFO:d2go.optimizer.build:optimizer parameter groups:
Param group 0: {dampening: 0, differentiable: False, foreach: True, lr: 0.16, maximize: False, momentum: 0.9, nesterov: False, params: 93, weight_decay: 0.0001}
Param group 1: {dampening: 0, differentiable: False, foreach: True, lr: 0.16, maximize: False, momentum: 0.9, nesterov: False, params: 108, weight_decay: 0.0}

WARNING:d2go.optimizer.build:param_groups.txt already exists
INFO:d2go.checkpoint.log_checkpoint:Checkpoint:1428566632 load begin 
INFO:detectron2.checkpoint.detection_checkpoint:[DetectionCheckpointer] Loading from https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/250430934/model_0389999.pth ...
INFO:fvcore.common.checkpoint:[Checkpointer] Loading from C:\Users\Admin/.torch/iopath_cache\d2go/models/250430934\model_0389999.pth ...
WARNING:fvcore.common.checkpoint:Skip loading parameter 'roi_heads.keypoint_head.predictor.kps_score_lowres.pwl.conv.weight' to the model due to incompatible shapes: (17, 144, 1, 1) in the checkpoint but (4, 144, 1, 1) in the model! You might want to double check if this is expected.
WARNING:fvcore.common.checkpoint:Some model parameters or buffers are not found in the checkpoint:
roi_heads.keypoint_head.predictor.kps_score_lowres.pwl.conv.weight
WARNING:fvcore.common.checkpoint:The checkpoint state_dict contains keys that are not used by the model:
  pixel_mean
  pixel_std
INFO:d2go.checkpoint.log_checkpoint:Checkpoint:1428566632 load end 
INFO:d2go.data.build:Building D2Go's train loader ...
INFO:d2go.data.build:Using dataset mapper:
D2GoDatasetMapper:
  is_train: True
  image_loader: None
  tfm_gens: 
    - ResizeShortestEdge(short_edge_length=(224,), max_size=448, sample_style='choice')
    - RandomFlip()
INFO:detectron2.data.build:Removed 0 images with no usable annotations. 1020 images left.
INFO:detectron2.data.build:Removed 0 images with fewer than 1 keypoints.
INFO:detectron2.data.build:Using training sampler TrainingSampler
INFO:detectron2.data.common:Serializing the dataset using: <class 'mobile_cv.torch.utils_pytorch.shareables.SharedList'>
INFO:mobile_cv.torch.utils_pytorch.shareables:Serializing 1020 elements to byte tensors and concatenating them all ...
INFO:mobile_cv.torch.utils_pytorch.shareables:Serialized dataset takes 0.38 MiB
INFO:mobile_cv.torch.utils_pytorch.shareables:Moving serialized dataset to shared memory ...
INFO:mobile_cv.torch.utils_pytorch.shareables:Moving data to shared memory (SharedMemory('wnsm_981390ea', size=402592)) ...
INFO:mobile_cv.torch.utils_pytorch.shareables:Moving data to shared memory (SharedMemory('wnsm_f0ee797e', size=8160)) ...
INFO:mobile_cv.torch.utils_pytorch.shareables:Finished moving to shared memory
INFO:detectron2.data.build:Making batched data loader with batch_size=32
INFO:detectron2.engine.train_loop:Starting training from iteration 0
INFO:d2go.utils.flop_calculator:Evaluating model's number of parameters and FLOPS
INFO:d2go.utils.flop_calculator:Flops info written to ./output\flops_str_mobilecv.txt
INFO:d2go.utils.flop_calculator:Flops info written to ./output\flops_str_fvcore.txt
INFO:d2go.utils.flop_calculator:Flops table (full version) written to ./output\flops_table_fvcore.txt
INFO:d2go.utils.flop_calculator:Flops table:
| module                                                   | #parameters or shape   | #flops     |
|:---------------------------------------------------------|:-----------------------|:-----------|
| model                                                    | 1.499M                 | 14.328G    |
|  backbone.body                                           |  0.369M                |  6.241G    |
|   backbone.body.trunk0                                   |   0.912K               |   0.488G   |
|    backbone.body.trunk0.fbnetv2_0_0                      |    0.48K               |    0.253G  |
|    backbone.body.trunk0.fbnetv2_0_1                      |    0.432K              |    0.235G  |
|   backbone.body.trunk1                                   |   7.984K               |   1.558G   |
|    backbone.body.trunk1.fbnetv2_1_0                      |    4.336K              |    1.061G  |
|    backbone.body.trunk1.fbnetv2_1_1                      |    3.648K              |    0.497G  |
|   backbone.body.trunk2                                   |   29.824K              |   1.27G    |
|    backbone.body.trunk2.fbnetv2_2_0                      |    8.032K              |    0.528G  |
|    backbone.body.trunk2.fbnetv2_2_1                      |    7.264K              |    0.247G  |
|    backbone.body.trunk2.fbnetv2_2_2                      |    7.264K              |    0.247G  |
|    backbone.body.trunk2.fbnetv2_2_3                      |    7.264K              |    0.247G  |
|   backbone.body.trunk3                                   |   0.331M               |   2.925G   |
|    backbone.body.trunk3.fbnetv2_3_0                      |    14.832K             |    0.237G  |
|    backbone.body.trunk3.fbnetv2_3_1                      |    20.776K             |    0.177G  |
|    backbone.body.trunk3.fbnetv2_3_2                      |    20.776K             |    0.177G  |
|    backbone.body.trunk3.fbnetv2_3_3                      |    20.776K             |    0.177G  |
|    backbone.body.trunk3.fbnetv2_3_4                      |    38.48K              |    0.328G  |
|    backbone.body.trunk3.fbnetv2_3_5                      |    71.632K             |    0.61G   |
|    backbone.body.trunk3.fbnetv2_3_6                      |    71.632K             |    0.61G   |
|    backbone.body.trunk3.fbnetv2_3_7                      |    71.632K             |    0.61G   |
|  proposal_generator.rpn_head                             |  0.222M                |  1.885G    |
|   proposal_generator.rpn_head.rpn_feature.0              |   0.215M               |   1.829G   |
|    proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_0 |    71.632K             |    0.61G   |
|    proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_1 |    71.632K             |    0.61G   |
|    proposal_generator.rpn_head.rpn_feature.0.fbnetv2_0_2 |    71.632K             |    0.61G   |
|   proposal_generator.rpn_head.rpn_regressor              |   6.675K               |   56.179M  |
|    proposal_generator.rpn_head.rpn_regressor.cls_logits  |    1.335K              |    11.236M |
|    proposal_generator.rpn_head.rpn_regressor.bbox_pred   |    5.34K               |    44.943M |
|  roi_heads                                               |  0.909M                |  6.202G    |
|   roi_heads.box_head                                     |   0.434M               |   4.572G   |
|    roi_heads.box_head.roi_box_conv.0                     |    0.434M              |    4.571G  |
|    roi_heads.box_head.avgpool                            |                        |    1.037M  |
|   roi_heads.box_predictor                                |   0.726K               |   0.691M   |
|    roi_heads.box_predictor.cls_score                     |    0.242K              |    0.23M   |
|    roi_heads.box_predictor.bbox_pred                     |    0.484K              |    0.461M  |
|   roi_heads.keypoint_head                                |   0.474M               |   1.629G   |
|    roi_heads.keypoint_head.feature_extractor.0           |    0.465M              |    1.333G  |
|    roi_heads.keypoint_head.predictor.kps_score_lowres    |    8.784K              |    0.295G  |
INFO:detectron2.utils.events: eta: 0:15:21  iter: 19  total_loss: 15.4  loss_cls: 0.1736  loss_box_reg: 0.09685  loss_keypoint: 15.02  loss_rpn_cls: 0.09182  loss_rpn_loc: 0.01204    time: 11.4823  last_time: 11.0752  data_time: 2.6365  last_data_time: 2.2396   lr: 0.0001296  max_mem: 7637M
ERROR:detectron2.engine.train_loop:Exception during training:
Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\Lib\site-packages\detectron2\engine\train_loop.py", line 155, in train
    self.run_step()
  File "C:\Users\Admin\anaconda3\Lib\site-packages\detectron2\engine\train_loop.py", line 297, in run_step
    data = next(self._data_loader_iter)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\MachineLearning\DETECTRON2\d2go\d2go\utils\visualization.py", line 154, in __iter__
    for data in self.data_loader:
  File "C:\Users\Admin\anaconda3\Lib\site-packages\detectron2\data\common.py", line 329, in __iter__
    for d in self.dataset:
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\utils\data\dataloader.py", line 631, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\utils\data\dataloader.py", line 1326, in _next_data
    return self._process_data(data)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\utils\data\dataloader.py", line 1372, in _process_data
    data.reraise()
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\_utils.py", line 722, in reraise
    raise exception
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\utils\data\_utils\worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 32, in fetch
    data.append(next(self.dataset_iter))
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\detectron2\data\common.py", line 296, in __iter__
    yield self.dataset[idx]
          ~~~~~~~~~~~~^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\detectron2\data\common.py", line 133, in __getitem__
    cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0]
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\random.py", line 439, in sample
    raise TypeError("Population must be a sequence.  "
TypeError: Population must be a sequence.  For dicts or sets, use sorted(d).

INFO:detectron2.engine.hooks:Overall training speed: 28 iterations in 0:05:22 (11.5107 s / it)
INFO:detectron2.engine.hooks:Total training time: 0:05:22 (0:00:00 on hooks)
INFO:detectron2.utils.events: eta: 0:13:14  iter: 30  total_loss: 10.48  loss_cls: 0.1582  loss_box_reg: 0.1003  loss_keypoint: 10.13  loss_rpn_cls: 0.0758  loss_rpn_loc: 0.01128    time: 11.5107  last_time: 10.9982  data_time: 2.5092  last_data_time: 2.1668   lr: 0.0001136  max_mem: 7637M
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[10], line 31
     29 cfg, runner = prepare_for_launch()
     30 model = runner.build_model(cfg)
---> 31 runner.do_train(cfg, model, resume=False)

File D:\MachineLearning\DETECTRON2\d2go\d2go\runner\default_runner.py:685, in Detectron2GoRunner.do_train(self, cfg, model, resume)
    683 update_hooks_from_registry(trainer_hooks, cfg)
    684 trainer.register_hooks(trainer_hooks)
--> 685 trainer.train(start_iter, max_iter)
    687 if hasattr(self, "original_cfg"):
    688     table = get_cfg_diff_table(cfg, self.original_cfg)

File ~\anaconda3\Lib\site-packages\detectron2\engine\train_loop.py:155, in TrainerBase.train(self, start_iter, max_iter)
    153 for self.iter in range(start_iter, max_iter):
    154     self.before_step()
--> 155     self.run_step()
    156     self.after_step()
    157 # self.iter == max_iter can be used by `after_train` to
    158 # tell whether the training successfully finished or failed
    159 # due to exceptions.

File ~\anaconda3\Lib\site-packages\detectron2\engine\train_loop.py:297, in SimpleTrainer.run_step(self)
    293 start = time.perf_counter()
    294 """
    295 If you want to do something with the data, you can wrap the dataloader.
    296 """
--> 297 data = next(self._data_loader_iter)
    298 data_time = time.perf_counter() - start
    300 if self.zero_grad_before_forward:

File D:\MachineLearning\DETECTRON2\d2go\d2go\utils\visualization.py:154, in DataLoaderVisWrapper.__iter__(self)
    153 def __iter__(self):
--> 154     for data in self.data_loader:
    155         self._maybe_write_vis(data)
    156         yield data

File ~\anaconda3\Lib\site-packages\detectron2\data\common.py:329, in AspectRatioGroupedDataset.__iter__(self)
    328 def __iter__(self):
--> 329     for d in self.dataset:
    330         w, h = d["width"], d["height"]
    331         bucket_id = 0 if w > h else 1

File ~\anaconda3\Lib\site-packages\torch\utils\data\dataloader.py:631, in _BaseDataLoaderIter.__next__(self)
    628 if self._sampler_iter is None:
    629     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    630     self._reset()  # type: ignore[call-arg]
--> 631 data = self._next_data()
    632 self._num_yielded += 1
    633 if self._dataset_kind == _DatasetKind.Iterable and \
    634         self._IterableDataset_len_called is not None and \
    635         self._num_yielded > self._IterableDataset_len_called:

File ~\anaconda3\Lib\site-packages\torch\utils\data\dataloader.py:1326, in _MultiProcessingDataLoaderIter._next_data(self)
   1324 if len(self._task_info[self._rcvd_idx]) == 2:
   1325     data = self._task_info.pop(self._rcvd_idx)[1]
-> 1326     return self._process_data(data)
   1328 assert not self._shutdown and self._tasks_outstanding > 0
   1329 idx, data = self._get_data()

File ~\anaconda3\Lib\site-packages\torch\utils\data\dataloader.py:1372, in _MultiProcessingDataLoaderIter._process_data(self, data)
   1370 self._try_put_index()
   1371 if isinstance(data, ExceptionWrapper):
-> 1372     data.reraise()
   1373 return data

File ~\anaconda3\Lib\site-packages\torch\_utils.py:722, in ExceptionWrapper.reraise(self)
    718 except TypeError:
    719     # If the exception takes multiple arguments, don't try to
    720     # instantiate since we don't know how to
    721     raise RuntimeError(msg) from None
--> 722 raise exception

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\utils\data\_utils\worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 32, in fetch
    data.append(next(self.dataset_iter))
                ^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\detectron2\data\common.py", line 296, in __iter__
    yield self.dataset[idx]
          ~~~~~~~~~~~~^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\detectron2\data\common.py", line 133, in __getitem__
    cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0]
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\random.py", line 439, in sample
    raise TypeError("Population must be a sequence.  "
TypeError: Population must be a sequence.  For dicts or sets, use sorted(d).

Expected behavior:

Successful training of the keypoint model.

I have shown how the dataset was prepared in the PREPARE DATASET code snippet. I have observed that when the number of iterations is set to 10, the code runs successfully. However, whenever I set the number of iterations to 100 or even 1000, I get the above error.