import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import matplotlib.pyplot as plt
import numpy as np
import cv2
# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from centernet.config import add_centernet_config
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
import os
2. What exact command you run: python train.py
3. __Full logs__ or other relevant observations:
[04/15 01:13:32 d2.data.datasets.coco]: Loading /root/visdrone/annotations/instances_train.json takes 2.99 seconds.
[04/15 01:13:33 d2.data.datasets.coco]: Loaded 9344 images in COCO format from /root/visdrone/annotations/instances_train.json
WARNING [04/15 01:13:35 d2.config.compat]: Config '/root/CenterNet2/projects/CenterNet2/configs/CenterNet2_DLA-BiFPN-P5_640_24x_ST.yaml' has no VERSION. Assuming it to be compatible with latest v2.
Loading pretrained DLA!
[04/15 01:13:43 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[04/15 01:13:43 d2.data.build]: Using training sampler TrainingSampler
[04/15 01:13:43 d2.data.common]: Serializing 9344 elements to byte tensors and concatenating them all ...
[04/15 01:13:43 d2.data.common]: Serialized dataset takes 68.37 MiB
[04/15 01:13:43 d2.engine.train_loop]: Starting training from iteration 0
/root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py:567: UserWarning: This overload of nonzero is deprecated:
nonzero()
Consider using one of the following signatures instead:
nonzero(*, bool as_tuple) (Triggered internally at /opt/conda/conda-bld/pytorch_1603729096996/work/torch/csrc/utils/python_arg_parser.cpp:882.)
per_candidate_nonzeros = per_candidate_inds.nonzero() # n
ERROR [04/15 01:13:46 d2.engine.train_loop]: Exception during training:
Traceback (most recent call last):
File "/root/detectron2/detectron2/engine/train_loop.py", line 149, in train
self.run_step()
File "/root/detectron2/detectron2/engine/defaults.py", line 490, in run_step
self._trainer.run_step()
File "/root/detectron2/detectron2/engine/train_loop.py", line 273, in run_step
loss_dict = self.model(data)
File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/detectron2/detectron2/modeling/meta_arch/rcnn.py", line 157, in forward
proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py", line 109, in forward
losses = self.losses(
File "/root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py", line 194, in losses
agn_pos_loss, agn_neg_loss = binary_heatmap_focal_loss_jit(
RuntimeError: nvrtc: error: invalid value for --gpu-architecture (-arch)
nvrtc compilation failed:
#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)
template<typename T>
__device__ T maximum(T a, T b) {
return isnan(a) ? a : (a > b ? a : b);
}
template<typename T>
__device__ T minimum(T a, T b) {
return isnan(a) ? a : (a < b ? a : b);
}
4. please simplify the steps as much as possible so they do not require additional resources to
run, such as a private dataset.
## Expected behavior:
If there is no obvious error in the "full logs" provided above,
please tell us the expected behavior.
## Environment:
If your issue looks like an installation issue / environment issue,
please first try to solve it yourself with the instructions in
https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues
Instructions To Reproduce the 🐛 Bug:
# import some common libraries
import matplotlib.pyplot as plt
import numpy as np
import cv2
# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader
from detectron2.data.datasets import register_coco_instances
from centernet.config import add_centernet_config
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
import os
# Reproduction script: register the drone train/val datasets, dump one
# annotated training sample to disk, train CenterNet2 with DefaultTrainer,
# then run COCO evaluation on the validation set.
#
# Each statement is on its own line. In the collapsed paste several
# statements shared one physical line, which is not valid Python, and the
# inline comment after ``cfg.DATASETS.TEST`` hid the config assignments
# that followed it on the same line.

# Register both splits under the names used by the config and the evaluator.
register_coco_instances(
    "dronedataset",
    {},
    "/workspace/mdg904/dronedataset/dronedataset/train/train.json",
    "/workspace/mdg904/dronedataset/dronedataset/train",
)
register_coco_instances(
    "droneval",
    {},
    "/workspace/mdg904/dronedataset/dronedataset/val/val.json",
    "/workspace/mdg904/dronedataset/dronedataset/val",
)
person_metadata = MetadataCatalog.get("dronedataset")
dataset_dicts = DatasetCatalog.get("dronedataset")

import random

# Draw the annotations of one randomly chosen training image and write the
# result to ./image.png.
for d in random.sample(dataset_dicts, 1):
    img = cv2.imread(d["file_name"])
    # The channel axis is reversed before the image is handed to Visualizer
    # and reversed again before cv2.imwrite.
    visualizer = Visualizer(img[:, :, ::-1], metadata=person_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    cv2.imwrite('./image.png', vis.get_image()[:, :, ::-1])

# Build the config: detectron2 defaults + CenterNet2 keys + the YAML file,
# then the overrides below.
cfg = get_cfg()
add_centernet_config(cfg)
cfg.merge_from_file("/workspace/mdg904/CenterNet2/projects/CenterNet2/configs/CenterNet2_DLA-BiFPN-P5_640_24x_ST.yaml")
cfg.DATASETS.TRAIN = ("dronedataset",)
cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
cfg.DATALOADER.NUM_WORKERS = 4
cfg.INPUT.TRAIN_SIZE = 1000
cfg.INPUT.TEST_SIZE = 1000
cfg.SOLVER.IMS_PER_BATCH = 8

# Train from the configured weights (resume=False: do not resume from a
# previous run's checkpoint).
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Evaluate the trained model on the validation split.
evaluator = COCOEvaluator("droneval", cfg, False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "droneval")
inference_on_dataset(trainer.model, val_loader, evaluator)
[04/15 01:13:32 d2.data.datasets.coco]: Loading /root/visdrone/annotations/instances_train.json takes 2.99 seconds. [04/15 01:13:33 d2.data.datasets.coco]: Loaded 9344 images in COCO format from /root/visdrone/annotations/instances_train.json WARNING [04/15 01:13:35 d2.config.compat]: Config '/root/CenterNet2/projects/CenterNet2/configs/CenterNet2_DLA-BiFPN-P5_640_24x_ST.yaml' has no VERSION. Assuming it to be compatible with latest v2. Loading pretrained DLA!
[04/15 01:13:43 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()] [04/15 01:13:43 d2.data.build]: Using training sampler TrainingSampler [04/15 01:13:43 d2.data.common]: Serializing 9344 elements to byte tensors and concatenating them all ... [04/15 01:13:43 d2.data.common]: Serialized dataset takes 68.37 MiB [04/15 01:13:43 d2.engine.train_loop]: Starting training from iteration 0 /root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py:567: UserWarning: This overload of nonzero is deprecated: nonzero() Consider using one of the following signatures instead: nonzero(, bool as_tuple) (Triggered internally at /opt/conda/conda-bld/pytorch_1603729096996/work/torch/csrc/utils/python_arg_parser.cpp:882.) per_candidate_nonzeros = per_candidate_inds.nonzero() # n ERROR [04/15 01:13:46 d2.engine.train_loop]: Exception during training: Traceback (most recent call last): File "/root/detectron2/detectron2/engine/train_loop.py", line 149, in train self.run_step() File "/root/detectron2/detectron2/engine/defaults.py", line 490, in run_step self._trainer.run_step() File "/root/detectron2/detectron2/engine/train_loop.py", line 273, in run_step loss_dict = self.model(data) File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(input, *kwargs) File "/root/detectron2/detectron2/modeling/meta_arch/rcnn.py", line 157, in forward proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(input, **kwargs) File "/root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py", line 109, in forward losses = self.losses( File 
"/root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py", line 194, in losses agn_pos_loss, agn_neg_loss = binary_heatmap_focal_loss_jit( RuntimeError: nvrtc: error: invalid value for --gpu-architecture (-arch)
nvrtc compilation failed:
#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)
template<typename T>
__device__ T maximum(T a, T b) {
return isnan(a) ? a : (a > b ? a : b);
}
template<typename T>
__device__ T minimum(T a, T b) {
return isnan(a) ? a : (a < b ? a : b);
}
extern "C" global void func_1(float v0, float v1, float t2, float t3, float v4, float v5, float aten_mul_flat, float aten_mul_flat_1, float aten_pow_flat, float aten_log_flat, float aten_add_flat, float aten_pow_flat_1, float aten_add_flat_1, float aten_clamp_flat) { { float v = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_1 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_2 = ldg(t3 + 512 blockIdx.x + threadIdx.x); aten_clamp_flat[512 blockIdx.x + threadIdx.x] = v<v4 ? v4 : (v_1>v5 ? v5 : v_2); float aten_powflat = aten_pow_flat[512 blockIdx.x + threadIdx.x]; aten_powflat = powf((__ldg(t3 + 512 blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 * blockIdx.x + threadIdx.x))>v5 ? v5 : (ldg(t3 + 512 blockIdx.x + threadIdx.x))), v0); aten_pow_flat[512 blockIdx.x + threadIdx.x] = aten_powflat; float aten_addflat = aten_add_flat[512 blockIdx.x + threadIdx.x]; aten_addflat = (0.f - ((__ldg(t3 + 512 blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 * blockIdx.x + threadIdx.x))>v5 ? v5 : (ldg(t3 + 512 blockIdx.x + threadIdx.x))))) + 1.f; aten_add_flat[512 blockIdx.x + threadIdx.x] = aten_addflat; float t2_ = ldg(t2 + 512 blockIdx.x + threadIdx.x); aten_add_flat_1[512 blockIdx.x + threadIdx.x] = (0.f - t2_) + 1.f; float v_3 = __ldg(t3 + 512 * blockIdx.x + threadIdx.x); float v_4 = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_5 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); aten_log_flat[512 blockIdx.x + threadIdx.x] = logf((0.f - (v_3<v4 ? v4 : (v_4>v5 ? v5 : v_5))) + 1.f); aten_pow_flat_1[512 blockIdx.x + threadIdx.x] = powf((0.f - t2_) + 1.f, v1); float aten_mulflat = aten_mul_flat_1[512 blockIdx.x + threadIdx.x]; aten_mulflat = (logf((0.f - ((__ldg(t3 + 512 blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 * blockIdx.x + threadIdx.x))>v5 ? v5 : (ldg(t3 + 512 blockIdx.x + threadIdx.x))))) + 1.f)) (powf((ldg(t3 + 512 * blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 blockIdx.x + threadIdx.x))>v5 ? 
v5 : (__ldg(t3 + 512 blockIdx.x + threadIdx.x))), v0)); aten_mul_flat_1[512 blockIdx.x + threadIdx.x] = aten_mulflat; float v_6 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_7 = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_8 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_9 = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_10 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_11 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); aten_mul_flat[512 blockIdx.x + threadIdx.x] = ((logf((0.f - (v_6<v4 ? v4 : (v_7>v5 ? v5 : v_8))) + 1.f)) (powf(v_9<v4 ? v4 : (v_10>v5 ? v5 : v_11), v0))) (powf((0.f - t2_) + 1.f, v1)); } }
[04/15 01:13:46 d2.engine.hooks]: Total training time: 0:00:00 (0:00:00 on hooks) [04/15 01:13:46 d2.utils.events]: iter: 1 total_loss: 8.957 loss_cls_stage0: 2.487 loss_box_reg_stage0: 0.03032 loss_cls_stage1: 2.451 loss_box_reg_stage1: 0.02082 loss_cls_stage2: 2.505 loss_box_reg_stage2: 0.01662 loss_centernet_loc: 0.88 loss_centernet_agn_pos: 0.5665 loss_centernet_agn_neg: 0.0002603 data_time: 0.1786 lr: 2e-05 max_mem: 14131M Traceback (most recent call last): File "train.py", line 51, in
trainer.train()
File "/root/detectron2/detectron2/engine/defaults.py", line 480, in train
super().train(self.start_iter, self.max_iter)
File "/root/detectron2/detectron2/engine/train_loop.py", line 149, in train
self.run_step()
File "/root/detectron2/detectron2/engine/defaults.py", line 490, in run_step
self._trainer.run_step()
File "/root/detectron2/detectron2/engine/train_loop.py", line 273, in run_step
loss_dict = self.model(data)
File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/detectron2/detectron2/modeling/meta_arch/rcnn.py", line 157, in forward
proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
File "/opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py", line 109, in forward
losses = self.losses(
File "/root/CenterNet2/projects/CenterNet2/centernet/modeling/dense_heads/centernet.py", line 194, in losses
agn_pos_loss, agn_neg_loss = binary_heatmap_focal_loss_jit(
RuntimeError: nvrtc: error: invalid value for --gpu-architecture (-arch)
nvrtc compilation failed:
#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)
template<typename T>
__device__ T maximum(T a, T b) {
return isnan(a) ? a : (a > b ? a : b);
}
template<typename T>
__device__ T minimum(T a, T b) {
return isnan(a) ? a : (a < b ? a : b);
}
extern "C" global void func_1(float v0, float v1, float t2, float t3, float v4, float v5, float aten_mul_flat, float aten_mul_flat_1, float aten_pow_flat, float aten_log_flat, float aten_add_flat, float aten_pow_flat_1, float aten_add_flat_1, float aten_clamp_flat) { { float v = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_1 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_2 = ldg(t3 + 512 blockIdx.x + threadIdx.x); aten_clamp_flat[512 blockIdx.x + threadIdx.x] = v<v4 ? v4 : (v_1>v5 ? v5 : v_2); float aten_powflat = aten_pow_flat[512 blockIdx.x + threadIdx.x]; aten_powflat = powf((__ldg(t3 + 512 blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 * blockIdx.x + threadIdx.x))>v5 ? v5 : (ldg(t3 + 512 blockIdx.x + threadIdx.x))), v0); aten_pow_flat[512 blockIdx.x + threadIdx.x] = aten_powflat; float aten_addflat = aten_add_flat[512 blockIdx.x + threadIdx.x]; aten_addflat = (0.f - ((__ldg(t3 + 512 blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 * blockIdx.x + threadIdx.x))>v5 ? v5 : (ldg(t3 + 512 blockIdx.x + threadIdx.x))))) + 1.f; aten_add_flat[512 blockIdx.x + threadIdx.x] = aten_addflat; float t2_ = ldg(t2 + 512 blockIdx.x + threadIdx.x); aten_add_flat_1[512 blockIdx.x + threadIdx.x] = (0.f - t2_) + 1.f; float v_3 = __ldg(t3 + 512 * blockIdx.x + threadIdx.x); float v_4 = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_5 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); aten_log_flat[512 blockIdx.x + threadIdx.x] = logf((0.f - (v_3<v4 ? v4 : (v_4>v5 ? v5 : v_5))) + 1.f); aten_pow_flat_1[512 blockIdx.x + threadIdx.x] = powf((0.f - t2_) + 1.f, v1); float aten_mulflat = aten_mul_flat_1[512 blockIdx.x + threadIdx.x]; aten_mulflat = (logf((0.f - ((__ldg(t3 + 512 blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 * blockIdx.x + threadIdx.x))>v5 ? v5 : (ldg(t3 + 512 blockIdx.x + threadIdx.x))))) + 1.f)) (powf((ldg(t3 + 512 * blockIdx.x + threadIdx.x))<v4 ? v4 : ((ldg(t3 + 512 blockIdx.x + threadIdx.x))>v5 ? 
v5 : (__ldg(t3 + 512 blockIdx.x + threadIdx.x))), v0)); aten_mul_flat_1[512 blockIdx.x + threadIdx.x] = aten_mulflat; float v_6 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_7 = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_8 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_9 = ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_10 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); float v_11 = __ldg(t3 + 512 blockIdx.x + threadIdx.x); aten_mul_flat[512 blockIdx.x + threadIdx.x] = ((logf((0.f - (v_6<v4 ? v4 : (v_7>v5 ? v5 : v_8))) + 1.f)) (powf(v_9<v4 ? v4 : (v_10>v5 ? v5 : v_11), v0))) (powf((0.f - t2_) + 1.f, v1)); } }
sys.platform linux Python 3.8.3 (default, May 19 2020, 18:47:26) [GCC 7.3.0] numpy 1.19.2 detectron2 0.4 @/root/detectron2/detectron2 Compiler GCC 7.5 CUDA compiler CUDA 11.0 detectron2 arch flags 8.0 DETECTRON2_ENV_MODULE
PyTorch 1.7.0 @/opt/conda/lib/python3.8/site-packages/torch
PyTorch debug build True
GPU available True
GPU 0 GeForce RTX 3090 (arch=8.6)
CUDA_HOME /usr/local/cuda
TORCH_CUDA_ARCH_LIST 8.0
Pillow 8.0.0
torchvision 0.8.0 @/opt/conda/lib/python3.8/site-packages/torchvision
torchvision arch flags 3.5, 5.0, 6.0, 7.0, 7.5, 8.0
fvcore 0.1.5.post20210413
iopath 0.1.7
cv2 4.5.1
PyTorch built with: