twangnh opened 4 days ago

Hi @AndyCao1125, thanks for sharing the wonderful work! Could you let us know the inference time of the proposed method, e.g., in FPS or ms?

---

Thank you for your question!
I conducted a quick test on a small real-world RGB-Event dataset (resolution: 260×346) using an NVIDIA RTX 4090 GPU, and here's the code I used:
```python
import os
import time

import cv2
import numpy as np
import torch
from tqdm import tqdm

## Basic setting, e.g., dataset, data transformations, etc.
...

## Real-world testing
total_infer_time = 0.0          # accumulated inference time
num_inferences = len(img_list)  # total number of timed inferences
warmup_iters = 100              # warm-up iterations

print("Warming up...")
for _ in tqdm(range(warmup_iters)):
    image = cv2.imread(os.path.join(image_folder, img_list[0]), cv2.IMREAD_COLOR)
    # load real event frame as a numpy array
    event = real_event[0].transpose(2, 0, 1)  # (2, 260, 346)
    event = event_transform(event)
    h, w, _ = image.shape
    size = np.array([[w, h, w, h]])
    # prepare image input
    x, _, _, scale, offset = transforms(image)
    x = x.unsqueeze(0).to(device)
    event = event.unsqueeze(0).to(device)
    if test_aug is not None:
        bboxes, scores, cls_inds = test_aug(x, net)
    else:
        bboxes, scores, cls_inds = net(x, event)

for index in tqdm(range(len(img_list))):
    image = cv2.imread(os.path.join(image_folder, img_list[index]), cv2.IMREAD_COLOR)
    # load real event frame as a numpy array
    event = real_event[index].transpose(2, 0, 1)  # (2, 260, 346)
    event = event_transform(event)
    h, w, _ = image.shape
    size = np.array([[w, h, w, h]])
    # prepare inputs
    x, _, _, scale, offset = transforms(image)
    x = x.unsqueeze(0).to(device)
    event = event.unsqueeze(0).to(device)

    torch.cuda.synchronize()  # flush pending GPU work so the timer starts clean
    start_time = time.time()
    if test_aug is not None:
        bboxes, scores, cls_inds = test_aug(x, net)
    else:
        # inference with RGB + event inputs
        bboxes, scores, cls_inds = net(x, event)
    torch.cuda.synchronize()  # wait for the forward pass to finish before stopping the timer
    infer_time = time.time() - start_time
    total_infer_time += infer_time

    # rescale boxes back to the original image size
    bboxes -= offset
    bboxes /= scale
    bboxes *= size

    # visualize detections
    img_processed = visualize(
        img=image,
        bboxes=bboxes,
        scores=scores,
        cls_inds=cls_inds,
        vis_thresh=vis_thresh,
        class_colors=class_colors,
        class_names=class_names,
        class_indexs=class_indexs,
        dataset_name=dataset_name,
    )
    if show:
        cv2.imshow('detection', img_processed)
        cv2.waitKey(0)
    # save result
    cv2.imwrite(os.path.join(save_path, str(index).zfill(6) + '.jpg'), img_processed)

avg_infer_time = total_infer_time / num_inferences
fps = 1.0 / avg_infer_time
print(f"Average inference time: {avg_infer_time:.4f} seconds")
print(f"FPS: {fps:.2f}")
```
Output:

```
Warming up...
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:12<00:00, 8.06it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 223/223 [00:20<00:00, 10.76it/s]
Average inference time: 0.0277 seconds
FPS: 36.11
```
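A side note on the measurement itself: `time.time()` is a host-side clock, which is why the script above calls `torch.cuda.synchronize()` around the forward pass. If you prefer timing directly on the device, CUDA events work too. Here's a minimal sketch, assuming the same `net`, `x`, and `event` objects as in the script:

```python
# Minimal sketch (assumes the same `net`, `x`, `event` as above) of timing a
# forward pass with CUDA events, which measure elapsed time on the GPU itself.
import torch

start_evt = torch.cuda.Event(enable_timing=True)
end_evt = torch.cuda.Event(enable_timing=True)

with torch.no_grad():
    start_evt.record()
    bboxes, scores, cls_inds = net(x, event)
    end_evt.record()

torch.cuda.synchronize()                      # ensure both events have completed
elapsed_ms = start_evt.elapsed_time(end_evt)  # elapsed GPU time in milliseconds
print(f"Forward pass: {elapsed_ms:.2f} ms ({1000.0 / elapsed_ms:.1f} FPS)")
```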
Either way, this run lands at roughly 36 FPS (0.0277 s per frame). Since the code is relatively simple, there is still room for optimization (one sketch follows below), so feel free to use it as a reference.
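For example, if the model's ops are autocast-friendly, FP16 inference is a low-effort thing to try. This is only a sketch under that assumption, not something I benchmarked:

```python
# Sketch of mixed-precision inference (assumes `net`, `x`, `event` as above;
# any speedup depends on whether the model's ops are autocast-friendly).
import torch

with torch.inference_mode(), torch.autocast(device_type='cuda', dtype=torch.float16):
    bboxes, scores, cls_inds = net(x, event)
```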
If you have any other questions, I’m happy to discuss further!