jacobgil / pytorch-grad-cam

Advanced AI Explainability for computer vision. Support for CNNs, Vision Transformers, Classification, Object detection, Segmentation, Image similarity and more.
https://jacobgil.github.io/pytorch-gradcam-book
MIT License
10.06k stars 1.52k forks source link

IndexError: too many indices for tensor of dimension 3 #213

Closed luoolu closed 2 years ago

luoolu commented 2 years ago

`if __name__ == '__main__': """ python vit_gradcam.py --image-path <path_to_image> Example usage of using cam-methods on a VIT network. """

# Two separate argument sets are parsed: `args` drives the CAM options used
# below, while `argss` is only used to build the model config.
args = get_args()
argss = get_parser().parse_args()
cfg = setup_cfg(argss)

# Accepted values of args.method, mapped to the class implementing each one.
methods = {
    "gradcam": GradCAM,
    "scorecam": ScoreCAM,
    "gradcam++": GradCAMPlusPlus,
    "ablationcam": AblationCAM,
    "xgradcam": XGradCAM,
    "eigencam": EigenCAM,
    "eigengradcam": EigenGradCAM,
    "layercam": LayerCAM,
    "fullgrad": FullGrad,
}

if args.method not in methods:
    raise Exception(f"method should be one of {list(methods.keys())}")

# The model is built from cfg and its weights are loaded from
# cfg.MODEL.WEIGHTS. This replaces an earlier, disabled variant that loaded
# 'deit_tiny_patch16_224' from torch.hub ('facebookresearch/deit:main').
model = build_model(cfg)
print("model type:", type(model))

DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()

if args.use_cuda:
    model = model.cuda()

# The CAM is computed on norm1 of the last block of the last backbone layer.
# (A disabled variant indexed model.layers directly, without `.backbone`.)
last_block = model.backbone.layers[-1].blocks[-1]
target_layers = [last_block.norm1]

from pytorch_grad_cam.ablation_layer import AblationLayerVit

# Every method receives the same constructor arguments; only "ablationcam"
# additionally gets an ablation layer instance.
cam_kwargs = {
    "model": model,
    "target_layers": target_layers,
    "use_cuda": args.use_cuda,
    "reshape_transform": reshape_transform,
}
if args.method == "ablationcam":
    cam_kwargs["ablation_layer"] = AblationLayerVit()
cam = methods[args.method](**cam_kwargs)

# Load the image, reverse its last (channel) axis, resize it to 224x224 and
# convert it to float32 divided by 255.
# (The channel flip is presumably BGR -> RGB, going by the variable name.)
raw_img = cv2.imread(args.image_path, 1)
rgb_img = raw_img[:, :, ::-1]
rgb_img = cv2.resize(rgb_img, (224, 224))
rgb_img = np.float32(rgb_img) / 255

norm_mean = [0.5, 0.5, 0.5]
norm_std = [0.5, 0.5, 0.5]
input_tensor = preprocess_image(rgb_img, mean=norm_mean, std=norm_std)
print("shape input_tensor:", input_tensor.shape)

# With targets left as None the map for the highest scoring category is
# returned; set it to request a specific category instead.
targets = None

# AblationCAM and ScoreCAM are batched internally; overriding the batch size
# can speed up their computation.
cam.batch_size = 32

# NOTE(review): input_tensor is handed to the model as-is. If build_model()
# returns a model that expects a different input structure than a plain
# batched tensor, that could explain an indexing error here - confirm.
cam_batch = cam(
    input_tensor=input_tensor,
    targets=targets,
    eigen_smooth=args.eigen_smooth,
    aug_smooth=args.aug_smooth,
)

# Only one image was passed in, so take the first entry of the batch.
grayscale_cam = cam_batch[0, :]

cam_image = show_cam_on_image(rgb_img, grayscale_cam)
cv2.imwrite(f'{args.method}_cam.jpg', cam_image)`
jacobgil commented 2 years ago

Does this error still happen after a git pull / re-install? Please re-open if it does.