open-mmlab / mmaction2

OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark
https://mmaction2.readthedocs.io
Apache License 2.0

Confusion matrix for action recognition #1222

Closed · Rajawat23 closed 3 years ago

Rajawat23 commented 3 years ago

Hi team,

I am training an action recognition model and logging my metrics to WandB. While I can log top_k_accuracy and mean_class_accuracy, logging confusion_matrix fails with the error "metric confusion_matrix is not supported". In the documentation it looks like an API for the confusion matrix is available, so I was wondering if I missed something in my config (pasted below).

_base_ = [
    '../../_base_/models/i3d_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3d',
        pretrained2d=True,
        pretrained='./data/object_recognition/model',
        depth=50,
        conv1_kernel=(5, 7, 7),
        conv1_stride_t=2,
        pool1_stride_t=2,
        conv_cfg=dict(type='Conv3d'),
        norm_eval=False,
        inflate=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 1, 0)),
        zero_init_residual=False,
        non_local=((0, 0, 0), (0, 1, 0, 1), (0, 0, 0, 1, 1, 0), (0, 0, 0)),
        non_local_cfg=dict(
            sub_sample=True,
            use_scale=False,
            norm_cfg=dict(type='BN3d', requires_grad=True),
            mode='dot_product')),
    cls_head=dict(
        type='custom_head',
        num_classes=36,
        in_channels=2048,
        spatial_type='avg',
        latent=512,
        dropout_ratio=0.5,
        init_std=0.01,
        multi_class=False),
    # model training and testing settings
    train_cfg=None,
    test_cfg=dict(average_clips='prob'))

dataset_type = 'VideoDataset'
data_root = './data/videos'
data_root_val = './data/videos'
ann_file_train = './data/class/train.txt'
ann_file_val = './data/class/val.txt'
ann_file_test = './data/class/test.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='OpenCVInit'),
    dict(type='SampleFrames', clip_len=32, frame_interval=8, num_clips=1),
    dict(type='OpenCVDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.8),
        random_crop=False,
        max_wh_scale_gap=0),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='OpenCVInit'),
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='OpenCVDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
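    # test time: 30 uniformly sampled clips x ThreeCrop = 90 views per video,
    # score-averaged via test_cfg=dict(average_clips='prob') above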
    dict(type='OpenCVInit'),
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=8,
        num_clips=30,
        test_mode=True),
    dict(type='OpenCVDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline,
        multi_class=False,
        num_classes=36),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline,
        multi_class=False,
        num_classes=36),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline,
        multi_class=False,
        num_classes=36))

total_epochs = 30

optimizer = dict(
    type='SGD',
    lr=0.005,  # this lr is used for 8 gpus
    momentum=0.9,
    weight_decay=0.0001)

lr_config = dict(
    policy='step',
    step=[10, 20],
    warmup='linear',
    warmup_iters=100,
    warmup_ratio=0.4)

# evaluation = dict(
#     interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
evaluation = dict(
    interval=5, metrics=['mean_class_accuracy', 'top_k_accuracy', 'confusion_matrix'])
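# NOTE: this is the line that fails. 'confusion_matrix' is not among the
# metrics allowed by BaseDataset.evaluate(), hence the
# "metric confusion_matrix is not supported" error (see the reply below).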

# wandb implementation details
project = 'test'
name = "Test_name"
entity = 'username'
notes = 'Testing for confusion matrix'
# group = 'Task3 3d conv head dspn'
group = 'test'
# yapf:disable
log_config = dict(
    interval=5,
    hooks=[
        dict(
            type='WandbLoggerHook',
            init_kwargs=dict(
                name=name, project=project, entity=entity, notes=notes,
                group=group)),
        # dict(type='TensorboardLoggerHook'),
    ])

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './data/model/'
find_unused_parameters=True
dreamerlin commented 3 years ago

Sorry for the late reply. confusion_matrix is not included in the supported metrics; you can add it yourself in https://github.com/open-mmlab/mmaction2/blob/master/mmaction/datasets/base.py#L192-L243
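
For anyone landing here, a minimal sketch of what that change could look like. It assumes the helper mmaction.core.evaluation.confusion_matrix (the same function mean_class_accuracy already uses internally) and the results / gt_labels variables that BaseDataset.evaluate() builds before its metric loop; the exact placement of the branch is up to you:

# Sketch only: an extra branch inside BaseDataset.evaluate()
# (mmaction/datasets/base.py), mirroring the existing metric branches.
import numpy as np

from mmaction.core.evaluation import confusion_matrix

# 1) allow the new metric name so the "is not supported" check passes
allowed_metrics = [
    'top_k_accuracy', 'mean_class_accuracy', 'confusion_matrix']

# 2) inside the `for metric in metrics:` loop, next to the other branches:
if metric == 'confusion_matrix':
    # `results` holds one class-score array per video; `gt_labels` holds
    # the integer ground-truth labels collected from self.video_infos
    pred = [np.argmax(score) for score in results]
    cf_mat = confusion_matrix(pred, gt_labels).astype(int)
    eval_results['confusion_matrix'] = cf_mat
    continue

One caveat: logger hooks such as WandbLoggerHook generally expect scalar values in the evaluation dict, so a full matrix may not render as-is; logging per-class recall (the diagonal of the row-normalized matrix) as separate scalars is one workaround.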