facebookresearch / SlowFast

PySlowFast: video understanding codebase from FAIR for reproducing state-of-the-art video models.
Apache License 2.0
6.51k stars 1.2k forks source link

MaskFeat training: lr and loss are zero #624

Closed wnzhyee closed 1 year ago

wnzhyee commented 1 year ago

Hi there! I just tried to train the MaskFeat pre-training (PT) model with a modified k400_MVITv2_S_16x4_MaskFeat_PT.yaml config, and it seems to work well. But it confuses me that 'lr' and 'top1_err' are always 0 in the log. Is this normal?

This is the stdout log. The learning rate in 'SOLVER' ('BASE_LR') is set to 0.0001, but the training log shows 'lr' as 0:

[11/16 11:40:55][INFO] train_net.py:  535: Train with config:
[11/16 11:40:55][INFO] train_net.py:  536: {'AUG': {'AA_TYPE': '',
         'COLOR_JITTER': None,
         'ENABLE': True,
         'GEN_MASK_LOADER': True,
         'INTERPOLATION': 'bicubic',
         'MASK_FRAMES': False,
         'MASK_RATIO': 0.4,
         'MASK_TUBE': False,
         'MASK_WINDOW_SIZE': [8, 7, 7],
         'MAX_MASK_PATCHES_PER_BLOCK': None,
         'NUM_SAMPLE': 1,
         'RE_COUNT': 1,
         'RE_MODE': 'pixel',
         'RE_PROB': 0.0,
         'RE_SPLIT': False},
 'AVA': {'ANNOTATION_DIR': '/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/',
         'BGR': False,
         'DETECTION_SCORE_THRESH': 0.9,
         'EXCLUSION_FILE': 'ava_val_excluded_timestamps_v2.2.csv',
         'FRAME_DIR': '/mnt/fair-flash3-east/ava_trainval_frames.img/',
         'FRAME_LIST_DIR': '/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/',
         'FULL_TEST_ON_VAL': False,
         'GROUNDTRUTH_FILE': 'ava_val_v2.2.csv',
         'IMG_PROC_BACKEND': 'cv2',
         'LABEL_MAP_FILE': 'ava_action_list_v2.2_for_activitynet_2019.pbtxt',
         'TEST_FORCE_FLIP': False,
         'TEST_LISTS': ['val.csv'],
         'TEST_PREDICT_BOX_LISTS': ['ava_val_predicted_boxes.csv'],
         'TRAIN_GT_BOX_LISTS': ['ava_train_v2.2.csv'],
         'TRAIN_LISTS': ['train.csv'],
         'TRAIN_PCA_JITTER_ONLY': True,
         'TRAIN_PREDICT_BOX_LISTS': [],
         'TRAIN_USE_COLOR_AUGMENTATION': False},
 'BENCHMARK': CfgNode({'NUM_EPOCHS': 5, 'LOG_PERIOD': 100, 'SHUFFLE': True}),
 'BN': {'GLOBAL_SYNC': False,
        'NORM_TYPE': 'batchnorm',
        'NUM_BATCHES_PRECISE': 200,
        'NUM_SPLITS': 1,
        'NUM_SYNC_DEVICES': 1,
        'USE_PRECISE_STATS': False,
        'WEIGHT_DECAY': 0.0},
 'CONTRASTIVE': {'BN_MLP': False,
                 'BN_SYNC_MLP': False,
                 'DELTA_CLIPS_MAX': inf,
                 'DELTA_CLIPS_MIN': -inf,
                 'DIM': 128,
                 'INTERP_MEMORY': False,
                 'KNN_ON': True,
                 'LENGTH': 239975,
                 'LOCAL_SHUFFLE_BN': True,
                 'MEM_TYPE': '1d',
                 'MLP_DIM': 2048,
                 'MOCO_MULTI_VIEW_QUEUE': False,
                 'MOMENTUM': 0.5,
                 'MOMENTUM_ANNEALING': False,
                 'NUM_CLASSES_DOWNSTREAM': 400,
                 'NUM_MLP_LAYERS': 1,
                 'PREDICTOR_DEPTHS': [],
                 'QUEUE_LEN': 65536,
                 'SEQUENTIAL': False,
                 'SIMCLR_DIST_ON': True,
                 'SWAV_QEUE_LEN': 0,
                 'T': 0.07,
                 'TYPE': 'mem'},
 'DATA': {'COLOR_RND_GRAYSCALE': 0.0,
          'DECODING_BACKEND': 'pyav',
          'DECODING_SHORT_SIZE': 320,
          'DUMMY_LOAD': False,
          'ENSEMBLE_METHOD': 'sum',
          'IN22K_TRAINVAL': False,
          'IN22k_VAL_IN1K': '',
          'INPUT_CHANNEL_NUM': [3],
          'INV_UNIFORM_SAMPLE': False,
          'IN_VAL_CROP_RATIO': 0.875,
          'LOADER_CHUNK_OVERALL_SIZE': 0,
          'LOADER_CHUNK_SIZE': 0,
          'MEAN': [0.45, 0.45, 0.45],
          'MULTI_LABEL': False,
          'NUM_FRAMES': 16,
          'PATH_LABEL_SEPARATOR': ' ',
          'PATH_PREFIX': '',
          'PATH_TO_DATA_DIR': '/ml_workspace/yckj3844/datasets/opensource_data/human_activity',
          'PATH_TO_PRELOAD_IMDB': '',
          'RANDOM_FLIP': True,
          'REVERSE_INPUT_CHANNEL': False,
          'SAMPLING_RATE': 4,
          'SKIP_ROWS': 0,
          'SSL_BLUR_SIGMA_MAX': [0.0, 2.0],
          'SSL_BLUR_SIGMA_MIN': [0.0, 0.1],
          'SSL_COLOR_BRI_CON_SAT': [0.4, 0.4, 0.4],
          'SSL_COLOR_HUE': 0.1,
          'SSL_COLOR_JITTER': False,
          'SSL_MOCOV2_AUG': False,
          'STD': [0.225, 0.225, 0.225],
          'TARGET_FPS': 30,
          'TEST_CROP_SIZE': 224,
          'TIME_DIFF_PROB': 0.0,
          'TRAIN_CROP_NUM_SPATIAL': 1,
          'TRAIN_CROP_NUM_TEMPORAL': 1,
          'TRAIN_CROP_SIZE': 224,
          'TRAIN_JITTER_ASPECT_RELATIVE': [0.75, 1.3333],
          'TRAIN_JITTER_FPS': 0.0,
          'TRAIN_JITTER_MOTION_SHIFT': False,
          'TRAIN_JITTER_SCALES': [256, 320],
          'TRAIN_JITTER_SCALES_RELATIVE': [0.5, 1.0],
          'TRAIN_PCA_EIGVAL': [0.225, 0.224, 0.229],
          'TRAIN_PCA_EIGVEC': [[-0.5675, 0.7192, 0.4009],
                               [-0.5808, -0.0045, -0.814],
                               [-0.5836, -0.6948, 0.4203]],
          'USE_OFFSET_SAMPLING': True},
 'DATA_LOADER': {'ENABLE_MULTI_THREAD_DECODE': False,
                 'NUM_WORKERS': 8,
                 'PIN_MEMORY': True},
 'DEMO': {'BUFFER_SIZE': 0,
          'CLIP_VIS_SIZE': 10,
          'COMMON_CLASS_NAMES': ['watch (a person)',
                                 'talk to (e.g., self, a person, a group)',
                                 'listen to (a person)',
                                 'touch (an object)',
                                 'carry/hold (an object)',
                                 'walk',
                                 'sit',
                                 'lie/sleep',
                                 'bend/bow (at the waist)'],
          'COMMON_CLASS_THRES': 0.7,
          'DETECTRON2_CFG': 'COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml',
          'DETECTRON2_THRESH': 0.9,
          'DETECTRON2_WEIGHTS': 'detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl',
          'DISPLAY_HEIGHT': 0,
          'DISPLAY_WIDTH': 0,
          'ENABLE': False,
          'FPS': 30,
          'GT_BOXES': '',
          'INPUT_FORMAT': 'BGR',
          'INPUT_VIDEO': '',
          'LABEL_FILE_PATH': '',
          'NUM_CLIPS_SKIP': 0,
          'NUM_VIS_INSTANCES': 2,
          'OUTPUT_FILE': '',
          'OUTPUT_FPS': -1,
          'PREDS_BOXES': '',
          'SLOWMO': 1,
          'STARTING_SECOND': 900,
          'THREAD_ENABLE': False,
          'UNCOMMON_CLASS_THRES': 0.3,
          'VIS_MODE': 'thres',
          'WEBCAM': -1},
 'DETECTION': {'ALIGNED': True,
               'ENABLE': False,
               'ROI_XFORM_RESOLUTION': 7,
               'SPATIAL_SCALE_FACTOR': 16},
 'DIST_BACKEND': 'nccl',
 'LOG_MODEL_INFO': False,
 'LOG_PERIOD': 10,
 'MASK': {'DECODER_DEPTH': 0,
          'DECODER_EMBED_DIM': 512,
          'DECODER_SEP_POS_EMBED': False,
          'DEC_KV_KERNEL': [],
          'DEC_KV_STRIDE': [],
          'ENABLE': True,
          'HEAD_TYPE': 'separate',
          'MAE_ON': False,
          'MAE_RND_MASK': False,
          'NORM_PRED_PIXEL': True,
          'PER_FRAME_MASKING': False,
          'PRED_HOG': True,
          'PRETRAIN_DEPTH': [15],
          'SCALE_INIT_BY_DEPTH': False,
          'TIME_STRIDE_LOSS': True},
 'MIXUP': {'ALPHA': 0.8,
           'CUTMIX_ALPHA': 1.0,
           'ENABLE': False,
           'LABEL_SMOOTH_VALUE': 0.1,
           'PROB': 1.0,
           'SWITCH_PROB': 0.5},
 'MODEL': {'ACT_CHECKPOINT': False,
           'ARCH': 'maskmvit',
           'DETACH_FINAL_FC': False,
           'DROPCONNECT_RATE': 0.0,
           'DROPOUT_RATE': 0.0,
           'FC_INIT_STD': 0.01,
           'FP16_ALLREDUCE': False,
           'FROZEN_BN': False,
           'HEAD_ACT': 'softmax',
           'LOSS_FUNC': 'multi_mse',
           'MODEL_NAME': 'MaskMViT',
           'MULTI_PATHWAY_ARCH': ['slowfast'],
           'NUM_CLASSES': 400,
           'SINGLE_PATHWAY_ARCH': ['2d',
                                   'c2d',
                                   'i3d',
                                   'slow',
                                   'x3d',
                                   'mvit',
                                   'maskmvit']},
 'MULTIGRID': {'BN_BASE_SIZE': 8,
               'DEFAULT_B': 0,
               'DEFAULT_S': 0,
               'DEFAULT_T': 0,
               'EPOCH_FACTOR': 1.5,
               'EVAL_FREQ': 3,
               'LONG_CYCLE': False,
               'LONG_CYCLE_FACTORS': [(0.25, 0.7071067811865476),
                                      (0.5, 0.7071067811865476),
                                      (0.5, 1),
                                      (1, 1)],
               'LONG_CYCLE_SAMPLING_RATE': 0,
               'SHORT_CYCLE': False,
               'SHORT_CYCLE_FACTORS': [0.5, 0.7071067811865476]},
 'MVIT': {'CLS_EMBED_ON': True,
          'DEPTH': 16,
          'DIM_MUL': [[1, 2.0], [3, 2.0], [14, 2.0]],
          'DIM_MUL_IN_ATT': False,
          'DROPOUT_RATE': 0.0,
          'DROPPATH_RATE': 0.0,
          'EMBED_DIM': 96,
          'HEAD_INIT_SCALE': 1.0,
          'HEAD_MUL': [[1, 2.0], [3, 2.0], [14, 2.0]],
          'LAYER_SCALE_INIT_VALUE': 0.0,
          'MLP_RATIO': 4.0,
          'MODE': 'conv',
          'NORM': 'layernorm',
          'NORM_STEM': False,
          'NUM_HEADS': 1,
          'PATCH_2D': False,
          'PATCH_KERNEL': [3, 7, 7],
          'PATCH_PADDING': [1, 3, 3],
          'PATCH_STRIDE': [2, 4, 4],
          'POOL_FIRST': False,
          'POOL_KVQ_KERNEL': [3, 3, 3],
          'POOL_KV_STRIDE': [],
          'POOL_KV_STRIDE_ADAPTIVE': [1, 8, 8],
          'POOL_Q_STRIDE': [[0, 1, 1, 1],
                            [1, 1, 2, 2],
                            [2, 1, 1, 1],
                            [3, 1, 2, 2],
                            [4, 1, 1, 1],
                            [5, 1, 1, 1],
                            [6, 1, 1, 1],
                            [7, 1, 1, 1],
                            [8, 1, 1, 1],
                            [9, 1, 1, 1],
                            [10, 1, 1, 1],
                            [11, 1, 1, 1],
                            [12, 1, 1, 1],
                            [13, 1, 1, 1],
                            [14, 1, 1, 1],
                            [15, 1, 1, 1]],
          'QKV_BIAS': True,
          'REL_POS_SPATIAL': True,
          'REL_POS_TEMPORAL': True,
          'REL_POS_ZERO_INIT': False,
          'RESIDUAL_POOLING': True,
          'REV': {'BUFFER_LAYERS': [],
                  'ENABLE': False,
                  'PRE_Q_FUSION': 'avg',
                  'RESPATH_FUSE': 'concat',
                  'RES_PATH': 'conv'},
          'SEPARATE_QKV': False,
          'SEP_POS_EMBED': True,
          'USE_ABS_POS': False,
          'USE_FIXED_SINCOS_POS': False,
          'USE_MEAN_POOLING': False,
          'ZERO_DECAY_POS_CLS': False},
 'NONLOCAL': {'GROUP': [[1], [1], [1], [1]],
              'INSTANTIATION': 'dot_product',
              'LOCATION': [[[]], [[]], [[]], [[]]],
              'POOL': [[[1, 2, 2], [1, 2, 2]],
                       [[1, 2, 2], [1, 2, 2]],
                       [[1, 2, 2], [1, 2, 2]],
                       [[1, 2, 2], [1, 2, 2]]]},
 'NUM_GPUS': 4,
 'NUM_SHARDS': 1,
 'OUTPUT_DIR': 'workspace/ckpts/try_1',
 'RESNET': {'DEPTH': 50,
            'INPLACE_RELU': True,
            'NUM_BLOCK_TEMP_KERNEL': [[3], [4], [6], [3]],
            'NUM_GROUPS': 1,
            'SPATIAL_DILATIONS': [[1], [1], [1], [1]],
            'SPATIAL_STRIDES': [[1], [2], [2], [2]],
            'STRIDE_1X1': False,
            'TRANS_FUNC': 'bottleneck_transform',
            'WIDTH_PER_GROUP': 64,
            'ZERO_INIT_FINAL_BN': False,
            'ZERO_INIT_FINAL_CONV': False},
 'RNG_SEED': 3844,
 'SHARD_ID': 0,
 'SLOWFAST': {'ALPHA': 8,
              'BETA_INV': 8,
              'FUSION_CONV_CHANNEL_RATIO': 2,
              'FUSION_KERNEL_SZ': 5},
 'SOLVER': {'BASE_LR': 0.0001,
            'BASE_LR_SCALE_NUM_SHARDS': True,
            'BETAS': (0.9, 0.999),
            'CLIP_GRAD_L2NORM': 0.02,
            'CLIP_GRAD_VAL': None,
            'COSINE_AFTER_WARMUP': True,
            'COSINE_END_LR': 1e-06,
            'DAMPENING': 0.0,
            'GAMMA': 0.1,
            'LARS_ON': False,
            'LAYER_DECAY': 1.0,
            'LRS': [],
            'LR_POLICY': 'cosine',
            'MAX_EPOCH': 300,
            'MOMENTUM': 0.9,
            'NESTEROV': True,
            'OPTIMIZING_METHOD': 'adamw',
            'STEPS': [],
            'STEP_SIZE': 1,
            'WARMUP_EPOCHS': 10.0,
            'WARMUP_FACTOR': 0.1,
            'WARMUP_START_LR': 1e-06,
            'WEIGHT_DECAY': 0.05,
            'ZERO_WD_1D_PARAM': True},
 'TASK': 'ssl',
 'TENSORBOARD': {'CATEGORIES_PATH': '',
                 'CLASS_NAMES_PATH': '',
                 'CONFUSION_MATRIX': {'ENABLE': False,
                                      'FIGSIZE': [8, 8],
                                      'SUBSET_PATH': ''},
                 'ENABLE': False,
                 'HISTOGRAM': {'ENABLE': False,
                               'FIGSIZE': [8, 8],
                               'SUBSET_PATH': '',
                               'TOPK': 10},
                 'LOG_DIR': '',
                 'MODEL_VIS': {'ACTIVATIONS': False,
                               'COLORMAP': 'Pastel2',
                               'ENABLE': False,
                               'GRAD_CAM': {'COLORMAP': 'viridis',
                                            'ENABLE': True,
                                            'LAYER_LIST': [],
                                            'USE_TRUE_LABEL': False},
                               'INPUT_VIDEO': False,
                               'LAYER_LIST': [],
                               'MODEL_WEIGHTS': False,
                               'TOPK_PREDS': 1},
                 'PREDICTIONS_PATH': '',
                 'WRONG_PRED_VIS': {'ENABLE': False,
                                    'SUBSET_PATH': '',
                                    'TAG': 'Incorrectly classified videos.'}},
 'TEST': {'BATCH_SIZE': 16,
          'CHECKPOINT_FILE_PATH': '',
          'CHECKPOINT_TYPE': 'pytorch',
          'DATASET': 'kinetics',
          'ENABLE': False,
          'NUM_ENSEMBLE_VIEWS': 10,
          'NUM_SPATIAL_CROPS': 1,
          'NUM_TEMPORAL_CLIPS': [5, 10],
          'SAVE_RESULTS_PATH': ''},
 'TRAIN': {'AUTO_RESUME': True,
           'BATCH_SIZE': 16,
           'CHECKPOINT_CLEAR_NAME_PATTERN': (),
           'CHECKPOINT_EPOCH_RESET': False,
           'CHECKPOINT_FILE_PATH': '',
           'CHECKPOINT_INFLATE': False,
           'CHECKPOINT_IN_INIT': False,
           'CHECKPOINT_PERIOD': 10,
           'CHECKPOINT_TYPE': 'pytorch',
           'DATASET': 'kinetics',
           'ENABLE': True,
           'EVAL_PERIOD': 1000,
           'KILL_LOSS_EXPLOSION_FACTOR': 0.0,
           'MIXED_PRECISION': False},
 'VIS_MASK': CfgNode({'ENABLE': False}),
 'X3D': {'BN_LIN5': False,
         'BOTTLENECK_FACTOR': 1.0,
         'CHANNELWISE_3x3x3': True,
         'DEPTH_FACTOR': 1.0,
         'DIM_C1': 12,
         'DIM_C5': 2048,
         'SCALE_RES2': False,
         'WIDTH_FACTOR': 1.0}}
[11/16 11:41:03][INFO] kinetics.py:   93: Constructing Kinetics train...
[11/16 11:41:03][INFO] kinetics.py:  158: Constructing kinetics dataloader (size: 3840 skip_rows 0) from /ml_workspace/yckj3844/datasets/opensource_data/human_activity/train.csv 
[11/16 11:41:03][INFO] kinetics.py:   93: Constructing Kinetics val...
[11/16 11:41:03][INFO] kinetics.py:  158: Constructing kinetics dataloader (size: 15 skip_rows 0) from /ml_workspace/yckj3844/datasets/opensource_data/human_activity/val.csv 
[11/16 11:41:03][INFO] train_net.py:  630: Start epoch: 1
[11/16 11:42:10][INFO] distributed.py:  995: Reducer buckets have been rebuilt in this iteration.
[11/16 11:42:20][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.02490, "dt_data": 0.00045, "dt_net": 1.02445, "epoch": "1/300", "eta": "20:29:42", "gpu_mem": "7.15G", "grad_norm": 7.18432, "iter": "10/240", "loss": 0.32920, "loss_0": 0.28187, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:42:30][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 0.99200, "dt_data": 0.00054, "dt_net": 0.99146, "epoch": "1/300", "eta": "19:50:03", "gpu_mem": "7.15G", "grad_norm": 3.16029, "iter": "20/240", "loss": 0.18622, "loss_0": 0.14730, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:42:40][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.00303, "dt_data": 0.00050, "dt_net": 1.00253, "epoch": "1/300", "eta": "20:03:07", "gpu_mem": "7.15G", "grad_norm": 1.56387, "iter": "30/240", "loss": 0.11671, "loss_0": 0.09552, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:42:51][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.26227, "dt_data": 0.00092, "dt_net": 1.26134, "epoch": "1/300", "eta": "1 day, 1:13:52", "gpu_mem": "7.15G", "grad_norm": 0.78174, "iter": "40/240", "loss": 0.07802, "loss_0": 0.07321, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:43:02][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.08522, "dt_data": 0.00182, "dt_net": 1.08341, "epoch": "1/300", "eta": "21:41:21", "gpu_mem": "7.15G", "grad_norm": 0.33472, "iter": "50/240", "loss": 0.06137, "loss_0": 0.05739, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:47:00][INFO] train_net.py:  535: Train with config:
[11/16 11:47:00][INFO] train_net.py:  536: {'AUG': {'AA_TYPE': '',
         'COLOR_JITTER': None,
         'ENABLE': True,
         'GEN_MASK_LOADER': True,
         'INTERPOLATION': 'bicubic',
         'MASK_FRAMES': False,
         'MASK_RATIO': 0.4,
         'MASK_TUBE': False,
         'MASK_WINDOW_SIZE': [8, 7, 7],
         'MAX_MASK_PATCHES_PER_BLOCK': None,
         'NUM_SAMPLE': 1,
         'RE_COUNT': 1,
         'RE_MODE': 'pixel',
         'RE_PROB': 0.0,
         'RE_SPLIT': False},
 'AVA': {'ANNOTATION_DIR': '/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/',
         'BGR': False,
         'DETECTION_SCORE_THRESH': 0.9,
         'EXCLUSION_FILE': 'ava_val_excluded_timestamps_v2.2.csv',
         'FRAME_DIR': '/mnt/fair-flash3-east/ava_trainval_frames.img/',
         'FRAME_LIST_DIR': '/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/',
         'FULL_TEST_ON_VAL': False,
         'GROUNDTRUTH_FILE': 'ava_val_v2.2.csv',
         'IMG_PROC_BACKEND': 'cv2',
         'LABEL_MAP_FILE': 'ava_action_list_v2.2_for_activitynet_2019.pbtxt',
         'TEST_FORCE_FLIP': False,
         'TEST_LISTS': ['val.csv'],
         'TEST_PREDICT_BOX_LISTS': ['ava_val_predicted_boxes.csv'],
         'TRAIN_GT_BOX_LISTS': ['ava_train_v2.2.csv'],
         'TRAIN_LISTS': ['train.csv'],
         'TRAIN_PCA_JITTER_ONLY': True,
         'TRAIN_PREDICT_BOX_LISTS': [],
         'TRAIN_USE_COLOR_AUGMENTATION': False},
 'BENCHMARK': CfgNode({'NUM_EPOCHS': 5, 'LOG_PERIOD': 100, 'SHUFFLE': True}),
 'BN': {'GLOBAL_SYNC': False,
        'NORM_TYPE': 'batchnorm',
        'NUM_BATCHES_PRECISE': 200,
        'NUM_SPLITS': 1,
        'NUM_SYNC_DEVICES': 1,
        'USE_PRECISE_STATS': False,
        'WEIGHT_DECAY': 0.0},
 'CONTRASTIVE': {'BN_MLP': False,
                 'BN_SYNC_MLP': False,
                 'DELTA_CLIPS_MAX': inf,
                 'DELTA_CLIPS_MIN': -inf,
                 'DIM': 128,
                 'INTERP_MEMORY': False,
                 'KNN_ON': True,
                 'LENGTH': 239975,
                 'LOCAL_SHUFFLE_BN': True,
                 'MEM_TYPE': '1d',
                 'MLP_DIM': 2048,
                 'MOCO_MULTI_VIEW_QUEUE': False,
                 'MOMENTUM': 0.5,
                 'MOMENTUM_ANNEALING': False,
                 'NUM_CLASSES_DOWNSTREAM': 400,
                 'NUM_MLP_LAYERS': 1,
                 'PREDICTOR_DEPTHS': [],
                 'QUEUE_LEN': 65536,
                 'SEQUENTIAL': False,
                 'SIMCLR_DIST_ON': True,
                 'SWAV_QEUE_LEN': 0,
                 'T': 0.07,
                 'TYPE': 'mem'},
 'DATA': {'COLOR_RND_GRAYSCALE': 0.0,
          'DECODING_BACKEND': 'pyav',
          'DECODING_SHORT_SIZE': 320,
          'DUMMY_LOAD': False,
          'ENSEMBLE_METHOD': 'sum',
          'IN22K_TRAINVAL': False,
          'IN22k_VAL_IN1K': '',
          'INPUT_CHANNEL_NUM': [3],
          'INV_UNIFORM_SAMPLE': False,
          'IN_VAL_CROP_RATIO': 0.875,
          'LOADER_CHUNK_OVERALL_SIZE': 0,
          'LOADER_CHUNK_SIZE': 0,
          'MEAN': [0.45, 0.45, 0.45],
          'MULTI_LABEL': False,
          'NUM_FRAMES': 16,
          'PATH_LABEL_SEPARATOR': ' ',
          'PATH_PREFIX': '',
          'PATH_TO_DATA_DIR': '/ml_workspace/yckj3844/datasets/opensource_data/human_activity',
          'PATH_TO_PRELOAD_IMDB': '',
          'RANDOM_FLIP': True,
          'REVERSE_INPUT_CHANNEL': False,
          'SAMPLING_RATE': 4,
          'SKIP_ROWS': 0,
          'SSL_BLUR_SIGMA_MAX': [0.0, 2.0],
          'SSL_BLUR_SIGMA_MIN': [0.0, 0.1],
          'SSL_COLOR_BRI_CON_SAT': [0.4, 0.4, 0.4],
          'SSL_COLOR_HUE': 0.1,
          'SSL_COLOR_JITTER': False,
          'SSL_MOCOV2_AUG': False,
          'STD': [0.225, 0.225, 0.225],
          'TARGET_FPS': 30,
          'TEST_CROP_SIZE': 224,
          'TIME_DIFF_PROB': 0.0,
          'TRAIN_CROP_NUM_SPATIAL': 1,
          'TRAIN_CROP_NUM_TEMPORAL': 1,
          'TRAIN_CROP_SIZE': 224,
          'TRAIN_JITTER_ASPECT_RELATIVE': [0.75, 1.3333],
          'TRAIN_JITTER_FPS': 0.0,
          'TRAIN_JITTER_MOTION_SHIFT': False,
          'TRAIN_JITTER_SCALES': [256, 320],
          'TRAIN_JITTER_SCALES_RELATIVE': [0.5, 1.0],
          'TRAIN_PCA_EIGVAL': [0.225, 0.224, 0.229],
          'TRAIN_PCA_EIGVEC': [[-0.5675, 0.7192, 0.4009],
                               [-0.5808, -0.0045, -0.814],
                               [-0.5836, -0.6948, 0.4203]],
          'USE_OFFSET_SAMPLING': True},
 'DATA_LOADER': {'ENABLE_MULTI_THREAD_DECODE': False,
                 'NUM_WORKERS': 8,
                 'PIN_MEMORY': True},
 'DEMO': {'BUFFER_SIZE': 0,
          'CLIP_VIS_SIZE': 10,
          'COMMON_CLASS_NAMES': ['watch (a person)',
                                 'talk to (e.g., self, a person, a group)',
                                 'listen to (a person)',
                                 'touch (an object)',
                                 'carry/hold (an object)',
                                 'walk',
                                 'sit',
                                 'lie/sleep',
                                 'bend/bow (at the waist)'],
          'COMMON_CLASS_THRES': 0.7,
          'DETECTRON2_CFG': 'COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml',
          'DETECTRON2_THRESH': 0.9,
          'DETECTRON2_WEIGHTS': 'detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl',
          'DISPLAY_HEIGHT': 0,
          'DISPLAY_WIDTH': 0,
          'ENABLE': False,
          'FPS': 30,
          'GT_BOXES': '',
          'INPUT_FORMAT': 'BGR',
          'INPUT_VIDEO': '',
          'LABEL_FILE_PATH': '',
          'NUM_CLIPS_SKIP': 0,
          'NUM_VIS_INSTANCES': 2,
          'OUTPUT_FILE': '',
          'OUTPUT_FPS': -1,
          'PREDS_BOXES': '',
          'SLOWMO': 1,
          'STARTING_SECOND': 900,
          'THREAD_ENABLE': False,
          'UNCOMMON_CLASS_THRES': 0.3,
          'VIS_MODE': 'thres',
          'WEBCAM': -1},
 'DETECTION': {'ALIGNED': True,
               'ENABLE': False,
               'ROI_XFORM_RESOLUTION': 7,
               'SPATIAL_SCALE_FACTOR': 16},
 'DIST_BACKEND': 'nccl',
 'LOG_MODEL_INFO': False,
 'LOG_PERIOD': 10,
 'MASK': {'DECODER_DEPTH': 0,
          'DECODER_EMBED_DIM': 512,
          'DECODER_SEP_POS_EMBED': False,
          'DEC_KV_KERNEL': [],
          'DEC_KV_STRIDE': [],
          'ENABLE': True,
          'HEAD_TYPE': 'separate',
          'MAE_ON': False,
          'MAE_RND_MASK': False,
          'NORM_PRED_PIXEL': True,
          'PER_FRAME_MASKING': False,
          'PRED_HOG': True,
          'PRETRAIN_DEPTH': [15],
          'SCALE_INIT_BY_DEPTH': False,
          'TIME_STRIDE_LOSS': True},
 'MIXUP': {'ALPHA': 0.8,
           'CUTMIX_ALPHA': 1.0,
           'ENABLE': False,
           'LABEL_SMOOTH_VALUE': 0.1,
           'PROB': 1.0,
           'SWITCH_PROB': 0.5},
 'MODEL': {'ACT_CHECKPOINT': False,
           'ARCH': 'maskmvit',
           'DETACH_FINAL_FC': False,
           'DROPCONNECT_RATE': 0.0,
           'DROPOUT_RATE': 0.0,
           'FC_INIT_STD': 0.01,
           'FP16_ALLREDUCE': False,
           'FROZEN_BN': False,
           'HEAD_ACT': 'softmax',
           'LOSS_FUNC': 'multi_mse',
           'MODEL_NAME': 'MaskMViT',
           'MULTI_PATHWAY_ARCH': ['slowfast'],
           'NUM_CLASSES': 400,
           'SINGLE_PATHWAY_ARCH': ['2d',
                                   'c2d',
                                   'i3d',
                                   'slow',
                                   'x3d',
                                   'mvit',
                                   'maskmvit']},
 'MULTIGRID': {'BN_BASE_SIZE': 8,
               'DEFAULT_B': 0,
               'DEFAULT_S': 0,
               'DEFAULT_T': 0,
               'EPOCH_FACTOR': 1.5,
               'EVAL_FREQ': 3,
               'LONG_CYCLE': False,
               'LONG_CYCLE_FACTORS': [(0.25, 0.7071067811865476),
                                      (0.5, 0.7071067811865476),
                                      (0.5, 1),
                                      (1, 1)],
               'LONG_CYCLE_SAMPLING_RATE': 0,
               'SHORT_CYCLE': False,
               'SHORT_CYCLE_FACTORS': [0.5, 0.7071067811865476]},
 'MVIT': {'CLS_EMBED_ON': True,
          'DEPTH': 16,
          'DIM_MUL': [[1, 2.0], [3, 2.0], [14, 2.0]],
          'DIM_MUL_IN_ATT': False,
          'DROPOUT_RATE': 0.0,
          'DROPPATH_RATE': 0.0,
          'EMBED_DIM': 96,
          'HEAD_INIT_SCALE': 1.0,
          'HEAD_MUL': [[1, 2.0], [3, 2.0], [14, 2.0]],
          'LAYER_SCALE_INIT_VALUE': 0.0,
          'MLP_RATIO': 4.0,
          'MODE': 'conv',
          'NORM': 'layernorm',
          'NORM_STEM': False,
          'NUM_HEADS': 1,
          'PATCH_2D': False,
          'PATCH_KERNEL': [3, 7, 7],
          'PATCH_PADDING': [1, 3, 3],
          'PATCH_STRIDE': [2, 4, 4],
          'POOL_FIRST': False,
          'POOL_KVQ_KERNEL': [3, 3, 3],
          'POOL_KV_STRIDE': [],
          'POOL_KV_STRIDE_ADAPTIVE': [1, 8, 8],
          'POOL_Q_STRIDE': [[0, 1, 1, 1],
                            [1, 1, 2, 2],
                            [2, 1, 1, 1],
                            [3, 1, 2, 2],
                            [4, 1, 1, 1],
                            [5, 1, 1, 1],
                            [6, 1, 1, 1],
                            [7, 1, 1, 1],
                            [8, 1, 1, 1],
                            [9, 1, 1, 1],
                            [10, 1, 1, 1],
                            [11, 1, 1, 1],
                            [12, 1, 1, 1],
                            [13, 1, 1, 1],
                            [14, 1, 1, 1],
                            [15, 1, 1, 1]],
          'QKV_BIAS': True,
          'REL_POS_SPATIAL': True,
          'REL_POS_TEMPORAL': True,
          'REL_POS_ZERO_INIT': False,
          'RESIDUAL_POOLING': True,
          'REV': {'BUFFER_LAYERS': [],
                  'ENABLE': False,
                  'PRE_Q_FUSION': 'avg',
                  'RESPATH_FUSE': 'concat',
                  'RES_PATH': 'conv'},
          'SEPARATE_QKV': False,
          'SEP_POS_EMBED': True,
          'USE_ABS_POS': False,
          'USE_FIXED_SINCOS_POS': False,
          'USE_MEAN_POOLING': False,
          'ZERO_DECAY_POS_CLS': False},
 'NONLOCAL': {'GROUP': [[1], [1], [1], [1]],
              'INSTANTIATION': 'dot_product',
              'LOCATION': [[[]], [[]], [[]], [[]]],
              'POOL': [[[1, 2, 2], [1, 2, 2]],
                       [[1, 2, 2], [1, 2, 2]],
                       [[1, 2, 2], [1, 2, 2]],
                       [[1, 2, 2], [1, 2, 2]]]},
 'NUM_GPUS': 4,
 'NUM_SHARDS': 1,
 'OUTPUT_DIR': 'workspace/ckpts/try_1',
 'RESNET': {'DEPTH': 50,
            'INPLACE_RELU': True,
            'NUM_BLOCK_TEMP_KERNEL': [[3], [4], [6], [3]],
            'NUM_GROUPS': 1,
            'SPATIAL_DILATIONS': [[1], [1], [1], [1]],
            'SPATIAL_STRIDES': [[1], [2], [2], [2]],
            'STRIDE_1X1': False,
            'TRANS_FUNC': 'bottleneck_transform',
            'WIDTH_PER_GROUP': 64,
            'ZERO_INIT_FINAL_BN': False,
            'ZERO_INIT_FINAL_CONV': False},
 'RNG_SEED': 3844,
 'SHARD_ID': 0,
 'SLOWFAST': {'ALPHA': 8,
              'BETA_INV': 8,
              'FUSION_CONV_CHANNEL_RATIO': 2,
              'FUSION_KERNEL_SZ': 5},
 'SOLVER': {'BASE_LR': 0.0001,
            'BASE_LR_SCALE_NUM_SHARDS': True,
            'BETAS': (0.9, 0.999),
            'CLIP_GRAD_L2NORM': 0.02,
            'CLIP_GRAD_VAL': None,
            'COSINE_AFTER_WARMUP': True,
            'COSINE_END_LR': 1e-06,
            'DAMPENING': 0.0,
            'GAMMA': 0.1,
            'LARS_ON': False,
            'LAYER_DECAY': 1.0,
            'LRS': [],
            'LR_POLICY': 'cosine',
            'MAX_EPOCH': 300,
            'MOMENTUM': 0.9,
            'NESTEROV': True,
            'OPTIMIZING_METHOD': 'adamw',
            'STEPS': [],
            'STEP_SIZE': 1,
            'WARMUP_EPOCHS': 10.0,
            'WARMUP_FACTOR': 0.1,
            'WARMUP_START_LR': 1e-06,
            'WEIGHT_DECAY': 0.05,
            'ZERO_WD_1D_PARAM': True},
 'TASK': 'ssl',
 'TENSORBOARD': {'CATEGORIES_PATH': '',
                 'CLASS_NAMES_PATH': '',
                 'CONFUSION_MATRIX': {'ENABLE': False,
                                      'FIGSIZE': [8, 8],
                                      'SUBSET_PATH': ''},
                 'ENABLE': False,
                 'HISTOGRAM': {'ENABLE': False,
                               'FIGSIZE': [8, 8],
                               'SUBSET_PATH': '',
                               'TOPK': 10},
                 'LOG_DIR': '',
                 'MODEL_VIS': {'ACTIVATIONS': False,
                               'COLORMAP': 'Pastel2',
                               'ENABLE': False,
                               'GRAD_CAM': {'COLORMAP': 'viridis',
                                            'ENABLE': True,
                                            'LAYER_LIST': [],
                                            'USE_TRUE_LABEL': False},
                               'INPUT_VIDEO': False,
                               'LAYER_LIST': [],
                               'MODEL_WEIGHTS': False,
                               'TOPK_PREDS': 1},
                 'PREDICTIONS_PATH': '',
                 'WRONG_PRED_VIS': {'ENABLE': False,
                                    'SUBSET_PATH': '',
                                    'TAG': 'Incorrectly classified videos.'}},
 'TEST': {'BATCH_SIZE': 16,
          'CHECKPOINT_FILE_PATH': '',
          'CHECKPOINT_TYPE': 'pytorch',
          'DATASET': 'kinetics',
          'ENABLE': False,
          'NUM_ENSEMBLE_VIEWS': 10,
          'NUM_SPATIAL_CROPS': 1,
          'NUM_TEMPORAL_CLIPS': [5, 10],
          'SAVE_RESULTS_PATH': ''},
 'TRAIN': {'AUTO_RESUME': True,
           'BATCH_SIZE': 16,
           'CHECKPOINT_CLEAR_NAME_PATTERN': (),
           'CHECKPOINT_EPOCH_RESET': False,
           'CHECKPOINT_FILE_PATH': '',
           'CHECKPOINT_INFLATE': False,
           'CHECKPOINT_IN_INIT': False,
           'CHECKPOINT_PERIOD': 10,
           'CHECKPOINT_TYPE': 'pytorch',
           'DATASET': 'kinetics',
           'ENABLE': True,
           'EVAL_PERIOD': 1000,
           'KILL_LOSS_EXPLOSION_FACTOR': 0.0,
           'MIXED_PRECISION': False},
 'VIS_MASK': CfgNode({'ENABLE': False}),
 'X3D': {'BN_LIN5': False,
         'BOTTLENECK_FACTOR': 1.0,
         'CHANNELWISE_3x3x3': True,
         'DEPTH_FACTOR': 1.0,
         'DIM_C1': 12,
         'DIM_C5': 2048,
         'SCALE_RES2': False,
         'WIDTH_FACTOR': 1.0}}
[11/16 11:47:06][INFO] kinetics.py:   93: Constructing Kinetics train...
[11/16 11:47:06][INFO] kinetics.py:  158: Constructing kinetics dataloader (size: 3840 skip_rows 0) from /ml_workspace/yckj3844/datasets/opensource_data/human_activity/train.csv 
[11/16 11:47:06][INFO] kinetics.py:   93: Constructing Kinetics val...
[11/16 11:47:06][INFO] kinetics.py:  158: Constructing kinetics dataloader (size: 15 skip_rows 0) from /ml_workspace/yckj3844/datasets/opensource_data/human_activity/val.csv 
[11/16 11:47:06][INFO] train_net.py:  630: Start epoch: 1
[11/16 11:48:04][INFO] distributed.py:  995: Reducer buckets have been rebuilt in this iteration.
[11/16 11:48:12][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.04166, "dt_data": 0.00077, "dt_net": 1.04089, "epoch": "1/300", "eta": "20:49:49", "gpu_mem": "7.15G", "grad_norm": 7.18431, "iter": "10/240", "loss": 0.32920, "loss_0": 0.28187, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:48:20][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 0.86445, "dt_data": 0.00099, "dt_net": 0.86345, "epoch": "1/300", "eta": "17:17:03", "gpu_mem": "7.15G", "grad_norm": 3.16029, "iter": "20/240", "loss": 0.18622, "loss_0": 0.14730, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:48:30][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 0.96583, "dt_data": 0.00049, "dt_net": 0.96534, "epoch": "1/300", "eta": "19:18:30", "gpu_mem": "7.15G", "grad_norm": 1.56387, "iter": "30/240", "loss": 0.11671, "loss_0": 0.09552, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:48:39][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.03053, "dt_data": 0.00074, "dt_net": 1.02979, "epoch": "1/300", "eta": "20:35:57", "gpu_mem": "7.15G", "grad_norm": 0.78174, "iter": "40/240", "loss": 0.07802, "loss_0": 0.07321, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:48:49][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 0.94134, "dt_data": 0.00050, "dt_net": 0.94085, "epoch": "1/300", "eta": "18:48:49", "gpu_mem": "7.15G", "grad_norm": 0.33472, "iter": "50/240", "loss": 0.06137, "loss_0": 0.05739, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:48:58][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 0.85098, "dt_data": 0.00079, "dt_net": 0.85018, "epoch": "1/300", "eta": "17:00:19", "gpu_mem": "7.15G", "grad_norm": 0.20784, "iter": "60/240", "loss": 0.05521, "loss_0": 0.05463, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:49:08][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.11827, "dt_data": 0.00062, "dt_net": 1.11764, "epoch": "1/300", "eta": "22:20:36", "gpu_mem": "7.15G", "grad_norm": 0.23468, "iter": "70/240", "loss": 0.05217, "loss_0": 0.05163, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:49:18][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.11032, "dt_data": 0.00109, "dt_net": 1.10922, "epoch": "1/300", "eta": "22:10:54", "gpu_mem": "7.15G", "grad_norm": 0.19667, "iter": "80/240", "loss": 0.05086, "loss_0": 0.04750, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:49:29][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.21129, "dt_data": 0.00050, "dt_net": 1.21078, "epoch": "1/300", "eta": "1 day, 0:11:43", "gpu_mem": "7.15G", "grad_norm": 0.21099, "iter": "90/240", "loss": 0.04977, "loss_0": 0.04584, "lr": 0.00000, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:49:41][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.20714, "dt_data": 0.00067, "dt_net": 1.20647, "epoch": "1/300", "eta": "1 day, 0:06:33", "gpu_mem": "7.15G", "grad_norm": 0.34251, "iter": "100/240", "loss": 0.04938, "loss_0": 0.05160, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:49:54][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.41768, "dt_data": 0.00056, "dt_net": 1.41711, "epoch": "1/300", "eta": "1 day, 4:18:36", "gpu_mem": "7.15G", "grad_norm": 0.11190, "iter": "110/240", "loss": 0.04962, "loss_0": 0.05032, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:50:07][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.32853, "dt_data": 0.00057, "dt_net": 1.32796, "epoch": "1/300", "eta": "1 day, 2:31:34", "gpu_mem": "7.15G", "grad_norm": 0.16964, "iter": "120/240", "loss": 0.04770, "loss_0": 0.04808, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:50:21][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.45923, "dt_data": 0.00054, "dt_net": 1.45869, "epoch": "1/300", "eta": "1 day, 5:07:54", "gpu_mem": "7.15G", "grad_norm": 0.18755, "iter": "130/240", "loss": 0.04778, "loss_0": 0.05006, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:50:34][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.30684, "dt_data": 0.00056, "dt_net": 1.30628, "epoch": "1/300", "eta": "1 day, 2:05:09", "gpu_mem": "7.15G", "grad_norm": 0.19371, "iter": "140/240", "loss": 0.04872, "loss_0": 0.05028, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:50:48][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.40904, "dt_data": 0.00117, "dt_net": 1.40786, "epoch": "1/300", "eta": "1 day, 4:07:19", "gpu_mem": "7.15G", "grad_norm": 0.28376, "iter": "150/240", "loss": 0.04744, "loss_0": 0.04346, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:51:01][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.39745, "dt_data": 0.00065, "dt_net": 1.39680, "epoch": "1/300", "eta": "1 day, 3:53:12", "gpu_mem": "7.15G", "grad_norm": 0.30947, "iter": "160/240", "loss": 0.04865, "loss_0": 0.04873, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:51:15][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.40967, "dt_data": 0.00117, "dt_net": 1.40849, "epoch": "1/300", "eta": "1 day, 4:07:36", "gpu_mem": "7.15G", "grad_norm": 0.17626, "iter": "170/240", "loss": 0.04806, "loss_0": 0.04684, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:51:27][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.19400, "dt_data": 0.00082, "dt_net": 1.19317, "epoch": "1/300", "eta": "23:49:13", "gpu_mem": "7.15G", "grad_norm": 0.16931, "iter": "180/240", "loss": 0.04860, "loss_0": 0.04862, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:51:41][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.48366, "dt_data": 0.00066, "dt_net": 1.48300, "epoch": "1/300", "eta": "1 day, 5:35:41", "gpu_mem": "7.15G", "grad_norm": 0.16820, "iter": "190/240", "loss": 0.04875, "loss_0": 0.05073, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:51:58][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 4.72729, "dt_data": 0.00055, "dt_net": 4.72673, "epoch": "1/300", "eta": "3 days, 22:16:59", "gpu_mem": "7.15G", "grad_norm": 0.21940, "iter": "200/240", "loss": 0.04806, "loss_0": 0.04882, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:52:07][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.22175, "dt_data": 0.00057, "dt_net": 1.22118, "epoch": "1/300", "eta": "1 day, 0:21:49", "gpu_mem": "7.15G", "grad_norm": 0.23675, "iter": "210/240", "loss": 0.04721, "loss_0": 0.04687, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:52:20][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 1.28536, "dt_data": 0.00102, "dt_net": 1.28433, "epoch": "1/300", "eta": "1 day, 1:37:42", "gpu_mem": "7.15G", "grad_norm": 0.18050, "iter": "220/240", "loss": 0.04755, "loss_0": 0.04661, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:52:31][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 0.68081, "dt_data": 0.00032, "dt_net": 0.68048, "epoch": "1/300", "eta": "13:34:21", "gpu_mem": "7.15G", "grad_norm": 0.22941, "iter": "230/240", "loss": 0.04743, "loss_0": 0.04871, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:52:38][INFO] logging.py:   99: json_stats: {"_type": "train_iter_ssl", "dt": 0.68507, "dt_data": 0.00039, "dt_net": 0.68468, "epoch": "1/300", "eta": "13:39:20", "gpu_mem": "7.15G", "grad_norm": 0.36048, "iter": "240/240", "loss": 0.04879, "loss_0": 0.04755, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:52:42][INFO] logging.py:   99: json_stats: {"RAM": "42.16/125.54G", "_type": "train_epoch_ssl", "dt": 4.59893, "dt_data": 4.59893, "dt_net": 0.68468, "epoch": "1/300", "eta": "3 days, 19:40:12", "gpu_mem": "7.15G", "grad_norm": 0.36048, "loss": 0.07140, "loss_0": 0.07140, "lr": 0.00001, "top1_err": 0.00000, "top5_err": 0.00000}
[11/16 11:52:42][INFO] train_net.py:  691: Epoch 0 takes 336.20s. Epochs from 0 to 0 take 336.20s in average and 336.20s in median.
[11/16 11:52:42][INFO] train_net.py:  697: For epoch 0, each iteraction takes 1.40s in average. From epoch 0 to 0, each iteraction takes 1.40s in average.

And my YAML config file looks like this:

# Task selection and training-loop settings.
# The accompanying run log emits records typed "train_iter_ssl" and
# "train_epoch_ssl" for this configuration.
TASK: ssl
TRAIN:
  ENABLE: True
  DATASET: kinetics
  # The run log reports 3840 training clips and 240 iterations per epoch,
  # which suggests 16 clips per iteration in total rather than per GPU.
  BATCH_SIZE: 16
  # NOTE(review): presumably counted in epochs; if so, 1000 exceeds
  # SOLVER.MAX_EPOCH (300) and periodic evaluation never runs - confirm.
  EVAL_PERIOD: 1000
  CHECKPOINT_PERIOD: 10
  AUTO_RESUME: True
  # Left commented out: this file sets no initial checkpoint path (the dumped
  # config shows TRAIN.CHECKPOINT_FILE_PATH as an empty string).
  # CHECKPOINT_FILE_PATH: /ml_workspace/yckj3844/projects/SlowFast/workspace/ckpts/in1k_VIT_B_MaskFeat_PT_epoch_01600.pyth
# Video decoding, clip sampling and spatial preprocessing settings.
DATA:
  USE_OFFSET_SAMPLING: True
  DECODING_BACKEND: pyav
  # 16 frames at a sampling rate of 4; this matches the "16x4" in the name of
  # the config this file was derived from.
  NUM_FRAMES: 16
  SAMPLING_RATE: 4
  TRAIN_JITTER_SCALES: [256, 320]
  TRAIN_CROP_SIZE: 224
  TEST_CROP_SIZE: 224
  # NOTE(review): presumably one entry per input pathway, 3 = RGB - confirm.
  INPUT_CHANNEL_NUM: [3]
  DECODING_SHORT_SIZE: 320
  TRAIN_JITTER_SCALES_RELATIVE: [0.5, 1.0]
  TRAIN_JITTER_ASPECT_RELATIVE: [0.75, 1.3333]
  # The run log shows train.csv and val.csv being read from this directory.
  PATH_TO_DATA_DIR: /ml_workspace/yckj3844/datasets/opensource_data/human_activity
# MViT backbone hyperparameters. The meaning of each key is defined by the
# model code, which is not part of this file.
MVIT:
  DEPTH: 16
  NUM_HEADS: 1
  EMBED_DIM: 96
  PATCH_KERNEL: [3, 7, 7]
  PATCH_STRIDE: [2, 4, 4]
  PATCH_PADDING: [1, 3, 3]
  ZERO_DECAY_POS_CLS: False
  QKV_BIAS: True
  # NOTE(review): entries look like [block_index, multiplier], applied at
  # blocks 1, 3 and 14 - confirm.
  DIM_MUL: [[1, 2.0], [3, 2.0], [14, 2.0]]
  HEAD_MUL: [[1, 2.0], [3, 2.0], [14, 2.0]]
  # Highlight: [14, 1, 1, 1] instead of [14, 1, 2, 2] for 14x14 output
  # One entry per block index 0-15 (matching DEPTH: 16); only blocks 1 and 3
  # carry a stride of 2 in the last two positions.
  # NOTE(review): entries look like [block_index, t, h, w] - confirm.
  POOL_Q_STRIDE: [[0, 1, 1, 1], [1, 1, 2, 2], [2, 1, 1, 1], [3, 1, 2, 2], [4, 1, 1, 1], [5, 1, 1, 1], [6, 1, 1, 1], [7, 1, 1, 1], [8, 1, 1, 1], [9, 1, 1, 1], [10, 1, 1, 1], [11, 1, 1, 1], [12, 1, 1, 1], [13, 1, 1, 1], [14, 1, 1, 1], [15, 1, 1, 1]]
  POOL_KVQ_KERNEL: [3, 3, 3]
  POOL_KV_STRIDE_ADAPTIVE: [1, 8, 8]
  DROPPATH_RATE: 0.0
  CLS_EMBED_ON: True # default: True
  USE_ABS_POS: False # default: True
  SEP_POS_EMBED: True # default: false
  REL_POS_SPATIAL: True # default: false
  REL_POS_TEMPORAL: True # default: false
  RESIDUAL_POOLING: True
  MODE: "conv"
# Masked-prediction pre-training settings.
MASK:
  ENABLE: True
  # NOTE(review): presumably the block depth(s) whose output feeds the
  # prediction head; 15 is the last block index for MVIT.DEPTH 16 - confirm.
  PRETRAIN_DEPTH: [15]
  HEAD_TYPE: "separate"
  # NOTE(review): presumably selects HOG features as the prediction target
  # (the MaskFeat objective) - confirm.
  PRED_HOG: True
# Augmentation and mask-generation settings.
AUG:
  ENABLE: True
  # Bare None is not a YAML null keyword, yet the dumped config shows
  # AUG.COLOR_JITTER as Python None, so the config loader converts it.
  COLOR_JITTER: None
  AA_TYPE: ""
  RE_PROB: 0.0
  GEN_MASK_LOADER: True
  # NOTE(review): presumably the fraction of patches that get masked - confirm.
  MASK_RATIO: 0.4

  # Mask Cube (Default)
  MASK_TUBE: False
  MASK_FRAMES: False
  MASK_WINDOW_SIZE: [8, 7, 7]
# Mixup augmentation is switched off for this run.
MIXUP:
  ENABLE: False
# Batch-norm statistics settings.
BN:
  USE_PRECISE_STATS: False
  # NOTE(review): presumably unused while USE_PRECISE_STATS is False - confirm.
  NUM_BATCHES_PRECISE: 200
# Optimizer and learning-rate schedule.
# Regarding the "lr": 0.00000 entries in the run log: the log prints lr with
# five decimals and the warm-up below starts at 1e-6, so small early values
# display as 0.00000; the same log shows 0.00001 from iteration 100 onward.
# NOTE(review): presumably a linear ramp from WARMUP_START_LR to BASE_LR -
# confirm against the scheduler code.
SOLVER:
  CLIP_GRAD_L2NORM: 0.02
  BASE_LR: 0.0001
  # NOTE(review): presumably scales BASE_LR with the number of shards
  # (NUM_SHARDS is 1 in this file) - confirm.
  BASE_LR_SCALE_NUM_SHARDS: True
  LR_POLICY: cosine
  COSINE_AFTER_WARMUP: True
  COSINE_END_LR: 1e-6
  MAX_EPOCH: 300
  # At the 240 iterations per epoch shown in the run log, this amounts to
  # 2400 warm-up iterations.
  WARMUP_EPOCHS: 10.0
  MOMENTUM: 0.9
  WEIGHT_DECAY: 0.05
  ZERO_WD_1D_PARAM: True
  WARMUP_START_LR: 1e-6
  OPTIMIZING_METHOD: adamw
# Model selection and loss function.
MODEL:
  ARCH: maskmvit
  MODEL_NAME: MaskMViT
  # The run log reports "loss" and "loss_0" for this setting.
  # NOTE(review): with a regression-style loss there is presumably no
  # classification accuracy to report, which would explain top1_err and
  # top5_err staying at 0.00000 in the log - confirm against the training loop.
  LOSS_FUNC: multi_mse
  DROPOUT_RATE: 0.0
# Test-time settings. Testing is switched off for this run; these values match
# the TEST section of the dumped config.
TEST:
  ENABLE: False
  DATASET: kinetics
  BATCH_SIZE: 16
  NUM_SPATIAL_CROPS: 1
  NUM_TEMPORAL_CLIPS: [5, 10]
# Data-loading settings.
DATA_LOADER:
  # NOTE(review): presumably the worker count per training process - confirm.
  NUM_WORKERS: 8
  PIN_MEMORY: True
# Run-level settings: hardware layout, random seed and output location.
# NOTE(review): presumably NUM_GPUS is per shard and a shard is one machine -
# confirm.
NUM_GPUS: 4
NUM_SHARDS: 1
RNG_SEED: 3844
# Relative path.
# NOTE(review): presumably resolved against the launch directory - confirm.
OUTPUT_DIR: workspace/ckpts/try_1
LOG_MODEL_INFO: False
0829MingHu commented 11 months ago

I ran into the same problem. Could you tell me how you solved it?