ZzZZCHS / Chat-Scene

Code for "Chat-Scene: Bridging 3D Scene and Large Language Models with Object Identifiers" (NeurIPS 2024)
MIT License
113 stars 8 forks source link

loss nan #24

Closed KimWu1994 closed 7 months ago

KimWu1994 commented 7 months ago

Train Epoch: [0] [ 2400/102750] eta: 5:20:57 lr: 0.000012 stage2-loss: nan stage2-cosine_loss: No data stage2-l2_loss: No data stage2-obj_norm: nan stage2-scene_norm: 0.0000 stage2-target_norm: No data time: 0.1851 data: 0.0030 max mem: 28326 res mem: 29120

I followed the readme to train the second stage, and the loss became NaN. How can I solve this problem?

ZzZZCHS commented 7 months ago

It's a little weird... I haven't encountered NaN loss during the object alignment stage before. Can I see the full output of this run? (I need to check the config and see the loss in the first several steps.)

KimWu1994 commented 7 months ago

This is my config:

# ========================= data ==========================
anno_root = "data/annotations"  # annotation dir
pc_encoder = "uni3d"
feat_file = f"{anno_root}/scannet_{pc_encoder}_feats.pt"
# attribute_file = f"{anno_root}/scannet_attributes.json"
# train_file_s1 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scanrefer_train_stage1.json",
#     ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/nr3d_train_stage1.json",
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scannet_train_stage1.json",
#     ],
#     # [
#     #     f"{anno_root}/objaverse_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/objaverse_attributes.json",
#     #     f"{anno_root}/objaverse_stage1.json"
#     # ]
# ]

train_file_s1=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scanrefer_train_stage1.json"
  ],
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scannet_train_stage1.json"
  ]
]
val_file_s1=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_val_attributes.pt",
    "data/annotations/scannet_val_stage1.json"
  ]
]

train_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/obj_align_train.json"
  ]
]
val_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_val_attributes.pt",
    "data/annotations/obj_align_val.json"
  ]
]

# train_file_s2 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scanrefer_train_stage2_objxx.json",
#     ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_train_stage2_caption_iou50.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/nr3d_train_stage2_objxx.json"
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/sr3d_train_stage2_objxx.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scene_align_train.json",
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/obj_align_train.json",
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanqa_train_stage2_objxx.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanqa_train_stage2_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/nr3d_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/sr3d_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/nr3d_train_stage2_multichoice0.01.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scene_dataset_train_stage2.json"
#     # ]
# ]
# val_file_s2 = [
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scanrefer_val_stage2_objxx.json"
#     # ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_val_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_val_stage2_caption_iou25.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_val_attributes.pt",
#         f"{anno_root}/stage2_val400.json"
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/nr3d_val_stage2_objxx.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scene_align_val.json",
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/obj_align_val.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scanqa_val_stage2_objxx.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scanqa_val_stage2_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/sr3d_val_stage2_grounding_new.json"
#     # ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_val_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_val_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/nr3d_val_stage2_multichoice0.01.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scene_dataset_val_stage2.json"
#     # ],
# ]

# train_file_s3 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scanqa_train_stage3.json",
#         1
#     ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/scanrefer_train_conversation.json",
#     #     3
#     # ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/scanrefer_train_detail.json",
#     #     1
#     # ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/nr3d_train_tf.json",
#     #     1
#     # ]
# ]

# # val_file_s1 = [
# #     # [
# #     #     feat_file,
# #     #     f"{anno_root}/scannet_val_attributes.pt",
# #     #     f"{anno_root}/scanrefer_val_stage1.json",
# #     # ],
# #     [
# #         feat_file,
# #         f"{anno_root}/scannet_val_attributes.pt",
# #         f"{anno_root}/scannet_val_stage1.json",
# #     ]
# # ]

# val_file_s3 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_val_attributes.pt",
#         f"{anno_root}/scanqa_val_predobj.json"
#     ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/scanrefer_val_conversation100.json"
#     # ],
# ]

train_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scanrefer_train_stage2_objxx.json"
  ],
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/nr3d_train_stage2_objxx.json"
  ],
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scene_align_train.json"
  ]
]

val_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_val_attributes.pt",
    "data/annotations/stage2_val400.json"
  ]
]

test_types = []
num_workers = 32

# ========================= input ==========================
s1_batch_size = 64
s2_batch_size = 1
s3_batch_size = 1
# max_txt_l = 32

pre_text = False

# ========================= model ==========================
model = dict(
    llama_model_path="ckpts/vicuna-7b-v0",
    input_dim=1024 if pc_encoder == "uni3d" else 512,
    attr_dim=512,
    encoder_num_layers=1,
    mlp_dropout=0.1,
    low_resource=False,
    system_path="prompts/system.txt",
    prompt_template="\n### Human: {}\n### Assistant: ",
    max_txt_len=512,
    end_sym="\n###",
    stage=1,
    add_scene_token=True,
    debug=False,
    obj_norm_scale=200,
    scene_norm_scale=50,
    grad_scale=1,
)

optimizer = dict(
    opt="adamW",
    lr=5e-3,
    opt_betas=[0.9, 0.999],  # default
    weight_decay=0.02,
    max_grad_norm=-1,  # requires a positive float, use -1 to disable
    # use a different lr for some modules, e.g., larger lr for new modules
    different_lr=dict(
        enable=True,
        module_names=["module.llama_model", "module.relation_module"],
        lr=[1e-5, 1e-5],
        wd=[0.02, 0.02]
    ),
)

scheduler = dict(sched="cosine", epochs=3, min_lr_multi=0.01, warmup_epochs=0.2)

evaluate = False
deep_fusion = False

fp16 = True
gradient_checkpointing = True

# ========================= wandb ==========================
wandb = dict(
    enable=False,
    entity="huanghaifeng",  # username or team name to store the runs, see https://docs.wandb.ai/ref/python/init
    project="Scene-LLM",
)
dist_url = "env://"
device = "cuda"

# ========================= others ==========================
output_dir = "outputs/tmp"  # output dir
resume = False  # if True, load optimizer and scheduler states as well
debug = False
log_freq = 100
# eval_freq = 500
seed = 42

save_latest = False
do_save = True
auto_resume = True
pretrained_path = ""
KimWu1994 commented 7 months ago

This is my training bash script:

export MASTER_PORT=6007

export OMP_NUM_THREADS=1
echo "PYTHONPATH: ${PYTHONPATH}"
which_python=$(which python)
echo "which python: ${which_python}"
export PYTHONPATH=${PYTHONPATH}:${which_python}
export PYTHONPATH=${PYTHONPATH}:.
echo "PYTHONPATH: ${PYTHONPATH}"
export CUDA_VISIBLE_DEVICES=6

NNODE=1
NUM_GPUS=1
MASTER_NODE='localhost'

# stage=1
# epoch=6
# add_scene_token=False
# evaluate=False
# pretrained_path=""

# stage=2
# epoch=3
# add_scene_token=False
# evaluate=False
# pretrained_path="outputs/2024-04-10-111416_dp_lr2e-4_sta1_ep6/ckpt_05.pth"

stage=2
epoch=3
max_txt_len=32
lr=1e-4
add_scene_token=False
evaluate=False
pretrained_path="outputs2/2024-04-10-161007_dp_lr1e-4_sta1_ep6/ckpt_05.pth"

OUTPUT_DIR=outputs/"$(date +"%Y-%m-%d-%T" | tr -d ':')"_dp"$dp"_lr"$lr"_sta"$stage"_ep"$epoch"
torchrun  --nnodes=${NNODE} --nproc_per_node=${NUM_GPUS} \
    --rdzv_endpoint=${MASTER_NODE}:${MASTER_PORT} \
    --rdzv_backend=c10d \
    tasks/train.py \
    $(dirname $0)/config.py \
    output_dir ${OUTPUT_DIR} \
    model.stage "$stage" \
    scheduler.epochs "$epoch" \
    optimizer.lr "$lr" \
    model.max_txt_len "$max_txt_len" \
    model.add_scene_token "$add_scene_token" \
    pretrained_path "$pretrained_path" \
    evaluate "$evaluate"
ZzZZCHS commented 7 months ago

[Screenshot 2024-04-10 at 7 12 57 PM] I think you forgot to comment out these lines. The object alignment stage only uses obj_align_train.json.

KimWu1994 commented 7 months ago

Sorry, I'm still confused. I followed the stage 2 training instructions in the readme and commented out the following:

# train_file_s2 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scanrefer_train_stage2_objxx.json",
#     ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_train_stage2_caption_iou50.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/nr3d_train_stage2_objxx.json"
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/sr3d_train_stage2_objxx.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scene_align_train.json",
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/obj_align_train.json",
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanqa_train_stage2_objxx.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanqa_train_stage2_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/nr3d_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/sr3d_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/nr3d_train_stage2_multichoice0.01.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scene_dataset_train_stage2.json"
#     # ]
# ]

I added the configuration from the readme as follows:

train_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/obj_align_train.json"
  ]
]

The loss is NaN. I may have misunderstood your readme.

KimWu1994 commented 7 months ago

Shouldn't I comment out train_file_s2 in the original config.py?

KimWu1994 commented 7 months ago

What is the correct config for stage 2? Is it like the following?

train_file_s2 = [
    [
        feat_file,
        f"{anno_root}/scannet_train_attributes.pt",
        f"{anno_root}/scanrefer_train_stage2_objxx.json",
    ],
    # [
    #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
    #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
    #     f"{anno_root}/scanrefer_pointgroup_train_stage2_caption_iou50.json"
    # ],
    [
        feat_file,
        f"{anno_root}/scannet_train_attributes.pt",
        f"{anno_root}/nr3d_train_stage2_objxx.json"
    ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_train_attributes.pt",
    #     f"{anno_root}/sr3d_train_stage2_objxx.json"
    # ],
    [
        feat_file,
        f"{anno_root}/scannet_train_attributes.pt",
        f"{anno_root}/scene_align_train.json",
    ],
    [
        feat_file,
        f"{anno_root}/scannet_train_attributes.pt",
        f"{anno_root}/obj_align_train.json",
    ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_train_attributes.pt",
    #     f"{anno_root}/scanqa_train_stage2_objxx.json"
    # ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_train_attributes.pt",
    #     f"{anno_root}/scanqa_train_stage2_new.json"
    # ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_train_attributes.pt",
    #     f"{anno_root}/nr3d_train_stage2_grounding_new.json"
    # ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_train_attributes.pt",
    #     f"{anno_root}/scanrefer_train_stage2_grounding_new.json"
    # ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_train_attributes.pt",
    #     f"{anno_root}/sr3d_train_stage2_grounding_new.json"
    # ],
    # [
    #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
    #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
    #     f"{anno_root}/scanrefer_pointgroup_train_stage2_grounding_new.json"
    # ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_val_attributes.pt",
    #     f"{anno_root}/nr3d_train_stage2_multichoice0.01.json"
    # ],
    # [
    #     feat_file,
    #     f"{anno_root}/scannet_train_attributes.pt",
    #     f"{anno_root}/scene_dataset_train_stage2.json"
    # ]
]
ZzZZCHS commented 7 months ago

this is my config

# ========================= data ==========================
anno_root = "data/annotations"  # annotation dir
pc_encoder = "uni3d"
feat_file = f"{anno_root}/scannet_{pc_encoder}_feats.pt"
# attribute_file = f"{anno_root}/scannet_attributes.json"
# train_file_s1 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scanrefer_train_stage1.json",
#     ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/nr3d_train_stage1.json",
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scannet_train_stage1.json",
#     ],
#     # [
#     #     f"{anno_root}/objaverse_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/objaverse_attributes.json",
#     #     f"{anno_root}/objaverse_stage1.json"
#     # ]
# ]

train_file_s1=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scanrefer_train_stage1.json"
  ],
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scannet_train_stage1.json"
  ]
]
val_file_s1=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_val_attributes.pt",
    "data/annotations/scannet_val_stage1.json"
  ]
]

train_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/obj_align_train.json"
  ]
]
val_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_val_attributes.pt",
    "data/annotations/obj_align_val.json"
  ]
]

# train_file_s2 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scanrefer_train_stage2_objxx.json",
#     ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_train_stage2_caption_iou50.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/nr3d_train_stage2_objxx.json"
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/sr3d_train_stage2_objxx.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scene_align_train.json",
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/obj_align_train.json",
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanqa_train_stage2_objxx.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanqa_train_stage2_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/nr3d_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/sr3d_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_train_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_train_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/nr3d_train_stage2_multichoice0.01.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_train_attributes.pt",
#     #     f"{anno_root}/scene_dataset_train_stage2.json"
#     # ]
# ]
# val_file_s2 = [
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scanrefer_val_stage2_objxx.json"
#     # ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_val_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_val_stage2_caption_iou25.json"
#     # ],
#     [
#         feat_file,
#         f"{anno_root}/scannet_val_attributes.pt",
#         f"{anno_root}/stage2_val400.json"
#     ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/nr3d_val_stage2_objxx.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scene_align_val.json",
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/obj_align_val.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scanqa_val_stage2_objxx.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scanqa_val_stage2_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/sr3d_val_stage2_grounding_new.json"
#     # ],
#     # [
#     #     f"{anno_root}/scannet_pointgroup_{pc_encoder}_feats.pt",
#     #     f"{anno_root}/scannet_pointgroup_val_attributes.pt",
#     #     f"{anno_root}/scanrefer_pointgroup_val_stage2_grounding_new.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/nr3d_val_stage2_multichoice0.01.json"
#     # ],
#     # [
#     #     feat_file,
#     #     f"{anno_root}/scannet_val_attributes.pt",
#     #     f"{anno_root}/scene_dataset_val_stage2.json"
#     # ],
# ]

# train_file_s3 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_train_attributes.pt",
#         f"{anno_root}/scanqa_train_stage3.json",
#         1
#     ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/scanrefer_train_conversation.json",
#     #     3
#     # ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/scanrefer_train_detail.json",
#     #     1
#     # ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/nr3d_train_tf.json",
#     #     1
#     # ]
# ]

# # val_file_s1 = [
# #     # [
# #     #     feat_file,
# #     #     f"{anno_root}/scannet_val_attributes.pt",
# #     #     f"{anno_root}/scanrefer_val_stage1.json",
# #     # ],
# #     [
# #         feat_file,
# #         f"{anno_root}/scannet_val_attributes.pt",
# #         f"{anno_root}/scannet_val_stage1.json",
# #     ]
# # ]

# val_file_s3 = [
#     [
#         feat_file,
#         f"{anno_root}/scannet_val_attributes.pt",
#         f"{anno_root}/scanqa_val_predobj.json"
#     ],
#     # [
#     #     feat_file,
#     #     attribute_file,
#     #     f"{anno_root}/scanrefer_val_conversation100.json"
#     # ],
# ]

train_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scanrefer_train_stage2_objxx.json"
  ],
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/nr3d_train_stage2_objxx.json"
  ],
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_train_attributes.pt",
    "data/annotations/scene_align_train.json"
  ]
]

val_file_s2=[
  [
    "data/annotations/scannet_uni3d_feats.pt",
    "data/annotations/scannet_val_attributes.pt",
    "data/annotations/stage2_val400.json"
  ]
]

test_types = []
num_workers = 32

# ========================= input ==========================
s1_batch_size = 64
s2_batch_size = 1
s3_batch_size = 1
# max_txt_l = 32

pre_text = False

# ========================= model ==========================
model = dict(
    llama_model_path="ckpts/vicuna-7b-v0",
    input_dim=1024 if pc_encoder == "uni3d" else 512,
    attr_dim=512,
    encoder_num_layers=1,
    mlp_dropout=0.1,
    low_resource=False,
    system_path="prompts/system.txt",
    prompt_template="\n### Human: {}\n### Assistant: ",
    max_txt_len=512,
    end_sym="\n###",
    stage=1,
    add_scene_token=True,
    debug=False,
    obj_norm_scale=200,
    scene_norm_scale=50,
    grad_scale=1,
)

optimizer = dict(
    opt="adamW",
    lr=5e-3,
    opt_betas=[0.9, 0.999],  # default
    weight_decay=0.02,
    max_grad_norm=-1,  # requires a positive float, use -1 to disable
    # use a different lr for some modules, e.g., larger lr for new modules
    different_lr=dict(
        enable=True,
        module_names=["module.llama_model", "module.relation_module"],
        lr=[1e-5, 1e-5],
        wd=[0.02, 0.02]
    ),
)

scheduler = dict(sched="cosine", epochs=3, min_lr_multi=0.01, warmup_epochs=0.2)

evaluate = False
deep_fusion = False

fp16 = True
gradient_checkpointing = True

# ========================= wandb ==========================
wandb = dict(
    enable=False,
    entity="huanghaifeng",  # username or team name to store the runs, see https://docs.wandb.ai/ref/python/init
    project="Scene-LLM",
)
dist_url = "env://"
device = "cuda"

# ========================= others ==========================
output_dir = "outputs/tmp"  # output dir
resume = False  # if True, load optimizer and scheduler states as well
debug = False
log_freq = 100
# eval_freq = 500
seed = 42

save_latest = False
do_save = True
auto_resume = True
pretrained_path = ""

In this config, you set: [Screenshot 2024-04-11 at 11 50 48 AM] This is right.

But later you set train_file_s2 again here: [Screenshot 2024-04-11 at 11 51 37 AM] This later assignment replaces the former train_file_s2 and is wrong. What I meant is that you may have forgotten to delete this part.