# training
# safetensors files of parameters of the actor model
policy_lm: /workspace/WebRL/webrl-glm-4-9b
# safetensors files of parameters of the critic model
critic_lm: /workspace/WebRL/webrl-glm-4-9b
# number of epochs for the critic each phase
critic_epochs: 1
# number of epochs for training the actor each phase
actor_epochs: 1
# batch size for training the actor and critic
batch_size: 1

# .bin file of parameters of the critic model
# NOTE(review): this value is the same path as policy_lm/critic_lm rather
# than a path ending in .bin - confirm it is what the loader expects.
critic_resume_path: /workspace/WebRL/webrl-glm-4-9b

save_path: /workspace/WebRL/scripts/output
run_name: "webrl"
offline_data_path: /workspace/WebRL/scripts/offline_data
checkpointing_steps: 400