Open zitterbewegung opened 1 year ago
import pykoi as pk
# Settings for the RL fine-tuning step: a base model path, a reward model
# path, and a dataset selected with dataset_type="huggingface".
config = pk.RLHFConfig(
    base_model_path="meta-llama/Llama-2-7b-hf",
    reward_model_path="goldmermaid/rlhf_reward_model",
    dataset_type="huggingface",
    dataset_name="goldmermaid/stack_exchange_rank_10k_dataset",
    dataset_subset_rl="data",
)

# Build the trainer from the config, then call train_and_save with the
# output path. These must be two separate statements: the original paste
# had them fused on one line, which is a syntax error.
rlhf_step3_rl = pk.RLFinetuning(config)
rlhf_step3_rl.train_and_save("./models/rlhf_step3_rl")
import pykoi as pk
# use huggingface sft and reward model
# Settings for the RL fine-tuning step: a base model path, a reward model
# path, and a dataset selected with dataset_type="huggingface".
config = pk.RLHFConfig(
    base_model_path="meta-llama/Llama-2-7b-hf",
    reward_model_path="goldmermaid/rlhf_reward_model",
    dataset_type="huggingface",
    dataset_name="goldmermaid/stack_exchange_rank_10k_dataset",
    dataset_subset_rl="data",
)

# Build the trainer from the config, then call train_and_save with the
# output path. These must be two separate statements: the original paste
# had them fused on one line, which is a syntax error.
rlhf_step3_rl = pk.RLFinetuning(config)
rlhf_step3_rl.train_and_save("./models/rlhf_step3_rl")