tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).cuda()
for line in tqdm(open(file, 'r', encoding='utf-8').readlines()):
js = json.loads(line.strip())
instruction = js['instruction']
outputs = js['outputs']
rewards = []
for output in outputs:
rewards.append(test(tokenizer, model, instruction, output))
w.write(json.dumps({
'instruction':instruction,
'input':'',
'output': outputs[np.argmax(rewards)],
}, ensure_ascii=False) + '\n')
w.flush()
I try to find the one with the highest score from multiple answers, but it always has CUDA out of memory Error. is there something wrong with the code?
My code looks like this:
def test(tokenizer, model, instruction, ans):
tokenizer = AutoTokenizer.from_pretrained(model_path) model = AutoModel.from_pretrained(model_path, trust_remote_code=True).cuda() for line in tqdm(open(file, 'r', encoding='utf-8').readlines()): js = json.loads(line.strip()) instruction = js['instruction'] outputs = js['outputs'] rewards = [] for output in outputs: rewards.append(test(tokenizer, model, instruction, output)) w.write(json.dumps({ 'instruction':instruction, 'input':'', 'output': outputs[np.argmax(rewards)], }, ensure_ascii=False) + '\n') w.flush()
I try to find the one with the highest score from multiple answers, but it always has CUDA out of memory Error. is there something wrong with the code?