# 1. Paths for the SFT checkpoint and the DPO training data.
dpo_file = './data/dpo_train_data.json'          # DPO preference pairs (json)
tokenizer_dir = './model_save/tokenizer/'        # pretrained tokenizer directory
sft_from_checkpoint_file = './model_save/sft/'   # SFT checkpoint used as the DPO starting point
model_save_dir = './model_save/dpo/'             # where the DPO-trained model is written
max_seq_len = 320                                # maximum sequence length for training
# 2. Dataset token formatting.
# DPO sample layout: [prompt (model input), chosen (preferred), rejected (dispreferred)].
def split_prompt_and_responses(samples: dict[str, list[str]]) -> dict[str, list[str]]:
    """Wrap each prompt/chosen/rejected string in [BOS]...[EOS] markers.

    Designed for ``datasets.Dataset.map(..., batched=True)``: *samples* is a
    batch, i.e. a dict mapping the columns 'prompt', 'chosen' and 'rejected'
    to equal-length lists of strings.

    The [EOS] token signals end of sentence, which generation relies on to
    stop; [BOS] is optional but added for symmetry.

    Returns a dict with the same three keys and the wrapped string lists.
    (Bug fix: the 'prompt' key previously carried injected junk text —
    'www.laipuhuo.com prompt' — which broke the column names the DPO
    trainer expects.)
    """
    prompts, chosens, rejects = [], [], []
    batch_size = len(samples['prompt'])
    for i in range(batch_size):
        prompts.append(f"[BOS]{samples['prompt'][i]}[EOS]")
        chosens.append(f"[BOS]{samples['chosen'][i]}[EOS]")
        rejects.append(f"[BOS]{samples['rejected'][i]}[EOS]")
    return {'prompt': prompts, 'chosen': chosens, 'rejected': rejects}
# 3. Load tokenizer and DPO dataset, then apply token formatting.
tokenizer = PreTrainedTokenizerFast.from_pretrained(tokenizer_dir)
dataset = load_dataset(path='json', data_files=dpo_file, split='train', cache_dir='.cache')
dataset = dataset.map(split_prompt_and_responses, batched=True).shuffle(2333)

# 4. Load the models.
# model and model_ref start from the same checkpoint; only model's parameters
# are trained while model_ref stays frozen as the DPO reference.
model = PhiForCausalLM.from_pretrained(sft_from_checkpoint_file)
model_ref = PhiForCausalLM.from_pretrained(sft_from_checkpoint_file)
# 5. Training callback.
# DPO keeps two full models resident, so GPU memory pressure is high; clearing
# the CUDA cache periodically mitigates slow VRAM growth on low-memory machines.
class EmptyCudaCacheCallback(TrainerCallback):
    """Free cached CUDA memory every 5th logging step."""

    log_cnt = 0  # counts on_log invocations (per instance once first incremented)

    def on_log(self, args, state, control, logs=None, **kwargs):
        self.log_cnt = self.log_cnt + 1
        if not self.log_cnt % 5:
            torch.cuda.empty_cache()