**Training** (Mistral 7B):
```bash
ACCELERATE_LOG_LEVEL=info accelerate launch \
  --config_file accelerate_configs/deepspeed_zero3.yaml \
  scripts/run_simpo.py \
  training_configs/mistral-7b-base-simpo.yaml
```
**Launch training**:
```bash
accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml \
  scripts/run_simpo.py training_configs/mistral-7b-base-simpo.yaml
```
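Both commands optimize the SimPO objective, which scores each response by its average (length-normalized) log-probability and requires the chosen response to beat the rejected one by a target reward margin. Below is a minimal sketch of that loss, assuming `gamma_beta_ratio` denotes γ/β as in the SimPO paper; tensor names are illustrative, not the repository's API:

```python
import torch
import torch.nn.functional as F

def simpo_loss(chosen_logps_sum: torch.Tensor,
               rejected_logps_sum: torch.Tensor,
               chosen_lengths: torch.Tensor,
               rejected_lengths: torch.Tensor,
               beta: float = 2.0,
               gamma_beta_ratio: float = 0.5) -> torch.Tensor:
    """Length-normalized preference loss in the style of SimPO."""
    # Average log-probability per token acts as the implicit reward
    chosen_reward = chosen_logps_sum / chosen_lengths
    rejected_reward = rejected_logps_sum / rejected_lengths
    # Chosen must exceed rejected by gamma = gamma_beta_ratio * beta
    margin = chosen_reward - rejected_reward - gamma_beta_ratio
    return -F.logsigmoid(beta * margin)
```

Larger `beta` sharpens the sigmoid and larger `gamma_beta_ratio` demands a wider gap between chosen and rejected before the loss flattens out, which is why the configs below adjust both per task.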
**Config** (`llama3-8b-instruct-simpo.yaml`):
```yaml
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
dataset_mixer:
  argilla/ultrafeedback-binarized-preferences-cleaned: 1.0
beta: 2.5
gamma_beta_ratio: 0.5
learning_rate: 5e-7
sft_weight: 0.1  # Add SFT loss to preserve capabilities
num_train_epochs: 1
per_device_train_batch_size: 2
gradient_accumulation_steps: 4
output_dir: ./outputs/llama3-8b-simpo
```

**Launch training**:
```bash
accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml \
  scripts/run_simpo.py training_configs/llama3-8b-instruct-simpo.yaml
```
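The `sft_weight: 0.1` setting mixes a small supervised (cross-entropy) term on the chosen responses into the preference objective, which helps keep the instruct model from drifting while it learns preferences. A hedged sketch of that combination, assuming a simple weighted sum (the exact weighting in `run_simpo.py` may differ):

```python
import torch

def combined_loss(simpo_losses: torch.Tensor,
                  chosen_nll_loss: torch.Tensor,
                  sft_weight: float = 0.1) -> torch.Tensor:
    # Preference loss on (chosen, rejected) pairs plus a weighted
    # next-token prediction loss on the chosen responses alone
    return simpo_losses.mean() + sft_weight * chosen_nll_loss
```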
**Math config** (deepseek-math-7b-base):
```yaml
model_name_or_path: deepseek-ai/deepseek-math-7b-base
dataset_mixer:
  argilla/distilabel-math-preference-dpo: 1.0
beta: 5.0 # Higher for stronger signal
gamma_beta_ratio: 0.7 # Larger margin
learning_rate: 3e-7 # Lower LR for reasoning
sft_weight: 0.0
num_train_epochs: 1
per_device_train_batch_size: 1
gradient_accumulation_steps: 16
```
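A per-device batch of 1 with 16 accumulation steps keeps memory low while preserving a reasonable effective batch size (per-device batch × accumulation steps × number of GPUs). A quick check, where the 8-GPU count is only an assumption for illustration:

```python
per_device_train_batch_size = 1
gradient_accumulation_steps = 16
num_gpus = 8  # assumption: a single 8-GPU node

effective_batch_size = (per_device_train_batch_size
                        * gradient_accumulation_steps
                        * num_gpus)
print(effective_batch_size)  # 128
```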
**Common tuning adjustments**:

Lower the learning rate:
```yaml
learning_rate: 3e-7  # Reduce from 5e-7
```

Soften the preference signal and preserve capabilities:
```yaml
beta: 1.0        # Reduce from 2.0
sft_weight: 0.1  # Add SFT loss component
```

Strengthen the preference signal:
```yaml
beta: 5.0              # Increase from 2.0
gamma_beta_ratio: 0.8  # Increase from 0.5
```

Reduce memory usage:
```yaml
per_device_train_batch_size: 1
gradient_accumulation_steps: 16  # Maintain effective batch
gradient_checkpointing: true
```
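To make the `beta` and `gamma_beta_ratio` adjustments concrete, compare the loss that the same chosen-vs-rejected margin incurs under different settings, using the length-normalized formulation sketched earlier (the margin value of 0.3 is illustrative):

```python
import math

def simpo_pair_loss(margin: float, beta: float, gamma_beta_ratio: float) -> float:
    # -log sigmoid(beta * (margin - gamma_beta_ratio))
    return math.log1p(math.exp(-beta * (margin - gamma_beta_ratio)))

margin = 0.3  # average per-token log-prob gap between chosen and rejected

print(simpo_pair_loss(margin, beta=1.0, gamma_beta_ratio=0.5))  # ~0.80
print(simpo_pair_loss(margin, beta=2.0, gamma_beta_ratio=0.5))  # ~0.91
print(simpo_pair_loss(margin, beta=5.0, gamma_beta_ratio=0.8))  # ~2.58
```

The same margin that is nearly sufficient under the softer settings is penalized heavily under `beta: 5.0` and `gamma_beta_ratio: 0.8`, which is the "stronger signal" the math config aims for.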