27 lines
819 B
Bash
27 lines
819 B
Bash
# Supervised fine-tuning launch: runs finetune.py through the `deepspeed`
# launcher, using the DeepSpeed config file ./ds_config_zero2.json.
#
# Every path below is relative, so run this script from the directory that
# contains finetune.py, ds_config_zero2.json and autodl-tmp/.

# GPUs made visible to the training job. The variable has to be exported:
# a bare assignment on its own line only sets a shell variable, which is not
# handed to the deepspeed process unless the caller had already exported it.
export CUDA_VISIBLE_DEVICES=0,1

# Single source of truth for the run directory. It is used for --output_dir,
# for --logging_dir (logs/ subdirectory) and by the resume example below.
output_model="autodl-tmp/output/sft"

deepspeed finetune.py \
  --model_name_or_path autodl-tmp/qwen-1.5b \
  --train_files autodl-tmp/dataset/sft_data/BelleGroup/train_3.5M_CN.json \
  --per_device_train_batch_size 16 \
  --gradient_accumulation_steps 4 \
  --do_train \
  --output_dir "${output_model}" \
  --evaluation_strategy no \
  --learning_rate 1e-4 \
  --num_train_epochs 3 \
  --warmup_steps 200 \
  --logging_dir "${output_model}/logs" \
  --logging_strategy steps \
  --logging_steps 5 \
  --save_strategy steps \
  --save_steps 100 \
  --save_total_limit 1 \
  --seed 12 \
  --block_size 2048 \
  --bf16 \
  --gradient_checkpointing \
  --deepspeed ./ds_config_zero2.json \
  --report_to wandb

# To resume from a saved checkpoint, insert the following line into the
# command above, anywhere before the final `--report_to wandb` line (the
# last line of a continued command must not end in a backslash):
#   --resume_from_checkpoint "${output_model}/checkpoint-20400" \