add ch6 code
docs/chapter6/code/pretrain.sh (Normal file, 29 lines added)
@@ -0,0 +1,29 @@
# Restrict the run to the first two GPUs; exported so the deepspeed launcher sees it.
export CUDA_VISIBLE_DEVICES=0,1

deepspeed pretrain.py \
    --config_name autodl-tmp/qwen-1.5b \
    --tokenizer_name autodl-tmp/qwen-1.5b \
    --train_files autodl-tmp/dataset/pretrain_data/mobvoi_seq_monkey_general_open_corpus_small.jsonl \
    --per_device_train_batch_size 16 \
    --gradient_accumulation_steps 4 \
    --do_train \
    --output_dir autodl-tmp/output/pretrain \
    --evaluation_strategy no \
    --learning_rate 1e-4 \
    --num_train_epochs 1 \
    --warmup_steps 200 \
    --logging_dir autodl-tmp/output/pretrain/logs \
    --logging_strategy steps \
    --logging_steps 5 \
    --save_strategy steps \
    --save_steps 100 \
    --preprocessing_num_workers 10 \
    --save_total_limit 1 \
    --seed 12 \
    --block_size 2048 \
    --bf16 \
    --gradient_checkpointing \
    --deepspeed ./ds_config_zero2.json \
    --report_to wandb

# Add the line below to the command above to resume from an existing checkpoint:
# --resume_from_checkpoint ${output_model}/checkpoint-20400 \
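The launch command points at ./ds_config_zero2.json, which is not part of this commit. A minimal ZeRO stage-2 config along the lines below would satisfy that flag; the "auto" values are filled in by the HuggingFace Trainer from the command-line arguments above, and the exact contents here are an illustrative sketch rather than the chapter's committed file.

# Sketch of a ZeRO-2 config compatible with the HF Trainer integration (assumed contents).
cat > ds_config_zero2.json <<'EOF'
{
  "bf16": { "enabled": "auto" },
  "zero_optimization": {
    "stage": 2,
    "overlap_comm": true,
    "contiguous_gradients": true,
    "reduce_scatter": true
  },
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto"
}
EOF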
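The training corpus is read from a JSONL file. Assuming pretrain.py follows the usual causal-LM recipe (one JSON object per line whose text field is tokenized and packed into --block_size-token chunks), a quick check of the field names before committing to a long run looks like this; the expected field name is an assumption, so verify it against the chapter's pretrain.py.

# Hypothetical sanity check: print the JSON keys of the first few corpus lines.
head -n 3 autodl-tmp/dataset/pretrain_data/mobvoi_seq_monkey_general_open_corpus_small.jsonl \
  | python -c 'import sys, json; [print(sorted(json.loads(line))) for line in sys.stdin]'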
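Because the run reports to Weights & Biases (--report_to wandb), the launcher needs credentials; logging in once beforehand, or forcing offline mode, avoids an interactive prompt at startup. A typical invocation, assuming the paths above exist on the machine:

wandb login          # or: export WANDB_MODE=offline to keep logs local only
bash pretrain.sh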