From 36963ae5b0785e812ba3cb31b69580ab9b902b54 Mon Sep 17 00:00:00 2001
From: zengshu
Date: Thu, 11 Sep 2025 18:54:00 +0800
Subject: [PATCH] 671b_A2

---
 configs/grpo_deepseek_r1_671b_A2.yaml | 120 ++++++++++++++++++++++++++
 requirements.txt                      |   3 +-
 2 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 configs/grpo_deepseek_r1_671b_A2.yaml

diff --git a/configs/grpo_deepseek_r1_671b_A2.yaml b/configs/grpo_deepseek_r1_671b_A2.yaml
new file mode 100644
index 00000000..25aacc07
--- /dev/null
+++ b/configs/grpo_deepseek_r1_671b_A2.yaml
@@ -0,0 +1,120 @@
+# GRPO training config (stage: ray_grpo) for deepseekv3_671b; the actor requests 256 NPUs.
+defaults:
+  - model:
+      - deepseekv3_671b
+
+megatron_training:
+  model: deepseekv3_671b
+  use_flash_attn: true
+  shape_order: BNSD
+  use_fused_rotary_pos_emb: true
+  use_fused_rmsnorm: true
+  use_fused_swiglu: true
+  no_masked_softmax_fusion: true
+  attention_softmax_in_fp32: true
+  bf16: true
+  sequence_parallel: true
+  use_distributed_optimizer: true
+  tokenizer_type: PretrainedFromHF
+  tokenizer_name_or_path: ./DeepSeek-V3/
+  global_batch_size: 384
+  seq_length: 8196  # NOTE(review): not a power of two; confirm 8192 was not intended
+  save_interval: 100
+  train_iters: 1000
+  stage: ray_grpo
+  attention_dropout: 0.0
+  init_method_std: 0.01
+  hidden_dropout: 0.0
+  distributed_backend: nccl
+  no_shared_storage: true
+  variable_seq_lengths: true
+  dataset_additional_keys: ['labels']
+  no_shuffle: false
+  data_path: ./deepscaler
+  split: 100,0,0
+  swap_optimizer: true
+  moe_alltoall_overlap_comm: true
+  reset_position_ids: true
+
+actor_config:
+  model: deepseekv3_671b
+  micro_batch_size: 1
+  moe_tp_extend_ep: true
+  expert_model_parallel_size: 8
+  tensor_model_parallel_size: 4
+  pipeline_model_parallel_size: 8
+  num_layer_list: 7,7,8,8,8,8,8,7  # 8 entries summing to 61; presumably layers per pipeline stage - confirm
+  recompute_granularity: full
+  recompute_method: block
+  recompute_num_layers: 4
+  lr: 1.0e-6  # mantissa dot kept so YAML 1.1 loaders also resolve this as a float, not a string
+  lr_decay_style: constant
+  min_lr: 0.0
+  weight_decay: 0.0
+  lr_warmup_fraction: 0.0
+  clip_grad: 1.0
+  adam_beta1: 0.9
+  adam_beta2: 0.95
+  initial_loss_scale: 4096
+  finetune: true
+  load: ./deepseekv3_base_tp4pp8ep8_no_mmsplit/
+  save: ./ckpt/
+  no_load_optim: true
+  no_load_rng: true
+
+rl_config:
+  use_integrated_worker: true
+  use_dynamic_bsz: true
+  max_packing_token_size: 10240
+  blocking: true
+  gamma: 1.0
+  lam: 0.95
+  adv_estimator: group_norm
+  kl_penalty: kl
+  kl_ctrl_type: fixed
+  init_kl_coef: 0.001
+  mini_batch_size: 384
+  use_remove_padding: true
+  max_prompt_length: 8196  # NOTE(review): 8196 + max_tokens (2048) = 10244 > max_model_len (10240); confirm
+  epochs: 1
+  clip_ratio: 0.2
+  entropy_coeff: 0
+  n_samples_per_prompt: 16
+  rule_reward: true
+  # log_max_throughput: false
+  verifier_function: ["math_verify_reward"]
+  verifier_weight: [1.0]
+  actor_resource:
+    num_npus: 256
+
+generate_config:
+  # Tokenizer-related settings
+  trust_remote_code: true
+  # Parallelism settings used for inference
+  infer_tensor_parallel_size: 4
+  infer_pipeline_parallel_size: 1
+  infer_expert_parallel_size: 16
+  # vLLM model-related settings
+  max_num_seqs: 96
+  max_model_len: 10240
+  max_num_batched_tokens: 1024
+  dtype: "bfloat16"
+  gpu_memory_utilization: 0.8
+  enforce_eager: false
+  torchair_graph: true
+  enable_expert_parallel: true
+
+  offload_train_optimizer: false
+  offload_train_grad: true
+  offload_train_param: true
+  ascend_scheduler_config_enabled: false
+
+  # Sampling settings
+  sampling_config:
+    logprobs: 1
+    max_tokens: 2048
+    top_p: 1
+    top_k: -1
+    min_p: 0
+    temperature: 1.0
+    detokenize: false
diff --git a/requirements.txt b/requirements.txt
index b217de1f..304fe107 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,4 +20,5 @@ mathruler==0.1.0
 pylatexenc==2.10
 numba==0.61.2
 torchvision==0.20.1
-cloudpickle==3.1.1
\ No newline at end of file
+cloudpickle==3.1.1
+math_verify
-- 
Gitee