From 923a0ccd87e86c8dd37ea95ab043bc7b13d20b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AC=A7=E6=A2=93=E6=B4=8B?= Date: Fri, 24 Oct 2025 11:03:53 +0800 Subject: [PATCH] =?UTF-8?q?qwen3=5F235b=E9=85=8D=E7=BD=AE=E9=A1=B9?= =?UTF-8?q?=E6=95=B4=E6=94=B9=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../models/configs/best_practice_configs.json | 60 +++++++++++++++++++ omni/models/configs/match_hf_configs.json | 2 +- .../configs/qwen3_235b_a3_decode_bf16.json | 23 +++++++ .../configs/qwen3_235b_a3_decode_w8a8.json | 23 +++++++ .../configs/qwen3_235b_a3_prefill_bf16.json | 17 ++++++ .../configs/qwen3_235b_a3_prefill_w8a8.json | 17 ++++++ 6 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 omni/models/configs/qwen3_235b_a3_decode_bf16.json create mode 100644 omni/models/configs/qwen3_235b_a3_decode_w8a8.json create mode 100644 omni/models/configs/qwen3_235b_a3_prefill_bf16.json create mode 100644 omni/models/configs/qwen3_235b_a3_prefill_w8a8.json diff --git a/omni/models/configs/best_practice_configs.json b/omni/models/configs/best_practice_configs.json index 2d18e783bb..f7385c0279 100644 --- a/omni/models/configs/best_practice_configs.json +++ b/omni/models/configs/best_practice_configs.json @@ -98,5 +98,65 @@ "pd_disaggregation": true, "prefill_config_file": "pangu_ultra_moe_a3_prefill_w8a8_4p1d.json", "decode_config_file": "pangu_ultra_moe_a3_decode_w8a8_4p1d.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "w8a8", + "prefill_nodes_num": 1, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_w8a8.json", + "decode_config_file": "qwen3_235b_a3_decode_w8a8.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "w8a8", + "prefill_nodes_num": 2, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_w8a8.json", + "decode_config_file": "qwen3_235b_a3_decode_w8a8.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "w8a8", + "prefill_nodes_num": 8, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_w8a8.json", + "decode_config_file": "qwen3_235b_a3_decode_w8a8.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "bf16", + "prefill_nodes_num": 1, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_bf16.json", + "decode_config_file": "qwen3_235b_a3_decode_bf16.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "bf16", + "prefill_nodes_num": 2, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_bf16.json", + "decode_config_file": "qwen3_235b_a3_decode_bf16.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "bf16", + "prefill_nodes_num": 8, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_bf16.json", + "decode_config_file": "qwen3_235b_a3_decode_bf16.json" } ] \ No newline at end of file diff --git a/omni/models/configs/match_hf_configs.json b/omni/models/configs/match_hf_configs.json index 9455a55ac8..c50b411cf1 100644 --- a/omni/models/configs/match_hf_configs.json +++ b/omni/models/configs/match_hf_configs.json @@ -17,7 +17,7 @@ "max_position_embeddings": 262144, "vocab_size": 151936, "intermediate_size": 12288, - "n_routed_experts": 128, + "n_routed_experts": null, "n_shared_experts": null, "moe_intermediate_size": 1536 }, diff --git a/omni/models/configs/qwen3_235b_a3_decode_bf16.json b/omni/models/configs/qwen3_235b_a3_decode_bf16.json new file mode 100644 index 0000000000..90a59842db --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_decode_bf16.json @@ -0,0 +1,23 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "moe_multi_stream_tune": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "decode_moe_dispatch_combine": true, + "use_super_kernel": true, + "use_mlaprolog": false, + "control_accept_rate": -1, + "use_prefetch": true, + "expert_gate_up_prefetch": 24, + "expert_down_prefetch": 12, + "attn_prefetch": 8, + "enable_scale_parallel": true + } +} \ No newline at end of file diff --git a/omni/models/configs/qwen3_235b_a3_decode_w8a8.json b/omni/models/configs/qwen3_235b_a3_decode_w8a8.json new file mode 100644 index 0000000000..e1eaa982e8 --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_decode_w8a8.json @@ -0,0 +1,23 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "moe_multi_stream_tune": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": false, + "decode_moe_dispatch_combine": true, + "use_super_kernel": true, + "use_mlaprolog": false, + "control_accept_rate": -1, + "use_prefetch": true, + "expert_gate_up_prefetch": 24, + "expert_down_prefetch": 12, + "attn_prefetch": 8, + "enable_scale_parallel": true + } +} \ No newline at end of file diff --git a/omni/models/configs/qwen3_235b_a3_prefill_bf16.json b/omni/models/configs/qwen3_235b_a3_prefill_bf16.json new file mode 100644 index 0000000000..272b136a5b --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_prefill_bf16.json @@ -0,0 +1,17 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "control_accept_rate": -1, + "enable_prefill_micro_batch": true, + "experts_pruning": false, + "enable_scale_parallel": true + } +} \ No newline at end of file diff --git a/omni/models/configs/qwen3_235b_a3_prefill_w8a8.json b/omni/models/configs/qwen3_235b_a3_prefill_w8a8.json new file mode 100644 index 0000000000..272b136a5b --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_prefill_w8a8.json @@ -0,0 +1,17 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "control_accept_rate": -1, + "enable_prefill_micro_batch": true, + "experts_pruning": false, + "enable_scale_parallel": true + } +} \ No newline at end of file -- Gitee