diff --git a/omni/models/configs/best_practice_configs.json b/omni/models/configs/best_practice_configs.json index 2d18e783bbe33e6e5a135c833c0aeb822555b2e0..f7385c0279c35e0f3c057b59b4d5bf04ef577a34 100644 --- a/omni/models/configs/best_practice_configs.json +++ b/omni/models/configs/best_practice_configs.json @@ -98,5 +98,65 @@ "pd_disaggregation": true, "prefill_config_file": "pangu_ultra_moe_a3_prefill_w8a8_4p1d.json", "decode_config_file": "pangu_ultra_moe_a3_decode_w8a8_4p1d.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "w8a8", + "prefill_nodes_num": 1, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_w8a8.json", + "decode_config_file": "qwen3_235b_a3_decode_w8a8.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "w8a8", + "prefill_nodes_num": 2, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_w8a8.json", + "decode_config_file": "qwen3_235b_a3_decode_w8a8.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "w8a8", + "prefill_nodes_num": 8, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_w8a8.json", + "decode_config_file": "qwen3_235b_a3_decode_w8a8.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "bf16", + "prefill_nodes_num": 1, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_bf16.json", + "decode_config_file": "qwen3_235b_a3_decode_bf16.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "bf16", + "prefill_nodes_num": 2, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_bf16.json", + "decode_config_file": "qwen3_235b_a3_decode_bf16.json" + }, + { + "model": "qwen3-235B", + "hardware": "A3", + "precision": "bf16", + "prefill_nodes_num": 8, + "decode_nodes_num": 1, + "pd_disaggregation": true, + "prefill_config_file": "qwen3_235b_a3_prefill_bf16.json", + "decode_config_file": "qwen3_235b_a3_decode_bf16.json" } ] \ No newline at end of file diff --git a/omni/models/configs/match_hf_configs.json b/omni/models/configs/match_hf_configs.json index 9455a55ac8ca072f29f1a57b19a05afc4bc2fa12..c50b411cf1af1576465f202270094748d5a5be2f 100644 --- a/omni/models/configs/match_hf_configs.json +++ b/omni/models/configs/match_hf_configs.json @@ -17,7 +17,7 @@ "max_position_embeddings": 262144, "vocab_size": 151936, "intermediate_size": 12288, - "n_routed_experts": 128, + "n_routed_experts": null, "n_shared_experts": null, "moe_intermediate_size": 1536 }, diff --git a/omni/models/configs/qwen3_235b_a3_decode_bf16.json b/omni/models/configs/qwen3_235b_a3_decode_bf16.json new file mode 100644 index 0000000000000000000000000000000000000000..90a59842db2270cca6877de99a6ff6229cd67dbd --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_decode_bf16.json @@ -0,0 +1,23 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "moe_multi_stream_tune": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "decode_moe_dispatch_combine": true, + "use_super_kernel": true, + "use_mlaprolog": false, + "control_accept_rate": -1, + "use_prefetch": true, + "expert_gate_up_prefetch": 24, + "expert_down_prefetch": 12, + "attn_prefetch": 8, + "enable_scale_parallel": true + } +} \ No newline at end of file diff --git a/omni/models/configs/qwen3_235b_a3_decode_w8a8.json b/omni/models/configs/qwen3_235b_a3_decode_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..e1eaa982e8329a71afa3104a568b33d0ba9d56a4 --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_decode_w8a8.json @@ -0,0 +1,23 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "moe_multi_stream_tune": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": false, + "decode_moe_dispatch_combine": true, + "use_super_kernel": true, + "use_mlaprolog": false, + "control_accept_rate": -1, + "use_prefetch": true, + "expert_gate_up_prefetch": 24, + "expert_down_prefetch": 12, + "attn_prefetch": 8, + "enable_scale_parallel": true + } +} \ No newline at end of file diff --git a/omni/models/configs/qwen3_235b_a3_prefill_bf16.json b/omni/models/configs/qwen3_235b_a3_prefill_bf16.json new file mode 100644 index 0000000000000000000000000000000000000000..272b136a5b35958aaf1286736e3e21e6f4e1f8cf --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_prefill_bf16.json @@ -0,0 +1,17 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "control_accept_rate": -1, + "enable_prefill_micro_batch": true, + "experts_pruning": false, + "enable_scale_parallel": true + } +} \ No newline at end of file diff --git a/omni/models/configs/qwen3_235b_a3_prefill_w8a8.json b/omni/models/configs/qwen3_235b_a3_prefill_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..272b136a5b35958aaf1286736e3e21e6f4e1f8cf --- /dev/null +++ b/omni/models/configs/qwen3_235b_a3_prefill_w8a8.json @@ -0,0 +1,17 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "control_accept_rate": -1, + "enable_prefill_micro_batch": true, + "experts_pruning": false, + "enable_scale_parallel": true + } +} \ No newline at end of file