diff --git a/tests/test_config/ds_r1_w8a8c16_a3_5p1d_d.json b/tests/test_config/ds_r1_w8a8c16_a3_5p1d_d.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9953943ef274b8bda7267a5b2e63822c3219df --- /dev/null +++ b/tests/test_config/ds_r1_w8a8c16_a3_5p1d_d.json @@ -0,0 +1,26 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1, + "dp_size": 64 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "moe_multi_stream_tune": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "decode_moe_dispatch_combine": true, + "use_omni_placement": true, + "omni_placement_config_path": "../../tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_d.yaml", + "use_super_kernel": false, + "use_mlaprolog": false, + "opt_w2_scale_cast": true, + "control_accept_rate": -1, + "use_prefetch": true, + "expert_gate_up_prefetch": 50, + "expert_down_prefetch": 0, + "attn_prefetch": 96 + } +} \ No newline at end of file diff --git a/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json b/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json new file mode 100644 index 0000000000000000000000000000000000000000..d0047e5b0c18cd7061b351fba254c49c950ba31a --- /dev/null +++ b/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json @@ -0,0 +1,19 @@ +{ + "model_parallel_config": { + "dense_mlp_tp_size": 4, + "o_proj_tp_size": 1, + "dp_size": 1 + }, + "operator_optimizition_config": { + "enable_kv_rmsnorm_rope_cache": true, + "prefill_moe_all_to_all": true, + "best_ep": false, + "merge_qkv": false, + "gmm_nz": true, + "use_omni_placement": true, + "omni_placement_config_path": "../../tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.yaml", + "control_accept_rate": -1, + "enable_prefill_micro_batch": false, + "experts_pruning": false + } +} \ No newline at end of file diff --git a/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_d.yaml b/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_d.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..b882e6814fc7cb752d37411050d6f72fb4cc15bb --- /dev/null +++ b/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_d.yaml @@ -0,0 +1,42 @@ +# Patterns: Path to the pattern file specifying expert deployment configuration. + +# The placement pattern is represented as a three-dimensional binary matrix (`expert_mapping`) indicating +# the presence (`1`) or absence (`0`) of experts (EPs) across dimensions of devices, layers, and expert IDs (`epid`). +# Specifically, the three dimensions are: + +# - deviceid: Identifier of the device. +# - layerid: Identifier of the MoE layer. +# - epid: Identifier of the expert within a layer. +# Note that experts in different layers may share the same `epid`, but they represent distinct experts. + +# Thus, `expert_mapping[deviceid][layerid][epid] = 1` indicates that the expert identified by `epid` at layer `layerid` +# is deployed on device `deviceid`. Conversely, a value of `0` indicates the absence of that expert on the specified device and layer. + +# Note: The same `epid` within the same `layerid` can have multiple entries with the value `1` across different devices. +# This means the combination `(layerid, epid)` alone does not uniquely identify a deployment; +# rather, experts may be replicated across multiple devices to enable parallelism or redundancy. + +# Defaults to None. 
+pattern_path: "../../omni/accelerators/placement/patterns/base_patterns/DSV3_baseline_64_devices_58_MoE_Layers.npy" + +# max_moe_layer_num is fixed at 58 (the number of MoE layers in DeepSeek) +max_moe_layer_num: 58 + +enable_dynamic: True +max_redundant_per_expert: 10 # 10 +max_redundant_per_rank: 1 # 1 + +enable_rank_round_robin: True + +enable_dump: False +dump_dir: "../dump_data" + +# Optimizers +Optimizers: + - expert_balance_optimizer.ExpertsBalanceOptimizer: + batch_size: 48 + top_k_count: 8 + - heat_optimizer.HEAT_ExpertsBalancer: + is_global_maximum_offset: True + - resdispatch_optimizer.ResDis_ExpertsBalancer: + is_rand_op: False \ No newline at end of file diff --git a/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.npy b/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.npy new file mode 100644 index 0000000000000000000000000000000000000000..ae70833c6ef03b0cef543ed29948c8b295c4591f Binary files /dev/null and b/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.npy differ diff --git a/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.yaml b/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c0623247f1be5de7173a371160f00e57406c504 --- /dev/null +++ b/tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.yaml @@ -0,0 +1,43 @@ +# Patterns: Path to the pattern file specifying expert deployment configuration. + +# The placement pattern is represented as a three-dimensional binary matrix (`expert_mapping`) indicating +# the presence (`1`) or absence (`0`) of experts (EPs) across dimensions of devices, layers, and expert IDs (`epid`). +# Specifically, the three dimensions are: + +# - deviceid: Identifier of the device. +# - layerid: Identifier of the MoE layer. +# - epid: Identifier of the expert within a layer. +# Note that experts in different layers may share the same `epid`, but they represent distinct experts. 
+ +# Thus, `expert_mapping[deviceid][layerid][epid] = 1` indicates that the expert identified by `epid` at layer `layerid` +# is deployed on device `deviceid`. Conversely, a value of `0` indicates the absence of that expert on the specified device and layer. + +# Note: The same `epid` within the same `layerid` can have multiple entries with the value `1` across different devices. +# This means the combination `(layerid, epid)` alone does not uniquely identify a deployment; +# rather, experts may be replicated across multiple devices to enable parallelism or redundancy. + +# Defaults to None. +pattern_path: "../../omni/accelerators/placement/patterns/placement_pattern_20250626_221356_58_rearrange_layers_58_layers_16_ranks_prefill_step0to100000.npy" +# pattern_path: "../../tests/test_config/ep_ds_r1_w8a8c16_a3_5p1d_p.npy" + +# max_moe_layer_num is fixed at 58 (the number of MoE layers in DeepSeek) +max_moe_layer_num: 58 + +enable_dynamic: False +max_redundant_per_expert: 1 # 10 +max_redundant_per_rank: 0 # 1 + +enable_rank_round_robin: False + +enable_dump: False +dump_dir: "../dump_data" + +# Optimizers +Optimizers: + - expert_balance_optimizer.ExpertsBalanceOptimizer: + batch_size: 48 + top_k_count: 8 + - heat_optimizer.HEAT_ExpertsBalancer: + is_global_maximum_offset: True + - resdispatch_optimizer.ResDis_ExpertsBalancer: + is_rand_op: False \ No newline at end of file diff --git a/tools/omni_cli/configs/template/cli_ds_r1_w8a8c16_a3_5p1d.yml b/tools/omni_cli/configs/template/cli_ds_r1_w8a8c16_a3_5p1d.yml new file mode 100644 index 0000000000000000000000000000000000000000..7f7afca05cec9942a149c571d15020136e6c12d2 --- /dev/null +++ b/tools/omni_cli/configs/template/cli_ds_r1_w8a8c16_a3_5p1d.yml @@ -0,0 +1,1155 @@ +all: + children: + C: + hosts: + c0: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.1 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + 
decode-lb-sdk: pd_score_balance + prefill-lb-sdk: pd_score_balance + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_c0 + env: + API_PORT: 7000 + CODE_PATH: /workspace/omniinfer + LOG_PATH: /tmp/log_path + ROLE: proxy + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + D: + hosts: + d0: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.6 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + graph_model_compile_config: + level: 1 + use_ge_graph_cached: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + max-num-seqs: 32 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 1 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_consumer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_DECODE + model-path: $MODEL_PATH + num-dp: 64 + num-servers: 16 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 1 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_d0 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + 
ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + DECODE_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.6 + KV_RANK: 5 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_d.json + MODEL_LEN_MAX_DECODE: 16384 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/global/collect_files_d/local_*merge.json) + ROLE: decode + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + 
SERVER_OFFSET: 0 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.6 + master_node: d0 + d1: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.7 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + graph_model_compile_config: + level: 1 + use_ge_graph_cached: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + max-num-seqs: 32 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 1 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_consumer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_DECODE + model-path: $MODEL_PATH + num-dp: 64 + num-servers: 16 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 1 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_d1 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + 
ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + DECODE_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.6 + KV_RANK: 5 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_d.json + MODEL_LEN_MAX_DECODE: 16384 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls 
/tmp/ranktable_save_path/global/collect_files_d/local_*merge.json) + ROLE: decode + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 16 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.6 + master_node: d0 + d2: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.8 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + graph_model_compile_config: + level: 1 + use_ge_graph_cached: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + max-num-seqs: 32 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 1 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_consumer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_DECODE + model-path: $MODEL_PATH + num-dp: 64 + num-servers: 16 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 1 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_d2 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + 
ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + DECODE_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.6 + KV_RANK: 5 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_d.json + MODEL_LEN_MAX_DECODE: 16384 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + 
RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/global/collect_files_d/local_*merge.json) + ROLE: decode + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 32 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.6 + master_node: d0 + d3: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.9 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + graph_model_compile_config: + level: 1 + use_ge_graph_cached: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + max-num-seqs: 32 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 1 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_consumer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_DECODE + model-path: $MODEL_PATH + num-dp: 64 + num-servers: 16 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 1 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_d3 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + 
ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + DECODE_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.6 + KV_RANK: 5 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_d.json + MODEL_LEN_MAX_DECODE: 16384 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: 
/usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/global/collect_files_d/local_*merge.json) + ROLE: decode + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 48 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.6 + master_node: d0 + P: + hosts: + p0: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.1 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + enforce-eager: '' + max-num-batched-tokens: 32000 + max-num-seqs: 16 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 0 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_producer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_PREFILL + model-path: $MODEL_PATH + num-dp: 1 + num-servers: 1 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 16 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_p0 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + 
ASCEND_RT_VISIBLE_DEVICES: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.1 + KV_RANK: 0 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: '' + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json + MODEL_LEN_MAX_PREFILL: 16000 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PREFILL_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 
+ PREFILL_TENSOR_PARALLEL_SIZE: 16 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/prefill_config/local_*0123456789101112131415.json) + ROLE: prefill + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 0 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.1 + master_node: p0 + p1: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.2 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + enforce-eager: '' + max-num-batched-tokens: 32000 + max-num-seqs: 16 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 0 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_producer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_PREFILL + model-path: $MODEL_PATH + num-dp: 1 + num-servers: 1 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 16 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_p1 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + 
ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_RT_VISIBLE_DEVICES: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.2 + KV_RANK: 1 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: '' + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json + MODEL_LEN_MAX_PREFILL: 16000 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + 
OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PREFILL_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + PREFILL_TENSOR_PARALLEL_SIZE: 16 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/prefill_config/local_*0123456789101112131415.json) + ROLE: prefill + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 0 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.2 + master_node: p1 + p2: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.3 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + enforce-eager: '' + max-num-batched-tokens: 32000 + max-num-seqs: 16 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 0 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_producer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_PREFILL + model-path: $MODEL_PATH + num-dp: 1 + num-servers: 1 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 16 + 
ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_p2 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_RT_VISIBLE_DEVICES: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.3 + KV_RANK: 2 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: '' + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + 
MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json + MODEL_LEN_MAX_PREFILL: 16000 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PREFILL_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + PREFILL_TENSOR_PARALLEL_SIZE: 16 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/prefill_config/local_*0123456789101112131415.json) + ROLE: prefill + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 0 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.3 + master_node: p2 + p3: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.4 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + enforce-eager: '' + max-num-batched-tokens: 32000 + max-num-seqs: 16 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 0 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_producer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + 
max-model-len: $MODEL_LEN_MAX_PREFILL + model-path: $MODEL_PATH + num-dp: 1 + num-servers: 1 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 16 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_p3 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_RT_VISIBLE_DEVICES: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + 
HOST_IP: 127.0.0.4 + KV_RANK: 3 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: '' + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json + MODEL_LEN_MAX_PREFILL: 16000 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PREFILL_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + PREFILL_TENSOR_PARALLEL_SIZE: 16 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/prefill_config/local_*0123456789101112131415.json) + ROLE: prefill + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 0 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.4 + master_node: p3 + p4: + DOCKER_IMAGE_ID: image-repository:tag + ansible_host: 127.0.0.5 + ansible_ssh_common_args: -o StrictHostKeyChecking=no -o IdentitiesOnly=yes + ansible_ssh_private_key_file: /path/to/key + ansible_user: root + args: + additional-config: + enable_omni_attn: true + base-api-port: $API_PORT + enable-mtp: '' + extra-args: + disable-log-requests: '' + enable-expert-parallel: '' + enforce-eager: '' + max-num-batched-tokens: 32000 + max-num-seqs: 16 + no-enable-prefix-caching: '' + gpu-util: '0.92' + kv-transfer-config: + engine_id: 0 + kv_buffer_device: npu + kv_connector: AscendHcclConnectorV1 + 
kv_parallel_size: $((PREFILL_POD_NUM + 1)) + kv_rank: $KV_RANK + kv_role: kv_producer + log-dir: $LOG_PATH + master-ip: $HOST_IP + master-port: $MASTER_PORT + max-model-len: $MODEL_LEN_MAX_PREFILL + model-path: $MODEL_PATH + num-dp: 1 + num-servers: 1 + served-model-name: deepseek + server-offset: $SERVER_OFFSET + tp: 16 + ascend_rt_visible_devices: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + container_name: ds5p1d_p4 + env: + API_PORT: 9000 + ASCEND_AICPU_PATH: /usr/local/Ascend/latest + ASCEND_GLOBAL_LOG_LEVEL: 3 + ASCEND_HOME_PATH: /usr/local/Ascend/latest + ASCEND_LAUNCH_BLOCKING: 0 + ASCEND_OPP_PATH: /usr/local/Ascend/latest/opp + ASCEND_RT_VISIBLE_DEVICES: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/latest + ASDOPS_LOG_LEVEL: ERROR + ASDOPS_LOG_PATH: /root + ASDOPS_LOG_TO_BOOST_TYPE: atb + ASDOPS_LOG_TO_FILE: 1 + ASDOPS_LOG_TO_FILE_FLUSH: 0 + ASDOPS_LOG_TO_STDOUT: 0 + ASDSIP_HOME_PATH: /usr/local/Ascend/nnal/asdsip/latest/ + ASDSIP_LOG_LEVEL: INFO + ASDSIP_LOG_PATH: /root + ASDSIP_LOG_TO_BOOST_TYPE: asdsip + ASDSIP_LOG_TO_FILE: 0 + ASDSIP_LOG_TO_FILE_FLUSH: 0 + ASDSIP_LOG_TO_STDOUT: 0 + ATB_COMPARE_TILING_EVERY_KERNEL: 0 + ATB_DEVICE_TILING_BUFFER_BLOCK_NUM: 32 + ATB_HOME_PATH: /usr/local/Ascend/nnal/atb/latest/atb/cxx_abi_1 + ATB_HOST_TILING_BUFFER_BLOCK_NUM: 128 + ATB_MATMUL_SHUFFLE_K_ENABLE: 1 + ATB_OPSRUNNER_KERNEL_CACHE_GLOABL_COUNT: 5 + ATB_OPSRUNNER_KERNEL_CACHE_LOCAL_COUNT: 1 + ATB_OPSRUNNER_SETUP_CACHE_ENABLE: 1 + ATB_SHARE_MEMORY_NAME_SUFFIX: '' + ATB_STREAM_SYNC_EVERY_KERNEL_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_OPERATION_ENABLE: 0 + ATB_STREAM_SYNC_EVERY_RUNNER_ENABLE: 0 + ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE: 1 + ATB_WORKSPACE_MEM_ALLOC_GLOBAL: 0 + AUTO_USE_UC_MEMORY: 1 + CODE_PATH: /workspace/omniinfer + CPU_AFFINITY_CONF: 2 + DECODE_POD_NUM: 1 + GLOBAL_DECODE_SERVER_IP_LIST: ${SERVER_IP_LIST} + GLOBAL_RANK_TABLE_FILE_PATH: ${RANKTABLE_SAVE_PATH}/global/global_ranktable_merge.json + GLOO_SOCKET_IFNAME: enp23s0f3 + 
HCCL_BUFFSIZE: 1000 + HCCL_CONNECT_TIMEOUT: 1800 + HCCL_EXEC_TIMEOUT: 120 + HCCL_INTRA_PCIE_ENABLE: 0 + HCCL_INTRA_ROCE_ENABLE: 1 + HCCL_OP_EXPANSION_MODE: AIV + HOST_IP: 127.0.0.5 + KV_RANK: 4 + LCCL_DETERMINISTIC: 0 + LCCL_PARALLEL: 0 + LOCAL_DECODE_SERVER_IP_LIST: '' + LOG_PATH: /tmp/log_path + MASTER_PORT: 8000 + MODEL_EXTRA_CFG_PATH: ${CODE_PATH}/tests/test_config/ds_r1_w8a8c16_a3_5p1d_p.json + MODEL_LEN_MAX_PREFILL: 16000 + MODEL_PATH: /data/models/DeepSeek-R1-w8a8-fusion + OMNI_REUSE_PREFILLED_TOKENS: 1 + OMNI_SKIP_DECODE_TOKENIZE: 1 + OMNI_USE_DSV3: 1 + PREFILL_POD_NUM: 5 + PREFILL_SERVER_LIST: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + PREFILL_TENSOR_PARALLEL_SIZE: 16 + PROFILING_NAMELIST: /workspace/omniinfer/omni/tools/profiler/proc_bind/proc_marker_namelist.yml + PYTHONPATH: /usr/local/Ascend/CANN-7.7/toolkit/python/site-packages:$PYTHONPATH + PYTORCH_NPU_ALLOC_CONF: expandable_segments:True + RANKTABLE_SAVE_PATH: /tmp/ranktable_save_path + RANK_TABLE_FILE_PATH: $(ls /tmp/ranktable_save_path/prefill_config/local_*0123456789101112131415.json) + ROLE: prefill + SERVER_IP_LIST: 127.0.0.6,127.0.0.7,127.0.0.8,127.0.0.9 + SERVER_OFFSET: 0 + SHLVL: 1 + SOCKET_IFNAME: enp23s0f3 + TASK_QUEUE_ENABLE: 2 + TNG_HOST_COPY: 1 + TOKENIZER_PROC_POOL: 1 + TOOLCHAIN_HOME: /usr/local/Ascend/latest/toolkit + TP_SOCKET_IFNAME: enp23s0f3 + USING_LCCL_COM: 0 + VLLM_ENABLE_MC2: 1 + VLLM_LLMDATADIST_ZMQ_PORT: 5568 + VLLM_LOGGING_LEVEL: INFO + VLLM_USE_V1: 1 + VLLM_WORKER_MULTIPROC_METHOD: fork + host_ip: 127.0.0.5 + master_node: p4 \ No newline at end of file