From 2eb53d42da90cbecaf79f994cf7653dd0defc162 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E5=BE=90=E6=95=8F?= <m19825086558@163.com>
Date: Fri, 18 Jul 2025 11:13:24 +0800
Subject: [PATCH] [Test]Add ST test case for Qwen2_5_vl_72b

---
 ci/access_control_test_ms.py                  | 126 ++++++++++++++++++
 .../finetune_qwen2_5_vl_72b.json              |  20 +++
 .../finetune_qwen2_5_vl_72b/data_72b.json     |  52 ++++++++
 .../finetune_qwen2_5_vl_72b/model_72b.json    |  95 +++++++++++++
 .../shell_scripts/finetune_qwen2_5_vl_72b.sh  | 105 +++++++++++++++
 tests/mindspore/st/st_run.sh                  |  82 ++++++++++++
 tests/mindspore/st/test_tools/acquire_json.py |  76 +++++++++++
 tests/mindspore/st/test_tools/test_ci_st.py   | 104 +++++++++++++++
 8 files changed, 660 insertions(+)
 create mode 100644 ci/access_control_test_ms.py
 create mode 100644 tests/mindspore/st/baseline_results/finetune_qwen2_5_vl_72b.json
 create mode 100644 tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/data_72b.json
 create mode 100644 tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/model_72b.json
 create mode 100644 tests/mindspore/st/shell_scripts/finetune_qwen2_5_vl_72b.sh
 create mode 100644 tests/mindspore/st/st_run.sh
 create mode 100644 tests/mindspore/st/test_tools/acquire_json.py
 create mode 100644 tests/mindspore/st/test_tools/test_ci_st.py

diff --git a/ci/access_control_test_ms.py b/ci/access_control_test_ms.py
new file mode 100644
index 00000000..9cb7e036
--- /dev/null
+++ b/ci/access_control_test_ms.py
@@ -0,0 +1,126 @@
+import argparse
+import os
+from pathlib import Path
+
+
+def read_files_from_txt(txt_file):
+    with open(txt_file, "r") as f:
+        return [line.strip() for line in f.readlines()]
+
+
+def is_examples(file):
+    return file.startswith("example/")  # NOTE(review): matches "example/" only, not "examples/" — confirm the directory name
+
+
+def is_pipecase(file):
+    return file.startswith("tests/pipeline")  # prefix match: any path beginning with "tests/pipeline"
+
+
+def is_markdown(file):
+    return file.endswith(".md")  # case-sensitive suffix check
+
+
+def skip_ci_file(files, skip_cond):
+    for file in files:
+        if not any(condition(file) for condition in skip_cond):
+            return False
+    return True
+
+
+def alter_skip_ci():
+    parent_dir = Path(__file__).absolute().parents[2]
+    raw_txt_file = os.path.join(parent_dir, "modify.txt")
+
+    if not os.path.exists(raw_txt_file):
+        return False
+    
+    file_list = read_files_from_txt(raw_txt_file)
+    skip_conds = [
+        is_examples,
+        is_pipecase,
+        is_markdown
+    ]
+
+    return skip_ci_file(file_list, skip_conds)
+
+
+def acquire_exitcode(command):
+    exitcode = os.system(command)
+    real_code = os.WEXITSTATUS(exitcode)
+    return real_code
+
+
+# =============================
+# UT test, run with pytest
+# =============================
+
+class UT_Test:
+
+    def __init__(self):
+
+        base_dir = Path(__file__).absolute().parent.parent
+        test_dir = os.path.join(base_dir, 'tests')
+        mstest_dir = os.path.join(test_dir, 'mindspore')
+        self.ut_file = os.path.join(mstest_dir, "ut")
+    
+    def run_ut(self):
+        command = f"pytest -x {self.ut_file}"
+        code = acquire_exitcode(command)
+        if code == 0:
+            print("UT test success")
+        else:
+            print("UT failed")
+        return code
+
+
+# ===============================================
+# ST test, run with sh.
+# ===============================================
+
+class ST_Test:
+    
+    def __init__(self):
+
+        base_dir = Path(__file__).absolute().parent.parent
+        test_dir = os.path.join(base_dir, 'tests')
+        mstest_dir = os.path.join(test_dir, 'mindspore')
+        st_dir = "st"
+        self.st_shell = os.path.join(
+            mstest_dir, st_dir, "st_run.sh"
+        )
+
+    def run_st(self):
+        command = f"bash {self.st_shell}"
+        code = acquire_exitcode(command)
+        
+        if code == 0:
+            print("ST test success")
+        else:
+            print("ST failed")
+        return code
+
+
+def run_st_tests():
+    st = ST_Test()
+    return st.run_st()
+
+
+def run_tests(options):
+    if options.type == "st":
+        return run_st_tests()
+    else:
+        raise ValueError(f"TEST CASE TYPE ERROR: no type `{options.type}`")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Control needed test cases")
+    parser.add_argument("--type", type=str, default="st", 
+                        help='Test cases type. `all`: run all test cases; `ut`: run ut case,' '`st`: run st cases;')
+    args = parser.parse_args()
+    print(f"options: {args}")
+    if alter_skip_ci():
+        print("Skipping CI")
+    else:
+        exit_code = run_tests(args)
+        if exit_code != 0:
+            exit(exit_code)
\ No newline at end of file
diff --git a/tests/mindspore/st/baseline_results/finetune_qwen2_5_vl_72b.json b/tests/mindspore/st/baseline_results/finetune_qwen2_5_vl_72b.json
new file mode 100644
index 00000000..d25ada45
--- /dev/null
+++ b/tests/mindspore/st/baseline_results/finetune_qwen2_5_vl_72b.json
@@ -0,0 +1,20 @@
+{
+    "loss": [
+        1.237051E+01,
+        1.242174E+01,
+        1.120166E+01
+    ],
+    "time": [
+        25398.7,
+        843.6,
+        1087.0
+    ],
+    "memo info": [
+        {
+            "rank": 0,
+            "allocated memory": 12307.1337890625,
+            "max allocated memory": 15934.4633203125
+        }
+    ],
+    "warm_up": 1
+}
diff --git a/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/data_72b.json b/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/data_72b.json
new file mode 100644
index 00000000..5476377c
--- /dev/null
+++ b/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/data_72b.json
@@ -0,0 +1,52 @@
+{
+    "dataset_param": {
+        "dataset_type": "huggingface",
+        "preprocess_parameters": {
+            "model_name_or_path": "/mnt/disk0/ci_resource/models/Qwen2-VL-72B-Instruct",
+            "use_fast_tokenizer": true,
+            "split_special_tokens": false,
+            "image_max_pixels": 262144,
+            "image_min_pixels": 0,
+            "video_max_pixels": 16384,
+            "video_min_pixels": 0,
+            "video_fps": 2.0,
+            "video_maxlen": 64
+        },
+        "basic_parameters": {
+            "template": "qwen2vl",
+            "dataset_dir": "./data",
+            "dataset": "/home/ci_resource/data/qwen2vl/qwen2vl_7b/mllm_format_llava_instruct_data.json",
+            "cache_dir": "./data/cache_dir",
+            "train_on_prompt": false,
+            "mask_history": false,
+            "preprocessing_batch_size": 100,
+            "preprocessing_num_workers": 1,
+            "max_samples": 100,
+            "tool_format": null
+        },
+        "attr": {
+            "system": null,
+            "images": "images",
+            "videos": null,
+            "messages": "messages",
+            "role_tag": "role",
+            "content_tag": "content",
+            "user_tag": "user",
+            "assistant_tag": "assistant",
+            "observation_tag": null,
+            "function_tag": null,
+            "system_tag": null
+        }
+    },
+    "dataloader_param": {
+        "dataloader_mode": "sampler",
+        "drop_last": true,
+        "sampler_type": "BaseRandomBatchSampler",
+        "collate_param": {
+            "model_name": "qwen2vl",
+            "ignore_pad_token_for_loss": true
+        },
+        "pin_memory": true,
+        "shuffle": true
+    }
+}
\ No newline at end of file
diff --git a/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/model_72b.json b/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/model_72b.json
new file mode 100644
index 00000000..b35b73db
--- /dev/null
+++ b/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/model_72b.json
@@ -0,0 +1,95 @@
+{
+    "model_id": "qwen2_5vl",
+    "img_context_token_id": 151655,
+    "video_token_id": 151656,
+    "vision_start_token_id": 151652,
+    "image_encoder": {
+        "vision_encoder": {
+            "model_id": "qwen2vit",
+            "num_layers": 2,
+            "pipeline_num_layers": [2, 0],
+            "hidden_size": 1280,
+            "ffn_hidden_size": 3456,
+            "intermediate_size": 3456,
+            "num_attention_heads": 16,
+            "gated_linear_unit": true,
+            "hidden_dropout": 0.0,
+            "attention_dropout": 0.0,
+            "in_channels": 3,
+            "patch_size": 14,
+            "spatial_merge_size": 2,
+            "temporal_patch_size": 2,
+            "layernorm_epsilon": 1e-06,
+            "tokens_per_second": 2,
+            "window_attn_size": 112,
+            "fullatt_block_indexes": [
+                7,
+                15,
+                23,
+                31
+            ],
+            "fp16": false,
+            "bf16": true,
+            "params_dtype": "bf16",
+            "activation_func": "silu",
+            "normalization": "RMSNorm",
+            "use_fused_rotary_pos_emb": true,
+            "post_layer_norm": false,
+            "freeze": true
+        },
+        "vision_projector": {
+            "model_id": "lnmlp",
+            "num_layers": 1,
+            "gated_linear_unit": false,
+            "bias_activation_fusion": false,
+            "add_bias_linear": true,
+            "input_size": 1280,
+            "hidden_size": 8192,
+            "ffn_hidden_size": 5120,
+            "activation_func": "gelu",
+            "bf16": true,
+            "params_dtype": "bf16",
+            "layernorm_epsilon": 1e-06,
+            "normalization": "RMSNorm",
+            "freeze": true
+        }
+    },
+    "text_decoder": {
+        "model_id": "qwen2_5_lm",
+        "num_layers": 2,
+        "pipeline_num_layers": [1,1],
+        "hidden_size": 8192,
+        "ffn_hidden_size": 29568,
+        "num_attention_heads": 64,
+        "group_query_attention": true,
+        "num_query_groups": 8,
+        "position_embedding_type": "mrope",
+        "mrope_section": [16, 24, 24],
+        "use_fused_rotary_pos_emb": true,
+        "rope_scaling": null,
+        "normalization": "RMSNorm",
+        "layernorm_epsilon": 1e-06,
+        "activation_func": "silu",
+        "gated_linear_unit": true,
+        "add_bias_linear":false,
+        "add_qkv_bias": true,
+        "disable_bias_linear": true,
+        "attention_softmax_in_fp32": true,
+        "untie_embeddings_and_output_weights": true,
+        "attention_dropout": 0.0,
+        "init_method_std": 0.01,
+        "hidden_dropout": 0.0,
+        "seq_length": 1024,
+        "max_position_embeddings": 128000,
+        "vocab_size": 152064,
+        "rope_theta": 1000000.0,
+        "params_dtype": "bf16",
+        "bf16": true,
+        "parallel_output": true,
+        "recompute_granularity": "full",
+        "recompute_method": "uniform",
+        "recompute_num_layers": 1
+    },
+    "text_encoder": null,
+    "video_encoder": null
+}
diff --git a/tests/mindspore/st/shell_scripts/finetune_qwen2_5_vl_72b.sh b/tests/mindspore/st/shell_scripts/finetune_qwen2_5_vl_72b.sh
new file mode 100644
index 00000000..7fcb408e
--- /dev/null
+++ b/tests/mindspore/st/shell_scripts/finetune_qwen2_5_vl_72b.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+code_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")/../../../..")  # four levels above this script's directory
+MindSpeed_Core_MS_PATH=$(dirname "$code_dir")/MindSpeed-Core-MS
+export PYTHONPATH=${code_dir}/:$PYTHONPATH
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/msadapter:${MindSpeed_Core_MS_PATH}/msadapter/msa_thirdparty:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:$PYTHONPATH
+export CUDA_DEVICE_MAX_CONNECTIONS=1
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+export ASCEND_GLOBAL_LOG_LEVEL=3
+export TASK_QUEUE_ENABLE=2
+export COMBINED_ENABLE=1
+export CPU_AFFINITY_CONF=1
+export HCCL_CONNECT_TIMEOUT=1200
+export NPU_ASD_ENABLE=0
+export ASCEND_LAUNCH_BLOCKING=0
+export ACLNN_CACHE_LIMIT=100000
+export PYTORCH_NPU_ALLOC_CONF="expandable_segments:True"
+
+NPUS_PER_NODE=8
+export MASTER_ADDR=localhost
+MASTER_PORT=6889
+NNODES=1
+NODE_RANK=0
+WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES))
+export LOCAL_WORLD_SIZE=$NPUS_PER_NODE
+
+BASEPATH=$(cd `dirname $0`; cd ../../../../; pwd)
+
+MM_DATA="$BASEPATH/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/data_72b.json"
+MM_MODEL="$BASEPATH/tests/mindspore/st/run_configs/finetune_qwen2_5_vl_72b/model_72b.json"
+MM_TOOL="$BASEPATH/mindspeed_mm/tools/tools.json"
+
+TP=2
+PP=2
+CP=1
+MBS=1
+GRAD_ACC_STEP=4
+DP=$(($WORLD_SIZE/$TP/$PP/$CP))
+GBS=$(($MBS*$GRAD_ACC_STEP*$DP))
+
+DISTRIBUTED_ARGS="
+    --local_worker_num $NPUS_PER_NODE \
+    --worker_num $WORLD_SIZE \
+    --node_rank $NODE_RANK \
+    --master_addr $MASTER_ADDR \
+    --master_port $MASTER_PORT \
+    --log_dir msrun_log \
+    --bind_core=True \
+    --join=True
+"
+
+GPT_ARGS="
+    --use-mcore-models \
+    --tensor-model-parallel-size ${TP} \
+    --pipeline-model-parallel-size ${PP} \
+    --micro-batch-size ${MBS} \
+    --global-batch-size ${GBS} \
+    --tokenizer-type NullTokenizer \
+    --vocab-size 152064 \
+    --seq-length 1024 \
+    --make-vocab-size-divisible-by 1 \
+    --normalization RMSNorm \
+    --use-fused-rmsnorm \
+    --swiglu \
+    --use-fused-swiglu \
+    --lr 1.0e-5 \
+    --lr-decay-style cosine \
+    --weight-decay 0 \
+    --train-iters 3 \
+    --lr-warmup-fraction 0.1 \
+    --clip-grad 0.0 \
+    --adam-beta1 0.9 \
+    --adam-beta2 0.999 \
+    --no-gradient-accumulation-fusion \
+    --seed 42 \
+    --bf16 \
+    --variable-seq-lengths \
+    --use-distributed-optimizer \
+    --no-load-optim \
+    --no-load-rng \
+    --no-save-optim \
+    --no-save-rng \
+    --num-workers 8 \
+    --use-flash-attn \
+"
+
+MM_ARGS="
+    --mm-data $MM_DATA \
+    --mm-model $MM_MODEL \
+    --mm-tool $MM_TOOL
+"
+
+OUTPUT_ARGS="
+    --log-interval 1 \
+    --save-interval 10000 \
+    --eval-interval 10000 \
+    --eval-iters 5000 \
+"
+
+msrun $DISTRIBUTED_ARGS \
+    $BASEPATH/pretrain_vlm.py \
+    $GPT_ARGS \
+    $MM_ARGS \
+    $OUTPUT_ARGS \
+    --distributed-backend nccl \
+    --ai-framework mindspore
\ No newline at end of file
diff --git a/tests/mindspore/st/st_run.sh b/tests/mindspore/st/st_run.sh
new file mode 100644
index 00000000..2db37e4f
--- /dev/null
+++ b/tests/mindspore/st/st_run.sh
@@ -0,0 +1,82 @@
+# step 1: define dir
+BASE_DIR=$(dirname "$(readlink -f "$0")")
+export PYTHONPATH=$BASE_DIR:$PYTHONPATH
+echo "$BASE_DIR"
+SHELL_SCRIPTS_DIR="$BASE_DIR/shell_scripts"
+BASELINE_DIR="$BASE_DIR/baseline_results"
+EXEC_PY_DIR=$(dirname "$BASE_DIR")
+
+echo "SHELL_SCRIPTS_DIR: $SHELL_SCRIPTS_DIR"
+echo "BASELINE_DIR: $BASELINE_DIR"
+echo "EXEC_PY_DIR: $EXEC_PY_DIR"
+
+GENERATE_LOG_DIR="$BASE_DIR/run_logs"
+GENERATE_JSON_DIR="$BASE_DIR/run_jsons"
+
+mkdir -p "$GENERATE_LOG_DIR"
+mkdir -p "$GENERATE_JSON_DIR"
+# ":?" aborts instead of expanding to "/*" should a directory variable ever be empty
+rm -rf "${GENERATE_LOG_DIR:?}"/*
+rm -rf "${GENERATE_JSON_DIR:?}"/*
+
+
+# step 2: running scripts and execute `test_ci_st.py`
+declare -A TEST_CASE_TIMES
+for test_case in "$SHELL_SCRIPTS_DIR"/*.sh; do
+    file_name=$(basename "${test_case}")
+    echo "Running $file_name..."
+    file_name_prefix=$(basename "${file_name%.*}")
+    echo "$file_name_prefix"
+
+    START_TIME=$(date +%s)
+    # create empty json file to receive the result parsed from log
+    touch "$GENERATE_JSON_DIR/$file_name_prefix.json"
+
+    # if executing the shell has failed, then just exit, no need to compare.
+    bash "$test_case" | tee "$GENERATE_LOG_DIR/$file_name_prefix.log"
+    SCRIPT_EXITCODE=${PIPESTATUS[0]}
+    if [ "$SCRIPT_EXITCODE" -ne 0 ]; then
+        echo "Script $file_name has failed. Exit!"
+        exit 1
+    fi
+
+    END_TIME=$(date +%s)
+    ELAPSED_TIME=$((END_TIME-START_TIME))
+    MINUTES=$((ELAPSED_TIME / 60))
+    ELAPSED_SECS=$((ELAPSED_TIME % 60))  # not SECONDS: bash keeps incrementing that special variable
+    TEST_CASE_TIMES["$file_name"]="$MINUTES m $ELAPSED_SECS s"
+
+    if [ "$MINUTES" -gt 0 ]; then
+        echo "$(printf '*%.0s' {1..20}) Execution Time for $file_name: *${MINUTES}m ${ELAPSED_SECS}s* $(printf '*%.0s' {1..20})"
+    else
+        echo "$(printf '*%.0s' {1..20}) Execution Time for $file_name: *${ELAPSED_SECS}s* $(printf '*%.0s' {1..20})"
+    fi
+    PYTEST_EXITCODE=0
+    if [[ $file_name == inference* ]]; then
+        echo "st is an inference task, skip compare result"
+    else
+        # compare the parsed log against the baseline; record pytest's own exit status
+        pytest -x "$BASE_DIR/test_tools/test_ci_st.py" \
+            --baseline-json "$BASELINE_DIR/$file_name_prefix.json" \
+            --generate-log "$GENERATE_LOG_DIR/$file_name_prefix.log" \
+            --generate-json "$GENERATE_JSON_DIR/$file_name_prefix.json" || PYTEST_EXITCODE=$?
+    fi
+
+    echo $PYTEST_EXITCODE
+    if [ $PYTEST_EXITCODE -ne 0 ]; then
+        echo "$file_name_prefix compare to baseline has failed, check it!"
+        exit 1
+    else
+        echo "Pretrain $file_name_prefix execution success."
+    fi
+
+done
+
+echo "$(printf '*%.0s' {1..40})"
+echo "* Summary of Execution Times for All Test Cases *"
+echo "$(printf '*%.0s' {1..40})"
+for file_name in "${!TEST_CASE_TIMES[@]}"; do
+    echo "* Execution Time for $file_name: ${TEST_CASE_TIMES[$file_name]} *"
+done
+echo "$(printf '*%.0s' {1..40})"
+
diff --git a/tests/mindspore/st/test_tools/acquire_json.py b/tests/mindspore/st/test_tools/acquire_json.py
new file mode 100644
index 00000000..4f56c109
--- /dev/null
+++ b/tests/mindspore/st/test_tools/acquire_json.py
@@ -0,0 +1,76 @@
+import re
+import json
+import os
+
+
+def check_is_valid(actual_val, expected_val, margin=0.01, greater=True, message=None):
+    cond1 = actual_val > expected_val if greater else actual_val < expected_val
+    cond2 = abs(actual_val - expected_val) / expected_val > margin
+    if cond1 and cond2:
+        if message:
+            raise AssertionError(message)
+        else:
+            raise AssertionError
+
+
+def transfer_logs_as_json(log_file, output_json_file):
+    """
+    Read a log file from the input path, and return the
+    summary specified as input as a list
+
+    Args:
+        log_file: str, path to the dir where the logs are located.
+        output_json_file: str, path of the json file transferred from the logs.
+    
+    Returns:
+        data: json, the values parsed from the log, formatted as a json file.
+    """
+    
+    log_pattern = re.compile(
+        r"elapsed time per iteration \(ms\):\s+([0-9.]+)\s+\|.*?loss:\s+([0-9.]+E[+-][0-9]+)"
+    )
+
+    memory_pattern = re.compile(
+        r"\[Rank (\d+)\] \(after \d+ iterations\) memory \(MB\) \| allocated: ([0-9.]+) \| max allocated: ([0-9.]+)"
+    )
+
+    data = {
+        "loss": [],
+        "time": [],
+        "memo info": []
+    }
+    with open(log_file, "r") as f:
+        log_content = f.read()
+
+    log_matches = log_pattern.findall(log_content)
+    memory_matches = memory_pattern.findall(log_content)
+
+    if log_matches:
+        data["loss"] = [float(match[1]) for match in log_matches]
+        data["time"] = [float(match[0]) for match in log_matches]
+
+    if memory_matches:
+        memo_info = [
+            {
+                "rank": int(match[0]),
+                "allocated memory": float(match[1]),
+                "max allocated memory": float(match[2])
+            }
+            for match in memory_matches
+        ]
+        data["memo info"] = sorted(memo_info, key=lambda x: x["rank"])
+
+    with open(output_json_file, "w") as outfile:
+        json.dump(data, outfile, indent=4)
+    os.chmod(output_json_file, 440)
+
+
+def read_json(file):
+    """
+    Read baseline and new generate json file
+    """
+    if os.path.exists(file):
+        with open(file) as f:
+            return json.load(f)
+    else:
+        raise FileExistsError("The file does not exist !")
diff --git a/tests/mindspore/st/test_tools/test_ci_st.py b/tests/mindspore/st/test_tools/test_ci_st.py
new file mode 100644
index 00000000..070db8b3
--- /dev/null
+++ b/tests/mindspore/st/test_tools/test_ci_st.py
@@ -0,0 +1,104 @@
+import pytest
+from test_tools.acquire_json import transfer_logs_as_json, read_json, check_is_valid
+
+
+WARM_UP = 5
+
+
+class TestCIST:
+
+    margin_loss = 0.01 # loss可允许误差范围
+    margin_time_percent = 0.05 # 性能可允许波动百分比
+    margin_memory_percent = 0.1 # 内存可允许波动百分比
+
+    def _get_baseline(self, baseline_json):
+        # acquire expected results
+        self.expected = read_json(baseline_json)
+        self.warm_up = self.expected.get("warm_up", WARM_UP)
+
+    def _get_actual(self, generate_log, generate_json):
+        # acquire actual results
+        transfer_logs_as_json(generate_log, generate_json)
+        self.actual = read_json(generate_json)
+
+    def _test_helper(self, test_obj):
+        """
+        Core test function
+
+        Args:
+            test_obj: the object we want to test compare.
+            test_type: deterministic or approximate, default is None.
+
+        Here we temperally test `loss`, 'time' and `allocated memory`
+        """
+        comparison_selection = {
+            "loss": self._compare_loss,
+            "time": self._compare_time,
+            "memo info": self._compare_memory
+        }
+
+        if test_obj in comparison_selection:
+            print(f"===================== Begin comparing {test_obj} ===================")
+            expected_list = self.expected[test_obj]
+            actual_list = self.actual[test_obj]
+            print(f"The list of expected values: {expected_list}")
+            print(f"The list of actual values: {actual_list}")
+            # Check if lists exist and are non-empty
+            if not actual_list:
+                raise ValueError(f"Actual list for {test_obj} is empty or not found. Maybe program has failed! Check it.")
+
+            # Check if lists have the same length
+            if len(expected_list) != len(actual_list):
+                raise ValueError(f"Actual lengths of the lists for {test_obj} do not match. Maybe program has failed! Check it.")
+
+            compare_func = comparison_selection[test_obj]
+            compare_func(expected_list, actual_list)
+        else:
+            raise ValueError(f"Unsupported test object: {test_obj}")
+
+    def _compare_loss(self, expected_list, actual_list):
+        # Because "deterministic computation" affects the throughput, so we just test
+        # lm loss in case of approximation.
+        for step, (expected_val, actual_val) in enumerate(zip(expected_list, actual_list)):
+            print(f"Checking step {step + 1} for loss")
+            if actual_val != pytest.approx(expected=expected_val, rel=self.margin_loss):
+                raise AssertionError(f"The loss at step {step} should be approximate to {expected_val} but it is {actual_val}.")
+
+    def _compare_time(self, expected_list, actual_list):
+        # First few iterations might take a little longer. So we take the last 70 percent of the timings
+        expected_steps = len(expected_list) - self.warm_up
+        actual_steps = len(actual_list) - self.warm_up
+        if expected_steps <= 0 or actual_steps <= 0:
+            raise ValueError(f"Warm up steps must less than expected steps {len(expected_list)} or actual steps {len(actual_list)}")
+        expected_avg_time = sum(expected_list[self.warm_up:]) / expected_steps
+        actual_avg_time = sum(actual_list[self.warm_up:]) / actual_steps
+
+        check_is_valid(
+            actual_val=actual_avg_time,
+            expected_val=expected_avg_time,
+            margin=self.margin_time_percent,
+            greater=True,
+            message=f"The actual avg time {actual_avg_time} exceed expected avg time {expected_avg_time}"
+        )
+
+    def _compare_memory(self, expected_list, actual_list):
+        for expected_val, actual_val in zip(expected_list, actual_list):
+            check_is_valid(
+                actual_val=actual_val["allocated memory"],
+                expected_val=expected_val["allocated memory"],
+                margin=self.margin_memory_percent,
+                greater=True,
+                message=f'The actual memory {actual_val["allocated memory"]} seems to be abnormal compare to expected {expected_val["allocated memory"]}.'
+            )
+            check_is_valid(
+                actual_val=actual_val["max allocated memory"],
+                expected_val=expected_val["max allocated memory"],
+                margin=self.margin_memory_percent,
+                greater=True,
+                message=f'The actual max memory {actual_val["max allocated memory"]} seems to be abnormal compare to expected {expected_val["max allocated memory"]}.'
+            )
+
+    def test_loss(self, baseline_json, generate_log, generate_json):
+        self._get_baseline(baseline_json)
+        self._get_actual(generate_log, generate_json)
+        self._test_helper("loss")
-- 
Gitee