From 20d139bd26598344bfc32faa0ce20a2999ceeff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E9=91=AB?= <18810011293@163.com> Date: Sun, 27 Apr 2025 02:57:49 +0000 Subject: [PATCH 01/97] =?UTF-8?q?!2651=20=E3=80=90AR20241227785719?= =?UTF-8?q?=E3=80=91fp8/hi8=20weight=20only=20Merge=20pull=20request=20!26?= =?UTF-8?q?51=20from=20=E5=BC=A0=E9=91=AB/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hif8_fp8_weight_quantization/README_CN.md | 45 +++++ .../requirements.txt | 7 + .../src/quantization.cfg | 5 + .../src/run_llama7b_quantization.py | 154 ++++++++++++++++++ .../hif8_fp8_weight_quantization/src/utils.py | 82 ++++++++++ 5 files changed, 293 insertions(+) create mode 100644 python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md create mode 100644 python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/requirements.txt create mode 100644 python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/quantization.cfg create mode 100644 python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py create mode 100644 python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py diff --git a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md new file mode 100644 index 000000000..04b9c0973 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md @@ -0,0 +1,45 @@ +# FP8/HIF8量化 + +## 1 FP8/HIF8量化前提 + +### 1.1 安装依赖 + +本sample依赖包可参考[requirements.txt](requirements.txt) + +### 1.2 模型和数据集准备 + +本sample以Llama2-7b模型,pileval和wikitext2数据集为示例,请用户自行下载,并适配utils.py文件中加载数据集和模型的路径。当前sample中数据集保存目录需根据实际保存目录修改。 + +### 1.3 简易量化配置 +./src/quantization.cfg文件为用户自定义的简易量化配置,具体表示信息如下: + +| 字段 |类型| 说明 | 默认值 | 取值范围 | +|:--| :-: | 
:-- | :-: | :-: | +|skip_layers|str|跳过量化的层 |/|/| +|weight_only_config.weight_compress_only|bool|是否为仅权重量化|False|True/False| +|weight_only_config.wts_type|enum|量化后权重类型|INT8|INT8/MXFP4_E2M1/HIFLOAT8/FLOAT8_E4M3FN| + +## 2 FLOAT8_E4M3FN量化示例 +> 当前quantization.cfg文件中weight_only_config.wts_type设置的值为FLOAT8_E4M3FN,如果需要HIFLOAT8仅权重量化,请适配修改quantization.cfg + +> 如果要验证deploy模型,需要设置save_post_quant_model接口中参数mode为'deploy',并将生成的部署模型搬到npu上进行推理 + +### 2.1 使用接口方式调用 + +请在当前目录执行如下命令运行示例程序,用户需根据实际情况修改示例程序中的模型和数据集路径: + +`CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python3 src/run_llama7b_quantization.py` + +若出现如下信息,则说明量化成功: + +```none +Test time taken: 1.0 min 38.24865388870239 s +Score: 5.48 +``` + +推理成功后,在当前目录会生成量化日志文件./amct_log/amct_pytorch.log和./output文件夹,该文件夹内包含以下内容: + +- config.json:量化配置文件,描述了如何对模型中的每一层进行量化。 +- record.txt:量化因子记录文件。 + +> 如果outputs目录下已经存在量化配置文件或量化因子记录文件,再次运行示例程序时,如果新生成的文件与已有文件同名,则会覆盖已有的量化配置文件或量化因子记录文件。 diff --git a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/requirements.txt b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/requirements.txt new file mode 100644 index 000000000..55441d062 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/requirements.txt @@ -0,0 +1,7 @@ +torch==2.1.0 +transformers==4.40.0 +accelerate==0.30.1 +datasets==2.19.1 +sentencepiece==0.2.0 +numpy==1.23.5 +protobuf==3.20.2 \ No newline at end of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/quantization.cfg b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/quantization.cfg new file mode 100644 index 000000000..2d8b3dcc3 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/quantization.cfg @@ -0,0 +1,5 @@ +skip_layers: "lm_head" +weight_only_config: { + weight_compress_only: True + wts_type: FLOAT8_E4M3FN +} \ No newline at end of file diff --git 
a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py new file mode 100644 index 000000000..092238d22 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py @@ -0,0 +1,154 @@ +""" +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + + +import os +import copy +import time +import tqdm +import torch +import torch.nn as nn +from transformers import AutoTokenizer, AutoConfig +from accelerate import infer_auto_device_map, dispatch_model +from accelerate.utils.modeling import get_balanced_memory + +from utils import get_loaders, get_llama2, get_calib_dataset +import amct_pytorch as amct + + +def build_model_and_enc(model, model_path, gpu_num): + config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) + if "mpt" in config.__class__.__name__.lower(): + enc = AutoTokenizer.from_pretrained( + config.tokenizer_name, trust_remote_code=True + ) + else: + enc = AutoTokenizer.from_pretrained( + model_path, use_fast=False, trust_remote_code=True + ) + + # Move the model to GPU (as much as possible) for LM evaluation + # max_memory = ['0:16GiB', '1:16GiB','2:16GiB', 'cpu:30GiB'], '0' means the first GPU that you specify. 
+ # I don't recommend use 16GiB, we need to reserve some space for other tensors during calculation + # please see the recommand memeory allocation in the Word file + # Adjust the max_size accroding to the real situation + # a clever way: + + max_memory = [] + for i in range(gpu_num): + max_memory.append(f'{i}:12GiB') + max_memory.append('cpu:80GiB') + print('Max_memory allocation: \n', max_memory) + + max_memory = [v.split(":") for v in (max_memory or [])] + max_memory = {(int(k) if k.isdigit() else k): v for k, v in max_memory} + kwargs = { + "max_memory": get_balanced_memory( + model, max_memory if len(max_memory) > 0 else None + ) + } + model.tie_weights() + device_map = infer_auto_device_map( + model, + no_split_module_classes=[ + "LlamaDecoderLayer", + ], + **kwargs, + ) + model = dispatch_model(model, device_map=device_map, + offload_dir=os.path.join(model_path, 'offload_dir')) + + return model, enc + +if __name__ == '__main__': + model, model_path = get_llama2('7b') + model = model.eval() + copied_model = copy.deepcopy(model) + gpu_num = torch.cuda.device_count() + model, enc = build_model_and_enc(model, model_path, gpu_num) + + proto_path = './src/quantization.cfg' + config_file = './output/config.json' + record_file = './output/record.txt' + + test_start_time = time.time() + # Phase1: generate quant config json + amct.create_post_quant_config(config_file, + model, + config_defination=proto_path) + + # Phase2: do weights calibration and generate calibration model + samples = get_calib_dataset( + data="pileval", tokenizer=enc, n_samples=512, block_size=256 + ) + samples = torch.cat(samples, dim=0)[:1,:] + model.config.use_cache = False + post_quant_model = amct.create_post_quant_model(config_file, + record_file, + model) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + post_quant_model.config.use_cache = False + with torch.no_grad(): + post_quant_model(samples.to(next(post_quant_model.parameters()).device)) + if torch.cuda.is_available(): + 
torch.cuda.empty_cache() + test_end_time = time.time() + total_time = test_end_time - test_start_time + print('Calibration time taken: ', total_time // 60, 'min ', total_time%60, 's') + # save memory, del unuse model + del post_quant_model + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + model, enc = build_model_and_enc(copied_model, model_path, gpu_num) + + # Phase3: save fakequant model + testenc = get_loaders(dataset_name='wikitext2', + enc=enc, + seqlen=model.seqlen) + + testenc = testenc.input_ids.to(model.device) + fake_quant_model = amct.save_post_quant_model(record_file, model, mode='fakequant') + nsamples = testenc.numel() // model.seqlen + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Phase4: Test ppl result + nlls = [] + test_start_time = time.time() + for i in tqdm.tqdm(range(nsamples), desc="evaluating..."): + batch = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)].to( + model.device + ) + with torch.no_grad(): + lm_logits = fake_quant_model(batch).logits + shift_logits = lm_logits[:, :-1, :].contiguous().float() + shift_labels = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)][:, 1:] + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct( + shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1) + ) + neg_log_likelihood = loss.float() * model.seqlen + nlls.append(neg_log_likelihood) + test_end_time = time.time() + + ppl = torch.exp(torch.stack(nlls).sum() / (nsamples * model.seqlen)) + + total_time = test_end_time - test_start_time + print('Test time taken: ', total_time // 60, 'min ', total_time%60, 's' ) + print('Score: ', ppl.item()) \ No newline at end of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py new file mode 100644 index 000000000..af20318be --- /dev/null +++ 
b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py @@ -0,0 +1,82 @@ +""" +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import torch +import torch.nn as nn +from datasets import load_dataset,load_from_disk + +def get_llama2(model, seqlen=2048): + '''If model is specified from ['7b', '13b', '70b'], then we load official pretrained model; + If you want to load checkpoints other than the official ones, please specifiy the model path, + otherwise please choose from ['7b', '13b', '70b'] for better clarity + ''' + + def skip(*args, **kwargs): + pass + + if model in ['7b', '13b', '70b']: + model_path = f'/data/Models/pytorch/Llama2/Llama2_{model}_hf' + print(f'Getting official pretrained Llama2-{model}') + else: + model_path = model + torch.nn.init.kaiming_uniform_ = skip + torch.nn.init.uniform_ = skip + torch.nn.init.normal_ = skip + from transformers import LlamaForCausalLM + + model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, offload_folder="offload/") + + model.seqlen = seqlen + return model, model_path + + +def get_loaders(dataset_name: str, enc, seqlen): + if dataset_name == 'wikitext2': + print('Loading dataset: Wikitext2') + testenc = load_dataset('/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py', 'wikitext-2-raw-v1', split='test', trust_remote_code=True) + testenc = enc("\n\n".join(testenc["text"]), return_tensors="pt") + 
+ return testenc + + +def get_calib_dataset(data="pileval", tokenizer=None, n_samples=512, block_size=512): + if data == "pileval": + dataset = load_from_disk('/pile_val_backup') + else: + raise NotImplementedError + dataset = dataset.shuffle(seed=42) + samples = [] + n_run = 0 + for data in dataset: + line = data["text"] + line = line.strip() + line_encoded = tokenizer.encode(line) + if len(line_encoded) > 512: + continue + sample = torch.tensor([line_encoded]) + if sample.numel() == 0: + continue + samples.append(sample) + n_run += 1 + if n_run == n_samples: + break + # now concatenate all samples and split according to block size + cat_samples = torch.cat(samples, dim=1) + n_split = cat_samples.shape[1] // block_size + print(f" * Split into {n_split} blocks") + return [ + cat_samples[:, i * block_size : (i + 1) * block_size] for i in range(n_split) + ] -- Gitee From fbcdc1308f223968f8d6ca2269ee31a5bb8e96c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E9=91=AB?= <18810011293@163.com> Date: Tue, 29 Apr 2025 07:44:34 +0000 Subject: [PATCH 02/97] =?UTF-8?q?!2652=20=E3=80=90AR20241227785719?= =?UTF-8?q?=E3=80=91fp8/hi8=20weight=20only=20npu=20Merge=20pull=20request?= =?UTF-8?q?=20!2652=20from=20=E5=BC=A0=E9=91=AB/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hif8_fp8_weight_quantization/README_CN.md | 15 +++++-- .../src/run_llama7b_quantization.py | 39 ++++++++++++------- .../hif8_fp8_weight_quantization/src/utils.py | 31 +++++---------- 3 files changed, 46 insertions(+), 39 deletions(-) diff --git a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md index 04b9c0973..2c5cc0108 100644 --- a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md +++ b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/README_CN.md @@ -8,7 +8,7 
@@ ### 1.2 模型和数据集准备 -本sample以Llama2-7b模型,pileval和wikitext2数据集为示例,请用户自行下载,并适配utils.py文件中加载数据集和模型的路径。当前sample中数据集保存目录需根据实际保存目录修改。 +本sample以Llama2-7b模型,pileval和wikitext2数据集为示例,请用户自行下载。 ### 1.3 简易量化配置 ./src/quantization.cfg文件为用户自定义的简易量化配置,具体表示信息如下: @@ -22,13 +22,20 @@ ## 2 FLOAT8_E4M3FN量化示例 > 当前quantization.cfg文件中weight_only_config.wts_type设置的值为FLOAT8_E4M3FN,如果需要HIFLOAT8仅权重量化,请适配修改quantization.cfg -> 如果要验证deploy模型,需要设置save_post_quant_model接口中参数mode为'deploy',并将生成的部署模型搬到npu上进行推理 ### 2.1 使用接口方式调用 -请在当前目录执行如下命令运行示例程序,用户需根据实际情况修改示例程序中的模型和数据集路径: +请在当前目录执行如下命令运行示例程序 -`CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python3 src/run_llama7b_quantization.py` +验证fakequant模型脚本: + +`CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python3 src/run_llama7b_quantization.py --test_on_npu_flag=false --calibration_data=/pile_val_backup/ --verify_data=/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py --model=/data/Models/pytorch/Llama2/Llama2_7b_hf` + +验证deploy模型脚本(需要适配npu相关环境): + +`python3 src/run_llama7b_quantization.py --test_on_npu_flag=true` + +> test_on_npu_flag参数表明是否生成部署模型在npu上推理,calibration_data参数为校准集路径,verify_data为验证集的路径,model为模型存放路径 若出现如下信息,则说明量化成功: diff --git a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py index 092238d22..2b2f14603 100644 --- a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py +++ b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/run_llama7b_quantization.py @@ -14,7 +14,7 @@ # limitations under the License. 
""" - +import argparse import os import copy import time @@ -74,7 +74,14 @@ def build_model_and_enc(model, model_path, gpu_num): return model, enc if __name__ == '__main__': - model, model_path = get_llama2('7b') + parser = argparse.ArgumentParser() + parser.add_argument('--test_on_npu_flag', type=lambda x: (str(x).lower() == 'true')) + parser.add_argument('--calibration_data', type=str, default='/pile_val_backup') + parser.add_argument('--verify_data', type=str, default='/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py') + parser.add_argument('--model', type=str, default='/data/Models/pytorch/Llama2/Llama2_7b_hf') + + args = parser.parse_args() + model, model_path = get_llama2(args.model) model = model.eval() copied_model = copy.deepcopy(model) gpu_num = torch.cuda.device_count() @@ -92,20 +99,20 @@ if __name__ == '__main__': # Phase2: do weights calibration and generate calibration model samples = get_calib_dataset( - data="pileval", tokenizer=enc, n_samples=512, block_size=256 + data_path=args.calibration_data, tokenizer=enc, n_samples=512, block_size=256 ) samples = torch.cat(samples, dim=0)[:1,:] - model.config.use_cache = False + post_quant_model = amct.create_post_quant_model(config_file, record_file, model) if torch.cuda.is_available(): torch.cuda.empty_cache() - post_quant_model.config.use_cache = False + with torch.no_grad(): post_quant_model(samples.to(next(post_quant_model.parameters()).device)) - if torch.cuda.is_available(): - torch.cuda.empty_cache() + if torch.cuda.is_available(): + torch.cuda.empty_cache() test_end_time = time.time() total_time = test_end_time - test_start_time print('Calibration time taken: ', total_time // 60, 'min ', total_time%60, 's') @@ -117,12 +124,18 @@ if __name__ == '__main__': model, enc = build_model_and_enc(copied_model, model_path, gpu_num) # Phase3: save fakequant model - testenc = get_loaders(dataset_name='wikitext2', + testenc = get_loaders(data_path=args.verify_data, enc=enc, 
seqlen=model.seqlen) testenc = testenc.input_ids.to(model.device) - fake_quant_model = amct.save_post_quant_model(record_file, model, mode='fakequant') + + if args.test_on_npu_flag: + quant_model = amct.save_post_quant_model(record_file, model, mode='deploy') + quant_model = quant_model.npu() + else: + quant_model = amct.save_post_quant_model(record_file, model, mode='fakequant') + nsamples = testenc.numel() // model.seqlen if torch.cuda.is_available(): @@ -133,12 +146,12 @@ if __name__ == '__main__': test_start_time = time.time() for i in tqdm.tqdm(range(nsamples), desc="evaluating..."): batch = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)].to( - model.device + quant_model.device ) with torch.no_grad(): - lm_logits = fake_quant_model(batch).logits - shift_logits = lm_logits[:, :-1, :].contiguous().float() - shift_labels = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)][:, 1:] + lm_logits = quant_model(batch).logits + shift_logits = lm_logits[:, :-1, :].contiguous().float().cpu() + shift_labels = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)][:, 1:].cpu() loss_fct = nn.CrossEntropyLoss() loss = loss_fct( shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1) diff --git a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py index af20318be..586916fbd 100644 --- a/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py +++ b/python/level1_single_api/9_amct/amct_pytorch/hif8_fp8_weight_quantization/src/utils.py @@ -18,45 +18,32 @@ import torch import torch.nn as nn from datasets import load_dataset,load_from_disk -def get_llama2(model, seqlen=2048): - '''If model is specified from ['7b', '13b', '70b'], then we load official pretrained model; - If you want to load checkpoints other than the official ones, please specifiy the model path, - otherwise please choose from 
['7b', '13b', '70b'] for better clarity - ''' - +def get_llama2(model_path, seqlen=2048): def skip(*args, **kwargs): pass - if model in ['7b', '13b', '70b']: - model_path = f'/data/Models/pytorch/Llama2/Llama2_{model}_hf' - print(f'Getting official pretrained Llama2-{model}') - else: - model_path = model torch.nn.init.kaiming_uniform_ = skip torch.nn.init.uniform_ = skip torch.nn.init.normal_ = skip from transformers import LlamaForCausalLM - model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, offload_folder="offload/") + model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, offload_folder="offload/") model.seqlen = seqlen return model, model_path -def get_loaders(dataset_name: str, enc, seqlen): - if dataset_name == 'wikitext2': - print('Loading dataset: Wikitext2') - testenc = load_dataset('/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py', 'wikitext-2-raw-v1', split='test', trust_remote_code=True) - testenc = enc("\n\n".join(testenc["text"]), return_tensors="pt") +def get_loaders(data_path: str, enc, seqlen): + + print('Loading dataset: Wikitext2') + testenc = load_dataset(data_path, 'wikitext-2-raw-v1', split='test', trust_remote_code=True) + testenc = enc("\n\n".join(testenc["text"]), return_tensors="pt") return testenc -def get_calib_dataset(data="pileval", tokenizer=None, n_samples=512, block_size=512): - if data == "pileval": - dataset = load_from_disk('/pile_val_backup') - else: - raise NotImplementedError +def get_calib_dataset(data_path, tokenizer=None, n_samples=512, block_size=512): + dataset = load_from_disk(data_path) dataset = dataset.shuffle(seed=42) samples = [] n_run = 0 -- Gitee From 7f26cead2c9063679ce9933fd896cb31ae59fa50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AE=81?= Date: Tue, 13 May 2025 09:26:42 +0000 Subject: [PATCH 03/97] =?UTF-8?q?!2654=20fix=20error=20in=20torch=202.1=20?= 
=?UTF-8?q?Merge=20pull=20request=20!2654=20from=20=E6=9D=8E=E5=AE=81/mast?= =?UTF-8?q?er?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dataflow/plugin/torch/torch_plugin.py | 69 ++++++++++++------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/inference/dataflow/py_dflow/python/dataflow/plugin/torch/torch_plugin.py b/inference/dataflow/py_dflow/python/dataflow/plugin/torch/torch_plugin.py index 1bbf8fb88..684a859d7 100644 --- a/inference/dataflow/py_dflow/python/dataflow/plugin/torch/torch_plugin.py +++ b/inference/dataflow/py_dflow/python/dataflow/plugin/torch/torch_plugin.py @@ -18,6 +18,7 @@ import functools import inspect import traceback +import threading from typing import Union, List import dataflow.data_type as dt import dataflow.dataflow as df @@ -40,47 +41,63 @@ _npu_actor_model_support_args_ = _npu_model_support_args_ + [ "input_descs", ] +_df_to_torch_dtype = None +_torch_to_df_dtype = None +_lock = threading.Lock() -def _convert_df_to_torch_tensor_dtype(df_dtype): - import torch - - df_to_torch_dtype = { - dt.DT_FLOAT: torch.float32, - dt.DT_FLOAT16: torch.float16, - dt.DT_BF16: torch.bfloat16, - dt.DT_INT8: torch.int8, - dt.DT_INT16: torch.int16, - dt.DT_UINT16: torch.uint16, - dt.DT_UINT8: torch.uint8, - dt.DT_INT32: torch.int32, - dt.DT_INT64: torch.int64, - dt.DT_UINT32: torch.uint32, - dt.DT_UINT64: torch.uint64, - dt.DT_BOOL: torch.bool, - dt.DT_DOUBLE: torch.float64, - } - return df_to_torch_dtype[df_dtype] - -def _convert_torch_to_df_tensor_dtype(torch_dtype): +def _initialize_torch_to_df_dtype(): import torch - torch_to_df_dtype = { + global _torch_to_df_dtype + global _df_to_torch_dtype + _torch_to_df_dtype = { torch.float32: dt.DT_FLOAT, torch.float16: dt.DT_FLOAT16, torch.bfloat16: dt.DT_BF16, torch.int8: dt.DT_INT8, torch.int16: dt.DT_INT16, - torch.uint16: dt.DT_UINT16, torch.uint8: dt.DT_UINT8, torch.int32: dt.DT_INT32, torch.int64: dt.DT_INT64, - torch.uint32: 
dt.DT_UINT32, - torch.uint64: dt.DT_UINT64, torch.bool: dt.DT_BOOL, torch.float64: dt.DT_DOUBLE, } - return torch_to_df_dtype[torch_dtype] + if torch.__version__ >= "2.3": + _torch_to_df_dtype.update( + { + torch.uint16: dt.DT_UINT16, + torch.uint32: dt.DT_UINT32, + torch.uint64: dt.DT_UINT64, + } + ) + _df_to_torch_dtype = {v: k for k, v in _torch_to_df_dtype.items()} + + +def _convert_df_to_torch_tensor_dtype(df_dtype): + global _df_to_torch_dtype + # 使用锁来确保初始化操作是线程安全的 + if _df_to_torch_dtype is None: + with _lock: # 获取锁 + if _df_to_torch_dtype is None: # 双重检查,确保只有一个线程初始化 + _initialize_torch_to_df_dtype() + + if df_dtype not in _df_to_torch_dtype: + raise ValueError(f"df_dtype {df_dtype} is not supported") + return _df_to_torch_dtype[df_dtype] + + +def _convert_torch_to_df_tensor_dtype(torch_dtype): + global _torch_to_df_dtype + # 使用锁来确保初始化操作是线程安全的 + if _torch_to_df_dtype is None: + with _lock: # 获取锁 + if _torch_to_df_dtype is None: # 双重检查,确保只有一个线程初始化 + _initialize_torch_to_df_dtype() + + if torch_dtype not in _torch_to_df_dtype: + raise ValueError(f"torch_dtype {torch_dtype} is not supported") + return _torch_to_df_dtype[torch_dtype] def _prepare_inputs(inputs: Union[List[fw.FlowMsg]], input_num): -- Gitee From fdf8ca5fa8d01941c33dd2307dec05f02ee903f9 Mon Sep 17 00:00:00 2001 From: renjie Date: Thu, 22 May 2025 11:43:43 +0000 Subject: [PATCH 04/97] =?UTF-8?q?!2656=20=E3=80=90tiling=E4=B8=8B=E6=B2=89?= =?UTF-8?q?=E6=A0=B7=E4=BE=8B=E3=80=91=E3=80=90AR20250522891845=E3=80=91Ad?= =?UTF-8?q?dCustomTilingSink=E6=A0=B7=E4=BE=8B=20Merge=20pull=20request=20?= =?UTF-8?q?!2656=20from=20renjie/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../OpImpl/AddCustomTilingSink.json | 40 ++++++ .../AddCustomTilingSink/OpImpl/README.md | 117 ++++++++++++++++++ .../tf_plugin/tensorflow_add_custom_plugin.cc | 22 ++++ .../AddCustomTilingSink/OpImpl/install.sh | 57 +++++++++ .../OpImpl/op_host/add_custom_tiling_sink.cpp | 56 
+++++++++ .../op_host/add_custom_tiling_sink_tiling.cpp | 36 ++++++ .../op_host/add_custom_tiling_sink_tiling.h | 25 ++++ .../op_kernel/add_custom_tiling_sink.cpp | 95 ++++++++++++++ 8 files changed, 448 insertions(+) create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/AddCustomTilingSink.json create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/README.md create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/framework/tf_plugin/tensorflow_add_custom_plugin.cc create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink.cpp create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.cpp create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.h create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_kernel/add_custom_tiling_sink.cpp diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/AddCustomTilingSink.json b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/AddCustomTilingSink.json new file mode 100644 index 000000000..1d93e1f49 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/AddCustomTilingSink.json @@ -0,0 +1,40 @@ +[ + { + "op": "AddCustomTilingSink", + "language": "cpp", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float32" + ] + }, + { + "name": "y", + "param_type": "optional", + "format": [ + "ND" + ], + "type": [ + "float32" + ] + } + ], + "output_desc": [ + { + "name": "z", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float32" + ] + } + ] + } +] \ No 
newline at end of file diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/README.md new file mode 100644 index 000000000..a89d51c80 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/README.md @@ -0,0 +1,117 @@ + +## 概述 +本样例基于AddCustom算子工程,提供了支持Tiling下沉的自定义算子开发样例。 +若要使能tiling下沉,算子tiling函数必须独立实现,详细开发指导请参考[Tiling下沉自定义算子开发指南](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/Ascendcopdevg/atlas_ascendc_10_00014.html) + +## 目录结构介绍 +``` +├─OpImpl // 算子实现 +│ ├─framework // 算子插件实现文件目录 +│ ├─op_host // host侧实现文件 +│ │ ├─add_custom_tiling_sink.cpp // 算子原型定义、tiling函数注册等 +│ │ │ add_custom_tiling_sink_tiling.cpp // 算子tiling函数的所有实现(必须独立实现于cpp中) +│ │ └─add_custom_tiling_sink_tiling.h // 算子tiling结构体定义 +│ └─op_kernel // kernel侧实现文件 +│ AddCustomTilingSink.json // 算子的原型定义json文件 +│ install.sh // 脚本,调用msOpGen生成自定义算子工程,并编译 +``` + +## 算子描述 +Add算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: +``` +z = x + y +``` +## 算子规格描述 + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x8 * 2048floatND
y8 * 2048floatND
算子输出z8 * 2048floatND
核函数名add_custom
+ +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2 训练系列产品/Atlas 800I A2 推理产品/A200I A2 Box 异构组件 +- Atlas A3 训练系列产品/Atlas A3 推理系列产品 + +## 编译运行样例算子 +针对自定义算子工程,编译运行包含如下步骤: +- 调用msOpGen工具生成自定义算子工程; +- 完成算子host和kernel实现; +- 编译自定义算子工程生成自定义算子包; +- 安装自定义算子包到自定义算子库中; +- 调用执行自定义算子; + +详细操作如下所示。 +### 1. 获取源码包 +编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 + +### 2. 生成自定义算子工程,复制host和kernel实现并编译算子 + - 切换到msOpGen脚本install.sh所在目录 + ```bash + # 若开发者以git命令行方式clone了master分支代码,并切换目录 + cd ${git_clone_path}/samples/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl + ``` + + - 调用脚本,生成自定义算子工程,复制host和kernel实现并编译算子 + - 方式一:配置环境变量运行脚本 + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 运行install.sh脚本 + ```bash + bash install.sh -v [SOC_VERSION] + ``` + - 方式二:指定命令行安装路径来运行脚本 + ```bash + bash install.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH] + ``` + 参数说明: + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas A2 训练系列产品/Atlas 800I A2 推理产品/A200I A2 Box 异构组件 + - Atlas A3 训练系列产品/Atlas A3 推理系列产品 + - ASCEND_INSTALL_PATH:CANN软件包安装路径 + + 脚本运行成功后,会在当前目录下创建CustomOp目录,编译完成后,会在CustomOp/build_out中,生成自定义算子安装包custom_opp_\_\.run,例如“custom_opp_ubuntu_x86_64.run”。 + + +### 3. 
部署自定义算子包 +- 部署自定义算子包前,请确保存在自定义算子包默认部署路径环境变量ASCEND_OPP_PATH + ```bash + echo $ASCEND_OPP_PATH + # 输出示例 /usr/local/Ascend/ascend-toolkit/latest/opp + + # 若没有,则需导出CANN环境变量 + source [ASCEND_INSTALL_PATH]/bin/setenv.bash + # 例如 source /usr/local/Ascend/ascend-toolkit/latest/bin/setenv.bash + ``` + 参数说明: + - ASCEND_INSTALL_PATH:CANN软件包安装路径,一般和上一步中指定的路径保持一致 + +- 在自定义算子安装包所在路径下,执行如下命令安装自定义算子包 + ```bash + cd CustomOp/build_out + ./custom_opp__.run + ``` + 命令执行成功后,自定义算子包中的相关文件将部署至opp算子库环境变量ASCEND_OPP_PATH指向的的vendors/customize目录中。若要执行tiling下沉样例,则算子包不支持通过--install-path指定目录安装。 + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ---------------------------- | +| 2025/5/22 | 新增AddCustomTilingSink算子样例 | diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/framework/tf_plugin/tensorflow_add_custom_plugin.cc b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/framework/tf_plugin/tensorflow_add_custom_plugin.cc new file mode 100644 index 000000000..b96757140 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/framework/tf_plugin/tensorflow_add_custom_plugin.cc @@ -0,0 +1,22 @@ +/* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the Apache License Version 2.0. + * You may not use this file except in compliance with the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * Apache License for more details at + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +#include "register/register.h" + +namespace domi { +// register op info to GE +REGISTER_CUSTOM_OP("AddCustomTilingSink") + .FrameworkType(TENSORFLOW) // type: CAFFE, TENSORFLOW + .OriginOpType("Add") // name in tf module + .ParseParamsByOperatorFn(AutoMappingByOpFn); +} // namespace domi diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh new file mode 100644 index 000000000..5c36ce5f4 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh @@ -0,0 +1,57 @@ +#!/bin/bash +SHORT=v:,i:, +LONG=soc-version:,install-path:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +VERSION_LIST="Ascend910B1" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH + +OP_NAME=AddCustomTilingSink +rm -rf CustomOp +# Generate the op framework +msopgen gen -i $OP_NAME.json -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp +# Copy op implementation files to CustomOp +cp -rf framework CustomOp/;cp -rf op_host CustomOp/;cp 
-rf op_kernel CustomOp/ +#Add Device Compile Task in op_host/CMakeLists.txt +sed -i '$a ascendc_device_library( TARGET cust_opmaster\n OPTION SHARED\n SRC ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_tiling_sink_tiling.cpp)' CustomOp/op_host/CMakeLists.txt +# Build CustomOp project +(cd CustomOp && bash build.sh) \ No newline at end of file diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink.cpp new file mode 100644 index 000000000..c88a110b0 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink.cpp @@ -0,0 +1,56 @@ +/** + * @file add_custom_tiling_sink.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "add_custom_tiling_sink_tiling.h" + +namespace ge { +static graphStatus InferShape(gert::InferShapeContext *context) +{ + const gert::Shape *x1_shape = context->GetInputShape(0); + gert::Shape *y_shape = context->GetOutputShape(0); + *y_shape = *x1_shape; + return GRAPH_SUCCESS; +} + +static graphStatus InferDataType(gert::InferDataTypeContext *context) +{ + const auto inputDataType = context->GetInputDataType(0); + context->SetOutputDataType(0, inputDataType); + return ge::GRAPH_SUCCESS; +} +} // namespace ge + +namespace ops { +class AddCustomTilingSink : public OpDef { +public: + explicit AddCustomTilingSink(const char *name) : OpDef(name) + { + this->Input("x") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}); + this->Input("y") + .ParamType(OPTIONAL) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}) + .ValueDepend(OPTIONAL, DependScope::TILING); // 表示输入y为Tiling值依赖 + this->Output("z") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}); + + this->SetInferShape(ge::InferShape).SetInferDataType(ge::InferDataType); + + this->AICore().SetTiling(optiling::AddCustomSinkTilingFunc); + + this->AICore().AddConfig("ascend910b"); + } +}; +OP_ADD(AddCustomTilingSink); +} // namespace ops diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.cpp new file mode 100644 index 000000000..32ffb8a3e --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.cpp @@ -0,0 +1,36 @@ +/** + * @file add_custom_tiling_sink_tiling.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include "add_custom_tiling_sink_tiling.h" +#include "register/device_op_impl_registry.h" + +namespace optiling { +static constexpr uint32_t BLOCK_DIM = 8; +static constexpr uint32_t TILE_NUM = 8; +static constexpr size_t MAX_WORKSPACE_SIZE = 32; // 能获取到的最大workspace大小 +static constexpr size_t DEFAULT_WORKSPACE_SIZE = 1; +ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext *context) +{ + TilingSinkTilingData tiling; + uint32_t totalLength = context->GetInputTensor(0)->GetShapeSize(); + context->SetBlockDim(BLOCK_DIM); + tiling.set_totalLength(totalLength); + tiling.set_tileNum(TILE_NUM); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + size_t *currentWorkspace = context->GetWorkspaceSizes(1); + currentWorkspace[0] = DEFAULT_WORKSPACE_SIZE; + if (context->GetInputTensor(1) != nullptr && context->GetInputTensor(1)->GetData() == nullptr) { + currentWorkspace[0] = MAX_WORKSPACE_SIZE; + } + return ge::GRAPH_SUCCESS; +} +DEVICE_IMPL_OP_OPTILING(AddCustomTilingSink).Tiling(optiling::AddCustomSinkTilingFunc); // 下沉tiling函数注册 +} // namespace optiling \ No newline at end of file diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.h b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.h new file mode 100644 index 000000000..3230af7ba --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.h @@ -0,0 +1,25 @@ +/** + * @file add_custom_tiling_sink_tiling.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADD_CUSTOM_TILING_SINK_TILING_H +#define ADD_CUSTOM_TILING_SINK_TILING_H +#include "register/tilingdata_base.h" +#include "register/op_def_registry.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(TilingSinkTilingData) +TILING_DATA_FIELD_DEF(uint32_t, totalLength); +TILING_DATA_FIELD_DEF(uint32_t, tileNum); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(AddCustomTilingSink, TilingSinkTilingData) + +ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext* context); +} // namespace optiling +#endif // ADD_CUSTOM_TILING_SINK_TILING_H diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_kernel/add_custom_tiling_sink.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_kernel/add_custom_tiling_sink.cpp new file mode 100644 index 000000000..4b1cb2f1d --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_kernel/add_custom_tiling_sink.cpp @@ -0,0 +1,95 @@ +/** + * @file add_custom_tiling_sink.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "kernel_operator.h" +#include "lib/matmul_intf.h" +namespace AscendC { +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) + { + this->blockLength = totalLength / AscendC::GetBlockNum(); + this->tileNum = tileNum; + if (tileNum == 0 || BUFFER_NUM == 0) { + return; + } + this->tileLength = this->blockLength / tileNum / BUFFER_NUM; + + xGm.SetGlobalBuffer((__gm__ DTYPE_X *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(DTYPE_X)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Y)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Z)); + } + __aicore__ inline void Process() + { + int32_t loopCount = this->tileNum * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); + AscendC::DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, this->tileLength); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + 
inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t blockLength; + uint32_t tileNum; + uint32_t tileLength; +}; +} // namespace AscendC + +extern "C" __global__ __aicore__ void add_custom_tiling_sink(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) +{ + GET_TILING_DATA(tiling_data, tiling); + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); + if ASCEND_IS_AIC { + return; + } + AscendC::KernelAdd op; + op.Init(x, y, z, tiling_data.totalLength, tiling_data.tileNum); + op.Process(); +} \ No newline at end of file -- Gitee From 3337bde74b5f816fef941bcd57f92714c6fbfa78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8?= Date: Thu, 22 May 2025 13:43:27 +0000 Subject: [PATCH 05/97] =?UTF-8?q?!2659=20[feature]torchair=20support=20til?= =?UTF-8?q?ing=20custom=20op=20Merge=20pull=20request=20!2659=20from=20?= =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddCustomTilingSink/AddCustom/README.md | 322 ++++++++++++++++++ .../AddCustom/src/add_custom.py | 78 +++++ .../AddCustom/test_add_custom.py | 34 ++ 3 files changed, 434 insertions(+) create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py diff --git 
a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md new file mode 100644 index 000000000..c2f849645 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md @@ -0,0 +1,322 @@ +## 背景介绍 + +Tiling下沉是在Device侧CPU做Tiling计算。由于NPU中AI Core内部存储无法完全容纳算子输入输出的所有数据,需要每次搬运一部分输入数据进行计算然后搬出,再搬运下一部分输入数据进行计算,该过程称之为Tiling;根据算子的shape等信息来确定数据切分算法相关参数(比如每次搬运的块大小,以及总共循环多少次)的计算程序,称之为Tiling实现。由于Tiling实现中完成的均为标量计算,AI Core并不擅长,故一般在Host侧CPU上执行,但是满足下述条件Tiling实现会下沉到Device侧执行: + +模型为静态shape。 +模型中的算子支持Tiling下沉,比如FusedInferAttentionScore、IncreFlashAttention等融合算子。 +支持Tiling下沉的算子值有依赖,需要满足前一个算子的值有device的执行结果;如果依赖的值是Const,则不需要下沉执行Tiling,编译时会完成Tiling。 + +## 目录结构介绍 + +``` +├── AddCustom // torch注册的自定义算子 +│ ├── src +│ │ ├── add_custom.py // 自定义算子py文件 +│ └── test_add_custom.py // 测试脚本 +``` + +## 代码实现介绍 + +新增自定义算子入图步骤,该过程可参考[torchair社区新增自定义算子入图介绍](https://gitee.com/ascend/torchair/blob/master/CONTRIBUTING.md#converter%E8%A1%A5%E9%BD%90)converter补齐第五小节: +1.下载torchair仓,新建一个add_custom.py文件放在torchair/python/torchair/ops/add_custom.py,然后自定义算子在torch框架中注册: + +```python +# add_custom.py +import torch + +lib = torch.library.Library("air", "FRAGMENT") +lib.define( + """ + add_custom(Tensor x, Tensor y) -> Tensor + """ +) +``` + +2.向torch注册自定义算子meta后端实现,用来完成图模式下的shape推导: + +```python +@torch.library.impl(lib, "add_custom", "Meta") + def kernel_meta(x, y): + return torch.empty_like(x) +``` + +3.codegen生成ge构图api +(1)将REG_OP算子原型放置到codegen/custom_op/custom_reg_op.h文件中,替换原来示例的REG_OP: + +```cpp +#ifndef ASCENDADAPTER2_CUSTOM_REG_OP_H +#define ASCENDADAPTER2_CUSTOM_REG_OP_H +#include "graph/operator_reg.h" + +namespace ge { +REG_OP(AddCustomTilingsink) + .INPUT(x, TensorType::ALL()) + .INPUT(y, TensorType::ALL()) + .OUTPUT(z, TensorType::ALL()) + .OP_END_FACTORY_REG(AddCustomTilingsink) +} + +#endif // ASCENDADAPTER2_CUSTOM_REG_OP_H +``` + +(2)进入torchair仓根目录执行编译命令: 
+ +``` +cd build +cmake .. +make generate_ge_raw_custom_ops +``` + +生成的ge.api函数在codegen/custom\_op/auto\_generated\_ge\_raw\_custom\_ops.py文件中, 内容如下所示: + +```python +# This file is auto-generated +# Summary: total 1, generated 1, skipped 0 +from typing import Any, Dict, List, Tuple, Union, Callable, Optional +from torchair.ge._ge_graph import auto_convert_to_tensor, TensorType +from torchair.ge import Tensor, DataType, attr +from torchair._ge_concrete_graph.ge_converter import ge_op, IrDef + + +# This api is auto-generated from IR AddCustomTilingsink +@auto_convert_to_tensor([False, False], [False, False], inputs_tensor_type=[TensorType.TT_ALL, TensorType.TT_ALL]) +def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): + """REG_OP(AddCustomTilingsink)\n +.INPUT(x, TensorType::ALL())\n +.INPUT(y, TensorType::ALL())\n +.OUTPUT(z, TensorType::ALL())\n +""" + + # process inputs + inputs = { + "x": x, + "y": y, + } + + # process attrs + attrs = { + } + + # process outputs + outputs = [ + "z", + ] + + return ge_op( + op_type="AddCustomTilingsink", + inputs=inputs, + attrs=attrs, + outputs=outputs, + dependencies=dependencies, + ir=IrDef("AddCustomTilingsink") \ + .input("x", "") \ + .input("y", "") \ + .output("z" , "") + ) +``` + +需要修改`from torchair._ge_concrete_graph.ge_converter import ge_op, IrDef` +为``from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef`` + +将上述生成内容拷贝至前面我们新建的add_custom.py文件中。 + +4.向torchair注册自定义算子的converter: + +```python +@register_fx_node_ge_converter(torch.ops.air.add_custom.default) +def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): + return AddCustomTilingsink(x, y) +``` + +5.单算子部分为用户自行注册,此处预留未实现: + +```python +def kernel_impl(x, y): + raise NotImplementedError("torch.ops.air.add_custom kernel_impl is not implemented!") + + +torch.library.impl(lib, "add_custom", "CPU")(kernel_impl) +torch.library.impl(lib, "add_custom", 
"PrivateUse1")(kernel_impl) +``` + +6.调用,需要import前面新建的add_custom.py: + +```python +import torchair.ops.add_custom + +def forward(self, x, y): + z = torch.ops.air.add_custom.default(x, y) + return z +``` + +## 运行样例算子 + +### 1. 编译安装torchair包 + +1.编译,进入torchair根目录,执行: + +``` +bash build.sh -c +``` + +2.安装,进入torchair根目录,执行注意pip3.x为对应Python版本: + +``` +pip3.x uninstall torchair +pip3.x install output/torchair_xxxx.whl +``` + +3.删除环境上torch_npu模块下的torchair子模块,使得我们安装的torchair模块生效: + +``` +rm -rf /usr/local/python3.8.1/lib/python3.8/site-packages/torch_npu/dynamo/torchair +``` + +查看torch_npu路径: + +``` +pip3.x show torch_npu +``` +### 2. 部署自定义算子包 +请参考[tiling下沉样例](https://gitee.com/ascend/samples/tree/master/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl)部署自定义算子包章节: + +### 3. 执行脚本 + +需要脚本中先打开tiling下沉的开关 + +```python +from torchair.configs.compiler_config import CompilerConfig + +config = CompilerConfig() +config.experimental_config.tiling_schedule_optimize = True +``` + +## 更新说明 + +| 时间 | 更新事项 | +| --------- | ------------ | +| 2025/5/22 | 新增本readme | + +## add_custom.py + +```python +from typing import ( + Optional, + Union, + List, +) +import torch +from torchair._ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter +from torchair.ge._ge_graph import Tensor, TensorSpec + +lib = torch.library.Library("air", "FRAGMENT") +lib.define( + """ + add_custom(Tensor x, Tensor y) -> Tensor + """ +) + + +@torch.library.impl(lib, "add_custom", "Meta") +def kernel_meta(x, y): + return torch.empty_like(x) + + +def kernel_impl(x, y): + raise NotImplementedError("torch.ops.air.add_custom kernel_impl is not implemented!") + + +torch.library.impl(lib, "add_custom", "CPU")(kernel_impl) +torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl) + + +@register_fx_node_ge_converter(torch.ops.air.add_custom.default) +def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): + return 
AddCustomTilingsink(x, y) + + +# This file is auto-generated by +# Summary: total 1, generated 1, skipped 0 +from typing import Any, Dict, List, Tuple, Union, Callable, Optional +from torchair.ge._ge_graph import auto_convert_to_tensor, TensorType +from torchair.ge import Tensor, DataType, attr +from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef + + +# This api is auto-generated from IR AddCustomTilingsink +@auto_convert_to_tensor([False, False], [False, False], inputs_tensor_type=[TensorType.TT_ALL, TensorType.TT_ALL]) +def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): + """REG_OP(AddCustomTilingsink)\n +.INPUT(x, TensorType::ALL())\n +.INPUT(y, TensorType::ALL())\n +.OUTPUT(z, TensorType::ALL())\n +""" + + # process inputs + inputs = { + "x": x, + "y": y, + } + + # process attrs + attrs = { + } + + # process outputs + outputs = [ + "z", + ] + + return ge_op( + op_type="AddCustomTilingsink", + inputs=inputs, + attrs=attrs, + outputs=outputs, + dependencies=dependencies, + ir=IrDef("AddCustomTilingsink") \ + .input("x", "") \ + .input("y", "") \ + .output("z" , "") + ) + +``` + +## test_add_custom.py + +```python +import torch +import torch_npu +import torchair +from torchair.configs.compiler_config import CompilerConfig +from torchair.core.utils import logger +import logging + +logger.setLevel(logging.DEBUG) +config = CompilerConfig() +config.debug.graph_dump.type = "pbtxt" +config.experimental_config.tiling_schedule_optimize = True +npu_backend = torchair.get_npu_backend(compiler_config=config) + +import torchair.ops._add_custom + +class MyModule(torch.nn.Module): + def __init__(self): + super(MyModule, self).__init__() + + def forward(self, x, y): + z = torch.ops.air.add_custom.default(x, y) + return z + + +# 创建并编译模块 +module = MyModule().npu() +module = torch.compile(module, fullgraph=True, backend=npu_backend, dynamic=False) + +# 示例输入 +x = torch.randn(6, 64, dtype=torch.float32).npu() +y = torch.randn(6, 64, 
dtype=torch.float32).npu() + +output = module(x, y) +print(output) + +``` diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py new file mode 100644 index 000000000..4dd84002b --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py @@ -0,0 +1,78 @@ +from typing import ( + Optional, + Union, + List, +) +import torch +from torchair._ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter +from torchair.ge._ge_graph import Tensor, TensorSpec + +lib = torch.library.Library("air", "FRAGMENT") +lib.define( + """ + add_custom(Tensor x, Tensor y) -> Tensor + """ +) + + +@torch.library.impl(lib, "add_custom", "Meta") +def kernel_meta(x, y): + return torch.empty_like(x) + + +def kernel_impl(x, y): + raise NotImplementedError("torch.ops.air.add_custom kernel_impl is not implemented!") + + +torch.library.impl(lib, "add_custom", "CPU")(kernel_impl) +torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl) + + +@register_fx_node_ge_converter(torch.ops.air.add_custom.default) +def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): + return AddCustomTilingsink(x, y) + + +# This file is auto-generated by +# Summary: total 1, generated 1, skipped 0 +from typing import Any, Dict, List, Tuple, Union, Callable, Optional +from torchair.ge._ge_graph import auto_convert_to_tensor, TensorType +from torchair.ge import Tensor, DataType, attr +from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef + + +# This api is auto-generated from IR AddCustomTilingsink +@auto_convert_to_tensor([False, False], [False, False], inputs_tensor_type=[TensorType.TT_ALL, TensorType.TT_ALL]) +def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): + """REG_OP(AddCustomTilingsink)\n +.INPUT(x, 
TensorType::ALL())\n +.INPUT(y, TensorType::ALL())\n +.OUTPUT(z, TensorType::ALL())\n +""" + + # process inputs + inputs = { + "x": x, + "y": y, + } + + # process attrs + attrs = { + } + + # process outputs + outputs = [ + "z", + ] + + return ge_op( + op_type="AddCustomTilingsink", + inputs=inputs, + attrs=attrs, + outputs=outputs, + dependencies=dependencies, + ir=IrDef("AddCustomTilingsink") \ + .input("x", "") \ + .input("y", "") \ + .output("z" , "") + ) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py new file mode 100644 index 000000000..c093d75b8 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py @@ -0,0 +1,34 @@ +import torch +import torch_npu +import torchair +from torchair.configs.compiler_config import CompilerConfig +from torchair.core.utils import logger +import logging + +logger.setLevel(logging.DEBUG) +config = CompilerConfig() +config.debug.graph_dump.type = "pbtxt" +config.experimental_config.tiling_schedule_optimize = True +npu_backend = torchair.get_npu_backend(compiler_config=config) + +import torchair.ops._add_custom + +class MyModule(torch.nn.Module): + def __init__(self): + super(MyModule, self).__init__() + + def forward(self, x, y): + z = torch.ops.air.add_custom.default(x, y) + return z + + +# 创建并编译模块 +module = MyModule().npu() +module = torch.compile(module, fullgraph=True, backend=npu_backend, dynamic=False) + +# 示例输入 +x = torch.randn(6, 64, dtype=torch.float32).npu() +y = torch.randn(6, 64, dtype=torch.float32).npu() + +output = module(x, y) +print(output.shape) \ No newline at end of file -- Gitee From e474a6d7c2f4cb9b4e43f9d1b576c860d881891b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8?= Date: Fri, 23 May 2025 09:04:23 +0000 Subject: [PATCH 06/97] =?UTF-8?q?!2661=20[feature]fix=20add=5Fcustom=20Mer?= 
=?UTF-8?q?ge=20pull=20request=20!2661=20from=20=E9=99=88=E5=A8=81?= =?UTF-8?q?=E4=BA=A8/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddCustomTilingSink/AddCustom/README.md | 39 ++++++++++--------- .../AddCustom/src/add_custom.py | 12 +++--- .../AddCustom/test_add_custom.py | 2 +- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md index c2f849645..217cbfad8 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md @@ -18,7 +18,7 @@ Tiling下沉是在Device侧CPU做Tiling计算。由于NPU中AI Core内部存储 ## 代码实现介绍 新增自定义算子入图步骤,该过程可参考[torchair社区新增自定义算子入图介绍](https://gitee.com/ascend/torchair/blob/master/CONTRIBUTING.md#converter%E8%A1%A5%E9%BD%90)converter补齐第五小节: -1.下载torchair仓,新建一个add_custom.py文件放在torchair/python/torchair/ops/add_custom.py,然后自定义算子在torch框架中注册: +1.下载[torchair仓](https://gitee.com/ascend/torchair),新建一个add_custom.py文件放在torchair/python/torchair/ops/add_custom.py,然后在torch框架中注册自定义算子: ```python # add_custom.py @@ -49,11 +49,11 @@ lib.define( #include "graph/operator_reg.h" namespace ge { -REG_OP(AddCustomTilingsink) +REG_OP(AddCustomTilingSink) .INPUT(x, TensorType::ALL()) .INPUT(y, TensorType::ALL()) .OUTPUT(z, TensorType::ALL()) - .OP_END_FACTORY_REG(AddCustomTilingsink) + .OP_END_FACTORY_REG(AddCustomTilingSink) } #endif // ASCENDADAPTER2_CUSTOM_REG_OP_H @@ -78,10 +78,10 @@ from torchair.ge import Tensor, DataType, attr from torchair._ge_concrete_graph.ge_converter import ge_op, IrDef -# This api is auto-generated from IR AddCustomTilingsink +# This api is auto-generated from IR AddCustomTilingSink @auto_convert_to_tensor([False, False], [False, False], inputs_tensor_type=[TensorType.TT_ALL, TensorType.TT_ALL]) -def 
AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): - """REG_OP(AddCustomTilingsink)\n +def AddCustomTilingSink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): + """REG_OP(AddCustomTilingSink)\n .INPUT(x, TensorType::ALL())\n .INPUT(y, TensorType::ALL())\n .OUTPUT(z, TensorType::ALL())\n @@ -103,12 +103,12 @@ def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None ] return ge_op( - op_type="AddCustomTilingsink", + op_type="AddCustomTilingSink", inputs=inputs, attrs=attrs, outputs=outputs, dependencies=dependencies, - ir=IrDef("AddCustomTilingsink") \ + ir=IrDef("AddCustomTilingSink") \ .input("x", "") \ .input("y", "") \ .output("z" , "") @@ -116,7 +116,7 @@ def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None ``` 需要修改`from torchair._ge_concrete_graph.ge_converter import ge_op, IrDef` -为``from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef`` +为`from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef` 将上述生成内容拷贝至前面我们新建的add_custom.py文件中。 @@ -125,7 +125,7 @@ def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None ```python @register_fx_node_ge_converter(torch.ops.air.add_custom.default) def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): - return AddCustomTilingsink(x, y) + return AddCustomTilingSink(x, y) ``` 5.单算子部分为用户自行注册,此处预留未实现: @@ -139,7 +139,7 @@ torch.library.impl(lib, "add_custom", "CPU")(kernel_impl) torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl) ``` -6.调用,需要import前面新建的add_custom.py: +6.调用时,需要import前面新建的add_custom.py: ```python import torchair.ops.add_custom @@ -172,11 +172,12 @@ pip3.x install output/torchair_xxxx.whl rm -rf /usr/local/python3.8.1/lib/python3.8/site-packages/torch_npu/dynamo/torchair ``` -查看torch_npu路径: +查看环境上安装的torch_npu的路径: ``` pip3.x show torch_npu ``` + ### 2. 
部署自定义算子包 请参考[tiling下沉样例](https://gitee.com/ascend/samples/tree/master/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl)部署自定义算子包章节: @@ -232,7 +233,7 @@ torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl) @register_fx_node_ge_converter(torch.ops.air.add_custom.default) def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): - return AddCustomTilingsink(x, y) + return AddCustomTilingSink(x, y) # This file is auto-generated by @@ -243,10 +244,10 @@ from torchair.ge import Tensor, DataType, attr from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef -# This api is auto-generated from IR AddCustomTilingsink +# This api is auto-generated from IR AddCustomTilingSink @auto_convert_to_tensor([False, False], [False, False], inputs_tensor_type=[TensorType.TT_ALL, TensorType.TT_ALL]) -def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): - """REG_OP(AddCustomTilingsink)\n +def AddCustomTilingSink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): + """REG_OP(AddCustomTilingSink)\n .INPUT(x, TensorType::ALL())\n .INPUT(y, TensorType::ALL())\n .OUTPUT(z, TensorType::ALL())\n @@ -268,12 +269,12 @@ def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None ] return ge_op( - op_type="AddCustomTilingsink", + op_type="AddCustomTilingSink", inputs=inputs, attrs=attrs, outputs=outputs, dependencies=dependencies, - ir=IrDef("AddCustomTilingsink") \ + ir=IrDef("AddCustomTilingSink") \ .input("x", "") \ .input("y", "") \ .output("z" , "") @@ -297,7 +298,7 @@ config.debug.graph_dump.type = "pbtxt" config.experimental_config.tiling_schedule_optimize = True npu_backend = torchair.get_npu_backend(compiler_config=config) -import torchair.ops._add_custom +import torchair.ops.add_custom class MyModule(torch.nn.Module): def __init__(self): diff --git 
a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py index 4dd84002b..dc73f0b07 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py @@ -30,7 +30,7 @@ torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl) @register_fx_node_ge_converter(torch.ops.air.add_custom.default) def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): - return AddCustomTilingsink(x, y) + return AddCustomTilingSink(x, y) # This file is auto-generated by @@ -41,10 +41,10 @@ from torchair.ge import Tensor, DataType, attr from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef -# This api is auto-generated from IR AddCustomTilingsink +# This api is auto-generated from IR AddCustomTilingSink @auto_convert_to_tensor([False, False], [False, False], inputs_tensor_type=[TensorType.TT_ALL, TensorType.TT_ALL]) -def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): - """REG_OP(AddCustomTilingsink)\n +def AddCustomTilingSink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): + """REG_OP(AddCustomTilingSink)\n .INPUT(x, TensorType::ALL())\n .INPUT(y, TensorType::ALL())\n .OUTPUT(z, TensorType::ALL())\n @@ -66,12 +66,12 @@ def AddCustomTilingsink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None ] return ge_op( - op_type="AddCustomTilingsink", + op_type="AddCustomTilingSink", inputs=inputs, attrs=attrs, outputs=outputs, dependencies=dependencies, - ir=IrDef("AddCustomTilingsink") \ + ir=IrDef("AddCustomTilingSink") \ .input("x", "") \ .input("y", "") \ .output("z" , "") diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py 
b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py index c093d75b8..81bba97bb 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py @@ -11,7 +11,7 @@ config.debug.graph_dump.type = "pbtxt" config.experimental_config.tiling_schedule_optimize = True npu_backend = torchair.get_npu_backend(compiler_config=config) -import torchair.ops._add_custom +import torchair.ops.add_custom class MyModule(torch.nn.Module): def __init__(self): -- Gitee From a4f649d8020d8d7841ef71a12cf3558f97b4a0d6 Mon Sep 17 00:00:00 2001 From: renjie Date: Fri, 23 May 2025 09:17:21 +0000 Subject: [PATCH 07/97] =?UTF-8?q?!2662=20=E5=88=A0=E9=99=A4=E6=A0=A1?= =?UTF-8?q?=E9=AA=8C=20*=20=E5=88=A0=E9=99=A4=E6=A0=A1=E9=AA=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh index 5c36ce5f4..d4ee2aa9a 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh @@ -25,11 +25,6 @@ while :; do esac done -VERSION_LIST="Ascend910B1" -if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" - exit -1 -fi if [ -n "$ASCEND_INSTALL_PATH" ]; then _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -- Gitee From 06fb25b16e417d0a6d4e607fa7eeba414be88e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=87=E7=A5=96=E6=B6=9B?= Date: Mon, 26 May 2025 06:30:09 +0000 Subject: [PATCH 08/97] =?UTF-8?q?!2655=20kernel=E7=9B=B4=E8=B0=83AddCustom?= 
=?UTF-8?q?=E6=A0=B7=E4=BE=8B=E6=9B=B4=E6=96=B0=20Merge=20pull=20request?= =?UTF-8?q?=20!2655=20from=20=E4=B8=87=E7=A5=96=E6=B6=9B/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddCustomKernel/CMakeLists.txt | 30 +++ .../KernelLaunch/AddCustomKernel/README.md | 77 +++++++ .../AddCustomKernel/add_custom.cpp | 84 ++++++++ .../examples/CPPInvocation/CMakeLists.txt | 34 +++ .../examples/CPPInvocation/README.md | 40 ++++ .../examples/CPPInvocation/data_utils.h | 204 ++++++++++++++++++ .../examples/CPPInvocation/main.cpp | 62 ++++++ .../examples/CPPInvocation/run.sh | 47 ++++ .../CPPInvocation/scripts/gen_data.py | 25 +++ .../CPPInvocation/scripts/verify_result.py | 53 +++++ .../KernelLaunch/AddCustomKernel/run.sh | 79 +++++++ .../AddCustomTilingKernel/CMakeLists.txt | 30 +++ .../AddCustomTilingKernel/README.md | 78 +++++++ .../AddCustomTilingKernel/add_custom.cpp | 92 ++++++++ .../AddCustomTilingKernel/add_custom_tiling.h | 15 ++ .../examples/CPPInvocation/CMakeLists.txt | 35 +++ .../examples/CPPInvocation/README.md | 40 ++++ .../examples/CPPInvocation/data_utils.h | 203 +++++++++++++++++ .../examples/CPPInvocation/main.cpp | 63 ++++++ .../examples/CPPInvocation/run.sh | 47 ++++ .../CPPInvocation/scripts/gen_data.py | 25 +++ .../CPPInvocation/scripts/verify_result.py | 53 +++++ .../examples/PythonInvocation/CMakeLists.txt | 53 +++++ .../examples/PythonInvocation/README.md | 69 ++++++ .../PythonInvocation/add_custom_test.py | 38 ++++ .../examples/PythonInvocation/pybind11.cpp | 40 ++++ .../examples/PythonInvocation/run.sh | 25 +++ .../KernelLaunch/AddCustomTilingKernel/run.sh | 79 +++++++ operator_contrib/AddCustomSample/README.md | 112 ++++++++++ 29 files changed, 1832 insertions(+) create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/CMakeLists.txt create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/README.md create mode 100644 
operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/add_custom.cpp create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/CMakeLists.txt create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/README.md create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/data_utils.h create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/main.cpp create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/run.sh create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/gen_data.py create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/run.sh create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/CMakeLists.txt create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/README.md create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom.cpp create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom_tiling.h create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/CMakeLists.txt create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/README.md create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/data_utils.h create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/main.cpp create mode 100644 
operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/run.sh create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/gen_data.py create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/CMakeLists.txt create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/README.md create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/add_custom_test.py create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/pybind11.cpp create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/run.sh create mode 100644 operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/run.sh create mode 100644 operator_contrib/AddCustomSample/README.md diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/CMakeLists.txt b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/CMakeLists.txt new file mode 100644 index 000000000..a3a200642 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/CMakeLists.txt @@ -0,0 +1,30 @@ +cmake_minimum_required(VERSION 3.16.0) +project(Ascend_C) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +file(GLOB SOURCES "*.cpp") +# user-defined configuration +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package installation directory") +set(RUN_MODE "npu" CACHE STRING "run mode: npu") +set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) 
+set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +set(LIBRARY_TYPE "SHARED" CACHE STRING "library type:SHARED or STATIC") +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed.") +endif() + +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +if(LIBRARY_TYPE STREQUAL "SHARED") + ascendc_library(kernels SHARED ${SOURCES}) +else() + ascendc_library(kernels STATIC ${SOURCES}) +endif() \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/README.md b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/README.md new file mode 100644 index 000000000..449abe628 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/README.md @@ -0,0 +1,77 @@ +## `AddCustom`自定义算子样例说明 + +本样例通过`Ascend C`编程语言实现了`AddCustom`算子不带Tiling场景。 + +### 算子描述 + +`AddCustom`算子返回两个数据相加的结果。 + +### 算子规格描述 + +| 算子类型(OpType) | AddCustom | | | | +| ---------------- | ---------- | -------- | --------- | ------ | +| 算子输入 | name | shape | data type | format | +| x | 8 * 2048 | float16 | ND | | +| y | 8 * 2048 | float16 | ND | | +| 算子输出 | z | 8 * 2048 | float16 | ND | +| 核函数名 | add_custom | | | | + +### 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas 训练系列产品 +- Atlas 推理系列产品 +- Atlas A2训练系列产品 +- Atlas 800I A2推理产品 +- Atlas 200I/500 A2推理产品 + +### 目录结构介绍 + +``` +├── examples // 
调用示例目录 +├── add_custom.cpp // 算子kernel代码 +├── CMakeLists.txt // cmake编译文件 +├── run.sh // 运行脚本 +└── README.md // 样例指导手册 +``` + +### 环境要求 + +编译运行此样例前,请参考[《CANN软件安装指南》](https://gitee.com/link?target=https%3A%2F%2Fhiascend.com%2Fdocument%2Fredirect%2FCannCommunityInstSoftware)完成开发运行环境的部署。 + +### 算子包编译部署 + +1.进入到样例目录 + +``` +cd ${git_clone_path}/samples/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel +``` + +2.算子编译部署 + +- 打包动态库部署 + + ``` + bash run.sh -l SHARED -v Ascend***(由npu-smi info查询得到) + ``` + +- 打包静态库部署 + + ``` + bash run.sh -l STATIC -v Ascend***(由npu-smi info查询得到) + ``` + + + +### 算子调用 + +| 目录 | 描述 | +| ------------------------------------------------------------ | ---------------------------------------- | +| [CPPInvocation](./examples/CPPInvocation) | Pybind方式调用AddCustom算子。 | + +### 更新说明 + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/01/06 | 新增本readme | \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/add_custom.cpp b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/add_custom.cpp new file mode 100644 index 000000000..eb662e8aa --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/add_custom.cpp @@ -0,0 +1,84 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + #include "kernel_operator.h" + + constexpr int32_t TOTAL_LENGTH = 8 * 2048; // total length of data + constexpr int32_t USE_CORE_NUM = 8; // num of core used + constexpr int32_t BLOCK_LENGTH = TOTAL_LENGTH / USE_CORE_NUM; // length computed of each core + constexpr int32_t TILE_NUM = 8; // split data into 8 tiles for each core + constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue + constexpr int32_t TILE_LENGTH = BLOCK_LENGTH / TILE_NUM / BUFFER_NUM; // separate to 2 parts, due to double buffer + + class KernelAdd { + public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z) + { + xGm.SetGlobalBuffer((__gm__ half *)x + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + yGm.SetGlobalBuffer((__gm__ half *)y + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + zGm.SetGlobalBuffer((__gm__ half *)z + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + } + __aicore__ inline void Process() + { + int32_t loopCount = TILE_NUM * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + + private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocal, yGm[progress * TILE_LENGTH], TILE_LENGTH); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TILE_LENGTH); + outQueueZ.EnQue(zLocal); + 
inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * TILE_LENGTH], zLocal, TILE_LENGTH); + outQueueZ.FreeTensor(zLocal); + } + + private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + }; + + extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z) + { + KernelAdd op; + op.Init(x, y, z); + op.Process(); + } + + \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/CMakeLists.txt b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/CMakeLists.txt new file mode 100644 index 000000000..ccef61311 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/CMakeLists.txt @@ -0,0 +1,34 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--copy-dt-needed-entries") +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) +target_link_directories(ascendc_kernels_bbit PRIVATE + ${ASCEND_CANN_PACKAGE_PATH}/lib64 +) +target_include_directories(ascendc_kernels_bbit PRIVATE + ${ASCEND_CANN_PACKAGE_PATH}/include +) 
+target_link_libraries(ascendc_kernels_bbit PRIVATE + ascendcl + kernels +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/README.md b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/README.md new file mode 100644 index 000000000..df0672608 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/README.md @@ -0,0 +1,40 @@ +## 概述 + +通过C++接口调用核函数直调实现的不带Tiling的AddCustom算子 + +## 目录结构介绍 + +``` +├── CPPInvocation +│ ├── scripts + └── gen_data.py // 输入数据和标杆数据构造脚本 + └── verify_result.py // 标杆数据和自定义算子输出数据对比脚本 +│ ├── CMakeLists.txt // cmake编译文件 +│ ├── main.cpp // 算子调用代码 +│ ├── data_utils.h // 数据类型定义,数据读取代码 +│ ├── run.sh // 编译运行算子的脚本 +``` + +## 运行样例算子 + + **请确保已根据算子包编译部署步骤完成本算子的编译部署动作。** + + - 进入样例代码所在路径 + + ```bash + cd ${git_clone_path}/samples/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation + ``` + + - 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行C++调用样例,最后打印运行结果。 + + ```bash + bash run.sh + ``` + +## 更新说明 + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/05/19 | 样例首次提交 | \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/data_utils.h b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/data_utils.h new file mode 100644 index 000000000..ae9cf84f9 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/data_utils.h @@ -0,0 +1,204 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + #ifndef DATA_UTILS_H + #define DATA_UTILS_H + #include + #include + #include + + #include + #include + #include + #include + #include + #include + #include + + #include "acl/acl.h" + + typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 + } printDataType; + + #define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) + #define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) + #define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) + #define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + + /** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ + bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) + { + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; + } + + /** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ + bool WriteFile(const std::string &filePath, const void *buffer, size_t size) + { + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; + } + + template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) + { + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } + } + + void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) + { + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } + } + + void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) + { + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; + } + #endif // DATA_UTILS_H + \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/main.cpp b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/main.cpp new file mode 100644 index 000000000..78b372929 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/main.cpp @@ -0,0 +1,62 @@ +/** + * @file main.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + #include "data_utils.h" + #include "acl/acl.h" + #include "aclrtlaunch_add_custom.h" + + int32_t main(int32_t argc, char *argv[]) + { + uint32_t blockDim = 8; + size_t inputByteSize = 8 * 2048 * sizeof(uint16_t); + size_t outputByteSize = 8 * 2048 * sizeof(uint16_t); + + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + void *xHost, *yHost, *zHost; + void *xDevice, *yDevice, *zDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&zHost), outputByteSize)); + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, yHost, inputByteSize); + + CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize, yHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + // add_custom_do(blockDim, stream, xDevice, yDevice, zDevice); + ACLRT_LAUNCH_KERNEL(add_custom) + (blockDim, stream, xDevice, yDevice, zDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output_z.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + 
return 0; + } + \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/run.sh b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/run.sh new file mode 100644 index 000000000..9e80ad982 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/run.sh @@ -0,0 +1,47 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + msprof op --application=./ascendc_kernels_bbit + +) +md5sum output/*.bin +python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/gen_data.py b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/gen_data.py new file mode 100644 index 000000000..ea8ce828a --- 
/dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/gen_data.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + golden = (input_x + input_y).astype(np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py new file mode 100644 index 000000000..1a21d809a --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/run.sh b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/run.sh new file mode 100644 index 000000000..cdb2e0be7 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/run.sh @@ -0,0 +1,79 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +BUILD_TYPE="Debug" +LIBRARY_TYPE="SHARED" +INSTALL_PREFIX="${CURRENT_DIR}/out" +RUN_MODE="npu" +SHORT=v:,l:, +LONG=soc-version:,library-type +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +SOC_VERSION="Ascend310P3" + +while :; do + case "$1" in + -v | 
--soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -l | --library-type) + LIBRARY_TYPE="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +LIBRARY_LIST="SHARED STATIC" +if [[ " $LIBRARY_LIST " != *" $LIBRARY_TYPE "* ]]; then + echo "ERROR: LIBRARY_TYPE should be in [$LIBRARY_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} \ + -DLIBRARY_TYPE=${LIBRARY_TYPE} +cmake --build build -j +cmake --install build +cp -rf out/lib/libkernels.* /usr/lib +cp -rf out/include/kernels/aclrtlaunch_add_custom.h /usr/include diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/CMakeLists.txt b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/CMakeLists.txt new file mode 100644 index 000000000..a3a200642 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/CMakeLists.txt @@ 
-0,0 +1,30 @@ +cmake_minimum_required(VERSION 3.16.0) +project(Ascend_C) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +file(GLOB SOURCES "*.cpp") +# user-defined configuration +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package installation directory") +set(RUN_MODE "npu" CACHE STRING "run mode: npu") +set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +set(LIBRARY_TYPE "SHARED" CACHE STRING "library type:SHARED or STATIC") +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed.") +endif() + +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +if(LIBRARY_TYPE STREQUAL "SHARED") + ascendc_library(kernels SHARED ${SOURCES}) +else() + ascendc_library(kernels STATIC ${SOURCES}) +endif() \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/README.md b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/README.md new file mode 100644 index 000000000..1992e4ed2 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/README.md @@ -0,0 +1,78 @@ +## `AddCustom`自定义算子样例说明 + 
+本样例通过`Ascend C`编程语言实现了`AddCustom`算子带Tiling场景。 + +### 算子描述 + +`AddCustom`算子返回两个数据相加的结果。 + +### 算子规格描述 + +| 算子类型(OpType) | AddCustom | | | | +| ---------------- | ---------- | -------- | --------- | ------ | +| 算子输入 | name | shape | data type | format | +| x | 8 * 2048 | float16 | ND | | +| y | 8 * 2048 | float16 | ND | | +| 算子输出 | z | 8 * 2048 | float16 | ND | +| 核函数名 | add_custom | | | | + +### 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas 训练系列产品 +- Atlas 推理系列产品 +- Atlas A2训练系列产品 +- Atlas 800I A2推理产品 +- Atlas 200I/500 A2推理产品 + +### 目录结构介绍 + +``` +├── examples // 调用示例目录 +├── add_custom_tiling.h // 算子tiling结构体定义 +├── add_custom.cpp // 算子kernel代码 +├── CMakeLists.txt // cmake编译文件 +├── run.sh // 运行脚本 +└── README.md // 样例指导手册 +``` + +### 环境要求 + +编译运行此样例前,请参考[《CANN软件安装指南》](https://gitee.com/link?target=https%3A%2F%2Fhiascend.com%2Fdocument%2Fredirect%2FCannCommunityInstSoftware)完成开发运行环境的部署。 + +### 算子包编译部署 + +1.进入到样例目录 + +``` +cd ${git_clone_path}/samples/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernelTiling +``` + +2.算子编译部署 + +- 打包动态库部署 + + ``` + bash run.sh -l SHARED -v Ascend***(由npu-smi info查询得到) + ``` + +- 打包静态库部署 + + ``` + bash run.sh -l STATIC -v Ascend***(由npu-smi info查询得到) + ``` + + + +### 算子调用 + +| 目录 | 描述 | +| ------------------------------------------------------------ | ---------------------------------------- | +| [PythonInvocation](./examples/PythonInvocation) | 通过Python调用的方式调用AddCustom算子。 | + +### 更新说明 + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/01/06 | 新增本readme | \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom.cpp b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom.cpp new file mode 100644 index 000000000..35196ea70 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom.cpp @@ -0,0 +1,92 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. 
All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "add_custom_tiling.h" +#include "kernel_operator.h" +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) + { + this->blockLength = totalLength / AscendC::GetBlockNum(); + this->tileNum = tileNum; + this->tileLength = this->blockLength / tileNum / BUFFER_NUM; + + xGm.SetGlobalBuffer((__gm__ half *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + yGm.SetGlobalBuffer((__gm__ half *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + zGm.SetGlobalBuffer((__gm__ half *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(half)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(half)); + } + __aicore__ inline void Process() + { + int32_t loopCount = this->tileNum * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); + AscendC::DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = 
outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, this->tileLength); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t blockLength; + uint32_t tileNum; + uint32_t tileLength; +}; + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, AddCustomTiling tiling) +{ + KernelAdd op; + op.Init(x, y, z, tiling.totalLength, tiling.tileNum); + op.Process(); +} + +AddCustomTiling* GenerateAddCustomTiling(uint32_t totalLength) +{ + AddCustomTiling* tiling = new AddCustomTiling(); + uint32_t tileNum = 8; + tiling->totalLength = totalLength; + tiling->tileNum = tileNum; + return tiling; +} \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom_tiling.h b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom_tiling.h new file mode 100644 index 000000000..9ab640d51 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/add_custom_tiling.h @@ -0,0 +1,15 @@ +/** + * @file add_custom_tiling.h + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +struct AddCustomTiling{ + uint32_t tileNum; + uint32_t totalLength; +}; + + diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/CMakeLists.txt b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/CMakeLists.txt new file mode 100644 index 000000000..63e29f1bb --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/CMakeLists.txt @@ -0,0 +1,35 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--copy-dt-needed-entries") +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) +target_link_directories(ascendc_kernels_bbit PRIVATE + ${ASCEND_CANN_PACKAGE_PATH}/lib64 +) +target_include_directories(ascendc_kernels_bbit PRIVATE + ${ASCEND_CANN_PACKAGE_PATH}/include +) +target_link_libraries(ascendc_kernels_bbit PRIVATE + ascendcl + kernels +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/README.md 
b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/README.md new file mode 100644 index 000000000..585b04f13 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/README.md @@ -0,0 +1,40 @@ +## 概述 + +通过C++接口调用核函数直调实现的带Tiling的AddCustom算子 + +## 目录结构介绍 + +``` +├── CPPInvocation +│ ├── scripts + └── gen_data.py // 输入数据和标杆数据构造脚本 + └── verify_result.py // 标杆数据和自定义算子输出数据对比脚本 +│ ├── CMakeLists.txt // cmake编译文件 +│ ├── main.cpp // 算子调用代码 +│ ├── data_utils.h // 数据类型定义,数据读取代码 +│ ├── run.sh // 编译运行算子的脚本 +``` + +## 运行样例算子 + + **请确保已根据算子包编译部署步骤完成本算子的编译部署动作。** + + - 进入样例代码所在路径 + + ```bash + cd ${git_clone_path}/samples/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation + ``` + + - 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行C++调用样例,最后打印运行结果。 + + ```bash + bash run.sh + ``` + +## 更新说明 + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/05/19 | 样例首次提交 | \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/data_utils.h b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/data_utils.h new file mode 100644 index 000000000..fb1363721 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/data_utils.h @@ -0,0 +1,203 @@ +/** +* @file data_utils.h +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** +* @brief Read data from file +* @param [in] filePath: file path +* @param [out] fileSize: file size +* @return read result +*/ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** +* @brief Write data to file +* @param [in] filePath: file path +* @param [in] buffer: data to write to file +* @param [in] size: size to write +* @return write result +*/ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/main.cpp b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/main.cpp new file mode 100644 index 000000000..a00e0b2bd --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/main.cpp @@ -0,0 +1,63 @@ +/** +* @file main.cpp +* +* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "data_utils.h" +#include "acl/acl.h" +#include "add_custom_tiling.h" +#include "aclrtlaunch_add_custom.h" +extern AddCustomTiling* GenerateAddCustomTiling(uint32_t totalLength); +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 8; + size_t inputByteSize = 8 * 2048 * sizeof(uint16_t); + size_t outputByteSize = 8 * 2048 * sizeof(uint16_t); + size_t totalLength = 8 * 2048; + + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + void *xHost, *yHost, *zHost; + void *xDevice, *yDevice, *zDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&zHost), outputByteSize)); + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, yHost, inputByteSize); + + CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize, yHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + AddCustomTiling* tiling = GenerateAddCustomTiling(totalLength); + ACLRT_LAUNCH_KERNEL(add_custom) + (blockDim, stream, xDevice, yDevice, zDevice,tiling); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output_z.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + 
CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + return 0; +} diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/run.sh b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/run.sh new file mode 100644 index 000000000..600568cff --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/run.sh @@ -0,0 +1,47 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + #msprof op --application=./ascendc_kernels_bbit + ./ascendc_kernels_bbit +) +md5sum output/*.bin +python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git 
a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/gen_data.py b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/gen_data.py new file mode 100644 index 000000000..ea8ce828a --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/gen_data.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + golden = (input_x + input_y).astype(np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py new file mode 100644 index 000000000..1a21d809a --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/CMakeLists.txt b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/CMakeLists.txt new file mode 100644 index 000000000..549a603f8 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/CMakeLists.txt @@ -0,0 +1,53 @@ +cmake_minimum_required(VERSION 3.16.0) +project(Ascend_C) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +file(GLOB SOURCES "*.cpp") +# user-defined configuration +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} 
-Wl,--copy-dt-needed-entries") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package installation directory") +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") +add_library(pybind11_lib SHARED pybind11.cpp) +target_link_libraries(pybind11_lib PRIVATE + kernels + torch_npu +) +execute_process(COMMAND python3 -c "import os; import torch; print(os.path.dirname(torch.__file__))" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE TORCH_PATH +) +message("TORCH_PATH is ${TORCH_PATH}") +set(ENV{ASCEND_HOME_PATH} ${ASCEND_CANN_PACKAGE_PATH}) +execute_process(COMMAND python3 -c "import os; import torch_npu; print(os.path.dirname(torch_npu.__file__))" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE TORCH_NPU_PATH +) +message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}") +target_link_directories(pybind11_lib PRIVATE + ${TORCH_PATH}/lib + ${TORCH_NPU_PATH}/lib +) +target_include_directories(pybind11_lib PRIVATE + ${TORCH_NPU_PATH}/include + ${TORCH_PATH}/include + ${TORCH_PATH}/include/torch/csrc/api/include + ${ASCEND_CANN_PACKAGE_PATH}/include +) +execute_process(COMMAND python3 -m pybind11 --includes + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE PYBIND11_INC +) +string(REPLACE " " ";" PYBIND11_INC ${PYBIND11_INC}) +target_compile_options(pybind11_lib PRIVATE + ${PYBIND11_INC} + -D_GLIBCXX_USE_CXX11_ABI=0 +) + +execute_process(COMMAND python3-config --extension-suffix + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE PYBIND11_SUFFIX +) +set_target_properties(pybind11_lib PROPERTIES + OUTPUT_NAME add_custom${PYBIND11_SUFFIX} + PREFIX "" SUFFIX "" +) diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/README.md b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/README.md new file mode 100644 index 000000000..41b5db466 --- /dev/null +++ 
b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/README.md @@ -0,0 +1,69 @@ +## 概述 + +通过Python接口调用核函数直调实现的带Tiling的AddCustom算子 + +## 目录结构介绍 + +``` +├── PythonInvocation +│ ├── add_custom_test.py // add_custom python调用测试代码 +│ ├── CMakeLists.txt // cmake编译文件 +│ ├── pybind11.cpp // pybind绑定核函数和python接口代码 +│ ├── run.sh // 编译运行算子的脚本 +│ ├── README.md // 样例指导手册 +``` + +## 运行样例算子 + - 安装pytorch (这里使用2.1.0版本为例) + + **aarch64:** + + ```bash + pip3 install torch==2.1.0 + ``` + + **x86:** + + ```bash + pip3 install torch==2.1.0+cpu --index-url https://download.pytorch.org/whl/cpu + ``` + + - 安装torch-npu (以Pytorch2.1.0、python3.9、CANN版本8.0.RC1.alpha002为例) + + ```bash + git clone https://gitee.com/ascend/pytorch.git -b v6.0.rc1.alpha002-pytorch2.1.0 + cd pytorch/ + bash ci/build.sh --python=3.9 + pip3 install dist/*.whl + ``` + + 安装pybind11 + ```bash + pip3 install pybind11 + ``` + 安装expecttest + ```bash + pip3 install expecttest + ``` + + **请确保已根据算子包编译部署步骤完成本算子的编译部署动作。** + + - 进入样例代码所在路径 + + ```bash + cd ${git_clone_path}/samples/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation + ``` + + - 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行C++调用样例,最后打印运行结果。 + + ```bash + bash run.sh + ``` + +## 更新说明 + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/05/19 | 样例首次提交 | \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/add_custom_test.py b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/add_custom_test.py new file mode 100644 index 000000000..efdda2537 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/add_custom_test.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import torch +import torch_npu +from torch_npu.testing.testcase import TestCase, run_tests +import sys, os + +sys.path.append(os.getcwd()) +import add_custom + +torch.npu.config.allow_internal_format = False + + +class TestCustomAdd(TestCase): + + def test_add_custom_ops(self): + length = [8, 2048] + x = torch.rand(length, device='cpu', dtype=torch.float16) + y = torch.rand(length, device='cpu', dtype=torch.float16) + + x_npu = x.npu() + y_npu = y.npu() + output = add_custom.run_add_custom(x_npu, y_npu) + cpuout = torch.add(x, y) + + self.assertRtolEqual(output, cpuout) + + +if __name__ == "__main__": + run_tests() diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/pybind11.cpp b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/pybind11.cpp new file mode 100644 index 000000000..629782a55 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/pybind11.cpp @@ -0,0 +1,40 @@ +/** + * @file pybind11.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + #include + #include + #include "add_custom_tiling.h" + #include "aclrtlaunch_add_custom.h" + + #include "torch_npu/csrc/core/npu/NPUStream.h" + extern AddCustomTiling* GenerateAddCustomTiling(uint32_t totalLength); + namespace my_add { + at::Tensor run_add_custom(const at::Tensor &x, const at::Tensor &y) + { + auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); + at::Tensor z = at::empty_like(x); + uint32_t blockDim = 8; + uint32_t totalLength = 1; + for (uint32_t size : x.sizes()) { + totalLength *= size; + } + AddCustomTiling* tiling = GenerateAddCustomTiling(totalLength); + ACLRT_LAUNCH_KERNEL(add_custom) + (blockDim, acl_stream, const_cast(x.storage().data()), const_cast(y.storage().data()), + const_cast(z.storage().data()), tiling); + return z; + } + } // namespace my_add + + PYBIND11_MODULE(add_custom, m) + { + m.doc() = "add_custom pybind11 interfaces"; // optional module docstring + m.def("run_add_custom", &my_add::run_add_custom, ""); + } + \ No newline at end of file diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/run.sh b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/run.sh new file mode 100644 index 000000000..0cf216ce5 --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/PythonInvocation/run.sh @@ -0,0 +1,25 @@ +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash + +set -e +pip3 install pybind11 +rm -rf build +mkdir -p build +cmake -B build \ + -DSOC_VERSION=${SOC_VERSION} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake 
--build build -j +( + cd build + python3 ../add_custom_test.py +) diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/run.sh b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/run.sh new file mode 100644 index 000000000..f945ec5bf --- /dev/null +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/run.sh @@ -0,0 +1,79 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +BUILD_TYPE="Debug" +LIBRARY_TYPE="SHARED" +INSTALL_PREFIX="${CURRENT_DIR}/out" +RUN_MODE="npu" +SHORT=v:,l:, +LONG=soc-version:,library-type +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +SOC_VERSION="Ascend310P3" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -l | --library-type) + LIBRARY_TYPE="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +LIBRARY_LIST="SHARED STATIC" +if [[ " $LIBRARY_LIST " != *" $LIBRARY_TYPE "* ]]; then + echo "ERROR: LIBRARY_TYPE should be in [$LIBRARY_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +set -e +rm -rf build 
out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} \ + -DLIBRARY_TYPE=${LIBRARY_TYPE} +cmake --build build -j +cmake --install build +cp -rf out/lib/libkernels.* /usr/lib +cp -rf add_custom_tiling.h out/include/kernels/aclrtlaunch_add_custom.h /usr/include diff --git a/operator_contrib/AddCustomSample/README.md b/operator_contrib/AddCustomSample/README.md new file mode 100644 index 000000000..616f2c6cf --- /dev/null +++ b/operator_contrib/AddCustomSample/README.md @@ -0,0 +1,112 @@ +## Add自定义算子样例说明 + +本样例通过Ascend C编程语言实现了Add算子不带Tiling和带Tiling的场景,并提供了C++和Python的调用方式. + +- [KernelLaunch](./KernelLaunch):使用核函数直调Add自定义算子。 + 核函数的基础调用(Kernel Launch)方式,开发者完成算子核函数的开发和Tiling实现后,即可通过AscendCL运行时接口,完成算子的调用。 + +本样例中包含如下调用方式: + + + + + + + + + + + + + + +
调用方式目录描述
KernelLaunch
AddCustomKernelAddCustom不带Tiling场景,同时提供了静态库和动态库的算子打包方式
AddCustomTilingKernelAddCustom带Tiling场景,同时提供了静态库和动态库的算子打包方式
+ + +## 算子描述 + +Add算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: + +``` +z = x + y +``` + +## 算子规格描述 + + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x8 * 2048floatND
y8 * 2048floatND
算子输出z8 * 2048floatND
核函数名add_custom
+ + +## 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas 训练系列产品 +- Atlas 推理系列产品AI Core +- Atlas A2训练系列产品/Atlas 800I A2推理产品 +- Atlas 200/500 A2推理产品 + +## 目录结构介绍 + +``` +└── KernelLaunch // 使用核函数直调的方式调用Add自定义算子。 + └── AddCustomKernel // AddCustom不带Tiling场景 + └── AddCustomKernelTiling // AddCustom带Tiling场景 +``` + +## 环境要求 + +编译运行此样例前,请参考[《CANN软件安装指南》](https://hiascend.com/document/redirect/CannCommunityInstSoftware)完成开发运行环境的部署。 + +## 编译运行样例算子 + +### 1. 准备:获取样例代码 + + 可以使用以下两种方式下载,请选择其中一种进行源码准备。 + + - 命令行方式下载(下载时间较长,但步骤简单)。 + + ```bash + # 开发环境,非root用户命令行中执行以下命令下载源码仓。git_clone_path为用户自己创建的某个目录。 + cd ${git_clone_path} + git clone https://gitee.com/ascend/samples.git + ``` + + **注:如果需要切换到其它tag版本,以v0.5.0为例,可执行以下命令。** + + ```bash + git checkout v0.5.0 + ``` + + - 压缩包方式下载(下载时间较短,但步骤稍微复杂)。 + + **注:如果需要下载其它版本代码,请先请根据前置条件说明进行samples仓分支切换。下载压缩包命名跟tag/branch相关,此处以master分支为例,下载的名字将会是samples-master.zip** + + ```bash + # 1. samples仓右上角选择 【克隆/下载】 下拉框并选择 【下载ZIP】。 + # 2. 将ZIP包上传到开发环境中的普通用户某个目录中,【例如:${git_clone_path}/samples-master.zip】。 + # 3. 开发环境中,执行以下命令,解压zip包。 + cd ${git_clone_path} + unzip samples-master.zip + ``` + +### 2. 
编译运行样例工程 + +- 若是不带tiling场景,编译运行操作请参见[AddCustomKernel](./KernelLaunch/AddCustomKernel)。 +- 若是带tiling场景,编译运行操作请参见[AddCustomTilingKernel](./KernelLaunch/AddCustomTilingKernel)。 + +## 更新说明 + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/05/19 | 样例首次上仓 | \ No newline at end of file -- Gitee From babe54f102e8917a81c7b8f80dc39f8b6c567efa Mon Sep 17 00:00:00 2001 From: wangyuqing Date: Thu, 29 May 2025 11:51:21 +0000 Subject: [PATCH 09/97] !2665 add lut4 llama7b quantization Merge pull request !2665 from wangyuqing/master --- .../lut4_quantization/README_CN.md | 61 +++++++++ .../lut4_quantization/config/lut4_quant.cfg | 9 ++ .../lut4_quantization/requirements.txt | 7 ++ .../src/run_llama7b_calibration.py | 82 ++++++++++++ .../src/save_llama7b_quant_model.py | 79 ++++++++++++ .../lut4_quantization/src/utils.py | 117 ++++++++++++++++++ 6 files changed, 355 insertions(+) create mode 100644 python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/README_CN.md create mode 100644 python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/config/lut4_quant.cfg create mode 100644 python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/requirements.txt create mode 100644 python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/run_llama7b_calibration.py create mode 100644 python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/save_llama7b_quant_model.py create mode 100644 python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/utils.py diff --git a/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/README_CN.md b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/README_CN.md new file mode 100644 index 000000000..eef6c9c18 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/README_CN.md @@ -0,0 +1,61 @@ +# LUT4bit量化 + +## 1 LUT4bit量化前提 + +### 1.1 安装依赖 + +本sample依赖包可参考[requirements.txt](requirements.txt) + +### 1.2 模型和数据集准备 + 
+本sample以Llama2-7b模型,pileval和wikitext2数据集为示例,请用户自行下载
+
+### 1.3 简易量化配置
+./src/lut4_quant.cfg文件为用户自定义的简易量化配置,具体表示信息如下:
+
+| 字段 |类型| 说明 | 默认值 | 取值范围 | 注意事项 |
+|:--| :-: | :-- | :-: | :-: | :-: |
+|batch_num|uint32|量化使用的batch数量 |1|/|校准使用batch数与推理使用输入数据有关,是校准脚本中的batch_num|
+|skip_layers|str|跳过量化的层 |/|/|跳过量化层支持模糊匹配,当配置字符串为层名字串,或与层名一致时,跳过该层量化,不生成量化配置。字符串必须包含数字或字母|
+|weight_only_config.weight_compress_only|bool|是否为仅权重量化|False|True/False|LUT4bit量化目前仅支持权重量化,需要设置为True|
+|weight_only_config.wts_type|enum|量化后权重类型|INT8|本sample支持INT4|/|
+|weight_only_config.weight_granularity|enum|权重量化粒度|PER_TENSOR|PER_TENSOR/PER_CHANNEL/PER_GROUP|LUT4bit仅支持PER_GROUP模式|
+|weight_only_config.round_mode|enum|舍入模式|/|HYBRID/ROUND/RINT|LUT4bit仅支持RINT模式|
+|weight_only_config.lut_quantize.lut_algo|enum|lut量化算法模式|CLUSTER|CLUSTER/ARCTAN|/|
+
+## 2 LUT4量化示例
+
+### 2.1 使用接口方式调用
+
+**step 1.** 请在当前目录执行如下两条命令运行示例程序,用户需根据实际情况修改示例程序中的模型和数据集路径:
+
+校准:
+`CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python3 src/run_llama7b_calibration.py --calibration_data=/pile_val_backup/ --model=/data/Models/pytorch/Llama2/Llama2_7b_hf`
+- 校准可以使用--finetune, 入参格式是bool,用来表示做精调/粗调
+
+
+保存并推理量化模型:
+`CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python3 src/save_llama7b_quant_model.py --verify_data=/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py --model=/data/Models/pytorch/Llama2/Llama2_7b_hf`
+
+若出现如下信息,则说明校准成功:
+
+```none
+Calibration success, time taken: 56.0 min 20.263916969299316 s
+```
+
+出现如下信息,说明量化成功
+
+```none
+Test time taken: 7.0 min 12.269736528396606 s
+Score: 5.595210552215576
+```
+
+**step 2.** 推理成功后,在当前目录会生成量化日志文件./amct_log/amct_pytorch.log和./outputs文件夹,该文件夹内包含以下内容:
+
+- config.json:量化配置文件,描述了如何对模型中的每一层进行量化。
+- record.txt:量化因子记录文件。
+- lut_result.pt:lut算法参数文件。
+
+> 如果outputs目录下已经存在量化配置文件或量化因子记录文件,再次运行示例程序时,如果新生成的文件与已有文件同名,则会覆盖已有的量化配置文件或量化因子记录文件。
+
+**LLMHelper:** 定义用于大语言模型量化校准的辅助类,核心参数有:校准模型,校准数据集,前向方法,校准模块,校准模块推理方法,学习率,迭代次数,是否开启量化层筛选,量化误差比例阈值,量化误差平均阈值。详细使用方式可查阅AMCT使用手册
diff --git 
a/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/config/lut4_quant.cfg b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/config/lut4_quant.cfg new file mode 100644 index 000000000..6f532c21c --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/config/lut4_quant.cfg @@ -0,0 +1,9 @@ +batch_num: 1 +skip_layers: "lm_head" +weight_only_config: { + weight_compress_only: True + wts_type: INT4 + lut_quantize : { + lut_algo: CLUSTER + } +} \ No newline at end of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/requirements.txt b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/requirements.txt new file mode 100644 index 000000000..55441d062 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/requirements.txt @@ -0,0 +1,7 @@ +torch==2.1.0 +transformers==4.40.0 +accelerate==0.30.1 +datasets==2.19.1 +sentencepiece==0.2.0 +numpy==1.23.5 +protobuf==3.20.2 \ No newline at end of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/run_llama7b_calibration.py b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/run_llama7b_calibration.py new file mode 100644 index 000000000..6df231876 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/run_llama7b_calibration.py @@ -0,0 +1,82 @@ +""" +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" + + +import os +import copy +import time +import tqdm +import torch +import argparse +import torch.nn as nn + +from utils import get_llama2, get_calib_dataset, build_model_and_enc +import amct_pytorch as amct +from amct_pytorch.post_quant_calibration import LLMHelper + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--calibration_data', type=str, default='/pile_val_backup') + parser.add_argument('--model', type=str, default='/data/Models/pytorch/Llama2/Llama2_7b_hf') + parser.add_argument('--finetune', type=bool, default=False) + + args = parser.parse_args() + model, model_path = get_llama2(args.model) + model = model.eval() + gpu_num = torch.cuda.device_count() + model, enc = build_model_and_enc(model, model_path, gpu_num) + + proto_file = './config/lut4_quant.cfg' + config_file = './output/config.json' + record_file = './output/record.txt' + + test_start_time = time.time() + # Phase1: generate quant config json + amct.create_post_quant_config(config_file, + model, + config_defination=proto_file) + + # Phase2: generate calibration model + samples = get_calib_dataset( + data_path=args.calibration_data, tokenizer=enc, n_samples=512, block_size=256 + ) + samples = torch.cat(samples, dim=0)[:1,:] + # do weights calibration without finetune + # Please check README.md for LLMHelper usage + with torch.no_grad(): + post_quant_model = amct.create_post_quant_model(config_file, + record_file, + model) + calibration_helper = LLMHelper(post_quant_model, samples, calibration_block='LlamaDecoderLayer', layer_filter=True) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + post_quant_model.config.use_cache = False + amct.quant_calibration(calibration_helper) + # do weights calibration with finetune + if args.finetune: + with torch.no_grad(): + post_quant_model = amct.create_post_quant_model(config_file, + record_file, + 
post_quant_model) + calibration_finetune_helper = LLMHelper(post_quant_model, samples, calibration_block='LlamaDecoderLayer', layer_filter=True) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + post_quant_model.config.use_cache = False + amct.quant_calibration(calibration_finetune_helper) + test_end_time = time.time() + total_time = test_end_time - test_start_time + print('Calibration success, time taken: ', total_time // 60, 'min ', total_time%60, 's') diff --git a/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/save_llama7b_quant_model.py b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/save_llama7b_quant_model.py new file mode 100644 index 000000000..11f79c1c5 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/save_llama7b_quant_model.py @@ -0,0 +1,79 @@ +""" +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + + +import os +import copy +import time +import tqdm +import torch +import argparse +import torch.nn as nn + +from utils import get_loaders, get_llama2, get_calib_dataset, build_model_and_enc +import amct_pytorch as amct + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--verify_data', type=str, default='/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py') + parser.add_argument('--model', type=str, default='/data/Models/pytorch/Llama2/Llama2_7b_hf') + + args = parser.parse_args() + model, model_path = get_llama2(args.model) + model = model.eval() + gpu_num = torch.cuda.device_count() + + record_file = './output/record.txt' + + test_start_time = time.time() + model, enc = build_model_and_enc(model, model_path, gpu_num) + + # Phase1: save fakequant model + testenc = get_loaders(data_path=args.verify_data, + enc=enc, + seqlen=model.seqlen) + + testenc = testenc.input_ids.to(model.device) + nsamples = testenc.numel() // model.seqlen + fake_quant_model = amct.save_post_quant_model(record_file, model, mode='fakequant') + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Phase2: Test ppl result + nlls = [] + test_start_time = time.time() + for i in tqdm.tqdm(range(nsamples), desc="evaluating..."): + batch = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)].to( + model.device + ) + with torch.no_grad(): + lm_logits = fake_quant_model(batch).logits + shift_logits = lm_logits[:, :-1, :].contiguous().float() + shift_labels = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)][:, 1:] + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct( + shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1) + ) + neg_log_likelihood = loss.float() * model.seqlen + nlls.append(neg_log_likelihood) + test_end_time = time.time() + + ppl = torch.exp(torch.stack(nlls).sum() / (nsamples * model.seqlen)) + total_time = test_end_time - test_start_time + print('Test time taken: ', total_time // 
60, 'min ', total_time%60, 's' ) + print('Score: ', ppl.item()) \ No newline at end of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/utils.py b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/utils.py new file mode 100644 index 000000000..7bd34ba3d --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/lut4_quantization/src/utils.py @@ -0,0 +1,117 @@ +""" +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import os +import torch +import torch.nn as nn +from datasets import load_dataset,load_from_disk + +from transformers import AutoTokenizer, AutoConfig +from accelerate import infer_auto_device_map, dispatch_model +from accelerate.utils.modeling import get_balanced_memory + +def build_model_and_enc(model, model_path, gpu_num): + config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) + if "mpt" in config.__class__.__name__.lower(): + enc = AutoTokenizer.from_pretrained( + config.tokenizer_name, trust_remote_code=True + ) + else: + enc = AutoTokenizer.from_pretrained( + model_path, use_fast=False, trust_remote_code=True + ) + + # Move the model to GPU (as much as possible) for LM evaluation + # max_memory = ['0:16GiB', '1:16GiB','2:16GiB', 'cpu:30GiB'], '0' means the first GPU that you specify. 
+ # I don't recommend use 16GiB, we need to reserve some space for other tensors during calculation + # please see the recommand memeory allocation in the Word file + # Adjust the max_size accroding to the real situation + # a clever way: + + max_memory = [] + for i in range(gpu_num): + max_memory.append(f'{i}:12GiB') + max_memory.append('cpu:80GiB') + print('Max_memory allocation: \n', max_memory) + + max_memory = [v.split(":") for v in (max_memory or [])] + max_memory = {(int(k) if k.isdigit() else k): v for k, v in max_memory} + kwargs = { + "max_memory": get_balanced_memory( + model, max_memory if len(max_memory) > 0 else None + ) + } + model.tie_weights() + device_map = infer_auto_device_map( + model, + no_split_module_classes=[ + "LlamaDecoderLayer", + ], + **kwargs, + ) + model = dispatch_model(model, device_map=device_map, + offload_dir=os.path.join(model_path, 'offload_dir')) + + return model, enc + +def get_llama2(model_path, seqlen=2048): + def skip(*args, **kwargs): + pass + + torch.nn.init.kaiming_uniform_ = skip + torch.nn.init.uniform_ = skip + torch.nn.init.normal_ = skip + from transformers import LlamaForCausalLM + + model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32, offload_folder="offload/") + + model.seqlen = seqlen + return model, model_path + + +def get_loaders(data_path: str, enc, seqlen): + print('Loading dataset: Wikitext2') + testenc = load_dataset(data_path, 'wikitext-2-raw-v1', split='test', trust_remote_code=True) + testenc = enc("\n\n".join(testenc["text"]), return_tensors="pt") + + return testenc + + +def get_calib_dataset(data_path="pileval", tokenizer=None, n_samples=512, block_size=512): + dataset = load_from_disk(data_path) + dataset = dataset.shuffle(seed=42) + samples = [] + n_run = 0 + for data in dataset: + line = data["text"] + line = line.strip() + line_encoded = tokenizer.encode(line) + if len(line_encoded) > 512: + continue + sample = torch.tensor([line_encoded]) + if sample.numel() == 0: + 
continue + samples.append(sample) + n_run += 1 + if n_run == n_samples: + break + # now concatenate all samples and split according to block size + cat_samples = torch.cat(samples, dim=1) + n_split = cat_samples.shape[1] // block_size + print(f" * Split into {n_split} blocks") + return [ + cat_samples[:, i * block_size : (i + 1) * block_size] for i in range(n_split) + ] -- Gitee From 7c0d773c717383bbaf8c3ddeccc8430637517aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8?= Date: Fri, 30 May 2025 01:42:17 +0000 Subject: [PATCH 10/97] =?UTF-8?q?!2664=20update=20tiling=20sink=20Merge=20?= =?UTF-8?q?pull=20request=20!2664=20from=20=E9=99=88=E5=A8=81=E4=BA=A8/mas?= =?UTF-8?q?ter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddCustomTilingSink.json | 0 .../{OpImpl => AddCustomTilingSink}/README.md | 0 .../tf_plugin/tensorflow_add_custom_plugin.cc | 0 .../install.sh | 0 .../op_host/add_custom_tiling_sink.cpp | 0 .../op_host/add_custom_tiling_sink_tiling.cpp | 0 .../op_host/add_custom_tiling_sink_tiling.h | 0 .../op_kernel/add_custom_tiling_sink.cpp | 0 .../README.md | 228 ++++-------------- .../src/add_custom_tiling_sink.py} | 14 +- .../test_add_custom_tiling_sink.py} | 4 +- .../AddCustomTilingSink/README.md | 51 ++++ .../2_features/17_tiling_sink/README.md | 16 ++ 13 files changed, 124 insertions(+), 189 deletions(-) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/AddCustomTilingSink.json (100%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/README.md (100%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/framework/tf_plugin/tensorflow_add_custom_plugin.cc (100%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/install.sh (100%) rename 
operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/op_host/add_custom_tiling_sink.cpp (100%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/op_host/add_custom_tiling_sink_tiling.cpp (100%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/op_host/add_custom_tiling_sink_tiling.h (100%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{OpImpl => AddCustomTilingSink}/op_kernel/add_custom_tiling_sink.cpp (100%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{AddCustom => PytorchInvocation}/README.md (32%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{AddCustom/src/add_custom.py => PytorchInvocation/src/add_custom_tiling_sink.py} (73%) rename operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/{AddCustom/test_add_custom.py => PytorchInvocation/test_add_custom_tiling_sink.py} (88%) create mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md create mode 100644 operator/ascendc/2_features/17_tiling_sink/README.md diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/AddCustomTilingSink.json b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/AddCustomTilingSink.json rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/README.md rename to 
operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/framework/tf_plugin/tensorflow_add_custom_plugin.cc b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/framework/tf_plugin/tensorflow_add_custom_plugin.cc similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/framework/tf_plugin/tensorflow_add_custom_plugin.cc rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/framework/tf_plugin/tensorflow_add_custom_plugin.cc diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/install.sh similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/install.sh rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/install.sh diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink.cpp similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink.cpp rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink.cpp diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.cpp rename to 
operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.h b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.h similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_host/add_custom_tiling_sink_tiling.h rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.h diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_kernel/add_custom_tiling_sink.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_kernel/add_custom_tiling_sink.cpp similarity index 100% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl/op_kernel/add_custom_tiling_sink.cpp rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_kernel/add_custom_tiling_sink.cpp diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/README.md similarity index 32% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/README.md index 217cbfad8..8167f74d0 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/README.md +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/README.md @@ -1,47 +1,51 @@ -## 背景介绍 - -Tiling下沉是在Device侧CPU做Tiling计算。由于NPU中AI 
Core内部存储无法完全容纳算子输入输出的所有数据,需要每次搬运一部分输入数据进行计算然后搬出,再搬运下一部分输入数据进行计算,该过程称之为Tiling;根据算子的shape等信息来确定数据切分算法相关参数(比如每次搬运的块大小,以及总共循环多少次)的计算程序,称之为Tiling实现。由于Tiling实现中完成的均为标量计算,AI Core并不擅长,故一般在Host侧CPU上执行,但是满足下述条件Tiling实现会下沉到Device侧执行: - -模型为静态shape。 -模型中的算子支持Tiling下沉,比如FusedInferAttentionScore、IncreFlashAttention等融合算子。 -支持Tiling下沉的算子值有依赖,需要满足前一个算子的值有device的执行结果;如果依赖的值是Const,则不需要下沉执行Tiling,编译时会完成Tiling。 - ## 目录结构介绍 ``` -├── AddCustom // torch注册的自定义算子 +├── PytorchInvocation // torch注册的自定义算子 │ ├── src -│ │ ├── add_custom.py // 自定义算子py文件 -│ └── test_add_custom.py // 测试脚本 +│ │ ├── add_custom_tiling_sink.py // 自定义算子py文件 +│ └── test_add_custom_tiling_sink.py // 测试脚本 ``` -## 代码实现介绍 +## 代码实现 + +src/add_custom_tiling_sink.py是调用自定义算子的torch脚本,如何开发该脚本代码,具体步骤如下。 +> 注意:如需详细了解入图操作,请参考Ascend torchair仓中[converter补齐](https://gitee.com/ascend/torchair/blob/master/CONTRIBUTING.md#converter%E8%A1%A5%E9%BD%90)章节。 -新增自定义算子入图步骤,该过程可参考[torchair社区新增自定义算子入图介绍](https://gitee.com/ascend/torchair/blob/master/CONTRIBUTING.md#converter%E8%A1%A5%E9%BD%90)converter补齐第五小节: -1.下载[torchair仓](https://gitee.com/ascend/torchair),新建一个add_custom.py文件放在torchair/python/torchair/ops/add_custom.py,然后在torch框架中注册自定义算子: +1.下载[torchair工程源码](https://gitee.com/ascend/torchair),并在torchair/python/torchair/ops目录下新建add_custom_tiling_sink.py空文件。 +> 注意,请根据实际情况下载配套版本分支的torchair工程源码,版本配套关系请查看[PyTorch框架适配官网](https://www.hiascend.com/software/ai-frameworks/pytorch)。 +2.将自定义算子注册到PyTorch框架。 ```python -# add_custom.py +# add_custom_tiling_sink.py import torch lib = torch.library.Library("air", "FRAGMENT") lib.define( """ - add_custom(Tensor x, Tensor y) -> Tensor + add_custom_tiling_sink(Tensor x, Tensor y) -> Tensor """ ) ``` +3.实现自定义算子的单算子模式。 +该部分目前仅为示例,当前预留为为实现,请用户根据实际需要自行定义。 +```python +def kernel_impl(x, y): + raise NotImplementedError("torch.ops.air.add_custom_tiling_sink kernel_impl is not implemented!") -2.向torch注册自定义算子meta后端实现,用来完成图模式下的shape推导: +torch.library.impl(lib, "add_custom_tiling_sink", "CPU")(kernel_impl) 
+torch.library.impl(lib, "add_custom_tiling_sink", "PrivateUse1")(kernel_impl)
+```
+4.为自定义算子注册Meta函数,通过PyTorch Meta后端完成入图时所需要的shape和data type推导。
 ```python
-@torch.library.impl(lib, "add_custom", "Meta")
- def kernel_meta(x, y):
-     return torch.empty_like(x)
+@torch.library.impl(lib, "add_custom_tiling_sink", "Meta")
+def kernel_meta(x, y):
+    return torch.empty_like(x)
 ```
 
-3.codegen生成ge构图api
-(1)将REG_OP算子原型放置到codegen/custom_op/custom_reg_op.h文件中,替换原来示例的REG_OP:
+5.codegen生成ge构图api
+(1)将REG_OP算子原型放置到codegen/custom_op/custom_reg_op.h文件中,替换原来示例的REG_OP
 ```cpp
 
 #ifndef ASCENDADAPTER2_CUSTOM_REG_OP_H
@@ -59,7 +63,7 @@ REG_OP(AddCustomTilingSink)
 #endif // ASCENDADAPTER2_CUSTOM_REG_OP_H
 ```
-(2)进入torchair仓根目录执行编译命令:
+(2)进入torchair工程源码根目录执行编译命令,产物在codegen/custom_op/auto_generated_ge_raw_custom_ops.py目录。
 ```
 cd build
@@ -67,7 +71,7 @@ cmake ..
 make generate_ge_raw_custom_ops
 ```
-生成的ge.api函数在codegen/custom\_op/auto\_generated\_ge\_raw\_custom\_ops.py文件中, 内容如下所示:
+生成的ge.api函数内容如下所示:
 ```python
 # This file is auto-generated
@@ -118,40 +122,19 @@ def AddCustomTilingSink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None
 需要修改`from torchair._ge_concrete_graph.ge_converter import ge_op, IrDef`
 为`from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef`
-将上述生成内容拷贝至前面我们新建的add_custom.py文件中。
-
-4.向torchair注册自定义算子的converter:
-
-```python
-@register_fx_node_ge_converter(torch.ops.air.add_custom.default)
-def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None):
-    return AddCustomTilingSink(x, y)
-```
-
-5.单算子部分为用户自行注册,此处预留未实现:
+将上述生成内容拷贝至前面我们新建的add_custom_tiling_sink.py文件中。
 
-```python
-def kernel_impl(x, y):
-    raise NotImplementedError("torch.ops.air.add_custom kernel_impl is not implemented!")
-
-
-torch.library.impl(lib, "add_custom", "CPU")(kernel_impl)
-torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl)
-```
-
-6.调用时,需要import前面新建的add_custom.py:
+6.实现自定义算子converter并注册:
 ```python
-import 
torchair.ops.add_custom - -def forward(self, x, y): - z = torch.ops.air.add_custom.default(x, y) - return z +@register_fx_node_ge_converter(torch.ops.air.add_custom_tiling_sink.default) +def convert_add_custom_tiling_sink(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): + return AddCustomTilingSink(x, y) # 此为前面生产的构图api ``` ## 运行样例算子 -### 1. 编译安装torchair包 +### 编译安装torchair包 1.编译,进入torchair根目录,执行: @@ -178,13 +161,22 @@ rm -rf /usr/local/python3.8.1/lib/python3.8/site-packages/torch_npu/dynamo/torch pip3.x show torch_npu ``` -### 2. 部署自定义算子包 -请参考[tiling下沉样例](https://gitee.com/ascend/samples/tree/master/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl)部署自定义算子包章节: +### 编译部署自定义算子包 +请参考[AddCustomTilingSink自定义算子实现](../AddCustomTilingSink/README.md)。 -### 3. 执行脚本 +### 执行脚本 +test_add_custom_tiling_sink.py是图模式调用算子tiling下沉测试脚本,请根据实际情况替换里面的模型定义、参数等内容。 +该脚本有2个关键操作必须确保完成,具体如下: +1.测试脚本必须import自定义的add_custom_tiling_sink.py模块。 +```python +import torchair.ops.add_custom_tiling_sink -需要脚本中先打开tiling下沉的开关 +def forward(self, x, y): + z = torch.ops.air.add_custom_tiling_sink.default(x, y) + return z +``` +2.测试脚本显式开启tiling_schedule_optimize配置项。 ```python from torchair.configs.compiler_config import CompilerConfig @@ -196,128 +188,4 @@ config.experimental_config.tiling_schedule_optimize = True | 时间 | 更新事项 | | --------- | ------------ | -| 2025/5/22 | 新增本readme | - -## add_custom.py - -```python -from typing import ( - Optional, - Union, - List, -) -import torch -from torchair._ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter -from torchair.ge._ge_graph import Tensor, TensorSpec - -lib = torch.library.Library("air", "FRAGMENT") -lib.define( - """ - add_custom(Tensor x, Tensor y) -> Tensor - """ -) - - -@torch.library.impl(lib, "add_custom", "Meta") -def kernel_meta(x, y): - return torch.empty_like(x) - - -def kernel_impl(x, y): - raise NotImplementedError("torch.ops.air.add_custom kernel_impl is not 
implemented!") - - -torch.library.impl(lib, "add_custom", "CPU")(kernel_impl) -torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl) - - -@register_fx_node_ge_converter(torch.ops.air.add_custom.default) -def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): - return AddCustomTilingSink(x, y) - - -# This file is auto-generated by -# Summary: total 1, generated 1, skipped 0 -from typing import Any, Dict, List, Tuple, Union, Callable, Optional -from torchair.ge._ge_graph import auto_convert_to_tensor, TensorType -from torchair.ge import Tensor, DataType, attr -from torchair._ge_concrete_graph.compat_ir import ge_op, IrDef - - -# This api is auto-generated from IR AddCustomTilingSink -@auto_convert_to_tensor([False, False], [False, False], inputs_tensor_type=[TensorType.TT_ALL, TensorType.TT_ALL]) -def AddCustomTilingSink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None): - """REG_OP(AddCustomTilingSink)\n -.INPUT(x, TensorType::ALL())\n -.INPUT(y, TensorType::ALL())\n -.OUTPUT(z, TensorType::ALL())\n -""" - - # process inputs - inputs = { - "x": x, - "y": y, - } - - # process attrs - attrs = { - } - - # process outputs - outputs = [ - "z", - ] - - return ge_op( - op_type="AddCustomTilingSink", - inputs=inputs, - attrs=attrs, - outputs=outputs, - dependencies=dependencies, - ir=IrDef("AddCustomTilingSink") \ - .input("x", "") \ - .input("y", "") \ - .output("z" , "") - ) - -``` - -## test_add_custom.py - -```python -import torch -import torch_npu -import torchair -from torchair.configs.compiler_config import CompilerConfig -from torchair.core.utils import logger -import logging - -logger.setLevel(logging.DEBUG) -config = CompilerConfig() -config.debug.graph_dump.type = "pbtxt" -config.experimental_config.tiling_schedule_optimize = True -npu_backend = torchair.get_npu_backend(compiler_config=config) - -import torchair.ops.add_custom - -class MyModule(torch.nn.Module): - def __init__(self): 
- super(MyModule, self).__init__() - - def forward(self, x, y): - z = torch.ops.air.add_custom.default(x, y) - return z - - -# 创建并编译模块 -module = MyModule().npu() -module = torch.compile(module, fullgraph=True, backend=npu_backend, dynamic=False) - -# 示例输入 -x = torch.randn(6, 64, dtype=torch.float32).npu() -y = torch.randn(6, 64, dtype=torch.float32).npu() - -output = module(x, y) -print(output) - -``` +| 2025/5/22 | 新增本readme | \ No newline at end of file diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/src/add_custom_tiling_sink.py similarity index 73% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/src/add_custom_tiling_sink.py index dc73f0b07..8da1ef815 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/src/add_custom.py +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/src/add_custom_tiling_sink.py @@ -10,26 +10,26 @@ from torchair.ge._ge_graph import Tensor, TensorSpec lib = torch.library.Library("air", "FRAGMENT") lib.define( """ - add_custom(Tensor x, Tensor y) -> Tensor + add_custom_tiling_sink(Tensor x, Tensor y) -> Tensor """ ) -@torch.library.impl(lib, "add_custom", "Meta") +@torch.library.impl(lib, "add_custom_tiling_sink", "Meta") def kernel_meta(x, y): return torch.empty_like(x) def kernel_impl(x, y): - raise NotImplementedError("torch.ops.air.add_custom kernel_impl is not implemented!") + raise NotImplementedError("torch.ops.air.add_custom_tiling_sink kernel_impl is not implemented!") -torch.library.impl(lib, "add_custom", "CPU")(kernel_impl) -torch.library.impl(lib, "add_custom", "PrivateUse1")(kernel_impl) +torch.library.impl(lib, "add_custom_tiling_sink", "CPU")(kernel_impl) 
+torch.library.impl(lib, "add_custom_tiling_sink", "PrivateUse1")(kernel_impl) -@register_fx_node_ge_converter(torch.ops.air.add_custom.default) -def convert_add_custom(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): +@register_fx_node_ge_converter(torch.ops.air.add_custom_tiling_sink.default) +def convert_add_custom_tiling_sink(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): return AddCustomTilingSink(x, y) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/test_add_custom_tiling_sink.py similarity index 88% rename from operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py rename to operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/test_add_custom_tiling_sink.py index 81bba97bb..04aef9313 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustom/test_add_custom.py +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/test_add_custom_tiling_sink.py @@ -11,14 +11,14 @@ config.debug.graph_dump.type = "pbtxt" config.experimental_config.tiling_schedule_optimize = True npu_backend = torchair.get_npu_backend(compiler_config=config) -import torchair.ops.add_custom +import torchair.ops.add_custom_tiling_sink class MyModule(torch.nn.Module): def __init__(self): super(MyModule, self).__init__() def forward(self, x, y): - z = torch.ops.air.add_custom.default(x, y) + z = torch.ops.air.add_custom_tiling_sink.default(x, y) return z diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md new file mode 100644 index 000000000..8d884730e --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md 
@@ -0,0 +1,51 @@ +## 概述 +本样例以AddCustomTilingSink自定义算子为例,介绍了在开发自定义算子时如何启用Tiling下沉,以及如何通过PyTorch在图模式下调用该自定义算子的完整流程。 + +## 目录结构介绍 + +``` +├── AddCustomTilingSink +│ ├── AddCustomTilingSink // AscendC算子实现 +│ └── PytorchInvocation // Pytorch调用样例 +``` + +## 算子描述 +Add算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: +``` +z = x + y +``` +## 算子规格描述 + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x8 * 2048floatND
y8 * 2048floatND
算子输出z8 * 2048floatND
核函数名add_custom_tiling_sink
+ +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2 训练系列产品/Atlas 800I A2 推理产品/A200I A2 Box 异构组件 +- Atlas A3 训练系列产品/Atlas A3 推理系列产品 + +## 编译运行样例算子 + +### 1. 实现Pytorch自定义算子并注册 +请参考本目录中[PytorchInvocation/readme.md](./PytorchInvocation/README.md)实现Pytorch侧注册。 + +### 2. 实现CANN自定义算子,并完成编译部署 +请参考本目录中[AddCustomTilingSink/README.md](./AddCustomTilingSink/README.md)部署自定义算子包。 + +### 3. 执行测试脚本 +执行本目录中[PytorchInvocation/test_add_custom.py](./PytorchInvocation/test_add_custom.py)测试脚本验证功能。 + +## 更新说明 + +| 时间 | 更新事项 | +| --------- | ------------ | +| 2025/5/28 | 新增本readme | diff --git a/operator/ascendc/2_features/17_tiling_sink/README.md b/operator/ascendc/2_features/17_tiling_sink/README.md new file mode 100644 index 000000000..3cf3b3be2 --- /dev/null +++ b/operator/ascendc/2_features/17_tiling_sink/README.md @@ -0,0 +1,16 @@ +## 背景介绍 + +在静态图模式下,可以通过整图下沉优化调度性能。将完整的计算图一次性下发至Device侧,后续执行则无需Host参与,由Device自主完成计算,从而减少Host-Device交互开销,提升执行效率。部分算子的Tiling计算依赖运行时输入的具体数值(Tiling值依赖),需在执行时动态计算Tiling参数。针对该场景,可采用Tiling下沉优化方案:将Tiling计算下沉至Device侧的AI CPU上执行,从而实现计算全程在Device侧高效完成。 +当前仅融合算子(矢量计算和矩阵计算融合)支持进行Tiling下沉。 + +## 算子开发样例 +当前本目录包含的所有样例如下。 +| 目录名称 | 功能描述 | 运行环境 | +| ------------------------------------------------------------ | ---------------------------------------------------- | -- | +| [AddCustomTilingSink](./AddCustomTilingSink/) | 基于Ascend C的自定义Tiling下沉算子及Pytorch调用样例,通过使能Tiling下沉,实现下沉执行优化调度性能。|Atlas A2 训练系列产品/Atlas 800I A2 推理产品/A200I A2 Box 异构组件
Atlas A3 训练系列产品/Atlas A3 推理系列产品| + +## 更新说明 + +| 时间 | 更新事项 | +| --------- | ------------ | +| 2025/5/28 | 新增本readme | -- Gitee From 4bd13a6999b4e930ed0e7991e5fc0ac4e5f2e37d Mon Sep 17 00:00:00 2001 From: renjie Date: Tue, 3 Jun 2025 09:32:57 +0000 Subject: [PATCH 11/97] =?UTF-8?q?!2667=20=E3=80=90tiling=E4=B8=8B=E6=B2=89?= =?UTF-8?q?=E6=A0=B7=E4=BE=8B=E3=80=91=E3=80=90AR20250522891845=E3=80=91RE?= =?UTF-8?q?ADME=E4=BF=AE=E6=94=B9=20Merge=20pull=20request=20!2667=20from?= =?UTF-8?q?=20renjie/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddCustomTilingSink.json | 2 +- .../AddCustomTilingSink/README.md | 50 ++++++++++++------- .../tf_plugin/tensorflow_add_custom_plugin.cc | 22 -------- .../op_host/add_custom_tiling_sink.cpp | 6 ++- .../op_host/add_custom_tiling_sink_tiling.cpp | 8 +-- .../op_kernel/add_custom_tiling_sink.cpp | 2 +- 6 files changed, 42 insertions(+), 48 deletions(-) delete mode 100644 operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/framework/tf_plugin/tensorflow_add_custom_plugin.cc diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json index 1d93e1f49..9a1ee691b 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json @@ -15,7 +15,7 @@ }, { "name": "y", - "param_type": "optional", + "param_type": "required", "format": [ "ND" ], diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md index a89d51c80..16e430cc8 100644 --- 
a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md @@ -1,29 +1,28 @@ - ## 概述 -本样例基于AddCustom算子工程,提供了支持Tiling下沉的自定义算子开发样例。 -若要使能tiling下沉,算子tiling函数必须独立实现,详细开发指导请参考[Tiling下沉自定义算子开发指南](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/Ascendcopdevg/atlas_ascendc_10_00014.html) +本样例基于AddCustom算子工程,提供支持Tiling下沉的自定义算子开发样例。 +若要使能Tiling下沉,算子Tiling函数必须独立实现,详细开发指导请参考[Ascend C算子开发](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC)手册中的Tiling下沉章节。 ## 目录结构介绍 ``` -├─OpImpl // 算子实现 -│ ├─framework // 算子插件实现文件目录 -│ ├─op_host // host侧实现文件 -│ │ ├─add_custom_tiling_sink.cpp // 算子原型定义、tiling函数注册等 -│ │ │ add_custom_tiling_sink_tiling.cpp // 算子tiling函数的所有实现(必须独立实现于cpp中) -│ │ └─add_custom_tiling_sink_tiling.h // 算子tiling结构体定义 -│ └─op_kernel // kernel侧实现文件 -│ AddCustomTilingSink.json // 算子的原型定义json文件 -│ install.sh // 脚本,调用msOpGen生成自定义算子工程,并编译 -``` +├─op_host // host侧实现文件 +│ ├─add_custom_tiling_sink.cpp // 算子原型定义、Tiling函数注册等 +│ │ add_custom_tiling_sink_tiling.cpp // 算子Tiling函数的所有实现(必须独立实现于cpp中) +│ └─add_custom_tiling_sink_tiling.h // 算子Tiling结构体定义 +├─op_kernel // kernel侧实现文件 +├─AddCustomTilingSink.json // 算子的原型定义json文件 +├─install.sh // 脚本,调用msOpGen生成自定义算子工程,并编译 +``` ## 算子描述 -Add算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: +AddCustomTilingSink算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: ``` + z = x + y + ``` ## 算子规格描述 - + @@ -32,9 +31,22 @@ z = x + y - +
算子类型(OpType)Add
算子类型(OpType)AddCustomTilingSink
算子输入nameshapedata typeformat
x8 * 2048floatND
算子输出z8 * 2048floatND
核函数名add_custom
核函数名add_custom_tiling_sink
+## 代码实现介绍 +本样例基于AddCustom算子工程,使能Tiling下沉做出了以下修改: +- 算子原型定义:在op_host/add_custom_tiling_sink.cpp中,定义了算子原型,指定输入"y"为Tiling值依赖。 +- Tiling函数逻辑:添加判断逻辑,通过判断值依赖InputTensor的Data是否为空指针,确认当前是否处于编译期。若处于编译期,需要设置最大的workspace用于内存分配。 +- Tiling函数下沉注册:将所有的Tiling函数逻辑单独在op_host/add_custom_tiling_sink_tiling.cpp中实现,并通过DEVICE_IMPL_OP_OPTILING接口注册下沉的Tiling函数。(DEVICE_IMPL_OP_OPTILING接口定义在头文件device_op_impl_registry.h中) +- 算子host侧CMakeList.txt:Tiling下沉需要添加device侧的编译任务,本样例通过install.sh脚本添加,具体添加内容如下。 +``` +ascendc_device_library( TARGET cust_opmaster + OPTION SHARED + SRC ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_tiling_sink_tiling.cpp) +``` +- 算子kernel实现:通过KERNEL_TASK_TYPE_DEFAULT接口将算子强制指定在AIC、AIV混合场景运行,满足Tiling下沉算子条件。 + ## 支持的产品型号 本样例支持如下产品型号: - Atlas A2 训练系列产品/Atlas 800I A2 推理产品/A200I A2 Box 异构组件 @@ -56,7 +68,7 @@ z = x + y - 切换到msOpGen脚本install.sh所在目录 ```bash # 若开发者以git命令行方式clone了master分支代码,并切换目录 - cd ${git_clone_path}/samples/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/OpImpl + cd ${git_clone_path}/samples/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink ``` - 调用脚本,生成自定义算子工程,复制host和kernel实现并编译算子 @@ -109,9 +121,9 @@ z = x + y cd CustomOp/build_out ./custom_opp__.run ``` - 命令执行成功后,自定义算子包中的相关文件将部署至opp算子库环境变量ASCEND_OPP_PATH指向的的vendors/customize目录中。若要执行tiling下沉样例,则算子包不支持通过--install-path指定目录安装。 + 命令执行成功后,自定义算子包中的相关文件将部署至opp算子库环境变量ASCEND_OPP_PATH指向的的vendors/customize目录中。若要执行Tiling下沉样例,则算子包不支持通过--install-path指定目录安装。 ## 更新说明 | 时间 | 更新事项 | | ---------- | ---------------------------- | -| 2025/5/22 | 新增AddCustomTilingSink算子样例 | +| 2025/5/22 | 新增AddCustomTilingSink算子样例 | \ No newline at end of file diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/framework/tf_plugin/tensorflow_add_custom_plugin.cc b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/framework/tf_plugin/tensorflow_add_custom_plugin.cc deleted file mode 100644 index b96757140..000000000 --- 
a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/framework/tf_plugin/tensorflow_add_custom_plugin.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0. - * You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ - -#include "register/register.h" - -namespace domi { -// register op info to GE -REGISTER_CUSTOM_OP("AddCustomTilingSink") - .FrameworkType(TENSORFLOW) // type: CAFFE, TENSORFLOW - .OriginOpType("Add") // name in tf module - .ParseParamsByOperatorFn(AutoMappingByOpFn); -} // namespace domi diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink.cpp index c88a110b0..ea682bb3c 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink.cpp +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink.cpp @@ -36,7 +36,7 @@ public: .DataType({ge::DT_FLOAT}) .Format({ge::FORMAT_ND}); this->Input("y") - .ParamType(OPTIONAL) + .ParamType(REQUIRED) .DataType({ge::DT_FLOAT}) .Format({ge::FORMAT_ND}) .ValueDepend(OPTIONAL, DependScope::TILING); // 表示输入y为Tiling值依赖 @@ -49,7 +49,9 @@ public: this->AICore().SetTiling(optiling::AddCustomSinkTilingFunc); - this->AICore().AddConfig("ascend910b"); + this->AICore() + .AddConfig("ascend910b") + 
.AddConfig("ascend910_93"); } }; OP_ADD(AddCustomTilingSink); diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp index 32ffb8a3e..563ba0b63 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp @@ -14,8 +14,8 @@ namespace optiling { static constexpr uint32_t BLOCK_DIM = 8; static constexpr uint32_t TILE_NUM = 8; -static constexpr size_t MAX_WORKSPACE_SIZE = 32; // 能获取到的最大workspace大小 -static constexpr size_t DEFAULT_WORKSPACE_SIZE = 1; +static constexpr size_t MAX_WORKSPACE_SIZE = 32; // 算子所需workspace的最大值,AddCustomTilingSink样例不需要workspace,不涉及设置,此处设置为固定值仅作为示例 +static constexpr size_t DEFAULT_WORKSPACE_SIZE = 0; ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext *context) { TilingSinkTilingData tiling; @@ -26,8 +26,10 @@ ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext *context) tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); size_t *currentWorkspace = context->GetWorkspaceSizes(1); - currentWorkspace[0] = DEFAULT_WORKSPACE_SIZE; + currentWorkspace[0] = DEFAULT_WORKSPACE_SIZE; // 设置运行时workspace大小 if (context->GetInputTensor(1) != nullptr && context->GetInputTensor(1)->GetData() == nullptr) { + // 通过判断值依赖InputTensor的Data是否为空指针来确认当前是否处于编译期。 + // Tiling下沉场景,编译期需要为算子分配内存,包括其所需的workspace。为了保证运行时的高效性,编译期应根据算子的执行需求,合理设置所需的workspace最大值,以避免内存不足或浪费。 currentWorkspace[0] = MAX_WORKSPACE_SIZE; } return ge::GRAPH_SUCCESS; diff --git 
a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_kernel/add_custom_tiling_sink.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_kernel/add_custom_tiling_sink.cpp index 4b1cb2f1d..d8b3738ce 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_kernel/add_custom_tiling_sink.cpp +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_kernel/add_custom_tiling_sink.cpp @@ -85,7 +85,7 @@ private: extern "C" __global__ __aicore__ void add_custom_tiling_sink(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) { GET_TILING_DATA(tiling_data, tiling); - KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); // 将算子强制指定在AIC、AIV混合场景运行,模拟融合算子场景 if ASCEND_IS_AIC { return; } -- Gitee From 6f4cf00910fa1650c2b96590b2f909175c6c4372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8?= Date: Wed, 4 Jun 2025 02:34:45 +0000 Subject: [PATCH 12/97] =?UTF-8?q?!2668=20[bugfix]fix=20ModuleNotFoundError?= =?UTF-8?q?:torch=5Fnpu.meta=20Merge=20pull=20request=20!2668=20from=20?= =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_ops_custom_register_in_graph.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py b/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py index a8f095457..f9bed9c44 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py @@ -16,7 +16,17 @@ import torchair from 
torch_npu.testing.testcase import TestCase, run_tests from torchair import register_fx_node_ge_converter from torchair.ge import Tensor -from torch_npu.meta._meta_registrations import m +try: + from torch_npu.meta._meta_registrations import m +except ModuleNotFoundError: + try: + from torch_npu.op_plugin.meta import _meta_registrations as m + except ModuleNotFoundError: + raise ModuleNotFoundError( + "Failed to import '_meta_registrations' module. " + "Neither 'torch_npu.meta._meta_registrations' " + "nor 'torch_npu.op_plugin.meta._meta_registrations' could be found. " + ) @impl(m, "npu_add_custom") -- Gitee From fc7a1cc09da29bb2cd41e36b6e7a213b89cdfab0 Mon Sep 17 00:00:00 2001 From: PengC Date: Tue, 10 Jun 2025 01:45:07 +0000 Subject: [PATCH 13/97] =?UTF-8?q?!2672=20=E4=BF=AE=E6=94=B9=E7=9C=9F?= =?UTF-8?q?=E5=80=BC=E7=94=9F=E6=88=90=E7=B1=BB=E5=9E=8B=20Merge=20pull=20?= =?UTF-8?q?request=20!2672=20from=20PengC/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MmadBiasInvocation/scripts/gen_data.py | 6 +++--- .../20_mmad_kernellaunch/MmadInvocation/scripts/gen_data.py | 4 ++-- .../DumpTensorCube/AclNNInvocation/scripts/gen_data.py | 4 ++-- .../DumpTensorKernelInvocationCube/scripts/gen_data.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/gen_data.py b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/gen_data.py index 0fdd40e64..4fcd9b96b 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/gen_data.py +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/gen_data.py @@ -17,9 +17,9 @@ def gen_golden_data(): N = 32 K = 32 - x1_gm = np.random.randint(1, 10, [M, K]).astype(np.float16) - x2_gm = np.random.randint(1, 10, [K, N]).astype(np.float16) - bias_gm = np.random.randint(1, 10, [N]).astype(np.float16) + 
x1_gm = np.random.uniform(1, 10, [M, K]).astype(np.float16) + x2_gm = np.random.uniform(1, 10, [K, N]).astype(np.float16) + bias_gm = np.random.uniform(1, 10, [N]).astype(np.float16) golden = (np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32)) + bias_gm.astype(np.float32)).astype(np.float32) os.system("mkdir -p input") os.system("mkdir -p output") diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/gen_data.py b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/gen_data.py index d4cb3e7d2..dc82df2a1 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/gen_data.py +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/gen_data.py @@ -17,8 +17,8 @@ def gen_golden_data(): N = 32 K = 32 - x1_gm = np.random.randint(1, 10, [M, K]).astype(np.float16) - x2_gm = np.random.randint(1, 10, [K, N]).astype(np.float16) + x1_gm = np.random.uniform(1, 10, [M, K]).astype(np.float16) + x2_gm = np.random.uniform(1, 10, [K, N]).astype(np.float16) golden = (np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32))).astype(np.float32) os.system("mkdir -p input") os.system("mkdir -p output") diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/gen_data.py b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/gen_data.py index bf5be8383..d773c163b 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/gen_data.py +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/gen_data.py @@ -17,8 +17,8 @@ def gen_golden_data(): N = 32 K = 32 - x1_gm = np.random.randint(1, 10, [M, K]).astype(np.float16) - x2_gm = np.random.randint(1, 10, [K, N]).astype(np.float16) + x1_gm = np.random.uniform(1, 10, [M, K]).astype(np.float16) + x2_gm = np.random.uniform(1, 10, 
[K, N]).astype(np.float16) golden = (np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32))).astype(np.float32) os.system("mkdir -p input") os.system("mkdir -p output") diff --git a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/gen_data.py b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/gen_data.py index 88b51c629..e00c3067e 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/gen_data.py +++ b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/gen_data.py @@ -17,8 +17,8 @@ def gen_golden_data(): N = 32 K = 32 - x1_gm = np.random.randint(1, 10, [M, K]).astype(np.float16) - x2_gm = np.random.randint(1, 10, [K, N]).astype(np.float16) + x1_gm = np.random.uniform(1, 10, [M, K]).astype(np.float16) + x2_gm = np.random.uniform(1, 10, [K, N]).astype(np.float16) golden = (np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32))).astype(np.float32) os.system("mkdir -p input") os.system("mkdir -p output") -- Gitee From 5e1236944f5a90058758a918fdbf7d3884d3cd57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E9=81=93=E6=98=8E?= Date: Tue, 10 Jun 2025 03:17:40 +0000 Subject: [PATCH 14/97] =?UTF-8?q?!2671=20add=20limit=20for=20mc2=20Merge?= =?UTF-8?q?=20pull=20request=20!2671=20from=20=E6=9D=8E=E9=81=93=E6=98=8E/?= =?UTF-8?q?fix=5Flimit=5Fmc2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../4_best_practices/21_all_gather_matmul_custom/README.md | 3 +++ .../22_matmul_reduce_scatter_custom/README.md | 5 ++++- .../4_best_practices/23_matmul_all_reduce_custom/README.md | 5 ++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md index 074c90feb..c121aac07 100644 --- 
a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md @@ -75,6 +75,8 @@ CANN软件包中提供了工程创建工具msopgen,AllGatherMatmulCustom算子 ### 1. 获取源码包 编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 +注意:本样例的执行依赖8卡集成环境。为保证样例的正常执行,请预先安装2.1版本的torch和torch_npu安装包。 + ### 2. 生成自定义算子工程,复制host和kernel实现并编译算子 - 切换到msOpGen脚本install.sh所在目录 ```bash @@ -141,3 +143,4 @@ CANN软件包中提供了工程创建工具msopgen,AllGatherMatmulCustom算子 | 时间 | 更新事项 | | ---------- | ---------------------------- | | 2024/12/19 | 新增样例 | +| 2025/06/09 | 添加算子执行环境备注 | diff --git a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/README.md b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/README.md index 0d85f3188..36f47216b 100644 --- a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/README.md +++ b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/README.md @@ -75,6 +75,8 @@ CANN软件包中提供了工程创建工具msopgen,MatmulReduceScatterCustom ### 1. 获取源码包 编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 +注意:本样例的执行依赖8卡集成环境。为保证样例的正常执行,请预先安装2.1版本的torch和torch_npu安装包。 + ### 2. 生成自定义算子工程,复制host和kernel实现并编译算子 - 切换到msOpGen脚本install.sh所在目录 ```bash @@ -140,4 +142,5 @@ CANN软件包中提供了工程创建工具msopgen,MatmulReduceScatterCustom ## 更新说明 | 时间 | 更新事项 | | ---------- | ---------------------------- | -| 2024/12/19 | 新增样例 | \ No newline at end of file +| 2024/12/19 | 新增样例 | +| 2025/06/09 | 添加算子执行环境备注 | \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/README.md b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/README.md index 3bc513470..155dcc322 100644 --- a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/README.md +++ b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/README.md @@ -75,6 +75,8 @@ CANN软件包中提供了工程创建工具msopgen,MatmulAllReduceCustom算子 ### 1. 
获取源码包 编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 +注意:本样例的执行依赖8卡集成环境。为保证样例的正常执行,请预先安装2.1版本的torch和torch_npu安装包。 + ### 2. 生成自定义算子工程,复制host和kernel实现并编译算子 - 切换到msOpGen脚本install.sh所在目录 ```bash @@ -140,4 +142,5 @@ CANN软件包中提供了工程创建工具msopgen,MatmulAllReduceCustom算子 ## 更新说明 | 时间 | 更新事项 | | ---------- | ---------------------------- | -| 2024/12/19 | 新增样例 | \ No newline at end of file +| 2024/12/19 | 新增样例 | +| 2025/06/09 | 添加算子执行环境备注 | \ No newline at end of file -- Gitee From 575b5fc5ddc5d44e0623e284ee891aedf07c8d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8?= Date: Wed, 11 Jun 2025 02:39:02 +0000 Subject: [PATCH 15/97] =?UTF-8?q?!2676=20update=2017=5Ftiling=5Fsink=20rea?= =?UTF-8?q?dme=20Merge=20pull=20request=20!2676=20from=20=E9=99=88?= =?UTF-8?q?=E5=A8=81=E4=BA=A8/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddCustomTilingSink/PytorchInvocation/README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/README.md index 8167f74d0..4c7468bcb 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/README.md +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/PytorchInvocation/README.md @@ -127,9 +127,17 @@ def AddCustomTilingSink(x: Tensor, y: Tensor, *, dependencies=[], node_name=None 6.实现自定算子converetr并注册: ```python +from typing import ( + Optional, + Union, + List, +) +from torchair._ge_concrete_graph.fx2ge_converter import register_fx_node_ge_converter +from torchair.ge._ge_graph import Tensor, TensorSpec + @register_fx_node_ge_converter(torch.ops.air.add_custom_tiling_sink.default) def convert_add_custom_tiling_sink(x: torch.Tensor, y: torch.Tensor, meta_outputs: Union[TensorSpec, List[TensorSpec]] = None): - return 
AddCustomTilingSink(x, y) # 此为前面生产的构图api + return AddCustomTilingSink(x, y) # 此为前面生成的构图api ``` ## 运行样例算子 -- Gitee From 618064c42bbc2ef4150d60083a5db9a0e50c5cd9 Mon Sep 17 00:00:00 2001 From: wangyuqing Date: Thu, 12 Jun 2025 01:38:08 +0000 Subject: [PATCH 16/97] !2675 update mobilenet_v2_1.0_224.tgz download url Merge pull request !2675 from wangyuqing/master --- .../9_amct/amct_tensorflow/auto_calibration/README_CN.md | 2 +- .../9_amct/amct_tensorflow/calibration/README_CN.md | 2 +- .../9_amct/amct_tensorflow/cmd/README_CN.md | 2 +- .../9_amct/amct_tensorflow/convert_model/README_CN.md | 2 +- .../9_amct/amct_tensorflow/mobilenet_v2/README_CN.md | 6 +++--- .../9_amct/amct_tensorflow_ascend/mobilenetv2/README_CN.md | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/level1_single_api/9_amct/amct_tensorflow/auto_calibration/README_CN.md b/python/level1_single_api/9_amct/amct_tensorflow/auto_calibration/README_CN.md index 1c9649844..1a541b9ca 100644 --- a/python/level1_single_api/9_amct/amct_tensorflow/auto_calibration/README_CN.md +++ b/python/level1_single_api/9_amct/amct_tensorflow/auto_calibration/README_CN.md @@ -5,7 +5,7 @@ ### 1.1 量化前提 + **模型准备** -请点击下载 [MobileNet V2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 +请点击下载 [MobileNet V2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 + **数据集准备** 自动量化回退过程中,需要不断的对模型进行校准和测试,因此需要用户准备数据集,本示例所采用的数据集为标准 TFRecord 格式的 ImageNet 的 子集 ILSVRC-2012-CLS 的验证集,共有 50000 张图片,如果采用其他数据集,则需要用户自行修改 sample 文件中的数据预处理部分以匹配模型输入。 diff --git a/python/level1_single_api/9_amct/amct_tensorflow/calibration/README_CN.md b/python/level1_single_api/9_amct/amct_tensorflow/calibration/README_CN.md index 2eb39eb04..a6ea8bceb 100644 --- a/python/level1_single_api/9_amct/amct_tensorflow/calibration/README_CN.md 
+++ b/python/level1_single_api/9_amct/amct_tensorflow/calibration/README_CN.md @@ -5,7 +5,7 @@ ### 1.1 量化前提 + **模型准备** -请点击下载 [MobileNet V2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 +请点击下载 [MobileNet V2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 + **数据集准备** 使用昇腾模型压缩工具对模型完成量化后,需要对模型进行推理,以测试量化数据的精度。推理过程中需要使用和模型相匹配的数据集。请下载测试图片 [classification.jpg](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/mobilenet_v2_calibration/classification.jpg),并将该图片放到 [data](./data/) 目录下。 diff --git a/python/level1_single_api/9_amct/amct_tensorflow/cmd/README_CN.md b/python/level1_single_api/9_amct/amct_tensorflow/cmd/README_CN.md index f6795fd29..d4b58cc67 100644 --- a/python/level1_single_api/9_amct/amct_tensorflow/cmd/README_CN.md +++ b/python/level1_single_api/9_amct/amct_tensorflow/cmd/README_CN.md @@ -5,7 +5,7 @@ ### 1.1 量化前提 + **模型准备** -请点击下载 [MobileNet V2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 +请点击下载 [MobileNet V2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 + **准备校准数据集** 校准集用来产生量化因子,保证精度。计算量化参数的过程被称为“校准 (calibration)”。校准过程需要使用一部分图片来针对性计算量化参数,使用一个或多个 batch 对量化后的网络模型进行推理即可完成校准。为了保证量化精度,校准集与测试精度的数据集来源应一致。 diff --git a/python/level1_single_api/9_amct/amct_tensorflow/convert_model/README_CN.md b/python/level1_single_api/9_amct/amct_tensorflow/convert_model/README_CN.md index f4efdce91..58ec4706e 100644 --- a/python/level1_single_api/9_amct/amct_tensorflow/convert_model/README_CN.md +++ b/python/level1_single_api/9_amct/amct_tensorflow/convert_model/README_CN.md @@ -5,7 +5,7 @@ ### 1.1 量化前提 + **模型准备** -请点击下载 [MobileNet 
V2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 +请点击下载 [MobileNet V2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 + **数据集准备** 使用昇腾模型压缩工具对模型完成量化后,需要对模型进行推理,以测试量化数据的精度。推理过程中需要使用和模型相匹配的数据集。请下载测试图片 [classification.jpg](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/mobilenet_v2_calibration/classification.jpg),并将该图片放到 [data](./data/) 目录下。 diff --git a/python/level1_single_api/9_amct/amct_tensorflow/mobilenet_v2/README_CN.md b/python/level1_single_api/9_amct/amct_tensorflow/mobilenet_v2/README_CN.md index f33272652..1ae0c9539 100644 --- a/python/level1_single_api/9_amct/amct_tensorflow/mobilenet_v2/README_CN.md +++ b/python/level1_single_api/9_amct/amct_tensorflow/mobilenet_v2/README_CN.md @@ -5,7 +5,7 @@ ### 1.1 量化前提 + **模型准备** -请点击下载 [MobileNet V2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 +请点击下载 [MobileNet V2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 + **数据集准备** 使用昇腾模型压缩工具对模型完成量化后,需要对模型进行推理,以测试量化数据的精度。推理过程中需要使用和模型相匹配的数据集。请下载测试图片 [classification.jpg](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/mobilenet_v2_calibration/classification.jpg),并将该图片放到 [data](./data/) 目录下。 @@ -60,7 +60,7 @@ Quantized Model Prediction: ### 2.1 量化前提 + **模型准备** -请点击下载 [MobileNet V2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 +请点击下载 [MobileNet V2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 + **数据集准备** 
使用昇腾模型压缩工具对模型完成量化后,需要对模型进行推理,以测试量化数据的精度。推理过程中需要使用和模型相匹配的数据集。请下载测试图片 [classification.jpg](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/mobilenet_v2_calibration/classification.jpg),并将该图片放到 [data](./data/) 目录下。 @@ -158,7 +158,7 @@ Quantized Model Prediction: ### 4.1 量化前提 + **模型准备** -请点击下载 [MobileNet V2](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 +请点击下载 [MobileNet V2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放到 [model](./model/) 目录下。 + **数据集准备** 自动量化回退过程中,需要不断的对模型进行校准和测试,因此需要用户准备数据集,本示例所采用的数据集为标准 TFRecord 格式的 ImageNet 的 子集 ILSVRC-2012-CLS 的验证集,共有 50000 张图片,如果采用其他数据集,则需要用户自行修改 sample 文件中的数据预处理部分以匹配模型输入。 diff --git a/python/level1_single_api/9_amct/amct_tensorflow_ascend/mobilenetv2/README_CN.md b/python/level1_single_api/9_amct/amct_tensorflow_ascend/mobilenetv2/README_CN.md index f6619b1b4..06fb0f66f 100644 --- a/python/level1_single_api/9_amct/amct_tensorflow_ascend/mobilenetv2/README_CN.md +++ b/python/level1_single_api/9_amct/amct_tensorflow_ascend/mobilenetv2/README_CN.md @@ -7,7 +7,7 @@ 请按照手册准备好环境并安装好amct_tensorflow_ascend工具包。 ##### 模型准备 请至 -[Tensorflow-models](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) +[Tensorflow-models](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com:443/resource/mobilenet_v2_1.0_224.tgz) 下载 MobileNetV2 模型文件。解压并将其中的 mobilenet_v2_1.0_224_frozen.pb 文件放入[pre_model](./pre_model)文件夹中。 ##### 数据集准备 可以对量化前后的模型进行推理,以测试量化对精度的影响,推理过程中需要使用和模型相匹配的数据集。请下载[测试图片](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/mobilenet_v2_calibration/classification.jpg),并将该图片放到 [data](./data/) 目录下。 -- Gitee From ecb188c9da7b863f1307c706edd18038c9b2ac31 Mon Sep 17 00:00:00 2001 From: alpaca12345UUU Date: Thu, 12 Jun 2025 09:28:28 +0000 Subject: [PATCH 17/97] !2677 add tbufpool sample Merge pull request !2677 from 
alpaca12345UUU/master --- .../2_features/2_tbufpool/CMakeLists.txt | 76 +++++++ .../ascendc/2_features/2_tbufpool/README.md | 87 +++++++- .../2_features/2_tbufpool/cmake/cpu_lib.cmake | 26 +++ .../2_features/2_tbufpool/cmake/npu_lib.cmake | 12 + .../2_features/2_tbufpool/data_utils.h | 211 ++++++++++++++++++ .../ascendc/2_features/2_tbufpool/main.cpp | 172 ++++++++++++++ .../op_host/tbufpool_custom_tiling.cpp | 19 ++ .../op_host/tbufpool_custom_tiling.h | 18 ++ .../2_tbufpool/op_kernel/tbufpool_custom.cpp | 20 ++ .../2_tbufpool/op_kernel/tbufpool_custom.h | 128 +++++++++++ operator/ascendc/2_features/2_tbufpool/run.sh | 48 ++++ .../2_features/2_tbufpool/scripts/gen_data.py | 32 +++ operator/ascendc/2_features/README.md | 1 + 13 files changed, 849 insertions(+), 1 deletion(-) create mode 100644 operator/ascendc/2_features/2_tbufpool/CMakeLists.txt create mode 100644 operator/ascendc/2_features/2_tbufpool/cmake/cpu_lib.cmake create mode 100644 operator/ascendc/2_features/2_tbufpool/cmake/npu_lib.cmake create mode 100644 operator/ascendc/2_features/2_tbufpool/data_utils.h create mode 100644 operator/ascendc/2_features/2_tbufpool/main.cpp create mode 100644 operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.cpp create mode 100644 operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h create mode 100644 operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.cpp create mode 100644 operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h create mode 100644 operator/ascendc/2_features/2_tbufpool/run.sh create mode 100644 operator/ascendc/2_features/2_tbufpool/scripts/gen_data.py diff --git a/operator/ascendc/2_features/2_tbufpool/CMakeLists.txt b/operator/ascendc/2_features/2_tbufpool/CMakeLists.txt new file mode 100644 index 000000000..060c0adc0 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/CMakeLists.txt @@ -0,0 +1,76 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+# ====================================================================================================================== + +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) +if(${RUN_MODE}) + set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +endif() +if (${SOC_VERSION}) + set(SOC_VERSION "Ascend910" CACHE STRING "system on chip type") +endif() + +set(ASCEND_CANN_PACKAGE_PATH "~/Ascend/ascend-toolkit/latest" CACHE STRING "ASCEND CANN package installation directory") + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() + +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel/tbufpool_custom.cpp +) +set(CUSTOM_ASCEND310P_LIST "Ascend310P1" "Ascend310P3") + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() + +add_executable(tbufpool_direct_kernel_op + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/op_host/tbufpool_custom_tiling.cpp +) + +target_compile_options(tbufpool_direct_kernel_op PRIVATE + $:-g>> + -O2 + -std=c++17 + -D_GLIBCXX_USE_CXX11_ABI=0 +) + +target_compile_definitions(tbufpool_direct_kernel_op PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_include_directories(tbufpool_direct_kernel_op PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + $:${ASCEND_CANN_PACKAGE_PATH}/include>> + $:${ASCEND_CANN_PACKAGE_PATH}/runtime/include>> +) + +target_link_libraries(tbufpool_direct_kernel_op PRIVATE + $,$>:host_intf_pub>> + $:tikicpulib::${SOC_VERSION}>> + $:ascendcl>> + $:c_sec>> + ascendc_kernels_${RUN_MODE} + tiling_api + register + platform + ascendalog + dl + graph_base +) + +install(TARGETS tbufpool_direct_kernel_op + LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/README.md b/operator/ascendc/2_features/2_tbufpool/README.md index 5af80e6c5..b87611f13 100644 --- a/operator/ascendc/2_features/2_tbufpool/README.md +++ b/operator/ascendc/2_features/2_tbufpool/README.md @@ -1 +1,86 @@ -tbufpool(待补充) \ No newline at end of file +## 目录结构介绍 +``` +├── 2_tbufpool +│ ├── cmake // 编译工程文件 +│ ├── op_host // 本样例tiling代码实现 +│ │ ├── tbufpool_custom_tilling.cpp +│ │ ├── tbufpool_custom_tilling.h +│ ├── op_kernel // 本样例kernel侧代码实现 +│ │ ├── tbufpool_custom.cpp +│ │ ├── tbufpool_custom.h +│ ├── scripts +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 +│ └── run.sh // 编译运行算子的脚本 +``` +## 代码实现介绍 +数据量较大且内存有限时,无法一次完成所有数据搬运,需要拆分成多个阶段计算,每次计算使用其中的一部分数据,可以通过TBufPool资源池进行内存地址复用。本例中,通过调用InitBufPool基础API对Add算子和Sub算子实现过程进行内存管理。从Tpipe划分出资源池tbufPool0,tbufPool0为src0Gm分配空间后,继续分配了资源池tbufPool1,指定tbufPool1与tbufPool2复用并分别运用于第一、二轮计算,此时tbufPool1及tbufPool2共享起始地址及长度。 + +- kernel实现 + Add算子的数学表达式为: + ``` + z = x + y + ``` + Sub算子的数学表达式为: + ``` + z = x - y + ``` + + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,首先启用tbufool1,将部分输入数据src0Gm,部分输入数据src1Gm搬运进片上储存,调用计算接口完成相加计算,搬出到外部存储上。之后切换到tbufpool2进行剩余数据相减计算,得到最终结果,再搬出到外部存储上。 + + 本样例算子的实现流程分为6个基本任务:CopyIn,Compute,CopyOut,CopyIn1,Compute1,CopyOut1。 + - CopyIn任务负责将Global Memory上的部分输入Tensor src0Gm和src1Gm搬运到Local Memory,分别存储在src0Local、src1Local; + - Compute任务负责对src0Local、src1Local执行加法操作,计算结果存储在dstLocal中; + - CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm0中。 + - CopyIn1任务负责将Global Memory上的剩余输入Tensor src0Gm和src1Gm搬运到Local Memory,分别存储在src0Local、src1Local; + - Compute1任务负责对src0Local、src1Local执行剩余数据减法操作,计算结果存储在dstLocal中; + - CopyOut1任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm1中。 + +- 调用实现 + 1. 
CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; + 2. NPU侧运行验证主要通过使用ACLRT_LAUNCH_KERNEL内核调用宏来完成。 + + 应用程序通过ASCENDC_CPU_DEBUG 宏区分代码逻辑运行于CPU侧还是NPU侧。 + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/2_features/2_tbufpool + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + + 配置仿真模式日志文件目录,默认为sim_log。 + ```bash + export CAMODEL_LOG_PATH=./sim_log + ``` + + - 样例执行 + + ```bash + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + - RUN_MODE:编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu / sim / npu]。 + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + ```bash + bash run.sh -r cpu -v Ascendxxxyy + ``` \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/cmake/cpu_lib.cmake b/operator/ascendc/2_features/2_tbufpool/cmake/cpu_lib.cmake new file mode 100644 index 000000000..693f15ac1 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/cmake/cpu_lib.cmake @@ -0,0 +1,26 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED + ${KERNEL_FILES} +) + +target_link_libraries(ascendc_kernels_${RUN_MODE} PRIVATE + tikicpulib::${SOC_VERSION} +) + +target_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + 
+target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE + -g + -O0 + -std=c++17 +) + +install(TARGETS ascendc_kernels_${RUN_MODE} +DESTINATION ${CMAKE_INSTALL_LIBDIR} +) \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/cmake/npu_lib.cmake b/operator/ascendc/2_features/2_tbufpool/cmake/npu_lib.cmake new file mode 100644 index 000000000..8ad136f38 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/cmake/npu_lib.cmake @@ -0,0 +1,12 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_library(ascendc_kernels_${RUN_MODE} STATIC + ${KERNEL_FILES} +) \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/data_utils.h b/operator/ascendc/2_features/2_tbufpool/data_utils.h new file mode 100644 index 000000000..05590dd72 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/data_utils.h @@ -0,0 +1,211 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +#endif + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) + +#ifndef ASCENDC_CPU_DEBUG +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +#endif + +/** +* @brief Read data from file +* @param [in] filePath: file path +* @param [out] fileSize: file size +* @return read result +*/ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** +* @brief Write data to file +* @param [in] filePath: file path +* @param [in] buffer: data to write to file +* @param [in] size: size to write +* @return write result +*/ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + auto writeSize = write(fd, buffer, size); + (void) close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +template +void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +#ifndef ASCENDC_CPU_DEBUG +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} +#endif + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow=16) +{ + if (data == nullptr) { + ERROR_LOG("Print data 
failed. data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; +#ifndef ASCENDC_CPU_DEBUG + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; +#endif + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // EXAMPLES_COMMON_DATA_UTILS_H diff --git a/operator/ascendc/2_features/2_tbufpool/main.cpp b/operator/ascendc/2_features/2_tbufpool/main.cpp new file mode 100644 index 000000000..ba4f849dd --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/main.cpp @@ -0,0 +1,172 @@ +/** + * @file main.cpp + * + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include "data_utils.h" +#include "./op_host/tbufpool_custom_tiling.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +#include "aclrtlaunch_tbufpool_custom.h" +#include "tiling/platform/platform_ascendc.h" +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void tbufpool_custom(GM_ADDR x, GM_ADDR y, GM_ADDR zAdd, TbufPoolTilingData tiling); +#endif + +namespace { +constexpr uint32_t USED_CORE_NUM = 1; +constexpr uint32_t TOTAL_LENGTH = 2048; +constexpr uint32_t DST_LENGTH = 1024; +constexpr uint32_t TILING_SIZE = 1; +} + +extern void GenerateTilingData(const uint32_t totalLength, uint8_t *tilingBuf); + +static bool CompareResult(const void *outputData, int64_t outSize) { + void *goldenData; +#ifdef ASCENDC_CPU_DEBUG + goldenData = (uint8_t *)AscendC::GmAlloc(outSize); +#else + CHECK_ACL(aclrtMallocHost((void **)(&goldenData), outSize)); +#endif + size_t goldenSize = outSize; + bool ret = ReadFile("../output/golden.bin", goldenSize, goldenData, goldenSize); + if (ret) { + printf("ReadFile golden.bin success!\n"); + } else { + printf("test failed!\n"); + return false; + } + constexpr float EPS = 1e-4; + int64_t wrongNum = 0; + + for (int i = 0; i < outSize / sizeof(float); i++) { + float a = (reinterpret_cast(outputData))[i]; + float b = (reinterpret_cast(goldenData))[i]; + float ae = std::abs(a - b); + float re = ae / abs(b); + if (ae > EPS && re > EPS) { + printf(" %lf CompareResult failed output is %lf, golden is %lf\n", float(i), a, b); + wrongNum++; + } + } +#ifdef ASCENDC_CPU_DEBUG + AscendC::GmFree((void *)goldenData); +#else + CHECK_ACL(aclrtFreeHost(goldenData)); +#endif + if (wrongNum != 0) { + return false; + } else { + printf("CompareResult golden.bin success!\n"); + return true; + } +} + +int32_t main(int32_t argc, char *argv[]) { + size_t tilingSize = TILING_SIZE * sizeof(uint32_t); + size_t inputSize = TOTAL_LENGTH * sizeof(float); + size_t outputSizeAdd = inputSize; + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *x = (uint8_t 
*)AscendC::GmAlloc(inputSize); + uint8_t *y = (uint8_t *)AscendC::GmAlloc(inputSize); + uint8_t *zAdd = (uint8_t *)AscendC::GmAlloc(outputSizeAdd); + uint8_t *tiling = (uint8_t *)AscendC::GmAlloc(tilingSize); + + ReadFile("../input/input_x.bin", inputSize, x, inputSize); + ReadFile("../input/input_y.bin", inputSize, y, inputSize); + + GenerateTilingData(TOTAL_LENGTH, tiling); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); // run in aiv mode + + ICPU_RUN_KF(tbufpool_custom, USED_CORE_NUM, x, y, zAdd, *reinterpret_cast(tiling)); // use this macro for cpu debug + + WriteFile("../output/output.bin", zAdd, outputSizeAdd); + + bool goldenResult = true; + goldenResult = CompareResult(zAdd, outputSizeAdd); + + AscendC::GmFree((void *)x); + AscendC::GmFree((void *)y); + AscendC::GmFree((void *)zAdd); + AscendC::GmFree((void *)tiling); +#else + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost; + uint8_t *yHost; + uint8_t *zHostAdd; + uint8_t *tiling; + uint8_t *xDevice; + uint8_t *yDevice; + uint8_t *zDeviceAdd; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputSize)); + CHECK_ACL(aclrtMallocHost((void **)(&zHostAdd), outputSizeAdd)); + CHECK_ACL(aclrtMallocHost((void **)(&tiling), tilingSize)); + + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDeviceAdd, outputSizeAdd, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("../input/input_x.bin", inputSize, xHost, inputSize); + ReadFile("../input/input_y.bin", inputSize, yHost, inputSize); + + GenerateTilingData(TOTAL_LENGTH, tiling); + + // Copy host memory to device memory + CHECK_ACL(aclrtMemcpy(xDevice, inputSize, xHost, inputSize, ACL_MEMCPY_HOST_TO_DEVICE)); + 
CHECK_ACL(aclrtMemcpy(yDevice, inputSize, yHost, inputSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + // Execute the kernel + ACLRT_LAUNCH_KERNEL(tbufpool_custom) + (USED_CORE_NUM, stream, xDevice, yDevice, zDeviceAdd, reinterpret_cast(tiling)); + + // Wait for the stop event to complete + CHECK_ACL(aclrtSynchronizeStream(stream)); + + // Copy result to host memory and write to output file + CHECK_ACL(aclrtMemcpy(zHostAdd, outputSizeAdd, zDeviceAdd, outputSizeAdd, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("../output/output.bin", zHostAdd, outputSizeAdd); + + // Compare the result with the golden result + bool goldenResult = true; + goldenResult = CompareResult(zHostAdd, outputSizeAdd); + + // Clean up memory + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDeviceAdd)); + + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(zHostAdd)); + + CHECK_ACL(aclrtFreeHost(tiling)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + return 0; +} + \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.cpp b/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.cpp new file mode 100644 index 000000000..0bc2f1c1d --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.cpp @@ -0,0 +1,19 @@ +/** + * @file tbufpool_custom_tiling.cpp + * + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include "tiling/tiling_api.h" +#include "tbufpool_custom_tiling.h" + + +void GenerateTilingData(uint32_t totalLength, uint8_t* tilingBuf) +{ + TbufPoolTilingData *tiling = reinterpret_cast(tilingBuf); + tiling->totalLength = totalLength; +} \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h b/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h new file mode 100644 index 000000000..63c60d78c --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h @@ -0,0 +1,18 @@ +/** + * @file tbufpool_custom_tiling.h + * + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef EXAMPLES_ACTIVATION_TBUFPOOL_CUSTOM_TILING_H +#define EXAMPLES_ACTIVATION_TBUFPOOL_CUSTOM_TILING_H +#include + +struct TbufPoolTilingData { + uint32_t totalLength; +}; +#endif diff --git a/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.cpp b/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.cpp new file mode 100644 index 000000000..d17a4d185 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.cpp @@ -0,0 +1,20 @@ +/** + * @file tbufpool_custom.cpp + * + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include "./tbufpool_custom.h" +#include "kernel_operator.h" + +extern "C" __global__ __aicore__ void tbufpool_custom(GM_ADDR src0Gm, GM_ADDR src1Gm, GM_ADDR dstGm, TbufPoolTilingData tiling) +{ + AscendC::TPipe pipe; + MyCustomKernel::TbufPoolImpl op; + op.Init(src0Gm, src1Gm, dstGm, tiling, &pipe); + op.Process(); +} \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h b/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h new file mode 100644 index 000000000..9c3559512 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h @@ -0,0 +1,128 @@ +/** + * @file tbufpool_custom.h + * + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef EXAMPLES_ACTIVATION_INITBUFPOOL_CUSTOM_H +#define EXAMPLES_ACTIVATION_INITBUFPOOL_CUSTOM_H +#include "../op_host/tbufpool_custom_tiling.h" +#include "kernel_operator.h" + + +namespace MyCustomKernel { +constexpr int32_t BUFFER_NUM = 1; +constexpr int32_t BUFFER_NUM_T1 = 1; +constexpr int32_t BUFFER_NUM_T2 = 1; +constexpr int32_t BUFFER_LENGTH = 4096 * sizeof(float); +constexpr int32_t BUFF_POOL_LENGTH = 2048 * sizeof(float); +constexpr int32_t INIT_TENSOR_LENGTH = 1024 * sizeof(float); +constexpr int32_t COMPUTE_LENGTH = 1024; + +class TbufPoolImpl { + public: + __aicore__ inline TbufPoolImpl() {} + __aicore__ inline void Init(__gm__ uint8_t* src0Gm, __gm__ uint8_t* src1Gm, __gm__ uint8_t* dstGm, + TbufPoolTilingData tiling, AscendC::TPipe* pipeIn) + { + pipe = pipeIn; + src0Global.SetGlobalBuffer((__gm__ float*)src0Gm); + src1Global.SetGlobalBuffer((__gm__ float*)src1Gm); + dstGlobal.SetGlobalBuffer((__gm__ float*)dstGm); + pipe->InitBufPool(tbufPool0, BUFFER_LENGTH); + 
tbufPool0.InitBuffer(srcQue0, BUFFER_NUM, BUFF_POOL_LENGTH); // Total src0 + tbufPool0.InitBufPool(tbufPool1, BUFF_POOL_LENGTH); + tbufPool0.InitBufPool(tbufPool2, BUFF_POOL_LENGTH, tbufPool1); + tbufPool1.InitBuffer(srcQue1, BUFFER_NUM_T1, INIT_TENSOR_LENGTH); + tbufPool1.InitBuffer(dstQue0, BUFFER_NUM_T1, INIT_TENSOR_LENGTH); + tbufPool2.InitBuffer(srcQue2, BUFFER_NUM_T2, INIT_TENSOR_LENGTH); + tbufPool2.InitBuffer(dstQue1, BUFFER_NUM_T2, INIT_TENSOR_LENGTH); + } + __aicore__ inline void Process() + { + //stage 1 + CopyIn(); + Compute(); + CopyOut(); + tbufPool1.Reset(); + //stage 2 + CopyIn1(); + Compute1(); + CopyOut1(); + tbufPool2.Reset(); + tbufPool0.Reset(); + } + + private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor src0Local = srcQue0.AllocTensor(); + AscendC::LocalTensor src1Local = srcQue1.AllocTensor(); + AscendC::DataCopy(src0Local, src0Global, COMPUTE_LENGTH); + AscendC::DataCopy(src1Local, src1Global, COMPUTE_LENGTH); + srcQue0.EnQue(src0Local); + srcQue1.EnQue(src1Local); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor src0Local = srcQue0.DeQue(); + AscendC::LocalTensor src1Local = srcQue1.DeQue(); + AscendC::LocalTensor dstLocal = dstQue0.AllocTensor(); + AscendC::Add(dstLocal, src0Local, src1Local, COMPUTE_LENGTH); + dstQue0.EnQue(dstLocal); + srcQue0.FreeTensor(src0Local); + srcQue1.FreeTensor(src1Local); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor dstLocal = dstQue0.DeQue(); + AscendC::DataCopy(dstGlobal, dstLocal, COMPUTE_LENGTH); + dstQue0.FreeTensor(dstLocal); + } + __aicore__ inline void CopyIn1() + { + AscendC::LocalTensor src0Local = srcQue0.AllocTensor(); + AscendC::LocalTensor src1Local = srcQue2.AllocTensor(); + AscendC::DataCopy(src0Local, src0Global[COMPUTE_LENGTH], COMPUTE_LENGTH); + AscendC::DataCopy(src1Local, src1Global[COMPUTE_LENGTH], COMPUTE_LENGTH); + srcQue0.EnQue(src0Local); + srcQue2.EnQue(src1Local); + } + __aicore__ inline void Compute1() + { + 
AscendC::LocalTensor src0Local = srcQue0.DeQue(); + AscendC::LocalTensor src1Local = srcQue2.DeQue(); + AscendC::LocalTensor dstLocal = dstQue1.AllocTensor(); + AscendC::Sub(dstLocal, src0Local, src1Local, COMPUTE_LENGTH); + dstQue1.EnQue(dstLocal); + srcQue0.FreeTensor(src0Local); + srcQue2.FreeTensor(src1Local); + } + __aicore__ inline void CopyOut1() + { + AscendC::LocalTensor dstLocal = dstQue1.DeQue(); + AscendC::DataCopy(dstGlobal[COMPUTE_LENGTH], dstLocal, COMPUTE_LENGTH); + dstQue1.FreeTensor(dstLocal); + } + private: + AscendC::TPipe* pipe; + AscendC::TBufPool tbufPool0; + AscendC::TBufPool tbufPool1; + AscendC::TBufPool tbufPool2; + AscendC::TQue srcQue0; + AscendC::TQue srcQue1; + AscendC::TQue srcQue2; + AscendC::TQue dstQue0; + AscendC::TQue dstQue1; + AscendC::GlobalTensor src0Global; + AscendC::GlobalTensor src1Global; + AscendC::GlobalTensor dstGlobal; + }; +}// namespace MyCustomKernel + +#endif + \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/run.sh b/operator/ascendc/2_features/2_tbufpool/run.sh new file mode 100644 index 000000000..5ae89dbe9 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/run.sh @@ -0,0 +1,48 @@ +#!/bin/bash +SHORT=r:,v:, +LONG=run-mode:,soc-version:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +while : +do + case "$1" in + (-r | --run-mode ) + RUN_MODE="$2" + shift 2;; + (-v | --soc-version ) + SOC_VERSION="$2" + shift 2;; + (--) + shift; + break;; + (*) + echo "[ERROR] Unexpected option: $1"; + break;; + esac +done + +rm -rf build +mkdir build +cd build + +# in case of running op in simulator, use stub so instead +if [ "${RUN_MODE}" = "sim" ]; then + export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | sed 's/\/.*\/runtime\/lib64://g') + export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/runtime/lib64/stub:$LD_LIBRARY_PATH +fi + +source $ASCEND_HOME_DIR/bin/setenv.bash +export 
LD_LIBRARY_PATH=${ASCEND_HOME_DIR}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + +cmake -DRUN_MODE=${RUN_MODE} -DSOC_VERSION=${SOC_VERSION} -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_DIR} .. +make -j16 + +if [ "${RUN_MODE}" = "npu" ]; then + ./tbufpool_direct_kernel_op +elif [ "${RUN_MODE}" = "sim" ]; then + export ASCEND_TOOLKIT_HOME=${ASCEND_HOME_DIR} + export ASCEND_HOME_PATH=${ASCEND_HOME_DIR} + msprof op simulator --application=./tbufpool_direct_kernel_op +elif [ "${RUN_MODE}" = "cpu" ]; then + ./tbufpool_direct_kernel_op +fi \ No newline at end of file diff --git a/operator/ascendc/2_features/2_tbufpool/scripts/gen_data.py b/operator/ascendc/2_features/2_tbufpool/scripts/gen_data.py new file mode 100644 index 000000000..fb3dc7143 --- /dev/null +++ b/operator/ascendc/2_features/2_tbufpool/scripts/gen_data.py @@ -0,0 +1,32 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== +import os +import numpy as np + +def gen_golden_data_simple(): + dtype = np.float32 + + input_shape = [8, 256] + input_x = np.random.randint(0, np.nextafter(1000, np.inf), input_shape).astype(dtype) + input_y = np.random.randint(0, np.nextafter(1000, np.inf), input_shape).astype(dtype) + rows = input_shape[0] + mid = rows // 2 + top_half = input_x[:mid] + input_y[:mid] + bottom_half = input_x[mid:] - input_y[mid:] + golden = np.vstack((top_half, bottom_half)) + + os.system("mkdir -p ./input") + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + os.system("mkdir -p ./output") + golden.tofile("./output/golden.bin") + +if __name__ == "__main__": + gen_golden_data_simple() \ No newline at end of file diff --git a/operator/ascendc/2_features/README.md b/operator/ascendc/2_features/README.md index 8c843758b..b5ccf828a 100644 --- a/operator/ascendc/2_features/README.md +++ b/operator/ascendc/2_features/README.md @@ -15,6 +15,7 @@ Ascend C相关特性的样例。特性样例逐步补充中。 当前本目录包含的所有样例如下。 | 目录名称 | 功能描述 | 运行环境 | | ------------------------------------------------------------ | ---------------------------------------------------- | -- | +| [2_tbufpool](./2_tbufpool) | 基于Ascend C的自定义Vector算子及kernel直调样例,通过TBufPool实现Add算子和Sub算子计算过程中的内存复用,提高计算效率。|Atlas A2训练系列产品/Atlas 800I A2推理产品| | [12_cube_group](./12_cube_group) | 基于Ascend C的自定义算子及FrameworkLaunch调用样例,通过软同步控制AIC和AIV之间进行通讯,实现AI Core计算资源分组。|Atlas A2训练系列产品/Atlas 800I A2推理产品| | [13_matmul_api_ibshare](./13_matmul_api_ibshare) | 基于Ascend C的自定义Cube算子及Kernellaunch调用样例,通过A矩阵与B矩阵使能IBSHARE,实现算子性能提升|Atlas A2训练系列产品/Atlas 800I A2推理产品| | [14_matmul_api_constant](./14_matmul_api_constant) | 基于Ascend C的自定义Cube算子及FrameworkLaunch调用样例,通过使用全量常量化的MatmulApiStaticTiling模板参数,替代非常量的TCubeTiling参数,以减少Scalar计算开销,实现算子性能提升|Atlas A2训练系列产品/Atlas 800I A2推理产品| -- Gitee From 4675ce1f0dd15ad85c003bb35c4785b9087bea41 Mon Sep 17 00:00:00 2001 From: PengC Date: Wed, 18 
Jun 2025 06:14:45 +0000 Subject: [PATCH 18/97] !2678 fix tolerance Merge pull request !2678 from PengC/master --- .../AclNNInvocation/scripts/verify_result.py | 2 +- .../MatmulInvocationNeo/scripts/verify_result.py | 2 +- .../AclNNInvocation/scripts/verify_result.py | 2 +- .../MatmulLeakyReluInvocation/scripts/verify_result.py | 2 +- .../MatmulLeakyReluInvocationAsync/scripts/verify_result.py | 2 +- .../AclOfflineModel/scripts/verify_result.py | 2 +- .../AbsDuplicateKernelInvocation/scripts/verify_result.py | 2 +- .../AbsGatherMaskKernelInvocation/scripts/verify_result.py | 2 +- .../AbsPadKernelInvocation/scripts/verify_result.py | 2 +- .../AbsUnPadKernelInvocation/scripts/verify_result.py | 2 +- .../ReduceMinKernelInvocation/scripts/verify_result.py | 2 +- .../WholeReduceSumKernelInvocation/scripts/verify_result.py | 2 +- .../AclNNInvocation/scripts/verify_result.py | 2 +- .../AclOfflineModel/scripts/verify_result.py | 2 +- .../AclOnlineModel/scripts/verify_result.py | 2 +- .../MmadBiasInvocation/scripts/verify_result.py | 2 +- .../MmadInvocation/scripts/verify_result.py | 2 +- .../VectorAddMultiCoreWithTiling/scripts/verify_result.py | 2 +- .../scripts/verify_result.py | 2 +- .../VectorAddSingleCore/scripts/verify_result.py | 2 +- .../VectorAddSingleCoreWithTmpbuf/scripts/verify_result.py | 2 +- .../AddKernelInvocationNeo/scripts/verify_result.py | 2 +- .../AddKernelInvocationTilingNeo/scripts/verify_result.py | 2 +- .../AclNNInvocation/scripts/verify_result.py | 2 +- .../0_introduction/5_addn_kernellaunch/scripts/verify_result.py | 2 +- .../AclNNInvocation/scripts/verify_result.py | 2 +- .../AclNNInvocation/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclNNInvocation/scripts/verify_result.py | 2 +- .../KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclNNInvocation/scripts/verify_result.py | 2 +- .../KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py | 2 +- 
.../DumpTensorCube/AclNNInvocation/scripts/verify_result.py | 2 +- .../DumpTensorVector/AclNNInvocation/scripts/verify_result.py | 2 +- .../DumpTensorKernelInvocationCube/scripts/verify_result.py | 2 +- .../DumpTensorKernelInvocationVector/scripts/verify_result.py | 2 +- .../12_cube_group/AclNNInvocation/scripts/verify_result.py | 2 +- .../MatmulABshareInvocation/scripts/verify_result.py | 2 +- .../AclNNInvocation/scripts/verify_result.py | 2 +- .../16_group_barrier/AclNNInvocation/scripts/verify_result.py | 2 +- .../6_group_matmul/KernelLaunch/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclNNInvocation/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclOfflineModel/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclOnlineModel/scripts/verify_result.py | 2 +- .../AddKernelInvocationNeo/scripts/verify_result.py | 2 +- .../AddKernelInvocationTilingNeo/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclNNInvocation/scripts/verify_result.py | 2 +- .../KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclNNInvocation/scripts/verify_result.py | 2 +- .../MatmulLeakyReluInvocation/scripts/verify_result.py | 2 +- .../MatmulLeakyReluInvocationAsync/scripts/verify_result.py | 2 +- .../examples/CPPInvocation/scripts/verify_result.py | 2 +- .../examples/CPPInvocation/scripts/verify_result.py | 2 +- .../AxpySample/AclNNInvocation/scripts/verify_result.py | 2 +- .../FrameworkLaunch/AclNNInvocation/scripts/verify_result.py | 2 +- .../AclNNInvocation/scripts/verify_result.py | 2 +- 55 files changed, 55 insertions(+), 55 deletions(-) diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/scripts/verify_result.py +++ 
b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/scripts/verify_result.py b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio 
<= error_tol diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/scripts/verify_result.py b/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- 
a/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/scripts/verify_result.py index e3ecffb22..6a700ca94 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/scripts/verify_result.py @@ -38,7 +38,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/scripts/verify_result.py index e3ecffb22..6a700ca94 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/scripts/verify_result.py @@ -38,7 +38,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = 
float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git 
a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/scripts/verify_result.py index e3ecffb22..6a700ca94 100644 --- a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/scripts/verify_result.py @@ -38,7 +38,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- 
a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/scripts/verify_result.py b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/scripts/verify_result.py b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, 
error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/scripts/verify_result.py b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/scripts/verify_result.py index 277d94780..0c51a2cc3 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/scripts/verify_result.py +++ 
b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/scripts/verify_result.py @@ -42,7 +42,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/scripts/verify_result.py b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/scripts/verify_result.py index 277d94780..0c51a2cc3 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/scripts/verify_result.py @@ -42,7 +42,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/scripts/verify_result.py b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/scripts/verify_result.py index 4e1c4ad45..7cf2a635e 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/scripts/verify_result.py @@ -41,7 +41,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error 
ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/scripts/verify_result.py b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/scripts/verify_result.py index 4e1c4ad45..7cf2a635e 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/scripts/verify_result.py @@ -41,7 +41,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/scripts/verify_result.py b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- 
a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/5_addn_kernellaunch/scripts/verify_result.py b/operator/ascendc/0_introduction/5_addn_kernellaunch/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/5_addn_kernellaunch/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/5_addn_kernellaunch/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git 
a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py index b63a4a5e1..455426365 100644 --- a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py +++ 
b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py b/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py index b63a4a5e1..455426365 100644 --- a/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py +++ b/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py index b63a4a5e1..455426365 100644 --- a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git 
a/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py b/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py index b63a4a5e1..455426365 100644 --- a/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py +++ b/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/verify_result.py index b63a4a5e1..455426365 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/scripts/verify_result.py index 604d92996..2caf6cdd4 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/scripts/verify_result.py +++ 
b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/verify_result.py b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/verify_result.py index b63a4a5e1..455426365 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/verify_result.py +++ b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/scripts/verify_result.py b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/scripts/verify_result.py index 604d92996..2caf6cdd4 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/scripts/verify_result.py +++ b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + 
print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/2_features/12_cube_group/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/2_features/12_cube_group/AclNNInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/2_features/12_cube_group/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/2_features/12_cube_group/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/scripts/verify_result.py b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/scripts/verify_result.py +++ b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/scripts/verify_result.py +++ 
b/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/scripts/verify_result.py b/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/scripts/verify_result.py index ab58c2333..1cbe396b5 100644 --- a/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/scripts/verify_result.py +++ b/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/scripts/verify_result.py @@ -36,7 +36,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py 
b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOfflineModel/scripts/verify_result.py b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOfflineModel/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOfflineModel/scripts/verify_result.py +++ b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOfflineModel/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOnlineModel/scripts/verify_result.py b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOnlineModel/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOnlineModel/scripts/verify_result.py +++ b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/AclOnlineModel/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = 
float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py b/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py +++ b/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py b/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py +++ b/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py 
b/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py b/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py +++ b/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ 
def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/scripts/verify_result.py b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/scripts/verify_result.py +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py index a325cfcc6..24b30f8d4 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, 
error_tol)) return error_ratio <= error_tol diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomKernel/examples/CPPInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py index 1a21d809a..2dd46f803 100644 --- a/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py +++ b/operator_contrib/AddCustomSample/KernelLaunch/AddCustomTilingKernel/examples/CPPInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator_contrib/AxpySample/AclNNInvocation/scripts/verify_result.py b/operator_contrib/AxpySample/AclNNInvocation/scripts/verify_result.py index 2c7ab7c6d..3349011da 100644 --- a/operator_contrib/AxpySample/AclNNInvocation/scripts/verify_result.py +++ 
b/operator_contrib/AxpySample/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator_contrib/HighPerfMatMul/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py b/operator_contrib/HighPerfMatMul/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py index 6770149f3..4f57f01b9 100644 --- a/operator_contrib/HighPerfMatMul/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py +++ b/operator_contrib/HighPerfMatMul/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 100: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol diff --git a/operator_contrib/MatmulLeakyReluCustom/AclNNInvocation/scripts/verify_result.py b/operator_contrib/MatmulLeakyReluCustom/AclNNInvocation/scripts/verify_result.py index 0e65d9813..74d469705 100644 --- a/operator_contrib/MatmulLeakyReluCustom/AclNNInvocation/scripts/verify_result.py +++ b/operator_contrib/MatmulLeakyReluCustom/AclNNInvocation/scripts/verify_result.py @@ -37,7 +37,7 @@ def verify_result(output, golden): if index == 10: break error_ratio = float(different_element_indexes.size) / golden.size - print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) return error_ratio <= error_tol -- Gitee From a12d29bcb79d751d4dc765bff3aa5f671e22d9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=9F=E6=B3=A2?= Date: Thu, 19 Jun 2025 08:14:19 +0000 Subject: [PATCH 
19/97] =?UTF-8?q?!2680=20=E6=9B=BF=E6=8D=A2CCE=5FK?= =?UTF-8?q?T=5FTEST=20Merge=20pull=20request=20!2680=20from=20?= =?UTF-8?q?=E6=B1=9F=E6=B3=A2/br=5Fj00600688=5FfixDefinedWord?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AllGatherMatmulCustom/op_kernel/gather_mm.h | 2 +- .../op_kernel/matmul_reduce_scatter_custom_common.h | 2 +- .../op_kernel/matmul_all_reduce_custom_common.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h index 9b662b32b..891f1082e 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h @@ -11,7 +11,7 @@ #ifndef MC2_GATHER_MM_H #define MC2_GATHER_MM_H -#if defined(__CCE_KT_TEST__) +#if defined ASCENDC_CPU_DEBUG #define SET_G_CORE_TYPE_IS_AIV thread_local int g_coreType = 2 #define SET_G_CORE_TYPE_IS_AIC thread_local int g_coreType = 1 #define DTYPE_X1 half diff --git a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_common.h b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_common.h index 3d323216d..bb561cf03 100644 --- a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_common.h +++ b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_common.h @@ -11,7 +11,7 @@ #ifndef MC2_ALLREDUCE_COMM_H #define MC2_ALLREDUCE_COMM_H -#if defined(__CCE_KT_TEST__) +#if defined 
ASCENDC_CPU_DEBUG #define SET_G_CORE_TYPE_IS_AIV thread_local int g_coreType = 2 #define SET_G_CORE_TYPE_IS_AIC thread_local int g_coreType = 1 #define DTYPE_X1 half diff --git a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h index 95605f718..4dbf9e704 100644 --- a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h +++ b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h @@ -11,7 +11,7 @@ #ifndef MC2_ALLREDUCE_COMM_H #define MC2_ALLREDUCE_COMM_H -#if defined(__CCE_KT_TEST__) +#if defined ASCENDC_CPU_DEBUG #define SET_G_CORE_TYPE_IS_AIV thread_local int g_coreType = 2 #define SET_G_CORE_TYPE_IS_AIC thread_local int g_coreType = 1 #define DTYPE_X1 half -- Gitee From 7708812f854e743429e9126554903e83c63e9f18 Mon Sep 17 00:00:00 2001 From: alpaca12345UUU Date: Fri, 20 Jun 2025 07:34:30 +0000 Subject: [PATCH 20/97] =?UTF-8?q?!2683=20=E4=BF=AE=E6=94=B9tbufpool=20READ?= =?UTF-8?q?ME=20Merge=20pull=20request=20!2683=20from=20alpaca12345UUU/mas?= =?UTF-8?q?ter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- operator/ascendc/2_features/2_tbufpool/README.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/operator/ascendc/2_features/2_tbufpool/README.md b/operator/ascendc/2_features/2_tbufpool/README.md index b87611f13..964f96712 100644 --- a/operator/ascendc/2_features/2_tbufpool/README.md +++ b/operator/ascendc/2_features/2_tbufpool/README.md @@ -28,15 +28,15 @@ z = x - y ``` - 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,首先启用tbufool1,将部分输入数据src0Gm,部分输入数据src1Gm搬运进片上储存,调用计算接口完成相加计算,搬出到外部存储上。之后切换到tbufpool2进行剩余数据相减计算,得到最终结果,再搬出到外部存储上。 + 
计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,首先启用tbufpool1,将部分输入数据src0Gm,部分输入数据src1Gm搬运进片上储存,调用计算接口完成相加计算,搬出到外部存储上。之后切换到tbufpool2进行剩余数据相减计算,得到最终结果,再搬出到外部存储上。 本样例算子的实现流程分为6个基本任务:CopyIn,Compute,CopyOut,CopyIn1,Compute1,CopyOut1。 - CopyIn任务负责将Global Memory上的部分输入Tensor src0Gm和src1Gm搬运到Local Memory,分别存储在src0Local、src1Local; - Compute任务负责对src0Local、src1Local执行加法操作,计算结果存储在dstLocal中; - - CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm0中。 + - CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGlobal中。 - CopyIn1任务负责将Global Memory上的剩余输入Tensor src0Gm和src1Gm搬运到Local Memory,分别存储在src0Local、src1Local; - Compute1任务负责对src0Local、src1Local执行剩余数据减法操作,计算结果存储在dstLocal中; - - CopyOut1任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm1中。 + - CopyOut1任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGlobal中。 - 调用实现 1. CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; @@ -71,6 +71,13 @@ export CAMODEL_LOG_PATH=./sim_log ``` + - 生成输入和真值 + + 执行如下命令后,当前目录生成input和output目录存放输入数据和真值数据。 + ``` + python3 scripts/gen_data.py + ``` + - 样例执行 ```bash -- Gitee From 68e759710909bdd5afbac6d573be345cdbcfc19a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=94=90=E7=92=9E?= Date: Fri, 20 Jun 2025 09:26:11 +0000 Subject: [PATCH 21/97] =?UTF-8?q?!2685=20readme=E4=BF=AE=E6=94=B9=20Merge?= =?UTF-8?q?=20pull=20request=20!2685=20from=20=E5=94=90=E7=92=9E/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 74 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index cb6675ca6..f4843ebb8 100644 --- a/README.md +++ b/README.md @@ -23,32 +23,18 @@ - 请参考[CANN社区版文档](https://hiascend.com/document/redirect/CannCommunityInstWizard?utm_source=gitee&utm_medium=sample&utm_campaign=samples)相关章节,对昇腾硬件、CANN软件及相应深度学习框架进行安装准备。 - 
本源码仓会适配CANN软件版本创建相应的标签并发行,关于CANN软件版本与本源码仓中标签的配套关系可参见["本源码仓标签与CANN版本配套表"](docs/MATCH.md#cannversionmap)。**需要注意,为确保您的源码定制开发顺利进行,请选择配套的CANN版本与Gitee标签源码,使用master分支可能存在版本不匹配的风险。** -## 推荐样例 - -| **样例名称** | **样例介绍** | **开发语言** | -|---|---|---| -| [DVPP接口样例](https://gitee.com/ascend/samples/tree/master/cplusplus/level2_simple_inference/0_data_process) | 图像视频处理(DVPP)单接口样例,包含图片视频解码(vdec/jpegd)、缩放(resize)、抠图(crop)、转换(vpc)等功能 | C++ | -| [单算子样例](https://gitee.com/ascend/samples/tree/master/cplusplus/level1_single_api/4_op_dev/2_verify_op) | 自定义算子开发介绍,单算子调用样例,包含Add/batchnorm/conv2d/lstm/matmul/reshape等算子 | C++ | -| [Ascend C单算子样例](https://gitee.com/ascend/samples/tree/master/operator) | 自定义Ascend C算子开发介绍,单算子调用样例,包含Add/LayerNorm/MatMul/MatMulLeakyRelu/MoeSoftMaxTopK等算子 | Ascend C | -| [sampleResnetQuickStart](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetQuickStart) | :+1:推理应用入门样例,基于Resnet50模型实现的图像分类应用 | C++/Python | -| [sampleResnetAIPP](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetAIPP) | AIPP特性使用,基于Resnet50模型实现的图像分类应用 | C++/Python | -| [sampleResnetDVPP](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetDVPP) | DVPP特性使用,基于Resnet50模型实现的图像分类应用 | C++/Python | -| [sampleYOLOV7](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleYOLOV7) | 使能DVPP+AIPP特性,基于YoloV7模型实现的物体检测应用 | C++ | -| [sampleResnetRtsp](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetRtsp) | RTSP视频流输入,基于Resnet50模型实现的图像分类应用 | C++ | -| [sampleCarColor](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleCarColor) | 多模型串接,基于YoloV7模型和颜色分类模型实现的检测分类应用 | C++ | -| [sampleYOLOV7MultiInput](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleYOLOV7MultiInput) | :+1:多路输入综合样例,基于YoloV7模型实现的物体检测应用,支持多路RTSP流/视频输入、支持多卡并行 | C++ | -| 
[sampleCrowdCounting](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleCrowdCounting) | 统计图片人头数量,基于CrowdCounting模型实现的推理应用 | Python | -| [sampleYOLOV7NMSONNX](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleYOLOV7NMSONNX) | 后处理使用CANN算子进行加速,基于YoloV7模型实现的物体检测应用 | Python | - - ## 仓库结构 ``` +- /best_practices:CANN最佳实践样例 +- /common:samples仓公共文件目录 - /cplusplus:C++样例归档路径(待迁移至/inference) |--/contrib:外部贡献推理样例 |--/level1_single_api:CANN AscendCL接口、DVPP接口、Graph接口等单接口样例 |--/level2_simple_inference:AscendCL推理应用样例 + |--/... - /docs:CANN Samples样例使用相关说明、FAQ文档 +- /growthpath:开发者CANN的学习路径,帮助开发者快速掌握每一阶段知识点及开发技巧 - /inference:推理应用相关样例 |--/ACLHelloWorld:AscendCL推理应用入门“Hello World” |--/acllite:AscendCL高阶封装接口,包含C++及Python版本 @@ -56,29 +42,53 @@ |--/mediaProcess:媒体(音视频)接口相关样例 |--/memoryManagement:AscendCL内存管理样例 |--/modelInference:推理应用样例目录,包含C++及Python版本 -- /operator:算子开发与使用相关样例 - |--/AddCustomSample:Ascend C算子开发Add样例 - |--FrameworkLaunch: 单算子工程及调用样例 - |--AclNNInvocation: 单算子API执行样例 - |--AclOfflineModel: 单算子模型执行样例 - |--AclOnlineModel: 单算子模型执行样例 - |--AddCustom: 单算子工程 - |--CppExtensions: pybind调用样例 - |--PytorchInvocation: pytorch调用样例 - |--TensorFlowInvocation: tensorflow调用样例 - |--KernelLaunch: 内核调试调用样例 - |--AddKernelInvocation: 内核调试调用样例 - |--AddKernelInvocationNeo: Kernel Launch调试样例 - |--AddKernelInvocationTilingNeo: 带Tiling的Kernel Launch调试样例 |--/... +- /operator:Ascend C算子开发与使用相关样例 + |ascendc + |0_introduction:简单的示例,适合初学者 + |1_utilities:编译工程和自定义工程、assert及debug功能、硬件平台信息的查询能力等 + |2_features:Ascend C的特性 + |3_libraries:类库的使用示例,包括数学库,激活函数等 + |4_best_practices:最佳实践示例 + |tutorials:生态教学的示例 + |--/AddCustomSample:Ascend C算子开发Add样例 +- /operator_contrib:Ascend C算子开发者贡献样例 + |--/UnalignAddCustomSample:Ascend C算子开发Add算子(非对齐)样例 |--/... 
- /python:Python样例归档路径(待迁移至/inference) |--/contrib:外部贡献推理样例 |--/level1_single_api:CANN AscendCL接口、DVPP接口、Graph接口等单接口样例 |--/level2_simple_inference:AscendCL推理应用样例 |--/level3_multi_model:多模型串接综合样例 +- /robot:昇腾开发板智能车实际应用样例 +- /st:样例测试用例,主要用于样例的功能性验证 +- /training:训练应用样例 + ``` +## 算子开发样例 +| **样例名称** | **样例介绍** | **开发语言** | +|---|---|---| +| [AddCustomSample](https://gitee.com/ascend/samples/tree/master/operator/ascendc/tutorials/AddCustomSample) | 基于Ascend C的Add自定义Vector算子及调用样例 | C++ | +| [HelloWorldSample](https://gitee.com/ascend/samples/tree/master/operator/ascendc/tutorials/HelloWorldSample) | 基于Ascend C的自定义算子调用结构演示样例 | C++ | +| [MatmulCustomSample](https://gitee.com/ascend/samples/tree/master/operator/ascendc/tutorials/MatmulCustomSample) | 基于AscendC的Matmul自定义Cube算子及调用样例 | C++ | +| [MatmulLeakyReluCustomSample](https://gitee.com/ascend/samples/tree/master/operator/ascendc/tutorials/MatmulLeakyReluCustomSample) | 基于AscendC的MatmulLeakyRelu自定义Cube+Vector算子及调用样例 | C++ | +| [UnalignAddCustomSample](https://gitee.com/ascend/samples/tree/master/operator_contrib/UnalignAddCustomSample) | 基于AscendC的Add算子(非对齐)算子及调用样例 | C++ | + +## 推理开发样例 +| **样例名称** | **样例介绍** | **开发语言** | +|---|---|---| +| [DVPP接口样例](https://gitee.com/ascend/samples/tree/master/cplusplus/level2_simple_inference/0_data_process) | 图像视频处理(DVPP)单接口样例,包含图片视频解码(vdec/jpegd)、缩放(resize)、抠图(crop)、转换(vpc)等功能 | C++ | +| [sampleResnetQuickStart](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetQuickStart) | 推理应用入门样例,基于Resnet50模型实现的图像分类应用 | C++/Python | +| [sampleResnetAIPP](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetAIPP) | AIPP特性使用,基于Resnet50模型实现的图像分类应用 | C++/Python | +| [sampleResnetDVPP](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetDVPP) | DVPP特性使用,基于Resnet50模型实现的图像分类应用 | C++/Python | +| [sampleYOLOV7](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleYOLOV7) | 
使能DVPP+AIPP特性,基于YoloV7模型实现的物体检测应用 | C++ | +| [sampleResnetRtsp](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleResnetRtsp) | RTSP视频流输入,基于Resnet50模型实现的图像分类应用 | C++ | +| [sampleCarColor](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleCarColor) | 多模型串接,基于YoloV7模型和颜色分类模型实现的检测分类应用 | C++ | +| [sampleYOLOV7MultiInput](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleYOLOV7MultiInput) | :+1:多路输入综合样例,基于YoloV7模型实现的物体检测应用,支持多路RTSP流/视频输入、支持多卡并行 | C++ | +| [sampleCrowdCounting](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleCrowdCounting) | 统计图片人头数量,基于CrowdCounting模型实现的推理应用 | Python | +| [sampleYOLOV7NMSONNX](https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleYOLOV7NMSONNX) | 后处理使用CANN算子进行加速,基于YoloV7模型实现的物体检测应用 | Python | + ## 变更日志 -- Gitee From 6bfdb584d600369ee0cd0ea1ea088b106faac4d5 Mon Sep 17 00:00:00 2001 From: shinoda Date: Sat, 21 Jun 2025 07:08:05 +0000 Subject: [PATCH 22/97] !2686 fix README. * fix README. 
--- .../0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md index 84477c6ef..7bb83671c 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md @@ -22,6 +22,7 @@ bK * Nfloat16ND + 算子输出cM * NfloatND 核函数名mmad_custom -- Gitee From 72cc3b1a7b6497e1c3754601ac45684edda26277 Mon Sep 17 00:00:00 2001 From: youxiao Date: Mon, 23 Jun 2025 11:47:37 +0000 Subject: [PATCH 23/97] !2687 change llm datadist sample Merge pull request !2687 from youxiao/master --- cplusplus/level1_single_api/11_llm_data_dist/readme.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cplusplus/level1_single_api/11_llm_data_dist/readme.md b/cplusplus/level1_single_api/11_llm_data_dist/readme.md index 10914d52e..f02114570 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/readme.md +++ b/cplusplus/level1_single_api/11_llm_data_dist/readme.md @@ -15,8 +15,8 @@ ## 目录结构 ``` -├── prompt_sampe.cpp // prompt样例main函数 -├── decoder_sampe.cpp // decoder样例main函数 +├── prompt_sample.cpp // prompt样例main函数 +├── decoder_sample.cpp // decoder样例main函数 ├── CMakeLists.txt // 编译脚本 ``` @@ -71,7 +71,7 @@ 3. 
在运行环境执行可执行文件。 - - 执行prompt_sample, 参数为device_id与local_ip其中device_id为prompt要使用的device_id, local_ip为prompt所在device的ip,如: + - 执行prompt_sample, 参数为device_id与local_ip, 其中device_id为prompt要使用的device_id, local_ip为prompt所在device的ip,如: ``` ./prompt_sample 0 10.10.10.1 ``` -- Gitee From fb4ad4383846aa727da977973358f8d7ec3f6c09 Mon Sep 17 00:00:00 2001 From: xujiuxu Date: Tue, 24 Jun 2025 09:37:22 +0000 Subject: [PATCH 24/97] !2691 change some readme Merge pull request !2691 from xujiuxu/master --- inference/dataflow/cpluscplus/README.md | 4 ++-- inference/dataflow/py_dflow/README.md | 3 ++- inference/dataflow/python/README.md | 9 ++++++++- inference/dataflow/udf_workspace/README.md | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/inference/dataflow/cpluscplus/README.md b/inference/dataflow/cpluscplus/README.md index 9170a3ebf..d0f31887f 100644 --- a/inference/dataflow/cpluscplus/README.md +++ b/inference/dataflow/cpluscplus/README.md @@ -26,7 +26,7 @@ python 版本要求:python3.9 ## 程序编译 ```bash -source /usr/local/Ascend/ascend-toolkit/set_env.sh +source {HOME}/Ascend/ascend-toolkit/set_env.sh # "{HOME}/Ascend"为CANN软件包安装目录,请根据实际安装路径进行替换。 mkdir build cd build cmake .. @@ -41,7 +41,7 @@ cd .. 
export ASCEND_GLOBAL_LOG_LEVEL=3 #0 debug 1 info 2 warn 3 error 不设置默认error级别 export ASCEND_SLOG_PRINT_TO_STDOUT=1 # 日志打屏,不设置日志落盘默认路径 # 必选 -source /usr/local/Ascend/ascend-toolkit/set_env.sh +source {HOME}/Ascend/ascend-toolkit/set_env.sh # "{HOME}/Ascend"为CANN软件包安装目录,请根据实际安装路径进行替换。 export RESOURCE_CONFIG_PATH=xxx/xxx/xxx/numa_config.json cd output diff --git a/inference/dataflow/py_dflow/README.md b/inference/dataflow/py_dflow/README.md index ddc4c3490..59a708e71 100644 --- a/inference/dataflow/py_dflow/README.md +++ b/inference/dataflow/py_dflow/README.md @@ -33,9 +33,10 @@ py_dflow `PyDFlow`提供一键式编译能力,可通过如下命令进行编译: ```shell - source /usr/local/Ascend/ascend-toolkit/set_env.sh + source {HOME}/Ascend/ascend-toolkit/set_env.sh #{HOME}为CANN软件包安装目录,请根据实际安装路径进行替换 bash build.sh --ascend_install_path=${ASCEND_HOME_PATH} --python_path=python3.9 ``` +"{HOME}/Ascend"为CANN软件包安装目录,请根据实际安装路径进行替换。 - `--ascend_install_path`选项的默认值为`/usr/local/Ascend/ascend-toolkit/latest`,可根据实际安装的路径指定。 diff --git a/inference/dataflow/python/README.md b/inference/dataflow/python/README.md index 7b5ecc1f8..3a5290d85 100644 --- a/inference/dataflow/python/README.md +++ b/inference/dataflow/python/README.md @@ -20,6 +20,13 @@ ├── udf_py │ ├── udf_add.py 使用python实现udf多func功能 │ └── udf_control.py 使用python实现udf功能,用于控制udf_add中多func实际执行的func +└── udf_py_ws_sample 完整样例用于说明python udf实现 + ├── CMakeLists.txt udf python完整工程cmake文件样例 + ├── func_add.json udf python完整工程配置文件样例 + ├── src_cpp + │ └── func_add.cpp udf python完整工程C++源码文件样例 + └── src_python + └── func_add.py udf python完整工程python源码文件样例 ## 环境准备 @@ -35,7 +42,7 @@ sample_pytorch.py、sample_npu_model.py样例依赖pytorch和torchvision包,推 export ASCEND_GLOBAL_LOG_LEVEL=3 #0 debug 1 info 2 warn 3 error 不设置默认error级别 export ASCEND_SLOG_PRINT_TO_STDOUT=1 # 日志打屏,不设置日志落盘默认路径 # 必选 -source /usr/local/Ascend/ascend-toolkit/set_env.sh +source {HOME}/Ascend/ascend-toolkit/set_env.sh #{HOME}为CANN软件包安装目录,请根据实际安装路径进行替换 export RESOURCE_CONFIG_PATH=xxx/xxx/xxx/numa_config.json python3.9 
sample1.py diff --git a/inference/dataflow/udf_workspace/README.md b/inference/dataflow/udf_workspace/README.md index 816635097..08b682493 100644 --- a/inference/dataflow/udf_workspace/README.md +++ b/inference/dataflow/udf_workspace/README.md @@ -35,7 +35,7 @@ FLOW_FUNC_REGISTRAR(AddFlowFunc) ## 编译指导 UDF函数开发完成后,可以使用以下编译指令查看CMakeLists文件及cpp源码是否存在问题。 ```bash -source /usr/local/Ascend/ascend-toolkit/set_env.sh +source {HOME}/Ascend/ascend-toolkit/set_env.sh #{HOME}为CANN软件包安装目录,请根据实际安装路径进行替换 # 以01_udf_add为例 cd 01_udf_add mkdir build -- Gitee From 31d775a16e864e543e61c209662f3f6a8e0b25a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=94=90=E7=92=9E?= Date: Tue, 24 Jun 2025 11:55:16 +0000 Subject: [PATCH 25/97] =?UTF-8?q?!2692=20=E2=80=9C=E5=A2=9E=E5=8A=A0return?= =?UTF-8?q?=E2=80=9D=20Merge=20pull=20request=20!2692=20from=20=E5=94=90?= =?UTF-8?q?=E7=92=9E/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../level2_simple_inference/0_data_process/venc/src/main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cplusplus/level2_simple_inference/0_data_process/venc/src/main.cpp b/cplusplus/level2_simple_inference/0_data_process/venc/src/main.cpp index 5c31df3a3..92c32c76b 100644 --- a/cplusplus/level2_simple_inference/0_data_process/venc/src/main.cpp +++ b/cplusplus/level2_simple_inference/0_data_process/venc/src/main.cpp @@ -184,6 +184,7 @@ Result InitResource() ERROR_LOG("acl get run mode failed"); return FAILED; } + return SUCCESS; } Result Init(int imgWidth, int imgHeight) -- Gitee From d45ccbe545823837b3d15b23903a25062c401408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E9=91=AB?= <18810011293@163.com> Date: Thu, 26 Jun 2025 05:54:24 +0000 Subject: [PATCH 26/97] =?UTF-8?q?!2690=20float4=20weight=20quantization=20?= =?UTF-8?q?sample=20Merge=20pull=20request=20!2690=20from=20=E5=BC=A0?= =?UTF-8?q?=E9=91=AB/zhangxin0623?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
--- .../fp4_weight_quantization/README_CN.md | 50 ++++++ .../fp4_weight_quantization/requirements.txt | 7 + .../src/quantization.cfg | 8 + .../src/run_llama7b_quantization.py | 162 ++++++++++++++++++ .../fp4_weight_quantization/src/utils.py | 69 ++++++++ 5 files changed, 296 insertions(+) create mode 100644 python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md create mode 100644 python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/requirements.txt create mode 100644 python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/quantization.cfg create mode 100644 python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py create mode 100644 python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/utils.py diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md new file mode 100644 index 000000000..93ea0a9ce --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md @@ -0,0 +1,50 @@ +# FP4伪量化 + +## 1 FP4伪量化 + +### 1.1 安装依赖 + +本sample依赖包可参考[requirements.txt](requirements.txt) + +### 1.2 模型和数据集准备 + +本sample以Llama2-7b模型,pileval和wikitext2数据集为示例,请用户自行下载。 + +### 1.3 简易量化配置 +./src/quantization.cfg文件为用户自定义的简易量化配置,具体表示信息如下: + +| 字段 |类型| 说明 | 默认值 | 取值范围 | +|:--| :-: | :-- | :-: | :-: | +|skip_layers|str|跳过量化的层 |/|/| +|weight_only_config.weight_compress_only|bool|是否为仅权重量化|False|True/False| +|weight_only_config.wts_type|enum|量化后权重类型|INT8|INT8/FLOAT4_E2M1/MXFP4_E2M1/HIFLOAT8/FLOAT8_E4M3FN| +|weight_only_config.awq_quantize.grids_num|uint32|awq搜索格点数量|20|/| + +## 2 FLOAT4_E2M1量化示例 +> 当前quantization.cfg文件中weight_only_config.wts_type设置的值为FLOAT4_E2M1 + + +### 2.1 使用接口方式调用 + +请在当前目录执行如下命令运行示例程序 + +验证fakequant模型脚本: + +`CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python3 src/run_llama7b_quantization.py
--calibration_data=/pile_val_backup/ --verify_data=/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py --model=/data/Models/pytorch/Llama2/Llama2_7b_hf` + + +若出现如下信息,则说明量化成功: + +```none +Test time taken: 9.0 min 38.24865388870239 s +Score: 5.657759 +``` + +推理成功后,在当前目录会生成量化日志文件./amct_log/amct_pytorch.log和./output文件夹,该文件夹内包含以下内容: + +- config.json:量化配置文件,描述了如何对模型中的每一层进行量化。 +- record.txt:量化因子记录文件。 +- awq_result.pt:存储了awq算法的scale和clip +- quant_factor.pt:存储量化缩放因子 + +> 如果outputs目录下已经存在量化配置文件或量化因子记录文件,再次运行示例程序时,如果新生成的文件与已有文件同名,则会覆盖已有的量化配置文件或量化因子记录文件。 diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/requirements.txt b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/requirements.txt new file mode 100644 index 000000000..55441d062 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/requirements.txt @@ -0,0 +1,7 @@ +torch==2.1.0 +transformers==4.40.0 +accelerate==0.30.1 +datasets==2.19.1 +sentencepiece==0.2.0 +numpy==1.23.5 +protobuf==3.20.2 \ No newline at end of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/quantization.cfg b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/quantization.cfg new file mode 100644 index 000000000..a43152ad3 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/quantization.cfg @@ -0,0 +1,8 @@ +skip_layers: "lm_head" +weight_only_config: { + weight_compress_only: True + wts_type: FLOAT4_E2M1 + awq_quantize:{ + grids_num: 20 + } +} \ No newline at end of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py new file mode 100644 index 000000000..4aac4fad9 --- /dev/null +++ 
b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py @@ -0,0 +1,162 @@ +""" +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import os +import copy +import time +import tqdm +import torch +import torch.nn as nn +from transformers import AutoTokenizer, AutoConfig +from accelerate import infer_auto_device_map, dispatch_model +from accelerate.utils.modeling import get_balanced_memory + +from utils import get_loaders, get_llama2, get_calib_dataset +import amct_pytorch as amct + + +def build_model_and_enc(model, model_path, gpu_num): + config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) + if "mpt" in config.__class__.__name__.lower(): + enc = AutoTokenizer.from_pretrained( + config.tokenizer_name, trust_remote_code=True + ) + else: + enc = AutoTokenizer.from_pretrained( + model_path, use_fast=False, trust_remote_code=True + ) + + # Move the model to GPU (as much as possible) for LM evaluation + # max_memory = ['0:16GiB', '1:16GiB','2:16GiB', 'cpu:30GiB'], '0' means the first GPU that you specify. 
+ # I don't recommend using 16GiB, we need to reserve some space for other tensors during calculation + # please see the recommended memory allocation in the Word file + # Adjust the max_size according to the real situation + # a clever way: + + max_memory = [] + for i in range(gpu_num): + max_memory.append(f'{i}:12GiB') + max_memory.append('cpu:80GiB') + print('Max_memory allocation: \n', max_memory) + + max_memory = [v.split(":") for v in (max_memory or [])] + max_memory = {(int(k) if k.isdigit() else k): v for k, v in max_memory} + kwargs = { + "max_memory": get_balanced_memory( + model, max_memory if len(max_memory) > 0 else None + ) + } + model.tie_weights() + device_map = infer_auto_device_map( + model, + no_split_module_classes=[ + "LlamaDecoderLayer", + ], + **kwargs, + ) + model = dispatch_model(model, device_map=device_map, + offload_dir=os.path.join(model_path, 'offload_dir')) + + return model, enc + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--calibration_data', type=str, default='/pile_val_backup') + parser.add_argument('--verify_data', type=str, default='/data/Datasets/wikitext/wikitext-2-raw-v1/wikitext-2-raw/wikiscript.py') + parser.add_argument('--model', type=str, default='/data/Models/pytorch/Llama2/Llama2_7b_hf') + + args = parser.parse_args() + model, model_path = get_llama2(args.model) + model = model.eval() + copied_model = copy.deepcopy(model) + gpu_num = torch.cuda.device_count() + model, enc = build_model_and_enc(model, model_path, gpu_num) + + proto_path = './src/quantization.cfg' + config_file = './output/config.json' + record_file = './output/record.txt' + + test_start_time = time.time() + # Phase1: generate quant config json + amct.create_post_quant_config(config_file, + model, + config_defination=proto_path) + + # Phase2: do weights calibration and generate calibration model + samples = get_calib_dataset( + data_path=args.calibration_data, tokenizer=enc, n_samples=512, block_size=518 + ) + 
samples = torch.cat(samples, dim=0)[:1,:] + + post_quant_model = amct.create_post_quant_model(config_file, + record_file, + model) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + with torch.no_grad(): + post_quant_model(samples.to(next(post_quant_model.parameters()).device)) + if torch.cuda.is_available(): + torch.cuda.empty_cache() + test_end_time = time.time() + total_time = test_end_time - test_start_time + print('Calibration time taken: ', total_time // 60, 'min ', total_time%60, 's') + # save memory, del unuse model + del post_quant_model + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + model, enc = build_model_and_enc(copied_model, model_path, gpu_num) + + # Phase3: save fakequant model + testenc = get_loaders(data_path=args.verify_data, + enc=enc, + seqlen=model.seqlen) + + testenc = testenc.input_ids.to(model.device) + + quant_model = amct.save_post_quant_model(record_file, model, mode='fakequant') + + nsamples = testenc.numel() // model.seqlen + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + # Phase4: Test ppl result + nlls = [] + test_start_time = time.time() + for i in tqdm.tqdm(range(nsamples), desc="evaluating..."): + batch = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)].to( + quant_model.device + ) + with torch.no_grad(): + lm_logits = quant_model(batch).logits + shift_logits = lm_logits[:, :-1, :].contiguous().float().cpu() + shift_labels = testenc[:, (i * model.seqlen) : ((i + 1) * model.seqlen)][:, 1:].cpu() + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct( + shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1) + ) + neg_log_likelihood = loss.float() * model.seqlen + nlls.append(neg_log_likelihood) + test_end_time = time.time() + + ppl = torch.exp(torch.stack(nlls).sum() / (nsamples * model.seqlen)) + + total_time = test_end_time - test_start_time + print('Test time taken: ', total_time // 60, 'min ', total_time%60, 's' ) + print('Score: ', ppl.item()) \ No newline at end 
of file diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/utils.py b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/utils.py new file mode 100644 index 000000000..474a5b618 --- /dev/null +++ b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/utils.py @@ -0,0 +1,69 @@ +""" +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import torch +import torch.nn as nn +from datasets import load_dataset,load_from_disk + +def get_llama2(model_path, seqlen=2048): + def skip(*args, **kwargs): + pass + + torch.nn.init.kaiming_uniform_ = skip + torch.nn.init.uniform_ = skip + torch.nn.init.normal_ = skip + from transformers import LlamaForCausalLM + + model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, offload_folder="offload/") + + model.seqlen = seqlen + return model, model_path + + +def get_loaders(data_path: str, enc, seqlen): + + print('Loading dataset: Wikitext2') + testenc = load_dataset(data_path, 'wikitext-2-raw-v1', split='test', trust_remote_code=True) + testenc = enc("\n\n".join(testenc["text"]), return_tensors="pt") + + return testenc + + +def get_calib_dataset(data_path, tokenizer=None, n_samples=512, block_size=512): + dataset = load_from_disk(data_path) + dataset = dataset.shuffle(seed=42) + samples = [] + n_run = 0 + for data in dataset: + line = data["text"] + line = line.strip() + line_encoded = 
tokenizer.encode(line) + if len(line_encoded) > 512: + continue + sample = torch.tensor([line_encoded]) + if sample.numel() == 0: + continue + samples.append(sample) + n_run += 1 + if n_run == n_samples: + break + # now concatenate all samples and split according to block size + cat_samples = torch.cat(samples, dim=1) + n_split = cat_samples.shape[1] // block_size + print(f" * Split into {n_split} blocks") + return [ + cat_samples[:, i * block_size : (i + 1) * block_size] for i in range(n_split) + ] -- Gitee From 39eef173d06f5414e858610c524dd15bbb66a057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E9=91=AB?= <18810011293@163.com> Date: Fri, 27 Jun 2025 03:20:13 +0000 Subject: [PATCH 27/97] =?UTF-8?q?!2694=20fix=20fp4=20weight=20quant=20samp?= =?UTF-8?q?le=20Merge=20pull=20request=20!2694=20from=20=E5=BC=A0=E9=91=AB?= =?UTF-8?q?/zhangxin0627?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fp4_weight_quantization/src/run_llama7b_quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py index 4aac4fad9..37c78da8d 100644 --- a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py +++ b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/src/run_llama7b_quantization.py @@ -107,7 +107,7 @@ if __name__ == '__main__': model) if torch.cuda.is_available(): torch.cuda.empty_cache() - + post_quant_model.config.use_cache = False with torch.no_grad(): post_quant_model(samples.to(next(post_quant_model.parameters()).device)) if torch.cuda.is_available(): -- Gitee From d846deb65f12c69aa9144a0ef8adc4b72c9a9f67 Mon Sep 17 00:00:00 2001 From: shinoda Date: Sat, 28 Jun 2025 03:40:45 +0000 Subject: [PATCH 28/97] !2696 remove sim_log 
configuration and fix tbufpool run.sh Merge pull request !2696 from shinoda/master --- .../MatmulInvocationNeo/run.sh | 7 - .../MatmulLeakyReluInvocation/run.sh | 7 - .../MatmulLeakyReluInvocationAsync/run.sh | 7 - .../AbsDuplicateKernelInvocation/README.md | 5 +- .../AbsGatherMaskKernelInvocation/README.md | 5 +- .../AbsPadKernelInvocation/README.md | 5 +- .../AbsUnPadKernelInvocation/README.md | 5 +- .../ReduceMinKernelInvocation/README.md | 5 +- .../WholeReduceSumKernelInvocation/README.md | 5 +- .../WholeReduceSumKernelInvocation/run.sh | 7 - .../MmadBiasInvocation/run.sh | 7 - .../MmadInvocation/run.sh | 7 - .../VectorAddMultiCoreWithTiling/README.md | 5 +- .../VectorAddMultiCoreWithTiling/run.sh | 7 - .../README.md | 5 +- .../run.sh | 7 - .../VectorAddSingleCore/README.md | 5 +- .../VectorAddSingleCore/run.sh | 7 - .../VectorAddSingleCoreWithTmpbuf/README.md | 5 +- .../VectorAddSingleCoreWithTmpbuf/run.sh | 7 - .../AddKernelInvocationNeo/README.md | 5 +- .../AddKernelInvocationNeo/run.sh | 7 - .../AddKernelInvocationTilingNeo/README.md | 5 +- .../AddKernelInvocationTilingNeo/run.sh | 7 - .../5_addn_kernellaunch/README.md | 5 +- .../0_introduction/5_addn_kernellaunch/run.sh | 7 - .../DumpTensorKernelInvocationCube/run.sh | 7 - .../DumpTensorKernelInvocationVector/run.sh | 7 - .../MatmulABshareInvocation/run.sh | 7 - .../ascendc/2_features/2_tbufpool/README.md | 5 +- .../ascendc/2_features/2_tbufpool/main.cpp | 14 +- operator/ascendc/2_features/2_tbufpool/run.sh | 138 +++++++++++++----- .../6_group_matmul/KernelLaunch/run.sh | 7 - 33 files changed, 123 insertions(+), 218 deletions(-) diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh index d36adef16..dbca0e151 100755 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh +++ 
b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh index d36adef16..dbca0e151 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh index 2fc9bfdcc..9e5b60ada 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/README.md b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/README.md index 269648acd..abdf7863a 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/README.md +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/README.md @@ -51,10 +51,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/README.md b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/README.md index 91d619e5d..add51272e 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/README.md +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/README.md @@ -50,10 +50,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/README.md b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/README.md index bead79954..5b2be9c26 100644 --- 
a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/README.md +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/README.md @@ -51,10 +51,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/README.md b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/README.md index a80082e68..a5d3c5607 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/README.md +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/README.md @@ -49,10 +49,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/README.md b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/README.md index 32012880a..3c751e543 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/README.md +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/README.md @@ -49,10 +49,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/README.md b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/README.md index 46e2f9776..72a6e06d4 100644 --- 
a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/README.md +++ b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/README.md @@ -98,10 +98,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh index c4f01fdfd..f239a9a44 100755 --- a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh index 0c9c7f40b..3359bc3fa 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh @@ -75,13 +75,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh index 0c9c7f40b..3359bc3fa 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh @@ -75,13 +75,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/README.md b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/README.md index 4a21d3854..e198055e3 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/README.md +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/README.md @@ -62,10 +62,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh index 8fcd59730..eb66d5395 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh @@ -78,13 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/README.md b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/README.md index 3feee5e51..e2d449c67 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/README.md +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/README.md @@ -63,10 +63,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh index 8fcd59730..eb66d5395 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh @@ -78,13 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/README.md b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/README.md index a3b82f9c0..db52b3f34 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/README.md +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/README.md @@ -49,10 +49,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh index 8fcd59730..eb66d5395 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh @@ -78,13 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/README.md b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/README.md index 26353571a..f7a51c3e6 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/README.md +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/README.md @@ -52,10 +52,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh index 8fcd59730..eb66d5395 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh @@ -78,13 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/README.md b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/README.md index 32e2f1008..3149e087d 100644 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/README.md +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/README.md @@ -50,10 +50,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh index 8c6cb9c61..9bdf07910 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/README.md b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/README.md index 447cf9219..8a409bdef 100644 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/README.md +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/README.md @@ -52,10 +52,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh index 8c6cb9c61..9bdf07910 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/5_addn_kernellaunch/README.md b/operator/ascendc/0_introduction/5_addn_kernellaunch/README.md index e3eaed64a..1d39d13ac 100644 --- a/operator/ascendc/0_introduction/5_addn_kernellaunch/README.md +++ b/operator/ascendc/0_introduction/5_addn_kernellaunch/README.md @@ -105,10 +105,7 @@ kernel侧: export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 样例执行 diff --git a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh index 8c6cb9c61..9bdf07910 100755 --- a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh +++ b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh index a755887a9..b38325a40 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh @@ -75,13 +75,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh index e4cd2e80f..7ff642101 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh @@ -77,13 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh index 18d24d6fb..b60d42817 100644 --- a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh +++ b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh @@ -75,13 +75,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! 
$CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/2_features/2_tbufpool/README.md b/operator/ascendc/2_features/2_tbufpool/README.md index 964f96712..fe4e7becd 100644 --- a/operator/ascendc/2_features/2_tbufpool/README.md +++ b/operator/ascendc/2_features/2_tbufpool/README.md @@ -66,10 +66,7 @@ export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置仿真模式日志文件目录,默认为sim_log。 - ```bash - export CAMODEL_LOG_PATH=./sim_log - ``` + - 生成输入和真值 diff --git a/operator/ascendc/2_features/2_tbufpool/main.cpp b/operator/ascendc/2_features/2_tbufpool/main.cpp index ba4f849dd..1f6813a45 100644 --- a/operator/ascendc/2_features/2_tbufpool/main.cpp +++ b/operator/ascendc/2_features/2_tbufpool/main.cpp @@ -36,7 +36,7 @@ static bool CompareResult(const void *outputData, int64_t outSize) { CHECK_ACL(aclrtMallocHost((void **)(&goldenData), outSize)); #endif size_t goldenSize = outSize; - bool ret = ReadFile("../output/golden.bin", goldenSize, goldenData, goldenSize); + bool ret = ReadFile("./output/golden.bin", goldenSize, goldenData, goldenSize); if (ret) { printf("ReadFile golden.bin success!\n"); } else { @@ -80,8 +80,8 @@ int32_t main(int32_t argc, char *argv[]) { uint8_t *zAdd = (uint8_t *)AscendC::GmAlloc(outputSizeAdd); uint8_t *tiling = (uint8_t *)AscendC::GmAlloc(tilingSize); - ReadFile("../input/input_x.bin", inputSize, x, inputSize); - ReadFile("../input/input_y.bin", inputSize, y, inputSize); + ReadFile("./input/input_x.bin", inputSize, x, inputSize); + ReadFile("./input/input_y.bin", inputSize, y, inputSize); GenerateTilingData(TOTAL_LENGTH, tiling); @@ -89,7 +89,7 @@ 
int32_t main(int32_t argc, char *argv[]) { ICPU_RUN_KF(tbufpool_custom, USED_CORE_NUM, x, y, zAdd, *reinterpret_cast(tiling)); // use this macro for cpu debug - WriteFile("../output/output.bin", zAdd, outputSizeAdd); + WriteFile("./output/output.bin", zAdd, outputSizeAdd); bool goldenResult = true; goldenResult = CompareResult(zAdd, outputSizeAdd); @@ -122,8 +122,8 @@ int32_t main(int32_t argc, char *argv[]) { CHECK_ACL(aclrtMalloc((void **)&yDevice, inputSize, ACL_MEM_MALLOC_HUGE_FIRST)); CHECK_ACL(aclrtMalloc((void **)&zDeviceAdd, outputSizeAdd, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("../input/input_x.bin", inputSize, xHost, inputSize); - ReadFile("../input/input_y.bin", inputSize, yHost, inputSize); + ReadFile("./input/input_x.bin", inputSize, xHost, inputSize); + ReadFile("./input/input_y.bin", inputSize, yHost, inputSize); GenerateTilingData(TOTAL_LENGTH, tiling); @@ -140,7 +140,7 @@ int32_t main(int32_t argc, char *argv[]) { // Copy result to host memory and write to output file CHECK_ACL(aclrtMemcpy(zHostAdd, outputSizeAdd, zDeviceAdd, outputSizeAdd, ACL_MEMCPY_DEVICE_TO_HOST)); - WriteFile("../output/output.bin", zHostAdd, outputSizeAdd); + WriteFile("./output/output.bin", zHostAdd, outputSizeAdd); // Compare the result with the golden result bool goldenResult = true; diff --git a/operator/ascendc/2_features/2_tbufpool/run.sh b/operator/ascendc/2_features/2_tbufpool/run.sh index 5ae89dbe9..04d5fd9fc 100644 --- a/operator/ascendc/2_features/2_tbufpool/run.sh +++ b/operator/ascendc/2_features/2_tbufpool/run.sh @@ -1,48 +1,114 @@ #!/bin/bash -SHORT=r:,v:, -LONG=run-mode:,soc-version:, +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +SHORT=r:,v:,i:,b:,p:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") eval set -- "$OPTS" -while : -do +SOC_VERSION="Ascend310P3" + +while :; do case "$1" in - (-r | 
--run-mode ) - RUN_MODE="$2" - shift 2;; - (-v | --soc-version ) - SOC_VERSION="$2" - shift 2;; - (--) - shift; - break;; - (*) - echo "[ERROR] Unexpected option: $1"; - break;; + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; esac done -rm -rf build -mkdir build -cd build - -# in case of running op in simulator, use stub so instead -if [ "${RUN_MODE}" = "sim" ]; then - export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | sed 's/\/.*\/runtime\/lib64://g') - export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/runtime/lib64/stub:$LD_LIBRARY_PATH +RUN_MODE_LIST="cpu sim npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + exit -1 fi -source $ASCEND_HOME_DIR/bin/setenv.bash -export LD_LIBRARY_PATH=${ASCEND_HOME_DIR}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi -cmake -DRUN_MODE=${RUN_MODE} -DSOC_VERSION=${SOC_VERSION} -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_DIR} .. 
-make -j16 +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi -if [ "${RUN_MODE}" = "npu" ]; then - ./tbufpool_direct_kernel_op -elif [ "${RUN_MODE}" = "sim" ]; then - export ASCEND_TOOLKIT_HOME=${ASCEND_HOME_DIR} - export ASCEND_HOME_PATH=${ASCEND_HOME_DIR} - msprof op simulator --application=./tbufpool_direct_kernel_op +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "sim" ]; then + # in case of running op in simulator, use stub .so instead + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH elif [ "${RUN_MODE}" = "cpu" ]; then - ./tbufpool_direct_kernel_op -fi \ No newline at end of file + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi + +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f tbufpool_direct_kernel_op +cp ./out/bin/tbufpool_direct_kernel_op ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op 
--application=./tbufpool_direct_kernel_op + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --application=./tbufpool_direct_kernel_op + elif [ "${RUN_MODE}" = "cpu" ]; then + ./tbufpool_direct_kernel_op + fi + else + ./tbufpool_direct_kernel_op + fi +) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/run.sh b/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/run.sh index 9bed4b408..ef12dd68e 100644 --- a/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/run.sh +++ b/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/run.sh @@ -75,13 +75,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi -- Gitee From 0209698dfeab87b7e8910f942200779f01cc6f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E9=91=AB?= <18810011293@163.com> Date: Sat, 28 Jun 2025 06:06:15 +0000 Subject: [PATCH 29/97] =?UTF-8?q?!2700=20complement=20readme=20Merge=20pul?= =?UTF-8?q?l=20request=20!2700=20from=20=E5=BC=A0=E9=91=AB/zhangxin0628?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md 
b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md index 93ea0a9ce..807a7c044 100644 --- a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md +++ b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md @@ -18,7 +18,7 @@ |skip_layers|str|跳过量化的层 |/|/| |weight_only_config.weight_compress_only|bool|是否为仅权重量化|False|True/False| |weight_only_config.wts_type|enum|量化后权重类型|INT8|INT8/MXFP4_E2M1/HIFLOAT8/FLOAT8_E4M3FN| -|weight_only_config.awq_quantize.grids_num|uint32|awq搜索格点数量|20|/|/| +|weight_only_config.awq_quantize.grids_num|uint32|awq搜索格点数量|20|1~4294967295(整数)| ## 2 FLOAT4_E2M1量化示例 > 当前quantization.cfg文件中weight_only_config.wts_type设置的值为FLOAT4_E2M1 -- Gitee From ba1b6ebd485c56d8d93b519ef5088fe475792ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E9=91=AB?= <18810011293@163.com> Date: Mon, 30 Jun 2025 06:12:54 +0000 Subject: [PATCH 30/97] =?UTF-8?q?!2701=20fix=20fp4=20readme=20Merge=20pull?= =?UTF-8?q?=20request=20!2701=20from=20=E5=BC=A0=E9=91=AB/fix0628?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md index 807a7c044..51cb57c93 100644 --- a/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md +++ b/python/level1_single_api/9_amct/amct_pytorch/fp4_weight_quantization/README_CN.md @@ -17,7 +17,7 @@ |:--| :-: | :-- | :-: | :-: | |skip_layers|str|跳过量化的层 |/|/| |weight_only_config.weight_compress_only|bool|是否为仅权重量化|False|True/False| -|weight_only_config.wts_type|enum|量化后权重类型|INT8|INT8/MXFP4_E2M1/HIFLOAT8/FLOAT8_E4M3FN| 
+|weight_only_config.wts_type|enum|量化后权重类型|INT8|INT8/MXFP4_E2M1/HIFLOAT8/FLOAT8_E4M3FN/FLOAT4_E2M1/FLOAT4_E1M2| |weight_only_config.awq_quantize.grids_num|uint32|awq搜索格点数量|20|1~4294967295(整数)| ## 2 FLOAT4_E2M1量化示例 -- Gitee From b1a865151ed52b7aff1b254bc4706ed10a40ac72 Mon Sep 17 00:00:00 2001 From: Y_keven Date: Mon, 30 Jun 2025 09:29:44 +0000 Subject: [PATCH 31/97] =?UTF-8?q?!2695=20=E6=96=B0=E5=A2=9EpyACL=E5=BF=AB?= =?UTF-8?q?=E9=80=9F=E5=85=A5=E9=97=A8=E6=A0=B7=E4=BE=8B=20resnet50=5Ffirs?= =?UTF-8?q?tapp=20Merge=20pull=20request=20!2695=20from=20Y=5Fkeven/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../resnet50_firstapp/README.md | 156 +++++++++++++++++ .../resnet50_firstapp/data/.keep | 1 + .../resnet50_firstapp/model/.keep | 1 + .../resnet50_firstapp/src/constant.py | 22 +++ .../resnet50_firstapp/src/firstapp.py | 164 ++++++++++++++++++ 5 files changed, 344 insertions(+) create mode 100644 python/level2_simple_inference/1_classification/resnet50_firstapp/README.md create mode 100644 python/level2_simple_inference/1_classification/resnet50_firstapp/data/.keep create mode 100644 python/level2_simple_inference/1_classification/resnet50_firstapp/model/.keep create mode 100644 python/level2_simple_inference/1_classification/resnet50_firstapp/src/constant.py create mode 100644 python/level2_simple_inference/1_classification/resnet50_firstapp/src/firstapp.py diff --git a/python/level2_simple_inference/1_classification/resnet50_firstapp/README.md b/python/level2_simple_inference/1_classification/resnet50_firstapp/README.md new file mode 100644 index 000000000..466780ba3 --- /dev/null +++ b/python/level2_simple_inference/1_classification/resnet50_firstapp/README.md @@ -0,0 +1,156 @@ +# 快速入门 +在本节中,您可以通过一个简单的图片分类应用了解使用AscendCL接口开发应用的基本过程以及开发过程中涉及的关键概念。 + +## 什么是图片分类应用? 
+ +“图片分类应用”,从名称上,我们也能直观地看出它的作用:按图片所属的类别来区分图片。 + +![输入图片说明](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/resource/pyacl_resnet50_firstapp.png) + +但“图片分类应用”是怎么做到这一点的呢?当然得先有一个能做到图片分类的模型,我们可以直接使用一些训练好的开源模型,也可以基于开源模型的源码进行修改、重新训练,还可以自己基于算法、框架构建适合自己的模型。 + +鉴于当前我们是入门内容,此处我们直接获取已训练好的开源模型,毕竟这种最简单、最快。此处我们选择的是ONNX框架的ResNet-50模型。 + +ResNet-50模型的基本介绍如下: + +- 输入数据:RGB格式、224\*224分辨率的输入图片 +- 输出数据:图片的类别标签及其对应置信度 + +> **说明:** +> - 置信度是指图片所属某个类别可能性。 +> - 类别标签和类别的对应关系与训练模型时使用的数据集有关,需要查阅对应数据集的标签及类别的对应关系。 + +## 环境要求 + +- 操作系统及架构:CentOS 7.6 x86\_64、CentOS aarch64、Ubuntu 18.04 x86\_64、EulerOS x86、EulerOS aarch64 +- 芯片:Atlas 200/300/500 推理产品、Atlas 推理系列产品、Atlas 训练系列产品 +- python及依赖的库:python3.7.5以上,Pillow、Numpy库 +- 已在环境上部署昇腾AI软件栈,并配置对应的的环境变量,请参见[Link](https://www.hiascend.com/document/redirect/CannCommunityInstSoftware)中对应版本的CANN安装指南。 + + 以下步骤中,开发环境指开发代码的环境,运行环境指运行算子、推理或训练等程序的环境,运行环境上必须带昇腾AI处理器。开发环境和运行环境可以合设在同一台服务器上,也可以分设。 + +## 下载样例 + +请选择其中一种样例下载方式: + +- 压缩包方式下载(下载时间较短,但步骤稍微复杂) + + ``` + # 1. samples仓右上角选择 【克隆/下载】 下拉框并选择 【下载ZIP】。 + # 2. 将ZIP包上传到开发环境中的普通用户家目录中,【例如:${HOME}/ascend-samples-master.zip】。 + # 3. 开发环境中,执行以下命令,解压zip包。 + cd ${HOME} + unzip ascend-samples-master.zip + ``` + + 注:如果需要下载其它版本代码,请先请根据前置条件说明进行samples仓分支切换。 + +- 命令行方式下载(下载时间较长,但步骤简单) + + ``` + # 开发环境,非root用户命令行中执行以下命令下载源码仓。 + cd ${HOME} + git clone https://gitee.com/ascend/samples.git + ``` + + 注:如果需要切换到其它tag版本,以v0.5.0为例,可执行以下命令。 + + ``` + git checkout v0.5.0 + ``` + +下载成功后,切换到“ /python/level2_simple_inference/1_classification/resnet50_firstapp”目录下,查看该样例的目录结构,**下文所有的操作步骤均需先切换到resnet50_firstapp目录**: + +``` +resnet50_firstapp +├── data // 用于存放测试图片的目录 +├── model // 用于存放模型文件的目录 +├── src +│ ├── constant.py // 常量定义文件 +│ └── firstapp.py // 图片分类样例的运行文件 +``` + +## 准备模型 + +1. 以运行用户登录开发环境。 + +2. 
下载模型数据。 + + 执行以下命令,将ONNX模型下载至“model”目录下,命令中的“******”请根据实际样例包的存放目录替换 + ``` + cd /python/level2_simple_inference/1_classification/resnet50_firstapp/model + wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/003_Atc_Models/resnet50/resnet50.onnx + ``` + +3. 执行模型转换。 + + 执行以下命令(以 Atlas 推理系列产品为例),将原始模型转换为昇腾AI处理器能识别的\*.om模型文件。请注意,执行命令的用户需具有命令中相关路径的可读、可写权限。以下命令中的“******”请根据实际昇腾AI处理器版本替换。 + + ``` + atc --model=resnet50.onnx --framework=5 --output=resnet50 --input_shape="actual_input_1:1,3,224,224" --soc_version= + ``` + + - --model:ResNet-50网络的模型文件路径。 + - --framework:原始框架类型。5表示ONNX。 + - --output:resnet50.om模型文件的路径。若此处修改模型文件名及存储路径,则需要同步修改src/firstapp.py中模型加载处的模型文件名及存储路径,即model_path变量值。 + - --soc\_version:昇腾AI处理器的版本。 + + 关于各参数的详细解释,请参见[《ATC离线模型编译工具》](https://www.hiascend.com/document/redirect/AscendTensorCompiler)。 + +## 准备测试图片 + +本次样例需要使用两张动物图片,请执行以下命令将图片下载至“data”目录,或通过以下链接获取后放至“data”目录。若此处修改测试图片文件名,则需要同步修改src/firstapp.py中读取图片处的文件名,即image_paths变量值。 + +- [测试图片1](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/aclsample/dog1_1024_683.jpg) + + ``` + cd $HOME/first_app/data + wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/aclsample/dog1_1024_683.jpg + ``` + +- [测试图片2](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/aclsample/dog2_1024_683.jpg) + + ``` + cd $HOME/first_app/data + wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/aclsample/dog2_1024_683.jpg + ``` + +## 运行应用 +以运行用户将resnet50_firstapp目录放至运行环境,以运行用户登录运行环境,切换到resnet50_firstapp目录下,检查环境变量配置是否正确,执行以下命令。 + +``` +python3 src/firstapp.py +``` +可以得到如下输出,分别为两张测试图片的top5分类信息。 + +其中[161]: 0.810220表示的是类别标识索引“161”的置信度为“0.810220”。 + +``` +======== top5 inference results: ============= +[161]: 0.810220 +[162]: 0.103008 +[178]: 0.017485 +[166]: 0.013941 +[212]: 0.009581 +======== top5 inference results: ============= +[267]: 0.728255 +[266]: 0.101687 +[265]: 0.100111 +[151]: 0.004214 +[160]: 0.002731 +``` + +>**说明:** 
+>类别标签和类别的对应关系与训练模型时使用的数据集有关,本样例使用的模型是基于imagenet数据集进行训练的,您可以在互联网上查阅对应数据集的标签及类别的对应关系。 +> +>当前屏显信息中的类别标识与类别的对应关系如下: +> +>"161": ["basset", "basset hound"] +> +>"162": ["beagle"] +> +>"163": ["bloodhound", "sleuthhound"] +> +>"166": ["Walker hound", "Walker foxhound"] +> +>"167": ["English foxhound"] \ No newline at end of file diff --git a/python/level2_simple_inference/1_classification/resnet50_firstapp/data/.keep b/python/level2_simple_inference/1_classification/resnet50_firstapp/data/.keep new file mode 100644 index 000000000..8d1c8b69c --- /dev/null +++ b/python/level2_simple_inference/1_classification/resnet50_firstapp/data/.keep @@ -0,0 +1 @@ + diff --git a/python/level2_simple_inference/1_classification/resnet50_firstapp/model/.keep b/python/level2_simple_inference/1_classification/resnet50_firstapp/model/.keep new file mode 100644 index 000000000..8d1c8b69c --- /dev/null +++ b/python/level2_simple_inference/1_classification/resnet50_firstapp/model/.keep @@ -0,0 +1 @@ + diff --git a/python/level2_simple_inference/1_classification/resnet50_firstapp/src/constant.py b/python/level2_simple_inference/1_classification/resnet50_firstapp/src/constant.py new file mode 100644 index 000000000..6b389277f --- /dev/null +++ b/python/level2_simple_inference/1_classification/resnet50_firstapp/src/constant.py @@ -0,0 +1,22 @@ +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# rule for mem +ACL_MEM_MALLOC_HUGE_FIRST = 0 + +# rule for memory copy +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +ACL_MEMCPY_DEVICE_TO_DEVICE = 3 diff --git a/python/level2_simple_inference/1_classification/resnet50_firstapp/src/firstapp.py b/python/level2_simple_inference/1_classification/resnet50_firstapp/src/firstapp.py new file mode 100644 index 000000000..363becfb5 --- /dev/null +++ b/python/level2_simple_inference/1_classification/resnet50_firstapp/src/firstapp.py @@ -0,0 +1,164 @@ +# Copyright 2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import acl +import numpy as np +from PIL import Image +from constant import ACL_MEM_MALLOC_HUGE_FIRST, \ + ACL_MEMCPY_HOST_TO_DEVICE, ACL_MEMCPY_DEVICE_TO_HOST + + +class Net: + def __init__(self, model_path): + # 初始化函数 + self.device_id = 0 + # step1: 初始化 + ret = acl.init() + # 指定运算的Device + ret = acl.rt.set_device(self.device_id) + # step2: 加载模型,本示例为ResNet-50模型 + # 加载离线模型文件,返回标识模型的ID + self.model_id, ret = acl.mdl.load_from_file(model_path) + # 创建空白模型描述信息,获取模型描述信息的指针地址 + self.model_desc = acl.mdl.create_desc() + # 通过模型的ID,将模型的描述信息填充到model_desc + ret = acl.mdl.get_desc(self.model_desc, self.model_id) + # step3:创建输入输出数据集 + # 创建输入数据集 + self.input_dataset, self.input_data = self.prepare_dataset('input') + # 创建输出数据集 + self.output_dataset, self.output_data = self.prepare_dataset('output') + + def prepare_dataset(self, io_type): + # 准备数据集 + if io_type == "input": + # 获得模型输入的个数 + io_num = acl.mdl.get_num_inputs(self.model_desc) + acl_mdl_get_size_by_index = acl.mdl.get_input_size_by_index + else: + # 获得模型输出的个数 + io_num = acl.mdl.get_num_outputs(self.model_desc) + acl_mdl_get_size_by_index = acl.mdl.get_output_size_by_index + # 创建aclmdlDataset类型的数据,描述模型推理的输入。 + dataset = acl.mdl.create_dataset() + datas = [] + for i in range(io_num): + # 获取所需的buffer内存大小 + buffer_size = acl_mdl_get_size_by_index(self.model_desc, i) + # 申请buffer内存 + buffer, ret = acl.rt.malloc(buffer_size, ACL_MEM_MALLOC_HUGE_FIRST) + # 从内存创建buffer数据 + data_buffer = acl.create_data_buffer(buffer, buffer_size) + # 将buffer数据添加到数据集 + _, ret = acl.mdl.add_dataset_buffer(dataset, data_buffer) + datas.append({"buffer": buffer, "data": data_buffer, "size": buffer_size}) + return dataset, datas + + def forward(self, inputs): + # 执行推理任务 + # 遍历所有输入,拷贝到对应的buffer内存中 + input_num = len(inputs) + for i in range(input_num): + bytes_data = inputs[i].tobytes() + bytes_ptr = acl.util.bytes_to_ptr(bytes_data) + # 将图片数据从Host传输到Device。 + ret = acl.rt.memcpy(self.input_data[i]["buffer"], # 目标地址 device + 
self.input_data[i]["size"], # 目标地址大小 + bytes_ptr, # 源地址 host + len(bytes_data), # 源地址大小 + ACL_MEMCPY_HOST_TO_DEVICE) # 模式:从host到device + # 执行模型推理。 + ret = acl.mdl.execute(self.model_id, self.input_dataset, self.output_dataset) + # 处理模型推理的输出数据,输出top5置信度的类别编号。 + inference_result = [] + for i, item in enumerate(self.output_data): + buffer_host, ret = acl.rt.malloc_host(self.output_data[i]["size"]) + # 将推理输出数据从Device传输到Host。 + ret = acl.rt.memcpy(buffer_host, # 目标地址 host + self.output_data[i]["size"], # 目标地址大小 + self.output_data[i]["buffer"], # 源地址 device + self.output_data[i]["size"], # 源地址大小 + ACL_MEMCPY_DEVICE_TO_HOST) # 模式:从device到host + # 从内存地址获取bytes对象 + bytes_out = acl.util.ptr_to_bytes(buffer_host, self.output_data[i]["size"]) + # 按照float32格式将数据转为numpy数组 + data = np.frombuffer(bytes_out, dtype=np.float32) + inference_result.append(data) + # 释放内存 + ret = acl.rt.free_host(buffer_host) + vals = np.array(inference_result).flatten() + # 对结果进行softmax转换 + vals = np.exp(vals) + vals = vals / np.sum(vals) + + return vals + + def __del__(self): + # 析构函数 按照初始化资源的相反顺序释放资源。 + # 销毁输入输出数据集 + for dataset in [self.input_data, self.output_data]: + while dataset: + item = dataset.pop() + ret = acl.destroy_data_buffer(item["data"]) # 销毁buffer数据 + ret = acl.rt.free(item["buffer"]) # 释放buffer内存 + ret = acl.mdl.destroy_dataset(self.input_dataset) # 销毁输入数据集 + ret = acl.mdl.destroy_dataset(self.output_dataset) # 销毁输出数据集 + # 销毁模型描述 + ret = acl.mdl.destroy_desc(self.model_desc) + # 卸载模型 + ret = acl.mdl.unload(self.model_id) + # 释放device + ret = acl.rt.reset_device(self.device_id) + # acl去初始化 + ret = acl.finalize() + +def transfer_pic(input_path): + # 图像预处理 + input_path = os.path.abspath(input_path) + with Image.open(input_path) as image_file: + # 缩放为224*224 + img = image_file.resize((224, 224)) + # 转换为float32类型ndarray + img = np.array(img).astype(np.float32) + # 根据imageNet图片的均值和方差对图片像素进行归一化 + img -= [123.675, 116.28, 103.53] + img /= [58.395, 57.12, 57.375] + # RGB通道交换顺序为BGR + img = 
img[:, :, ::-1] + # resnet50为色彩通道在前 + img = img.transpose((2, 0, 1)) + # 返回并添加batch通道 + return np.array([img]) + +def print_top_5(data): + top_5 = data.argsort()[::-1][:5] + print("======== top5 inference results: =============") + for j in top_5: + print("[%d]: %f" % (j, data[j])) + +if __name__ == "__main__": + image_paths = ["./data/dog1_1024_683.jpg", "./data/dog2_1024_683.jpg"] + model_path = './model/resnet50.om' + resnet50 = Net(model_path) + + for path in image_paths: + # 图像预处理,此处仅供参考,用户按照自己需求进行预处理 + image = transfer_pic(path) + # 将数据按照每个输入的顺序构造list传入,当前示例的ResNet-50模型只有一个输入 + result = resnet50.forward([image]) + # 输出top_5 + print_top_5(result) + + del resnet50 -- Gitee From 07a3f06baeb89a819da81f391f943df94dd35f54 Mon Sep 17 00:00:00 2001 From: ruoshuisixue Date: Tue, 1 Jul 2025 11:17:13 +0000 Subject: [PATCH 32/97] !2704 sc clean Merge pull request !2704 from ruoshuisixue/master --- .../AddTemplateCustom/op_kernel/tiling_key_add_custom.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h index 61dcb08ce..1cc3d7700 100644 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h @@ -56,14 +56,14 @@ ASCENDC_TPL_SEL( ASCENDC_TPL_DTYPE_SEL(D_T_Y, ADD_TPL_FP16), ASCENDC_TPL_DTYPE_SEL(D_T_Z, ADD_TPL_FP16), ASCENDC_TPL_UINT_SEL(TILE_NUM, ASCENDC_TPL_UI_LIST, 1, 8), - ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1), + ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1) ), ASCENDC_TPL_ARGS_SEL( ASCENDC_TPL_DTYPE_SEL(D_T_X, ADD_TPL_FP32), ASCENDC_TPL_DTYPE_SEL(D_T_Y, ADD_TPL_FP32), ASCENDC_TPL_DTYPE_SEL(D_T_Z, ADD_TPL_FP32), ASCENDC_TPL_UINT_SEL(TILE_NUM, 
ASCENDC_TPL_UI_LIST, 1, 8), - ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1), - ), + ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1) + ) ); #endif \ No newline at end of file -- Gitee From 4ff91413ea15980d4623fb12381b558fcb39f75b Mon Sep 17 00:00:00 2001 From: zhanghao0689 Date: Wed, 2 Jul 2025 07:28:50 +0000 Subject: [PATCH 33/97] !2705 add bank conflict cases Merge pull request !2705 from zhanghao0689/master --- .../KernelLaunch/CMakeLists.txt | 47 ++++ .../4_bank_conflict/KernelLaunch/README.md | 88 ++++++++ .../KernelLaunch/add_custom_v1.cpp | 86 ++++++++ .../KernelLaunch/add_custom_v2.cpp | 90 ++++++++ .../KernelLaunch/cmake/cpu_lib.cmake | 9 + .../KernelLaunch/cmake/npu_lib.cmake | 11 + .../4_bank_conflict/KernelLaunch/data_utils.h | 203 ++++++++++++++++++ .../4_bank_conflict/KernelLaunch/main.cpp | 127 +++++++++++ .../4_bank_conflict/KernelLaunch/run.sh | 113 ++++++++++ .../KernelLaunch/scripts/gen_data.py | 25 +++ .../KernelLaunch/scripts/verify_result.py | 53 +++++ .../4_bank_conflict/README.md | 70 +++++- operator/ascendc/4_best_practices/README.md | 4 +- 13 files changed, 924 insertions(+), 2 deletions(-) create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/CMakeLists.txt create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/README.md create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v1.cpp create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v2.cpp create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/cpu_lib.cmake create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/npu_lib.cmake create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/data_utils.h create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/main.cpp create mode 100755 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/run.sh create mode 100644 
operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/gen_data.py create mode 100644 operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/verify_result.py diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/CMakeLists.txt b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/CMakeLists.txt new file mode 100644 index 000000000..392189fe1 --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/CMakeLists.txt @@ -0,0 +1,47 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +# ${KERNEL_FILES} are used to compile library, push files written by ascendc in ${KERNEL_FILES}. 
+# ref to cmake/npu.cmake ascendc_library, cmake/cpu.cmake add_library +file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_v1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_v2.cpp +) + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $,$>:host_intf_pub>> + $:ascendcl>> + ascendc_kernels_${RUN_MODE} +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/README.md b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/README.md new file mode 100644 index 000000000..f72b521cd --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/README.md @@ -0,0 +1,88 @@ +## 目录结构介绍 + +``` +├── KernelLaunch +│ ├── cmake // 编译工程文件 +│ ├── scripts +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 验证输出数据和真值数据是否一致的验证脚本 +│ ├── add_custom_v1.cpp // 算子kernel实现1:未优化前实现 +│ ├── add_custom_v2.cpp // 算子kernel实现2:优化地址分配,消除Bank冲突后的实现 +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 +│ └── run.sh // 编译运行算子的脚本 +``` + +## 代码实现介绍 + +本样例中实现的是固定shape为1*4096的Add算子。 + +- kernel实现 + + Add算子的数学表达式为: + + ``` + z = x + y + ``` + + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用计算接口完成两个输入参数相加,得到最终结果,再搬出到外部存储上。 + + Add算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor xGm和yGm搬运到Local 
Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal执行加法操作,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor zGm中。 + + 实现1:请参考[add_custom_v1.cpp](./add_custom_v1.cpp),xLocal地址为0,yLocal地址为0x4000,zLocal地址为0x8000。xLocal与yLocal存在读读冲突,xLocal与zLocal存在读写冲突。 + + 实现2:请参考[add_custom_v2.cpp](./add_custom_v2.cpp),为了避免Bank冲突,通过配置InitBuffer时的bufferSize来调整Tensor地址,xLocal地址为0,yLocal地址为0x4100,zLocal地址为0x10000。 +- 调用实现 + + 1. CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; + 2. NPU侧运行验证主要通过使用ACLRT_LAUNCH_KERNEL内核调用宏来完成。 + + 应用程序通过ASCENDC_CPU_DEBUG 宏区分代码逻辑运行于CPU侧还是NPU侧。 + +## 运行样例算子 + +- 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + + ```bash + cd ${git_clone_path}/samples/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch + ``` +- 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` +- 样例执行 + + ```bash + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + + - RUN_MODE:编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu /sim / npu] + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + + ```bash + bash run.sh -r cpu -v Ascendxxxyy + ``` + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/07/01 | 新增本readme | diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v1.cpp b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v1.cpp new file mode 100644 index 000000000..9d9774405 --- /dev/null +++ 
b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v1.cpp @@ -0,0 +1,86 @@ +/** + * @file add_custom_v1.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "kernel_operator.h" + +using AscendC::TPosition; +namespace { +constexpr int32_t TOTAL_LENGTH = 4096; // total length of data +constexpr int32_t BUFFER_NUM = 1; // tensor num for each queue +} + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z) + { + xGm.SetGlobalBuffer((__gm__ float *)x, TOTAL_LENGTH); + yGm.SetGlobalBuffer((__gm__ float *)y, TOTAL_LENGTH); + zGm.SetGlobalBuffer((__gm__ float *)z, TOTAL_LENGTH); + pipe.InitBuffer(inQueueX, BUFFER_NUM, TOTAL_LENGTH * sizeof(float)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, TOTAL_LENGTH * sizeof(float)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TOTAL_LENGTH * sizeof(float)); + } + __aicore__ inline void Process() + { + CopyIn(); + Compute(); + CopyOut(); + } + +private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm, TOTAL_LENGTH); + AscendC::DataCopy(yLocal, yGm, TOTAL_LENGTH); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TOTAL_LENGTH); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm, zLocal, 
TOTAL_LENGTH); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; +}; + +extern "C" __global__ __aicore__ void add_custom_v1(GM_ADDR x, GM_ADDR y, GM_ADDR z) +{ + KernelAdd op; + op.Init(x, y, z); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do_v1(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z) +{ + add_custom_v1<<>>(x, y, z); +} +#endif diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v2.cpp b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v2.cpp new file mode 100644 index 000000000..65e7dd7e5 --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/add_custom_v2.cpp @@ -0,0 +1,90 @@ +/** + * @file add_custom_v2.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "kernel_operator.h" + +using AscendC::TPosition; +namespace { +constexpr int32_t TOTAL_LENGTH = 4096; // total length of data +constexpr int32_t BUFFER_NUM = 1; // tensor num for each queue +constexpr int32_t BANKGROUP_SIZE = 1024 * 64; // one bank size is 4KB, with 16 banks +constexpr int32_t ONE_REPEAT_SIZE = 256; // 256 bytes per repeat +} // namespace + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z) + { + xGm.SetGlobalBuffer((__gm__ float *)x, TOTAL_LENGTH); + yGm.SetGlobalBuffer((__gm__ float *)y, TOTAL_LENGTH); + zGm.SetGlobalBuffer((__gm__ float *)z, TOTAL_LENGTH); + // xLocal size add 256 to avoid rr conflict + pipe.InitBuffer(inQueueX, BUFFER_NUM, TOTAL_LENGTH * sizeof(float) + ONE_REPEAT_SIZE); + // yLocal size adjust to 64KB - xLocal size to avoid rw conflict + pipe.InitBuffer(inQueueY, BUFFER_NUM, BANKGROUP_SIZE - (TOTAL_LENGTH * sizeof(float) + ONE_REPEAT_SIZE)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TOTAL_LENGTH * sizeof(float)); + } + __aicore__ inline void Process() + { + CopyIn(); + Compute(); + CopyOut(); + } + +private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm, TOTAL_LENGTH); + AscendC::DataCopy(yLocal, yGm, TOTAL_LENGTH); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TOTAL_LENGTH); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm, zLocal, TOTAL_LENGTH); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe 
pipe; + AscendC::TQue inQueueX; + AscendC::TQue inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; +}; + +extern "C" __global__ __aicore__ void add_custom_v2(GM_ADDR x, GM_ADDR y, GM_ADDR z) +{ + KernelAdd op; + op.Init(x, y, z); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do_v2(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z) +{ + add_custom_v2<<>>(x, y, z); +} +#endif diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/cpu_lib.cmake b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/cpu_lib.cmake new file mode 100644 index 000000000..5362c8b5a --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/cpu_lib.cmake @@ -0,0 +1,9 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels_${RUN_MODE} PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels_${RUN_MODE} DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/npu_lib.cmake b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/npu_lib.cmake new file mode 100644 index 000000000..f92b095d1 --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/cmake/npu_lib.cmake @@ -0,0 +1,11 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) 
+else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/data_utils.h b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/data_utils.h new file mode 100644 index 000000000..09d906371 --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/data_utils.h @@ -0,0 +1,203 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + 
break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/main.cpp b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/main.cpp new file mode 100644 index 000000000..8a65f8fa6 --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/main.cpp @@ -0,0 +1,127 @@ +/** + * @file main.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +extern void add_custom_do_v1(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z); +extern void add_custom_do_v2(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z); +using KernelEntry = void(*)(uint32_t, void *, uint8_t *, uint8_t *, uint8_t *); +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void add_custom_v1(GM_ADDR x, GM_ADDR y, GM_ADDR z); +extern "C" __global__ __aicore__ void add_custom_v2(GM_ADDR x, GM_ADDR y, GM_ADDR z); +using KernelEntry = void(*)(GM_ADDR, GM_ADDR, GM_ADDR); + +#endif + +struct ArgInfo { + std::string fileName; + size_t length; +}; + +#ifndef ASCENDC_CPU_DEBUG + +void KernelCall(KernelEntry kernelEntry, uint32_t blockDim, void *stream, std::vector &inputsInfo, + std::vector &outputsInfo) +{ + std::vector inputHost(inputsInfo.size()); + std::vector inputDevice(inputsInfo.size()); + std::vector outputHost(outputsInfo.size()); + std::vector outputDevice(outputsInfo.size()); + + for (uint32_t i = 0; i < inputsInfo.size(); i++) { + CHECK_ACL(aclrtMallocHost((void **)(&inputHost[i]), 
inputsInfo[i].length)); + CHECK_ACL(aclrtMalloc((void **)(&inputDevice[i]), inputsInfo[i].length, ACL_MEM_MALLOC_HUGE_FIRST)); + ReadFile(inputsInfo[i].fileName, inputsInfo[i].length, inputHost[i], inputsInfo[i].length); + CHECK_ACL(aclrtMemcpy(inputDevice[i], inputsInfo[i].length, inputHost[i], inputsInfo[i].length, + ACL_MEMCPY_HOST_TO_DEVICE)); + } + + for (uint32_t i = 0; i < outputsInfo.size(); i++) { + CHECK_ACL(aclrtMallocHost((void **)(&outputHost[i]), outputsInfo[i].length)); + CHECK_ACL(aclrtMalloc((void **)(&outputDevice[i]), outputsInfo[i].length, ACL_MEM_MALLOC_HUGE_FIRST)); + } + + kernelEntry(blockDim, stream, inputDevice[0], inputDevice[1], outputDevice[0]); + CHECK_ACL(aclrtSynchronizeStream(stream)); + for (uint32_t i = 0; i < outputsInfo.size(); i++) { + CHECK_ACL(aclrtMemcpy(outputHost[i], outputsInfo[i].length, outputDevice[i], outputsInfo[i].length, + ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile(outputsInfo[i].fileName, outputHost[i], outputsInfo[i].length); + CHECK_ACL(aclrtFree(outputDevice[i])); + CHECK_ACL(aclrtFreeHost(outputHost[i])); + } + + for (uint32_t i = 0; i < inputsInfo.size(); i++) { + CHECK_ACL(aclrtFree(inputDevice[i])); + CHECK_ACL(aclrtFreeHost(inputHost[i])); + } +} + +#else + +#define KernelCall(kernelEntry, blockDim, inputsInfo, outputsInfo) \ + { \ + std::vector input(inputsInfo.size()); \ + std::vector output(outputsInfo.size()); \ + \ + for (uint32_t i = 0; i < inputsInfo.size(); i++) { \ + input[i] = (uint8_t *)AscendC::GmAlloc(inputsInfo[i].length); \ + ReadFile(inputsInfo[i].fileName, inputsInfo[i].length, input[i], inputsInfo[i].length); \ + } \ + \ + for (uint32_t i = 0; i < outputsInfo.size(); i++) { \ + output[i] = (uint8_t *)AscendC::GmAlloc(outputsInfo[i].length); \ + } \ + \ + AscendC::SetKernelMode(KernelMode::AIV_MODE); \ + ICPU_RUN_KF(kernelEntry, blockDim, input[0], input[1], output[0]); \ + for (uint32_t i = 0; i < inputsInfo.size(); i++) { \ + AscendC::GmFree((void *)input[i]); \ + } \ + \ + for (uint32_t i 
= 0; i < outputsInfo.size(); i++) { \ + WriteFile(outputsInfo[i].fileName, output[i], outputsInfo[i].length); \ + AscendC::GmFree((void *)output[i]); \ + } \ + } + +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 1; + uint32_t dataLen = 4096; + size_t inputByteSize = dataLen * sizeof(float); + size_t outputByteSize = dataLen * sizeof(float); + + std::vector inputsInfo = {{"./input/input_x.bin", inputByteSize}, {"./input/input_y.bin", inputByteSize}}; + std::vector outputsV1Info = {{"./output/output_z_v1.bin", outputByteSize}}; + std::vector outputsV2Info = {{"./output/output_z_v2.bin", outputByteSize}}; + +#ifndef ASCENDC_CPU_DEBUG + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + KernelCall(add_custom_do_v1, blockDim, stream, inputsInfo, outputsV1Info); + KernelCall(add_custom_do_v2, blockDim, stream, inputsInfo, outputsV2Info); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#else + KernelCall(add_custom_v1, blockDim, inputsInfo, outputsV1Info); + KernelCall(add_custom_v2, blockDim, inputsInfo, outputsV2Info); +#endif + return 0; +} diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/run.sh b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/run.sh new file mode 100755 index 000000000..0c5aef144 --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/run.sh @@ -0,0 +1,113 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +SHORT=r:,v:,i:,b:,p:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +SOC_VERSION="Ascend310P3" + +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + 
-v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu sim npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + exit -1 +fi + +VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "sim" ]; then + # in case of running op in simulator, use stub .so instead + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +elif [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi + +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + 
-DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --launch-count=2 --output=./prof ./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --launch-count=2 --output=./prof ./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi +) +md5sum output/*.bin +python3 scripts/verify_result.py output/output_z_v1.bin output/golden.bin +python3 scripts/verify_result.py output/output_z_v2.bin output/golden.bin diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/gen_data.py b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/gen_data.py new file mode 100644 index 000000000..86bbba89d --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/gen_data.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [1, 4096]).astype(np.float32) + input_y = np.random.uniform(1, 100, [1, 4096]).astype(np.float32) + golden = (input_x + input_y).astype(np.float32) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/verify_result.py b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/verify_result.py new file mode 100644 index 000000000..6a38a3b2b --- /dev/null +++ b/operator/ascendc/4_best_practices/4_bank_conflict/KernelLaunch/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float32 +relative_tol = 1e-4 +absolute_tol = 1e-5 +error_tol = 1e-4 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float32).reshape(-1) + golden = np.fromfile(golden, dtype=np.float32).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/4_best_practices/4_bank_conflict/README.md b/operator/ascendc/4_best_practices/4_bank_conflict/README.md index 71c50671d..f2b828892 100644 --- a/operator/ascendc/4_best_practices/4_bank_conflict/README.md +++ b/operator/ascendc/4_best_practices/4_bank_conflict/README.md @@ -1 +1,69 @@ -减少bank冲突(待补充) \ No newline at end of file +## 概述 + +本样例介绍基于Add算子优化bank冲突的实现,并提供核函数直调方法。 + +## 目录结构介绍 + +``` +├── 4_bank_conflict // 使用核函数直调的方式调用Add自定义算子 +│ └── KernelLaunch // Kernel Launch方式调用核函数样例 +``` + +## 算子描述 + +算子实现的是固定shape为1×4096的Add算子。 + +Add的计算公式为: + +```python +z = x + y +``` + +- x:输入,形状为\[1, 4096],数据类型为float; +- y:输入,形状为\[1, 4096],数据类型为float; +- 
z:输出,形状为\[1, 4096],数据类型为float; + +## 算子规格描述 + + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x1 * 4096floatND
y1 * 4096floatND
算子输出y1 * 4096floatND
核函数名add_custom_v1 / add_custom_v2
+ +## 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 编译运行样例算子 + +针对自定义算子工程,编译运行包含如下步骤: + +- 编译自定义算子工程; +- 调用执行自定义算子; + +详细操作如下所示。 + +### 1. 获取源码包 + +编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 + +### 2. 编译运行样例工程 + +- [KernelLaunch样例运行](./KernelLaunch/README.md) + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | ---------------- | +| 2025/07/01 | 新增直调方式样例 | diff --git a/operator/ascendc/4_best_practices/README.md b/operator/ascendc/4_best_practices/README.md index 653d01b0c..f5379bbbf 100644 --- a/operator/ascendc/4_best_practices/README.md +++ b/operator/ascendc/4_best_practices/README.md @@ -6,6 +6,7 @@ | 目录名称 | 功能描述 | 运行环境 | | ------------------------------- | ------------------------------------------ | ------------------------------------------ | +| [4_bank_conflict](./4_bank_conflict) | 基于Ascend C的bank冲突性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [6_group_matmul](./6_group_matmul) | 基于Ascend C的group matmul算子性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [21_all_gather_matmul_custom](./21_all_gather_matmul_custom) | 基于Ascend C的AllGatherMatmul算子性能调优样例 | Atlas A2训练系列产品 | | [22_matmul_reduce_scatter_custom](./22_matmul_reduce_scatter_custom) | 基于Ascend C的MatmulReduceScatter算子性能调优样例 | Atlas A2训练系列产品 | @@ -43,7 +44,8 @@ ## 更新说明 | 时间 | 更新事项 | | ---------- | -------------------------------------------- | +| 2025/07/01 | 新增4_bank_conflict样例 | | 2024/12/19 | 新增23_matmul_all_reduce_custom样例 | | 2024/12/19 | 新增22_matmul_reduce_scatter_custom样例 | | 2024/12/19 | 新增21_all_gather_matmul_custom样例 | -| 2024/11/20 | 新增6_group_matmul样例 | \ No newline at end of file +| 2024/11/20 | 新增6_group_matmul样例 | -- Gitee From e120936690a2ea9220fea64fb29431e8c21316c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B6=8A=E9=99=8C=E5=BA=A6=E9=98=A1?= Date: Tue, 8 Jul 2025 11:37:19 +0000 Subject: [PATCH 34/97] =?UTF-8?q?!2708=20=E5=A2=9E=E5=8A=A0=E4=B8=A4?= =?UTF-8?q?=E7=A7=8D=E5=BD=92=E7=BA=A6=E6=8C=87=E4=BB=A4=E7=9A=84=E6=A0=B7?= 
=?UTF-8?q?=E4=BE=8B=20Merge=20pull=20request=20!2708=20from=20=E8=B6=8A?= =?UTF-8?q?=E9=99=8C=E5=BA=A6=E9=98=A1/Regulation=5FDirective=5FReduce=5FC?= =?UTF-8?q?ustom?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AclNNInvocationNaive/main.cpp | 26 +-- .../14_reduce_frameworklaunch/README.md | 5 + .../ReduceCustom/op_host/reduce_custom.cpp | 33 +-- .../ReduceCustom/op_kernel/reduce_custom.cpp | 204 +++++++++++++----- 4 files changed, 189 insertions(+), 79 deletions(-) diff --git a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/AclNNInvocationNaive/main.cpp b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/AclNNInvocationNaive/main.cpp index 7ecffbc7e..734d48798 100644 --- a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/AclNNInvocationNaive/main.cpp +++ b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/AclNNInvocationNaive/main.cpp @@ -95,7 +95,7 @@ void DestroyResources(std::vector tensors, std::vector deviceAdd int main(int argc, char **argv) { constexpr int64_t inputShape = 4096; - constexpr float resFloat = 4096.0; + float resFloat = 0; // 1. 
(Fixed code) Initialize device / stream, refer to the list of external interfaces of acl // Update deviceId to your own device id int32_t deviceId = 0; @@ -110,21 +110,21 @@ int main(int argc, char **argv) void *outputZDeviceAddr = nullptr; aclTensor *inputX = nullptr; aclTensor *outputZ = nullptr; - std::vector inputXHostData(inputXShape[0]); - std::vector outputZHostData(outputZShape[0]); + std::vector inputXHostData(inputXShape[0], 1.0); + std::vector outputZHostData(outputZShape[0], 0); + for (int i = 0; i < inputXShape[0]; ++i) { - inputXHostData[i] = aclFloatToFloat16(1.0); - } - for (int i = 0; i < outputZShape[0]; ++i) { - outputZHostData[i] = aclFloatToFloat16(resFloat); + inputXHostData[i] = 1.0; + resFloat += 1.0; } + std::vector tensors = {inputX, outputZ}; std::vector deviceAddrs = {inputXDeviceAddr, outputZDeviceAddr}; // Create inputX aclTensor - ret = CreateAclTensor(inputXHostData, inputXShape, &inputXDeviceAddr, aclDataType::ACL_FLOAT16, &inputX); + ret = CreateAclTensor(inputXHostData, inputXShape, &inputXDeviceAddr, aclDataType::ACL_FLOAT, &inputX); CHECK_RET(ret == ACL_SUCCESS, DestroyResources(tensors, deviceAddrs, stream, deviceId); return FAILED); // Create outputZ aclTensor - ret = CreateAclTensor(outputZHostData, outputZShape, &outputZDeviceAddr, aclDataType::ACL_FLOAT16, &outputZ); + ret = CreateAclTensor(outputZHostData, outputZShape, &outputZDeviceAddr, aclDataType::ACL_FLOAT, &outputZ); CHECK_RET(ret == ACL_SUCCESS, DestroyResources(tensors, deviceAddrs, stream, deviceId); return FAILED); // 3. Call the API of the custom operator library @@ -154,9 +154,9 @@ int main(int argc, char **argv) // 5. 
Get the output value, copy the result from device memory to host memory, need to modify according to the // interface of the API auto size = GetShapeSize(outputZShape); - std::vector resultData(size, 0); + std::vector resultData(size, 0); ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outputZDeviceAddr, - size * sizeof(aclFloat16), ACL_MEMCPY_DEVICE_TO_HOST); + size * sizeof(float), ACL_MEMCPY_DEVICE_TO_HOST); CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); DestroyResources(tensors, deviceAddrs, stream, deviceId, workspaceAddr); return FAILED); @@ -164,11 +164,11 @@ int main(int argc, char **argv) DestroyResources(tensors, deviceAddrs, stream, deviceId, workspaceAddr); // print the output result - std::vector goldenData(size, aclFloatToFloat16(resFloat)); + std::vector goldenData(size, resFloat); LOG_PRINT("result is:\n"); for (int64_t i = 0; i < 10; i++) { - LOG_PRINT("%.1f ", aclFloat16ToFloat(resultData[i])); + LOG_PRINT("%.1f ", resultData[i]); } LOG_PRINT("\n"); if (std::equal(resultData.begin(), resultData.begin() + 1, goldenData.begin())) { diff --git a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/README.md b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/README.md index 6f9bc094b..04e13268d 100644 --- a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/README.md +++ b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/README.md @@ -25,6 +25,10 @@ z = sum(x) 3、长度在float输入(2KB,16KB],或者half输入(4KB,32KB]时。由于一条WholeReduceSum的累加效率比使用两条BlockReduceSum的累加效率更高。所以采用两条WholeReduceSum(而不是两条BlockReduceSum+一条WholeReduceSum),得到这段buffer的累加和。 +4、长度在float输入为10000时,对应WholeReduceSumImpl中的处理方法,在Counter模式下,采用WholeReduceSum指令,循环处理二维数据中的每一行,得到每一行的归约运行结果。 + +5、长度在float输入为20000时,对应BinaryReduceSumImpl中的处理方法,在Counter模式下,先将运算数据一分为二,使用Add指令将两部分数据相加,循环往复,最后一条WholeReduceSum指令得到归约的运行结果。此种操作方式,相比较WholeReduceSum单指令操作的方式,在数据量较大,循环次数较多的场景下,性能更优。 + 注意代码中使用了Counter模式。 ## 
算子规格描述 @@ -134,3 +138,4 @@ CANN软件包中提供了工程创建工具msOpGen,ReduceCustom算子工程可 | ---------- | ---------------------------- | | 2024/09/14 | 新增ReduceCustom样例 | | 2024/11/18 | 算子工程改写为由msOpGen生成 | +| 2025/07/07 | 增加两种归约操作样例 | diff --git a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom.cpp b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom.cpp index 5bec0d17e..743fb162b 100644 --- a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom.cpp +++ b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom.cpp @@ -9,21 +9,26 @@ */ #include "reduce_custom_tiling.h" #include "register/op_def_registry.h" -#define REDUCE_TILING_0 1 -#define REDUCE_TILING_1 2 -#define REDUCE_TILING_2 3 namespace optiling { +constexpr uint32_t REDUCE_TILING_1 = 1; +constexpr uint32_t REDUCE_TILING_2 = 2; +constexpr uint32_t REDUCE_TILING_3 = 3; +constexpr uint32_t REDUCE_TILING_4 = 4; +constexpr uint32_t REDUCE_TILING_5 = 5; + constexpr uint32_t BLOCK_DIM = 1; constexpr uint32_t ONE_REPEAT_LEN = 256; constexpr uint32_t ONE_BLOCK_LEN = 32; constexpr uint32_t OUT_SHAPE = 32; -constexpr uint32_t HALF_THRESHOLD0 = ONE_REPEAT_LEN / sizeof(uint16_t); -constexpr uint32_t FLOAT_THRESHOLD0 = ONE_REPEAT_LEN / sizeof(float); -constexpr uint32_t HALF_THRESHOLD1 = ONE_REPEAT_LEN / sizeof(uint16_t) * ONE_BLOCK_LEN / sizeof(uint16_t); -constexpr uint32_t FLOAT_THRESHOLD1 = ONE_REPEAT_LEN / sizeof(float) * ONE_BLOCK_LEN / sizeof(float); -constexpr uint32_t HALF_THRESHOLD2 = ONE_REPEAT_LEN / sizeof(uint16_t) * ONE_REPEAT_LEN / sizeof(uint16_t); -constexpr uint32_t FLOAT_THRESHOLD2 = ONE_REPEAT_LEN / sizeof(float) * ONE_REPEAT_LEN / sizeof(float); +constexpr uint32_t HALF_THRESHOLD0 = ONE_REPEAT_LEN / sizeof(uint16_t); // 128 +constexpr uint32_t FLOAT_THRESHOLD0 = ONE_REPEAT_LEN / sizeof(float); // 64 +constexpr uint32_t HALF_THRESHOLD1 = ONE_REPEAT_LEN / 
sizeof(uint16_t) * ONE_BLOCK_LEN / sizeof(uint16_t); // 2048 +constexpr uint32_t FLOAT_THRESHOLD1 = ONE_REPEAT_LEN / sizeof(float) * ONE_BLOCK_LEN / sizeof(float); //512 +constexpr uint32_t HALF_THRESHOLD2 = ONE_REPEAT_LEN / sizeof(uint16_t) * ONE_REPEAT_LEN / sizeof(uint16_t); // 16384 +constexpr uint32_t FLOAT_THRESHOLD2 = ONE_REPEAT_LEN / sizeof(float) * ONE_REPEAT_LEN / sizeof(float); // 4096 +constexpr uint32_t WHOLEREDUCESUM_SIGLE_MODE = 10000; +constexpr uint32_t BINARYREDUCESUM_SIGLE_MODE = 20000; static ge::graphStatus TilingFunc(gert::TilingContext *context) { TilingData tiling; @@ -32,15 +37,19 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) // Only WholeReduceSum is used under 256B. if ((totalLength <= HALF_THRESHOLD0 && inputDtype == ge::DT_FLOAT16) || (totalLength <= FLOAT_THRESHOLD0 && inputDtype == ge::DT_FLOAT)) { - context->SetTilingKey(REDUCE_TILING_0); + context->SetTilingKey(REDUCE_TILING_1); // One WholeReduceSum and one BlockReduceSum are used in (256B,2KB](for float input) and (256B,4KB](for half input). } else if ((totalLength <= HALF_THRESHOLD1 && inputDtype == ge::DT_FLOAT16) || (totalLength <= FLOAT_THRESHOLD1 && inputDtype == ge::DT_FLOAT)) { - context->SetTilingKey(REDUCE_TILING_1); + context->SetTilingKey(REDUCE_TILING_2); // Two WholeReduceSum are used in (2KB,16KB](for float input) and (4KB,32KB](for half input). 
} else if ((totalLength <= HALF_THRESHOLD2 && inputDtype == ge::DT_FLOAT16) || (totalLength <= FLOAT_THRESHOLD2 && inputDtype == ge::DT_FLOAT)) { - context->SetTilingKey(REDUCE_TILING_2); + context->SetTilingKey(REDUCE_TILING_3); + } else if (totalLength == WHOLEREDUCESUM_SIGLE_MODE) { + context->SetTilingKey(REDUCE_TILING_4); + } else if (totalLength == BINARYREDUCESUM_SIGLE_MODE) { + context->SetTilingKey(REDUCE_TILING_5); } context->SetBlockDim(BLOCK_DIM); tiling.set_totalLength(totalLength); diff --git a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_kernel/reduce_custom.cpp b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_kernel/reduce_custom.cpp index c4ac235d3..d8d631332 100644 --- a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_kernel/reduce_custom.cpp +++ b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_kernel/reduce_custom.cpp @@ -8,15 +8,20 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include "kernel_operator.h" -#define REDUCE_TILING_0 1 -#define REDUCE_TILING_1 2 -#define REDUCE_TILING_2 3 +#define REDUCE_TILING_1 1 +#define REDUCE_TILING_2 2 +#define REDUCE_TILING_3 3 +#define REDUCE_TILING_4 4 +#define REDUCE_TILING_5 5 +template class KernelReduce { static constexpr uint32_t DEFAULT_BLK_STRIDE = 1; static constexpr uint32_t DEFAULT_REP_STRIDE = 8; static constexpr uint32_t REP_LEN = 256; static constexpr uint32_t BLK_LEN = 32; +static constexpr uint32_t ONE_REPEAT_FLOAT_SIZE = REP_LEN / 4; +static constexpr uint32_t BINARY_BOUNDARY = DEFAULT_REP_STRIDE * 2; public: __aicore__ inline KernelReduce() {} __aicore__ inline void Init(GM_ADDR x, GM_ADDR z, uint32_t totalLength, uint32_t outLength) @@ -24,105 +29,192 @@ public: this->totalLength = totalLength; this->outLength = outLength; - xGm.SetGlobalBuffer((__gm__ DTYPE_X *)x, totalLength); - zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)z, outLength); - pipe.InitBuffer(inQueueX, 1, totalLength * sizeof(DTYPE_X)); - pipe.InitBuffer(outQueueZ, 1, outLength * sizeof(DTYPE_Z)); + xGm.SetGlobalBuffer((__gm__ DTYPE *)x, totalLength); + zGm.SetGlobalBuffer((__gm__ DTYPE *)z, outLength); + pipe.InitBuffer(inQueueX, 1, totalLength * sizeof(DTYPE)); + pipe.InitBuffer(outQueueZ, 1, outLength * sizeof(DTYPE)); } - __aicore__ inline void Process1() + + template + __aicore__ inline void Compute() { - CopyIn(); - Compute1(); - CopyOut(); + if constexpr (ComputeKey == REDUCE_TILING_1) { + Compute1(); + } else if constexpr (ComputeKey == REDUCE_TILING_2) { + Compute2(); + } else if constexpr (ComputeKey == REDUCE_TILING_3) { + Compute3(); + } else if constexpr (ComputeKey == REDUCE_TILING_4) { + Compute4(); + } else if constexpr (ComputeKey == REDUCE_TILING_5) { + Compute5(); + } } - __aicore__ inline void Process2() - { - CopyIn(); - Compute2(); - CopyOut(); - } - __aicore__ inline void Process3() + + template + __aicore__ inline void Process() { CopyIn(); - Compute3(); + Compute(); CopyOut(); } private: 
__aicore__ inline void CopyIn() { - AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); AscendC::DataCopy(xLocal, xGm, totalLength); inQueueX.EnQue(xLocal); } // Only WholeReduceSum is used under 256B. __aicore__ inline void Compute1() { - AscendC::LocalTensor xLocal = inQueueX.DeQue(); - AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); - constexpr int64_t maskLen = REP_LEN / sizeof(DTYPE_X); - AscendC::WholeReduceSum(zLocal, xLocal, maskLen, 1, + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + + constexpr int64_t maskLen = REP_LEN / sizeof(DTYPE); + AscendC::WholeReduceSum(zLocal, xLocal, maskLen, 1, DEFAULT_BLK_STRIDE, DEFAULT_BLK_STRIDE, DEFAULT_REP_STRIDE); - outQueueZ.EnQue(zLocal); + + outQueueZ.EnQue(zLocal); inQueueX.FreeTensor(xLocal); } // One WholeReduceSum and one BlockReduceSum are used in (256B,2KB](for float input) and (256B,4KB](for half input). __aicore__ inline void Compute2() { - AscendC::LocalTensor xLocal = inQueueX.DeQue(); - AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); - pipe.InitBuffer(calcBuf, totalLength * sizeof(DTYPE_X)); - AscendC::LocalTensor tempTensor1 = calcBuf.Get(); - constexpr uint32_t c0Count = BLK_LEN / sizeof(DTYPE_X); + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + pipe.InitBuffer(calcBuf, totalLength * sizeof(DTYPE)); + AscendC::LocalTensor tempTensor1 = calcBuf.Get(); + constexpr uint32_t c0Count = BLK_LEN / sizeof(DTYPE); const uint32_t blockNum0 = (totalLength + c0Count - 1) / c0Count; + AscendC::SetMaskCount(); - AscendC::SetVectorMask(0, totalLength); - AscendC::BlockReduceSum(tempTensor1, xLocal, AscendC::MASK_PLACEHOLDER, 1, + AscendC::SetVectorMask(0, totalLength); + AscendC::BlockReduceSum(tempTensor1, xLocal, 1, AscendC::MASK_PLACEHOLDER, DEFAULT_BLK_STRIDE, DEFAULT_BLK_STRIDE, DEFAULT_REP_STRIDE); AscendC::PipeBarrier(); - 
AscendC::SetVectorMask(0, blockNum0); - AscendC::WholeReduceSum(zLocal, tempTensor1, AscendC::MASK_PLACEHOLDER, 1, + AscendC::SetVectorMask(0, blockNum0); + AscendC::WholeReduceSum(zLocal, tempTensor1, 1, AscendC::MASK_PLACEHOLDER, DEFAULT_BLK_STRIDE, DEFAULT_BLK_STRIDE, DEFAULT_REP_STRIDE); AscendC::PipeBarrier(); AscendC::SetMaskNorm(); - outQueueZ.EnQue(zLocal); + + outQueueZ.EnQue(zLocal); inQueueX.FreeTensor(xLocal); } // Two WholeReduceSum are used in (2KB,16KB](for float input) and (4KB,32KB](for half input). __aicore__ inline void Compute3() { - AscendC::LocalTensor xLocal = inQueueX.DeQue(); - AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); - pipe.InitBuffer(calcBuf, totalLength * sizeof(DTYPE_X)); - AscendC::LocalTensor tempTensor1 = calcBuf.Get(); - const uint32_t repeatNum = (totalLength * sizeof(DTYPE_X) + REP_LEN - 1) / REP_LEN; + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + pipe.InitBuffer(calcBuf, totalLength * sizeof(DTYPE)); + AscendC::LocalTensor tempTensor1 = calcBuf.Get(); + const uint32_t repeatNum = (totalLength * sizeof(DTYPE) + REP_LEN - 1) / REP_LEN; + AscendC::SetMaskCount(); - AscendC::SetVectorMask(0, totalLength); - AscendC::WholeReduceSum(tempTensor1, xLocal, AscendC::MASK_PLACEHOLDER, 1, + AscendC::SetVectorMask(0, totalLength); + AscendC::WholeReduceSum(tempTensor1, xLocal, 1, AscendC::MASK_PLACEHOLDER, DEFAULT_BLK_STRIDE, DEFAULT_BLK_STRIDE, DEFAULT_REP_STRIDE); AscendC::PipeBarrier(); - AscendC::SetVectorMask(0, repeatNum); - AscendC::WholeReduceSum(zLocal, tempTensor1, AscendC::MASK_PLACEHOLDER, 1, + AscendC::SetVectorMask(0, repeatNum); + AscendC::WholeReduceSum(zLocal, tempTensor1, 1, AscendC::MASK_PLACEHOLDER, DEFAULT_BLK_STRIDE, DEFAULT_BLK_STRIDE, DEFAULT_REP_STRIDE); AscendC::PipeBarrier(); AscendC::SetMaskNorm(); - outQueueZ.EnQue(zLocal); + + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + } + + __aicore__ inline void Compute4() + { + 
AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + + int64_t start = AscendC::GetSystemCycle(); + WholeReduceSumImpl(zLocal, xLocal, 1, totalLength); + int64_t runCycle = AscendC::GetSystemCycle() - start; + + outQueueZ.EnQue(zLocal); inQueueX.FreeTensor(xLocal); } + + __aicore__ inline void Compute5() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + + int64_t start = AscendC::GetSystemCycle(); + BinaryReduceSumImpl(zLocal, xLocal, 1, totalLength); + int64_t runCycle = AscendC::GetSystemCycle() - start; + + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + } + __aicore__ inline void CopyOut() { - AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); AscendC::DataCopy(zGm, zLocal, this->outLength); outQueueZ.FreeTensor(zLocal); } + __aicore__ inline void WholeReduceSumImpl(const AscendC::LocalTensor& dst, const AscendC::LocalTensor& src, + const uint32_t bsLength, const uint32_t hLength) + { + AscendC::SetMaskCount(); + for (uint32_t i = 0; i < bsLength; i++) { + uint32_t totalNum = hLength; + AscendC::LocalTensor srcTmp = src[i * hLength]; + AscendC::LocalTensor dstTmp = dst[i * hLength]; + while (totalNum > 1) { + AscendC::SetVectorMask(0, totalNum); + AscendC::WholeReduceSum(dstTmp, srcTmp, AscendC::MASK_PLACEHOLDER, 1, DEFAULT_BLK_STRIDE, + DEFAULT_BLK_STRIDE, DEFAULT_REP_STRIDE); + AscendC::PipeBarrier(); + totalNum = AscendC::DivCeil(totalNum, ONE_REPEAT_FLOAT_SIZE); + srcTmp = dstTmp; + } + } + AscendC::ResetMask(); + AscendC::SetMaskNorm(); + } + + __aicore__ inline void BinaryReduceSumImpl(const AscendC::LocalTensor& dst, const AscendC::LocalTensor& src, + const uint32_t bsLength, const uint32_t hLength) + { + AscendC::BinaryRepeatParams binaryParams; + AscendC::UnaryRepeatParams unaryParams; + AscendC::SetMaskCount(); + for (uint32_t i = 0; i < bsLength; i++) { + uint32_t totalNum = 
hLength; + AscendC::LocalTensor srcTmp = src[i * hLength]; + AscendC::LocalTensor dstTmp = dst[i * hLength]; + while (totalNum > ONE_REPEAT_FLOAT_SIZE) { + uint32_t halfNum = AscendC::DivCeil(totalNum, BINARY_BOUNDARY) * DEFAULT_REP_STRIDE; + AscendC::SetVectorMask(0, totalNum - halfNum); + AscendC::Add(dstTmp, srcTmp, srcTmp[halfNum], AscendC::MASK_PLACEHOLDER, 1, binaryParams); + AscendC::PipeBarrier(); + totalNum = halfNum; + srcTmp = dstTmp; + } + AscendC::SetVectorMask(0, totalNum); + AscendC::WholeReduceSum(dstTmp, srcTmp, AscendC::MASK_PLACEHOLDER, 1, DEFAULT_BLK_STRIDE, + DEFAULT_BLK_STRIDE, DEFAULT_REP_STRIDE); + AscendC::PipeBarrier(); + } + AscendC::ResetMask(); + AscendC::SetMaskNorm(); + } + private: AscendC::TPipe pipe; AscendC::TQue inQueueX; AscendC::TQue outQueueZ; AscendC::TBuf calcBuf; - AscendC::GlobalTensor xGm; - AscendC::GlobalTensor zGm; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor zGm; uint32_t totalLength; uint32_t outLength; }; @@ -130,14 +222,18 @@ private: extern "C" __global__ __aicore__ void reduce_custom(GM_ADDR x, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) { GET_TILING_DATA(tiling_data, tiling); - KernelReduce op; + KernelReduce op; op.Init(x, z, tiling_data.totalLength, tiling_data.outLength); - if (TILING_KEY_IS(REDUCE_TILING_0)) { - op.Process1(); - } else if (TILING_KEY_IS(REDUCE_TILING_1)) { - op.Process2(); + if (TILING_KEY_IS(REDUCE_TILING_1)) { + op.Process(); } else if (TILING_KEY_IS(REDUCE_TILING_2)) { - op.Process3(); + op.Process(); + } else if (TILING_KEY_IS(REDUCE_TILING_3)) { + op.Process(); + } else if (TILING_KEY_IS(REDUCE_TILING_4)) { + op.Process(); + } else if (TILING_KEY_IS(REDUCE_TILING_5)) { + op.Process(); } } -- Gitee From 6f032b5e57709de1067c07f1af259119659cd941 Mon Sep 17 00:00:00 2001 From: hehongan Date: Thu, 10 Jul 2025 11:58:32 +0000 Subject: [PATCH 35/97] =?UTF-8?q?!2713=20matmul=5Fleakyrelu=5Fcustom?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BA=86=E6=B3=A8=E9=87=8A=20ASCEND310P=20?= 
=?UTF-8?q?=E5=B9=B6=E5=88=A0=E9=99=A4=E4=BA=86=E5=86=97=E4=BD=99=E5=8F=A5?= =?UTF-8?q?=E5=8F=B7=20Merge=20pull=20request=20!2713=20from=20hehongan/ma?= =?UTF-8?q?ster?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp | 2 +- .../MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp index 3b78451b5..a65b6e230 100644 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp @@ -102,7 +102,7 @@ __aicore__ inline void MatmulLeakyKernel::Process uint32_t computeRound = 0; #ifdef CUSTOM_ASCEND310P - // Set temp UB space when on SCEND310P . 
+ // Set temp UB space when on ASCEND310P AscendC::TBuf<> tmpMMFormatUb; AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp index 4a291ae6f..62f9f3668 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/matmul_leakyrelu_custom.cpp @@ -102,7 +102,7 @@ __aicore__ inline void MatmulLeakyKernel::Process uint32_t computeRound = 0; #ifdef CUSTOM_ASCEND310P - // Set temp UB space when on SCEND310P . + // Set temp UB space when on ASCEND310P AscendC::TBuf<> tmpMMFormatUb; AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); -- Gitee From cfded7e064ea46fe417fdc4c72d8b1149cb530f8 Mon Sep 17 00:00:00 2001 From: zhanghao0689 Date: Mon, 14 Jul 2025 06:09:30 +0000 Subject: [PATCH 36/97] !2706 add gm conflict case Merge pull request !2706 from zhanghao0689/master --- .../AclNNInvocation/README.md | 75 +++ .../AclNNInvocation/inc/common.h | 45 ++ .../AclNNInvocation/inc/op_runner.h | 188 +++++++ .../AclNNInvocation/inc/operator_desc.h | 57 +++ .../AclNNInvocation/run.sh | 78 +++ .../AclNNInvocation/scripts/acl.json | 1 + .../AclNNInvocation/scripts/gen_data.py | 23 + .../AclNNInvocation/scripts/verify_result.py | 53 ++ .../AclNNInvocation/src/CMakeLists.txt | 65 +++ .../AclNNInvocation/src/common.cpp | 80 +++ .../AclNNInvocation/src/main.cpp | 163 ++++++ .../AclNNInvocation/src/op_runner.cpp | 462 ++++++++++++++++++ .../AclNNInvocation/src/operator_desc.cpp | 51 ++ .../15_mata_address_conflict/AddsCustom.json | 37 ++ 
.../AddsCustom/op_host/adds_custom.cpp | 56 +++ .../AddsCustom/op_kernel/adds_custom.cpp | 33 ++ .../AddsCustom/op_kernel/adds_custom_tiling.h | 22 + .../AddsCustom/op_kernel/adds_custom_v1.h | 88 ++++ .../AddsCustom/op_kernel/adds_custom_v2.h | 94 ++++ .../AddsCustom/op_kernel/adds_custom_v3.h | 89 ++++ .../15_mata_address_conflict/README.md | 164 ++++++- .../15_mata_address_conflict/install.sh | 58 +++ operator/ascendc/4_best_practices/README.md | 1 + 23 files changed, 1982 insertions(+), 1 deletion(-) create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/README.md create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/common.h create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/op_runner.h create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/operator_desc.h create mode 100755 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/run.sh create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/acl.json create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/gen_data.py create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/verify_result.py create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/CMakeLists.txt create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/common.cpp create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/main.cpp create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/op_runner.cpp create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/operator_desc.cpp create mode 100644 
operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_host/adds_custom.cpp create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom.cpp create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_tiling.h create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v1.h create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v2.h create mode 100644 operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v3.h create mode 100755 operator/ascendc/4_best_practices/15_mata_address_conflict/install.sh diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/README.md b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/README.md new file mode 100644 index 000000000..5c1ffb4d2 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/README.md @@ -0,0 +1,75 @@ +## 目录结构介绍 + +``` +├── AclNNInvocation //通过单算子API调用的方式调用AddsCustom算子 +│ ├── inc // 头文件目录 +│ │ ├── common.h // 声明公共方法类,用于读取二进制文件 +│ │ ├── op_runner.h // 算子描述声明文件,包含算子输入/输出,算子类型以及输入描述与输出描述 +│ │ └── operator_desc.h // 算子运行相关信息声明文件,包含算子输入/输出个数,输入/输出大小等 +│ ├── input // 存放脚本生成的输入数据目录 +│ ├── scripts +│ │ ├── acl.json // acl配置文件 +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 精度校验脚本 +│ ├── src +│ │ ├── CMakeLists.txt // 编译规则文件 +│ │ ├── common.cpp // 公共函数,读取二进制文件函数的实现文件 +│ │ ├── main.cpp // 单算子调用应用的入口 +│ │ ├── op_runner.cpp // 单算子调用主体流程实现文件 +│ │ └── operator_desc.cpp // 构造算子的输入与输出描述 +│ └── run.sh // 执行命令脚本 +``` + +## 代码实现介绍 + +完成自定义算子的开发部署后,可以通过单算子调用的方式来验证单算子的功能。src/main.cpp代码为单算子API执行方式。单算子API执行是基于C语言的API执行算子,无需提供单算子描述文件进行离线模型的转换,直接调用单算子API接口。 + 
+自定义算子编译部署后,会自动生成单算子API,可以直接在应用程序中调用。算子API的形式一般定义为“两段式接口”,形如: + +```cpp + // 获取算子使用的workspace空间大小 + aclnnStatus aclnnAddsCustomGetWorkspaceSize( + const aclTensor *x, + int64_t caseId, + const aclTensor *out, + uint64_t *workspaceSize, + aclOpExecutor **executor); + // 执行算子 + aclnnStatus aclnnAddsCustom( + void *workspace, + uint64_t workspaceSize, + aclOpExecutor *executor, + aclrtStream stream); +``` + +其中aclnnAddsCustomGetWorkspaceSize为第一段接口,主要用于计算本次API调用计算过程中需要多少的workspace内存。获取到本次API计算需要的workspace大小之后,按照workspaceSize大小申请Device侧内存,然后调用第二段接口aclnnAddsCustom执行计算。具体参考[单算子API调用](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp)章节。 + +## 运行样例算子 + +### 1. 编译算子工程 + +运行此样例前,请参考[编译算子工程](../README.md#operatorcompile)完成前期准备。 + +### 2. 单算子API调用样例运行 + +- 进入到样例目录 + + 以命令行方式下载样例代码,master分支为例。 + + ```bash + cd ${git_clone_path}/samples/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation + ``` +- 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行单算子API调用样例,最后检验运行结果。具体过程可参见run.sh脚本。 + + ```bash + bash run.sh + ``` + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/07/03 | 新增本readme | diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/common.h b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/common.h new file mode 100644 index 000000000..fadb5c808 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/common.h @@ -0,0 +1,45 @@ +/** + * @file common.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) 
fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args) + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size); + +#endif // COMMON_H diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/op_runner.h b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/op_runner.h new file mode 100644 index 000000000..7b98d5730 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/op_runner.h @@ -0,0 +1,188 @@ +/** + * @file op_runner.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef OP_RUNNER_H +#define OP_RUNNER_H + +#include "acl/acl.h" +#include "aclnn/acl_meta.h" +#include "common.h" +#include "operator_desc.h" + +/** + * Op Runner + */ +class OpRunner { +public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + const size_t GetInputNumDims(size_t index) const; + aclDataType GetInputDataType(size_t index) const; + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + const size_t GetOutputNumDims(size_t index) const; + aclDataType GetOutputDataType(size_t index) const; + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input 
buffer(host memory) by index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template T *GetInputBuffer(size_t index) + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); + } + + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template const T *GetOutputBuffer(size_t index) + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); + } + + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOp(int64_t caseId); + + /** + * @brief Get case index + * @return case index by user input + */ + int64_t GetCaseId(); + +private: + size_t numInputs_; + size_t numOutputs_; + void *workspace_; + int64_t caseId_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; +}; + +#endif // OP_RUNNER_H diff --git 
a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/operator_desc.h b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/operator_desc.h new file mode 100644 index 000000000..cf02d7cec --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/inc/operator_desc.h @@ -0,0 +1,57 @@ +/** + * @file operator_desc.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef OPERATOR_DESC_H +#define OPERATOR_DESC_H + +#include +#include + +#include "acl/acl.h" + +/** + * Op description + */ +struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + std::string opType; + std::vector inputDesc; + std::vector outputDesc; +}; + +#endif // OPERATOR_DESC_H diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/run.sh b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/run.sh new file mode 100755 index 000000000..d5eac7c1d --- /dev/null +++ 
b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/run.sh @@ -0,0 +1,78 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export DDK_PATH=$_ASCEND_INSTALL_PATH +export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/devlib + +function main { + # 1. 清除遗留生成文件和日志文件 + rm -rf $HOME/ascend/log/* + rm -rf ./input && mkdir -p ./input + rm -rf ./output && mkdir -p ./output + + # 2. 生成输入数据和真值数据 + cd $CURRENT_DIR + python3 scripts/gen_data.py + if [ $? -ne 0 ]; then + echo "ERROR: generate input data failed!" + return 1 + fi + echo "INFO: generate input data success!" + + # 3. 编译可执行文件 + cd $CURRENT_DIR + rm -rf build + mkdir -p build + cd build + cmake ../src -DCMAKE_SKIP_RPATH=TRUE + if [ $? -ne 0 ]; then + echo "ERROR: cmake failed!" + return 1 + fi + echo "INFO: cmake success!" + make + if [ $? -ne 0 ]; then + echo "ERROR: make failed!" + return 1 + fi + echo "INFO: make success!" + + # 4. 运行可执行文件 + export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH + export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} + cd $CURRENT_DIR/output + echo "INFO: execute op!" + msprof op --launch-count=3 --output=./prof ./execute_adds_op + if [ $? -ne 0 ]; then + echo "ERROR: acl executable run failed! please check your project!" + return 1 + fi + echo "INFO: acl executable run success!" + + # 5. 
精度比对 + cd $CURRENT_DIR + python3 scripts/verify_result.py output/output_z_1.bin output/golden.bin + python3 scripts/verify_result.py output/output_z_2.bin output/golden.bin + python3 scripts/verify_result.py output/output_z_3.bin output/golden.bin + if [ $? -ne 0 ]; then + echo "ERROR: verify result failed!" + return 1 + fi +} + +main diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/acl.json b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/acl.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/acl.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/gen_data.py b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/gen_data.py new file mode 100644 index 000000000..9c4ecbe6e --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/gen_data.py @@ -0,0 +1,23 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8192, 128]).astype(np.float32) + golden = (input_x + 2.0).astype(np.float32) + + input_x.tofile("./input/input_x.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/verify_result.py new file mode 100644 index 000000000..a5019f30f --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float32 +relative_tol = 1e-4 +absolute_tol = 1e-5 +error_tol = 1e-4 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float32).reshape(-1) + golden = np.fromfile(golden, dtype=np.float32).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/CMakeLists.txt b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/CMakeLists.txt new file mode 100644 index 000000000..8d0ae1bd3 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/CMakeLists.txt @@ -0,0 +1,65 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(acl_execute_adds) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") + +set(INC_PATH $ENV{DDK_PATH}) + +if (NOT DEFINED ENV{DDK_PATH}) + set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default INC_PATH: ${INC_PATH}") +else () + message(STATUS "env INC_PATH: ${INC_PATH}") +endif() + +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize/op_api") + +set(LIB_PATH $ENV{NPU_HOST_LIB}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + string(TOLOWER "${CMAKE_SYSTEM_NAME}" SYSTEM_NAME_LOWER) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/${CMAKE_SYSTEM_PROCESSOR}-${SYSTEM_NAME_LOWER}/devlib") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +# Header path +include_directories( + ../inc + ${INC_PATH}/include + ${CUST_PKG_PATH}/include +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${CUST_PKG_PATH}/lib +) + +add_executable(execute_adds_op + operator_desc.cpp + op_runner.cpp + main.cpp + common.cpp +) + +target_link_libraries(execute_adds_op + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ +) + +install(TARGETS execute_adds_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/common.cpp b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/common.cpp new file mode 100644 index 000000000..d58716122 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/common.cpp @@ -0,0 +1,80 @@ +/** + * @file common.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "common.h" + +#include +#include +#include + +#include + +extern bool g_isDevice; + +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/main.cpp b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/main.cpp new file mode 100644 index 000000000..b70950642 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/main.cpp @@ -0,0 +1,163 @@ +/** + * @file main.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include +#include +#include +#include +#include +#include "acl/acl.h" +#include "common.h" +#include "op_runner.h" + +bool g_isDevice = false; +int deviceId = 0; + +OperatorDesc CreateOpDesc() +{ + // define operator + std::vector shape{8192, 128}; + aclDataType dataType = ACL_FLOAT; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); + opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); + return opDesc; +} + +bool SetInputData(OpRunner &runner) +{ + size_t fileSize = 0; + ReadFile("../input/input_x.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0)); + INFO_LOG("Set input success"); + return true; +} + +bool ProcessOutputData(OpRunner &runner) +{ + int64_t caseId = runner.GetCaseId(); + WriteFile("../output/output_z_" + std::to_string(caseId) + ".bin", runner.GetOutputBuffer(0), + runner.GetOutputSize(0)); + INFO_LOG("Write output success"); + return true; +} + +void DestroyResource() +{ + bool flag = false; + if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { + 
ERROR_LOG("Reset device %d failed", deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destroy resource failed"); + } else { + INFO_LOG("Destroy resource success"); + } +} + +bool InitResource() +{ + std::string output = "../output"; + + // acl.json is dump or profiling config file + if (aclInit("../scripts/acl.json") != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } + + if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. deviceId is %d", deviceId); + (void)aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestroyResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; +} + +bool RunOp(int64_t caseId) +{ + // create op desc + OperatorDesc opDesc = CreateOpDesc(); + + // create Runner + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init OpRunner failed"); + return false; + } + + // Load inputs + if (!SetInputData(opRunner)) { + ERROR_LOG("Set input data failed"); + return false; + } + + // Run op + if (!opRunner.RunOp(caseId)) { + ERROR_LOG("Run op failed"); + return false; + } + + // process output data + if (!ProcessOutputData(opRunner)) { + ERROR_LOG("Process output data failed"); + return false; + } + + INFO_LOG("Run op success"); + return true; +} + +int main(int argc, char **argv) +{ + if (!InitResource()) { + ERROR_LOG("Init resource failed"); + return FAILED; + } + INFO_LOG("Init resource success"); + + int64_t caseId = 1; + if (!RunOp(caseId)) { + DestroyResource(); + return FAILED; + } + + caseId = 2; + 
if (!RunOp(caseId)) { + DestroyResource(); + return FAILED; + } + + caseId = 3; + if (!RunOp(caseId)) { + DestroyResource(); + return FAILED; + } + + DestroyResource(); + return SUCCESS; +} diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/op_runner.cpp b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/op_runner.cpp new file mode 100644 index 000000000..d7bde46d6 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/op_runner.cpp @@ -0,0 +1,462 @@ +/** + * @file op_runner.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "op_runner.h" + +#include +#include + +#include "acl/acl_op_compiler.h" +#include "aclnn_adds_custom.h" +#include "common.h" + +using namespace std; + +extern bool g_isDevice; + +OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) +{ + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); + workspace_ = nullptr; +} + +OpRunner::~OpRunner() +{ + if (workspace_ != nullptr) { + (void)aclrtFree(workspace_); + } + for (size_t i = 0; i < numInputs_; ++i) { + (void)aclDestroyTensor(inputTensor_[i]); + (void)aclDestroyDataBuffer(inputBuffers_[i]); + (void)aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostInputs_[i]); + } else { + (void)aclrtFreeHost(hostInputs_[i]); + } + } + + for (size_t i = 0; i < numOutputs_; ++i) { + (void)aclDestroyTensor(outputTensor_[i]); + (void)aclDestroyDataBuffer(outputBuffers_[i]); + (void)aclrtFree(devOutputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostOutputs_[i]); + } else { + (void)aclrtFreeHost(hostOutputs_[i]); + } + } +} + +bool OpRunner::Init() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = 
GetInputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); + return false; + } + hostInputs_.emplace_back(hostInput); + + aclTensor *inputTensor = + aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, + GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); + } + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + devOutputs_.emplace_back(devMem); + outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] 
failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); + + aclTensor *outputTensor = + aclCreateTensor(GetOutputShape(i).data(), GetOutputNumDims(i), GetOutputDataType(i), nullptr, 0, + GetOutputFormat(i), GetOutputShape(i).data(), GetOutputNumDims(i), devOutputs_[i]); + if (outputTensor == nullptr) { + ERROR_LOG("Create Tensor for output[%zu] failed", i); + return false; + } + outputTensor_.emplace_back(outputTensor); + } + + return true; +} + +const size_t OpRunner::NumInputs() +{ + return numInputs_; +} + +const size_t OpRunner::NumOutputs() +{ + return numOutputs_; +} + +const size_t OpRunner::GetInputSize(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->inputDesc[index]); +} + +const size_t OpRunner::GetInputNumDims(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); +} + +aclDataType OpRunner::GetInputDataType(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->inputDesc[index]); +} + +aclFormat OpRunner::GetInputFormat(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); +} + +std::vector OpRunner::GetInputShape(size_t index) const +{ + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); + return ret; + } + + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + + return ret; +} + +size_t OpRunner::GetOutputSize(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->outputDesc[index]); +} + +const size_t OpRunner::GetOutputNumDims(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); +} + +aclDataType OpRunner::GetOutputDataType(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->outputDesc[index]); +} + +aclFormat OpRunner::GetOutputFormat(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); +} + +std::vector OpRunner::GetOutputShape(size_t index) const +{ + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; + } + + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. 
dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + return ret; +} + +size_t OpRunner::GetInputElementCount(size_t index) const +{ + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); +} + +size_t OpRunner::GetOutputElementCount(size_t index) const +{ + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); +} + +bool OpRunner::RunOp(int64_t caseId) +{ + caseId_ = caseId; + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy input[%zu] failed", i); + return false; + } + INFO_LOG("Copy input[%zu] success", i); + } + + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create stream failed"); + return false; + } + INFO_LOG("Create stream success"); + + size_t workspaceSize = 0; + aclOpExecutor *handle = nullptr; + auto ret = aclnnAddsCustomGetWorkspaceSize(inputTensor_[0], caseId, outputTensor_[0], &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Get Operator Workspace failed. 
error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddsCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); + + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } + + ret = aclnnAddsCustom(workspace_, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddsCustom success"); + + // The unit of 5000 is ms. + ret = aclrtSynchronizeStreamWithTimeout(stream, 5000); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. error code is %d", static_cast(ret)); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } + + (void)aclrtDestroyStream(stream); + return true; +} + +int64_t OpRunner::GetCaseId() +{ + return caseId_; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow 
- 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case ACL_BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT16: + DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } +} + +void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); + return; + } + + auto desc = opDesc_->inputDesc[index]; + PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} + +void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return; + } + + auto desc = opDesc_->outputDesc[index]; + PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/operator_desc.cpp b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/operator_desc.cpp new file mode 100644 index 000000000..90e0ac343 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AclNNInvocation/src/operator_desc.cpp @@ -0,0 +1,51 @@ +/** + * @file operator_desc.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "operator_desc.h" + +#include "common.h" + +using namespace std; + +OperatorDesc::OperatorDesc() {} + +OperatorDesc::~OperatorDesc() +{ + for (auto *desc : inputDesc) { + aclDestroyTensorDesc(desc); + } + + for (auto *desc : outputDesc) { + aclDestroyTensorDesc(desc); + } +} + +OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + inputDesc.emplace_back(desc); + return *this; +} + +OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, + aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + + outputDesc.emplace_back(desc); + return *this; +} diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json 
b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json new file mode 100644 index 000000000..a54432512 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json @@ -0,0 +1,37 @@ +[ + { + "op": "AddsCustom", + "language": "cpp", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float" + ] + } + ], + "output_desc": [ + { + "name": "z", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float" + ] + } + ], + "attr": [ + { + "name": "case_id", + "type": "int", + "value": 1 + } + ] + } +] \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_host/adds_custom.cpp b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_host/adds_custom.cpp new file mode 100644 index 000000000..6c91c15b5 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_host/adds_custom.cpp @@ -0,0 +1,56 @@ +/** + * @file adds_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "../op_kernel/adds_custom_tiling.h" +#include "register/op_def_registry.h" + +namespace optiling { +static ge::graphStatus TilingFunc(gert::TilingContext *context) +{ + constexpr uint32_t BLOCK_DIM = 16; + context->SetBlockDim(BLOCK_DIM); + + // set tiling_key + auto attrs = context->GetAttrs(); + const int64_t *caseId = attrs->GetInt(0); + context->SetTilingKey(*caseId); + + AddsCustomTilingData *tiling = context->GetTilingData(); + constexpr uint32_t M = 8192; + constexpr uint32_t N = 128; + constexpr uint32_t TILE_M = 512; + constexpr uint32_t TILE_N = 8; + constexpr uint32_t LOOP_ONE_CORE = M / TILE_M; + tiling->m = M; + tiling->n = N; + tiling->tileM = TILE_M; + tiling->tileN = TILE_N; + tiling->loopOneCore = LOOP_ONE_CORE; + + // set workspace size + size_t *currentWorkspace = context->GetWorkspaceSizes(1); + currentWorkspace[0] = 0; + + return ge::GRAPH_SUCCESS; +} +} // namespace optiling + +namespace ops { +class AddsCustom : public OpDef { +public: + explicit AddsCustom(const char *name) : OpDef(name) + { + this->Input("x").ParamType(REQUIRED).DataType({ge::DT_FLOAT}).Format({ge::FORMAT_ND}); + this->Output("z").ParamType(REQUIRED).DataType({ge::DT_FLOAT}).Format({ge::FORMAT_ND}); + this->AICore().SetTiling(optiling::TilingFunc).AddConfig("ascend910b"); + this->Attr("case_id").Int(1); + } +}; +OP_ADD(AddsCustom); +} // namespace ops diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom.cpp b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom.cpp new file mode 100644 index 000000000..8d0ad4cd9 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom.cpp @@ -0,0 +1,33 @@ +/** + * @file adds_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "kernel_operator.h" +#include "adds_custom_v1.h" +#include "adds_custom_v2.h" +#include "adds_custom_v3.h" + +extern "C" __global__ __aicore__ void adds_custom(GM_ADDR x, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) +{ + REGISTER_TILING_DEFAULT(AddsCustomTilingData); + GET_TILING_DATA(tilingData, tiling); + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIV_1_0); + if (TILING_KEY_IS(1UL)) { + KernelAddsV1 op; + op.Init(x, z, &tilingData); + op.Process(); + } else if (TILING_KEY_IS(2UL)) { + KernelAddsV2 op; + op.Init(x, z, &tilingData); + op.Process(); + } else if (TILING_KEY_IS(3UL)) { + KernelAddsV3 op; + op.Init(x, z, &tilingData); + op.Process(); + } +} diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_tiling.h b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_tiling.h new file mode 100644 index 000000000..8730ae528 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_tiling.h @@ -0,0 +1,22 @@ +/** + * @file adds_custom_tiling.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef ADDS_CUSTOM_TILING_H +#define ADDS_CUSTOM_TILING_H +#include + +class AddsCustomTilingData { +public: + uint32_t m; + uint32_t n; + uint32_t tileM; + uint32_t tileN; + uint32_t loopOneCore; +}; +#endif // ADDS_CUSTOM_TILING_H diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v1.h b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v1.h new file mode 100644 index 000000000..70d86c001 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v1.h @@ -0,0 +1,88 @@ +/** + * @file adds_custom_v1.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADDS_CUSTOM_V1_H +#define ADDS_CUSTOM_V1_H +#include "kernel_operator.h" +#include "adds_custom_tiling.h" + +using AscendC::TPosition; +class KernelAddsV1 { +public: + __aicore__ inline KernelAddsV1() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR z, AddsCustomTilingData *tilingPtr) + { + tiling = tilingPtr; + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * tiling->tileN); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * tiling->tileN); + // the gm address conflict happens when multi cores visit the same addr range(512Bytes) + // we disable the L2 cache mode to highlight the influence of the gm address conflict + xGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + zGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + pipe.InitBuffer(inQueueX, BUFFER_NUM, tiling->tileM * tiling->tileN * sizeof(float)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, tiling->tileM * tiling->tileN * sizeof(float)); + } + __aicore__ inline void Process() + { + for (int32_t i = 0; i < 
tiling->loopOneCore; i++) { + // the following two SyncAll in this case are unnecessary actually, + // we just used them to highlight the influence of gm address conflict in each loop + AscendC::SyncAll(); + CopyIn(i); + Compute(); + AscendC::SyncAll(); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::DataCopyParams params; + params.blockCount = tiling->tileM; + params.blockLen = tiling->tileN * sizeof(float) / BLOCK_SIZE; + params.srcStride = (tiling->n - tiling->tileN) * sizeof(float) / BLOCK_SIZE; + params.dstStride = 0; + AscendC::DataCopy(xLocal, xGm[progress * tiling->tileM * tiling->n], params); + inQueueX.EnQue(xLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + constexpr float scale = 2.0; + AscendC::Adds(zLocal, xLocal, scale, tiling->tileM * tiling->tileN); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopyParams params; + params.blockCount = tiling->tileM; + params.blockLen = tiling->tileN * sizeof(float) / BLOCK_SIZE; + params.srcStride = 0; + params.dstStride = (tiling->n - tiling->tileN) * sizeof(float) / BLOCK_SIZE; + AscendC::DataCopy(zGm[progress * tiling->tileM * tiling->n], zLocal, params); + outQueueZ.FreeTensor(zLocal); + } + +private: + static constexpr int32_t BUFFER_NUM = 2; + static constexpr int32_t BLOCK_SIZE = 32; + + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor zGm; + AddsCustomTilingData *tiling; +}; +#endif // ADDS_CUSTOM_V1_H \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v2.h 
b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v2.h new file mode 100644 index 000000000..ae5314a90 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v2.h @@ -0,0 +1,94 @@ +/** + * @file adds_custom_v2.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADDS_CUSTOM_V2_H +#define ADDS_CUSTOM_V2_H +#include "kernel_operator.h" +#include "adds_custom_tiling.h" + +using AscendC::TPosition; +class KernelAddsV2 { +public: + __aicore__ inline KernelAddsV2() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR z, AddsCustomTilingData *tilingPtr) + { + tiling = tilingPtr; + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * tiling->tileN); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * tiling->tileN); + // the gm address conflict happens when multi cores visit the same addr range(512Bytes) + // we disable the L2 cache mode to highlight the influence of the gm address conflict + xGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + zGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + pipe.InitBuffer(inQueueX, BUFFER_NUM, tiling->tileM * tiling->tileN * sizeof(float)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, tiling->tileM * tiling->tileN * sizeof(float)); + } + __aicore__ inline void Process() + { + for (int32_t i = 0; i < tiling->loopOneCore; i++) { + // adjust the loop order to avoid the gm address conflict: + // the loop order of core0 : 0, 1, 2, 3, ..., 13, 14, 15 + // the loop order of core1 : 1, 2, 3, 4, ..., 14, 15, 0 + // ... 
+ // the loop order of core15 : 15, 0, 1, 2, ..., 12, 13, 14 + int32_t newProgress = (i + AscendC::GetBlockIdx()) % tiling->loopOneCore; + // the following two SyncAll in this case are unnecessary actually, + // we just used them to highlight the influence of gm address conflict in each loop + AscendC::SyncAll(); + CopyIn(newProgress); + Compute(); + AscendC::SyncAll(); + CopyOut(newProgress); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::DataCopyParams params; + params.blockCount = tiling->tileM; + params.blockLen = tiling->tileN * sizeof(float) / BLOCK_SIZE; + params.srcStride = (tiling->n - tiling->tileN) * sizeof(float) / BLOCK_SIZE; + params.dstStride = 0; + AscendC::DataCopy(xLocal, xGm[progress * tiling->tileM * tiling->n], params); + inQueueX.EnQue(xLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + constexpr float scale = 2.0; + AscendC::Adds(zLocal, xLocal, scale, tiling->tileM * tiling->tileN); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopyParams params; + params.blockCount = tiling->tileM; + params.blockLen = tiling->tileN * sizeof(float) / BLOCK_SIZE; + params.srcStride = 0; + params.dstStride = (tiling->n - tiling->tileN) * sizeof(float) / BLOCK_SIZE; + AscendC::DataCopy(zGm[progress * tiling->tileM * tiling->n], zLocal, params); + outQueueZ.FreeTensor(zLocal); + } + +private: + static constexpr int32_t BUFFER_NUM = 2; + static constexpr int32_t BLOCK_SIZE = 32; + + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor zGm; + AddsCustomTilingData *tiling; +}; +#endif // ADDS_CUSTOM_V2_H \ No newline at end of file diff --git 
a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v3.h b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v3.h new file mode 100644 index 000000000..caecdef5e --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom/op_kernel/adds_custom_v3.h @@ -0,0 +1,89 @@ +/** + * @file adds_custom_v3.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADDS_CUSTOM_V3_H +#define ADDS_CUSTOM_V3_H +#include "kernel_operator.h" +#include "adds_custom_tiling.h" + +using AscendC::TPosition; +class KernelAddsV3 { +public: + __aicore__ inline KernelAddsV3() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR z, AddsCustomTilingData *tilingPtr) + { + tiling = tilingPtr; + // change the tile method from column split to row split + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * tiling->tileM * tiling->n); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * tiling->tileM * tiling->n); + // the gm address conflict happens when multi cores visit the same addr range(512Bytes) + // we disable the L2 cache mode to highlight the influence of the gm address conflict + xGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + zGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + pipe.InitBuffer(inQueueX, BUFFER_NUM, tiling->tileM * tiling->tileN * sizeof(float)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, tiling->tileM * tiling->tileN * sizeof(float)); + } + __aicore__ inline void Process() + { + for (int32_t i = 0; i < tiling->loopOneCore; i++) { + // the following two SyncAll in this case are unnecessary actually, + // we just used them to highlight the influence of gm address conflict in each loop + 
AscendC::SyncAll(); + CopyIn(i); + Compute(); + AscendC::SyncAll(); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::DataCopyParams params; + params.blockCount = tiling->tileM; + params.blockLen = tiling->tileN * sizeof(float) / BLOCK_SIZE; + params.srcStride = (tiling->n - tiling->tileN) * sizeof(float) / BLOCK_SIZE; + params.dstStride = 0; + AscendC::DataCopy(xLocal, xGm[progress * tiling->tileN], params); + inQueueX.EnQue(xLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + constexpr float scale = 2.0; + AscendC::Adds(zLocal, xLocal, scale, tiling->tileM * tiling->tileN); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopyParams params; + params.blockCount = tiling->tileM; + params.blockLen = tiling->tileN * sizeof(float) / BLOCK_SIZE; + params.srcStride = 0; + params.dstStride = (tiling->n - tiling->tileN) * sizeof(float) / BLOCK_SIZE; + AscendC::DataCopy(zGm[progress * tiling->tileN], zLocal, params); + outQueueZ.FreeTensor(zLocal); + } + +private: + static constexpr int32_t BUFFER_NUM = 2; + static constexpr int32_t BLOCK_SIZE = 32; + + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor zGm; + AddsCustomTilingData *tiling; +}; +#endif // ADDS_CUSTOM_V3_H \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md b/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md index 34c96391e..1ebba2146 100644 --- a/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md @@ -1 +1,163 @@ 
-MATA地址冲突(待补充) \ No newline at end of file +## 概述 + +本样例基于AddsCustom算子工程,介绍了同地址冲突的影响以及两种解决方法。 + +## 目录结构介绍 + +``` +├── 15_mata_address_conflict // 同地址冲突样例工程目录 +│ ├── AclNNInvocation // 通过单算子API调用的方式调用AddsCustom算子 +│ ├── AddsCustom // AddsCustom算子工程 +│ ├── AddsCustom.json // AddsCustom算子的原型定义json文件 +│ └── install.sh // 脚本,调用msOpGen生成自定义算子工程,并编译 +``` + +## 算子描述 + +Adds算子实现了一个Tensor与标量值2.0相加,返回相加结果的功能。对应的数学表达式为: + +``` +z = x + 2.0 +``` + +本样例主要介绍数据搬运中的同地址冲突对搬运效率的影响,在Global Memory的数据访问中,数据访问请求(读/写)在AI 处理器内部会按照512 Bytes对齐进行地址转换,同一时刻如果多核的数据访问请求在转换后落在连续的512 Bytes范围内,出于数据一致性的要求,AI 处理器会对落入同一个512Bytes范围内的请求进行串行处理,导致搬运效率降低,即发生了同地址访问现象。 +本样例中共有3个实现版本: +adds_custom_v1.h:基础实现版本,每个核的计算顺序一致,存在同地址冲突,带宽效率较差 +adds_custom_v2.h:通过调整每个核的计算顺序,避免发生同地址冲突 +adds_custom_v3.h:通过调整切分顺序,避免发生同地址冲突 + +当前算子执行机制保证用户kernel入参(包括workspace/tiling)的地址是512 Bytes对齐的,因此用户只需要根据地址的偏移量即可判断两个地址是否会落入连续的512 Bytes范围内。 + +## 算子规格描述 + + + + + + + + + + + +
算子类型(OpType)Adds
算子输入nameshapedata typeformat
x8192 * 128floatND
算子输出z8192 * 128floatND
核函数名adds_custom
+ +## 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 算子工程介绍 + +其中,算子工程目录AddsCustom包含算子的实现文件,如下所示: + +``` +├── AddsCustom // AddsCustom自定义算子工程 +│ ├── op_host // host侧实现文件 +│ └── op_kernel // kernel侧实现文件 +``` + +CANN软件包中提供了工程创建工具msOpGen,AddsCustom算子工程可通过AddsCustom.json自动创建,自定义算子工程具体请参考[Ascend C算子开发](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC)>工程化算子开发>创建算子工程 章节。 + +创建完自定义算子工程后,开发者重点需要完成算子host和kernel文件的功能开发。为简化样例运行流程,本样例已在AddsCustom目录中准备好了必要的算子实现,install.sh脚本会创建一个CustomOp目录,并将算子实现文件复制到对应目录下,再编译算子。 + +备注:CustomOp目录为生成目录,每次执行install.sh脚本都会删除该目录并重新生成,切勿在该目录下编码算子,会存在丢失风险。 + +## 编译运行样例算子 + +针对自定义算子工程,编译运行包含如下步骤: + +- 调用msOpGen工具生成自定义算子工程; +- 完成算子host和kernel实现; +- 编译自定义算子工程生成自定义算子包; +- 安装自定义算子包到自定义算子库中; +- 调用执行自定义算子; + +详细操作如下所示。 + +### 1. 获取源码包 + +编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 + +### 2. 生成自定义算子工程,复制host和kernel实现并编译算子 + +- 切换到msOpGen脚本install.sh所在目录 + + ```bash + # 若开发者以git命令行方式clone了master分支代码,并切换目录 + cd ${git_clone_path}/samples/operator/ascendc/4_best_practices/15_mata_address_conflict + ``` +- 调用脚本,生成自定义算子工程,复制host和kernel实现并编译算子 + + - 方式一:配置环境变量运行脚本 + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量命令。 + - 默认路径,root用户安装CANN软件包 + + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + + 运行install.sh脚本 + + ```bash + bash install.sh -v [SOC_VERSION] + ``` + - 方式二:指定命令行安装路径来运行脚本 + ```bash + bash install.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH] + ``` + + 参数说明: + + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + - ASCEND_INSTALL_PATH:CANN软件包安装路径 + + 
脚本运行成功后,会在当前目录下创建CustomOp目录,编译完成后,会在CustomOp/build_out中,生成自定义算子安装包custom_opp_\_\.run,例如“custom_opp_ubuntu_x86_64.run”。 + +### 3. 部署自定义算子包 + +- 部署自定义算子包前,请确保存在自定义算子包默认部署路径环境变量ASCEND_OPP_PATH + + ```bash + echo $ASCEND_OPP_PATH + # 输出示例 /usr/local/Ascend/ascend-toolkit/latest/opp + + # 若没有,则需导出CANN环境变量 + source [ASCEND_INSTALL_PATH]/bin/setenv.bash + # 例如 source /usr/local/Ascend/ascend-toolkit/latest/bin/setenv.bash + ``` + + 参数说明: + + - ASCEND_INSTALL_PATH:CANN软件包安装路径,一般和上一步中指定的路径保持一致 +- 在自定义算子安装包所在路径下,执行如下命令安装自定义算子包 + + ```bash + cd CustomOp/build_out + ./custom_opp__.run + ``` + + 命令执行成功后,自定义算子包中的相关文件将部署至opp算子库环境变量ASCEND_OPP_PATH指向的的vendors/customize目录中。 + +### 4. 调用执行算子工程 + +- [单算子API调用AddsCustom算子工程](./AclNNInvocation/README.md) + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | -------- | +| 2025/07/03 | 新增样例 | diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/install.sh b/operator/ascendc/4_best_practices/15_mata_address_conflict/install.sh new file mode 100755 index 000000000..24a0c35a2 --- /dev/null +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/install.sh @@ -0,0 +1,58 @@ +#!/bin/bash +SHORT=v:,i:, +LONG=soc-version:,install-path:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + 
_ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH + +OP_NAME=AddsCustom +# Generate the op framework +rm -rf CustomOp && msopgen gen -i $OP_NAME.json -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp +# Copy op implementation files to CustomOp +rm -rf CustomOp/op_host/*.cpp +rm -rf CustomOp/op_kernel/*.h && rm -rf CustomOp/op_kernel/*.cpp +cp -rf $OP_NAME/op_kernel CustomOp/ +cp -rf $OP_NAME/op_host CustomOp/ + +# Build CustomOp project +(cd CustomOp && bash build.sh) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/README.md b/operator/ascendc/4_best_practices/README.md index f5379bbbf..c40fe61a7 100644 --- a/operator/ascendc/4_best_practices/README.md +++ b/operator/ascendc/4_best_practices/README.md @@ -8,6 +8,7 @@ | ------------------------------- | ------------------------------------------ | ------------------------------------------ | | [4_bank_conflict](./4_bank_conflict) | 基于Ascend C的bank冲突性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [6_group_matmul](./6_group_matmul) | 基于Ascend C的group matmul算子性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | +| [15_mata_address_conflict](./15_mata_address_conflict) | 基于Ascend C的同地址冲突性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [21_all_gather_matmul_custom](./21_all_gather_matmul_custom) | 基于Ascend C的AllGatherMatmul算子性能调优样例 | Atlas A2训练系列产品 | | [22_matmul_reduce_scatter_custom](./22_matmul_reduce_scatter_custom) | 基于Ascend C的MatmulReduceScatter算子性能调优样例 | Atlas A2训练系列产品 | | [23_matmul_all_reduce_custom](./23_matmul_all_reduce_custom) | 基于Ascend C的MatmulAllReduce算子性能调优样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | -- Gitee From 2fc9b125b6c4a6d4e8924c08c1303b58bafba819 Mon Sep 17 00:00:00 2001 From: Y_keven Date: Mon, 14 Jul 2025 08:23:22 +0000 Subject: [PATCH 37/97] =?UTF-8?q?!2714=20=E4=BF=AE=E5=A4=8Dpython/resnet50?= 
=?UTF-8?q?=E5=BC=82=E6=AD=A5=E6=8E=A8=E7=90=86=E7=A4=BA=E4=BE=8B=E7=9A=84?= =?UTF-8?q?=E5=8F=98=E9=87=8F=E5=90=8D=E6=8B=BC=E5=86=99=E9=94=99=E8=AF=AF?= =?UTF-8?q?=20Merge=20pull=20request=20!2714=20from=20Y=5Fkeven/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/acl_net.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/python/level2_simple_inference/1_classification/resnet50_async_imagenet_classification/src/acl_net.py b/python/level2_simple_inference/1_classification/resnet50_async_imagenet_classification/src/acl_net.py index a70a7f7dd..ef60270b6 100644 --- a/python/level2_simple_inference/1_classification/resnet50_async_imagenet_classification/src/acl_net.py +++ b/python/level2_simple_inference/1_classification/resnet50_async_imagenet_classification/src/acl_net.py @@ -51,7 +51,7 @@ class Net(object): self.model_id = None # pointer self.context = None # pointer self.stream = None # pointer - self.excute_times = execute_times + self.execute_times = execute_times self.callback_interval = callback_interval self.is_callback = True if callback_interval else False self.memory_pool = memory_pool @@ -169,8 +169,8 @@ class Net(object): for idx in range(self.memory_pool): img_idx = idx % len(images_dataset_list) img_input = self._load_input_data(images_dataset_list[img_idx]) - infer_ouput = self._load_output_data() - self.dataset_list.append([img_input, infer_ouput]) + infer_output = self._load_output_data() + self.dataset_list.append([img_input, infer_output]) print("data interaction from host to device success") def _destroy_dataset_and_databuf(self, ): @@ -226,16 +226,16 @@ class Net(object): def _get_callback(self, idx): if (idx + 1) % self.callback_interval == 0: acl.rt.launch_callback(self.callback_func, - self.excute_dataset, + self.execute_dataset, 1, self.stream) - self.dataset_list.extend(self.excute_dataset) - self.excute_dataset = [] + 
self.dataset_list.extend(self.execute_dataset) + self.execute_dataset = [] def forward(self): print('execute stage:') - self.excute_dataset = [] - for idx in range(self.excute_times): + self.execute_dataset = [] + for idx in range(self.execute_times): img_data, infer_output = self.dataset_list.pop(0) ret = acl.mdl.execute_async(self.model_id, img_data, @@ -244,7 +244,7 @@ class Net(object): check_ret("acl.mdl.execute_async", ret) if self.is_callback: - self.excute_dataset.append([img_data, infer_output]) + self.execute_dataset.append([img_data, infer_output]) self._get_callback(idx) print('execute stage success') -- Gitee From 79e7fc50b314a84d9b4124e8c8a732ec34234f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=98=E8=B1=AA?= Date: Tue, 15 Jul 2025 08:35:37 +0000 Subject: [PATCH 38/97] =?UTF-8?q?!2709=20[fix]meta=E6=9C=AA=E6=88=90?= =?UTF-8?q?=E5=8A=9F=E6=B3=A8=E5=86=8C=20Merge=20pull=20request=20!2709=20?= =?UTF-8?q?from=20=E4=BB=98=E8=B1=AA/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../PytorchInvocation/test_ops_custom_register_in_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py b/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py index f9bed9c44..9e8397463 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/test_ops_custom_register_in_graph.py @@ -29,7 +29,7 @@ except ModuleNotFoundError: ) -@impl(m, "npu_add_custom") +@impl(m.m, "npu_add_custom") def npu_add_custom_meta(x, y): return torch.empty_like(x) @@ -63,7 +63,7 @@ class TestTorchCompileCustomAdd(TestCase): def forward(self, x, y): return torch_npu.npu_add_custom(x, y) - mod = 
torch.compile(Module().npu(), backend=npu_backend) + mod = torch.compile(Module().npu(), backend=npu_backend, fullgraph=True) output = mod(x, y) print(output) self.assertRtolEqual(output, (x + y)) -- Gitee From de4c1bb09b62d1bb596919aadfc2878517c27e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E6=99=BA=E6=85=A7?= Date: Tue, 15 Jul 2025 12:52:23 +0000 Subject: [PATCH 39/97] =?UTF-8?q?!2717=20add=20llm=20datadist=20v2=20sampl?= =?UTF-8?q?e=20Merge=20pull=20request=20!2717=20from=20=E8=B5=B5=E6=99=BA?= =?UTF-8?q?=E6=85=A7/zzh=5F0714=5Fadd=5Fllm=5Fdatadist=5Fv2=5Fsample?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../11_llm_data_dist/CMakeLists.txt | 50 +++ .../11_llm_data_dist/decoder_sample2.cpp | 286 ++++++++++++++++++ .../11_llm_data_dist/prompt_sample2.cpp | 262 ++++++++++++++++ .../11_llm_data_dist/readme.md | 15 + .../10_llm_data_dist/README.md | 12 +- .../switch_role_sample.py | 178 +++++++++++ 6 files changed, 802 insertions(+), 1 deletion(-) create mode 100644 cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp create mode 100644 cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp create mode 100644 python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py diff --git a/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt b/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt index 603b6e968..25addfeab 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt +++ b/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt @@ -68,4 +68,54 @@ target_link_directories(decoder_sample PRIVATE target_link_libraries(decoder_sample PRIVATE llm_engine graph +) + +add_executable(prompt_sample2 "prompt_sample2.cpp") + +target_compile_options(prompt_sample2 PRIVATE + ${common_compile_options} +) + +target_compile_definitions(prompt_sample2 PRIVATE + ${common_compile_definitions} +) + +target_include_directories(prompt_sample2 
PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + +target_link_directories(prompt_sample2 PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(prompt_sample2 PRIVATE + llm_engine + graph + ascendcl +) + +add_executable(decoder_sample2 "decoder_sample2.cpp") + +target_compile_options(decoder_sample2 PRIVATE + ${common_compile_options} +) + +target_compile_definitions(decoder_sample2 PRIVATE + ${common_compile_definitions} +) + +target_include_directories(decoder_sample2 PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + +target_link_directories(decoder_sample2 PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(decoder_sample2 PRIVATE + llm_engine + graph + ascendcl ) \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp new file mode 100644 index 000000000..c4a186e96 --- /dev/null +++ b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp @@ -0,0 +1,286 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include "acl/acl.h" +#include "llm_datadist/llm_datadist.h" + +using namespace llm_datadist; +namespace{ +constexpr uint16_t PROMPT_LISTEN_PORT = 26000; +constexpr uint16_t DECODER_LISTEN_PORT = 26001; +constexpr uint16_t PROMPT_CLUSTER_ID = 0; +constexpr uint16_t DECODER_CLUSTER_ID = 1; +constexpr uint32_t NUM_TENSORS = 4U; +constexpr size_t TENSOR_SIZE = 8 * 16 * sizeof(int32_t); +const std::vector TENSOR_SHAPE = {8, 16}; +constexpr size_t TENSOR_BLOCK_ELEMENT_NUM = 16; +constexpr int32_t WAIT_PROMPT_TIME = 5; +constexpr int32_t EXPECTED_ARG_CNT = 4; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_IP = 2; +constexpr uint32_t ARG_INDEX_REMOTE_IP = 3; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId) +{ + std::map options; + options[OPTION_DEVICE_ID] = deviceId.c_str(); + if (std::getenv("LOCAL_COMM_RES") == nullptr) { + printf("[ERROR] env:LOCAL_COMM_RES not set\n"); + return -1; + } + options[OPTION_LOCAL_COMM_RES] = std::getenv("LOCAL_COMM_RES"); + auto ret = llmDataDist.Initialize(options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return LLM_SUCCESS; +} + +int32_t SetRole(LlmDataDist &llmDataDist, LlmRole role, const char *localIp) +{ + std::map options; + options[OPTION_LISTEN_IP_INFO] = (std::string(localIp) + ":" + std::to_string(DECODER_LISTEN_PORT)).c_str(); + auto ret = llmDataDist.SetRole(role, options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] SetRole failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] SetRole success\n"); + return 0; +} + +int Link(LlmDataDist &llmDataDist, const char *remoteIp) +{ + std::vector rets; + std::vector 
clusters; + ClusterInfo clusterInfo; + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = PROMPT_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.LinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] LinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] LinkLlmClusters success\n"); + return 0; +} + +int Unlink(LlmDataDist &llmDataDist, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 0; + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = PROMPT_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.UnlinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] UnlinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] UnlinkLlmClusters success\n"); + return 0; +} + +int32_t CheckBuffers(const std::vector &buffers, const std::vector &checkIndexList) +{ + for (auto buffer : buffers) { + std::vector hostBuffer(TENSOR_SIZE / sizeof(int32_t)); + CHECK_ACL(aclrtMemcpy(&hostBuffer[0], TENSOR_SIZE, buffer, TENSOR_SIZE, ACL_MEMCPY_DEVICE_TO_HOST)); + for (auto checkIndex : checkIndexList) { + for (size_t i = 0U; i < TENSOR_BLOCK_ELEMENT_NUM; ++i) { + auto expect = checkIndex * TENSOR_BLOCK_ELEMENT_NUM + i; + if (hostBuffer[expect] != expect) { + printf("[ERROR] Buffer check failed, index = %zu, val = %d, expect val = %zu\n", + expect, hostBuffer[expect], expect); + return -1; + } + } + } + } + printf("[INFO] CheckBuffers success\n"); + return 0; +} + +int32_t PullCache(LlmDataDist &llmDataDist, int64_t cacheId) +{ + std::vector promptBlocks {1, 2, 3}; + std::vector decoderBlocks {1, 2, 3}; + CacheIndex cacheIndex{PROMPT_CLUSTER_ID, 1, 0}; + // 
可以使用PullKvBlock拉取多块block的数据 + Cache cache{}; + cache.cache_id = cacheId; + auto ret = llmDataDist.PullKvBlocks(cacheIndex, cache, promptBlocks, decoderBlocks); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PullKvBlocks failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] PullKvBlocks success\n"); + // 也可以使用PullKvCache拉取一个batch中的连续数据 + cacheIndex.batch_index = 0; + ret = llmDataDist.PullKvCache(cacheIndex, cache, 0); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PullKvCache failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] PullKvCache success\n"); + return 0; +} + +void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, bool linked, const char *remoteIp, + const std::vector buffers) +{ + if (linked) { + auto ret = Unlink(llmDataDist, remoteIp); + if (ret != 0) { + printf("[ERROR] Unlink failed, ret = %d\n", ret); + } else { + printf("[INFO] Unlink success\n"); + } + } + if (cacheId > 0) { + auto ret = llmDataDist.UnregisterKvCache(cacheId); + if (ret != 0) { + printf("[ERROR] UnregisterKvCache failed, ret = %u\n", ret); + } else { + printf("[INFO] UnregisterKvCache success\n"); + } + } + for (auto buffer : buffers) { + aclrtFree(buffer); + } + llmDataDist.Finalize(); +} + +int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char *remoteIp) +{ + printf("[INFO] Decoder Sample start\n"); + // 1. 初始化 + LlmDataDist llmDataDist(DECODER_CLUSTER_ID, LlmRole::kDecoder); + if (Initialize(llmDataDist, deviceId) != 0) { + return -1; + } + + // 2. 
注册内存地址 + CacheDesc cacheDesc{}; + cacheDesc.num_tensors = NUM_TENSORS; + cacheDesc.data_type = DT_INT32; + cacheDesc.shape = TENSOR_SHAPE; + std::vector tensorAddrs; + std::vector buffers; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + int32_t *buffer = nullptr; + CHECK_ACL(aclrtMalloc((void **)&buffer, TENSOR_SIZE, ACL_MEM_MALLOC_HUGE_ONLY)); + tensorAddrs.emplace_back(reinterpret_cast(buffer)); + buffers.emplace_back(reinterpret_cast(buffer)); + } + int64_t cacheId = -1; + bool linked = false; + auto ret = llmDataDist.RegisterKvCache(cacheDesc, tensorAddrs, {}, cacheId); + if (ret != LLM_SUCCESS) { + printf("[ERROR] RegisterKvCache failed, ret = %u\n", ret); + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + // 3. RegisterKvCache成功后,可以获取cache中各tensor的地址用于后续操作 + printf("[INFO] RegisterKvCache success\n"); + for (size_t i = 0U; i < tensorAddrs.size(); ++i) { + printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); + } + + // 4. 等待Prompt写完cache,实际业务场景可通过合适方式实现通知 + std::this_thread::sleep_for(std::chrono::seconds(WAIT_PROMPT_TIME)); + + // 5. 与prompt建链 + if (Link(llmDataDist, remoteIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + linked = true; + + // 6. 从prompt拉取Cache + if (PullCache(llmDataDist, cacheId) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + if (CheckBuffers(buffers, {0, 1, 2, 3}) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 7. 解除链路 + if (Unlink(llmDataDist, remoteIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + linked = false; + + // 8. 切换角色 + if (SetRole(llmDataDist, LlmRole::kPrompt, localIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 9. 
等待Prompt push cache,实际业务场景可通过合适方式实现通知 + std::this_thread::sleep_for(std::chrono::seconds(30)); + + if (CheckBuffers(buffers, {4, 5, 6, 7}) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 10. 释放Cache与LlmDatadist + llmDataDist.Finalize(); + printf("[INFO] Finalize success\n"); + printf("[INFO] Decoder Sample end\n"); + return 0; +} + +int main(int32_t argc, char **argv) +{ + if (argc != EXPECTED_ARG_CNT) { + printf("[ERROR] expect 3 args(deviceId, localHostIp, remoteHostIp), but got %d\n", argc - 1); + return -1; + } + const auto deviceId = argv[ARG_INDEX_DEVICE_ID]; + const auto localIp = argv[ARG_INDEX_LOCAL_IP]; + const auto remoteIp = argv[ARG_INDEX_REMOTE_IP]; + printf("[INFO] deviceId = %s, localIp = %s, remoteIp = %s\n", deviceId, localIp, remoteIp); + auto ret = RunDecoderSample(deviceId, localIp, remoteIp); + return ret; +} \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp new file mode 100644 index 000000000..033463d71 --- /dev/null +++ b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp @@ -0,0 +1,262 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include "acl/acl.h" +#include "llm_datadist/llm_datadist.h" + +using namespace llm_datadist; +namespace{ +constexpr uint16_t PROMPT_LISTEN_PORT = 26000; +constexpr uint16_t DECODER_LISTEN_PORT = 26001; +constexpr uint16_t PROMPT_CLUSTER_ID = 0; +constexpr uint32_t NUM_TENSORS = 4U; +constexpr size_t TENSOR_SIZE = 8 * 16 * sizeof(int32_t); +const std::vector TENSOR_SHAPE = {8, 16}; +constexpr int32_t WAIT_TIME = 10; +constexpr int32_t EXPECTED_ARG_CNT = 4; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_IP = 2; +constexpr uint32_t ARG_INDEX_REMOTE_IP = 3; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId, const std::string &localIp) +{ + std::map options; + options[OPTION_DEVICE_ID] = deviceId.c_str(); + options[OPTION_LISTEN_IP_INFO] = (localIp + ":" + std::to_string(PROMPT_LISTEN_PORT)).c_str(); + if (std::getenv("LOCAL_COMM_RES") == nullptr) { + printf("[ERROR] env:LOCAL_COMM_RES not set\n"); + return -1; + } + options[OPTION_LOCAL_COMM_RES] = std::getenv("LOCAL_COMM_RES"); + auto ret = llmDataDist.Initialize(options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return LLM_SUCCESS; +} + +int32_t SetRole(LlmDataDist &llmDataDist, LlmRole role) +{ + std::map options; + auto ret = llmDataDist.SetRole(role, options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] SetRole failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] SetRole success\n"); + return 0; +} + +int Link(LlmDataDist &llmDataDist, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port 
= DECODER_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.LinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] LinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] LinkLlmClusters success\n"); + return 0; +} + +int Unlink(LlmDataDist &llmDataDist, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 1; + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.UnlinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] UnlinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] UnlinkLlmClusters success\n"); + return 0; +} + +int32_t PushCache(LlmDataDist &llmDataDist, int64_t cacheId) +{ + std::vector promptBlocks {5, 6, 7}; + std::vector decoderBlocks {5, 6, 7}; + // 可以使用PushKvBlock推送多块block的数据 + Cache cache{}; + cache.cache_id = cacheId; + auto ret = LLM_SUCCESS; + CacheIndex cacheIndex{1, 1}; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + KvCacheExtParam param{}; + param.src_layer_range = std::pair(i, i); + param.dst_layer_range = std::pair(i, i); + param.tensor_num_per_layer = 1; + ret = llmDataDist.PushKvBlocks(cache, cacheIndex, promptBlocks, decoderBlocks, param); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PushKvBlocks failed, ret = %u\n", ret); + return -1; + } + } + printf("[INFO] PushKvBlocks success\n"); + + // 也可以使用PushKvCache推送一个batch中的连续数据 + CacheIndex cacheIndex2{1, 1, 4}; + KvCacheExtParam param2{}; + param2.src_layer_range = std::pair(0, 0); + param2.dst_layer_range = std::pair(0, 0); + param2.tensor_num_per_layer = 4; + ret = llmDataDist.PushKvCache(cache, cacheIndex2, 4, -1, param2); + if (ret 
!= LLM_SUCCESS) { + printf("[ERROR] PushKvCache failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] PushKvCache success\n"); + return 0; +} + +void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, bool linked, const char *remoteIp, + const std::vector buffers) +{ + if (linked) { + auto ret = Unlink(llmDataDist, remoteIp); + if (ret != 0) { + printf("[ERROR] Unlink failed, ret = %d\n", ret); + } else { + printf("[INFO] Unlink success\n"); + } + } + if (cacheId > 0) { + auto ret = llmDataDist.UnregisterKvCache(cacheId); + if (ret != 0) { + printf("[ERROR] UnregisterKvCache failed, ret = %u\n", ret); + } else { + printf("[INFO] UnregisterKvCache success\n"); + } + } + for (auto buffer : buffers) { + aclrtFree(buffer); + } + llmDataDist.Finalize(); +} + +int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *remoteIp) +{ + printf("[INFO] Prompt Sample start\n"); + // 1. 初始化 + LlmDataDist llmDataDist(PROMPT_CLUSTER_ID, LlmRole::kPrompt); + if (Initialize(llmDataDist, deviceId, localIp) != 0) { + printf("[ERROR] Initialize LlmDataDist failed\n"); + return -1; + } + // 2. 
注册内存地址 + CacheDesc cacheDesc{}; + cacheDesc.num_tensors = NUM_TENSORS; + cacheDesc.data_type = DT_INT32; + cacheDesc.shape = TENSOR_SHAPE; + std::vector tensorAddrs; + std::vector buffers; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + int32_t *buffer = nullptr; + CHECK_ACL(aclrtMalloc((void **)&buffer, TENSOR_SIZE, ACL_MEM_MALLOC_HUGE_ONLY)); + + // init device buffer + std::vector hostBuffer(TENSOR_SIZE / sizeof(int32_t)); + std::iota(hostBuffer.begin(), hostBuffer.end(), 0); + CHECK_ACL(aclrtMemcpy(buffer, TENSOR_SIZE, &hostBuffer[0], TENSOR_SIZE, ACL_MEMCPY_HOST_TO_DEVICE)); + + tensorAddrs.emplace_back(reinterpret_cast(buffer)); + buffers.emplace_back(reinterpret_cast(buffer)); + } + int64_t cacheId = -1; + bool linked = false; + auto ret = llmDataDist.RegisterKvCache(cacheDesc, tensorAddrs, {}, cacheId); + if (ret != LLM_SUCCESS) { + printf("[ERROR] RegisterKvCache failed, ret = %u\n", ret); + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + // 3. RegisterKvCache成功后,可以获取cache中各tensor的地址用于后续操作 + printf("[INFO] RegisterKvCache success\n"); + for (size_t i = 0U; i < tensorAddrs.size(); ++i) { + printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); + } + + // 4. 等待Decoder拉取cache + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); + + // 5. 切换角色 + if (SetRole(llmDataDist, LlmRole::kDecoder) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 6. 与decoder建链 + if (Link(llmDataDist, remoteIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + linked = true; + + // 7. 与decoder建链 + if (PushCache(llmDataDist, cacheId) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 8. 
释放Cache与LlmDatadist + llmDataDist.Finalize(); + printf("[INFO] Finalize success\n"); + printf("[INFO] Prompt Sample end\n"); + return 0; +} + +int main(int32_t argc, char **argv) +{ + if (argc != EXPECTED_ARG_CNT) { + printf("[ERROR] expect 3 args(deviceId, localHostIp, remoteHostIp), but got %d\n", argc - 1); + return -1; + } + const auto deviceId = argv[ARG_INDEX_DEVICE_ID]; + const auto localIp = argv[ARG_INDEX_LOCAL_IP]; + const auto remoteIp = argv[ARG_INDEX_REMOTE_IP]; + printf("[INFO] deviceId = %s, localIp = %s, remoteIp = %s\n", deviceId, localIp, remoteIp); + auto ret = RunPromptSample(deviceId, localIp, remoteIp); + return ret; +} \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/readme.md b/cplusplus/level1_single_api/11_llm_data_dist/readme.md index f02114570..1a9cf9f80 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/readme.md +++ b/cplusplus/level1_single_api/11_llm_data_dist/readme.md @@ -71,6 +71,8 @@ 3. 在运行环境执行可执行文件。 + 3.1 执行sample1 + - 执行prompt_sample, 参数为device_id与local_ip, 其中device_id为prompt要使用的device_id, local_ip为prompt所在device的ip,如: ``` ./prompt_sample 0 10.10.10.1 @@ -80,3 +82,16 @@ ``` ./decoder_sample 4 10.10.10.5 10.10.10.1 ``` + + 3.2 执行sample2 + + - 执行prompt_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为prompt要使用的device_id, local_host_ip为prompt所在host的ip, remote_host_ip为decoder所在host的ip,如: + ``` + LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "0", "device_ip": "10.10.10.1"}]}]}' ./prompt_sample 0 10.10.170.1 10.170.10.2 + ``` + + - 执行decoder_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为decoder要使用的device_id, local_host_ip为decoder所在host的ip,remote_host_ip为prompt所在host的ip,如: + ``` + LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "1", "device_ip": "10.10.10.2"}]}]}' ./decoder_sample 1 10.170.10.2 10.170.10.1 + 
``` + **注**:LOCAL_COMM_RES为sample2执行所需环境变量,配置了当前进程所需的通信资源,将传递给llm_datadist作为初始化option; 配置格式与HCCL的ranktable一致,只需要配置本进程第一个参数device_id对应的信息,其中ranktable中的rank_id和server_count字段不需要配置,当前用例配置为A2的ranktable格式,其他环境需参考对应环境的ranktable格式进行配置 \ No newline at end of file diff --git a/python/level1_single_api/10_llm_data_dist/README.md b/python/level1_single_api/10_llm_data_dist/README.md index f3393ab60..76c3225e5 100644 --- a/python/level1_single_api/10_llm_data_dist/README.md +++ b/python/level1_single_api/10_llm_data_dist/README.md @@ -96,5 +96,15 @@ python pull_from_cache_to_blocks.py --device_id 0 --cluster_id 1 # Decoder主机: python pull_from_cache_to_blocks.py --device_id 0 --cluster_id 2 - ``` + ``` + - 执行switch role样例程序,此样例程序使用单侧建链方式,首先torch自行申请内存并注册blocks, + decoder发起建链并pull blocks, 然后两侧切换角色, 并prompt发起建链, decoder进行push_blocks: + 分别在Prompt主机与Decoder主机,执行样例程序: + ``` + # Prompt主机: + LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "0", "device_ip": "10.10.10.1"}]}]}' GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 0 --role p --local_host_ip 10.170.10 --remote_host_ip 10.170.10 + # Decoder主机: + LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "1", "device_ip": "10.10.10.2"}]}]}' GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 1 --role d --local_host_ip 10.170.10 --remote_host_ip 10.170.10 + ``` + **注**:**LOCAL_COMM_RES**为单侧建链方式执行所需环境变量,配置了当前进程所需的通信资源,将传递给llm_datadist作为初始化option; 配置格式与HCCL的ranktable一致,只需要配置本进程参数device_id对应的信息,其中ranktable中的rank_id和server_count字段不需要配置,当前用例配置为A2的ranktable格式,其他环境需参考对应环境的ranktable格式进行配置;**GLOO_SOCKET_IFNAME**为本地网卡名,可通过ifconfig查询;**HCCL_INTRA_ROCE_ENABLE=1**代表使用roce方式进行通信; diff --git a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py 
b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py new file mode 100644 index 000000000..616e62eee --- /dev/null +++ b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py @@ -0,0 +1,178 @@ +""" +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import os +import logging +import datetime +from llm_datadist import LLMDataDist, LLMRole, LLMConfig, CacheDesc, DataType, BlocksCacheKey, \ + Placement, LLMClusterInfo, LLMStatusCode +import torch +import torch.distributed as dist +import torch_npu +import torchair + +logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) + +NUM_TENSORS = 2 +BLOCKS_NUM = 3 +KV_SHAPE = 10 +PROMPT_CLUSTER_ID = 0 +DECODER_CLUSTER_ID = 1 + +def init_process_group(rank, world_size, master_ip, backend='gloo'): + os.environ['MASTER_ADDR'] = master_ip + os.environ['MASTER_PORT'] = '29500' + + logging.info(f"init group begin, {rank=}, {world_size=}, {master_ip=}") + dist.init_process_group(backend=backend, rank=rank, world_size=world_size, timeout=datetime.timedelta(seconds=30)) + logging.info(f"init group success, {rank=}, {world_size=}, {master_ip=}") + + +def init_llm_datadist(role: LLMRole, cluster_id, device_id: int, local_host_ip, remote_host_ip) -> LLMDataDist: + init_process_group(cluster_id, 2, min(local_host_ip, remote_host_ip)) + datadist = LLMDataDist(role, cluster_id) + 
llm_config = LLMConfig() + llm_config.device_id = device_id + if os.getenv('LOCAL_COMM_RES') is None: + raise Exception("env:LOCAL_COMM_RES is not set") + llm_config.local_comm_res = os.getenv('LOCAL_COMM_RES') + if role == LLMRole.PROMPT: + llm_config.listen_ip_info = f"{local_host_ip}:26000" + llm_options = llm_config.generate_options() + datadist.init(llm_options) + logging.info(f"init {role} success, {cluster_id=}") + return datadist + + +def run_prompt_sample(datadist, remote_host_ip): + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + tensor = torch.ones(BLOCKS_NUM, KV_SHAPE, dtype=torch.float).npu() + tensor2 = torch.ones(BLOCKS_NUM, KV_SHAPE, dtype=torch.float).npu() + addr = int(tensor.data_ptr()) + addr2 = int(tensor2.data_ptr()) + cache = cache_manager.register_blocks_cache(cache_desc, [addr, addr2], BlocksCacheKey(PROMPT_CLUSTER_ID, 0)) + logging.info('register_blocks_cache success') + dist.barrier() # register end + logging.info('wait decoder link and pull...') + dist.barrier() # decoder unlink + + datadist.switch_role(LLMRole.DECODER) + dist.barrier() # prompt switch role end, close lisen + dist.barrier() # decoder switch role end, lisen + + cluster = LLMClusterInfo() + cluster.append_remote_ip_info(remote_host_ip, 26000) + ret, _ = datadist.link_clusters([cluster], 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("link failed") + logging.info('link success, wait decoder push...') + dist.barrier() # prompt link end + dist.barrier() # decoder push blocks end + logging.info(f'after decoder push, {tensor=}') + logging.info(f'after decoder push, {tensor2=}') + + cluster = LLMClusterInfo() + cluster.remote_cluster_id = DECODER_CLUSTER_ID + ret, _ = datadist.unlink_clusters([cluster], 5000, force=True) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + + 
cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + logging.info('[finalize] success') + + +def run_decoder_sample(datadist, local_host_ip, remote_host_ip): + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + tensor = torch.full((BLOCKS_NUM, KV_SHAPE), 0, dtype=torch.float).npu() + tensor2 = torch.full((BLOCKS_NUM, KV_SHAPE), 0, dtype=torch.float).npu() + addr = int(tensor.data_ptr()) + addr2 = int(tensor2.data_ptr()) + cache = cache_manager.register_blocks_cache(cache_desc, [addr, addr2], BlocksCacheKey(DECODER_CLUSTER_ID, 0)) + logging.info('register_blocks_cache success') + dist.barrier() # register end + + cluster = LLMClusterInfo() + cluster.append_remote_ip_info(remote_host_ip, 26000) + ret, _ = datadist.link_clusters([cluster], 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + + cache_manager.pull_blocks(BlocksCacheKey(PROMPT_CLUSTER_ID, 0), cache, src_blocks=[0, 1], dst_blocks=[0, 2]) + logging.info(f'after decoder pull, {tensor=}') + logging.info(f'after decoder pull, {tensor2=}') + + cluster = LLMClusterInfo() + cluster.remote_cluster_id = PROMPT_CLUSTER_ID + cluster.append_remote_ip_info(remote_host_ip, 26000) + ret, _ = datadist.unlink_clusters([cluster], 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + + dist.barrier() # decoder unlink + dist.barrier() # prompt switch role end, close lisen + llm_config = LLMConfig() + llm_config.listen_ip_info = f"{local_host_ip}:26000" + llm_options = llm_config.generate_options() + datadist.switch_role(LLMRole.PROMPT, llm_options) + logging.info('decoder link, pull, unlink, switch role success, wait prompt link...') + dist.barrier() # decoder switch role end, lisen + dist.barrier() # prompt link end + + cache_manager.push_blocks(BlocksCacheKey(PROMPT_CLUSTER_ID, 0), cache, src_blocks=[0, 1, 2], 
dst_blocks=[0, 1,2], + src_layer_range=range(0, 2), dst_layer_range=range(0, 2), tensor_num_per_layer=1) + dist.barrier() # decoder push blocks end + cluster = LLMClusterInfo() + cluster.remote_cluster_id = PROMPT_CLUSTER_ID + ret, _ = datadist.unlink_clusters([cluster], 5000, force=True) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + + cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--device_id", type=int, default=0, help='device id') + parser.add_argument("--role", type=str, default=1, help='role type, support p/d') + parser.add_argument("--local_host_ip", type=str, help='local host ip') + parser.add_argument("--remote_host_ip", type=str, help='remote host ip') + args = parser.parse_args() + if args.role not in ['p', 'd']: + raise RuntimeError("Not supported cluster id") + if args.device_id not in [0, 1, 2, 3, 4, 5, 6, 7]: + raise RuntimeError("Not supported device id") + if args.local_host_ip is None: + raise RuntimeError("local_host_ip is not set") + if args.remote_host_ip is None: + raise RuntimeError("remote_host_ip is not set") + logging.info(f'Sample start, device_id = {args.device_id}, role = {args.role}') + + torch.npu.set_device(args.device_id) + role = LLMRole.PROMPT if args.role == 'p' else LLMRole.DECODER + cluster_id = PROMPT_CLUSTER_ID if args.role == 'p' else DECODER_CLUSTER_ID + datadist = init_llm_datadist(role, cluster_id, args.device_id, args.local_host_ip, args.remote_host_ip) + if role == LLMRole.PROMPT: + run_prompt_sample(datadist, args.remote_host_ip) + else: + run_decoder_sample(datadist, args.local_host_ip, args.remote_host_ip) + logging.info('Sample end') -- Gitee From 464b844ab5cd1c7ee6f18be645ba3a050320671f Mon Sep 17 00:00:00 2001 From: zhanghao0689 Date: Wed, 16 Jul 2025 01:02:04 +0000 Subject: [PATCH 40/97] !2716 add l2 bypass case Merge pull request !2716 from zhanghao0689/master --- 
.../12_cachemiss_preload_dcci/README.md | 1 - .../AclNNInvocation/README.md | 76 +++ .../AclNNInvocation/inc/common.h | 45 ++ .../AclNNInvocation/inc/op_runner.h | 188 +++++++ .../AclNNInvocation/inc/operator_desc.h | 57 +++ .../12_l2_cache_bypass/AclNNInvocation/run.sh | 77 +++ .../AclNNInvocation/scripts/acl.json | 1 + .../AclNNInvocation/scripts/gen_data.py | 28 ++ .../AclNNInvocation/scripts/verify_result.py | 53 ++ .../AclNNInvocation/src/CMakeLists.txt | 65 +++ .../AclNNInvocation/src/common.cpp | 80 +++ .../AclNNInvocation/src/main.cpp | 163 ++++++ .../AclNNInvocation/src/op_runner.cpp | 462 ++++++++++++++++++ .../AclNNInvocation/src/operator_desc.cpp | 51 ++ .../12_l2_cache_bypass/AddCustom.json | 47 ++ .../AddCustom/op_host/add_custom.cpp | 49 ++ .../AddCustom/op_kernel/add_custom.cpp | 28 ++ .../AddCustom/op_kernel/add_custom_tiling.h | 18 + .../AddCustom/op_kernel/add_custom_v1.h | 102 ++++ .../AddCustom/op_kernel/add_custom_v2.h | 105 ++++ .../12_l2_cache_bypass/README.md | 162 ++++++ .../12_l2_cache_bypass/install.sh | 59 +++ .../15_mata_address_conflict/README.md | 9 +- operator/ascendc/4_best_practices/README.md | 3 + 24 files changed, 1924 insertions(+), 5 deletions(-) delete mode 100644 operator/ascendc/4_best_practices/12_cachemiss_preload_dcci/README.md create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/README.md create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/common.h create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/op_runner.h create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/operator_desc.h create mode 100755 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/acl.json create mode 100644 
operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/gen_data.py create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/verify_result.py create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/CMakeLists.txt create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/common.cpp create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/main.cpp create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/op_runner.cpp create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/operator_desc.cpp create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_host/add_custom.cpp create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom.cpp create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_tiling.h create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v1.h create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v2.h create mode 100644 operator/ascendc/4_best_practices/12_l2_cache_bypass/README.md create mode 100755 operator/ascendc/4_best_practices/12_l2_cache_bypass/install.sh diff --git a/operator/ascendc/4_best_practices/12_cachemiss_preload_dcci/README.md b/operator/ascendc/4_best_practices/12_cachemiss_preload_dcci/README.md deleted file mode 100644 index ae31f00d7..000000000 --- a/operator/ascendc/4_best_practices/12_cachemiss_preload_dcci/README.md +++ /dev/null @@ -1 +0,0 @@ -CACHE MISS优化 preload dcci(待补充) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/README.md 
b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/README.md new file mode 100644 index 000000000..d3e63bedf --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/README.md @@ -0,0 +1,76 @@ +## 目录结构介绍 + +``` +├── AclNNInvocation //通过单算子API调用的方式调用AddCustom算子 +│ ├── inc // 头文件目录 +│ │ ├── common.h // 声明公共方法类,用于读取二进制文件 +│ │ ├── op_runner.h // 算子描述声明文件,包含算子输入/输出,算子类型以及输入描述与输出描述 +│ │ └── operator_desc.h // 算子运行相关信息声明文件,包含算子输入/输出个数,输入/输出大小等 +│ ├── input // 存放脚本生成的输入数据目录 +│ ├── scripts +│ │ ├── acl.json // acl配置文件 +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 精度校验脚本 +│ ├── src +│ │ ├── CMakeLists.txt // 编译规则文件 +│ │ ├── common.cpp // 公共函数,读取二进制文件函数的实现文件 +│ │ ├── main.cpp // 单算子调用应用的入口 +│ │ ├── op_runner.cpp // 单算子调用主体流程实现文件 +│ │ └── operator_desc.cpp // 构造算子的输入与输出描述 +│ └── run.sh // 执行命令脚本 +``` + +## 代码实现介绍 + +完成自定义算子的开发部署后,可以通过单算子调用的方式来验证单算子的功能。src/main.cpp代码为单算子API执行方式。单算子API执行是基于C语言的API执行算子,无需提供单算子描述文件进行离线模型的转换,直接调用单算子API接口。 + +自定义算子编译部署后,会自动生成单算子API,可以直接在应用程序中调用。算子API的形式一般定义为“两段式接口”,形如: + +```cpp +// 获取算子使用的workspace空间大小 +aclnnStatus aclnnAddCustomGetWorkspaceSize( + const aclTensor *x, + const aclTensor *y, + int64_t caseId, + const aclTensor *out, + uint64_t *workspaceSize, + aclOpExecutor **executor); +// 执行算子 +aclnnStatus aclnnAddCustom( + void *workspace, + uint64_t workspaceSize, + aclOpExecutor *executor, + aclrtStream stream); +``` + +其中aclnnAddCustomGetWorkspaceSize为第一段接口,主要用于计算本次API调用计算过程中需要多少的workspace内存。获取到本次API计算需要的workspace大小之后,按照workspaceSize大小申请Device侧内存,然后调用第二段接口aclnnAddCustom执行计算。具体参考[单算子API调用](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp)章节。 + +## 运行样例算子 + +### 1. 编译算子工程 + +运行此样例前,请参考[编译算子工程](../README.md#operatorcompile)完成前期准备。 + +### 2. 
单算子API调用样例运行 + +- 进入到样例目录 + + 以命令行方式下载样例代码,master分支为例。 + + ```bash + cd ${git_clone_path}/samples/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation + ``` +- 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行单算子API调用样例,最后检验运行结果。具体过程可参见run.sh脚本。 + + ```bash + bash run.sh + ``` + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/07/14 | 新增本readme | diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/common.h b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/common.h new file mode 100644 index 000000000..fadb5c808 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/common.h @@ -0,0 +1,45 @@ +/** + * @file common.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size); + +#endif // COMMON_H diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/op_runner.h b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/op_runner.h new file mode 100644 index 000000000..7b98d5730 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/op_runner.h @@ -0,0 +1,188 @@ +/** + * @file op_runner.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef OP_RUNNER_H +#define OP_RUNNER_H + +#include "acl/acl.h" +#include "aclnn/acl_meta.h" +#include "common.h" +#include "operator_desc.h" + +/** + * Op Runner + */ +class OpRunner { +public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + const size_t GetInputNumDims(size_t index) const; + aclDataType GetInputDataType(size_t index) const; + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + const size_t GetOutputNumDims(size_t index) const; + aclDataType GetOutputDataType(size_t index) const; + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input 
buffer(host memory) by index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template T *GetInputBuffer(size_t index) + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); + } + + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template const T *GetOutputBuffer(size_t index) + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); + } + + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOp(int64_t caseId); + + /** + * @brief Get case index + * @return case index by user input + */ + int64_t GetCaseId(); + +private: + size_t numInputs_; + size_t numOutputs_; + void *workspace_; + int64_t caseId_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; +}; + +#endif // OP_RUNNER_H diff --git 
a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/operator_desc.h b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/operator_desc.h new file mode 100644 index 000000000..cf02d7cec --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/inc/operator_desc.h @@ -0,0 +1,57 @@ +/** + * @file operator_desc.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef OPERATOR_DESC_H +#define OPERATOR_DESC_H + +#include +#include + +#include "acl/acl.h" + +/** + * Op description + */ +struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + std::string opType; + std::vector inputDesc; + std::vector outputDesc; +}; + +#endif // OPERATOR_DESC_H diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh new file mode 100755 index 000000000..894fec61c --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh @@ -0,0 +1,77 @@ 
+#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export DDK_PATH=$_ASCEND_INSTALL_PATH +export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/devlib + +function main { + # 1. 清除遗留生成文件和日志文件 + rm -rf $HOME/ascend/log/* + rm -rf ./input && mkdir -p ./input + rm -rf ./output && mkdir -p ./output + + # 2. 生成输入数据和真值数据 + cd $CURRENT_DIR + python3 scripts/gen_data.py + if [ $? -ne 0 ]; then + echo "ERROR: generate input data failed!" + return 1 + fi + echo "INFO: generate input data success!" + + # 3. 编译可执行文件 + cd $CURRENT_DIR + rm -rf build + mkdir -p build + cd build + cmake ../src -DCMAKE_SKIP_RPATH=TRUE + if [ $? -ne 0 ]; then + echo "ERROR: cmake failed!" + return 1 + fi + echo "INFO: cmake success!" + make + if [ $? -ne 0 ]; then + echo "ERROR: make failed!" + return 1 + fi + echo "INFO: make success!" + + # 4. 运行可执行文件 + export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH + export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} + cd $CURRENT_DIR/output + echo "INFO: execute op!" + msprof --application="./execute_add_op" --ai-core=on --l2=on --output=./prof + if [ $? -ne 0 ]; then + echo "ERROR: acl executable run failed! please check your project!" + return 1 + fi + echo "INFO: acl executable run success!" + + # 5. 精度比对 + cd $CURRENT_DIR + python3 scripts/verify_result.py output/output_z_1.bin output/golden.bin + python3 scripts/verify_result.py output/output_z_2.bin output/golden.bin + if [ $? -ne 0 ]; then + echo "ERROR: verify result failed!" 
+ return 1 + fi +} + +main diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/acl.json b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/acl.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/acl.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/gen_data.py b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/gen_data.py new file mode 100644 index 000000000..17b3d7119 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/gen_data.py @@ -0,0 +1,28 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + row = 5120 + col = 5120 + input_x = np.random.uniform(1, 10, [row, col]).astype(np.float32) + input_y = np.random.uniform(1, 10, [row, col * 3]).astype(np.float32) + y_blocks = np.split(input_y, 3, axis=1) + result_blocks = [input_x + block for block in y_blocks] + golden = np.hstack(result_blocks) + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/verify_result.py new file mode 100644 index 000000000..a5019f30f --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float32 +relative_tol = 1e-4 +absolute_tol = 1e-5 +error_tol = 1e-4 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float32).reshape(-1) + golden = np.fromfile(golden, dtype=np.float32).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/CMakeLists.txt b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/CMakeLists.txt new file mode 100644 index 000000000..32bed518d --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/CMakeLists.txt @@ -0,0 +1,65 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(acl_execute_add) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") + +set(INC_PATH $ENV{DDK_PATH}) + +if (NOT DEFINED ENV{DDK_PATH}) + set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default INC_PATH: ${INC_PATH}") +else () + message(STATUS "env INC_PATH: ${INC_PATH}") +endif() + +set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize/op_api") + +set(LIB_PATH $ENV{NPU_HOST_LIB}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + string(TOLOWER "${CMAKE_SYSTEM_NAME}" SYSTEM_NAME_LOWER) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/${CMAKE_SYSTEM_PROCESSOR}-${SYSTEM_NAME_LOWER}/devlib") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +# Header path +include_directories( + ../inc + ${INC_PATH}/include + ${CUST_PKG_PATH}/include +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${CUST_PKG_PATH}/lib +) + +add_executable(execute_add_op + operator_desc.cpp + op_runner.cpp + main.cpp + common.cpp +) + +target_link_libraries(execute_add_op + ascendcl + cust_opapi + acl_op_compiler + nnopbase + stdc++ +) + +install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/common.cpp b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/common.cpp new file mode 100644 index 000000000..d58716122 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/common.cpp @@ -0,0 +1,80 @@ +/** + * @file common.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "common.h" + +#include +#include +#include + +#include + +extern bool g_isDevice; + +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/main.cpp b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/main.cpp new file mode 100644 index 000000000..d727b0a29 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/main.cpp @@ -0,0 +1,163 @@ +/** + * @file main.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include +#include +#include +#include +#include +#include "acl/acl.h" +#include "common.h" +#include "op_runner.h" + +bool g_isDevice = false; +int deviceId = 0; + +OperatorDesc CreateOpDesc() +{ + // define operator + constexpr uint32_t ROW = 5120; + constexpr uint32_t COL = 5120; + std::vector shapeX{ROW, COL}; + std::vector shapeY{ROW, COL*3}; + std::vector shapeZ{ROW, COL*3}; + aclDataType dataType = ACL_FLOAT; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataType, shapeX.size(), shapeX.data(), format); + opDesc.AddInputTensorDesc(dataType, shapeY.size(), shapeY.data(), format); + opDesc.AddOutputTensorDesc(dataType, shapeZ.size(), shapeZ.data(), format); + return opDesc; +} + +bool SetInputData(OpRunner &runner) +{ + size_t fileSize = 0; + ReadFile("../input/input_x.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0)); + ReadFile("../input/input_y.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); + INFO_LOG("Set input success"); + return true; +} + +bool ProcessOutputData(OpRunner &runner) +{ + int64_t caseId = runner.GetCaseId(); + 
WriteFile("../output/output_z_" + std::to_string(caseId) + ".bin", runner.GetOutputBuffer(0), + runner.GetOutputSize(0)); + INFO_LOG("Write output success"); + return true; +} + +void DestroyResource() +{ + bool flag = false; + if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Reset device %d failed", deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destroy resource failed"); + } else { + INFO_LOG("Destroy resource success"); + } +} + +bool InitResource() +{ + std::string output = "../output"; + + // acl.json is dump or profiling config file + if (aclInit("../scripts/acl.json") != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } + + if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. deviceId is %d", deviceId); + (void)aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestroyResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; +} + +bool RunOp(int64_t caseId) +{ + // create op desc + OperatorDesc opDesc = CreateOpDesc(); + + // create Runner + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init OpRunner failed"); + return false; + } + + // Load inputs + if (!SetInputData(opRunner)) { + ERROR_LOG("Set input data failed"); + return false; + } + + // Run op + if (!opRunner.RunOp(caseId)) { + ERROR_LOG("Run op failed"); + return false; + } + + // process output data + if (!ProcessOutputData(opRunner)) { + ERROR_LOG("Process output data failed"); + return false; + } + + INFO_LOG("Run op success"); + 
return true; +} + +int main(int argc, char **argv) +{ + if (!InitResource()) { + ERROR_LOG("Init resource failed"); + return FAILED; + } + INFO_LOG("Init resource success"); + + int64_t caseId = 1; + if (!RunOp(caseId)) { + DestroyResource(); + return FAILED; + } + + caseId = 2; + if (!RunOp(caseId)) { + DestroyResource(); + return FAILED; + } + + DestroyResource(); + return SUCCESS; +} diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/op_runner.cpp b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/op_runner.cpp new file mode 100644 index 000000000..36d197bc5 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/op_runner.cpp @@ -0,0 +1,462 @@ +/** + * @file op_runner.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "op_runner.h" + +#include +#include + +#include "acl/acl_op_compiler.h" +#include "aclnn_add_custom.h" +#include "common.h" + +using namespace std; + +extern bool g_isDevice; + +OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) +{ + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); + workspace_ = nullptr; +} + +OpRunner::~OpRunner() +{ + if (workspace_ != nullptr) { + (void)aclrtFree(workspace_); + } + for (size_t i = 0; i < numInputs_; ++i) { + (void)aclDestroyTensor(inputTensor_[i]); + (void)aclDestroyDataBuffer(inputBuffers_[i]); + (void)aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostInputs_[i]); + } else { + (void)aclrtFreeHost(hostInputs_[i]); + } + } + + for (size_t i = 0; i < numOutputs_; ++i) { + (void)aclDestroyTensor(outputTensor_[i]); + (void)aclDestroyDataBuffer(outputBuffers_[i]); + (void)aclrtFree(devOutputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostOutputs_[i]); + } else { + (void)aclrtFreeHost(hostOutputs_[i]); + } + } +} + +bool OpRunner::Init() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); + return false; + } + hostInputs_.emplace_back(hostInput); + + aclTensor *inputTensor = + 
aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, + GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); + } + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + devOutputs_.emplace_back(devMem); + outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); + + aclTensor *outputTensor = + aclCreateTensor(GetOutputShape(i).data(), GetOutputNumDims(i), GetOutputDataType(i), nullptr, 0, + GetOutputFormat(i), GetOutputShape(i).data(), GetOutputNumDims(i), devOutputs_[i]); + if (outputTensor == nullptr) { + ERROR_LOG("Create Tensor for output[%zu] failed", i); + return false; + } + outputTensor_.emplace_back(outputTensor); + } + + return true; +} + +const size_t OpRunner::NumInputs() +{ + return numInputs_; +} + +const size_t OpRunner::NumOutputs() +{ + return numOutputs_; +} + +const size_t OpRunner::GetInputSize(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->inputDesc[index]); +} + +const size_t OpRunner::GetInputNumDims(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); +} + +aclDataType OpRunner::GetInputDataType(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->inputDesc[index]); +} + +aclFormat OpRunner::GetInputFormat(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); +} + +std::vector OpRunner::GetInputShape(size_t index) const +{ + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ret; + } + + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + + return ret; +} + +size_t OpRunner::GetOutputSize(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->outputDesc[index]); +} + +const size_t OpRunner::GetOutputNumDims(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); +} + +aclDataType OpRunner::GetOutputDataType(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->outputDesc[index]); +} + +aclFormat OpRunner::GetOutputFormat(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); +} + +std::vector OpRunner::GetOutputShape(size_t index) const +{ + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; + } + + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + return ret; +} + +size_t OpRunner::GetInputElementCount(size_t index) const +{ + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); +} + +size_t OpRunner::GetOutputElementCount(size_t index) const +{ + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); +} + +bool OpRunner::RunOp(int64_t caseId) +{ + caseId_ = caseId; + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy input[%zu] failed", i); + return false; + } + INFO_LOG("Copy input[%zu] success", i); + } + + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create stream failed"); + return false; + } + INFO_LOG("Create stream success"); + + size_t workspaceSize = 0; + aclOpExecutor *handle = nullptr; + auto ret = aclnnAddCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], caseId, outputTensor_[0], &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); + + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } + + ret = aclnnAddCustom(workspace_, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustom success"); + + // The unit of 5000 is ms. + ret = aclrtSynchronizeStreamWithTimeout(stream, 5000); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. 
error code is %d", static_cast(ret)); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } + + (void)aclrtDestroyStream(stream); + return true; +} + +int64_t OpRunner::GetCaseId() +{ + return caseId_; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case ACL_BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT16: + DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } +} + +void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); + return; + } + + auto desc = opDesc_->inputDesc[index]; + PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} + +void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return; + } + + auto desc = opDesc_->outputDesc[index]; + PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/operator_desc.cpp b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/operator_desc.cpp new file mode 100644 index 000000000..90e0ac343 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/src/operator_desc.cpp @@ -0,0 +1,51 @@ +/** + * @file operator_desc.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "operator_desc.h" + +#include "common.h" + +using namespace std; + +OperatorDesc::OperatorDesc() {} + +OperatorDesc::~OperatorDesc() +{ + for (auto *desc : inputDesc) { + aclDestroyTensorDesc(desc); + } + + for (auto *desc : outputDesc) { + aclDestroyTensorDesc(desc); + } +} + +OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + inputDesc.emplace_back(desc); + return *this; +} + +OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, + aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + + outputDesc.emplace_back(desc); + return *this; +} diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json 
b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json new file mode 100644 index 000000000..b76e8928f --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json @@ -0,0 +1,47 @@ +[ + { + "op": "AddCustom", + "language": "cpp", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float" + ] + }, + { + "name": "y", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float" + ] + } + ], + "output_desc": [ + { + "name": "z", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float" + ] + } + ], + "attr": [ + { + "name": "case_id", + "type": "int", + "value": 1 + } + ] + } +] \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_host/add_custom.cpp b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_host/add_custom.cpp new file mode 100644 index 000000000..b9cb652e0 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_host/add_custom.cpp @@ -0,0 +1,49 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "../op_kernel/add_custom_tiling.h" +#include "register/op_def_registry.h" + +namespace optiling { +static ge::graphStatus TilingFunc(gert::TilingContext *context) +{ + constexpr uint32_t BLOCK_DIM = 40; + context->SetBlockDim(BLOCK_DIM); + + // set tiling_key + auto attrs = context->GetAttrs(); + const int64_t *caseId = attrs->GetInt(0); + context->SetTilingKey(*caseId); + + AddCustomTilingData *tiling = context->GetTilingData(); + // x shape is [5120, 5120], y shape is [5120, 15360], so we set outer loop to 3 + tiling->loopOuter = 3U; + + // set workspace size + size_t *currentWorkspace = context->GetWorkspaceSizes(1); + currentWorkspace[0] = 0; + + return ge::GRAPH_SUCCESS; +} +} // namespace optiling + +namespace ops { +class AddCustom : public OpDef { +public: + explicit AddCustom(const char *name) : OpDef(name) + { + this->Input("x").ParamType(REQUIRED).DataType({ge::DT_FLOAT}).Format({ge::FORMAT_ND}); + this->Input("y").ParamType(REQUIRED).DataType({ge::DT_FLOAT}).Format({ge::FORMAT_ND}); + this->Output("z").ParamType(REQUIRED).DataType({ge::DT_FLOAT}).Format({ge::FORMAT_ND}); + this->AICore().SetTiling(optiling::TilingFunc).AddConfig("ascend910b"); + this->Attr("case_id").Int(1); + } +}; +OP_ADD(AddCustom); +} // namespace ops diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom.cpp b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom.cpp new file mode 100644 index 000000000..895e6444f --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom.cpp @@ -0,0 +1,28 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "kernel_operator.h" +#include "add_custom_v1.h" +#include "add_custom_v2.h" + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) +{ + REGISTER_TILING_DEFAULT(AddCustomTilingData); + GET_TILING_DATA(tilingData, tiling); + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIV_ONLY); + if (TILING_KEY_IS(1UL)) { + KernelAddV1 op; + op.Init(x, y, z, &tilingData); + op.Process(); + } else if (TILING_KEY_IS(2UL)) { + KernelAddV2 op; + op.Init(x, y, z, &tilingData); + op.Process(); + } +} diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_tiling.h b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_tiling.h new file mode 100644 index 000000000..d865aba89 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_tiling.h @@ -0,0 +1,18 @@ +/** + * @file add_custom_tiling.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADD_CUSTOM_TILING_H +#define ADD_CUSTOM_TILING_H +#include + +class AddCustomTilingData { +public: + uint32_t loopOuter; +}; +#endif // ADD_CUSTOM_TILING_H diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v1.h b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v1.h new file mode 100644 index 000000000..086bca4f0 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v1.h @@ -0,0 +1,102 @@ +/** + * @file add_custom_v1.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADD_CUSTOM_V1_H +#define ADD_CUSTOM_V1_H +#include "kernel_operator.h" +#include "add_custom_tiling.h" + +using AscendC::TPosition; +class KernelAddV1 { +public: + __aicore__ inline KernelAddV1() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, AddCustomTilingData *tilingPtr) + { + tiling = tilingPtr; + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * TILE_N); + yGm.SetGlobalBuffer((__gm__ float *)y + AscendC::GetBlockIdx() * TILE_N); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * TILE_N); + pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_M * TILE_N * sizeof(float)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_M * TILE_N * sizeof(float)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_M * TILE_N * sizeof(float)); + } + __aicore__ inline void Process() + { + for (uint32_t i = 0; i < tiling->loopOuter; i++) { + for (uint32_t j = 0; j < M_A / TILE_M; j++) { + CopyIn(i, j); + Compute(); + CopyOut(i, j); + } + } + } + +private: + __aicore__ inline void CopyIn(uint32_t progressOuter, uint32_t progressInner) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopyParams paramsX; + paramsX.blockCount = TILE_M; + paramsX.blockLen = TILE_N * sizeof(float) / BLOCK_SIZE; + paramsX.srcStride = (N_A - TILE_N) * sizeof(float) / BLOCK_SIZE; + paramsX.dstStride = 0; + AscendC::DataCopy(xLocal, xGm[progressInner * TILE_M * N_A], paramsX); + + AscendC::DataCopyParams paramsY; + paramsY.blockCount = TILE_M; + paramsY.blockLen = TILE_N * sizeof(float) / BLOCK_SIZE; + paramsY.srcStride = (N_B - TILE_N) * sizeof(float) / BLOCK_SIZE; + paramsY.dstStride = 0; + AscendC::DataCopy(yLocal, yGm[progressOuter * N_A + progressInner * TILE_M * N_B], paramsY); + 
inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TILE_M * TILE_N); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progressOuter, int32_t progressInner) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopyParams paramsZ; + paramsZ.blockCount = TILE_M; + paramsZ.blockLen = TILE_N * sizeof(float) / BLOCK_SIZE; + paramsZ.srcStride = 0; + paramsZ.dstStride = (N_B - TILE_N) * sizeof(float) / BLOCK_SIZE; + AscendC::DataCopy(zGm[progressOuter * N_A + progressInner * TILE_M * N_B], zLocal, paramsZ); + outQueueZ.FreeTensor(zLocal); + } + +private: + static constexpr int32_t BUFFER_NUM = 2; + static constexpr int32_t BLOCK_SIZE = 32; + static constexpr uint32_t M_A = 5120U; + static constexpr uint32_t N_A = M_A; + static constexpr uint32_t M_B = M_A; + static constexpr uint32_t N_B = N_A * 3U; + static constexpr uint32_t TILE_M = 64U; + static constexpr uint32_t TILE_N = 128U; + + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + AddCustomTilingData *tiling; +}; +#endif // ADD_CUSTOM_V1_H \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v2.h b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v2.h new file mode 100644 index 000000000..1f790e84d --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom/op_kernel/add_custom_v2.h @@ -0,0 +1,105 @@ +/** + * @file add_custom_v2.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADD_CUSTOM_V2_H +#define ADD_CUSTOM_V2_H +#include "kernel_operator.h" +#include "add_custom_tiling.h" + +using AscendC::TPosition; +class KernelAddV2 { +public: + __aicore__ inline KernelAddV2() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, AddCustomTilingData *tilingPtr) + { + tiling = tilingPtr; + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * TILE_N); + yGm.SetGlobalBuffer((__gm__ float *)y + AscendC::GetBlockIdx() * TILE_N); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * TILE_N); + // disable the l2 cache mode of y and z + yGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + zGm.SetL2CacheHint(AscendC::CacheMode::CACHE_MODE_DISABLE); + pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_M * TILE_N * sizeof(float)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_M * TILE_N * sizeof(float)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_M * TILE_N * sizeof(float)); + } + __aicore__ inline void Process() + { + for (uint32_t i = 0; i < tiling->loopOuter; i++) { + for (uint32_t j = 0; j < M_A / TILE_M; j++) { + CopyIn(i, j); + Compute(); + CopyOut(i, j); + } + } + } + +private: + __aicore__ inline void CopyIn(uint32_t progressOuter, uint32_t progressInner) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopyParams paramsX; + paramsX.blockCount = TILE_M; + paramsX.blockLen = TILE_N * sizeof(float) / BLOCK_SIZE; + paramsX.srcStride = (N_A - TILE_N) * sizeof(float) / BLOCK_SIZE; + paramsX.dstStride = 0; + AscendC::DataCopy(xLocal, xGm[progressInner * TILE_M * N_A], paramsX); + + AscendC::DataCopyParams paramsY; + paramsY.blockCount = TILE_M; + paramsY.blockLen = TILE_N * sizeof(float) / BLOCK_SIZE; + paramsY.srcStride = (N_B 
- TILE_N) * sizeof(float) / BLOCK_SIZE; + paramsY.dstStride = 0; + AscendC::DataCopy(yLocal, yGm[progressOuter * N_A + progressInner * TILE_M * N_B], paramsY); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TILE_M * TILE_N); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progressOuter, int32_t progressInner) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopyParams paramsZ; + paramsZ.blockCount = TILE_M; + paramsZ.blockLen = TILE_N * sizeof(float) / BLOCK_SIZE; + paramsZ.srcStride = 0; + paramsZ.dstStride = (N_B - TILE_N) * sizeof(float) / BLOCK_SIZE; + AscendC::DataCopy(zGm[progressOuter * N_A + progressInner * TILE_M * N_B], zLocal, paramsZ); + outQueueZ.FreeTensor(zLocal); + } + +private: + static constexpr int32_t BUFFER_NUM = 2; + static constexpr int32_t BLOCK_SIZE = 32; + static constexpr uint32_t M_A = 5120U; + static constexpr uint32_t N_A = M_A; + static constexpr uint32_t M_B = M_A; + static constexpr uint32_t N_B = N_A * 3U; + static constexpr uint32_t TILE_M = 64U; + static constexpr uint32_t TILE_N = 128U; + + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + AddCustomTilingData *tiling; +}; +#endif // ADD_CUSTOM_V2_H \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/README.md b/operator/ascendc/4_best_practices/12_l2_cache_bypass/README.md new file mode 100644 index 000000000..22f239d00 --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/README.md @@ -0,0 +1,162 @@ +## 概述 + +本样例基于AddCustom算子工程,介绍了设置L2 
CacheMode的方法以及其影响场景。 + +## 目录结构介绍 + +``` +├── l2_cache_bypass // L2 CacheMode样例工程目录 +│ ├── AclNNInvocation // 通过单算子API调用的方式调用AddCustom算子 +│ ├── AddCustom // AddCustom算子工程 +│ ├── AddCustom.json // AddCustom算子的原型定义json文件 +│ └── install.sh // 脚本,调用msOpGen生成自定义算子工程,并编译 +``` + +## 算子描述 + +Add算子实现了两个Shape不相同的Tensor相加,返回相加结果的功能。对应的数学表达式为: + +``` +z = x + y +``` + +本样例主要介绍数据搬运中设置合理CacheMode对搬运效率的影响,在Global Memory的数据访问中,如果数据只需要访问一次,后续不需要重复读取,那么这种场景下可以设置Global Memory的CacheMode为CACHE_MODE_DISABLED,在这种模式下数据访问将不经过L2 Cache,避免影响需要重复访问的数据,从而提升数据访问效率。 + +本样例中共有2个实现版本: +add_custom_v1.h:基础版本,从列方向切分,每个核计算5120×128的数据量,共有40个核参与计算。 +add_custom_v2.h:在add_custom_v1基础上,设置y/z的CacheMode为CACHE_MODE_DISABLED,避免替换已进入Cache的x数据,影响搬运效率。 + +## 算子规格描述 + + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x5120 * 5120floatND
y5120 * 15360floatND
算子输出z5120 * 15360floatND
核函数名add_custom
+ +## 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 算子工程介绍 + +其中,算子工程目录AddCustom包含算子的实现文件,如下所示: + +``` +├── AddCustom // AddCustom自定义算子工程 +│ ├── op_host // host侧实现文件 +│ └── op_kernel // kernel侧实现文件 +``` + +CANN软件包中提供了工程创建工具msOpGen,AddCustom算子工程可通过AddCustom.json自动创建,自定义算子工程具体请参考[Ascend C算子开发](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC)>工程化算子开发>创建算子工程 章节。 + +创建完自定义算子工程后,开发者重点需要完成算子host和kernel文件的功能开发。为简化样例运行流程,本样例已在AddCustom目录中准备好了必要的算子实现,install.sh脚本会创建一个CustomOp目录,并将算子实现文件复制到对应目录下,再编译算子。 + +备注:CustomOp目录为生成目录,每次执行install.sh脚本都会删除该目录并重新生成,切勿在该目录下编码算子,会存在丢失风险。 + +## 编译运行样例算子 + +针对自定义算子工程,编译运行包含如下步骤: + +- 调用msOpGen工具生成自定义算子工程; +- 完成算子host和kernel实现; +- 编译自定义算子工程生成自定义算子包; +- 安装自定义算子包到自定义算子库中; +- 调用执行自定义算子; + +详细操作如下所示。 + +### 1. 获取源码包 + +编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 + +### 2. 生成自定义算子工程,复制host和kernel实现并编译算子 + +- 切换到msOpGen脚本install.sh所在目录 + + ```bash + # 若开发者以git命令行方式clone了master分支代码,并切换目录 + cd ${git_clone_path}/samples/operator/ascendc/4_best_practices/12_l2_cache_bypass/ + ``` +- 调用脚本,生成自定义算子工程,复制host和kernel实现并编译算子 + + - 方式一:配置环境变量运行脚本 + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量命令。 + - 默认路径,root用户安装CANN软件包 + + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + + 运行install.sh脚本 + + ```bash + bash install.sh -v [SOC_VERSION] + ``` + - 方式二:指定命令行安装路径来运行脚本 + ```bash + bash install.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH] + ``` + + 参数说明: + + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + - ASCEND_INSTALL_PATH:CANN软件包安装路径 + + 
脚本运行成功后,会在当前目录下创建CustomOp目录,编译完成后,会在CustomOp/build_out中,生成自定义算子安装包custom_opp_\_\.run,例如“custom_opp_ubuntu_x86_64.run”。 + +### 3. 部署自定义算子包 + +- 部署自定义算子包前,请确保存在自定义算子包默认部署路径环境变量ASCEND_OPP_PATH + + ```bash + echo $ASCEND_OPP_PATH + # 输出示例 /usr/local/Ascend/ascend-toolkit/latest/opp + + # 若没有,则需导出CANN环境变量 + source [ASCEND_INSTALL_PATH]/bin/setenv.bash + # 例如 source /usr/local/Ascend/ascend-toolkit/latest/bin/setenv.bash + ``` + + 参数说明: + + - ASCEND_INSTALL_PATH:CANN软件包安装路径,一般和上一步中指定的路径保持一致 +- 在自定义算子安装包所在路径下,执行如下命令安装自定义算子包 + + ```bash + cd CustomOp/build_out + ./custom_opp__.run + ``` + + 命令执行成功后,自定义算子包中的相关文件将部署至opp算子库环境变量ASCEND_OPP_PATH指向的的vendors/customize目录中。 + +### 4. 调用执行算子工程 + +- [单算子API调用AddCustom算子工程](./AclNNInvocation/README.md) + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | -------- | +| 2025/07/14 | 新增样例 | diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/install.sh b/operator/ascendc/4_best_practices/12_l2_cache_bypass/install.sh new file mode 100755 index 000000000..09c8bf0aa --- /dev/null +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/install.sh @@ -0,0 +1,59 @@ +#!/bin/bash +SHORT=v:,i:, +LONG=soc-version:,install-path:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +# only support Ascend910B2 since different soc version have different cache size +VERSION_LIST="Ascend910B2" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + 
_ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH + +OP_NAME=AddCustom +# Generate the op framework +rm -rf CustomOp && msopgen gen -i $OP_NAME.json -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp +# Copy op implementation files to CustomOp +rm -rf CustomOp/op_host/*.cpp +rm -rf CustomOp/op_kernel/*.h && rm -rf CustomOp/op_kernel/*.cpp +cp -rf $OP_NAME/op_kernel CustomOp/ +cp -rf $OP_NAME/op_host CustomOp/ + +# Build CustomOp project +(cd CustomOp && bash build.sh) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md b/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md index 1ebba2146..bd20372ab 100644 --- a/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/README.md @@ -21,10 +21,11 @@ z = x + 2.0 ``` 本样例主要介绍数据搬运中的同地址冲突对搬运效率的影响,在Global Memory的数据访问中,数据访问请求(读/写)在AI 处理器内部会按照512 Bytes对齐进行地址转换,同一时刻如果多核的数据访问请求在转换后落在连续的512 Bytes范围内,出于数据一致性的要求,AI 处理器会对落入同一个512Bytes范围内的请求进行串行处理,导致搬运效率降低,即发生了同地址访问现象。 -本样例中共有3个实现版本: -adds_custom_v1.h:基础实现版本,每个核的计算顺序一致,存在同地址冲突,带宽效率较差 -adds_custom_v2.h:通过调整每个核的计算顺序,避免发生同地址冲突 -adds_custom_v3.h:通过调整切分顺序,避免发生同地址冲突 + +本样例中共有3个实现版本: +adds_custom_v1.h:基础实现版本,每个核的计算顺序一致,存在同地址冲突,带宽效率较差。 +adds_custom_v2.h:通过调整每个核的计算顺序,避免发生同地址冲突。 +adds_custom_v3.h:通过调整切分顺序,避免发生同地址冲突。 当前算子执行机制保证用户kernel入参(包括workspace/tiling)的地址是512 Bytes对齐的,因此用户只需要根据地址的偏移量即可判断两个地址是否会落入连续的512 Bytes范围内。 diff --git a/operator/ascendc/4_best_practices/README.md b/operator/ascendc/4_best_practices/README.md index c40fe61a7..926e4a6ef 100644 --- a/operator/ascendc/4_best_practices/README.md +++ b/operator/ascendc/4_best_practices/README.md @@ -8,6 +8,7 @@ | ------------------------------- | ------------------------------------------ | 
------------------------------------------ | | [4_bank_conflict](./4_bank_conflict) | 基于Ascend C的bank冲突性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [6_group_matmul](./6_group_matmul) | 基于Ascend C的group matmul算子性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | +| [12_l2_cache_bypass](./12_l2_cache_bypass) | 基于Ascend C的L2 CaCheMode算子性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [15_mata_address_conflict](./15_mata_address_conflict) | 基于Ascend C的同地址冲突性能优化样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [21_all_gather_matmul_custom](./21_all_gather_matmul_custom) | 基于Ascend C的AllGatherMatmul算子性能调优样例 | Atlas A2训练系列产品 | | [22_matmul_reduce_scatter_custom](./22_matmul_reduce_scatter_custom) | 基于Ascend C的MatmulReduceScatter算子性能调优样例 | Atlas A2训练系列产品 | @@ -45,6 +46,8 @@ ## 更新说明 | 时间 | 更新事项 | | ---------- | -------------------------------------------- | +| 2025/07/14 | 新增12_l2_cache_bypass样例 | +| 2025/07/03 | 新增15_mata_address_conflict样例 | | 2025/07/01 | 新增4_bank_conflict样例 | | 2024/12/19 | 新增23_matmul_all_reduce_custom样例 | | 2024/12/19 | 新增22_matmul_reduce_scatter_custom样例 | -- Gitee From 435971102b39845b258f842b670c3744bd9f8af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E6=99=BA=E6=85=A7?= Date: Wed, 16 Jul 2025 10:10:16 +0000 Subject: [PATCH 41/97] =?UTF-8?q?!2720=20fix=20comment=20Merge=20pull=20re?= =?UTF-8?q?quest=20!2720=20from=20=E8=B5=B5=E6=99=BA=E6=85=A7/zzh=5F0716?= =?UTF-8?q?=5Ffix=5Fcomment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../11_llm_data_dist/decoder_sample2.cpp | 8 ++++---- .../level1_single_api/11_llm_data_dist/prompt_sample2.cpp | 6 +++--- cplusplus/level1_single_api/11_llm_data_dist/readme.md | 6 ++++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp index c4a186e96..909be6ddd 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp 
+++ b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp @@ -222,7 +222,7 @@ int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char * printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); } - // 4. 等待Prompt写完cache,实际业务场景可通过合适方式实现通知 + // 4. 等待prompt写完cache,实际业务场景可通过合适方式实现通知 std::this_thread::sleep_for(std::chrono::seconds(WAIT_PROMPT_TIME)); // 5. 与prompt建链 @@ -232,7 +232,7 @@ int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char * } linked = true; - // 6. 从prompt拉取Cache + // 6. 从prompt拉取cache if (PullCache(llmDataDist, cacheId) != 0) { Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); return -1; @@ -256,7 +256,7 @@ int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char * return -1; } - // 9. 等待Prompt push cache,实际业务场景可通过合适方式实现通知 + // 9. 等待prompt push cache,实际业务场景可通过合适方式实现通知 std::this_thread::sleep_for(std::chrono::seconds(30)); if (CheckBuffers(buffers, {4, 5, 6, 7}) != 0) { @@ -264,7 +264,7 @@ int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char * return -1; } - // 10. 释放Cache与LlmDatadist + // 10. 释放cache与llmDataDist llmDataDist.Finalize(); printf("[INFO] Finalize success\n"); printf("[INFO] Decoder Sample end\n"); diff --git a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp index 033463d71..52abdafc4 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp +++ b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp @@ -218,7 +218,7 @@ int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *r printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); } - // 4. 等待Decoder拉取cache + // 4. 等待decoder拉取cache std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); // 5. 
切换角色 @@ -234,13 +234,13 @@ int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *r } linked = true; - // 7. 与decoder建链 + // 7. 向decoder push cache if (PushCache(llmDataDist, cacheId) != 0) { Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); return -1; } - // 8. 释放Cache与LlmDatadist + // 8. 释放Cache与llmDataDist llmDataDist.Finalize(); printf("[INFO] Finalize success\n"); printf("[INFO] Prompt Sample end\n"); diff --git a/cplusplus/level1_single_api/11_llm_data_dist/readme.md b/cplusplus/level1_single_api/11_llm_data_dist/readme.md index 1a9cf9f80..9c5546e3a 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/readme.md +++ b/cplusplus/level1_single_api/11_llm_data_dist/readme.md @@ -85,13 +85,15 @@ 3.2 执行sample2 + 此样例使用了单边操作的方式输出kv, p/d两侧注册kv后,decoder向prompt发起建链,然后pull kv,然后两个切换角色,prompt向decoder发起建链,并向decoder push kv + - 执行prompt_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为prompt要使用的device_id, local_host_ip为prompt所在host的ip, remote_host_ip为decoder所在host的ip,如: ``` - LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "0", "device_ip": "10.10.10.1"}]}]}' ./prompt_sample 0 10.10.170.1 10.170.10.2 + LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "0", "device_ip": "10.10.10.1"}]}]}' ./prompt_sample2 0 10.10.170.1 10.170.10.2 ``` - 执行decoder_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为decoder要使用的device_id, local_host_ip为decoder所在host的ip,remote_host_ip为prompt所在host的ip,如: ``` - LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "1", "device_ip": "10.10.10.2"}]}]}' ./decoder_sample 1 10.170.10.2 10.170.10.1 + LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "1", "device_ip": "10.10.10.2"}]}]}' ./decoder_sample2 1 
10.170.10.2 10.170.10.1 ``` **注**:LOCAL_COMM_RES为sample2执行所需环境变量,配置了当前进程所需的通信资源,将传递给llm_datadist作为初始化option; 配置格式与HCCL的ranktable一致,只需要配置本进程第一个参数device_id对应的信息,其中ranktable中的rank_id和server_count字段不需要配置,当前用例配置为A2的ranktable格式,其他环境需参考对应环境的ranktable格式进行配置 \ No newline at end of file -- Gitee From 934dc736007e24735996d1c14077b0d72b64297a Mon Sep 17 00:00:00 2001 From: renjie Date: Thu, 17 Jul 2025 01:27:49 +0000 Subject: [PATCH 42/97] !2718 add workspace * add workspace --- .../op_host/add_custom_tiling_sink_tiling.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp index 563ba0b63..5116eb258 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp @@ -10,7 +10,7 @@ #include "add_custom_tiling_sink_tiling.h" #include "register/device_op_impl_registry.h" - +#include "tiling/platform/platform_ascendc.h" namespace optiling { static constexpr uint32_t BLOCK_DIM = 8; static constexpr uint32_t TILE_NUM = 8; @@ -26,11 +26,13 @@ ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext *context) tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); size_t *currentWorkspace = context->GetWorkspaceSizes(1); - currentWorkspace[0] = DEFAULT_WORKSPACE_SIZE; // 设置运行时workspace大小 + auto platform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); + size_t sysWorkspaceSize = platform.GetLibApiWorkSpaceSize(); + currentWorkspace[0] = sysWorkspaceSize + DEFAULT_WORKSPACE_SIZE; // 
设置运行时workspace大小 if (context->GetInputTensor(1) != nullptr && context->GetInputTensor(1)->GetData() == nullptr) { // 通过判断值依赖InputTensor的Data是否为空指针来确认当前是否处于编译期。 // Tiling下沉场景,编译期需要为算子分配内存,包括其所需的workspace。为了保证运行时的高效性,编译期应根据算子的执行需求,合理设置所需的workspace最大值,以避免内存不足或浪费。 - currentWorkspace[0] = MAX_WORKSPACE_SIZE; + currentWorkspace[0] = sysWorkspaceSize + MAX_WORKSPACE_SIZE; } return ge::GRAPH_SUCCESS; } -- Gitee From 159d68f900ca9e4246ebaca4f39afea35d1db0e8 Mon Sep 17 00:00:00 2001 From: renjie Date: Thu, 17 Jul 2025 03:07:33 +0000 Subject: [PATCH 43/97] !2721 fix tilingsink sample comment * fix tilingsink sample comment --- .../op_host/add_custom_tiling_sink_tiling.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp index 5116eb258..24f17126b 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp @@ -14,7 +14,7 @@ namespace optiling { static constexpr uint32_t BLOCK_DIM = 8; static constexpr uint32_t TILE_NUM = 8; -static constexpr size_t MAX_WORKSPACE_SIZE = 32; // 算子所需workspace的最大值,AddCustomTilingSink样例不需要workspace,不涉及设置,此处设置为固定值仅作为示例 +static constexpr size_t MAX_WORKSPACE_SIZE = 32; // 算子所需用户workspace空间最大值,AddCustomTilingSink算子本身逻辑无需用户workspace空间,此处设置为固定值仅作为示例 static constexpr size_t DEFAULT_WORKSPACE_SIZE = 0; ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext *context) { @@ -28,11 +28,11 @@ ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext *context) size_t *currentWorkspace = context->GetWorkspaceSizes(1); auto platform = 
platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); size_t sysWorkspaceSize = platform.GetLibApiWorkSpaceSize(); - currentWorkspace[0] = sysWorkspaceSize + DEFAULT_WORKSPACE_SIZE; // 设置运行时workspace大小 + currentWorkspace[0] = sysWorkspaceSize + DEFAULT_WORKSPACE_SIZE; // 设置运行时workspace大小,此处为系统workspace空间 + 用户workspace空间 if (context->GetInputTensor(1) != nullptr && context->GetInputTensor(1)->GetData() == nullptr) { // 通过判断值依赖InputTensor的Data是否为空指针来确认当前是否处于编译期。 // Tiling下沉场景,编译期需要为算子分配内存,包括其所需的workspace。为了保证运行时的高效性,编译期应根据算子的执行需求,合理设置所需的workspace最大值,以避免内存不足或浪费。 - currentWorkspace[0] = sysWorkspaceSize + MAX_WORKSPACE_SIZE; + currentWorkspace[0] = sysWorkspaceSize + MAX_WORKSPACE_SIZE; // 设置编译期workspace大小,此处为系统workspace空间 + 用户workspace空间最大值 } return ge::GRAPH_SUCCESS; } -- Gitee From 39409db816ccf0c088cb3c33404e8b4cbc734e71 Mon Sep 17 00:00:00 2001 From: shinoda Date: Fri, 18 Jul 2025 08:24:25 +0000 Subject: [PATCH 44/97] !2715 change to new tiling definitions Merge pull request !2715 from shinoda/master --- .../op_host/matmul_custom.cpp | 16 +++++++--------- .../op_kernel/matmul_custom.cpp | 4 +++- .../matmul_custom_tiling.h | 18 +++++++----------- .../10_matmul_frameworklaunch/README.md | 3 +++ 4 files changed, 20 insertions(+), 21 deletions(-) rename operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/{op_host => op_kernel}/matmul_custom_tiling.h (50%) diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp index 49bc45d64..f1911480c 100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp @@ -7,7 +7,7 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or 
FITNESS FOR A PARTICULAR PURPOSE. */ -#include "matmul_custom_tiling.h" +#include "../op_kernel/matmul_custom_tiling.h" #include "register/op_def_registry.h" #include "tiling/platform/platform_ascendc.h" #include "tiling/tiling_api.h" @@ -45,27 +45,25 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) } cubeTiling.SetBias(true); cubeTiling.SetBufferSpace(-1, -1, -1); - MatmulCustomTilingData tiling; - if (cubeTiling.GetTiling(tiling.cubeTilingData) == -1) { + MatmulCustomTilingData *tiling = context->GetTilingData(); + if (cubeTiling.GetTiling(tiling->cubeTilingData) == -1) { return ge::GRAPH_FAILED; } uint64_t localMemSize; ascendcPlatform.GetCoreMemSize(platform_ascendc::CoreMemType::UB, localMemSize); - tiling.set_localMemSize(localMemSize); + tiling->localMemSize = localMemSize; if (ascendcPlatform.GetSocVersion() == platform_ascendc::SocVersion::ASCEND310P) { context->SetBlockDim(2); context->SetTilingKey(2); } else { - /* SetBlockDim here refers to the number of cube cores, so for separated arch(AIC:AIV=1:2), - vector cores number is set 48 by SetDim, cube core number need to be set 24 here.*/ - context->SetBlockDim(24); + /* SetBlockDim here refers to the number of cube cores, so for separated arch(AIC:AIV=1:2), + vector cores number is set 48 by SetDim, cube core number need to be set 24 here.*/ + context->SetBlockDim(24); context->SetTilingKey(1); } - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); size_t userWorkspaceSize = 0; size_t systemWorkspaceSize = static_cast(ascendcPlatform.GetLibApiWorkSpaceSize()); size_t *currentWorkspace = context->GetWorkspaceSizes(1); diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp index 77a323fca..d0d86f000 
100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp @@ -9,6 +9,7 @@ */ #include "kernel_operator.h" #include "lib/matmul_intf.h" +#include "matmul_custom_tiling.h" using namespace matmul; @@ -141,12 +142,13 @@ MatmulKernel::CalcOffset(int32_t blockIdx, const * @param bias: Bias gm addr. * @param c: C matrix gm addr. * @param workspace: Temporary gm space addr required by matmul calc. - * @param tiling: Tiling data addr. + * @param tiling: Tiling data addr. * @retval None */ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tiling) { + REGISTER_TILING_DEFAULT(MatmulCustomTilingData); GET_TILING_DATA(tilingData, tiling); MatmulKernel matmulKernel; AscendC::TPipe pipe; diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_host/matmul_custom_tiling.h b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h similarity index 50% rename from operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_host/matmul_custom_tiling.h rename to operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h index fd898cba9..8f32f3418 100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_host/matmul_custom_tiling.h +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h @@ -10,16 +10,12 @@ #ifndef MATMUL_TILING_H #define MATMUL_TILING_H -#include "register/tilingdata_base.h" -#include "tiling/tiling_api.h" +#include +#include "kernel_tiling/kernel_tiling.h" -namespace optiling { -BEGIN_TILING_DATA_DEF(MatmulCustomTilingData) 
-TILING_DATA_FIELD_DEF(uint64_t, localMemSize); -TILING_DATA_FIELD_DEF_STRUCT(TCubeTiling, cubeTilingData); -END_TILING_DATA_DEF; +struct MatmulCustomTilingData { + uint64_t localMemSize; + TCubeTiling cubeTilingData; +}; -REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) -} // namespace optiling - -#endif \ No newline at end of file +#endif // MATMUL_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/README.md b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/README.md index 3b58d140e..05aeaa0c3 100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/README.md +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/README.md @@ -46,6 +46,8 @@ C = A * B + Bias ## 算子工程介绍 本样例介绍了多核场景([MatmulCustomMultiCore](./MatmulCustomMultiCore/))和单核场景([MatmulCustomSingleCore](./MatmulCustomSingleCore/))两种MamMul算子实现。可以根据使用场景,自行选择多核算子工程或单核算子工程,并在编译算子工程时,进入选择的算子实现工程中完成编译和安装。 +其中[MatmulCustomMultiCore](./MatmulCustomMultiCore/)使用标准C++语法定义Tiling结构体,[MatmulCustomSingleCore](./MatmulCustomSingleCore/)使用宏定义方式定义Tiling结构体。相较于使用宏定义方式,标准C++语法定义Tiling结构体不仅更符合C++开发者的开发习惯,并且提供了强大的灵活性。 + 以单核算子工程为例,算子工程目录MatmulCustomSingleCore包含算子的实现文件,如下所示: ``` ├── MatmulCustomSingleCore // Matmul自定义算子工程 @@ -140,3 +142,4 @@ CANN软件包中提供了工程创建工具msOpGen,MatmulCustom算子工程可 | 2024/05/27 | 更新readme | | 2024/11/11 | 样例目录调整 | | 2024/11/18 | 算子工程改写为由msOpGen生成 | +| 2025/07/14 | MatmulCustomMultiCore使用标准C++语法定义Tiling结构体 | -- Gitee From 3158b9c6ee7d189119db0562033a4dd6feab616d Mon Sep 17 00:00:00 2001 From: hujiawenKaven Date: Tue, 22 Jul 2025 07:39:04 +0000 Subject: [PATCH 45/97] !2669 add aclrt launch kernel add_custom sample Merge pull request !2669 from hujiawenKaven/aclKernelLaunch --- .../AddKernelInvocationAcl/CMakeLists.txt | 44 ++++ .../AddKernelInvocationAcl/README.md | 72 ++++++ .../AddKernelInvocationAcl/add_custom.cpp | 82 ++++++ .../cmake/cpu_lib.cmake | 9 + .../cmake/npu_lib.cmake | 10 + 
.../AddKernelInvocationAcl/data_utils.h | 240 ++++++++++++++++++ .../AddKernelInvocationAcl/main.cpp | 97 +++++++ .../AddKernelInvocationAcl/run.sh | 113 +++++++++ .../scripts/gen_data.py | 25 ++ .../scripts/verify_result.py | 53 ++++ .../3_add_kernellaunch/README.md | 3 + 11 files changed, 748 insertions(+) create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/README.md create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/add_custom.cpp create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/cpu_lib.cmake create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/npu_lib.cmake create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/data_utils.h create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/main.cpp create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/gen_data.py create mode 100644 operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/verify_result.py diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/CMakeLists.txt b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/CMakeLists.txt new file mode 100644 index 000000000..ec0da5217 --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH 
"/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/add_custom.cpp) + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $:host_intf_pub>> + $:tikicpulib::${SOC_VERSION}>> + ascendcl + $:c_sec>> + $:ascendc_kernels>> +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/README.md b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/README.md new file mode 100644 index 000000000..ce8d471cb --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/README.md @@ -0,0 +1,72 @@ +## 目录结构介绍 +``` +├── AddKernelInvocationAcl +│ ├── cmake // 编译工程文件 +│ ├── scripts +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 验证输出数据和真值数据是否一致的验证脚本 +│ ├── add_custom.cpp // 算子kernel实现 +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 +│ └── run.sh // 编译运行算子的脚本 +``` +## 代码实现介绍 +本调用样例中实现的是固定shape为8*2048的Add算子。 +- kernel实现 + Add算子的数学表达式为: + 
``` + z = x + y + ``` + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用计算接口完成两个输入参数相加,得到最终结果,再搬出到外部存储上。 + + Add算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor xGm和yGm搬运到Local Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal执行加法操作,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor zGm中。具体请参考[add_custom.cpp](./add_custom.cpp)。 + +- 调用实现 + 1. CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; + 2. NPU侧运行验证主要通过使用aclrtLaunchKernelWithConfig函数调用来完成。 + + 应用程序通过ASCENDC_CPU_DEBUG 宏区分代码逻辑运行于CPU侧还是NPU侧。 + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + + - 样例执行 + + ```bash + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + - RUN_MODE:编译方式,可选择CPU调试,NPU上板。支持参数为[cpu / npu] + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 训练系列产品 + - Atlas 推理系列产品AI Core + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + - Atlas 200/500 A2推理产品 + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + ```bash + bash run.sh -r cpu -v Ascendxxxyy + ``` +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/06/05 | 新增本readme | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/add_custom.cpp b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/add_custom.cpp new file mode 100644 index 
000000000..96b37a7c3 --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/add_custom.cpp @@ -0,0 +1,82 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "kernel_operator.h" + +constexpr int32_t TOTAL_LENGTH = 8 * 2048; // total length of data +constexpr int32_t USE_CORE_NUM = 8; // num of core used +constexpr int32_t BLOCK_LENGTH = TOTAL_LENGTH / USE_CORE_NUM; // length computed of each core +constexpr int32_t TILE_NUM = 8; // split data into 8 tiles for each core +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue +constexpr int32_t TILE_LENGTH = BLOCK_LENGTH / TILE_NUM / BUFFER_NUM; // separate to 2 parts, due to double buffer + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z) + { + xGm.SetGlobalBuffer((__gm__ half *)x + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + yGm.SetGlobalBuffer((__gm__ half *)y + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + zGm.SetGlobalBuffer((__gm__ half *)z + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + } + __aicore__ inline void Process() + { + int32_t loopCount = TILE_NUM * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * 
TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocal, yGm[progress * TILE_LENGTH], TILE_LENGTH); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TILE_LENGTH); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * TILE_LENGTH], zLocal, TILE_LENGTH); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; +}; + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z) +{ + KernelAdd op; + op.Init(x, y, z); + op.Process(); +} diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/cpu_lib.cmake b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/cpu_lib.cmake new file mode 100644 index 000000000..751a11941 --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/cpu_lib.cmake @@ -0,0 +1,9 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_options(ascendc_kernels PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/npu_lib.cmake 
b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/npu_lib.cmake new file mode 100644 index 000000000..d862f0064 --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/cmake/npu_lib.cmake @@ -0,0 +1,10 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_fatbin_library(ascendc_kernels ${KERNEL_FILES}) diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/data_utils.h b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/data_utils.h new file mode 100644 index 000000000..1d43459ef --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/data_utils.h @@ -0,0 +1,240 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +/** + * @brief Reads a binary file into memory. + * + * This function opens a binary file, reads its contents into a dynamically allocated memory buffer, + * and returns a pointer to the buffer and the size of the file through output parameters. + * + * @param filePath The path to the binary file to be read. + * @param outBuffer A reference to a unique pointer that will hold the file data. + * @param outSize A reference to a size_t that will hold the size of the file. + * @return true if the file was read successfully, false otherwise. 
+ */ +bool ReadBinaryFile(const char *filePath, std::unique_ptr &outBuffer, size_t &outSize) +{ + FILE *file = fopen(filePath, "rb"); + if (!file) { + ERROR_LOG("Error opening file: %s\n", strerror(errno)); + return false; + } + + fseek(file, 0, SEEK_END); + outSize = ftell(file); + rewind(file); + + outBuffer.reset(new char[outSize]); + if (fread(outBuffer.get(), 1, outSize, file) != outSize) { + ERROR_LOG("Error reading file.\n"); + fclose(file); + return false; + } + + fclose(file); + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/main.cpp b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/main.cpp new file mode 100644 index 000000000..322546543 --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/main.cpp @@ -0,0 +1,97 @@ +/** + * @file main.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z); +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 8; + size_t inputByteSize = 8 * 2048 * sizeof(uint16_t); + size_t outputByteSize = 8 * 2048 * sizeof(uint16_t); + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *x = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *y = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *z = (uint8_t *)AscendC::GmAlloc(outputByteSize); + + ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, y, inputByteSize); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); + ICPU_RUN_KF(add_custom, blockDim, x, y, z); // use this macro for cpu debug + + WriteFile("./output/output_z.bin", z, outputByteSize); + + AscendC::GmFree((void *)x); + AscendC::GmFree((void *)y); + AscendC::GmFree((void *)z); +#else + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost, *yHost, *zHost; + uint8_t *xDevice, *yDevice, *zDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&zHost), outputByteSize)); + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, yHost, inputByteSize); + + CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(yDevice, 
inputByteSize, yHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + aclrtBinHandle binHandle = nullptr; + aclrtFuncHandle funcHandle = nullptr; + aclrtArgsHandle argsHandle = nullptr; + aclrtParamHandle paramHandle = nullptr; + + const char *filePath = "./out/fatbin/ascendc_kernels/ascendc_kernels.o"; + CHECK_ACL(aclrtBinaryLoadFromFile(filePath, nullptr, &binHandle)); + CHECK_ACL(aclrtBinaryGetFunction(binHandle, "add_custom", &funcHandle)); + CHECK_ACL(aclrtKernelArgsInit(funcHandle, &argsHandle)); + + CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&xDevice, sizeof(uintptr_t), ¶mHandle)); + CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&yDevice, sizeof(uintptr_t), ¶mHandle)); + CHECK_ACL(aclrtKernelArgsAppend(argsHandle, (void **)&zDevice, sizeof(uintptr_t), ¶mHandle)); + CHECK_ACL(aclrtKernelArgsFinalize(argsHandle)); + + CHECK_ACL(aclrtLaunchKernelWithConfig(funcHandle, blockDim, stream, nullptr, argsHandle, nullptr)); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output_z.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtBinaryUnLoad(binHandle)); + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh new file mode 100644 index 000000000..6b6d23964 --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh @@ -0,0 +1,113 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) +cd $CURRENT_DIR + 
+BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +SHORT=r:,v:,i:,b:,p:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "ERROR: RUN_MODE error, This sample only support specify cpu or npu!" + exit -1 +fi + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi + +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + 
-DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi + else + ./ascendc_kernels_bbit + fi +) +md5sum output/*.bin +python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/gen_data.py b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/gen_data.py new file mode 100644 index 000000000..ea8ce828a --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/gen_data.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + golden = (input_x + input_y).astype(np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/verify_result.py b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/verify_result.py new file mode 100644 index 000000000..1a21d809a --- /dev/null +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolrence: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/README.md b/operator/ascendc/0_introduction/3_add_kernellaunch/README.md index a0dffa144..dd44718ab 100644 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/README.md +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/README.md @@ -3,6 +3,7 @@ ## 目录结构介绍 ``` ├── 3_add_kernellaunch // 使用核函数直调的方式调用Add自定义算子 +│ ├── AddKernelInvocationAcl // 使用aclrtLaunchKernelWithConfig接口调用核函数样例 │ ├── AddKernelInvocationNeo // Kernel Launch方式调用核函数样例 │ ├── AddKernelInvocationTilingNeo // Kernel Launch方式调用核函数样例,带有Tiling │ └── CppExtensions // pybind方式调用核函数样例,带有Tiling @@ -43,6 +44,7 @@ z = x + y ### 1. 
获取源码包 编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 ### 2. 编译运行样例工程 +- [AddKernelInvocationAcl样例运行](./AddKernelInvocationAcl/README.md) - [AddKernelInvocationNeo样例运行](./AddKernelInvocationNeo/README.md) - [AddKernelInvocationTilingNeo样例运行](./AddKernelInvocationTilingNeo/README.md) - [CppExtensions样例运行](./CppExtensions/README.md) @@ -57,3 +59,4 @@ z = x + y | 2024/06/06 | AddKernelInvocation样例转维护,不再更新,不推荐使用 | | 2024/08/11 | 删除AddKernelInvocation样例 | | 2024/11/11 | 样例目录调整 | | +| 2025/06/05 | 新增AddKernelInvocationAcl样例 | | -- Gitee From 2cf94acaf1688eaf21c72a07567353a1fa5405c9 Mon Sep 17 00:00:00 2001 From: hehongan Date: Thu, 24 Jul 2025 06:24:29 +0000 Subject: [PATCH 46/97] =?UTF-8?q?!2719=20=E5=88=A0=E9=99=A4=E4=BA=86?= =?UTF-8?q?=E5=86=97=E4=BD=99=E7=9A=84=E5=88=A4=E6=96=AD=E6=9D=A1=E4=BB=B6?= =?UTF-8?q?=20Merge=20pull=20request=20!2719=20from=20hehongan/fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../VectorAddMultiCoreWithTiling/add_custom.cpp | 12 ++++++------ .../add_custom.cpp | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom.cpp b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom.cpp index 8b267ea1d..df072c8e6 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom.cpp +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom.cpp @@ -80,7 +80,7 @@ private: { AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopy(xLocal, xGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], 
this->tileLength); AscendC::DataCopy(yLocal, yGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], @@ -114,7 +114,7 @@ private: __aicore__ inline void CopyOut(int32_t progress) { AscendC::LocalTensor zLocal = outQueueZ.DeQue(); - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopy(zGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], zLocal, this->tileLength); } else { @@ -199,7 +199,7 @@ private: { AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopy(xLocal, xGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], this->tileLength); AscendC::DataCopy(yLocal, yGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], @@ -233,7 +233,7 @@ private: __aicore__ inline void CopyOut(int32_t progress) { AscendC::LocalTensor zLocal = outQueueZ.DeQue(); - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopy(zGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], zLocal, this->tileLength); } else { @@ -315,7 +315,7 @@ private: { AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopy(xLocal, xGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], this->tileLength); AscendC::DataCopy(yLocal, yGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], @@ 
-342,7 +342,7 @@ private: __aicore__ inline void CopyOut(int32_t progress) { AscendC::LocalTensor zLocal = outQueueZ.DeQue(); - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopy(zGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], zLocal, this->tileLength); } else { diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom.cpp b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom.cpp index 733e162c3..6baf8e693 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom.cpp +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom.cpp @@ -464,7 +464,7 @@ private: AscendC::DataCopyExtParams copyYParams = {1, (uint32_t)(this->tileLength * sizeof(bfloat16_t) / this->coef), 0, 0, 0}; AscendC::DataCopyPadExtParams padParams = {false, 0, 0, 0}; - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopyPad(xLocal, xGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], copyXParams, padParams); AscendC::DataCopyPad(yLocal, yGm[((progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength) / this->coef], @@ -504,7 +504,7 @@ private: { AscendC::LocalTensor zLocal = outQueueZ.DeQue(); AscendC::DataCopyExtParams copyParams = {1, (uint32_t)(this->tileLength * sizeof(bfloat16_t)), 0, 0, 0}; - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopyPad(zGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], zLocal, copyParams); } 
else { AscendC::DataCopyPad(zGm[progress * this->tileLength], zLocal, copyParams); @@ -606,7 +606,7 @@ private: AscendC::DataCopyExtParams copyYParams = {1, (uint32_t)(this->tileLength * sizeof(int8_t) / this->coef), 0, 0, 0}; AscendC::DataCopyPadExtParams padParams = {false, 0, 0, 0}; - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopyPad(xLocal, xGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], copyXParams, padParams); AscendC::DataCopyPad(yLocal, yGm[((progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength) / this->coef], @@ -646,7 +646,7 @@ private: { AscendC::LocalTensor zLocal = outQueueZ.DeQue(); AscendC::DataCopyExtParams copyParams = {1, (uint32_t)(this->tileLength * sizeof(int8_t)), 0, 0, 0}; - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopyPad(zGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], zLocal, copyParams); } else { AscendC::DataCopyPad(zGm[progress * this->tileLength], zLocal, copyParams); @@ -745,7 +745,7 @@ private: AscendC::DataCopyExtParams copyYParams = {1, (uint32_t)(this->tileLength * sizeof(dataType) / this->coef), 0, 0, 0}; AscendC::DataCopyPadExtParams padParams = {false, 0, 0, 0}; - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopyPad(xLocal, xGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], copyXParams, padParams); AscendC::DataCopyPad(yLocal, yGm[((progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength) / this->coef], @@ -778,7 +778,7 @@ private: { AscendC::LocalTensor zLocal = outQueueZ.DeQue(); AscendC::DataCopyExtParams copyParams = {1, 
(uint32_t)(this->tileLength * sizeof(dataType)), 0, 0, 0}; - if ((progress == (this->tileNum * BUFFER_NUM - 2)) || (progress == (this->tileNum * BUFFER_NUM - 1))) { + if (progress == (this->tileNum * BUFFER_NUM - 1)) { AscendC::DataCopyPad(zGm[(progress - LAST_TWO_TILE) * this->tileLength + this->lastTileLength], zLocal, copyParams); } else { AscendC::DataCopyPad(zGm[progress * this->tileLength], zLocal, copyParams); -- Gitee From 9a8d31b08b5ed3dd7f0e011ed1113b76ca9e9bb1 Mon Sep 17 00:00:00 2001 From: PengC Date: Sat, 26 Jul 2025 07:42:49 +0000 Subject: [PATCH 47/97] =?UTF-8?q?!2724=20=E6=B7=BB=E5=8A=A0=E9=9D=99?= =?UTF-8?q?=E6=80=81=E5=BA=93=E6=A0=B7=E4=BE=8B=20Merge=20pull=20request?= =?UTF-8?q?=20!2724=20from=20PengC/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../8_library_frameworklaunch/README.md | 68 +++ .../static_library/AclNNInvocation/README.md | 82 +++ .../AclNNInvocation/input/.keep | 1 + .../AclNNInvocation/output/.keep | 1 + .../static_library/AclNNInvocation/run.sh | 96 ++++ .../AclNNInvocation/scripts_add/gen_data.py | 25 + .../scripts_add/verify_result.py | 53 ++ .../scripts_matmul/gen_data.py | 36 ++ .../scripts_matmul/verify_result.py | 53 ++ .../AclNNInvocation/src/CMakeLists.txt | 54 ++ .../AclNNInvocation/src/main.cpp | 222 ++++++++ .../static_library/AddCustom.json | 40 ++ .../AddCustom/op_host/add_custom.cpp | 76 +++ .../AddCustom/op_host/add_custom_tiling.h | 22 + .../AddCustom/op_kernel/add_custom.cpp | 93 +++ .../static_library/MatmulCustom.json | 50 ++ .../MatmulCustom/op_host/matmul_custom.cpp | 100 ++++ .../op_host/matmul_custom_tiling.h | 20 + .../MatmulCustom/op_kernel/matmul_custom.cpp | 153 +++++ .../static_library/OpRunner/README.md | 70 +++ .../static_library/OpRunner/inc/common.h | 45 ++ .../static_library/OpRunner/inc/op_runner.h | 182 ++++++ .../OpRunner/inc/operator_desc.h | 57 ++ .../static_library/OpRunner/run.sh | 55 ++ .../OpRunner/src/CMakeLists.txt | 64 +++ 
.../static_library/OpRunner/src/common.cpp | 80 +++ .../static_library/OpRunner/src/op_runner.cpp | 532 ++++++++++++++++++ .../OpRunner/src/operator_desc.cpp | 51 ++ .../static_library/README.md | 137 +++++ .../static_library/install_add.sh | 53 ++ .../static_library/install_matmul.sh | 53 ++ operator/ascendc/0_introduction/README.md | 3 +- 32 files changed, 2626 insertions(+), 1 deletion(-) create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/README.md create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/README.md create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/input/.keep create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/output/.keep create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/gen_data.py create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/verify_result.py create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_matmul/gen_data.py create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_matmul/verify_result.py create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/src/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/src/main.cpp create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom/op_host/add_custom.cpp create mode 100644 
operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom/op_host/add_custom_tiling.h create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom/op_kernel/add_custom.cpp create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom.cpp create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_kernel/matmul_custom.cpp create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/README.md create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/common.h create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/op_runner.h create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/operator_desc.h create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/common.cpp create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/op_runner.cpp create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/operator_desc.cpp create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/README.md create mode 100644 
operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh create mode 100644 operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/README.md b/operator/ascendc/0_introduction/8_library_frameworklaunch/README.md new file mode 100644 index 000000000..e2d6b6774 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/README.md @@ -0,0 +1,68 @@ +## 概述 +本样例基于AddCustom算子工程和MatmulCustom算子工程,介绍了自定义算子工程的算子库集成与调用方式。 + +## 目录结构介绍 +``` +├── 8_library_frameworklaunch +│ └── static_library // 基于AddCustom算子工程和MatmulCustom算子工程,介绍自定义算子工程静态库的集成和使用 +``` + +## 算子描述 +1、Add算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: +``` +z = x + y +``` + +2、Matmul算子实现了快速的Matmul矩阵乘法的运算操作。 + +Matmul的计算公式为: +``` +C = A * B + Bias +``` +- A、B为源操作数,A为左矩阵,形状为\[M, K];B为右矩阵,形状为\[K, N]。 +- C为目的操作数,存放矩阵乘结果的矩阵,形状为\[M, N]。 +- Bias为矩阵乘偏置,形状为\[N]。对A*B结果矩阵的每一行都采用该Bias进行偏置。 + + +## 算子规格描述 +### AddCustom + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x8 * 2048floatND
y8 * 2048floatND
算子输出z8 * 2048floatND
核函数名add_custom
+ +### MatmulCustom +MatmulCustom设置的shape为:M = 1024, N = 640, K = 256。 + + + + + + + + + + + + +
算子类型(OpType)Matmul
算子输入nameshapedata typeformat
aM * Kfloat16ND
bK * Nfloat16ND
biasNfloatND
算子输出cM * NfloatND
核函数名matmul_custom
+ + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas 推理系列产品AI Core +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ---------------------------- | +| 2025/07/22 | 新增本readme | diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/README.md b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/README.md new file mode 100644 index 000000000..d6b6cc91e --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/README.md @@ -0,0 +1,82 @@ +## 目录结构介绍 +``` +├── AclNNInvocation //通过aclnn调用的方式调用AddCustom算子和MatmulCustom算子 +│ ├── input // 存放脚本生成的输入数据目录 +│ ├── output // 存放算子运行输出数据和真值数据的目录 +│ ├── scripts_add // AddCustom算子相关脚本 +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 真值对比文件 +│ ├── scripts_matmul // MatmulCustom算子相关脚本 +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 真值对比文件 +│ ├── src +│ │ ├── CMakeLists.txt // 编译规则文件 +│ │ └── main.cpp // 单算子调用应用的入口 +│ └── run.sh // 执行命令脚本 +``` +## 代码实现介绍 +完成自定义算子的开发部署后,可以通过单算子调用的方式来验证单算子的功能。将链接成的动态库链接到生成的可执行程序中,可以实现功能验证。src/main.cpp代码为单算子API执行方式。单算子API执行是基于C语言的API执行算子,无需提供单算子描述文件进行离线模型的转换,直接调用单算子API接口。 + +自定义算子编译部署后,会自动生成单算子API,可以直接在应用程序中调用。算子API的形式一般定义为“两段式接口”,以AddCustom算子为例,形如: + ```cpp + // 获取算子使用的workspace空间大小 + aclnnStatus aclnnAddCustomGetWorkspaceSize(const aclTensor *x, const aclTensor *y, const aclTensor *out, uint64_t *workspaceSize, aclOpExecutor **executor); + // 执行算子 + aclnnStatus aclnnAddCustom(void *workspace, int64_t workspaceSize, aclOpExecutor *executor, aclrtStream stream); + ``` +其中aclnnAddCustomGetWorkspaceSize为第一段接口,主要用于计算本次API调用计算过程中需要多少的workspace内存。获取到本次API计算需要的workspace大小之后,开发者可以按照workspaceSize大小申请Device侧内存,然后调用第二段接口aclnnAddCustom执行计算。具体参考[单算子API调用](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp)章节。 + +CMakeLists.txt是编译规则文件,下面对其如何链接公共动态库进行介绍。 +- 设置集成多个算子静态库的公共动态库的存放路径 +```bash +set(CUST_PKG_PATH 
$ENV{BASIC_PATH}) +``` +- 设置头文件及库文件路径 +```bash +# Header path +include_directories( + ${INC_PATH} + ${CUST_PKG_PATH}/include +) + +# add host lib path +link_directories( + ${LIB_PATH} + ${CUST_PKG_PATH}/lib +) +``` +- 编译可执行文件 +```bash +add_executable(execute_static_op + main.cpp +) +``` +- 链接公共动态库 +```bash +target_link_libraries(execute_static_op + op_runner + ascendcl +) +``` + +## 运行样例算子 +### 1. 编译运行样例算子 +运行此样例前,请参考[编译运行样例算子](../README.md)完成前期准备。 +### 2. aclnn调用样例运行 + + - 进入到样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation + ``` + - 样例执行 + + 样例执行过程中会自动生成测试数据,然后编译与运行aclnn样例,最后检验运行结果。具体过程可参见run.sh脚本。 + + ```bash + bash run.sh + ``` +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/07/22 | 新增本readme | diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/input/.keep b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/input/.keep new file mode 100644 index 000000000..4f07f1caf --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/input/.keep @@ -0,0 +1 @@ +.keep \ No newline at end of file diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/output/.keep b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/output/.keep new file mode 100644 index 000000000..4f07f1caf --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/output/.keep @@ -0,0 +1 @@ +.keep \ No newline at end of file diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh new file mode 100644 index 000000000..f39cfda92 --- /dev/null +++ 
b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh @@ -0,0 +1,96 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export BASIC_PATH=`pwd`/../output/ +export DDK_PATH_ADD=`pwd`/../package/add_custom +export DDK_PATH_MATMUL=`pwd`/../package/matmul_custom + +if [ "$1" = "Dynamic" ]; then + export COMPILE_MODE=DYNAMIC_ORI +else + export COMPILE_MODE=$2 +fi + +export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/lib64 +export NPU_HOST_INC=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/include + +function main { + # 1. 清除遗留生成文件和日志文件 + rm -rf $HOME/ascend/log/* + rm -rf ./input/*.bin + rm -rf ./output/*.bin + + # 2. 生成输入数据和真值数据 + cd $CURRENT_DIR + python3 scripts_add/gen_data.py + if [ $? -ne 0 ]; then + echo "ERROR: generate add input data failed!" + return 1 + fi + python3 scripts_matmul/gen_data.py + if [ $? -ne 0 ]; then + echo "ERROR: generate matmul input data failed!" + return 1 + fi + echo "INFO: generate input data success!" + + # 3. 编译可执行文件 + cd $CURRENT_DIR + rm -rf build + mkdir -p build + cd build + cmake ../src -DCMAKE_SKIP_RPATH=TRUE + if [ $? -ne 0 ]; then + echo "ERROR: cmake failed!" + return 1 + fi + echo "INFO: cmake success!" + make + if [ $? -ne 0 ]; then + echo "ERROR: make failed!" + return 1 + fi + echo "INFO: make success!" + + # 4. 运行可执行文件 + export LD_LIBRARY_PATH=$NPU_HOST_LIB/:$BASIC_PATH/lib:$DDK_PATH_ADD/lib:$DDK_PATH_MATMUL/lib:$LD_LIBRARY_PATH:./ + cd $CURRENT_DIR/output + echo "INFO: execute op!" + ./execute_static_op + if [ $? 
-ne 0 ]; then + echo "ERROR: acl executable run failed! please check your project!" + return 1 + fi + echo "INFO: acl executable run success!" + + # 5. 精度比对 + cd $CURRENT_DIR + python3 scripts_matmul/verify_result.py output/output_z_matmul.bin output/golden_matmul.bin + if [ $? -ne 0 ]; then + echo "ERROR: verify matmul result failed!" + return 1 + fi + + python3 scripts_add/verify_result.py output/output_z_add.bin output/golden_add.bin + if [ $? -ne 0 ]; then + echo "ERROR: verify add result failed!" + return 1 + fi +} + +main diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/gen_data.py b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/gen_data.py new file mode 100644 index 000000000..37c4f91ca --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/gen_data.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + golden = (input_x + input_y).astype(np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden_add.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/verify_result.py b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/verify_result.py new file mode 100644 index 000000000..1851787ee --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_add/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_matmul/gen_data.py b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_matmul/gen_data.py new file mode 100644 index 000000000..f5aa9c48c --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/scripts_matmul/gen_data.py @@ -0,0 +1,36 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. 
# ===============================================================================

import os

import numpy as np


def gen_golden_data():
    """Generate random inputs and the golden output for the MatmulCustom operator.

    Produces A (M x K float16), B (K x N float16), bias (N float32) and the
    golden C = A @ B + bias (float32), written as raw binaries under ./input
    and ./output.
    """
    m = 1024
    n = 640
    k = 256

    input_a = np.random.randint(1, 10, [m, k]).astype(np.float16)
    input_b = np.random.randint(1, 10, [k, n]).astype(np.float16)
    input_bias = np.random.randint(1, 10, [n]).astype(np.float32)
    # Accumulate in float32, matching the kernel's output precision.
    golden = (np.matmul(input_a.astype(np.float32), input_b.astype(np.float32)) + input_bias).astype(np.float32)

    # makedirs(exist_ok=True) is race-free, unlike the exists()+mkdir() pair.
    os.makedirs("input", exist_ok=True)
    os.makedirs("output", exist_ok=True)
    input_a.tofile("./input/input_a.bin")
    input_b.tofile("./input/input_b.bin")
    input_bias.tofile("./input/input_bias.bin")
    golden.tofile("./output/golden_matmul.bin")


if __name__ == "__main__":
    gen_golden_data()
# ===============================================================================

import sys

import numpy as np

# Comparison tolerances for float32 results.
relative_tol = 1e-6
absolute_tol = 1e-9
error_tol = 1e-4  # allowed fraction of mismatching elements
MAX_PRINTED = 100  # cap on per-element diagnostics printed


def verify_result(output, golden):
    """Compare an operator output file against the golden file.

    Both arguments are paths to raw float32 binaries. Returns True when the
    fraction of elements failing np.isclose(rtol=relative_tol,
    atol=absolute_tol) does not exceed error_tol.
    """
    output = np.fromfile(output, dtype=np.float32).reshape(-1)
    golden = np.fromfile(golden, dtype=np.float32).reshape(-1)
    close = np.isclose(output, golden, rtol=relative_tol, atol=absolute_tol, equal_nan=True)
    bad_indexes = np.where(~close)[0]
    for count, real_index in enumerate(bad_indexes):
        if count >= MAX_PRINTED:
            break
        golden_data = golden[real_index]
        output_data = output[real_index]
        # Guard the relative-diff denominator: a zero golden value would
        # otherwise produce inf / a divide warning.
        denom = abs(golden_data) if golden_data != 0 else 1.0
        print("data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" %
              (real_index, golden_data, output_data, abs(output_data - golden_data) / denom))
    # Empty golden file counts as zero error rather than ZeroDivisionError.
    error_ratio = float(bad_indexes.size) / golden.size if golden.size else 0.0
    print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol))
    return error_ratio <= error_tol


if __name__ == '__main__':
    try:
        if not verify_result(sys.argv[1], sys.argv[2]):
            raise ValueError("[ERROR] result error")
        print("test pass")
    except Exception as e:
        print(e)
        sys.exit(1)
# CMake lowest version requirement
cmake_minimum_required(VERSION 3.5.1)

# project information
project(acl_execute_static)

# Compile options
add_compile_options(-std=c++11)

# Binaries and libraries land in ../output next to the build tree.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output")

# Locations of the pre-built custom-operator packages, supplied via environment.
set(CUST_PKG_PATH $ENV{BASIC_PATH})
set(OP_PKG_PATH_ADD $ENV{DDK_PATH_ADD})
set(OP_PKG_PATH_MATMUL $ENV{DDK_PATH_MATMUL})

# Toolkit host library / include paths and compile mode, from environment.
set(LIB_PATH $ENV{NPU_HOST_LIB})
set(INC_PATH $ENV{NPU_HOST_INC})
set(COMPILE_MODE $ENV{COMPILE_MODE})
message("COMPILE_MODE: " ${COMPILE_MODE})

# Dynamic libraries in the stub directory can only be used for compilation
if (NOT DEFINED ENV{NPU_HOST_LIB})
    string(TOLOWER "${CMAKE_SYSTEM_NAME}" SYSTEM_NAME_LOWER)
    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/${CMAKE_SYSTEM_PROCESSOR}-${SYSTEM_NAME_LOWER}/devlib")
    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
else ()
    message(STATUS "env LIB_PATH: ${LIB_PATH}")
endif()

# Header path
include_directories(
    ${INC_PATH}
    ${CUST_PKG_PATH}/include
)

# add host lib path
link_directories(
    ${LIB_PATH}
    ${CUST_PKG_PATH}/lib
)

add_executable(execute_static_op
    main.cpp
)

# op_runner is the shared library built from the OpRunner directory, which
# bundles the add_custom/matmul_custom static operator libraries.
target_link_libraries(execute_static_op
    op_runner
    ascendcl
)

install(TARGETS execute_static_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include +#include +#include + +#include +#include + +#include "acl/acl.h" +#include "common.h" +#include "op_runner.h" + +#define STATIC_FRAME_TWO 2 +bool g_isDevice = false; +int deviceId = 0; + +OperatorDesc CreateOpDescAdd() +{ + // define operator + std::vector shape{8, 2048}; + aclDataType dataType = ACL_FLOAT16; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); + opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); + opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); + return opDesc; +} + +bool SetInputDataAdd(OpRunner &runner) +{ + size_t fileSize = 0; + ReadFile("../input/input_x.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0)); + ReadFile("../input/input_y.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); + INFO_LOG("Set input success"); + return true; +} + +OperatorDesc CreateOpDescMatmul() +{ + // define operator + std::vector shapeA{1024, 256}; + std::vector shapeB{256, 640}; + std::vector shapeBias{640}; + std::vector shapeC{1024, 640}; + aclDataType dataTypeA = ACL_FLOAT16; + aclDataType dataTypeB = ACL_FLOAT16; + aclDataType dataTypeBias = ACL_FLOAT; + aclDataType dataTypeC = ACL_FLOAT; + aclFormat format = ACL_FORMAT_ND; + OperatorDesc opDesc; + opDesc.AddInputTensorDesc(dataTypeA, shapeA.size(), shapeA.data(), format); + opDesc.AddInputTensorDesc(dataTypeB, shapeB.size(), shapeB.data(), format); + opDesc.AddInputTensorDesc(dataTypeBias, shapeBias.size(), shapeBias.data(), format); + opDesc.AddOutputTensorDesc(dataTypeC, shapeC.size(), shapeC.data(), format); + return opDesc; +} + +bool SetInputDataMatmul(OpRunner &runner) +{ + size_t fileSize = 0; + ReadFile("../input/input_a.bin", fileSize, 
runner.GetInputBuffer(0), runner.GetInputSize(0)); + ReadFile("../input/input_b.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); + ReadFile("../input/input_bias.bin", fileSize, runner.GetInputBuffer(STATIC_FRAME_TWO), + runner.GetInputSize(STATIC_FRAME_TWO)); + INFO_LOG("Set input success"); + return true; +} + +bool ProcessOutputData(OpRunner &runner, std::string opName) +{ + std::string filePath = "../output/output_z_" + opName + ".bin"; + WriteFile(filePath, runner.GetOutputBuffer(0), runner.GetOutputSize(0)); + INFO_LOG("Write output success"); + return true; +} + +void DestroyResource() +{ + bool flag = false; + if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Reset device %d failed", deviceId); + flag = true; + } + INFO_LOG("Reset Device success"); + if (aclFinalize() != ACL_SUCCESS) { + ERROR_LOG("Finalize acl failed"); + flag = true; + } + if (flag) { + ERROR_LOG("Destroy resource failed"); + } else { + INFO_LOG("Destroy resource success"); + } +} + +bool InitResource() +{ + std::string output = "../output"; + if (access(output.c_str(), 0) == -1) { + int ret = mkdir(output.c_str(), 0700); + if (ret == 0) { + INFO_LOG("Make output directory successfully"); + } else { + ERROR_LOG("Make output directory fail"); + return false; + } + } + + if (aclInit(nullptr) != ACL_SUCCESS) { + ERROR_LOG("acl init failed"); + return false; + } + + if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { + ERROR_LOG("Set device failed. 
deviceId is %d", deviceId); + (void)aclFinalize(); + return false; + } + INFO_LOG("Set device[%d] success", deviceId); + + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { + ERROR_LOG("Get run mode failed"); + DestroyResource(); + return false; + } + g_isDevice = (runMode == ACL_DEVICE); + INFO_LOG("Get RunMode[%d] success", runMode); + + return true; +} + +bool RunOpMatmul() +{ + OperatorDesc opDesc = CreateOpDescMatmul(); + + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init matmul_custom OpRunner failed"); + return false; + } + + if (!SetInputDataMatmul(opRunner)) { + ERROR_LOG("Set matmul_custom input data failed"); + return false; + } + + if (!opRunner.RunOpMatmul()) { + ERROR_LOG("Run matmul_custom op failed"); + return false; + } + + if (!ProcessOutputData(opRunner, "matmul")) { + ERROR_LOG("Process matmul_custom output data failed"); + return false; + } + + INFO_LOG("Run matmul_custom op success"); + return true; +} + +bool RunOpAdd() +{ + OperatorDesc opDesc = CreateOpDescAdd(); + + OpRunner opRunner(&opDesc); + if (!opRunner.Init()) { + ERROR_LOG("Init add_custom OpRunner failed"); + return false; + } + + if (!SetInputDataAdd(opRunner)) { + ERROR_LOG("Set add_custom input data failed"); + return false; + } + + if (!opRunner.RunOpAdd()) { + ERROR_LOG("Run add_custom op failed"); + return false; + } + + if (!ProcessOutputData(opRunner, "add")) { + ERROR_LOG("Process add_custom output data failed"); + return false; + } + + INFO_LOG("Run add_custom op success"); + return true; +} + +int main(int argc, char **argv) +{ + if (!InitResource()) { + ERROR_LOG("Init resource failed"); + return FAILED; + } + INFO_LOG("Init resource success"); + + if (!RunOpMatmul()) { + DestroyResource(); + return FAILED; + } + if (!RunOpAdd()) { + DestroyResource(); + return FAILED; + } + + 
DestroyResource(); + + return SUCCESS; +} diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json new file mode 100644 index 000000000..dce1ed85f --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json @@ -0,0 +1,40 @@ +[ + { + "op": "AddCustom", + "language": "cpp", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + }, + { + "name": "y", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ], + "output_desc": [ + { + "name": "z", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ] + } +] \ No newline at end of file diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom/op_host/add_custom.cpp b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom/op_host/add_custom.cpp new file mode 100644 index 000000000..67b4e7fb6 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom/op_host/add_custom.cpp @@ -0,0 +1,76 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 */
#include "add_custom_tiling.h"
#include "register/op_def_registry.h"

namespace optiling {
// Launch block count and tile count per core used by the tiling below.
const uint32_t BLOCK_DIM = 8;
const uint32_t TILE_NUM = 8;

// Fill the AddCustom tiling data (total element count, tile count), set the
// block dim, and request zero extra workspace.
static ge::graphStatus TilingFunc(gert::TilingContext *context)
{
    TilingData tiling;
    uint32_t totalLength = context->GetInputShape(0)->GetOriginShape().GetShapeSize();
    context->SetBlockDim(BLOCK_DIM);
    tiling.set_totalLength(totalLength);
    tiling.set_tileNum(TILE_NUM);
    tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity());
    context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
    size_t *currentWorkspace = context->GetWorkspaceSizes(1);
    currentWorkspace[0] = 0; // the add kernel needs no user workspace
    return ge::GRAPH_SUCCESS;
}
} // namespace optiling

namespace ge {
// Output shape equals the first input's shape (element-wise add).
static graphStatus InferShape(gert::InferShapeContext *context)
{
    const gert::Shape *x1_shape = context->GetInputShape(0);
    gert::Shape *y_shape = context->GetOutputShape(0);
    *y_shape = *x1_shape;
    return GRAPH_SUCCESS;
}

// Output dtype equals the first input's dtype.
static graphStatus InferDataType(gert::InferDataTypeContext *context)
{
    const auto inputDataType = context->GetInputDataType(0);
    context->SetOutputDataType(0, inputDataType);
    return ge::GRAPH_SUCCESS;
}
} // namespace ge

namespace ops {
// Operator prototype registration: two float16 ND inputs, one float16 ND
// output, tiling/infer functions above, supported SoC configs listed below.
class AddCustom : public OpDef {
public:
    explicit AddCustom(const char *name) : OpDef(name)
    {
        this->Input("x")
            .ParamType(REQUIRED)
            .DataType({ge::DT_FLOAT16})
            .Format({ge::FORMAT_ND});
        this->Input("y")
            .ParamType(REQUIRED)
            .DataType({ge::DT_FLOAT16})
            .Format({ge::FORMAT_ND});
        this->Output("z")
            .ParamType(REQUIRED)
            .DataType({ge::DT_FLOAT16})
            .Format({ge::FORMAT_ND});

        this->SetInferShape(ge::InferShape).SetInferDataType(ge::InferDataType);
        this->AICore()
            .SetTiling(optiling::TilingFunc)
            .AddConfig("ascend910")
            .AddConfig("ascend310p")
            .AddConfig("ascend310b")
            .AddConfig("ascend910b");
    }
};
OP_ADD(AddCustom);
} // namespace ops
/**
 * @file add_custom_tiling.h
 *
 * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
#ifndef ADD_CUSTOM_TILING_H
#define ADD_CUSTOM_TILING_H
#include "register/tilingdata_base.h"

namespace optiling {
// Tiling data passed from the host TilingFunc to the AddCustom device kernel.
BEGIN_TILING_DATA_DEF(TilingData)
TILING_DATA_FIELD_DEF(uint32_t, totalLength); // total element count of the input tensor
TILING_DATA_FIELD_DEF(uint32_t, tileNum);     // number of data tiles processed per core
END_TILING_DATA_DEF;

REGISTER_TILING_DATA_CLASS(AddCustom, TilingData)
} // namespace optiling
#endif // ADD_CUSTOM_TILING_H
+ */ +#include "kernel_operator.h" +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) + { + this->blockLength = totalLength / AscendC::GetBlockNum(); + this->tileNum = tileNum; + this->tileLength = this->blockLength / tileNum / BUFFER_NUM; + + xGm.SetGlobalBuffer((__gm__ DTYPE_X *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(DTYPE_X)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Y)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Z)); + } + __aicore__ inline void Process() + { + int32_t loopCount = this->tileNum * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); + AscendC::DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, this->tileLength); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + 
AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t blockLength; + uint32_t tileNum; + uint32_t tileLength; +}; + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) +{ + GET_TILING_DATA(tiling_data, tiling); + KernelAdd op; + op.Init(x, y, z, tiling_data.totalLength, tiling_data.tileNum); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +// call of kernel function +void add_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, + uint8_t *workspace, uint8_t *tiling) +{ + add_custom<<>>(x, y, z, workspace, tiling); +} +#endif \ No newline at end of file diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json new file mode 100644 index 000000000..3886a9c63 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json @@ -0,0 +1,50 @@ +[ + { + "op": "MatmulCustom", + "language": "cpp", + "input_desc": [ + { + "name": "a", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + }, + { + "name": "b", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + }, + { + "name": "bias", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float" + ] + } + ], + "output_desc": [ + { + "name": "c", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float" + ] + } + ] + } +] \ No newline at end of file diff --git 
a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom.cpp b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom.cpp new file mode 100644 index 000000000..7967d95f8 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom.cpp @@ -0,0 +1,100 @@ +/** + * @file matmul_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "matmul_custom_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/platform/platform_ascendc.h" +#include "tiling/tiling_api.h" + +using namespace matmul_tiling; + +namespace optiling { +/** + * @brief Generate matmul tiling. + * @param context: Tiling kernel context. + * @retval Status of GetTiling (GRAPH_SUCCESS or GRAPH_FAILED). 
+ */ +static ge::graphStatus TilingFunc(gert::TilingContext *context) +{ + auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); + + auto shape_a = context->GetInputTensor(0)->GetOriginShape(); + auto shape_b = context->GetInputTensor(1)->GetOriginShape(); + int32_t M = shape_a.GetDim(0); + int32_t N = shape_b.GetDim(1); + int32_t K = shape_a.GetDim(1); + int32_t baseM = 128; + int32_t baseN = 128; + MatmulApiTiling cubeTiling(ascendcPlatform); + cubeTiling.SetAType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetShape(M, N, K); + cubeTiling.SetOrgShape(M, N, K); + cubeTiling.SetFixSplit(baseM, baseN, -1); + cubeTiling.SetBias(true); + cubeTiling.SetBufferSpace(-1, -1, -1); + MatmulCustomTilingData tiling; + if (cubeTiling.GetTiling(tiling.cubeTilingData) == -1) { // Get matmul tiling. 
+ return ge::GRAPH_FAILED; + } + + uint64_t localMemSize; + ascendcPlatform.GetCoreMemSize(platform_ascendc::CoreMemType::UB, localMemSize); + tiling.set_localMemSize(localMemSize); + + if (ascendcPlatform.GetSocVersion() == platform_ascendc::SocVersion::ASCEND310P) { + context->SetTilingKey(2); + } else { + context->SetTilingKey(1); + } + context->SetBlockDim(1); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + size_t userWorkspaceSize = 0; + size_t systemWorkspaceSize = static_cast(ascendcPlatform.GetLibApiWorkSpaceSize()); + size_t *currentWorkspace = context->GetWorkspaceSizes(1); + currentWorkspace[0] = userWorkspaceSize + systemWorkspaceSize; + + return ge::GRAPH_SUCCESS; +} +} // namespace optiling + +namespace ops { +class MatmulCustom : public OpDef { +public: + explicit MatmulCustom(const char *name) : OpDef(name) + { + this->Input("a") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}); + this->Input("b") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}); + this->Input("bias") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}); + this->Output("c") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT}) + .Format({ge::FORMAT_ND}); + + this->AICore() + .SetTiling(optiling::TilingFunc) + .AddConfig("ascend310p") + .AddConfig("ascend910b"); + } +}; + +OP_ADD(MatmulCustom); +} // namespace ops diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h new file mode 100644 index 000000000..135fc5b54 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h @@ -0,0 +1,20 @@ +/** + * @file 
matmul_custom_tiling.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "register/tilingdata_base.h" +#include "tiling/tiling_api.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(MatmulCustomTilingData) +TILING_DATA_FIELD_DEF(uint64_t, localMemSize); +TILING_DATA_FIELD_DEF_STRUCT(TCubeTiling, cubeTilingData); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) +} // namespace optiling diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_kernel/matmul_custom.cpp b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_kernel/matmul_custom.cpp new file mode 100644 index 000000000..137eb4f9d --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_kernel/matmul_custom.cpp @@ -0,0 +1,153 @@ +/** + * @file matmul_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "kernel_operator.h" +#include "lib/matmul_intf.h" + +using namespace matmul; + +__aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) +{ + return (a + b - 1) / b; +} + +template class MatmulKernel { +public: + __aicore__ inline MatmulKernel(){}; + __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, + uint64_t memSize, const TCubeTiling &tiling); + template __aicore__ inline void Process(AscendC::TPipe *pipe); + + __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, + int32_t &offsetC, int32_t &offsetBias); + + Matmul, MatmulType, + MatmulType, MatmulType> + matmulObj; + + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + TCubeTiling tiling; + uint64_t localMemSize = 0; +}; + +/** + * @brief Set matmul input and output gm addr of current core. + * @param a: A matrix gm addr. + * @param b: B matrix gm addr. + * @param bias: Bias gm addr. + * @param c: C matrix gm addr. + * @param workspace: Temporary gm space addr required by matmul calc. + * @param tiling: matmul tiling data. 
+ * @retval None + */ +template +__aicore__ inline void MatmulKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, + GM_ADDR workspace, uint64_t memSize, const TCubeTiling &tiling) +{ + this->tiling = tiling; + this->localMemSize = memSize; + aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ aType *>(a), tiling.M * tiling.Ka); + bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ bType *>(b), tiling.Kb * tiling.N); + cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ cType *>(c), tiling.M * tiling.N); + biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType *>(bias), tiling.N); + + int32_t offsetA = 0; + int32_t offsetB = 0; + int32_t offsetC = 0; + int32_t offsetBias = 0; + CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); // Calculate the gm offset based on the blockidx. + aGlobal = aGlobal[offsetA]; + bGlobal = bGlobal[offsetB]; + cGlobal = cGlobal[offsetC]; + biasGlobal = biasGlobal[offsetBias]; + if (GetSysWorkSpacePtr() == nullptr) { + return; + } +} + +/** + * @brief Main process of matmul calculation. + * @param pipe: Global memory and sync management TPipe object. + * @retval None + */ +template +template +__aicore__ inline void MatmulKernel::Process(AscendC::TPipe *pipe) +{ + if (GetBlockIdx() >= 1) { + return; + } + // Set temp UB space if the setTmpSpace is true. + if constexpr (setTmpSpace) { + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; + pipe->InitBuffer(tmpMMFormatUb, localMemSize); + mmformatUb = tmpMMFormatUb.Get(localMemSize); + matmulObj.SetLocalWorkspace(mmformatUb); + } + + matmulObj.SetTensorA(aGlobal); + matmulObj.SetTensorB(bGlobal); + matmulObj.SetBias(biasGlobal); + matmulObj.IterateAll(cGlobal); + matmulObj.End(); +} + +/** + * @brief Calculate the gm offset based on the blockidx. + * @param blockIdx: Current Core blockidx. + * @param tiling: Matmul tiling data. + * @param offsetA: Gm offset of A matrix. + * @param offsetB: Gm offset of B matrix. + * @param offsetC: Gm offset of C matrix. 
+ * @param offsetBias: Gm offset of Bias matrix. + * @retval None + */ +template +__aicore__ inline void +MatmulKernel::CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, + int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias) +{ + auto mSingleBlocks = Ceiling(tiling.M, tiling.singleCoreM); + auto mCoreIndx = blockIdx % mSingleBlocks; + auto nCoreIndx = blockIdx / mSingleBlocks; + + offsetA = mCoreIndx * tiling.Ka * tiling.singleCoreM; + offsetB = nCoreIndx * tiling.singleCoreN; + offsetC = mCoreIndx * tiling.N * tiling.singleCoreM + nCoreIndx * tiling.singleCoreN; + offsetBias = nCoreIndx * tiling.singleCoreN; +} + +/** + * @brief matmul kernel function entry. + * @param a: A matrix gm addr. + * @param b: B matrix gm addr. + * @param bias: Bias gm addr. + * @param c: C matrix gm addr. + * @param workspace: Temporary gm space addr required by matmul calc. + * @param tiling: Tiling data addr. + * @retval None + */ +extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, + GM_ADDR tiling) +{ + GET_TILING_DATA(tilingData, tiling); + MatmulKernel matmulKernel; + AscendC::TPipe pipe; + REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulKernel.matmulObj, &tilingData.cubeTilingData); // Initialize the matmul object. 
+ matmulKernel.Init(a, b, bias, c, workspace, tilingData.localMemSize, tilingData.cubeTilingData); + if (TILING_KEY_IS(1)) { + matmulKernel.Process(&pipe); + } else if (TILING_KEY_IS(2)) { + matmulKernel.Process(&pipe); + } +} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/README.md b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/README.md new file mode 100644 index 000000000..a7580bf68 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/README.md @@ -0,0 +1,70 @@ +## 目录结构介绍 +``` +├── OpRunner // 对多个静态库的集成和使用 +│ ├── inc // 头文件目录 +│ │ ├── common.h // 声明公共方法类,用于读取二进制文件 +│ │ ├── op_runner.h // 算子描述声明文件,包含算子输入/输出,算子类型以及输入描述与输出描述 +│ │ └── operator_desc.h // 算子运行相关信息声明文件,包含算子输入/输出个数,输入/输出大小等 +│ ├── src +│ │ ├── CMakeLists.txt // 编译规则文件 +│ │ ├── common.cpp // 公共函数,读取二进制文件函数的实现文件 +│ │ ├── op_runner.cpp // 单算子调用主体流程实现文件 +│ │ └── operator_desc.cpp // 构造算子的输入与输出描述 +│ └── run.sh // 执行命令脚本 +``` + +下面对CMakeLists.txt编译规则文件进行介绍。 +- 设置算子包存放路径 +```bash +set(CUST_PKG_ADD_PATH $ENV{DDK_PATH_ADD}) +set(CUST_PKG_MATMUL_PATH $ENV{DDK_PATH_MATMUL}) +``` +- 编译一个公共动态库 +```bash +add_library(op_runner SHARED + operator_desc.cpp + op_runner.cpp + common.cpp +) +``` +- 将两个算子静态库加到公共动态库中 +```bash +find_package(add_custom REQUIRED + PATHS ${CUST_PKG_ADD_PATH} + NO_DEFAULT_PATH +) + +find_package(matmul_custom REQUIRED + PATHS ${CUST_PKG_MATMUL_PATH} + NO_DEFAULT_PATH +) + +target_link_libraries(op_runner PRIVATE + ascendcl + add_custom::static + matmul_custom::static + nnopbase +) +``` +- 将生成的功能动态库安装到指定输出目录中 +```bash +install(TARGETS op_runner DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) +``` + + + +## 执行命令 + - 进入到样例目录 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner + ``` + - 链接静态库 + ```bash + bash run.sh + ``` + 
命令执行成功后,会在父目录static_library目录下,生成output目录,存放生成的libop_runner.so动态库。 + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/07/22 | 新增本readme | diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/common.h b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/common.h new file mode 100644 index 000000000..30239a4de --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/common.h @@ -0,0 +1,45 @@ +/** + * @file common.h + * + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +#define SUCCESS 0 +#define FAILED 1 + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize); + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size); + +#endif // COMMON_H diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/op_runner.h b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/op_runner.h new file mode 100644 index 000000000..4df7ce279 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/op_runner.h @@ -0,0 +1,182 @@ +/** + * @file op_runner.h + * + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef OP_RUNNER_H +#define OP_RUNNER_H + +#include "acl/acl.h" +#include "aclnn/acl_meta.h" +#include "common.h" +#include "operator_desc.h" + +/** + * Op Runner + */ +class OpRunner { +public: + /** + * @brief Constructor + * @param [in] opDesc: op description + */ + explicit OpRunner(OperatorDesc *opDesc); + + /** + * @brief Destructor + */ + virtual ~OpRunner(); + + /** + * @brief Init op runner + */ + bool Init(); + + /** + * @brief Get number of inputs + * @return number of inputs + */ + const size_t NumInputs(); + + /** + * @brief Get number of outputs + * @return number of outputs + */ + const size_t NumOutputs(); + + /** + * @brief Get input size by index + * @param [in] index: input index + * @return size of the input + */ + const size_t GetInputSize(size_t index) const; + const size_t GetInputNumDims(size_t index) const; + aclDataType GetInputDataType(size_t index) const; + aclFormat GetInputFormat(size_t index) const; + + /** + * @brief Get output size by index + * @param [in] index: output index + * @return size of the output + */ + size_t GetOutputSize(size_t index) const; + const size_t GetOutputNumDims(size_t index) const; + aclDataType GetOutputDataType(size_t index) const; + aclFormat GetOutputFormat(size_t index) const; + + /** + * @brief Get input element count by index + * @param i[in] ndex: input index + * @return element count of the input + */ + size_t GetInputElementCount(size_t index) const; + + /** + * @brief Get output element count by index + * @param [in] index: output index + * @return element count of the output + */ + size_t GetOutputElementCount(size_t index) const; + + /** + * @brief Get input shape by index + * @param [in] index: input index + * @return shape of the output + */ + std::vector GetInputShape(size_t index) const; + + /** + * @brief Get output shape by index + * @param [in] index: output index + * @return shape of the output + */ + std::vector GetOutputShape(size_t index) const; + + /** + * @brief Get input 
buffer(host memory) by index + * @tparam T: data type + * @param [in] index: input index + * @return host address of the input + */ + template T *GetInputBuffer(size_t index) + { + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return nullptr; + } + return reinterpret_cast(hostInputs_[index]); + } + + /** + * @brief Get output buffer(host memory) by index + * @tparam T: data type + * @param [in] index: output index + * @return host address of the output + */ + template const T *GetOutputBuffer(size_t index) + { + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return nullptr; + } + + return reinterpret_cast(hostOutputs_[index]); + } + + /** + * @brief Print readable input by index + * @param [in] index: input index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintInput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Print readable output by index + * @param [in] index: output index + * @param [in] elementsPerRow: number of elements per row + */ + void PrintOutput(size_t index, size_t elementsPerRow = 16); + + /** + * @brief Compile static op + * @return compile result + */ + bool CompileStaticOp(); + + /** + * @brief Compile dynamic op + * @return compile result + */ + bool CompileDynamicOp(); + + /** + * @brief Run op + * @return run result + */ + bool RunOpAdd(); + bool RunOpMatmul(); + +private: + size_t numInputs_; + size_t numOutputs_; + void *workspace_; + + std::vector inputBuffers_; + std::vector outputBuffers_; + + std::vector devInputs_; + std::vector devOutputs_; + + std::vector hostInputs_; + std::vector hostOutputs_; + + std::vector inputTensor_; + std::vector outputTensor_; + OperatorDesc *opDesc_; +}; + +#endif // OP_RUNNER_H diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/operator_desc.h 
b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/operator_desc.h new file mode 100644 index 000000000..ae94dcfb8 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/inc/operator_desc.h @@ -0,0 +1,57 @@ +/** + * @file operator_desc.h + * + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef OPERATOR_DESC_H +#define OPERATOR_DESC_H + +#include +#include + +#include "acl/acl.h" + +/** + * Op description + */ +struct OperatorDesc { + /** + * Constructor + */ + explicit OperatorDesc(); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + std::string opType; + std::vector inputDesc; + std::vector outputDesc; +}; + +#endif // OPERATOR_DESC_H diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh new file mode 100644 index 000000000..64bc86a70 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh @@ -0,0 +1,55 @@ +#!/bin/bash 
+CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash + +export DDK_PATH_ADD=`pwd`/../package/add_custom/ +export DDK_PATH_MATMUL=`pwd`/../package/matmul_custom/ + +export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/lib64 +export NPU_HOST_INC=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/include +echo "NPU_HOST_LIB: $NPU_HOST_LIB" +echo "NPU_HOST_INC: $NPU_HOST_INC" +export CUSTLIB_PATH=`pwd`/output +export RELEASE_PATH=`pwd`/../output + +mkdir -p $RELEASE_PATH/lib +mkdir -p $RELEASE_PATH/include + +function main { + cd $CURRENT_DIR + rm -rf build + mkdir -p build + cd build + cmake ../src -DCMAKE_SKIP_RPATH=TRUE + if [ $? -ne 0 ]; then + echo "ERROR: cmake failed!" + return 1 + fi + echo "INFO: cmake success!" + make + if [ $? -ne 0 ]; then + echo "ERROR: make failed!" + return 1 + fi + echo "INFO: make success!" + + cp -rf $CUSTLIB_PATH/* $RELEASE_PATH/lib + cp -rf $CUSTLIB_PATH/../inc/* $RELEASE_PATH/include +} + +main diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/CMakeLists.txt b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/CMakeLists.txt new file mode 100644 index 000000000..149d363ff --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/CMakeLists.txt @@ -0,0 +1,64 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(Driver) + +# Compile options +add_compile_options(-std=c++11) + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output") + +set(CUST_PKG_ADD_PATH $ENV{DDK_PATH_ADD}) +set(CUST_PKG_MATMUL_PATH $ENV{DDK_PATH_MATMUL}) + +set(LIB_PATH $ENV{NPU_HOST_LIB}) +set(INC_PATH $ENV{NPU_HOST_INC}) + +# Dynamic libraries in the stub directory can only be used for compilation +if (NOT DEFINED ENV{NPU_HOST_LIB}) + string(TOLOWER "${CMAKE_SYSTEM_NAME}" SYSTEM_NAME_LOWER) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/${CMAKE_SYSTEM_PROCESSOR}-${SYSTEM_NAME_LOWER}/devlib") + message(STATUS "set default LIB_PATH: ${LIB_PATH}") +else () + message(STATUS "env LIB_PATH: ${LIB_PATH}") +endif() + +# Header path +include_directories( + ../inc + ${INC_PATH} +) + +# add host lib path +link_directories( + ${LIB_PATH} +) + +add_library(op_runner SHARED + operator_desc.cpp + op_runner.cpp + common.cpp +) + +find_package(add_custom REQUIRED + PATHS ${CUST_PKG_ADD_PATH} + NO_DEFAULT_PATH +) + +find_package(matmul_custom REQUIRED + PATHS ${CUST_PKG_MATMUL_PATH} + NO_DEFAULT_PATH +) + +target_link_libraries(op_runner PRIVATE + ascendcl + add_custom::static + matmul_custom::static + nnopbase +) + +install(TARGETS op_runner DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/common.cpp b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/common.cpp new file mode 100644 index 000000000..f0d697736 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/common.cpp @@ -0,0 +1,80 @@ +/** + * @file common.cpp + * + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "common.h" + +#include +#include +#include + +#include + +extern bool g_isDevice; + +bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file %s", filePath.c_str()); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/op_runner.cpp b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/op_runner.cpp new file mode 100644 index 000000000..d00353177 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/op_runner.cpp @@ -0,0 +1,532 @@ +/** + * @file op_runner.cpp + * + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "op_runner.h" + +#include +#include + +#include "acl/acl_op_compiler.h" +#include "aclnn_add_custom.h" +#include "aclnn_matmul_custom.h" +#include "common.h" + +using namespace std; + +#define STATIC_FRAME_TIME 5000 +extern bool g_isDevice; + +OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) +{ + numInputs_ = opDesc->inputDesc.size(); + numOutputs_ = opDesc->outputDesc.size(); + workspace_ = nullptr; +} + +OpRunner::~OpRunner() +{ + if (workspace_ != nullptr) { + (void)aclrtFree(workspace_); + } + for (size_t i = 0; i < numInputs_; ++i) { + (void)aclDestroyTensor(inputTensor_[i]); + (void)aclDestroyDataBuffer(inputBuffers_[i]); + (void)aclrtFree(devInputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostInputs_[i]); + } else { + (void)aclrtFreeHost(hostInputs_[i]); + } + } + + for (size_t i = 0; i < numOutputs_; ++i) { + (void)aclDestroyTensor(outputTensor_[i]); + (void)aclDestroyDataBuffer(outputBuffers_[i]); + (void)aclrtFree(devOutputs_[i]); + if (g_isDevice) { + (void)aclrtFree(hostOutputs_[i]); + } else { + 
(void)aclrtFreeHost(hostOutputs_[i]); + } + } +} + +bool OpRunner::Init() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + devInputs_.emplace_back(devMem); + inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostInput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for input[%zu] failed", i); + return false; + } + } + if (hostInput == nullptr) { + ERROR_LOG("Malloc memory for input[%zu] failed", i); + return false; + } + hostInputs_.emplace_back(hostInput); + + aclTensor *inputTensor = + aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, + GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); + if (inputTensor == nullptr) { + ERROR_LOG("Create Tensor for input[%zu] failed", i); + return false; + } + inputTensor_.emplace_back(inputTensor); + } + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + void *devMem = nullptr; + if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + devOutputs_.emplace_back(devMem); + outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); + + void *hostOutput = nullptr; + if (g_isDevice) { + if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory for output[%zu] failed", i); + return false; + } + } else { + if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory 
for output[%zu] failed", i); + return false; + } + } + if (hostOutput == nullptr) { + ERROR_LOG("Malloc host memory for output[%zu] failed", i); + return false; + } + hostOutputs_.emplace_back(hostOutput); + + aclTensor *outputTensor = + aclCreateTensor(GetOutputShape(i).data(), GetOutputNumDims(i), GetOutputDataType(i), nullptr, 0, + GetOutputFormat(i), GetOutputShape(i).data(), GetOutputNumDims(i), devOutputs_[i]); + if (outputTensor == nullptr) { + ERROR_LOG("Create Tensor for output[%zu] failed", i); + return false; + } + outputTensor_.emplace_back(outputTensor); + } + + return true; +} + +const size_t OpRunner::NumInputs() +{ + return numInputs_; +} + +const size_t OpRunner::NumOutputs() +{ + return numOutputs_; +} + +const size_t OpRunner::GetInputSize(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->inputDesc[index]); +} + +const size_t OpRunner::GetInputNumDims(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); +} + +aclDataType OpRunner::GetInputDataType(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->inputDesc[index]); +} + +aclFormat OpRunner::GetInputFormat(size_t index) const +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->inputDesc[index]); +} + +std::vector OpRunner::GetInputShape(size_t index) const +{ + std::vector ret; + if (index >= numInputs_) { + ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); + return ret; + } + + auto desc = opDesc_->inputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + + return ret; +} + +size_t OpRunner::GetOutputSize(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescSize(opDesc_->outputDesc[index]); +} + +const size_t OpRunner::GetOutputNumDims(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); +} + +aclDataType OpRunner::GetOutputDataType(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_DT_UNDEFINED; + } + + return aclGetTensorDescType(opDesc_->outputDesc[index]); +} + +aclFormat OpRunner::GetOutputFormat(size_t index) const +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ACL_FORMAT_UNDEFINED; + } + + return aclGetTensorDescFormat(opDesc_->outputDesc[index]); +} + +std::vector OpRunner::GetOutputShape(size_t index) const +{ + std::vector ret; + if (index >= numOutputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return ret; + } + + auto desc = opDesc_->outputDesc[index]; + for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { + int64_t dimSize; + if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { + ERROR_LOG("get dims from tensor desc failed. 
dims index = %zu", i); + ret.clear(); + return ret; + } + ret.emplace_back(dimSize); + } + return ret; +} + +size_t OpRunner::GetInputElementCount(size_t index) const +{ + if (index >= opDesc_->inputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); +} + +size_t OpRunner::GetOutputElementCount(size_t index) const +{ + if (index >= opDesc_->outputDesc.size()) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); + return 0; + } + + return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); +} + +bool OpRunner::RunOpAdd() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy AddCustom input[%zu] failed", i); + return false; + } + INFO_LOG("Copy AddCustom input[%zu] success", i); + } + + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create AddCustom stream failed"); + return false; + } + INFO_LOG("Create AddCustom stream success"); + + size_t workspaceSize = 0; + aclOpExecutor *handle = nullptr; + auto ret = + aclnnAddCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], outputTensor_[0], &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Get AddCustom Operator Workspace failed. 
error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); + + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } + + ret = aclnnAddCustom(workspace_, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Execute AddCustom Operator failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnAddCustom success"); + + // The unit of 5000 is ms. + ret = aclrtSynchronizeStreamWithTimeout(stream, STATIC_FRAME_TIME); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. error code is %d", static_cast(ret)); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } + + (void)aclrtDestroyStream(stream); + return true; +} + +bool OpRunner::RunOpMatmul() +{ + for (size_t i = 0; i < numInputs_; ++i) { + auto size = GetInputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { + ERROR_LOG("Copy MatmulCustom input[%zu] failed", i); + return false; + } + INFO_LOG("Copy MatmulCustom input[%zu] success", i); + } + + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + ERROR_LOG("Create MatmulCustom stream failed"); + return false; + } + 
INFO_LOG("Create MatmulCustom stream success"); + + size_t workspaceSize = 0; + aclOpExecutor *handle = nullptr; + auto ret = aclnnMatmulCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], outputTensor_[0], + &workspaceSize, &handle); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Get Operator matmul Workspace failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnMatmulCustomGetWorkspaceSize success, workspace size %lu", workspaceSize); + + if (workspaceSize != 0) { + if (aclrtMalloc(&workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { + ERROR_LOG("Malloc device memory failed"); + } + } + + ret = aclnnMatmulCustom(workspace_, workspaceSize, handle, stream); + if (ret != ACL_SUCCESS) { + (void)aclrtDestroyStream(stream); + ERROR_LOG("Execute Operator matmul failed. error code is %d", static_cast(ret)); + return false; + } + INFO_LOG("Execute aclnnMatmulCustom success"); + + ret = aclrtSynchronizeStreamWithTimeout(stream, STATIC_FRAME_TIME); + if (ret != SUCCESS) { + ERROR_LOG("Synchronize stream failed. 
error code is %d", static_cast(ret)); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Synchronize stream success"); + + for (size_t i = 0; i < numOutputs_; ++i) { + auto size = GetOutputSize(i); + aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; + if (g_isDevice) { + kind = ACL_MEMCPY_DEVICE_TO_DEVICE; + } + if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { + INFO_LOG("Copy output[%zu] success", i); + (void)aclrtDestroyStream(stream); + return false; + } + INFO_LOG("Copy output[%zu] success", i); + } + + (void)aclrtDestroyStream(stream); + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case ACL_BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT8: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT16: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT32: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_INT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_UINT64: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT16: + DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case ACL_DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } +} + +void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) +{ + if (index >= numInputs_) { + ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); + return; + } + + auto desc = opDesc_->inputDesc[index]; + PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} + +void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) +{ + if (index >= numOutputs_) { + ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); + return; + } + + auto desc = opDesc_->outputDesc[index]; + PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); +} diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/operator_desc.cpp b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/operator_desc.cpp new file mode 100644 index 000000000..303fe601a --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/src/operator_desc.cpp @@ -0,0 +1,51 @@ +/** + * @file operator_desc.cpp + * + * Copyright (C) 2025-2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "operator_desc.h" + +#include "common.h" + +using namespace std; + +OperatorDesc::OperatorDesc() {} + +OperatorDesc::~OperatorDesc() +{ + for (auto *desc : inputDesc) { + aclDestroyTensorDesc(desc); + } + + for (auto *desc : outputDesc) { + aclDestroyTensorDesc(desc); + } +} + +OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + inputDesc.emplace_back(desc); + return *this; +} + +OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, + aclFormat format) +{ + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + if (desc == nullptr) { + ERROR_LOG("create tensor failed"); + return *this; + } + + outputDesc.emplace_back(desc); + return *this; +} diff --git 
a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/README.md b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/README.md new file mode 100644 index 000000000..e7459b609 --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/README.md @@ -0,0 +1,137 @@ +## 概述 +本样例基于AddCustom算子工程和MatmulCustom算子工程,介绍了自定义算子工程静态库的集成和使用。 + +## 目录结构介绍 +``` +├── static_library // 使用自定义算子工程静态库方式调用AddCustom算子和MatmulCustom算子 +│ ├── AclNNInvocation // 基于AddCustom算子工程和MatmulCustom算子工程,介绍自定义算子工程静态库的集成和使用 +│ ├── OpRunner // 对多个自定义算子工程的aclnn接口进行二次封装 +│ ├── AddCustom // AddCustom算子工程 +│ ├── MatmulCustom // MatmulCustom算子工程 +│ ├── AddCustom.json // AddCustom算子的原型定义json文件 +│ ├── MatmulCustom.json // MatmulCustom算子的原型定义json文件 +│ ├── install_matmul.sh // 脚本,调用msOpGen生成MatmulCustom自定义算子工程 +│ └── install_add.sh // 脚本,调用msOpGen生成AddCustom自定义算子工程 +``` + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas 推理系列产品AI Core +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 算子工程介绍 +其中,算子工程目录AddCustom包含算子的实现文件,如下所示: +``` +├── AddCustom // AddCustom自定义算子工程 +│ ├── op_host // host侧实现文件 +│ └── op_kernel // kernel侧实现文件 +``` +算子工程目录MatmulCustom包含算子的实现文件,如下所示: +``` +├── MatmulCustom // Matmul自定义算子工程 +│ ├── op_host // host侧实现文件 +│ └── op_kernel // kernel侧实现文件 +``` + +CANN软件包中提供了工程创建工具msOpGen,AddCustom算子工程和Matmul算子工程可通过AddCustom.json和MatmulCustom.json自动创建,自定义算子工程具体请参考[Ascend C算子开发](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC)>工程化算子开发>创建算子工程 章节。 + +创建完自定义算子工程后,开发者重点需要完成算子host和kernel文件的功能开发。为简化样例运行流程,本样例已在AddCustom目录和MatmulCustom目录中准备好了必要的算子实现,install_add.sh脚本会创建一个CustomOpAdd目录,install_matmul.sh脚本会创建一个CustomOpMatmul目录,并将对应的算子实现文件复制到对应目录下。之后可以修改配置文件再编译算子。 + +备注:CustomOpAdd和CustomOpMatmul目录为生成目录,每次执行对应脚本都会删除该目录并重新生成,切勿在该目录下编码算子,会存在丢失风险。 + +## 编译运行样例算子 +针对自定义算子工程,编译运行包含如下步骤: +- 调用msOpGen工具生成自定义算子工程; +- 完成算子host和kernel实现; +- 编译自定义算子工程生成自定义算子静态库; +- 将静态库链接到公共动态库中; +- 调用执行自定义算子; + +详细操作如下所示。 +### 1. 
获取源码包 +编译运行此样例前,请参考[准备:获取样例代码](../../README.md#codeready)获取源码包。 + +### 2. 生成自定义算子工程,复制host和kernel实现并编译算子 + - 切换到msOpGen脚本install_add.sh和install_matmul.sh所在目录 + ```bash + # 若开发者以git命令行方式clone了master分支代码,并切换目录 + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library + ``` + + - 调用脚本,生成自定义算子工程,复制host和kernel实现并编译算子 + - 方式一:配置环境变量运行脚本 + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 运行msOpGen脚本 + ```bash + bash install_add.sh -v [SOC_VERSION] + bash install_matmul.sh -v [SOC_VERSION] + ``` + - 方式二:指定命令行安装路径来运行脚本 + ```bash + bash install_add.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH] + bash install_matmul.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH] + ``` + 参数说明: + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 推理系列产品AI Core + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + - ASCEND_INSTALL_PATH:CANN软件包安装路径 + + 脚本运行成功后,会在当前目录下创建CustomOpAdd和CustomOpMatmul目录。 + 进入CustomOpAdd目录,修改CMakePresets.json文件中的vendor_name字段的value修改为add_custom,将ASCEND_PACK_SHARED_LIBRARY字段的value设置为True,从而开启动态库和静态库编译。 + 进入CustomOpMatmul目录,修改CMakePresets.json文件中的vendor_name字段的value修改为matmul_custom,将ASCEND_PACK_SHARED_LIBRARY字段的value设置为True,从而开启动态库和静态库编译。 + 修改完成后,分别在对应目录下执行bash build.sh命令,进行编译。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/CustomOpAdd + bash build.sh + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/CustomOpMatmul + bash 
build.sh + ``` + 编译完成后,会在CustomOpAdd/build_out和CustomOpMatmul/build_out中,生成自定义算子动态库和静态库存放目录op_api。 + +### 3. 链接静态库到动态库 +- 首先,请确保存在默认部署路径环境变量ASCEND_OPP_PATH +```bash +echo $ASCEND_OPP_PATH +# 输出示例 /usr/local/Ascend/ascend-toolkit/latest/opp + +# 若没有,则需导出CANN环境变量 +source [ASCEND_INSTALL_PATH]/bin/setenv.bash +# 例如 source /usr/local/Ascend/ascend-toolkit/latest/bin/setenv.bash +``` +参数说明: + +ASCEND_INSTALL_PATH:CANN软件包安装路径,一般和上一步中指定的路径保持一致 + +- 将编译生成的算子静态库存放到同一目录中,方便后续链接到动态库时指定链接目录。 + + 在当前static_library目录下,执行如下拷贝命令,将AddCusotm和MatmulCustom算子的静态库拷贝到临时目录package中。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library + rm -rf package; mkdir package + cp -r CustomOpAdd/build_out/op_api ./package/add_custom + cp -r CustomOpMatmul/build_out/op_api ./package/matmul_custom + ``` + +- 之后,进入OpRunner目录,执行命令,将两个静态库链接到同一个动态库中。详见:[对多个静态库的集成和使用](./OpRunner/README.md) + +### 4. 调用执行算子工程 +- [aclnn调用AddCustom和MatmulCustom算子工程](./AclNNInvocation/README.md) + + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ---------------------------- | +| 2025/07/22 | 新增本readme | diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh new file mode 100644 index 000000000..e701007ef --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh @@ -0,0 +1,53 @@ +#!/bin/bash +SHORT=v:,i:, +LONG=soc-version:,install-path:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 
Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH + +OP_NAME=AddCustom +rm -rf CustomOpAdd +# Generate the op framework +msopgen gen -i $OP_NAME.json -c ai_core-${SOC_VERSION} -lan cpp -out CustomOpAdd +# Copy op implementation files to CustomOpAdd +cp -rf $OP_NAME/* CustomOpAdd diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh new file mode 100644 index 000000000..314fea9ca --- /dev/null +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh @@ -0,0 +1,53 @@ +#!/bin/bash +SHORT=v:,i:, +LONG=soc-version:,install-path:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ 
-n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi +source $_ASCEND_INSTALL_PATH/bin/setenv.bash +export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH + +OP_NAME=MatmulCustom +rm -rf CustomOpMatmul +# Generate the op framework +msopgen gen -i $OP_NAME.json -c ai_core-${SOC_VERSION} -lan cpp -out CustomOpMatmul +# Copy op implementation files to CustomOpMatmul +cp -rf MatmulCustom/* CustomOpMatmul diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 0c9e46bf7..43cfc5593 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -22,6 +22,7 @@ | [5_addn_kernellaunch](./5_addn_kernellaunch) | 基于Ascend C的AddN自定义Vector算子及KernelLaunch调用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品| | [6_addtemplate_frameworklaunch](./6_addtemplate_frameworklaunch) | 基于Ascend C的Add(模板参数算子)自定义Vector算子及FrameworkLaunch调用样例 | Atlas 训练系列产品
Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品
Atlas 200/500 A2推理产品 | | [7_broadcast_frameworklaunch](./7_broadcast_frameworklaunch) | 基于Ascend C的Broadcast自定义Vector算子及FrameworkLaunch调用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品 | +| [8_library_frameworklaunch](./8_library_frameworklaunch) | 基于Ascend C的Add自定义算子和Matmul自定义算子的自定义算子工程静态库集成和使用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [9_leakyrelu_frameworklaunch](./9_leakyrelu_frameworklaunch) | 基于Ascend C的LeakyReLU自定义Vector算子及FrameworkLaunch调用样例 | Atlas 训练系列产品
Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品
Atlas 200/500 A2推理产品 | | [10_matmul_frameworklaunch](./10_matmul_frameworklaunch) | 基于Ascend C的Matmul自定义Cube算子及FrameworkLaunch调用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [11_matmul_kernellaunch](./11_matmul_kernellaunch) | 基于Ascend C的Matmul自定义Cube算子及KernelLaunch调用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品 | @@ -66,4 +67,4 @@ | 时间 | 更新事项 | | ---------- | -------------------------------------------- | | 2024/11/11 | 样例目录调整 | -| 2025/01/06 | 新增21_dumptensor_kernellaunch、22_dumptensor_frameworklaunch、23_matmul_debug_frameworklaunch和24_matmul_debug_kernellaunch样例 | \ No newline at end of file +| 2025/07/22 | 新增8_library_frameworklaunch样例 | \ No newline at end of file -- Gitee From 0d5d195f4a330d5997cb9ba9e818298487daf6ee Mon Sep 17 00:00:00 2001 From: mingling Date: Sat, 26 Jul 2025 09:42:46 +0000 Subject: [PATCH 48/97] !2725 add hard sync samples Merge pull request !2725 from mingling/master --- .../BareMixInvocation/CMakeLists.txt | 57 +++++ .../BareMixInvocation/README.md | 102 +++++++++ .../BareMixInvocation/bare_aic.h | 167 ++++++++++++++ .../BareMixInvocation/bare_aiv.h | 65 ++++++ .../BareMixInvocation/baremix_custom.cpp | 27 +++ .../BareMixInvocation/cmake/cpu_lib.cmake | 12 ++ .../BareMixInvocation/cmake/npu_lib.cmake | 15 ++ .../BareMixInvocation/data_utils.h | 203 ++++++++++++++++++ .../BareMixInvocation/main.cpp | 89 ++++++++ .../BareMixInvocation/run.sh | 121 +++++++++++ .../BareMixInvocation/scripts/gen_data.py | 34 +++ .../scripts/verify_result.py | 53 +++++ .../22_baremix_kernellaunch/README.md | 43 ++++ operator/ascendc/0_introduction/README.md | 6 +- 14 files changed, 993 insertions(+), 1 deletion(-) create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp create mode 100644 
operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/cpu_lib.cmake create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/data_utils.h create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/verify_result.py create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt new file mode 100644 index 000000000..dfb278e1c --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +# ${KERNEL_FILES} are used to compile library, push files written by ascendc in ${KERNEL_FILES}. 
+# ref to cmake/npu.cmake ascendc_library, cmake/cpu.cmake add_library +file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/baremix_custom.cpp) +set(CUSTOM_ASCEND310P_LIST "Ascend310P1" "Ascend310P3") + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() + +add_executable(ascendc_kernels_bbit + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp +) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_compile_definitions(ascendc_kernels_bbit PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $,$>:host_intf_pub>> + $:ascendcl>> + ascendc_kernels_${RUN_MODE} + tiling_api + register + platform + ascendalog + dl +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md new file mode 100644 index 000000000..83574f3e8 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md @@ -0,0 +1,102 @@ +## 目录结构介绍 +``` +├── BareMixInvocation +│ ├── cmake // 编译工程文件 +│ ├── scripts +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本文件 +│ │ └── verify_result.py // 验证输出数据和真值数据是否一致的验证脚本 +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 +│ ├── bare_aic.h // AIC侧kernel实现 +│ ├── bare_aiv.h // AIV侧kernel实现 +│ ├── baremix_custom.cpp // 算子kernel实现 +│ └── run.sh // 编译运行算子的脚本 +``` + +## 算子规格描述 +在核函数直调样例中,算子实现支持的shape为:M = 32, N = 32, K = 32。 + + + + + + + + + + +
算子输入nameshapedata typeformat
aM * Kfloat16ND
bK * Nfloat16ND
算子输出cM * NfloatND
核函数名baremix_custom
+ +## 代码实现介绍 +本样例中实现的是[m, n, k]固定为[32, 32, 32]的MatmulLeakyRelu算子,并使用Ascend C基础Api实现。 +- kernel实现 + Matmul算子的数学表达式为: + ``` + C = A * B + Bias + ``` + 其中A的形状为[32, 32], B的形状为[32, 32], C的形状为[32, 32]。具体请参考[baremix_custom.cpp](./baremix_custom.cpp)。 + + LeakyRelu算子的数学表达式为: + ``` + C = C > 0 ? C : C * S + ``` + 其中S为用户设置的LeakyRelu比例系数 + + **注:当使用硬件分离架构的产品如Atlas A2训练系列产品/Atlas 800I A2推理产品时,需注意AIV核与AIC核之间的同步关系。本样例实现了分离架构中手动实现AIV核与AIC核之间同步,调用AIC核完成matmul计算后调用AIV核完成LeakyRelu的计算,设置kernelType及用于同步的接口如下: + ```c++ + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); + ... + AscendC::CrossCoreSetFlag(flagId); + ... + AscendC::CrossCoreWaitFlag(flagId); + ``` + +- 调用实现 + 1. CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; + 2. NPU侧运行验证主要通过使用ACLRT_LAUNCH_KERNEL内核调用宏来完成。 + + 应用程序通过ASCENDC_CPU_DEBUG宏区分代码逻辑运行于CPU侧还是NPU侧。 + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + + - 样例执行 + + ```bash + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + - RUN_MODE:编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu / sim / npu]。 + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如"Name"对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + + ```bash + bash run.sh -r cpu -v Ascendxxxyy + ``` + + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/7/23 | 更新本readme | diff --git 
a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h new file mode 100644 index 000000000..d92080434 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h @@ -0,0 +1,167 @@ +/** + * @file bare_aic.h + * + * Copyright (C) 2025. Huawei Technologies Co. Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef BARE_AIC_H +#define BARE_AIC_H +#include "kernel_operator.h" + +// half type, cube block: [16, 16] +constexpr uint32_t CUBE_BLOCK = 16; +constexpr uint32_t CUBE_BLOCK_SIZE = 16 * 16; + +class KernelBareAic { +public: + __aicore__ inline KernelBareAic() + { + aSize = m * k; + bSize = k * n; + cSize = m * n; + } + __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR c) + { + aGM.SetGlobalBuffer((__gm__ half *)a); + bGM.SetGlobalBuffer((__gm__ half *)b); + cGM.SetGlobalBuffer((__gm__ float *)c); + pipe.InitBuffer(inQueueA1, 1, aSize * sizeof(half)); + pipe.InitBuffer(inQueueA2, 1, aSize * sizeof(half)); + pipe.InitBuffer(inQueueB1, 1, bSize * sizeof(half)); + pipe.InitBuffer(inQueueB2, 1, bSize * sizeof(half)); + pipe.InitBuffer(outQueueCO1, 1, cSize * sizeof(float)); + } + __aicore__ inline void Process() + { + CopyIn(); + SplitA(); + SplitB(); + Compute(); + CopyOut(); + AscendC::CrossCoreSetFlag<0x2, PIPE_FIX>(3); + + } + +private: + __aicore__ inline uint32_t CeilCubeBlock(uint32_t len) { return (len + CUBE_BLOCK - 1) / CUBE_BLOCK; } + + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor a1Local = inQueueA1.AllocTensor(); + AscendC::LocalTensor b1Local = inQueueB1.AllocTensor(); + + AscendC::Nd2NzParams nd2nzA1Params; + nd2nzA1Params.ndNum = 1; + nd2nzA1Params.nValue = m; + nd2nzA1Params.dValue = k; + 
nd2nzA1Params.srcNdMatrixStride = 0; + nd2nzA1Params.srcDValue = k; + nd2nzA1Params.dstNzC0Stride = CeilCubeBlock(m) * CUBE_BLOCK; + nd2nzA1Params.dstNzNStride = 1; + nd2nzA1Params.dstNzMatrixStride = 0; + AscendC::DataCopy(a1Local, aGM, nd2nzA1Params); + + AscendC::Nd2NzParams nd2nzB1Params; + nd2nzB1Params.ndNum = 1; + nd2nzB1Params.nValue = k; + nd2nzB1Params.dValue = n; + nd2nzB1Params.srcNdMatrixStride = 0; + nd2nzB1Params.srcDValue = n; + nd2nzB1Params.dstNzC0Stride = CeilCubeBlock(k) * CUBE_BLOCK; + nd2nzB1Params.dstNzNStride = 1; + nd2nzB1Params.dstNzMatrixStride = 0; + AscendC::DataCopy(b1Local, bGM, nd2nzB1Params); + + inQueueA1.EnQue(a1Local); + inQueueB1.EnQue(b1Local); + } + + __aicore__ inline void SplitA() + { + AscendC::LocalTensor a1Local = inQueueA1.DeQue(); + AscendC::LocalTensor a2Local = inQueueA2.AllocTensor(); + + uint32_t dstOffset = CeilCubeBlock(k) * CUBE_BLOCK_SIZE; + uint32_t srcOffset = CUBE_BLOCK_SIZE; + // Nz -> Zz + AscendC::LoadData2DParams loadDataParams; + loadDataParams.repeatTimes = CeilCubeBlock(k); + loadDataParams.srcStride = CeilCubeBlock(m); + loadDataParams.dstGap = 0; + loadDataParams.ifTranspose = false; + for (int i = 0; i < CeilCubeBlock(m); ++i) { + AscendC::LoadData(a2Local[i * dstOffset], a1Local[i * srcOffset], loadDataParams); + } + + inQueueA2.EnQue(a2Local); + inQueueA1.FreeTensor(a1Local); + } + __aicore__ inline void SplitB() + { + AscendC::LocalTensor b1Local = inQueueB1.DeQue(); + AscendC::LocalTensor b2Local = inQueueB2.AllocTensor(); + + uint32_t dstOffset = CeilCubeBlock(n) * CUBE_BLOCK_SIZE; + uint32_t srcOffset = CUBE_BLOCK_SIZE; + // Nz -> Zn + AscendC::LoadData2DParams loadDataParams; + loadDataParams.repeatTimes = CeilCubeBlock(n); + loadDataParams.srcStride = CeilCubeBlock(k); + loadDataParams.dstGap = 0; + loadDataParams.ifTranspose = true; + for (int i = 0; i < CeilCubeBlock(k); ++i) { + AscendC::LoadData(b2Local[i * dstOffset], b1Local[i * srcOffset], loadDataParams); + } + + 
inQueueB1.FreeTensor(b1Local); + inQueueB2.EnQue(b2Local); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor a2Local = inQueueA2.DeQue(); + AscendC::LocalTensor b2Local = inQueueB2.DeQue(); + AscendC::LocalTensor c1Local = outQueueCO1.AllocTensor(); + AscendC::MmadParams mmadParams; + mmadParams.m = m; + mmadParams.n = n; + mmadParams.k = k; + AscendC::Mmad(c1Local, a2Local, b2Local, mmadParams); + outQueueCO1.EnQue(c1Local); + inQueueA2.FreeTensor(a2Local); + inQueueB2.FreeTensor(b2Local); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor c1Local = outQueueCO1.DeQue(); + AscendC::FixpipeParamsV220 fixpipeParams; + fixpipeParams.nSize = n; + fixpipeParams.mSize = m; + fixpipeParams.srcStride = m; + fixpipeParams.dstStride = n; + + fixpipeParams.ndNum = 1; + fixpipeParams.srcNdStride = 0; + fixpipeParams.dstNdStride = 0; + AscendC::Fixpipe(cGM, c1Local, fixpipeParams); + outQueueCO1.FreeTensor(c1Local); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueA1; + AscendC::TQue inQueueA2; + AscendC::TQue inQueueB1; + AscendC::TQue inQueueB2; + AscendC::TQue outQueueCO1; + + AscendC::GlobalTensor aGM; + AscendC::GlobalTensor bGM; + AscendC::GlobalTensor cGM; + uint16_t m = 32, k = 32, n = 32; + uint16_t aSize, bSize, cSize; +}; + +#endif // BARE_AIC_H diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h new file mode 100644 index 000000000..e39d79d09 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h @@ -0,0 +1,65 @@ +/** + * @file bare_aiv.h + * + * Copyright (C) 2025. Huawei Technologies Co. Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef BARE_AIV_H +#define BARE_AIV_H +#include "kernel_operator.h" + +class KernelBareAiv { +public: + __aicore__ inline KernelBareAiv() { cSize = m * n; } + __aicore__ inline void Init(GM_ADDR c) + { + cGM.SetGlobalBuffer((__gm__ float *)c + AscendC::GetBlockIdx() * (cSize / 2)); // C:V = 1:2, split tensor for vector calc + + pipe.InitBuffer(reluOutQueue, 1, cSize * sizeof(float)); + pipe.InitBuffer(reluInQueue, 1, cSize * sizeof(float)); + } + __aicore__ inline void Process() + { + AscendC::CrossCoreWaitFlag(3); + LeakyreluCopyIn(); + LeakyreluCompute(); + LeakyreluCopyOut(); + } + +private: + __aicore__ inline void LeakyreluCopyIn() + { + AscendC::LocalTensor reluInLocal = reluInQueue.AllocTensor(); + + AscendC::DataCopy(reluInLocal, cGM, cSize / 2); + reluInQueue.EnQue(reluInLocal); + } + __aicore__ inline void LeakyreluCompute() + { + AscendC::LocalTensor reluInLocal = reluInQueue.DeQue(); + AscendC::LocalTensor reluOutLocal = reluOutQueue.AllocTensor(); + AscendC::LeakyRelu(reluOutLocal, reluInLocal, (float)0.5, cSize); + reluOutQueue.EnQue(reluOutLocal); + reluInQueue.FreeTensor(reluInLocal); + } + __aicore__ inline void LeakyreluCopyOut() + { + AscendC::LocalTensor reluOutLocal = reluOutQueue.DeQue(); + AscendC::DataCopy(cGM, reluOutLocal, cSize / 2); + reluOutQueue.FreeTensor(reluOutLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue reluOutQueue; + AscendC::TQue reluInQueue; + + AscendC::GlobalTensor cGM; + uint16_t m = 32, n = 32; + uint16_t cSize; +}; + +#endif // BARE_AIV_H diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp new file mode 100644 index 000000000..683abee11 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp @@ -0,0 +1,27 @@ +/** + * @file baremix_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co. 
Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "bare_aic.h" +#include "bare_aiv.h" + +extern "C" __global__ __aicore__ void baremix_custom(GM_ADDR a, GM_ADDR b, GM_ADDR c) +{ + // set mix + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); + if ASCEND_IS_AIC { + KernelBareAic mm; + mm.Init(a, b, c); + mm.Process(); + } + if ASCEND_IS_AIV { + KernelBareAiv op; + op.Init(c); + op.Process(); + } +} diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/cpu_lib.cmake b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/cpu_lib.cmake new file mode 100644 index 000000000..acb98ec90 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/cpu_lib.cmake @@ -0,0 +1,12 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels_${RUN_MODE} PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> +) +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels_${RUN_MODE} DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake new file mode 100644 index 000000000..3b8e2c506 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake @@ -0,0 +1,15 @@ +if(EXISTS 
${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) + +ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> +) diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/data_utils.h b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/data_utils.h new file mode 100644 index 000000000..9d3445780 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/data_utils.h @@ -0,0 +1,203 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) 
fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + 
break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp new file mode 100644 index 000000000..30568f793 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp @@ -0,0 +1,89 @@ +/** + * @file main.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +#include "aclrtlaunch_baremix_custom.h" +#else +#include "tikicpulib.h" +extern "C" void baremix_custom(uint8_t *a, uint8_t *b, uint8_t *c); +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t M = 32; + uint32_t N = 32; + uint32_t K = 32; + size_t aFileSize = M * K * sizeof(int16_t); // uint16_t represent half + size_t bFileSize = K * N * sizeof(int16_t); // uint16_t represent half + size_t cFileSize = M * N * sizeof(float); + uint32_t blockDim = 1; + +#ifdef ASCENDC_CPU_DEBUG + AscendC::SetKernelMode(KernelMode::MIX_MODE); + uint8_t *a = (uint8_t *)AscendC::GmAlloc(aFileSize); + uint8_t *b = (uint8_t *)AscendC::GmAlloc(bFileSize); + uint8_t *c = (uint8_t *)AscendC::GmAlloc(cFileSize); + + ReadFile("./input/x1_gm.bin", aFileSize, a, aFileSize); + ReadFile("./input/x2_gm.bin", bFileSize, b, bFileSize); + + ICPU_RUN_KF(baremix_custom, blockDim, a, b, c); + + WriteFile("./output/output.bin", c, cFileSize); + + AscendC::GmFree((void *)a); + AscendC::GmFree((void *)b); + AscendC::GmFree((void *)c); +#else + CHECK_ACL(aclInit(nullptr)); + 
int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *aHost; + uint8_t *aDevice; + CHECK_ACL(aclrtMallocHost((void **)(&aHost), aFileSize)); + CHECK_ACL(aclrtMalloc((void **)&aDevice, aFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + ReadFile("./input/x1_gm.bin", aFileSize, aHost, aFileSize); + CHECK_ACL(aclrtMemcpy(aDevice, aFileSize, aHost, aFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + uint8_t *bHost; + uint8_t *bDevice; + CHECK_ACL(aclrtMallocHost((void **)(&bHost), bFileSize)); + CHECK_ACL(aclrtMalloc((void **)&bDevice, bFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + ReadFile("./input/x2_gm.bin", bFileSize, bHost, bFileSize); + CHECK_ACL(aclrtMemcpy(bDevice, bFileSize, bHost, bFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + uint8_t *cHost; + uint8_t *cDevice; + CHECK_ACL(aclrtMallocHost((void **)(&cHost), cFileSize)); + CHECK_ACL(aclrtMalloc((void **)&cDevice, cFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ACLRT_LAUNCH_KERNEL(baremix_custom)(blockDim, stream, aDevice, bDevice, cDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(cHost, cFileSize, cDevice, cFileSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output.bin", cHost, cFileSize); + + CHECK_ACL(aclrtFree(aDevice)); + CHECK_ACL(aclrtFreeHost(aHost)); + CHECK_ACL(aclrtFree(bDevice)); + CHECK_ACL(aclrtFreeHost(bHost)); + CHECK_ACL(aclrtFree(cDevice)); + CHECK_ACL(aclrtFreeHost(cHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh new file mode 100644 index 000000000..0c9c7f40b --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh @@ -0,0 +1,121 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname 
${BASH_SOURCE:-$0}) + pwd +) + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +SHORT=r:,v:,i:,b:,p:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu sim npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + exit -1 +fi + +VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "sim" ]; then + # in case of running op in simulator, use stub .so instead + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + if [ ! 
$CAMODEL_LOG_PATH ]; then + export CAMODEL_LOG_PATH=$(pwd)/sim_log + fi + if [ -d "$CAMODEL_LOG_PATH" ]; then + rm -rf $CAMODEL_LOG_PATH + fi + mkdir -p $CAMODEL_LOG_PATH +elif [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi + +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi + else + ./ascendc_kernels_bbit + fi +) +md5sum output/*.bin +python3 scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py new file mode 100644 index 000000000..b0d849c9f --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py @@ -0,0 +1,34 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import numpy as np +import os + + +def gen_golden_data(): + M = 32 + N = 32 + K = 32 + + x1_gm = np.random.uniform(1, 10, [M, K]).astype(np.float16) + x2_gm = np.random.uniform(1, 10, [K, N]).astype(np.float16) + golden1 = (np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32))).astype(np.float32) + + negative_slope = np.array(2.0, dtype=np.float32) + golden = np.where(golden1 > 0, golden1, golden1 * negative_slope) + os.system("mkdir -p input") + os.system("mkdir -p output") + x1_gm.tofile("./input/x1_gm.bin") + x2_gm.tofile("./input/x2_gm.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data() diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/verify_result.py new file mode 100644 index 000000000..24b30f8d4 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float32 +relative_tol = 1e-6 +absolute_tol = 1e-9 +error_tol = 1e-4 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float32).reshape(-1) + golden = np.fromfile(golden, dtype=np.float32).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md b/operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md new file mode 100644 index 000000000..caa08b177 --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md @@ -0,0 +1,43 @@ +## 概述 +本样例介绍基于基础API的MatmulLeakyRelu算子实现及核函数直调方法。 + +## 目录结构介绍 +``` +└── 22_baremix_kernellaunch // 使用核函数直调的方式调用MatmulLeakyRelu自定义算子。 + └── BareMixInvocation // Kernel Launch方式调用核函数样例。 +``` + +## 算子描述 +算子使用基础API包括DataCopy、LoadData、Mmad等,实现MatmulLeakyRelu的运算操作。 + +MatmulLeakyRelu的计算公式为: + +``` +C = A * B + Bias +C = C > 0 ? 
C : C * 2.0 +``` + +- A、B为源操作数,A为左矩阵,形状为\[M, K];B为右矩阵,形状为\[K, N]。 +- C为目的操作数,存放矩阵乘结果的矩阵,形状为\[M, N]。 +- Bias为矩阵乘偏置,形状为\[N]。对A*B结果矩阵的每一行都采用该Bias进行偏置。 + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 编译运行样例算子 + +### 1. 获取源码包 +编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 + +### 2. 编译运行样例工程 +- [BareMixInvocation样例运行](./BareMixInvocation/README.md) + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------------------ | +| 2025/7/23 | 新增22_baremix_kernellaunch | + +## 已知issue + + 暂无 diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 43cfc5593..740d02daf 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -36,6 +36,7 @@ | [19_unaligned_wholereduces_kernellaunch](./19_unaligned_wholereduces_kernellaunch) | 基于Ascend C的非对齐WholeReduceSum自定义算子及KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [20_mmad_kernellaunch](./20_mmad_kernellaunch) | 基于Ascend C基础API的Matmul自定义Cube算子及KernelLaunch调用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [21_vectoradd_kernellaunch](./21_vectoradd_kernellaunch) | 基于Ascend C的Add多场景自定义Vector算子的KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [22_baremix_kernellaunch](./22_baremix_kernellaunch) | 基于Ascend C的手写核间同步的MatmulLeayrelu算子的KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 ## 获取样例代码 @@ -67,4 +68,7 @@ | 时间 | 更新事项 | | ---------- | -------------------------------------------- | | 2024/11/11 | 样例目录调整 | -| 2025/07/22 | 新增8_library_frameworklaunch样例 | \ No newline at end of file +| 2025/01/06 | 新增21_vectoradd_kernellaunch样例 | +| 2025/07/22 | 新增8_library_frameworklaunch样例 | +| 2025/7/23 | 新增22_baremix_kernellaunch | + -- Gitee From ad68247efaba62862a5d9ee9ff308e67071cec75 Mon Sep 17 00:00:00 2001 From: hehongan Date: Mon, 28 Jul 2025 08:21:21 +0000 Subject: [PATCH 49/97] =?UTF-8?q?!2726=20=E4=BF=AE=E6=AD=A3=E4=B8=BA?= =?UTF-8?q?=E9=80=9A=E7=94=A8=E7=9A=84=E5=90=91=E4=B8=8A=E5=8F=96=E6=95=B4?= =?UTF-8?q?=20Merge=20pull=20request=20!2726=20from=20hehongan/new=5Fmaste?= =?UTF-8?q?r1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../VectorAddMultiCoreWithTiling/add_custom_tiling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.cpp b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.cpp index ca3f2cee6..8940940b0 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.cpp +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.cpp @@ -29,7 +29,7 @@ void TilingParamsCalc(uint32_t length, uint32_t alignNum, tileNum = 1U; } if (length < MAX_AVAILABLE_UB_BLOCK_NUM * alignNum) { - tileLength = ((static_cast(length) / alignNum) + 1) / BUFFER_NUM * BUFFER_NUM * alignNum; + tileLength = 
((static_cast(length) + alignNum - 1) / alignNum) * alignNum; lastTileLength = tileLength; } else { tileLength = MAX_AVAILABLE_UB_BLOCK_NUM * alignNum; -- Gitee From 112d1a11587e27e3f7d666c87e3d80a661424cba Mon Sep 17 00:00:00 2001 From: mingling Date: Mon, 28 Jul 2025 14:12:40 +0000 Subject: [PATCH 50/97] !2727 add matmulleakyrelu sample Merge pull request !2727 from mingling/master --- .../BareMixInvocation/CMakeLists.txt | 5 +- .../BareMixInvocation/README.md | 29 ++- .../BareMixInvocation/bare_aic.h | 167 ------------- .../BareMixInvocation/bare_aiv.h | 65 ----- .../BareMixInvocation/baremix_custom.cpp | 234 +++++++++++++++++- .../baremix_custom_tiling.cpp | 75 ++++++ .../BareMixInvocation/cmake/npu_lib.cmake | 2 + .../BareMixInvocation/main.cpp | 118 ++++++--- .../BareMixInvocation/run.sh | 9 +- .../BareMixInvocation/scripts/gen_data.py | 23 +- .../22_baremix_kernellaunch/README.md | 6 +- operator/ascendc/0_introduction/README.md | 2 +- 12 files changed, 419 insertions(+), 316 deletions(-) delete mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h delete mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h create mode 100644 operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom_tiling.cpp diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt index dfb278e1c..2dffea34c 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/CMakeLists.txt @@ -16,6 +16,7 @@ endif() # ${KERNEL_FILES} are used to compile library, push files written by ascendc in ${KERNEL_FILES}. 
# ref to cmake/npu.cmake ascendc_library, cmake/cpu.cmake add_library file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/baremix_custom.cpp) + set(CUSTOM_ASCEND310P_LIST "Ascend310P1" "Ascend310P3") if("${RUN_MODE}" STREQUAL "cpu") @@ -28,6 +29,7 @@ endif() add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/baremix_custom_tiling.cpp ) target_compile_options(ascendc_kernels_bbit PRIVATE @@ -37,6 +39,7 @@ target_compile_options(ascendc_kernels_bbit PRIVATE target_compile_definitions(ascendc_kernels_bbit PRIVATE $<$>:CUSTOM_ASCEND310P> + SOC_VERSION="${SOC_VERSION}" ) target_link_libraries(ascendc_kernels_bbit PRIVATE @@ -54,4 +57,4 @@ install(TARGETS ascendc_kernels_bbit LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) +) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md index 83574f3e8..d61d60171 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md @@ -1,6 +1,6 @@ ## 目录结构介绍 ``` -├── BareMixInvocation +├── BareMixInvocation // 通过更底层的编码方式,实现MatmulLeayrelu融合算子的样例 │ ├── cmake // 编译工程文件 │ ├── scripts │ │ ├── gen_data.py // 输入数据和真值数据生成脚本文件 @@ -8,14 +8,13 @@ │ ├── CMakeLists.txt // 编译工程文件 │ ├── data_utils.h // 数据读入写出函数 │ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 -│ ├── bare_aic.h // AIC侧kernel实现 -│ ├── bare_aiv.h // AIV侧kernel实现 +│ ├── baremix_custom_tiling.cpp // 算子tiling实现 │ ├── baremix_custom.cpp // 算子kernel实现 │ └── run.sh // 编译运行算子的脚本 ``` ## 算子规格描述 -在核函数直调样例中,算子实现支持的shape为:M = 32, N = 32, K = 32。 +在核函数直调样例中,算子实现支持的shape为:M = 128, N = 128, K = 256。 @@ -29,13 +28,13 @@
算子输入nameshapedata typeformat
aM * Kfloat16ND
## 代码实现介绍 -本样例中实现的是[m, n, k]固定为[32, 32, 32]的MatmulLeakyRelu算子,并使用Ascend C基础Api实现。 +本样例中实现的是[m, n, k]固定为[128, 128, 256]的MatmulLeakyRelu算子,仅在AIC核调用Matmul高阶API并计算,完成后调用AIV核完成LeakyRelu的计算。 - kernel实现 Matmul算子的数学表达式为: ``` C = A * B + Bias ``` - 其中A的形状为[32, 32], B的形状为[32, 32], C的形状为[32, 32]。具体请参考[baremix_custom.cpp](./baremix_custom.cpp)。 + 其中A的形状为[128, 256], B的形状为[256, 128], C的形状为[128, 128]。具体请参考[baremix_custom.cpp](./baremix_custom.cpp)。 LeakyRelu算子的数学表达式为: ``` @@ -43,13 +42,23 @@ ``` 其中S为用户设置的LeakyRelu比例系数 - **注:当使用硬件分离架构的产品如Atlas A2训练系列产品/Atlas 800I A2推理产品时,需注意AIV核与AIC核之间的同步关系。本样例实现了分离架构中手动实现AIV核与AIC核之间同步,调用AIC核完成matmul计算后调用AIV核完成LeakyRelu的计算,设置kernelType及用于同步的接口如下: + **本样例关键代码介绍如下: + - 设置ASCENDC_CUBE_ONLY,仅在AIC核进行matmul计算 + - 设置kernel_type + - 使用ASCEND_IS_AIC/ASCEND_IS_AIV隔离AIC/AIV核上的代码 + - 使用同步接口,自行完成核间同步 ```c++ + #define ASCENDC_CUBE_ONLY + ... KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); ... - AscendC::CrossCoreSetFlag(flagId); + if ASCEND_IS_AIC { + AscendC::CrossCoreSetFlag(flagId); + } ... - AscendC::CrossCoreWaitFlag(flagId); + if ASCEND_IS_AIV { + AscendC::CrossCoreWaitFlag(flagId); + } ``` - 调用实现 @@ -99,4 +108,4 @@ ## 更新说明 | 时间 | 更新事项 | | ---------- | ------------ | -| 2025/7/23 | 更新本readme | +| 2025/7/28 | 更新本readme | diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h deleted file mode 100644 index d92080434..000000000 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aic.h +++ /dev/null @@ -1,167 +0,0 @@ -/** - * @file bare_aic.h - * - * Copyright (C) 2025. Huawei Technologies Co. Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#ifndef BARE_AIC_H -#define BARE_AIC_H -#include "kernel_operator.h" - -// half type, cube block: [16, 16] -constexpr uint32_t CUBE_BLOCK = 16; -constexpr uint32_t CUBE_BLOCK_SIZE = 16 * 16; - -class KernelBareAic { -public: - __aicore__ inline KernelBareAic() - { - aSize = m * k; - bSize = k * n; - cSize = m * n; - } - __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR c) - { - aGM.SetGlobalBuffer((__gm__ half *)a); - bGM.SetGlobalBuffer((__gm__ half *)b); - cGM.SetGlobalBuffer((__gm__ float *)c); - pipe.InitBuffer(inQueueA1, 1, aSize * sizeof(half)); - pipe.InitBuffer(inQueueA2, 1, aSize * sizeof(half)); - pipe.InitBuffer(inQueueB1, 1, bSize * sizeof(half)); - pipe.InitBuffer(inQueueB2, 1, bSize * sizeof(half)); - pipe.InitBuffer(outQueueCO1, 1, cSize * sizeof(float)); - } - __aicore__ inline void Process() - { - CopyIn(); - SplitA(); - SplitB(); - Compute(); - CopyOut(); - AscendC::CrossCoreSetFlag<0x2, PIPE_FIX>(3); - - } - -private: - __aicore__ inline uint32_t CeilCubeBlock(uint32_t len) { return (len + CUBE_BLOCK - 1) / CUBE_BLOCK; } - - __aicore__ inline void CopyIn() - { - AscendC::LocalTensor a1Local = inQueueA1.AllocTensor(); - AscendC::LocalTensor b1Local = inQueueB1.AllocTensor(); - - AscendC::Nd2NzParams nd2nzA1Params; - nd2nzA1Params.ndNum = 1; - nd2nzA1Params.nValue = m; - nd2nzA1Params.dValue = k; - nd2nzA1Params.srcNdMatrixStride = 0; - nd2nzA1Params.srcDValue = k; - nd2nzA1Params.dstNzC0Stride = CeilCubeBlock(m) * CUBE_BLOCK; - nd2nzA1Params.dstNzNStride = 1; - nd2nzA1Params.dstNzMatrixStride = 0; - AscendC::DataCopy(a1Local, aGM, nd2nzA1Params); - - AscendC::Nd2NzParams nd2nzB1Params; - nd2nzB1Params.ndNum = 1; - nd2nzB1Params.nValue = k; - nd2nzB1Params.dValue = n; - nd2nzB1Params.srcNdMatrixStride = 0; - nd2nzB1Params.srcDValue = n; - nd2nzB1Params.dstNzC0Stride = CeilCubeBlock(k) * CUBE_BLOCK; - nd2nzB1Params.dstNzNStride = 1; - nd2nzB1Params.dstNzMatrixStride = 0; - AscendC::DataCopy(b1Local, bGM, nd2nzB1Params); - - 
inQueueA1.EnQue(a1Local); - inQueueB1.EnQue(b1Local); - } - - __aicore__ inline void SplitA() - { - AscendC::LocalTensor a1Local = inQueueA1.DeQue(); - AscendC::LocalTensor a2Local = inQueueA2.AllocTensor(); - - uint32_t dstOffset = CeilCubeBlock(k) * CUBE_BLOCK_SIZE; - uint32_t srcOffset = CUBE_BLOCK_SIZE; - // Nz -> Zz - AscendC::LoadData2DParams loadDataParams; - loadDataParams.repeatTimes = CeilCubeBlock(k); - loadDataParams.srcStride = CeilCubeBlock(m); - loadDataParams.dstGap = 0; - loadDataParams.ifTranspose = false; - for (int i = 0; i < CeilCubeBlock(m); ++i) { - AscendC::LoadData(a2Local[i * dstOffset], a1Local[i * srcOffset], loadDataParams); - } - - inQueueA2.EnQue(a2Local); - inQueueA1.FreeTensor(a1Local); - } - __aicore__ inline void SplitB() - { - AscendC::LocalTensor b1Local = inQueueB1.DeQue(); - AscendC::LocalTensor b2Local = inQueueB2.AllocTensor(); - - uint32_t dstOffset = CeilCubeBlock(n) * CUBE_BLOCK_SIZE; - uint32_t srcOffset = CUBE_BLOCK_SIZE; - // Nz -> Zn - AscendC::LoadData2DParams loadDataParams; - loadDataParams.repeatTimes = CeilCubeBlock(n); - loadDataParams.srcStride = CeilCubeBlock(k); - loadDataParams.dstGap = 0; - loadDataParams.ifTranspose = true; - for (int i = 0; i < CeilCubeBlock(k); ++i) { - AscendC::LoadData(b2Local[i * dstOffset], b1Local[i * srcOffset], loadDataParams); - } - - inQueueB1.FreeTensor(b1Local); - inQueueB2.EnQue(b2Local); - } - __aicore__ inline void Compute() - { - AscendC::LocalTensor a2Local = inQueueA2.DeQue(); - AscendC::LocalTensor b2Local = inQueueB2.DeQue(); - AscendC::LocalTensor c1Local = outQueueCO1.AllocTensor(); - AscendC::MmadParams mmadParams; - mmadParams.m = m; - mmadParams.n = n; - mmadParams.k = k; - AscendC::Mmad(c1Local, a2Local, b2Local, mmadParams); - outQueueCO1.EnQue(c1Local); - inQueueA2.FreeTensor(a2Local); - inQueueB2.FreeTensor(b2Local); - } - __aicore__ inline void CopyOut() - { - AscendC::LocalTensor c1Local = outQueueCO1.DeQue(); - AscendC::FixpipeParamsV220 fixpipeParams; - 
fixpipeParams.nSize = n; - fixpipeParams.mSize = m; - fixpipeParams.srcStride = m; - fixpipeParams.dstStride = n; - - fixpipeParams.ndNum = 1; - fixpipeParams.srcNdStride = 0; - fixpipeParams.dstNdStride = 0; - AscendC::Fixpipe(cGM, c1Local, fixpipeParams); - outQueueCO1.FreeTensor(c1Local); - } - -private: - AscendC::TPipe pipe; - AscendC::TQue inQueueA1; - AscendC::TQue inQueueA2; - AscendC::TQue inQueueB1; - AscendC::TQue inQueueB2; - AscendC::TQue outQueueCO1; - - AscendC::GlobalTensor aGM; - AscendC::GlobalTensor bGM; - AscendC::GlobalTensor cGM; - uint16_t m = 32, k = 32, n = 32; - uint16_t aSize, bSize, cSize; -}; - -#endif // BARE_AIC_H diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h deleted file mode 100644 index e39d79d09..000000000 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/bare_aiv.h +++ /dev/null @@ -1,65 +0,0 @@ -/** - * @file bare_aiv.h - * - * Copyright (C) 2025. Huawei Technologies Co. Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#ifndef BARE_AIV_H -#define BARE_AIV_H -#include "kernel_operator.h" - -class KernelBareAiv { -public: - __aicore__ inline KernelBareAiv() { cSize = m * n; } - __aicore__ inline void Init(GM_ADDR c) - { - cGM.SetGlobalBuffer((__gm__ float *)c + AscendC::GetBlockIdx() * (cSize / 2)); // C:V = 1:2, split tensor for vector calc - - pipe.InitBuffer(reluOutQueue, 1, cSize * sizeof(float)); - pipe.InitBuffer(reluInQueue, 1, cSize * sizeof(float)); - } - __aicore__ inline void Process() - { - AscendC::CrossCoreWaitFlag(3); - LeakyreluCopyIn(); - LeakyreluCompute(); - LeakyreluCopyOut(); - } - -private: - __aicore__ inline void LeakyreluCopyIn() - { - AscendC::LocalTensor reluInLocal = reluInQueue.AllocTensor(); - - AscendC::DataCopy(reluInLocal, cGM, cSize / 2); - reluInQueue.EnQue(reluInLocal); - } - __aicore__ inline void LeakyreluCompute() - { - AscendC::LocalTensor reluInLocal = reluInQueue.DeQue(); - AscendC::LocalTensor reluOutLocal = reluOutQueue.AllocTensor(); - AscendC::LeakyRelu(reluOutLocal, reluInLocal, (float)0.5, cSize); - reluOutQueue.EnQue(reluOutLocal); - reluInQueue.FreeTensor(reluInLocal); - } - __aicore__ inline void LeakyreluCopyOut() - { - AscendC::LocalTensor reluOutLocal = reluOutQueue.DeQue(); - AscendC::DataCopy(cGM, reluOutLocal, cSize / 2); - reluOutQueue.FreeTensor(reluOutLocal); - } - -private: - AscendC::TPipe pipe; - AscendC::TQue reluOutQueue; - AscendC::TQue reluInQueue; - - AscendC::GlobalTensor cGM; - uint16_t m = 32, n = 32; - uint16_t cSize; -}; - -#endif // BARE_AIV_H diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp index 683abee11..d75bfeecc 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom.cpp @@ -1,27 +1,235 @@ /** * @file 
baremix_custom.cpp * - * Copyright (C) 2025. Huawei Technologies Co. Ltd. All rights reserved. + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. * - * This program is distributed in the hope that it will be useful + * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#include "bare_aic.h" -#include "bare_aiv.h" +#define ASCENDC_CUBE_ONLY +#include "kernel_operator.h" +#include "lib/matmul_intf.h" -extern "C" __global__ __aicore__ void baremix_custom(GM_ADDR a, GM_ADDR b, GM_ADDR c) +using namespace matmul; + +__aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) +{ + return (a + b - 1) / b; +} + +/** + * @brief Copy tiling data to TCubeTiling ptr from tiling gm addr. + * @param tiling: TCubeTiling ptr which needs to copy tiling data. + * @param tilingGM: tiling gm addr. + * @retval None + */ +__aicore__ inline void CopyTiling(TCubeTiling *tiling, GM_ADDR tilingGM) +{ + uint32_t *ptr = reinterpret_cast(tiling); + auto tiling32 = reinterpret_cast<__gm__ uint32_t *>(tilingGM); + + for (uint32_t i = 0; i < sizeof(TCubeTiling) / sizeof(uint32_t); i++, ptr++) { + *ptr = *(tiling32 + i); + } + return; +} + +template class MatmulLeakyKernel { +public: + __aicore__ inline MatmulLeakyKernel(){}; + __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, + const TCubeTiling &tiling, AscendC::TPipe *pipe); + __aicore__ inline void Process(AscendC::TPipe *pipe); + + __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, + int32_t &offsetC, int32_t &offsetBias); + + Matmul, MatmulType, + MatmulType, MatmulType> + matmulObj; + + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + TCubeTiling tiling; +}; + +/** + * @brief Set matmulLeaky 
input and output gm addr of current core. + * @param a: A matrix gm addr. + * @param b: B matrix gm addr. + * @param bias: Bias gm addr. + * @param c: C matrix gm addr. + * @param workspace: Temporary gm space addr required by matmul calc. + * @param tiling: matmul tiling data. + * @param pipe: Global memory and sync management TPipe object. + * @retval None + */ +template +__aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, + GM_ADDR c, GM_ADDR workspace, + const TCubeTiling &tiling, AscendC::TPipe *pipe) +{ + this->tiling = tiling; + aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ aType *>(a), tiling.M * tiling.Ka); + bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ bType *>(b), tiling.Kb * tiling.N); + cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ cType *>(c), tiling.M * tiling.N); + biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType *>(bias), tiling.N); + + int32_t offsetA, offsetB, offsetC, offsetBias; + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); // Calculate the gm offset based on the blockidx. + aGlobal = aGlobal[offsetA]; + bGlobal = bGlobal[offsetB]; + cGlobal = cGlobal[offsetC]; + biasGlobal = biasGlobal[offsetBias]; +} + +/** + * @brief Main process of matmul calculation + * @param pipe: Global memory and sync management TPipe object. + * @retval None + */ +template +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) +{ + matmulObj.SetTensorA(aGlobal); + matmulObj.SetTensorB(bGlobal); + matmulObj.SetBias(biasGlobal); + + matmulObj.template IterateAll(cGlobal); + matmulObj.End(); + AscendC::CrossCoreSetFlag<0x2, PIPE_FIX>(3); +} + +/** + * @brief Calculate the gm offset based on the blockidx. + * @param blockIdx: Current Core blockidx. + * @param tiling: Matmul tiling data. + * @param offsetA: Gm offset of A matrix. + * @param offsetB: Gm offset of B matrix. + * @param offsetC: Gm offset of C matrix. + * @param offsetBias: Gm offset of Bias matrix. 
+ * @retval None + */ +template +__aicore__ inline void +MatmulLeakyKernel::CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, + int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, + int32_t &offsetBias) +{ + auto mSingleBlocks = Ceiling(tiling.M, tiling.singleCoreM); + auto mCoreIndx = blockIdx % mSingleBlocks; + auto nCoreIndx = blockIdx / mSingleBlocks; + + offsetA = mCoreIndx * tiling.Ka * tiling.singleCoreM; + offsetB = nCoreIndx * tiling.singleCoreN; + offsetC = mCoreIndx * tiling.N * tiling.singleCoreM + nCoreIndx * tiling.singleCoreN; + offsetBias = nCoreIndx * tiling.singleCoreN; +} + +template class LeakyReluKernel { +public: + __aicore__ inline LeakyReluKernel(){}; + __aicore__ inline void Init(GM_ADDR c, const TCubeTiling &tiling, AscendC::TPipe *pipe); + __aicore__ inline void Process(AscendC::TPipe *pipe); + + __aicore__ inline void LeakyReluCopyIn(const TCubeTiling &tiling); + __aicore__ inline void LeakyReluCompute(const TCubeTiling &tiling); + __aicore__ inline void LeakyReluCopyOut(const TCubeTiling &tiling); + + AscendC::GlobalTensor cGlobal; + + AscendC::LocalTensor reluInLocal; + AscendC::LocalTensor reluOutLocal; + TCubeTiling tiling; + AscendC::TQue reluInQueue_; + AscendC::TQue reluOutQueue_; +}; + +/** + * @brief Set matmulLeaky input and output gm addr of current core. + * @param a: A matrix gm addr. + * @param b: B matrix gm addr. + * @param bias: Bias gm addr. + * @param c: C matrix gm addr. + * @param workspace: Temporary gm space addr required by matmul calc. + * @param tiling: matmul tiling data. + * @param pipe: Global memory and sync management TPipe object. 
+ * @retval None + */ +template +__aicore__ inline void LeakyReluKernel::Init(GM_ADDR c, const TCubeTiling &tiling, AscendC::TPipe *pipe) +{ + this->tiling = tiling; + cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ cType *>(c) + AscendC::GetBlockIdx() * tiling.M * tiling.N / 2); //c:v = 1:2, split into 2 parts, for vector calculation + + pipe->InitBuffer(reluInQueue_, 1, tiling.singleCoreM * tiling.singleCoreN * sizeof(cType) /2); // Init input buffer. + pipe->InitBuffer(reluOutQueue_, 1, tiling.singleCoreM * tiling.singleCoreN * sizeof(cType)/2); // Init output buffer. +} + +template +__aicore__ inline void LeakyReluKernel::Process(AscendC::TPipe *pipe) +{ + AscendC::CrossCoreWaitFlag(3); + LeakyReluCopyIn(tiling); + LeakyReluCompute(tiling); + LeakyReluCopyOut(tiling); +} +template +__aicore__ inline void LeakyReluKernel::LeakyReluCopyIn(const TCubeTiling &tiling) +{ + AscendC::LocalTensor reluInLocal = reluInQueue_.AllocTensor(); + AscendC::DataCopy(reluInLocal, cGlobal, tiling.singleCoreM * tiling.singleCoreN / 2); + reluInQueue_.EnQue(reluInLocal); +} + +template +__aicore__ inline void LeakyReluKernel::LeakyReluCompute(const TCubeTiling &tiling) +{ + AscendC::LocalTensor reluInLocal = reluInQueue_.DeQue(); + AscendC::LocalTensor reluOutLocal = reluOutQueue_.AllocTensor(); + AscendC::LeakyRelu(reluOutLocal, reluInLocal, (float)0.001, tiling.singleCoreM * tiling.singleCoreN /2); + reluOutQueue_.EnQue(reluOutLocal); + reluInQueue_.FreeTensor(reluInLocal); +} + +template +__aicore__ inline void LeakyReluKernel::LeakyReluCopyOut(const TCubeTiling &tiling) +{ + AscendC::LocalTensor reluOutLocal = reluOutQueue_.DeQue(); + AscendC::DataCopy(cGlobal, reluOutLocal, tiling.singleCoreM * tiling.singleCoreN / 2); + reluOutQueue_.FreeTensor(reluOutLocal); +} + +/** + * @brief baremix kernel function entry + * @param a: A matrix gm addr. + * @param b: B matrix gm addr. + * @param bias: Bias gm addr. + * @param c: Out gm addr. 
+ * @param workspace: Temporary gm space addr required by matmul calc. + * @param tilingGm: Tiling data addr. + * @retval None + */ +extern "C" __global__ __aicore__ void baremix_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, + GM_ADDR workspace, GM_ADDR tilingGm) { - // set mix KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); + AscendC::TPipe pipe; + TCubeTiling tiling; + CopyTiling(&tiling, tilingGm); + if ASCEND_IS_AIC { - KernelBareAic mm; - mm.Init(a, b, c); - mm.Process(); + MatmulLeakyKernel matmulLeakyKernel; + matmulLeakyKernel.Init(a, b, bias, c, workspace, tiling, &pipe); + REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &matmulLeakyKernel.tiling); // Initialize the matmul object. + matmulLeakyKernel.Process(&pipe); } if ASCEND_IS_AIV { - KernelBareAiv op; - op.Init(c); - op.Process(); + LeakyReluKernel leakyReluKernel; + leakyReluKernel.Init(c, tiling, &pipe); + leakyReluKernel.Process(&pipe); } -} +} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom_tiling.cpp b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom_tiling.cpp new file mode 100644 index 000000000..5edbb67af --- /dev/null +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/baremix_custom_tiling.cpp @@ -0,0 +1,75 @@ +/** + * @file baremix_tiling.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include +#include +#include + +#include "tiling/tiling_api.h" +#include "tiling/platform/platform_ascendc.h" +using namespace matmul_tiling; +using namespace std; + +/** + * @brief Generate matmul tiling. + * @param socVersion: Platform socversion. + * @param tilingBuf data buffer. 
+ */ +void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) +{ + int M = 128; + int N = 128; + int K = 256; + + TPosition leftPosition = TPosition::GM; + CubeFormat leftFormat = CubeFormat::ND; + DataType leftDtype = DataType::DT_FLOAT16; + bool isTransA = false; + + TPosition rightPosition = TPosition::GM; + CubeFormat rightFormat = CubeFormat::ND; + DataType rightDtype = DataType::DT_FLOAT16; + bool isTransB = false; + + TPosition resultPosition = TPosition::GM; + CubeFormat resultFormat = CubeFormat::ND; + DataType resultDtype = DataType::DT_FLOAT; + + TPosition biasPosition = TPosition::GM; + CubeFormat biasFormat = CubeFormat::ND; + DataType biasDtype = DataType::DT_FLOAT; + bool isBias = true; + + int baseM = 128; + int baseN = 128; + + optiling::TCubeTiling tilingData; + auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); + MatmulApiTiling tilingApi(*ascendcPlatform); + + tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); + tilingApi.SetBType(rightPosition, rightFormat, rightDtype, isTransB); + tilingApi.SetCType(resultPosition, resultFormat, resultDtype); + tilingApi.SetBiasType(biasPosition, biasFormat, biasDtype); + + tilingApi.SetOrgShape(M, N, K); + tilingApi.SetShape(M, N, K); + tilingApi.SetBias(isBias); + tilingApi.SetTraverse(MatrixTraverse::FIRSTM); // Set the matmul travse is FIRSTM. + tilingApi.SetFixSplit(baseM, baseN, -1); // Set the fixed baseM=128, baseN=256. + tilingApi.SetBufferSpace(-1, -1, -1); + + int64_t res = tilingApi.GetTiling(tilingData); // Get matmul tiling data. 
+ if (res == -1) { + std::cout << "gen tiling failed" << std::endl; + } + uint32_t tcubeTilingSize = tilingData.GetDataSize(); + tilingData.SaveToBuffer(tilingBuf, tcubeTilingSize); + return; +} diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake index 3b8e2c506..b3c8ff7ae 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake @@ -12,4 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> + -DHAVE_WORKSPACE + -DHAVE_TILING ) diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp index 30568f793..8b2fcfbd4 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/main.cpp @@ -8,40 +8,58 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include "data_utils.h" +#include "kernel_tiling/kernel_tiling.h" +#include "tiling/platform/platform_ascendc.h" #ifndef ASCENDC_CPU_DEBUG #include "acl/acl.h" #include "aclrtlaunch_baremix_custom.h" #else #include "tikicpulib.h" -extern "C" void baremix_custom(uint8_t *a, uint8_t *b, uint8_t *c); +extern "C" void baremix_custom(uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *); #endif +extern void GenerateTiling(const char *socVersion, uint8_t *tilingBuf); int32_t main(int32_t argc, char *argv[]) { - uint32_t M = 32; - uint32_t N = 32; - uint32_t K = 32; - size_t aFileSize = M * K * sizeof(int16_t); // uint16_t represent half - size_t bFileSize = K * N * sizeof(int16_t); // uint16_t represent half - size_t cFileSize = M * N * sizeof(float); + const char *socVersion = SOC_VERSION; + auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); + size_t aFileSize = 32768 * sizeof(int16_t); + size_t bFileSize = 32768 * sizeof(int16_t); + size_t cFileSize = 16384 * sizeof(float); + size_t biasFileSize = 640 * sizeof(float); + size_t tilingFileSize = sizeof(TCubeTiling); + size_t userWorkspaceSize = 0; + size_t systemWorkspaceSize = static_cast(ascendcPlatform->GetLibApiWorkSpaceSize()); + size_t workspaceSize = userWorkspaceSize + systemWorkspaceSize; + uint8_t *tilingBuf = (uint8_t *)malloc(tilingFileSize); + GenerateTiling(socVersion, tilingBuf); +#ifdef CUSTOM_ASCEND310P + uint32_t blockDim = 2; +#else uint32_t blockDim = 1; +#endif #ifdef ASCENDC_CPU_DEBUG - AscendC::SetKernelMode(KernelMode::MIX_MODE); uint8_t *a = (uint8_t *)AscendC::GmAlloc(aFileSize); uint8_t *b = (uint8_t *)AscendC::GmAlloc(bFileSize); + uint8_t *bias = (uint8_t *)AscendC::GmAlloc(biasFileSize); uint8_t *c = (uint8_t *)AscendC::GmAlloc(cFileSize); + uint8_t *tiling = (uint8_t *)AscendC::GmAlloc(tilingFileSize); + uint8_t *workspace = (uint8_t *)AscendC::GmAlloc(workspaceSize); ReadFile("./input/x1_gm.bin", aFileSize, a, aFileSize); 
ReadFile("./input/x2_gm.bin", bFileSize, b, bFileSize); - - ICPU_RUN_KF(baremix_custom, blockDim, a, b, c); + ReadFile("./input/bias.bin", biasFileSize, bias, biasFileSize); + memcpy_s(tiling, tilingFileSize, tilingBuf, tilingFileSize); + ICPU_RUN_KF(baremix_custom, blockDim, a, b, bias, c, workspace, tiling); WriteFile("./output/output.bin", c, cFileSize); - AscendC::GmFree((void *)a); AscendC::GmFree((void *)b); + AscendC::GmFree((void *)bias); AscendC::GmFree((void *)c); + AscendC::GmFree((void *)tiling); + AscendC::GmFree((void *)workspace); #else CHECK_ACL(aclInit(nullptr)); int32_t deviceId = 0; @@ -49,41 +67,65 @@ int32_t main(int32_t argc, char *argv[]) aclrtStream stream = nullptr; CHECK_ACL(aclrtCreateStream(&stream)); - uint8_t *aHost; - uint8_t *aDevice; - CHECK_ACL(aclrtMallocHost((void **)(&aHost), aFileSize)); - CHECK_ACL(aclrtMalloc((void **)&aDevice, aFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("./input/x1_gm.bin", aFileSize, aHost, aFileSize); - CHECK_ACL(aclrtMemcpy(aDevice, aFileSize, aHost, aFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); + uint8_t *inputAHost; + uint8_t *inputADevice; + CHECK_ACL(aclrtMallocHost((void **)(&inputAHost), aFileSize)); + CHECK_ACL(aclrtMalloc((void **)&inputADevice, aFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + ReadFile("./input/x1_gm.bin", aFileSize, inputAHost, aFileSize); + CHECK_ACL(aclrtMemcpy(inputADevice, aFileSize, inputAHost, aFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - uint8_t *bHost; - uint8_t *bDevice; - CHECK_ACL(aclrtMallocHost((void **)(&bHost), bFileSize)); - CHECK_ACL(aclrtMalloc((void **)&bDevice, bFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("./input/x2_gm.bin", bFileSize, bHost, bFileSize); - CHECK_ACL(aclrtMemcpy(bDevice, bFileSize, bHost, bFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); + uint8_t *inputBHost; + uint8_t *inputBDevice; + CHECK_ACL(aclrtMallocHost((void **)(&inputBHost), bFileSize)); + CHECK_ACL(aclrtMalloc((void **)&inputBDevice, bFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + 
ReadFile("./input/x2_gm.bin", bFileSize, inputBHost, bFileSize); + CHECK_ACL(aclrtMemcpy(inputBDevice, bFileSize, inputBHost, bFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - uint8_t *cHost; - uint8_t *cDevice; - CHECK_ACL(aclrtMallocHost((void **)(&cHost), cFileSize)); - CHECK_ACL(aclrtMalloc((void **)&cDevice, cFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + uint8_t *outputCHost; + uint8_t *outputCDevice; + CHECK_ACL(aclrtMallocHost((void **)(&outputCHost), cFileSize)); + CHECK_ACL(aclrtMalloc((void **)&outputCDevice, cFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ACLRT_LAUNCH_KERNEL(baremix_custom)(blockDim, stream, aDevice, bDevice, cDevice); - CHECK_ACL(aclrtSynchronizeStream(stream)); + uint8_t *inputBiasHost; + uint8_t *inputBiasDevice; + CHECK_ACL(aclrtMallocHost((void **)(&inputBiasHost), biasFileSize)); + CHECK_ACL(aclrtMalloc((void **)&inputBiasDevice, biasFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + ReadFile("./input/bias.bin", biasFileSize, inputBiasHost, biasFileSize); + CHECK_ACL(aclrtMemcpy(inputBiasDevice, biasFileSize, inputBiasHost, biasFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - CHECK_ACL(aclrtMemcpy(cHost, cFileSize, cDevice, cFileSize, ACL_MEMCPY_DEVICE_TO_HOST)); - WriteFile("./output/output.bin", cHost, cFileSize); + uint8_t *tilingHost; + uint8_t *tilingDevice; + CHECK_ACL(aclrtMallocHost((void **)(&tilingHost), tilingFileSize)); + CHECK_ACL(aclrtMalloc((void **)&tilingDevice, tilingFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMemcpy(tilingHost, tilingFileSize, tilingBuf, tilingFileSize, ACL_MEMCPY_HOST_TO_HOST)); + CHECK_ACL(aclrtMemcpy(tilingDevice, tilingFileSize, tilingHost, tilingFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + uint8_t *workspaceDevice; + CHECK_ACL(aclrtMalloc((void **)&workspaceDevice, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ACLRT_LAUNCH_KERNEL(baremix_custom) + (blockDim, stream, inputADevice, inputBDevice, inputBiasDevice, outputCDevice, workspaceDevice, tilingDevice); + + CHECK_ACL(aclrtSynchronizeStream(stream)); - 
CHECK_ACL(aclrtFree(aDevice)); - CHECK_ACL(aclrtFreeHost(aHost)); - CHECK_ACL(aclrtFree(bDevice)); - CHECK_ACL(aclrtFreeHost(bHost)); - CHECK_ACL(aclrtFree(cDevice)); - CHECK_ACL(aclrtFreeHost(cHost)); + CHECK_ACL(aclrtFree(inputADevice)); + CHECK_ACL(aclrtFreeHost(inputAHost)); + CHECK_ACL(aclrtFree(inputBDevice)); + CHECK_ACL(aclrtFreeHost(inputBHost)); + CHECK_ACL(aclrtMemcpy(outputCHost, cFileSize, outputCDevice, cFileSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output.bin", outputCHost, cFileSize); + CHECK_ACL(aclrtFree(outputCDevice)); + CHECK_ACL(aclrtFreeHost(outputCHost)); + CHECK_ACL(aclrtFree(inputBiasDevice)); + CHECK_ACL(aclrtFreeHost(inputBiasHost)); + CHECK_ACL(aclrtFree(tilingDevice)); + CHECK_ACL(aclrtFreeHost(tilingHost)); + CHECK_ACL(aclrtFree(workspaceDevice)); CHECK_ACL(aclrtDestroyStream(stream)); CHECK_ACL(aclrtResetDevice(deviceId)); CHECK_ACL(aclFinalize()); #endif + free(tilingBuf); return 0; -} +} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh index 0c9c7f40b..dbca0e151 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh @@ -11,6 +11,7 @@ SHORT=r:,v:,i:,b:,p:, LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") eval set -- "$OPTS" +SOC_VERSION="Ascend310P3" while :; do case "$1" in @@ -71,17 +72,11 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export 
LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py index b0d849c9f..8257ae32c 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/scripts/gen_data.py @@ -13,20 +13,21 @@ import os def gen_golden_data(): - M = 32 - N = 32 - K = 32 + M = 128 + N = 128 + K = 256 - x1_gm = np.random.uniform(1, 10, [M, K]).astype(np.float16) - x2_gm = np.random.uniform(1, 10, [K, N]).astype(np.float16) - golden1 = (np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32))).astype(np.float32) - - negative_slope = np.array(2.0, dtype=np.float32) - golden = np.where(golden1 > 0, golden1, golden1 * negative_slope) + input_a = np.random.randint(-10, 10, [M, K]).astype(np.float16) + input_b = np.random.randint(-10, 10, [K, N]).astype(np.float16) + input_bias = np.random.randint(1, 10, [N]).astype(np.float32) + alpha = 0.001 + golden = (np.matmul(input_a.astype(np.float32), input_b.astype(np.float32)) + input_bias).astype(np.float32) + golden = np.where(golden >= 0, golden, golden * alpha) os.system("mkdir -p input") os.system("mkdir -p output") - x1_gm.tofile("./input/x1_gm.bin") - x2_gm.tofile("./input/x2_gm.bin") + input_a.tofile("./input/x1_gm.bin") + input_b.tofile("./input/x2_gm.bin") + 
input_bias.tofile("./input/bias.bin") golden.tofile("./output/golden.bin") diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md b/operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md index caa08b177..a2d26d3ab 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/README.md @@ -1,5 +1,5 @@ ## 概述 -本样例介绍基于基础API的MatmulLeakyRelu算子实现及核函数直调方法。 +本样例介绍MatmulLeakyRelu算子实现及核函数直调方法。 ## 目录结构介绍 ``` @@ -8,7 +8,7 @@ ``` ## 算子描述 -算子使用基础API包括DataCopy、LoadData、Mmad等,实现MatmulLeakyRelu的运算操作。 +算子使用了Matmul高阶API,实现了快速的MatmulLeakyRelu矩阵乘法的运算操作。 MatmulLeakyRelu的计算公式为: @@ -36,7 +36,7 @@ C = C > 0 ? C : C * 2.0 ## 更新说明 | 时间 | 更新事项 | | ---------- | ------------------------ | -| 2025/7/23 | 新增22_baremix_kernellaunch | +| 2025/7/28 | 新增22_baremix_kernellaunch | ## 已知issue diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 740d02daf..7a4206d12 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -36,7 +36,7 @@ | [19_unaligned_wholereduces_kernellaunch](./19_unaligned_wholereduces_kernellaunch) | 基于Ascend C的非对齐WholeReduceSum自定义算子及KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [20_mmad_kernellaunch](./20_mmad_kernellaunch) | 基于Ascend C基础API的Matmul自定义Cube算子及KernelLaunch调用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [21_vectoradd_kernellaunch](./21_vectoradd_kernellaunch) | 基于Ascend C的Add多场景自定义Vector算子的KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 -| [22_baremix_kernellaunch](./22_baremix_kernellaunch) | 基于Ascend C的手写核间同步的MatmulLeayrelu算子的KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [22_baremix_kernellaunch](./22_baremix_kernellaunch) | 通过更底层的编码方式,实现MatmulLeayrelu融合算子的样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 ## 获取样例代码 -- Gitee From 4768976d3e92ad18327e876f48cff035b63cbaac Mon Sep 17 00:00:00 2001 From: zhanghao0689 Date: Wed, 30 Jul 2025 05:59:22 +0000 Subject: [PATCH 51/97] !2728 change msprof to msprof op Merge pull request !2728 from zhanghao0689/master --- .../4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh index 894fec61c..5f407789f 100755 --- a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AclNNInvocation/run.sh @@ -57,7 +57,7 @@ function main { export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} cd $CURRENT_DIR/output echo "INFO: execute op!" - msprof --application="./execute_add_op" --ai-core=on --l2=on --output=./prof + msprof op --launch-count=2 --output=./prof ./execute_add_op if [ $? -ne 0 ]; then echo "ERROR: acl executable run failed! please check your project!" 
return 1 -- Gitee From 72012f6ca4cc7061b999f2bffe151e70bfb31de9 Mon Sep 17 00:00:00 2001 From: mingling Date: Fri, 1 Aug 2025 09:35:52 +0000 Subject: [PATCH 52/97] !2729 fix matmul readme format Merge pull request !2729 from mingling/matmul --- .../20_mmad_kernellaunch/MmadBiasInvocation/README.md | 2 +- .../20_mmad_kernellaunch/MmadInvocation/README.md | 2 +- .../22_baremix_kernellaunch/BareMixInvocation/README.md | 6 ++++-- .../22_baremix_kernellaunch/BareMixInvocation/run.sh | 3 +-- operator/ascendc/0_introduction/README.md | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/README.md b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/README.md index 2af371234..d20b34a82 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/README.md +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/README.md @@ -57,7 +57,7 @@ - 打开样例目录 以命令行方式下载样例代码,master分支为例。 ```bash - cd ${git_clone_path}/samples/operator/ascendc\0_introduction\20_mmad_kernellaunch/MmadInvocationNeo + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocationNeo ``` - 配置环境变量 diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md index 7bb83671c..eda5f87ad 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/README.md @@ -57,7 +57,7 @@ - 打开样例目录 以命令行方式下载样例代码,master分支为例。 ```bash - cd ${git_clone_path}/samples/operator/ascendc\0_introduction\20_mmad_kernellaunch/MmadInvocationNeo + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocationNeo ``` - 配置环境变量 diff --git 
a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md index d61d60171..ffb9e0f4b 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md @@ -53,11 +53,13 @@ KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); ... if ASCEND_IS_AIC { - AscendC::CrossCoreSetFlag(flagId); + // AIC核进行Matmul计算 + // AIC核完成计算后,通过AscendC::CrossCoreSetFlag(flagId)发送同步flag } ... if ASCEND_IS_AIV { - AscendC::CrossCoreWaitFlag(flagId); + // AIV核通过AscendC::CrossCoreWaitFlag(flagId)接收同步flag + // AIV核进行LeakyRelu计算 } ``` diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh index dbca0e151..435307aaa 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh @@ -11,7 +11,6 @@ SHORT=r:,v:,i:,b:,p:, LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") eval set -- "$OPTS" -SOC_VERSION="Ascend310P3" while :; do case "$1" in @@ -52,7 +51,7 @@ if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then exit -1 fi -VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" exit -1 diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 7a4206d12..2f95f076d 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -70,5 +70,5 @@ | 2024/11/11 | 样例目录调整 | | 
2025/01/06 | 新增21_vectoradd_kernellaunch样例 | | 2025/07/22 | 新增8_library_frameworklaunch样例 | -| 2025/7/23 | 新增22_baremix_kernellaunch | +| 2025/7/28 | 新增22_baremix_kernellaunch | -- Gitee From 16d72e045b37af63bc33ef08888b7ef955e0df58 Mon Sep 17 00:00:00 2001 From: hujiawenKaven Date: Sat, 2 Aug 2025 06:50:28 +0000 Subject: [PATCH 53/97] !2730 fix mm leakyrelu sample 24blocks aicore error * fix mm leakyrelu block24 aicore error --- .../op_host/matmul_leakyrelu_custom.cpp | 4 ++-- .../op_kernel/matmul_leakyrelu_custom.cpp | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp index b811c71ca..5e5da7928 100644 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp @@ -60,8 +60,8 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) context->SetTilingKey(2); } else { /* SetBlockDim here refers to the number of cube cores, so for separated arch(AIC:AIV=1:2), - vector cores number is set 48 by SetDim, cube core number need to be set 24 here.*/ - context->SetBlockDim(24); + when vector cores number is set like 48 by SetDim, cube core number need to be set 24 here.*/ + context->SetBlockDim(ascendcPlatform.GetCoreNumAic()); context->SetTilingKey(1); } tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp 
b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp index 4f56aca86..e1894f29f 100644 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp @@ -147,11 +147,13 @@ template __aicore__ inline void MatmulLeakyKernel::CopyOut(uint32_t count) { reluOutQueue_.DeQue(); - const uint32_t roundM = tiling.singleCoreM / tiling.baseM; - const uint32_t roundN = tiling.singleCoreN / tiling.baseN; + const uint32_t roundM = Ceiling(tiling.singleCoreM, tiling.baseM); + const uint32_t curCopyM = tiling.singleCoreM < tiling.baseM ? tiling.singleCoreM : tiling.baseM; + const uint32_t curCopyN = tiling.singleCoreN < tiling.baseN ? tiling.singleCoreN : tiling.baseN; + const uint32_t curCopyNStride = tiling.N < tiling.baseN ? tiling.N : tiling.N - tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + DataCopyParams copyParam = {(uint16_t)curCopyM, (uint16_t)(curCopyN * sizeof(cType) / 32), 0, + (uint16_t)(curCopyNStride * sizeof(cType) / 32)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); } -- Gitee From d2bf59052fe4490879920cc44fca9b18b473e75e Mon Sep 17 00:00:00 2001 From: mingling Date: Mon, 4 Aug 2025 11:39:40 +0000 Subject: [PATCH 54/97] !2733 change baremix readme Merge pull request !2733 from mingling/master --- .../11_matmul_kernellaunch/MatmulInvocationNeo/run.sh | 2 ++ .../MatmulLeakyReluInvocation/run.sh | 2 ++ .../MatmulLeakyReluInvocationAsync/run.sh | 2 ++ 
.../AbsDuplicateKernelInvocation/run.sh | 9 ++------- .../AbsGatherMaskKernelInvocation/run.sh | 9 ++------- .../AbsPadKernelInvocation/run.sh | 9 ++------- .../AbsUnPadKernelInvocation/run.sh | 9 ++------- .../ReduceMinKernelInvocation/run.sh | 9 ++------- .../WholeReduceSumKernelInvocation/run.sh | 2 ++ .../20_mmad_kernellaunch/MmadBiasInvocation/run.sh | 2 ++ .../20_mmad_kernellaunch/MmadInvocation/run.sh | 2 ++ .../VectorAddMultiCoreWithTiling/run.sh | 2 ++ .../VectorAddMultiCoreWithTilingBroadcast/run.sh | 2 ++ .../21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh | 2 ++ .../VectorAddSingleCoreWithTmpbuf/run.sh | 2 ++ .../22_baremix_kernellaunch/BareMixInvocation/README.md | 8 +++++--- .../22_baremix_kernellaunch/BareMixInvocation/run.sh | 2 ++ .../3_add_kernellaunch/AddKernelInvocationNeo/run.sh | 2 ++ .../AddKernelInvocationTilingNeo/run.sh | 2 ++ .../ascendc/0_introduction/5_addn_kernellaunch/run.sh | 2 ++ 20 files changed, 43 insertions(+), 38 deletions(-) diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh index dbca0e151..edfb13e92 100755 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh @@ -77,6 +77,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh index dbca0e151..edfb13e92 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh @@ -77,6 +77,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh index 9e5b60ada..806b460ed 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh @@ -77,6 +77,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh index c19e08c88..b37622e7d 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh @@ -90,13 +90,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh index c19e08c88..b37622e7d 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh @@ -90,13 +90,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh index c19e08c88..b37622e7d 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh @@ -90,13 +90,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh index c19e08c88..b37622e7d 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh @@ -90,13 +90,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh index c19e08c88..b37622e7d 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh @@ -90,13 +90,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - if [ ! $CAMODEL_LOG_PATH ]; then - export CAMODEL_LOG_PATH=$(pwd)/sim_log - fi - if [ -d "$CAMODEL_LOG_PATH" ]; then - rm -rf $CAMODEL_LOG_PATH - fi - mkdir -p $CAMODEL_LOG_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh index f239a9a44..3e40df7be 100755 --- a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh @@ -77,6 +77,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh index 3359bc3fa..58b231e25 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh @@ -75,6 +75,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. + export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh index 3359bc3fa..58b231e25 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh @@ -75,6 +75,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh index eb66d5395..4daf467b6 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh @@ -78,6 +78,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh index eb66d5395..4daf467b6 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh @@ -78,6 +78,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh index eb66d5395..4daf467b6 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh @@ -78,6 +78,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh index eb66d5395..4daf467b6 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh @@ -78,6 +78,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md index ffb9e0f4b..bc4e3e8ad 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/README.md @@ -44,20 +44,22 @@ **本样例关键代码介绍如下: - 设置ASCENDC_CUBE_ONLY,仅在AIC核进行matmul计算 - - 设置kernel_type + - 设置Kernel类型为KERNEL_TYPE_MIX_XXX,同时启用AIV核和AIC核 - 使用ASCEND_IS_AIC/ASCEND_IS_AIV隔离AIC/AIV核上的代码 - 使用同步接口,自行完成核间同步 ```c++ - #define ASCENDC_CUBE_ONLY + #define ASCENDC_CUBE_ONLY //指定Matmul运行在AIC核上 ... - KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); // 设置Kernel类型为KERNEL_TYPE_MIX_XXX ... if ASCEND_IS_AIC { + ... // AIC核进行Matmul计算 // AIC核完成计算后,通过AscendC::CrossCoreSetFlag(flagId)发送同步flag } ... if ASCEND_IS_AIV { + ... 
// AIV核通过AscendC::CrossCoreWaitFlag(flagId)接收同步flag // AIV核进行LeakyRelu计算 } diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh index 435307aaa..66f8411b6 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh @@ -76,6 +76,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. + export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh index 9bdf07910..2c926e7c2 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh @@ -77,6 +77,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh index 9bdf07910..2c926e7c2 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh @@ -77,6 +77,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. + export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi diff --git a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh index 9bdf07910..2c926e7c2 100755 --- a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh +++ b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh @@ -77,6 +77,8 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + # tidy folder by relocate log files, please use msprof tool to analyze these files. 
+ export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi -- Gitee From 63a15dc91b1b671ca64b88f16fd0cf5184f9aaa5 Mon Sep 17 00:00:00 2001 From: PengC Date: Mon, 4 Aug 2025 12:24:19 +0000 Subject: [PATCH 55/97] =?UTF-8?q?!2731=20=E6=B7=BB=E5=8A=A0=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E9=85=8D=E5=A5=97=20Merge=20pull=20request=20!2731=20?= =?UTF-8?q?from=20PengC/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/MATCH.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/MATCH.md b/docs/MATCH.md index b37ed65a6..5bacb88d4 100644 --- a/docs/MATCH.md +++ b/docs/MATCH.md @@ -5,6 +5,27 @@ Gitee标签描述CANN版本CANN版本发布时间 +v1.6-8.2.RC1.alpha003 +8.2.RC1.alpha003 CANN社区版CANN 8.2.RC1.alpha0032025/07/07 + +v1.5-8.2.RC1 +8.2.RC1 CANN商用版CANN 8.2.RC12025/07/26 + +v1.4-8.2.RC1.alpha002 +8.2.RC1.alpha002 CANN社区版CANN 8.2.RC1.alpha0022025/05/26 + +v1.3-8.2.RC1.alpha001 +8.2.RC1.alpha001 CANN社区版CANN 8.2.RC1.alpha0012025/05/13 + +v1.2-8.1.RC1.alpha002 +8.1.RC1.alpha002 CANN社区版CANN 8.1.RC1.alpha0022025/04/14 + +v1.1-8.1.RC1.beta1 +8.1.RC1 CANN商用版CANN 8.1.RC12025/04/30 + +v1.0-8.1.RC1.alpha001 +8.1.RC1.alpha001 CANN社区版CANN 8.1.RC1.alpha0012025/03/14 + v0.3-8.0.0.alpha003 8.0.0.alpha003 CANN社区版CANN 8.0.0.alpha0032024/12/31 -- Gitee From eb97dddf26c9fc761a579b89a8443a88f3925803 Mon Sep 17 00:00:00 2001 From: renjie Date: Tue, 5 Aug 2025 01:03:58 +0000 Subject: [PATCH 56/97] =?UTF-8?q?!2734=20=E8=A7=A3=E9=99=A4tiling=E4=B8=8B?= =?UTF-8?q?=E6=B2=89=E6=A0=B7=E4=BE=8B=EF=BC=8C=E7=AE=97=E5=AD=90=E5=8C=85?= =?UTF-8?q?=E4=B8=8D=E6=94=AF=E6=8C=81=E9=80=9A=E8=BF=87--install-path?= =?UTF-8?q?=E5=AE=89=E8=A3=85=E7=9A=84=E9=99=90=E5=88=B6=20Merge=20pull=20?= =?UTF-8?q?request=20!2734=20from=20renjie/master?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddCustomTilingSink/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md index 16e430cc8..37ce8f28f 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/README.md @@ -40,11 +40,11 @@ z = x + y - Tiling函数逻辑:添加判断逻辑,通过判断值依赖InputTensor的Data是否为空指针,确认当前是否处于编译期。若处于编译期,需要设置最大的workspace用于内存分配。 - Tiling函数下沉注册:将所有的Tiling函数逻辑单独在op_host/add_custom_tiling_sink_tiling.cpp中实现,并通过DEVICE_IMPL_OP_OPTILING接口注册下沉的Tiling函数。(DEVICE_IMPL_OP_OPTILING接口定义在头文件device_op_impl_registry.h中) - 算子host侧CMakeList.txt:Tiling下沉需要添加device侧的编译任务,本样例通过install.sh脚本添加,具体添加内容如下。 -``` -ascendc_device_library( TARGET cust_opmaster - OPTION SHARED - SRC ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_tiling_sink_tiling.cpp) -``` + ``` + ascendc_device_library( TARGET cust_opmaster + OPTION SHARED + SRC ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_tiling_sink_tiling.cpp) + ``` - 算子kernel实现:通过KERNEL_TASK_TYPE_DEFAULT接口将算子强制指定在AIC、AIV混合场景运行,满足Tiling下沉算子条件。 ## 支持的产品型号 @@ -121,7 +121,7 @@ ascendc_device_library( TARGET cust_opmaster cd CustomOp/build_out ./custom_opp__.run ``` - 命令执行成功后,自定义算子包中的相关文件将部署至opp算子库环境变量ASCEND_OPP_PATH指向的的vendors/customize目录中。若要执行Tiling下沉样例,则算子包不支持通过--install-path指定目录安装。 + 命令执行成功后,自定义算子包中的相关文件将部署至opp算子库环境变量ASCEND_OPP_PATH指向的的vendors/customize目录中。 ## 更新说明 | 时间 | 更新事项 | -- Gitee From c2116b43dd6ed45e50e2a61e535fdc2e73751529 Mon Sep 17 00:00:00 2001 From: mingling Date: Tue, 5 Aug 2025 08:26:21 +0000 Subject: [PATCH 57/97] !2735 simplify sim mode generated files * simplify sim mode generated files --- 
.../11_matmul_kernellaunch/MatmulInvocationNeo/run.sh | 6 ++++-- .../MatmulLeakyReluInvocation/run.sh | 6 ++++-- .../MatmulLeakyReluInvocationAsync/run.sh | 6 ++++-- .../AbsDuplicateKernelInvocation/run.sh | 6 ++++-- .../AbsGatherMaskKernelInvocation/run.sh | 6 ++++-- .../AbsPadKernelInvocation/run.sh | 6 ++++-- .../AbsUnPadKernelInvocation/run.sh | 6 ++++-- .../ReduceMinKernelInvocation/run.sh | 6 ++++-- .../WholeReduceSumKernelInvocation/run.sh | 6 ++++-- .../20_mmad_kernellaunch/MmadBiasInvocation/run.sh | 6 ++++-- .../20_mmad_kernellaunch/MmadInvocation/run.sh | 6 ++++-- .../VectorAddMultiCoreWithTiling/run.sh | 6 ++++-- .../VectorAddMultiCoreWithTilingBroadcast/run.sh | 6 ++++-- .../21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh | 6 ++++-- .../VectorAddSingleCoreWithTmpbuf/run.sh | 6 ++++-- .../22_baremix_kernellaunch/BareMixInvocation/run.sh | 6 ++++-- .../3_add_kernellaunch/AddKernelInvocationNeo/run.sh | 6 ++++-- .../3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh | 6 ++++-- operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh | 6 ++++-- 19 files changed, 76 insertions(+), 38 deletions(-) diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh index edfb13e92..38be8f48a 100755 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh @@ -77,8 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -114,5 +112,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh index edfb13e92..38be8f48a 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh @@ -77,8 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -114,5 +112,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh index 806b460ed..d66cd3aa4 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh @@ -77,8 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -114,5 +112,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh index b37622e7d..08570fe09 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh @@ -90,8 +90,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -126,5 +124,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh index b37622e7d..08570fe09 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh @@ -90,8 +90,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -126,5 +124,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh index b37622e7d..08570fe09 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh @@ -90,8 +90,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -126,5 +124,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh index b37622e7d..08570fe09 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh @@ -90,8 +90,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -126,5 +124,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh index b37622e7d..08570fe09 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh @@ -90,8 +90,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${_SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -126,5 +124,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh index 3e40df7be..f09ddb475 100755 --- a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh @@ -77,8 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -114,5 +112,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_y.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh index 58b231e25..f83616b88 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh @@ -75,8 +75,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -112,5 +110,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh index 58b231e25..f83616b88 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh @@ -75,8 +75,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -112,5 +110,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh index 4daf467b6..5f06bb334 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh @@ -78,8 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -115,5 +113,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh index 4daf467b6..5f06bb334 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh @@ -78,8 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -115,5 +113,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh index 4daf467b6..5f06bb334 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh @@ -78,8 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -115,5 +113,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh index 4daf467b6..5f06bb334 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh @@ -78,8 +78,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -115,5 +113,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh index 66f8411b6..74524bd16 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh @@ -76,8 +76,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -113,5 +111,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh index 2c926e7c2..c6dd79858 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh @@ -77,8 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -114,5 +112,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh index 2c926e7c2..c6dd79858 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh @@ -77,8 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -114,5 +112,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh index 2c926e7c2..c6dd79858 100755 --- a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh +++ b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh @@ -77,8 +77,6 @@ source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH - # tidy folder by relocate log files, please use msprof tool to analyze these files. 
- export CAMODEL_LOG_PATH=./sim_log elif [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH fi @@ -114,5 +112,9 @@ python3 scripts/gen_data.py ./ascendc_kernels_bbit fi ) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi md5sum output/*.bin python3 scripts/verify_result.py output/output_z.bin output/golden.bin -- Gitee From 9f23fb790d2d13e70338c67b3784159431c9acfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A8=81=E4=BA=A8?= Date: Wed, 6 Aug 2025 03:14:19 +0000 Subject: [PATCH 58/97] =?UTF-8?q?!2736=20[bugfix]fix=20invalid=20link=20Me?= =?UTF-8?q?rge=20pull=20request=20!2736=20from=20=E9=99=88=E5=A8=81?= =?UTF-8?q?=E4=BA=A8/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2_features/17_tiling_sink/AddCustomTilingSink/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md index 8d884730e..6d8bfca70 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/README.md @@ -42,7 +42,7 @@ z = x + y 请参考本目录中[AddCustomTilingSink/README.md](./AddCustomTilingSink/README.md)部署自定义算子包。 ### 3. 
执行测试脚本 -执行本目录中[PytorchInvocation/test_add_custom.py](./PytorchInvocation/test_add_custom.py)测试脚本验证功能。 +执行本目录中[PytorchInvocation/test_add_custom_tiling_sink.py](./PytorchInvocation/test_add_custom_tiling_sink.py)测试脚本验证功能。 ## 更新说明 -- Gitee From 047565aab0b2eb10b05df0a3b02363cd3bdf31b1 Mon Sep 17 00:00:00 2001 From: PengC Date: Fri, 8 Aug 2025 03:32:52 +0000 Subject: [PATCH 59/97] !2737 remove useless language desc Merge pull request !2737 from PengC/fix --- .../0_introduction/10_matmul_frameworklaunch/MatmulCustom.json | 1 - .../MatmulLeakyReluCustom.json | 1 - .../0_introduction/14_reduce_frameworklaunch/ReduceCustom.json | 1 - .../ascendc/0_introduction/15_sub_frameworklaunch/SubCustom.json | 1 - .../WholeReduceSumCustom.json | 1 - .../ascendc/0_introduction/1_add_frameworklaunch/AddCustom.json | 1 - .../0_introduction/2_add_frameworklaunchlite/AddCustom.json | 1 - .../0_introduction/4_addn_frameworklaunch/AddnCustom.json | 1 - .../6_addtemplate_frameworklaunch/AddTemplateCustom.json | 1 - .../7_broadcast_frameworklaunch/BroadcastCustom.json | 1 - .../8_library_frameworklaunch/static_library/AddCustom.json | 1 - .../8_library_frameworklaunch/static_library/MatmulCustom.json | 1 - .../9_leakyrelu_frameworklaunch/LeakyReluCustom.json | 1 - .../1_utilities/0_printf/FrameworkLaunch/MatmulCustom.json | 1 - .../1_utilities/3_assert/FrameworkLaunch/MatmulCustom.json | 1 - .../7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom.json | 1 - .../7_dumptensor/FrameworkLaunch/DumpTensorVector/AddCustom.json | 1 - operator/ascendc/2_features/12_cube_group/CubeGroupCustom.json | 1 - .../14_matmul_api_constant/MatmulApiConstantCustom.json | 1 - operator/ascendc/2_features/16_group_barrier/GroupBarrier.json | 1 - .../AddCustomTilingSink/AddCustomTilingSink.json | 1 - .../ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json | 1 - .../4_best_practices/15_mata_address_conflict/AddsCustom.json | 1 - 23 files changed, 23 deletions(-) diff --git 
a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustom.json b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustom.json index 3886a9c63..054b87366 100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustom.json +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustom.json @@ -1,7 +1,6 @@ [ { "op": "MatmulCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom.json b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom.json index d1f08cc53..d8b16a5cb 100644 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom.json +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom.json @@ -1,7 +1,6 @@ [ { "op": "MatmulLeakyreluCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom.json b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom.json index 72e802c7f..f37fd4e13 100644 --- a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom.json +++ b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom.json @@ -1,7 +1,6 @@ [ { "op": "ReduceCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/15_sub_frameworklaunch/SubCustom.json b/operator/ascendc/0_introduction/15_sub_frameworklaunch/SubCustom.json index 0e6043041..e2a490ae8 100644 --- a/operator/ascendc/0_introduction/15_sub_frameworklaunch/SubCustom.json +++ b/operator/ascendc/0_introduction/15_sub_frameworklaunch/SubCustom.json @@ -1,7 +1,6 @@ [ { "op": "SubCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom.json 
b/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom.json index 3d0a93123..ef2d57ad6 100644 --- a/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom.json +++ b/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom.json @@ -1,7 +1,6 @@ [ { "op": "WholeReduceSumCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustom.json b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustom.json index dce1ed85f..c583cac4a 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustom.json +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/2_add_frameworklaunchlite/AddCustom.json b/operator/ascendc/0_introduction/2_add_frameworklaunchlite/AddCustom.json index dce1ed85f..c583cac4a 100644 --- a/operator/ascendc/0_introduction/2_add_frameworklaunchlite/AddCustom.json +++ b/operator/ascendc/0_introduction/2_add_frameworklaunchlite/AddCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/4_addn_frameworklaunch/AddnCustom.json b/operator/ascendc/0_introduction/4_addn_frameworklaunch/AddnCustom.json index 4e51d3cf4..a3152109f 100644 --- a/operator/ascendc/0_introduction/4_addn_frameworklaunch/AddnCustom.json +++ b/operator/ascendc/0_introduction/4_addn_frameworklaunch/AddnCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddnCustom", - "language": "cpp", "input_desc": [ { "name": "srcList", diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom.json b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom.json index dce1ed85f..c583cac4a 100644 --- 
a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom.json +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom.json b/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom.json index 1baca3adf..f022b7b2a 100644 --- a/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom.json +++ b/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom.json @@ -1,7 +1,6 @@ [ { "op": "BroadcastCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json index dce1ed85f..c583cac4a 100644 --- a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AddCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json index 3886a9c63..054b87366 100644 --- a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom.json @@ -1,7 +1,6 @@ [ { "op": "MatmulCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom.json b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom.json index f582c9b0e..344f61603 100644 --- 
a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom.json +++ b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom.json @@ -1,7 +1,6 @@ [ { "op": "LeakyReluCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom.json b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom.json index 3886a9c63..054b87366 100644 --- a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom.json +++ b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom.json @@ -1,7 +1,6 @@ [ { "op": "MatmulCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom.json b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom.json index 3886a9c63..054b87366 100644 --- a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom.json +++ b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom.json @@ -1,7 +1,6 @@ [ { "op": "MatmulCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom.json b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom.json index 14fad3b8d..13a068dbd 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom.json +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom.json @@ -1,7 +1,6 @@ [ { "op": "MmadCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AddCustom.json b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AddCustom.json index 495087aa1..6da181a14 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AddCustom.json +++ 
b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AddCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/2_features/12_cube_group/CubeGroupCustom.json b/operator/ascendc/2_features/12_cube_group/CubeGroupCustom.json index 6a9300152..7b63dd800 100644 --- a/operator/ascendc/2_features/12_cube_group/CubeGroupCustom.json +++ b/operator/ascendc/2_features/12_cube_group/CubeGroupCustom.json @@ -1,7 +1,6 @@ [ { "op": "CubeGroupCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom.json b/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom.json index 6f9849971..f65e124fb 100644 --- a/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom.json +++ b/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom.json @@ -1,7 +1,6 @@ [ { "op": "MatmulApiConstantCustom", - "language": "cpp", "input_desc": [ { "name": "a", diff --git a/operator/ascendc/2_features/16_group_barrier/GroupBarrier.json b/operator/ascendc/2_features/16_group_barrier/GroupBarrier.json index 284d76f28..3f84eb7da 100644 --- a/operator/ascendc/2_features/16_group_barrier/GroupBarrier.json +++ b/operator/ascendc/2_features/16_group_barrier/GroupBarrier.json @@ -1,7 +1,6 @@ [ { "op": "GroupBarrier", - "language": "cpp", "input_desc": [ { "name": "barworkspace", diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json index 9a1ee691b..e827c0392 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/AddCustomTilingSink.json @@ -1,7 
+1,6 @@ [ { "op": "AddCustomTilingSink", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json index b76e8928f..b063d2279 100644 --- a/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json +++ b/operator/ascendc/4_best_practices/12_l2_cache_bypass/AddCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddCustom", - "language": "cpp", "input_desc": [ { "name": "x", diff --git a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json index a54432512..db988d0cd 100644 --- a/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json +++ b/operator/ascendc/4_best_practices/15_mata_address_conflict/AddsCustom.json @@ -1,7 +1,6 @@ [ { "op": "AddsCustom", - "language": "cpp", "input_desc": [ { "name": "x", -- Gitee From bcc5df4369770a9e52e16dd1795c61ff16ef0e53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AE=81?= Date: Mon, 18 Aug 2025 12:17:45 +0000 Subject: [PATCH 60/97] =?UTF-8?q?!2739=20remove=20default=20soc=20version,?= =?UTF-8?q?=20and=20change=20default=20python=20version=20Merge=20pull=20r?= =?UTF-8?q?equest=20!2739=20from=20=E6=9D=8E=E5=AE=81/dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inference/dataflow/cpluscplus/sample1.cpp | 2 -- inference/dataflow/cpluscplus/sample2.cpp | 2 -- inference/dataflow/cpluscplus/sample3.cpp | 2 -- inference/dataflow/cpluscplus/sample4.cpp | 2 -- inference/dataflow/cpluscplus/sample5.cpp | 2 -- inference/dataflow/cpluscplus/sample6.cpp | 2 -- inference/dataflow/cpluscplus/test_perf.cpp | 2 -- inference/dataflow/python/README.md | 16 ++++++++-------- inference/dataflow/python/sample1.py | 3 +-- inference/dataflow/python/sample2.py | 3 +-- inference/dataflow/python/sample3.py | 3 +-- 
.../dataflow/python/sample_multiple_model.py | 3 +-- inference/dataflow/python/sample_pytorch.py | 3 +-- inference/dataflow/python/test_perf.py | 3 +-- 14 files changed, 14 insertions(+), 34 deletions(-) diff --git a/inference/dataflow/cpluscplus/sample1.cpp b/inference/dataflow/cpluscplus/sample1.cpp index 7e2160d44..1b8df1930 100644 --- a/inference/dataflow/cpluscplus/sample1.cpp +++ b/inference/dataflow/cpluscplus/sample1.cpp @@ -139,9 +139,7 @@ int32_t main() auto flow_graph = BuildDataFlow(); // Initialize - // socVersion is set according to real chip type std::map config = {{"ge.exec.deviceId", "0"}, - {"ge.socVersion", "Ascend910B"}, {"ge.experiment.data_flow_deploy_info_path", "../config/data_flow_deploy_info.json"}, {"ge.graphRunMode", "0"}}; auto geRet = ge::GEInitialize(config); diff --git a/inference/dataflow/cpluscplus/sample2.cpp b/inference/dataflow/cpluscplus/sample2.cpp index 5c3aa0576..b60ebfc53 100644 --- a/inference/dataflow/cpluscplus/sample2.cpp +++ b/inference/dataflow/cpluscplus/sample2.cpp @@ -122,9 +122,7 @@ int32_t main() { auto flow_graph = BuildDataFlowGraph(); // Initialize - // socVersion is set according to real chip type std::map config = {{"ge.exec.deviceId", "0"}, - {"ge.socVersion", "Ascend910B"}, {"ge.exec.logicalDeviceClusterDeployMode", "SINGLE"}, {"ge.exec.logicalDeviceId", "[0:0]"}, {"ge.graphRunMode", "0"}}; diff --git a/inference/dataflow/cpluscplus/sample3.cpp b/inference/dataflow/cpluscplus/sample3.cpp index fcc9ece53..92ab880f9 100644 --- a/inference/dataflow/cpluscplus/sample3.cpp +++ b/inference/dataflow/cpluscplus/sample3.cpp @@ -121,9 +121,7 @@ int32_t main() { auto flow_graph = BuildDataFlowGraph(); // Initialize - // socVersion is set according to real chip type std::map config = {{"ge.exec.deviceId", "0"}, - {"ge.socVersion", "Ascend910B"}, {"ge.exec.logicalDeviceClusterDeployMode", "SINGLE"}, {"ge.exec.logicalDeviceId", "[0:0]"}, {"ge.graphRunMode", "0"}}; diff --git a/inference/dataflow/cpluscplus/sample4.cpp 
b/inference/dataflow/cpluscplus/sample4.cpp index e45ccd9b3..71f52187d 100644 --- a/inference/dataflow/cpluscplus/sample4.cpp +++ b/inference/dataflow/cpluscplus/sample4.cpp @@ -102,9 +102,7 @@ int32_t main() auto flow_graph = BuildDataFlowGraph(); // Initialize - // socVersion is set according to real chip type std::map config = {{"ge.exec.deviceId", "0"}, - {"ge.socVersion", "Ascend910B"}, {"ge.graphRunMode", "0"}}; auto geRet = ge::GEInitialize(config); if (geRet != ge::SUCCESS) { diff --git a/inference/dataflow/cpluscplus/sample5.cpp b/inference/dataflow/cpluscplus/sample5.cpp index 8f54b0d2b..c18e46b0f 100644 --- a/inference/dataflow/cpluscplus/sample5.cpp +++ b/inference/dataflow/cpluscplus/sample5.cpp @@ -103,9 +103,7 @@ int32_t main() auto flow_graph = BuildDataFlow(); // Initialize - // socVersion is set according to real chip type std::map config = {{"ge.exec.deviceId", "0"}, - {"ge.socVersion", "Ascend910B"}, {"ge.exec.logicalDeviceClusterDeployMode", "SINGLE"}, {"ge.exec.logicalDeviceId", "[0:0]"}, {"ge.graphRunMode", "0"}}; diff --git a/inference/dataflow/cpluscplus/sample6.cpp b/inference/dataflow/cpluscplus/sample6.cpp index 8bbd267ea..bb200e10e 100644 --- a/inference/dataflow/cpluscplus/sample6.cpp +++ b/inference/dataflow/cpluscplus/sample6.cpp @@ -144,9 +144,7 @@ int32_t main() flow_graph.SetInputsAlignAttrs(alginMaxCacheNum, alignTimeout, false); flow_graph.SetExceptionCatch(true); // Initialize - // socVersion is set according to real chip type std::map config = {{"ge.exec.deviceId", "0"}, - {"ge.socVersion", "Ascend910B"}, {"ge.experiment.data_flow_deploy_info_path", "../config/data_flow_deploy_info.json"}, {"ge.graphRunMode", "0"}}; auto geRet = ge::GEInitialize(config); diff --git a/inference/dataflow/cpluscplus/test_perf.cpp b/inference/dataflow/cpluscplus/test_perf.cpp index 66ee873fd..f997d84b1 100644 --- a/inference/dataflow/cpluscplus/test_perf.cpp +++ b/inference/dataflow/cpluscplus/test_perf.cpp @@ -78,9 +78,7 @@ int32_t main() auto 
flow_graph = BuildDataFlowGraph(); // Initialize - // socVersion is set according to real chip type std::map config = {{"ge.exec.deviceId", "0"}, - {"ge.socVersion", "Ascend910B"}, {"ge.graphRunMode", "0"}}; auto geRet = ge::GEInitialize(config); if (geRet != ge::SUCCESS) { diff --git a/inference/dataflow/python/README.md b/inference/dataflow/python/README.md index 3a5290d85..b87a07cfd 100644 --- a/inference/dataflow/python/README.md +++ b/inference/dataflow/python/README.md @@ -31,7 +31,7 @@ ## 环境准备 参考[环境准备](../../../README.md#环境准备)下载安装驱动/固件/CANN软件包 -python 版本要求:python3.9 +python 版本要求:python3.11 具体版本以dataflow wheel包编译时用的python版本为准,如果需要使用不同python版本,可以参考[py_dflow](../py_dflow)重新编译dataflow wheel包。 sample_pytorch.py、sample_npu_model.py样例依赖pytorch和torchvision包,推荐使用torch 2.1.0和torchvision 0.16.0 @@ -45,12 +45,12 @@ export ASCEND_SLOG_PRINT_TO_STDOUT=1 # 日志打屏,不设置日志落盘默 source {HOME}/Ascend/ascend-toolkit/set_env.sh #{HOME}为CANN软件包安装目录,请根据实际安装路径进行替换 export RESOURCE_CONFIG_PATH=xxx/xxx/xxx/numa_config.json -python3.9 sample1.py -python3.9 sample2.py -python3.9 sample3.py -python3.9 sample_pytorch.py -python3.9 sample_npu_model.py -python3.9 sample_multiple_model.py -python3.9 test_perf.py +python3.11 sample1.py +python3.11 sample2.py +python3.11 sample3.py +python3.11 sample_pytorch.py +python3.11 sample_npu_model.py +python3.11 sample_multiple_model.py +python3.11 test_perf.py ``` diff --git a/inference/dataflow/python/sample1.py b/inference/dataflow/python/sample1.py index d692611d1..23aa6a8a2 100644 --- a/inference/dataflow/python/sample1.py +++ b/inference/dataflow/python/sample1.py @@ -20,8 +20,7 @@ import dataflow as df options = { "ge.exec.deviceId":"0", "ge.exec.logicalDeviceClusterDeployMode":"SINGLE", - "ge.exec.logicalDeviceId":"[0:0]", - "ge.socVersion":"Ascend910B" + "ge.exec.logicalDeviceId":"[0:0]" } df.init(options) diff --git a/inference/dataflow/python/sample2.py b/inference/dataflow/python/sample2.py index 4f4a9cd8c..d23460c8d 100644 --- 
a/inference/dataflow/python/sample2.py +++ b/inference/dataflow/python/sample2.py @@ -21,8 +21,7 @@ from udf_py.udf_add import UserFunc1 # dataflow初始化参数 options = { "ge.exec.deviceId":"0", - "ge.experiment.data_flow_deploy_info_path":"./config/data_flow_deploy_info.json", - "ge.socVersion":"Ascend910B" + "ge.experiment.data_flow_deploy_info_path":"./config/data_flow_deploy_info.json" } df.init(options) diff --git a/inference/dataflow/python/sample3.py b/inference/dataflow/python/sample3.py index ed24000af..6bc3b605e 100644 --- a/inference/dataflow/python/sample3.py +++ b/inference/dataflow/python/sample3.py @@ -20,8 +20,7 @@ import dataflow as df options = { "ge.exec.deviceId":"0", "ge.exec.logicalDeviceClusterDeployMode":"SINGLE", - "ge.exec.logicalDeviceId":"[0:0]", - "ge.socVersion":"Ascend910B" + "ge.exec.logicalDeviceId":"[0:0]" } df.init(options) diff --git a/inference/dataflow/python/sample_multiple_model.py b/inference/dataflow/python/sample_multiple_model.py index 43c5e7cdb..48da93585 100644 --- a/inference/dataflow/python/sample_multiple_model.py +++ b/inference/dataflow/python/sample_multiple_model.py @@ -138,8 +138,7 @@ class SampleFlowGraph: self.flow_graph = None self.options = { "ge.exec.deviceId":"0", - "ge.experiment.data_flow_deploy_info_path":"./config/multi_model_deploy.json", - "ge.socVersion":"Ascend910B" + "ge.experiment.data_flow_deploy_info_path":"./config/multi_model_deploy.json" } def init(self): diff --git a/inference/dataflow/python/sample_pytorch.py b/inference/dataflow/python/sample_pytorch.py index cf5481256..ff141be80 100644 --- a/inference/dataflow/python/sample_pytorch.py +++ b/inference/dataflow/python/sample_pytorch.py @@ -86,8 +86,7 @@ def postprocess(input_image): if __name__ == '__main__': options = { - "ge.experiment.data_flow_deploy_info_path": "./config/sample_pytorch_deploy_info.json", - "ge.socVersion": "Ascend910B" + "ge.experiment.data_flow_deploy_info_path": "./config/sample_pytorch_deploy_info.json" } 
df.init(options) diff --git a/inference/dataflow/python/test_perf.py b/inference/dataflow/python/test_perf.py index 7299a7abe..c5247c5e6 100644 --- a/inference/dataflow/python/test_perf.py +++ b/inference/dataflow/python/test_perf.py @@ -20,8 +20,7 @@ import time options = { "ge.exec.deviceId":"0", "ge.exec.logicalDeviceClusterDeployMode":"SINGLE", - "ge.exec.logicalDeviceId":"[0:0]", - "ge.socVersion":"Ascend910B" + "ge.exec.logicalDeviceId":"[0:0]" } df.init(options) -- Gitee From de4cba6723598659e5105919a87ff5987c599c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E6=99=BA=E6=85=A7?= Date: Thu, 21 Aug 2025 01:10:34 +0000 Subject: [PATCH 61/97] =?UTF-8?q?!2722=20change=20sample2=20depend=20from?= =?UTF-8?q?=20llm=5Fengine=20to=20llm=5Fdatadist=20Merge=20pull=20request?= =?UTF-8?q?=20!2722=20from=20=E8=B5=B5=E6=99=BA=E6=85=A7/zzh=5F0718=5Fchan?= =?UTF-8?q?ge=5Fllm=5Fdatadist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../11_llm_data_dist/CMakeLists.txt | 4 +- .../11_llm_data_dist/decoder_sample2.cpp | 14 +- .../11_llm_data_dist/prompt_sample2.cpp | 14 +- .../11_llm_data_dist/readme.md | 7 +- .../level1_single_api/12_adxl/CMakeLists.txt | 48 ++++ .../12_adxl/adxl_engine_sample.cpp | 271 ++++++++++++++++++ cplusplus/level1_single_api/12_adxl/readme.md | 77 +++++ .../10_llm_data_dist/README.md | 10 +- .../switch_role_sample.py | 32 ++- 9 files changed, 446 insertions(+), 31 deletions(-) create mode 100644 cplusplus/level1_single_api/12_adxl/CMakeLists.txt create mode 100644 cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp create mode 100644 cplusplus/level1_single_api/12_adxl/readme.md diff --git a/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt b/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt index 25addfeab..5691c49c5 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt +++ b/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt @@ -90,7 +90,7 @@ 
target_link_directories(prompt_sample2 PRIVATE ) target_link_libraries(prompt_sample2 PRIVATE - llm_engine + llm_datadist graph ascendcl ) @@ -115,7 +115,7 @@ target_link_directories(decoder_sample2 PRIVATE ) target_link_libraries(decoder_sample2 PRIVATE - llm_engine + llm_datadist graph ascendcl ) \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp index 909be6ddd..41d94f042 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp +++ b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp @@ -50,11 +50,6 @@ int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId) { std::map options; options[OPTION_DEVICE_ID] = deviceId.c_str(); - if (std::getenv("LOCAL_COMM_RES") == nullptr) { - printf("[ERROR] env:LOCAL_COMM_RES not set\n"); - return -1; - } - options[OPTION_LOCAL_COMM_RES] = std::getenv("LOCAL_COMM_RES"); auto ret = llmDataDist.Initialize(options); if (ret != LLM_SUCCESS) { printf("[ERROR] Initialize failed, ret = %u\n", ret); @@ -77,11 +72,16 @@ int32_t SetRole(LlmDataDist &llmDataDist, LlmRole role, const char *localIp) return 0; } -int Link(LlmDataDist &llmDataDist, const char *remoteIp) +int Link(LlmDataDist &llmDataDist, const char *localIp, const char *remoteIp) { std::vector rets; std::vector clusters; ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 0; + IpInfo localIpInfo; + localIpInfo.ip = localIp; + localIpInfo.port = PROMPT_LISTEN_PORT; + clusterInfo.local_ip_infos.emplace_back(std::move(localIpInfo)); IpInfo remoteIpInfo; remoteIpInfo.ip = remoteIp; remoteIpInfo.port = PROMPT_LISTEN_PORT; @@ -226,7 +226,7 @@ int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char * std::this_thread::sleep_for(std::chrono::seconds(WAIT_PROMPT_TIME)); // 5. 
与prompt建链 - if (Link(llmDataDist, remoteIp) != 0) { + if (Link(llmDataDist, localIp, remoteIp) != 0) { Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); return -1; } diff --git a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp index 52abdafc4..83a176d7a 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp +++ b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp @@ -49,11 +49,6 @@ int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId, const std: std::map options; options[OPTION_DEVICE_ID] = deviceId.c_str(); options[OPTION_LISTEN_IP_INFO] = (localIp + ":" + std::to_string(PROMPT_LISTEN_PORT)).c_str(); - if (std::getenv("LOCAL_COMM_RES") == nullptr) { - printf("[ERROR] env:LOCAL_COMM_RES not set\n"); - return -1; - } - options[OPTION_LOCAL_COMM_RES] = std::getenv("LOCAL_COMM_RES"); auto ret = llmDataDist.Initialize(options); if (ret != LLM_SUCCESS) { printf("[ERROR] Initialize failed, ret = %u\n", ret); @@ -75,11 +70,16 @@ int32_t SetRole(LlmDataDist &llmDataDist, LlmRole role) return 0; } -int Link(LlmDataDist &llmDataDist, const char *remoteIp) +int Link(LlmDataDist &llmDataDist, const char *localIp, const char *remoteIp) { std::vector rets; std::vector clusters; ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 1; + IpInfo localIpInfo; + localIpInfo.ip = localIp; + localIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.local_ip_infos.emplace_back(std::move(localIpInfo)); IpInfo remoteIpInfo; remoteIpInfo.ip = remoteIp; remoteIpInfo.port = DECODER_LISTEN_PORT; @@ -228,7 +228,7 @@ int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *r } // 6. 
与decoder建链 - if (Link(llmDataDist, remoteIp) != 0) { + if (Link(llmDataDist, localIp, remoteIp) != 0) { Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); return -1; } diff --git a/cplusplus/level1_single_api/11_llm_data_dist/readme.md b/cplusplus/level1_single_api/11_llm_data_dist/readme.md index 9c5546e3a..c591fbe8e 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/readme.md +++ b/cplusplus/level1_single_api/11_llm_data_dist/readme.md @@ -89,11 +89,10 @@ - 执行prompt_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为prompt要使用的device_id, local_host_ip为prompt所在host的ip, remote_host_ip为decoder所在host的ip,如: ``` - LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "0", "device_ip": "10.10.10.1"}]}]}' ./prompt_sample2 0 10.10.170.1 10.170.10.2 + ./prompt_sample2 0 10.10.170.1 10.170.10.2 ``` - 执行decoder_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为decoder要使用的device_id, local_host_ip为decoder所在host的ip,remote_host_ip为prompt所在host的ip,如: ``` - LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "1", "device_ip": "10.10.10.2"}]}]}' ./decoder_sample2 1 10.170.10.2 10.170.10.1 - ``` - **注**:LOCAL_COMM_RES为sample2执行所需环境变量,配置了当前进程所需的通信资源,将传递给llm_datadist作为初始化option; 配置格式与HCCL的ranktable一致,只需要配置本进程第一个参数device_id对应的信息,其中ranktable中的rank_id和server_count字段不需要配置,当前用例配置为A2的ranktable格式,其他环境需参考对应环境的ranktable格式进行配置 \ No newline at end of file + ./decoder_sample2 1 10.170.10.2 10.170.10.1 + ``` \ No newline at end of file diff --git a/cplusplus/level1_single_api/12_adxl/CMakeLists.txt b/cplusplus/level1_single_api/12_adxl/CMakeLists.txt new file mode 100644 index 000000000..bfc67c317 --- /dev/null +++ b/cplusplus/level1_single_api/12_adxl/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.5.1) +project(adxl_sample) + +set(CMAKE_VERBOSE_MAKEFILE ON) +set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY 
TRUE) + +if (DEFINED ENV{ASCEND_INSTALL_PATH}) + set(ASCEND_PATH $ENV{ASCEND_INSTALL_PATH}) +else () + set(ASCEND_PATH /usr/local/Ascend/latest) +endif () + +set(INCLUDE_DIR ${ASCEND_PATH}/include) + +set(common_compile_options + --std=c++11 + -g + -Wall +) + +set(common_compile_definitions + _GLIBCXX_USE_CXX11_ABI=0 +) + +add_executable(adxl_engine_sample "adxl_engine_sample.cpp") + +target_compile_options(adxl_engine_sample PRIVATE + ${common_compile_options} +) + +target_compile_definitions(adxl_engine_sample PRIVATE + ${common_compile_definitions} +) + +target_include_directories(adxl_engine_sample PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + +target_link_directories(adxl_engine_sample PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(adxl_engine_sample PRIVATE + adxl + graph + ascendcl +) \ No newline at end of file diff --git a/cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp b/cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp new file mode 100644 index 000000000..a2252b8d3 --- /dev/null +++ b/cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp @@ -0,0 +1,271 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include "acl/acl.h" +#include "adxl/adxl_engine.h" + +using namespace adxl; +namespace{ +constexpr int32_t WAIT_REG_TIME = 5; +constexpr int32_t WAIT_TRANS_TIME = 20; +constexpr int32_t CLIENT_EXPECTED_ARG_CNT = 4; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_ENINE = 2; +constexpr uint32_t CLIENT_ARG_INDEX_REMOTE_ENINE = 3; +constexpr int32_t SERVER_EXPECTED_ARG_CNT = 3; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(AdxlEngine &adxlEngine, const char *localEngine) +{ + std::map options; + auto ret = adxlEngine.Initialize(localEngine, options); + if (ret != SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return 0; +} + +int Connect(AdxlEngine &adxlEngine, const char *remoteEngine) +{ + auto ret = adxlEngine.Connect(remoteEngine); + if (ret != SUCCESS) { + printf("[ERROR] Connect failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Connect success\n"); + return 0; +} + +int Disconnect(AdxlEngine &adxlEngine, const char *remoteEngine) +{ + auto ret = adxlEngine.Disconnect(remoteEngine); + if (ret != SUCCESS) { + printf("[ERROR] Disconnect failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Disconnect success\n"); + return 0; +} + +int32_t Transfer(AdxlEngine &adxlEngine, int32_t &src, const char *remoteEngine) +{ + uintptr_t dstAddr; + std::ifstream("./tmp") >> std::hex >> dstAddr; + + TransferOpDesc desc{reinterpret_cast(&src), reinterpret_cast(dstAddr), sizeof(int32_t)}; + auto ret = adxlEngine.TransferSync(remoteEngine, READ, {desc}); + if (ret != SUCCESS) { + printf("[ERROR] TransferSync read failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] TransferSync read success, src = %d\n", src); + + src 
= 2; + ret = adxlEngine.TransferSync(remoteEngine, WRITE, {desc}); + if (ret != SUCCESS) { + printf("[ERROR] TransferSync write failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] TransferSync write success, src = %d\n", src); + return 0; +} + +void ClientFinalize(AdxlEngine &adxlEngine, bool connected, const char *remoteEngine, + const std::vector handles, const std::vector hostBuffers = {}) +{ + if (connected) { + auto ret = Disconnect(adxlEngine, remoteEngine); + if (ret != 0) { + printf("[ERROR] Disconnect failed, ret = %d\n", ret); + } else { + printf("[INFO] Disconnect success\n"); + } + } + + for (auto handle : handles) { + auto ret = adxlEngine.DeregisterMem(handle); + if (ret != 0) { + printf("[ERROR] DeregisterMem failed, ret = %u\n", ret); + } else { + printf("[INFO] DeregisterMem success\n"); + } + } + for (auto buffer : hostBuffers) { + aclrtFreeHost(buffer); + } + adxlEngine.Finalize(); +} + +void ServerFinalize(AdxlEngine &adxlEngine, + const std::vector handles, + const std::vector devBuffers = {}) +{ + for (auto handle : handles) { + auto ret = adxlEngine.DeregisterMem(handle); + if (ret != 0) { + printf("[ERROR] DeregisterMem failed, ret = %u\n", ret); + } else { + printf("[INFO] DeregisterMem success\n"); + } + } + for (auto buffer : devBuffers) { + aclrtFree(buffer); + } + adxlEngine.Finalize(); +} + +int32_t RunClient(const char *localEngine, const char *remoteEngine) +{ + printf("[INFO] client start\n"); + // 1. 初始化 + AdxlEngine adxlEngine; + if (Initialize(adxlEngine, localEngine) != 0) { + printf("[ERROR] Initialize AdxlEngine failed\n"); + return -1; + } + // 2. 
注册内存地址 + int32_t *src = nullptr; + CHECK_ACL(aclrtMallocHost(reinterpret_cast(&src), sizeof(int32_t))); + bool connected = false; + MemDesc desc{}; + desc.addr = reinterpret_cast(src); + desc.len = sizeof(int32_t); + MemHandle handle = nullptr; + auto ret = adxlEngine.RegisterMem(desc, MEM_HOST, handle); + if (ret != SUCCESS) { + printf("[ERROR] RegisterMem failed, ret = %u\n", ret); + ClientFinalize(adxlEngine, connected, remoteEngine, {handle}, {src}); + return -1; + } + printf("[INFO] RegisterMem success\n"); + + // 等待server注册完成 + std::this_thread::sleep_for(std::chrono::seconds(WAIT_REG_TIME)); + + // 3. 与server建链 + if (Connect(adxlEngine, remoteEngine) != 0) { + ClientFinalize(adxlEngine, connected, remoteEngine, {handle}, {src}); + return -1; + } + connected = true; + + // 4. 从server get内存,并向server put内存 + if (Transfer(adxlEngine, *src, remoteEngine) != 0) { + ClientFinalize(adxlEngine, connected, remoteEngine, {handle}, {src}); + return -1; + } + + // 5. 释放Cache与llmDataDist + ClientFinalize(adxlEngine, connected, remoteEngine, {handle}, {src}); + printf("[INFO] Finalize success\n"); + printf("[INFO] Prompt Sample end\n"); + return 0; +} + +int32_t RunServer(const char *localEngine) +{ + printf("[INFO] server start\n"); + // 1. 初始化 + AdxlEngine adxlEngine; + if (Initialize(adxlEngine, localEngine) != 0) { + printf("[ERROR] Initialize AdxlEngine failed\n"); + return -1; + } + // 2. 
注册内存地址 + int32_t dst = 1; + int32_t *buffer = nullptr; + CHECK_ACL(aclrtMalloc((void **)&buffer, sizeof(int32_t), ACL_MEM_MALLOC_HUGE_ONLY)); + // init device buffer + CHECK_ACL(aclrtMemcpy(buffer, sizeof(int32_t), &dst, sizeof(int32_t), ACL_MEMCPY_HOST_TO_DEVICE)); + + MemDesc desc{}; + desc.addr = reinterpret_cast(buffer); + desc.len = sizeof(int32_t); + MemHandle handle = nullptr; + auto ret = adxlEngine.RegisterMem(desc, MEM_DEVICE, handle); + if (ret != SUCCESS) { + printf("[ERROR] RegisterMem failed, ret = %u\n", ret); + ServerFinalize(adxlEngine, {handle}, {buffer}); + return -1; + } + // 3. RegisterMem成功后,将地址保存到本地文件中等待client读取 + printf("[INFO] RegisterMem success, dst addr:%p\n", buffer); + std::ofstream tmp_file("./tmp"); // 默认就是 std::ios::out | std::ios::trunc + if (tmp_file) { + tmp_file << buffer << std::endl; + } + + // 4. 等待client transfer + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TRANS_TIME)); + + CHECK_ACL(aclrtMemcpy(&dst, sizeof(int32_t), buffer, sizeof(int32_t), ACL_MEMCPY_DEVICE_TO_HOST)); + printf("[INFO] After transfer, dst value:%d\n", dst); + + // 5. 
释放Cache与llmDataDist + ServerFinalize(adxlEngine, {handle}, {buffer}); + printf("[INFO] Finalize success\n"); + printf("[INFO] server Sample end\n"); + return 0; +} + +int main(int32_t argc, char **argv) +{ + bool isClient = false; + std::string deviceId; + std::string localEngine; + std::string remoteEngine; + if (argc == CLIENT_EXPECTED_ARG_CNT) { + isClient = true; + deviceId = argv[ARG_INDEX_DEVICE_ID]; + localEngine = argv[ARG_INDEX_LOCAL_ENINE]; + remoteEngine = argv[CLIENT_ARG_INDEX_REMOTE_ENINE]; + printf("[INFO] deviceId = %s, localEngine = %s, remoteEngine = %s\n", + deviceId.c_str(), localEngine.c_str(), remoteEngine.c_str()); + } else if (argc == SERVER_EXPECTED_ARG_CNT) { + deviceId = argv[ARG_INDEX_DEVICE_ID]; + localEngine = argv[ARG_INDEX_LOCAL_ENINE]; + printf("[INFO] deviceId = %s, localEngine = %s\n", deviceId.c_str(), localEngine.c_str()); + } else { + printf("[ERROR] client expect 3 args(deviceId, localEngine, remoteEngine), " + "server expect 2 args(deviceId, localEngine), but got %d\n", argc - 1); + } + int32_t device = std::stoi(deviceId); + CHECK_ACL(aclrtSetDevice(device)); + + int32_t ret = 0; + if (isClient) { + ret = RunClient(localEngine.c_str(), remoteEngine.c_str()); + } else { + ret = RunServer(localEngine.c_str()); + } + CHECK_ACL(aclrtResetDevice(device)); + return ret; +} \ No newline at end of file diff --git a/cplusplus/level1_single_api/12_adxl/readme.md b/cplusplus/level1_single_api/12_adxl/readme.md new file mode 100644 index 000000000..9fc957373 --- /dev/null +++ b/cplusplus/level1_single_api/12_adxl/readme.md @@ -0,0 +1,77 @@ +## 目录 + +- [样例介绍](#样例介绍) +- [目录结构](#目录结构) +- [环境要求](#环境要求) +- [程序编译](#程序编译) +- [样例运行](#样例运行) + + +## 样例介绍 + +功能:通过adxl engine接口实现Cache傳輸功能。 + + +## 目录结构 + +``` +├── adxl_engine_sample.cpp // adxl_engine样例 +├── CMakeLists.txt // 编译脚本 +``` + + +## 环境要求 + +- 操作系统及架构:Euleros x86系统、Euleros aarch64系统 +- 编译器:g++ +- 芯片:Atlas 训练系列产品、Atlas 推理系列产品(配置Ascend 310P AI处理器) +- python及依赖的库:python3.7.5 +- 
已完成昇腾AI软件栈在运行环境上的部署 + +## 程序编译 + +1. 修改CMakeLists.txt文件中的安装包路径 + +2. 执行如下命令进行编译。 + + 依次执行: + + ``` + mkdir build && cd build + cmake .. && make + ``` + +3. 编译结束后,在**build**目录下生成可执行文件**adxl_engine_sample**。 + +## 样例运行 +1. 配置环境变量 + - 若运行环境上安装的“Ascend-cann-toolkit”包,环境变量设置如下: + + ``` + . ${HOME}/Ascend/ascend-toolkit/set_env.sh + ``` + + “$HOME/Ascend”请替换相关软件包的实际安装路径。 + + - 若运行环境上安装的“CANN-XXX.run”包,环境变量设置如下: + + ``` + source ${HOME}/Ascend/latest/bin/setenv.bash + ``` + + “$HOME/Ascend”请替换相关软件包的实际安装路径。 + +2. 在运行环境执行可执行文件。 + + 3.1 执行sample + + - 执行client adxl_engine_sample, 参数为device_id、local engine和remote engine, 其中device_id为client要使用的device_id,如: + ``` + HCCL_INTRA_ROCE_ENABLE=1 ./adxl_engine_sample 0 10.10.10.0 10.10.10.1:16000 + ``` + + - 执行server adxl_engine_sample, 参数为device_id、local engine, 其中device_id为server要使用的device_id, 如: + ``` + HCCL_INTRA_ROCE_ENABLE=1 ./adxl_engine_sample 1 1 10.10.10.1:16000 + ``` + **注**:HCCL_INTRA_ROCE_ENABLE=1表示使用RDMA进行传输 \ No newline at end of file diff --git a/python/level1_single_api/10_llm_data_dist/README.md b/python/level1_single_api/10_llm_data_dist/README.md index 76c3225e5..ddddf5e88 100644 --- a/python/level1_single_api/10_llm_data_dist/README.md +++ b/python/level1_single_api/10_llm_data_dist/README.md @@ -97,14 +97,14 @@ # Decoder主机: python pull_from_cache_to_blocks.py --device_id 0 --cluster_id 2 ``` - - 执行switch role样例程序,此样例程序使用单侧建链方式,首先torch自行申请内存并注册blocks, - decoder发起建链并pull blocks, 然后两侧切换角色, 并prompt发起建链, decoder进行push_blocks: + - switch_role_sample.py:执行switch role样例程序,此样例程序使用单侧建链方式,首先torch自行申请内存并注册blocks, + decoder发起建链并pull blocks, 然后两侧切换角色, 并prompt发起建链, decoder进行push_blocks,执行方式如下: 分别在Prompt主机与Decoder主机,执行样例程序: ``` # Prompt主机: - LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "0", "device_ip": "10.10.10.1"}]}]}' GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 0 --role p --local_host_ip 10.170.10 
--remote_host_ip 10.170.10 + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 0 --role p --local_host_ip 10.170.10.0 --remote_host_ip 10.170.10.1 # Decoder主机: - LOCAL_COMM_RES='{"status": "completed", "version": "1.0", "server_list": [{"server_id": "node_1", "device": [{"device_id": "1", "device_ip": "10.10.10.2"}]}]}' GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 1 --role d --local_host_ip 10.170.10 --remote_host_ip 10.170.10 + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 1 --role d --local_host_ip 10.170.10.1 --remote_host_ip 10.170.10.0 ``` - **注**:**LOCAL_COMM_RES**为单侧建链方式执行所需环境变量,配置了当前进程所需的通信资源,将传递给llm_datadist作为初始化option; 配置格式与HCCL的ranktable一致,只需要配置本进程参数device_id对应的信息,其中ranktable中的rank_id和server_count字段不需要配置,当前用例配置为A2的ranktable格式,其他环境需参考对应环境的ranktable格式进行配置;**GLOO_SOCKET_IFNAME**为本地网卡名,可通过ifconfig查询;**HCCL_INTRA_ROCE_ENABLE=1**代表使用roce方式进行通信; + **注**:**GLOO_SOCKET_IFNAME**为本地网卡名,可通过ifconfig查询;**HCCL_INTRA_ROCE_ENABLE=1**代表使用roce方式进行通信; diff --git a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py index 616e62eee..299f48c99 100644 --- a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py +++ b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py @@ -47,9 +47,8 @@ def init_llm_datadist(role: LLMRole, cluster_id, device_id: int, local_host_ip, datadist = LLMDataDist(role, cluster_id) llm_config = LLMConfig() llm_config.device_id = device_id - if os.getenv('LOCAL_COMM_RES') is None: - raise Exception("env:LOCAL_COMM_RES is not set") - llm_config.local_comm_res = os.getenv('LOCAL_COMM_RES') + llm_config.enable_cache_manager = True + llm_config.enable_remote_cache_accessible = True if role == LLMRole.PROMPT: 
llm_config.listen_ip_info = f"{local_host_ip}:26000" llm_options = llm_config.generate_options() @@ -58,7 +57,8 @@ def init_llm_datadist(role: LLMRole, cluster_id, device_id: int, local_host_ip, return datadist -def run_prompt_sample(datadist, remote_host_ip): +def run_prompt_sample(datadist, local_host_ip, remote_host_ip): + # 1. 注册内存 cache_manager = datadist.cache_manager cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, placement=Placement.DEVICE) @@ -68,25 +68,34 @@ def run_prompt_sample(datadist, remote_host_ip): addr2 = int(tensor2.data_ptr()) cache = cache_manager.register_blocks_cache(cache_desc, [addr, addr2], BlocksCacheKey(PROMPT_CLUSTER_ID, 0)) logging.info('register_blocks_cache success') + + # 2. 等decoder pull blocks dist.barrier() # register end logging.info('wait decoder link and pull...') dist.barrier() # decoder unlink + # 3. 切换角色 datadist.switch_role(LLMRole.DECODER) dist.barrier() # prompt switch role end, close lisen dist.barrier() # decoder switch role end, lisen + # 4. 向decoder发起建链 cluster = LLMClusterInfo() + cluster.remote_cluster_id = DECODER_CLUSTER_ID + cluster.append_local_ip_info(local_host_ip, 26000) cluster.append_remote_ip_info(remote_host_ip, 26000) ret, _ = datadist.link_clusters([cluster], 5000) if ret != LLMStatusCode.LLM_SUCCESS: raise Exception("link failed") logging.info('link success, wait decoder push...') dist.barrier() # prompt link end + + # 5. 等decoder push blocks dist.barrier() # decoder push blocks end logging.info(f'after decoder push, {tensor=}') logging.info(f'after decoder push, {tensor2=}') + # 6. 解链 cluster = LLMClusterInfo() cluster.remote_cluster_id = DECODER_CLUSTER_ID ret, _ = datadist.unlink_clusters([cluster], 5000, force=True) @@ -99,6 +108,7 @@ def run_prompt_sample(datadist, remote_host_ip): def run_decoder_sample(datadist, local_host_ip, remote_host_ip): + # 1. 
注册内存 cache_manager = datadist.cache_manager cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, placement=Placement.DEVICE) @@ -110,16 +120,21 @@ def run_decoder_sample(datadist, local_host_ip, remote_host_ip): logging.info('register_blocks_cache success') dist.barrier() # register end + # 2. 向prompt建链 cluster = LLMClusterInfo() + cluster.remote_cluster_id = PROMPT_CLUSTER_ID + cluster.append_local_ip_info(local_host_ip, 26000) cluster.append_remote_ip_info(remote_host_ip, 26000) ret, _ = datadist.link_clusters([cluster], 5000) if ret != LLMStatusCode.LLM_SUCCESS: - raise Exception("unlink failed") + raise Exception("link failed") + # 3. 从prompt pull blocks cache_manager.pull_blocks(BlocksCacheKey(PROMPT_CLUSTER_ID, 0), cache, src_blocks=[0, 1], dst_blocks=[0, 2]) logging.info(f'after decoder pull, {tensor=}') logging.info(f'after decoder pull, {tensor2=}') + # 4. 断链并切换角色 cluster = LLMClusterInfo() cluster.remote_cluster_id = PROMPT_CLUSTER_ID cluster.append_remote_ip_info(remote_host_ip, 26000) @@ -134,12 +149,17 @@ def run_decoder_sample(datadist, local_host_ip, remote_host_ip): llm_options = llm_config.generate_options() datadist.switch_role(LLMRole.PROMPT, llm_options) logging.info('decoder link, pull, unlink, switch role success, wait prompt link...') + + # 5. 等待prompt发起建链 dist.barrier() # decoder switch role end, lisen dist.barrier() # prompt link end + # 6. 向prompt push blocks cache_manager.push_blocks(BlocksCacheKey(PROMPT_CLUSTER_ID, 0), cache, src_blocks=[0, 1, 2], dst_blocks=[0, 1,2], src_layer_range=range(0, 2), dst_layer_range=range(0, 2), tensor_num_per_layer=1) dist.barrier() # decoder push blocks end + + # 7. 
断链 cluster = LLMClusterInfo() cluster.remote_cluster_id = PROMPT_CLUSTER_ID ret, _ = datadist.unlink_clusters([cluster], 5000, force=True) @@ -172,7 +192,7 @@ if __name__ == '__main__': cluster_id = PROMPT_CLUSTER_ID if args.role == 'p' else DECODER_CLUSTER_ID datadist = init_llm_datadist(role, cluster_id, args.device_id, args.local_host_ip, args.remote_host_ip) if role == LLMRole.PROMPT: - run_prompt_sample(datadist, args.remote_host_ip) + run_prompt_sample(datadist, args.local_host_ip, args.remote_host_ip) else: run_decoder_sample(datadist, args.local_host_ip, args.remote_host_ip) logging.info('Sample end') -- Gitee From c64df1dd584561899b2a75d2ca6295623890409e Mon Sep 17 00:00:00 2001 From: gitee_hw_lxc Date: Thu, 21 Aug 2025 07:15:17 +0000 Subject: [PATCH 62/97] !2745 dvpp sample support device_id setting Merge pull request !2745 from gitee_hw_lxc/master --- .../7_dvpp/jpegd_sample/readme.md | 1 + .../jpegd_sample/src/sample_comm_jpegd.cpp | 22 ++++++++++++------- .../7_dvpp/jpege_sample/readme.md | 1 + .../7_dvpp/jpege_sample/src/sample_jpege.cpp | 18 ++++++++++----- .../7_dvpp/pngd_sample/readme.md | 2 +- .../pngd_sample/src/sample_comm_pngd.cpp | 22 ++++++++++++------- .../7_dvpp/venc_sample/readme.md | 2 ++ .../7_dvpp/venc_sample/src/sample_debug.cpp | 18 ++++++++++----- .../7_dvpp/vpc_sample/readme.md | 2 +- .../vpc_sample/src/common/sample_comm.cpp | 18 +++++++-------- .../vpc_sample/src/common/sample_comm.h | 6 +++-- .../7_dvpp/vpc_sample/src/sample_vpc.cpp | 15 ++++++++----- 12 files changed, 81 insertions(+), 46 deletions(-) diff --git a/cplusplus/level1_single_api/7_dvpp/jpegd_sample/readme.md b/cplusplus/level1_single_api/7_dvpp/jpegd_sample/readme.md index 4240b627a..664e52ea2 100644 --- a/cplusplus/level1_single_api/7_dvpp/jpegd_sample/readme.md +++ b/cplusplus/level1_single_api/7_dvpp/jpegd_sample/readme.md @@ -137,6 +137,7 @@ DVPP中的JPEGD功能模块,实现.jpg、.jpeg、.JPG、.JPEG图片的解码 - 
delay\_time:发送前的等待时间,所有通道等待输入所定的秒之后,一齐发送码流。该参数用于性能测试时减小通道之间因为启动时间不同步引起的性能参数差异,非性能模式下无效。 - wait\_time:最大等待时间,以秒为单位。超出设定时间后进入退出流程。 - whole_dir:兼容性测试的输入文件夹路径。设置后将交给JPEGD解码该文件夹下所有以.jpg或.JPEG为后缀的文件,不设置则不会触发该模式。 + - device\_id:device id,默认为0。 ## JPEGD基础功能 diff --git a/cplusplus/level1_single_api/7_dvpp/jpegd_sample/src/sample_comm_jpegd.cpp b/cplusplus/level1_single_api/7_dvpp/jpegd_sample/src/sample_comm_jpegd.cpp index 26d781562..13cb0123c 100644 --- a/cplusplus/level1_single_api/7_dvpp/jpegd_sample/src/sample_comm_jpegd.cpp +++ b/cplusplus/level1_single_api/7_dvpp/jpegd_sample/src/sample_comm_jpegd.cpp @@ -71,6 +71,7 @@ char g_input_file_name[FILE_NAME_LEN] = "infile"; char g_output_file_name[FILE_NAME_LEN] = "outfile"; char g_compatible_dir_name[FILE_NAME_LEN] = "./"; +int32_t g_device_id = 0; aclrtContext g_context = nullptr; aclrtRunMode g_run_mode = ACL_DEVICE; hi_vdec_send_mode g_video_mode = HI_VDEC_SEND_MODE_FRAME; @@ -168,6 +169,7 @@ void jpegd_usage(char *sPrgNm) SAMPLE_PRT("\t delay_time: start decode after input seconds.\n"); SAMPLE_PRT("\t performance_mode: mode to test decode fps.\n"); SAMPLE_PRT("\t wait_time: max wait input seconds,\n"); + SAMPLE_PRT("\t device_id: device id,\n"); SAMPLE_PRT("\t JpegdDemo would exit after input seconds if it still running.\n"); SAMPLE_PRT("\t whole_dir: compatible mode decode dir path. 
JPEGD would decode all jpeg to test compatibility\n"); SAMPLE_PRT("\t If you want to check whether pics in folder can be decoded, run like:\n"); @@ -208,13 +210,14 @@ int32_t get_option(int32_t argc, char **argv) {"offset_bottom", 1, nullptr, 'B'}, {"offset_left", 1, nullptr, 'L'}, {"offset_right", 1, nullptr, 'R'}, + {"device_id", 1, nullptr, 'i'}, {nullptr, 0, nullptr, 0} }; while (1) { option_index = 0; - c = getopt_long(argc, argv, "w:h:b:c:d:g:e:s:p:C:D:F:O:W:H:S:Y:a:P:", long_options, &option_index); + c = getopt_long(argc, argv, "w:h:b:c:d:g:e:s:p:C:D:F:O:W:H:S:Y:a:P:i", long_options, &option_index); if (c == -1) { break; } @@ -286,6 +289,9 @@ int32_t get_option(int32_t argc, char **argv) case 'R': g_offset_right = atoi(optarg); break; + case 'i': + g_device_id = atoi(optarg); + break; default: SAMPLE_PRT("unsupport option!\n"); break; @@ -1753,18 +1759,18 @@ int32_t setup_acl_device() } SAMPLE_PRT("aclInit succ.\n"); - aclRet = aclrtSetDevice(0); + aclRet = aclrtSetDevice(g_device_id); if (aclRet != ACL_SUCCESS) { - SAMPLE_PRT("aclrtSetDevice(0) fail with %d.\n", aclRet); + SAMPLE_PRT("aclrtSetDevice(%d) fail with %d.\n", g_device_id, aclRet); aclFinalize(); return aclRet; } - SAMPLE_PRT("aclrtSetDevice(0) succ.\n"); + SAMPLE_PRT("aclrtSetDevice(%d) succ.\n", g_device_id); - aclRet = aclrtCreateContext(&g_context, 0); + aclRet = aclrtCreateContext(&g_context, g_device_id); if (aclRet != ACL_SUCCESS) { SAMPLE_PRT("acl create context failed with %d.\n", aclRet); - aclrtResetDevice(0); + aclrtResetDevice(g_device_id); aclFinalize(); return aclRet; } @@ -1775,7 +1781,7 @@ int32_t setup_acl_device() SAMPLE_PRT("get current context failed\n"); aclrtDestroyContext(g_context); g_context = nullptr; - aclrtResetDevice(0); + aclrtResetDevice(g_device_id); aclFinalize(); return aclRet; } @@ -1789,7 +1795,7 @@ void destroy_acl_device() if (g_context) { aclrtDestroyContext(g_context); g_context = nullptr; - aclrtResetDevice(0); + aclrtResetDevice(g_device_id); 
aclFinalize(); } } diff --git a/cplusplus/level1_single_api/7_dvpp/jpege_sample/readme.md b/cplusplus/level1_single_api/7_dvpp/jpege_sample/readme.md index 91c87b842..65ffef293 100644 --- a/cplusplus/level1_single_api/7_dvpp/jpege_sample/readme.md +++ b/cplusplus/level1_single_api/7_dvpp/jpege_sample/readme.md @@ -145,3 +145,4 @@ DVPP中的JPEGE功能模块,实现将YUV格式图片编码成.jpg图片。 - zero\_copy:是否指定编码结果的输出地址。 - 默认0:不指定输出地址 - 非0:指定输出地址 + - device\_id:device id, 默认为0。 diff --git a/cplusplus/level1_single_api/7_dvpp/jpege_sample/src/sample_jpege.cpp b/cplusplus/level1_single_api/7_dvpp/jpege_sample/src/sample_jpege.cpp index 0d3129a9b..5e08709fc 100644 --- a/cplusplus/level1_single_api/7_dvpp/jpege_sample/src/sample_jpege.cpp +++ b/cplusplus/level1_single_api/7_dvpp/jpege_sample/src/sample_jpege.cpp @@ -69,6 +69,7 @@ uint32_t g_isZeroCopy = 0; aclrtContext g_context = NULL; // ACL_HOST or ACL_DEVICE aclrtRunMode g_run_mode = ACL_HOST; +int32_t g_device_id = 0; /* * function: jpege_usage(int argc, char **argv) @@ -96,6 +97,7 @@ void jpege_usage() printf("\t\t 9: HI_PIXEL_FORMAT_YVYU_PACKED_422\n"); printf("\t\t 10: HI_PIXEL_FORMAT_VYUY_PACKED_422\n"); printf("\t chn_num: jpege channel num.\n"); + printf("\t device_id: device id.\n"); printf("/*********************************************************/\n\n"); } @@ -133,9 +135,10 @@ void get_option(int argc, char **argv) {"sync_enc" , 1, 0, 'S'}, {"frame_count" , 1, 0, 'n'}, {"zero_copy" , 1, 0, 'z'}, + {"device_id" , 1, 0, 'd'}, {NULL, 0, 0, 0} }; - c = getopt_long(argc, argv, "W:H:w:h:f:b:c:i:o:l:s:B:p:O:Q:T:S:n:z", longOptions, &optionIndex); + c = getopt_long(argc, argv, "W:H:w:h:f:b:c:i:o:l:s:B:p:O:Q:T:S:n:z:d", longOptions, &optionIndex); if (c == -1) { break; } @@ -198,6 +201,9 @@ void get_option(int argc, char **argv) case 'z': g_isZeroCopy = atoi(optarg); break; + case 'd': + g_device_id = atoi(optarg); + break; default: SAMPLE_PRT("bad arg!\n"); break; @@ -224,13 +230,13 @@ int32_t jpege_sys_init() return HI_FAILURE; } - 
acl_ret = aclrtSetDevice(0); + acl_ret = aclrtSetDevice(g_device_id); if (acl_ret != ACL_SUCCESS) { - SAMPLE_PRT("aclrtSetDevice(0) fail with %d.\n", acl_ret); + SAMPLE_PRT("aclrtSetDevice(%d) fail with %d.\n", g_device_id, acl_ret); return HI_FAILURE; } - acl_ret = aclrtCreateContext(&g_context, 0); + acl_ret = aclrtCreateContext(&g_context, g_device_id); if (acl_ret != ACL_SUCCESS) { SAMPLE_PRT("acl create context failed with %d.\n", acl_ret); return HI_FAILURE; @@ -277,9 +283,9 @@ hi_s32 jpege_sys_exit() if (acl_ret != ACL_SUCCESS) { SAMPLE_PRT("destroy context failed with %d.\n", acl_ret); } - acl_ret = aclrtResetDevice(0); + acl_ret = aclrtResetDevice(g_device_id); if (acl_ret != ACL_SUCCESS) { - SAMPLE_PRT("reset device(0) fail with %d.\n", acl_ret); + SAMPLE_PRT("reset device(%d) fail with %d.\n", g_device_id, acl_ret); } acl_ret = aclFinalize(); if (acl_ret != ACL_SUCCESS) { diff --git a/cplusplus/level1_single_api/7_dvpp/pngd_sample/readme.md b/cplusplus/level1_single_api/7_dvpp/pngd_sample/readme.md index 5d7d17df4..684e03a45 100644 --- a/cplusplus/level1_single_api/7_dvpp/pngd_sample/readme.md +++ b/cplusplus/level1_single_api/7_dvpp/pngd_sample/readme.md @@ -130,7 +130,7 @@ DVPP中的PNGD功能模块,实现.png图片的解码。 - delay\_time:发送前的等待时间,所有通道等待输入所定的秒之后,一齐发送码流。该参数用于性能测试时减小通道之间因为启动时间不同步引起的性能参数差异,非性能模式下无效。 - wait\_time:最大等待时间,以秒为单位。超出设定时间后进入退出流程。 - whole_dir:兼容性测试的输入文件夹路径。设置后将交给PNGD解码该文件夹下所有以.jpg或.JPEG为后缀的文件,不设置则不会触发该模式。 - + - device\_id:device id,默认为0。 ## PNGD基础功能 diff --git a/cplusplus/level1_single_api/7_dvpp/pngd_sample/src/sample_comm_pngd.cpp b/cplusplus/level1_single_api/7_dvpp/pngd_sample/src/sample_comm_pngd.cpp index 7f431bec9..8b03be87b 100644 --- a/cplusplus/level1_single_api/7_dvpp/pngd_sample/src/sample_comm_pngd.cpp +++ b/cplusplus/level1_single_api/7_dvpp/pngd_sample/src/sample_comm_pngd.cpp @@ -58,6 +58,7 @@ char g_input_file_name[FILE_NAME_LEN] = "infile"; char g_output_file_name[FILE_NAME_LEN] = "outfile"; char g_compatible_dir_name[FILE_NAME_LEN] = 
"./"; +int32_t g_device_id = 0; aclrtContext g_context = nullptr; aclrtRunMode g_run_mode = ACL_DEVICE; hi_pixel_format g_pixel_format = HI_PIXEL_FORMAT_RGB_888; @@ -158,6 +159,7 @@ void pngd_usage(char *sPrgNm) SAMPLE_PRT("\t delay_time: start decode after input seconds.\n"); SAMPLE_PRT("\t performance_mode: mode to test decode fps.\n"); SAMPLE_PRT("\t wait_time: max wait input seconds,\n"); + SAMPLE_PRT("\t device_id: device id,\n"); SAMPLE_PRT("\t PngdDemo would exit after input seconds if it still running.\n"); SAMPLE_PRT("\t whole_dir: compatible mode decode dir path. PNGD would decode all png to test compatibility\n"); SAMPLE_PRT("\t If you want to check whether pics in folder can be decoded, run like:\n"); @@ -189,13 +191,14 @@ int32_t get_option(int32_t argc, char **argv) {"wait_time", 1, nullptr, 'a'}, {"performance_mode", 1, nullptr, 'P'}, {"align", 1, nullptr, 'b'}, + {"device_id", 1, nullptr, 'd'}, {nullptr, 0, nullptr, 0} }; while (1) { option_index = 0; - c = getopt_long(argc, argv, "w:h:c:s:p:F:O:W:H:S:Y:a:P:b:", long_options, &option_index); + c = getopt_long(argc, argv, "w:h:c:s:p:F:O:W:H:S:Y:a:P:b:d:", long_options, &option_index); if (c == -1) { break; } @@ -243,6 +246,9 @@ int32_t get_option(int32_t argc, char **argv) case 'b': g_align = atoi(optarg); break; + case 'd': + g_device_id = atoi(optarg); + break; default: SAMPLE_PRT("unsupport option!\n"); break; @@ -1390,18 +1396,18 @@ int32_t setup_acl_device() } SAMPLE_PRT("aclInit succ.\n"); - aclRet = aclrtSetDevice(0); + aclRet = aclrtSetDevice(g_device_id); if (aclRet != ACL_SUCCESS) { - SAMPLE_PRT("aclrtSetDevice(0) fail with %d.\n", aclRet); + SAMPLE_PRT("aclrtSetDevice(%d) fail with %d.\n", g_device_id, aclRet); aclFinalize(); return aclRet; } - SAMPLE_PRT("aclrtSetDevice(0) succ.\n"); + SAMPLE_PRT("aclrtSetDevice(%d) succ.\n", g_device_id); - aclRet = aclrtCreateContext(&g_context, 0); + aclRet = aclrtCreateContext(&g_context, g_device_id); if (aclRet != ACL_SUCCESS) { SAMPLE_PRT("acl 
create context failed with %d.\n", aclRet); - aclrtResetDevice(0); + aclrtResetDevice(g_device_id); aclFinalize(); return aclRet; } @@ -1412,7 +1418,7 @@ int32_t setup_acl_device() SAMPLE_PRT("get current context failed\n"); aclrtDestroyContext(g_context); g_context = nullptr; - aclrtResetDevice(0); + aclrtResetDevice(g_device_id); aclFinalize(); return aclRet; } @@ -1426,7 +1432,7 @@ void destroy_acl_device() if (g_context) { aclrtDestroyContext(g_context); g_context = nullptr; - aclrtResetDevice(0); + aclrtResetDevice(g_device_id); aclFinalize(); } } diff --git a/cplusplus/level1_single_api/7_dvpp/venc_sample/readme.md b/cplusplus/level1_single_api/7_dvpp/venc_sample/readme.md index 01b3e392f..3b2c0653d 100644 --- a/cplusplus/level1_single_api/7_dvpp/venc_sample/readme.md +++ b/cplusplus/level1_single_api/7_dvpp/venc_sample/readme.md @@ -174,6 +174,8 @@ DVPP 中的VENC功能模块,实现将YUV420SP、YVU420SP格式的视频编码 - 非0:性能测试(考虑读取YUV文件耗时,对性能有影响,性能测试只读取一帧,循环发送,指定帧数编码完成后结束流程)。 - PerfFrameNum:性能测试输入帧数,默认为300。 + - DeviceId:device id,默认为0。 + diff --git a/cplusplus/level1_single_api/7_dvpp/venc_sample/src/sample_debug.cpp b/cplusplus/level1_single_api/7_dvpp/venc_sample/src/sample_debug.cpp index 62ca00875..5e57a1c30 100644 --- a/cplusplus/level1_single_api/7_dvpp/venc_sample/src/sample_debug.cpp +++ b/cplusplus/level1_single_api/7_dvpp/venc_sample/src/sample_debug.cpp @@ -58,6 +58,7 @@ std::map g_mbuf_map; pthread_mutex_t g_mbuf_mutex; aclrtContext g_context = NULL; +int32_t g_device_id = 0; // save the output stream hi_s32 venc_save_stream(FILE* fp, VencOutStream* outStream, uint64_t packCount) @@ -556,13 +557,13 @@ int32_t venc_sys_init() return HI_FAILURE; } - aclRet = aclrtSetDevice(0); + aclRet = aclrtSetDevice(g_device_id); if (aclRet != ACL_SUCCESS) { - HMEV_HISDK_PRT(ERROR, "aclrtSetDevice(0) fail with %d.\n", aclRet); + HMEV_HISDK_PRT(ERROR, "aclrtSetDevice(%d) fail with %d.\n", g_device_id, aclRet); return HI_FAILURE; } - aclRet = aclrtCreateContext(&g_context, 0); + aclRet = 
aclrtCreateContext(&g_context, g_device_id); if (aclRet != ACL_SUCCESS) { HMEV_HISDK_PRT(ERROR, "acl create context failed with %d.", aclRet); return HI_FAILURE; @@ -609,9 +610,9 @@ int32_t venc_sys_exit() if (aclRet != ACL_SUCCESS) { HMEV_HISDK_PRT(ERROR, "destroy context failed with %d.", aclRet); } - aclRet = aclrtResetDevice(0); + aclRet = aclrtResetDevice(g_device_id); if (aclRet != ACL_SUCCESS) { - HMEV_HISDK_PRT(ERROR, "reset device(0) fail with %d.\n", aclRet); + HMEV_HISDK_PRT(ERROR, "reset device(%d) fail with %d.\n", g_device_id, aclRet); } aclRet = aclFinalize(); if (aclRet != ACL_SUCCESS) { @@ -645,9 +646,10 @@ void venc_get_option(int argc, char** argv) {"SaveOutData", 1, 0, 's'}, // 0:do not save stream 1:save stream {"IFrameGop", 1, 0, 'g'}, // I frame interval[1, 65536‬],set 1 for all I frame,65536 as default‬ {"StartChnlId", 1, 0, 'd'}, // specify the start channel id for current process in multi-process test + {"DeviceId", 1, 0, 'I'}, {0, 0, 0, 0}, }; - int c = getopt_long(argc, argv, "w:h:r:n:c:b:f:D:H:i:o:p:l:P:m:s:g:d:", longOptions, &optionIndex); + int c = getopt_long(argc, argv, "w:h:r:n:c:b:f:D:H:i:o:p:l:P:m:s:g:d:I:", longOptions, &optionIndex); if (c == -1) { break; } @@ -707,6 +709,9 @@ void venc_get_option(int argc, char** argv) case 'd': g_start_chnl = atoi(optarg); break; + case 'I': + g_device_id = atoi(optarg); + break; default: break; } @@ -739,6 +744,7 @@ hi_void venc_print_usage() HMEV_HISDK_PRT(INFO, "--PerfTest(-P): test type 0:function test 1:performance test"); HMEV_HISDK_PRT(INFO, "--PerfFrameNum(-m): performance test frame number"); HMEV_HISDK_PRT(INFO, "--SaveOutData(-s): save output stream 0:do not save 1:save "); + HMEV_HISDK_PRT(INFO, "--DeviceId(-I): device id"); HMEV_HISDK_PRT(INFO, "*********************************************************"); } diff --git a/cplusplus/level1_single_api/7_dvpp/vpc_sample/readme.md b/cplusplus/level1_single_api/7_dvpp/vpc_sample/readme.md index cffd3589b..c84360efd 100644 --- 
a/cplusplus/level1_single_api/7_dvpp/vpc_sample/readme.md +++ b/cplusplus/level1_single_api/7_dvpp/vpc_sample/readme.md @@ -172,7 +172,7 @@ DVPP中的VPC功能模块,实现图片的抠图、缩放、边界填充、色 - 11:多图抠图+缩放+贴图 - 12:多图抠图+缩放+边界填充 - 20:图片宽/高对齐预处理 - + - device_id:设备id diff --git a/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.cpp b/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.cpp index 1152e2523..2b098ce4a 100644 --- a/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.cpp +++ b/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.cpp @@ -45,7 +45,7 @@ int32_t get_run_mode() return HI_SUCCESS; } -int32_t acl_init() +int32_t acl_init(int32_t device_id) { aclError aclRet = aclInit(nullptr); if (aclRet != ACL_SUCCESS) { @@ -54,9 +54,9 @@ int32_t acl_init() } // By default, the program is running on device 0. // On a multi-P environment, you can choose target device by the following interface. - aclRet = aclrtSetDevice(0); + aclRet = aclrtSetDevice(device_id); if (aclRet != ACL_SUCCESS) { - SAMPLE_PRT("aclrtSetDevice(0) failed with %d.\n", aclRet); + SAMPLE_PRT("aclrtSetDevice(%d) failed with %d.\n", device_id, aclRet); aclRet = aclFinalize(); if (aclRet != ACL_SUCCESS) { SAMPLE_PRT("finalize acl failed with %d.\n", aclRet); @@ -64,12 +64,12 @@ int32_t acl_init() return HI_FAILURE; } - aclRet = aclrtCreateContext(&g_context, 0); + aclRet = aclrtCreateContext(&g_context, device_id); if (aclRet != ACL_SUCCESS) { SAMPLE_PRT("acl create context failed with %d.", aclRet); - aclRet = aclrtResetDevice(0); + aclRet = aclrtResetDevice(device_id); if (aclRet != ACL_SUCCESS) { - SAMPLE_PRT("reset device(0) failed with %d.\n", aclRet); + SAMPLE_PRT("reset device(%d) failed with %d.\n", device_id, aclRet); } aclRet = aclFinalize(); if (aclRet != ACL_SUCCESS) { @@ -81,15 +81,15 @@ int32_t acl_init() return HI_SUCCESS; } -int32_t acl_deinit() +int32_t acl_deinit(int32_t device_id) { aclError aclRet = 
aclrtDestroyContext(g_context); if (aclRet != ACL_SUCCESS) { SAMPLE_PRT("destroy context failed with %d.", aclRet); } - aclRet = aclrtResetDevice(0); + aclRet = aclrtResetDevice(device_id); if (aclRet != ACL_SUCCESS) { - SAMPLE_PRT("reset device(0) failed with %d.\n", aclRet); + SAMPLE_PRT("reset device(%d) failed with %d.\n", device_id, aclRet); } aclRet = aclFinalize(); if (aclRet != ACL_SUCCESS) { diff --git a/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.h b/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.h index 4b3708bdb..ca8f1a5af 100644 --- a/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.h +++ b/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/common/sample_comm.h @@ -141,15 +141,17 @@ int32_t get_run_mode(); /* * @brief : initialize acl environment +* @param [in] device_id: device id * @return : 0: success; -1: failed */ -int32_t acl_init(); +int32_t acl_init(int32_t device_id); /* * @brief : deinitialization acl environment +* @param [in] device_id: device id * @return : 0: success; -1: fail */ -int32_t acl_deinit(); +int32_t acl_deinit(int32_t device_id); /* * @brief : malloc buffer in device diff --git a/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/sample_vpc.cpp b/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/sample_vpc.cpp index bcbaf024a..94b3a3ade 100644 --- a/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/sample_vpc.cpp +++ b/cplusplus/level1_single_api/7_dvpp/vpc_sample/src/sample_vpc.cpp @@ -35,6 +35,7 @@ vector> test_entry; VpcAttr g_vpc_attribute; aclrtRunMode g_run_mode; +int32_t g_device_id = 0; void sample_vpc_handle_sig(int32_t signo) { @@ -150,11 +151,12 @@ void get_option(int argc, char **argv) {"in_height_align", 1, 0, 'Y'}, {"out_width_align", 1, 0, 'Z'}, {"out_height_align", 1, 0, 'V'}, + {"device_id", 1, 0, 'a'}, {nullptr, 1, 0, 'U'}, }; int32_t c = getopt_long(argc, argv, - 
"w:h:f:b:c:l:t:d:g:e:m:r:s:p:u:v:x:y:i:L:T:C:D:F:O:W:M:I:0:1:2:3:4:5:6:7:8:Q:U:P:N:X:Y:Z:V", + "w:h:f:b:c:l:t:d:g:e:m:r:s:p:u:v:x:y:i:L:T:C:D:F:O:W:M:I:0:1:2:3:4:5:6:7:8:Q:U:P:N:X:Y:Z:V:a", long_options, &option_index); if (c == -1) { break; @@ -250,6 +252,9 @@ void get_option(int argc, char **argv) case 'N': g_vpc_attribute.srcPicNum = atoi(optarg); break; + case 'a': + g_device_id = atoi(optarg); + break; default: get_option_1(c); break; @@ -265,7 +270,7 @@ int32_t test_entry_single_chnl(TEST_FUNC test_func) return s32Ret; } - s32Ret = acl_init(); + s32Ret = acl_init(g_device_id); if (s32Ret != HI_SUCCESS) { return s32Ret; } @@ -273,7 +278,7 @@ int32_t test_entry_single_chnl(TEST_FUNC test_func) s32Ret = hi_mpi_sys_init(); if (s32Ret != HI_SUCCESS) { SAMPLE_PRT("hi_mpi_sys_init failed, ret = %#x!\n", s32Ret); - acl_deinit(); + acl_deinit(g_device_id); return s32Ret; } @@ -285,7 +290,7 @@ int32_t test_entry_single_chnl(TEST_FUNC test_func) if (s32Ret != HI_SUCCESS) { SAMPLE_PRT("Call hi_mpi_vpc_sys_create_chn failed, ret = %#x\n", s32Ret); hi_mpi_sys_exit(); - acl_deinit(); + acl_deinit(g_device_id); return s32Ret; } @@ -317,7 +322,7 @@ int32_t test_entry_single_chnl(TEST_FUNC test_func) } hi_mpi_sys_exit(); - acl_deinit(); + acl_deinit(g_device_id); if (s32Ret == HI_SUCCESS) { SAMPLE_PRT("program exit normally!\n"); -- Gitee From dc33819d42b5eda41410637b6e60cfc58fe2543e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=9D=A8?= Date: Thu, 21 Aug 2025 08:14:28 +0000 Subject: [PATCH 63/97] =?UTF-8?q?!2740=20allgathermm=20opt=20Merge=20pull?= =?UTF-8?q?=20request=20!2740=20from=20=E6=9D=8E=E6=9D=A8/allgathermm=5Fop?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AclNNInvocation/README.md | 12 +- .../AclNNInvocation/scripts/gen_data.py | 20 +-- .../AclNNInvocation/src/main.cpp | 8 +- .../AclNNInvocation/src/op_runner.cpp | 1 - .../op_host/all_gather_matmul_custom.cpp | 147 +++++------------- 
.../op_kernel/all_gather_matmul_custom.cpp | 55 +++---- .../all_gather_matmul_custom_tiling.h | 13 +- .../op_kernel/gather_mm.h | 27 ++-- .../op_kernel/mc2_matmul_block.h | 57 +------ .../op_kernel/mc2_matmul_compute.h | 74 +++------ .../21_all_gather_matmul_custom/README.md | 18 +-- .../all_gather_matmul_custom.json | 55 +------ .../all_gather_matmul_demo_def.h | 12 -- 13 files changed, 136 insertions(+), 363 deletions(-) diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md index b6ddd2e8c..40cfc9d50 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md @@ -26,17 +26,11 @@ ```cpp // 获取算子使用的workspace空间大小 aclnnStatus aclnnAllGatherMatmulCustomGetWorkspaceSize( - const aclTensor *x1, - const aclTensor *x2, + const aclTensor *a, + const aclTensor *b, const aclTensor *biasOptional, char *group, - bool isTransAOptional, - bool isTransBOptional, - int64_t gatherIndexOptional, - int64_t commTurnOptional, - int64_t rankSizeOptional, - bool isGatherOutOptional, - const aclTensor *yOut, + const aclTensor *cOut, const aclTensor *gatherOutOut, uint64_t *workspaceSize, aclOpExecutor **executor); diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/scripts/gen_data.py b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/scripts/gen_data.py index cd0d33c99..bd6cea60b 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/scripts/gen_data.py +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/scripts/gen_data.py @@ -15,20 +15,20 @@ def gen_golden_data_simple(): if not os.path.exists("output"): os.mkdir("output") - input_x1 = [] - input_x2 = [] + input_a = [] + input_b = [] for 
i in range(rank_dim): - x1 = np.random.uniform(-3, 3, [rank_m, rank_k]).astype(np.float16) - x2 = np.random.uniform(-3, 3, [rank_k, rank_n]).astype(np.float16) - x1.tofile("./input/input_x1_{}.bin".format(i)) - x2.tofile("./input/input_x2_{}.bin".format(i)) - input_x1.append(x1) - input_x2.append(x2) + a = np.random.uniform(-3, 3, [rank_m, rank_k]).astype(np.float16) + b = np.random.uniform(-3, 3, [rank_k, rank_n]).astype(np.float16) + a.tofile("./input/input_a_{}.bin".format(i)) + b.tofile("./input/input_b_{}.bin".format(i)) + input_a.append(a) + input_b.append(b) - golden_gather_out = np.concatenate(input_x1, axis=0) + golden_gather_out = np.concatenate(input_a, axis=0) for i in range(rank_dim): golden_gather_out.tofile("./output/golden_gather_out_{}.bin".format(i)) - out = np.matmul(golden_gather_out.astype(np.float32), input_x2[i].astype(np.float32)).astype(np.float16) + out = np.matmul(golden_gather_out.astype(np.float32), input_b[i].astype(np.float32)).astype(np.float16) out.tofile("./output/golden_out_{}.bin".format(i)) if __name__ == "__main__": diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp index dafa01311..86ff36642 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp @@ -52,10 +52,10 @@ OperatorDesc CreateOpDesc() bool SetInputData(OpRunner &runner, uint32_t rankId) { size_t fileSize = 0; - ReadFile("../input/input_x1_" + std::to_string(rankId) + ".bin", fileSize, - runner.GetInputBuffer(0), runner.GetInputSize(0)); // Read input_x1 file - ReadFile("../input/input_x2_" + std::to_string(rankId) + ".bin", fileSize, - runner.GetInputBuffer(1), runner.GetInputSize(1)); // Read input_x2 file + ReadFile("../input/input_a_" + std::to_string(rankId) + ".bin", fileSize, 
+ runner.GetInputBuffer(0), runner.GetInputSize(0)); // Read input_a file + ReadFile("../input/input_b_" + std::to_string(rankId) + ".bin", fileSize, + runner.GetInputBuffer(1), runner.GetInputSize(1)); // Read input_b file ReadFile("../input/input_bias_" + std::to_string(rankId) + ".bin", fileSize, runner.GetInputBuffer(INPUT_BUFFER_BIAS), runner.GetInputSize(INPUT_BUFFER_BIAS)); INFO_LOG("Set input success"); diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp index 0d40007d4..5aa62934f 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp @@ -302,7 +302,6 @@ bool OpRunner::RunOp(std::string group, aclrtStream stream) size_t workspaceSize = 0; aclOpExecutor *handle = nullptr; auto ret = aclnnAllGatherMatmulCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], bias, (char*)group.c_str(), - IS_TRANS_A, IS_TRANS_B, GATHER_INDEX, COMM_TURN, RANK_DIM, IS_GATHER_OUT, outputTensor_[0], outputTensor_[1], &workspaceSize, &handle); if (ret != ACL_SUCCESS) { ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp index 2166672ad..9916b7b9d 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp @@ -20,39 +20,11 @@ #define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) // tiling +namespace { constexpr int32_t TILE_M = 448; constexpr uint32_t HCCL_CMD_ALLGATHER = 6; -constexpr uint32_t FP16_BF16_SIZE = 2; -constexpr uint32_t CUSTOM_TILING_KEY = 100; // full mesh + no nd2nz + no cast bias +constexpr uint32_t HCCL_REDUCE_SUM = 0; constexpr int32_t L1_BUFFER_SIZE = 512 * 1024; - -namespace { -enum class HcclDataType { - HCCL_DATA_TYPE_INT8 = 0, /* *< int8 */ - HCCL_DATA_TYPE_INT16 = 1, /* *< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /* *< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /* *< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /* *< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /* *< int64 */ - HCCL_DATA_TYPE_UINT64 = 6, /* *< uint64 */ - HCCL_DATA_TYPE_UINT8 = 7, /* *< uint8 */ - HCCL_DATA_TYPE_UINT16 = 8, /* *< uint16 */ - HCCL_DATA_TYPE_UINT32 = 9, /* *< uint32 */ - HCCL_DATA_TYPE_FP64 = 10, /* *< fp64 */ - HCCL_DATA_TYPE_BFP16 = 11, /* *< bfp16 */ - HCCL_DATA_TYPE_RESERVED /* *< reserved */ -}; - -const std::set SUPPORT_DTYPE = { ge::DT_FLOAT16, ge::DT_BF16 }; -const std::map DTYPE_MAP = { - {ge::DT_FLOAT16, matmul_tiling::DataType::DT_FLOAT16}, - {ge::DT_BF16, matmul_tiling::DataType::DT_BFLOAT16} -}; - -const std::map HCCL_DATA_TYPE_MAP = { - {ge::DataType::DT_FLOAT16, HcclDataType::HCCL_DATA_TYPE_FP16}, - {ge::DataType::DT_BF16, HcclDataType::HCCL_DATA_TYPE_BFP16} -}; } static ge::graphStatus AllGatherMatmulCustomTilingFunc(gert::TilingContext *context) @@ -61,43 +33,31 @@ static ge::graphStatus AllGatherMatmulCustomTilingFunc(gert::TilingContext *cont auto aicCoreNum = ascendcPlatform.GetCoreNumAic(); // get attrs - uint32_t index = 0U; - const char *group = context->GetAttrs()->GetAttrPointer(index++); - bool isTransA = *(context->GetAttrs()->GetAttrPointer(index++)); - bool isTransB = *(context->GetAttrs()->GetAttrPointer(index++)); - int gatherIndex = *(context->GetAttrs()->GetAttrPointer(index++)); - int commTurn = *(context->GetAttrs()->GetAttrPointer(index++)); - int rankSize = 
*(context->GetAttrs()->GetAttrPointer(index++)); - bool isGatherOut = *(context->GetAttrs()->GetAttrPointer(index++)); - INFO_LOG("Group %s, isTransA is %d, isTransB is %d, gatherIndex is %d, commTurn is %d, rankSize %d, isGatherOut %d", - group, isTransA, isTransB, gatherIndex, commTurn, rankSize, isGatherOut); + const char *group = context->GetAttrs()->GetAttrPointer(0); + INFO_LOG("Group %s", group); // get shape [[4096/8,5120], [5120,640]] fp16 uint64_t rankM = context->GetInputShape(0)->GetStorageShape().GetDim(0); uint64_t rankK = context->GetInputShape(0)->GetStorageShape().GetDim(1); - uint64_t rankN = isTransB ? - context->GetInputShape(1)->GetStorageShape().GetDim(0) : context->GetInputShape(1)->GetStorageShape().GetDim(1); + uint64_t rankN = context->GetInputShape(1)->GetStorageShape().GetDim(1); INFO_LOG("RankM %lu, rankK %lu, rankN %lu", rankM, rankK, rankN); // get dtype auto aType = context->GetInputTensor(0)->GetDataType(); auto bType = context->GetInputTensor(1)->GetDataType(); auto cType = aType; - if (SUPPORT_DTYPE.find(aType) == SUPPORT_DTYPE.cend() || SUPPORT_DTYPE.find(bType) == SUPPORT_DTYPE.cend()) { - ERROR_LOG("Dtype of a %d or b %d is unsupported", static_cast(aType), static_cast(bType)); + if (aType != ge::DT_FLOAT16 || bType != ge::DT_FLOAT16) { + ERROR_LOG("Dtype is unsupported"); return ge::GRAPH_FAILED; } - // set block dim & tiling key + // set block dim context->SetBlockDim(ascendcPlatform.GetCoreNumAic()); - context->SetTilingKey(CUSTOM_TILING_KEY); // set work space size - size_t nd2nzLen = (rankN + 16) * (rankK + 16) * FP16_BF16_SIZE; size_t systemWorkspaceSize = static_cast(ascendcPlatform.GetLibApiWorkSpaceSize()); - size_t workspaceSize = nd2nzLen + 0 + 0 + 0 + systemWorkspaceSize; // nd2nzLen + gmcFloat + gatherLen + biasLen + 16M - size_t *currentWorkspace = context->GetWorkspaceSizes(1); - currentWorkspace[0] = workspaceSize; + size_t *workspaceSizes = context->GetWorkspaceSizes(1); // 获取设置workspace大小的指针。 + 
workspaceSizes[0] = systemWorkspaceSize; uint64_t tileNum = rankM / TILE_M; uint64_t tailNum = (rankM % TILE_M == 0) ? 0 : 1; @@ -106,27 +66,18 @@ static ge::graphStatus AllGatherMatmulCustomTilingFunc(gert::TilingContext *cont AllGatherMatmulCustomTilingData *tiling = context->GetTilingData(); - tiling->param.rankDim = rankSize; - tiling->param.tileNum = tileNum; - tiling->param.tailM = tailM; - tiling->param.tailNum = tailNum; - tiling->param.rankM = rankM; - tiling->param.rankN = rankN; - tiling->param.rankK = rankK; - tiling->param.gatherIndex = gatherIndex; - tiling->param.isTransposeA = isTransA ? 1 : 0; - tiling->param.isTransposeB = isTransB ? 1 : 0; - tiling->param.storageGather = isGatherOut; - tiling->param.cToFloatLen = 0; // 通信结果输出到workspace或gatherOut 不需要gmcFloat - tiling->param.nd2NzWorkLen = nd2nzLen; - tiling->param.gatherLen = 0; // gatherOut=true 不输出到workspace - tiling->param.dataType = static_cast(HCCL_DATA_TYPE_MAP.at(aType)); + tiling->cfg.tileNum = tileNum; + tiling->cfg.tailM = tailM; + tiling->cfg.tailNum = tailNum; + tiling->cfg.rankM = rankM; + tiling->cfg.rankN = rankN; + tiling->cfg.rankK = rankK; // matmul tiling func auto matmulTilingFunc = [&](int64_t m, int64_t n, int64_t k, TCubeTiling &cubeTiling) -> bool { matmul_tiling::MultiCoreMatmulTiling mmTiling; - mmTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, DTYPE_MAP.at(aType), isTransA); - mmTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, DTYPE_MAP.at(bType), isTransB); - mmTiling.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, DTYPE_MAP.at(cType)); + mmTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + mmTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + mmTiling.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); 
mmTiling.SetBias(false); mmTiling.SetDim(aicCoreNum); mmTiling.SetShape(m, n, k); @@ -153,9 +104,9 @@ static ge::graphStatus AllGatherMatmulCustomTilingFunc(gert::TilingContext *cont return ge::GRAPH_FAILED; } - uint32_t opType = 6; + uint32_t opType = HCCL_CMD_ALLGATHER; std::string algConfig = "AllGather=level0:doublering"; - uint32_t reduceType = 0; + uint32_t reduceType = HCCL_REDUCE_SUM; AscendC::Mc2CcTilingConfig mc2CcTilingConfig(group, opType, algConfig, reduceType); mc2CcTilingConfig.GetTiling(tiling->mc2InitTiling); mc2CcTilingConfig.SetSkipLocalRankCopy(0); @@ -169,54 +120,38 @@ class AllGatherMatmulCustom : public OpDef { public: explicit AllGatherMatmulCustom(const char *name) : OpDef(name) { - this->Input("x1") + this->Input("a") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16, ge::DT_BF16}) - .Format({ge::FORMAT_ND, ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); - this->Input("x2") + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Input("b") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16, ge::DT_BF16}) - .Format({ge::FORMAT_ND, ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}) .IgnoreContiguous(); this->Input("bias") .ParamType(OPTIONAL) - .DataType({ge::DT_FLOAT16, ge::DT_BF16}) - .Format({ge::FORMAT_ND, ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("y") + this->Output("c") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16, ge::DT_BF16}) - .Format({ge::FORMAT_ND, ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); this->Output("gather_out") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16, ge::DT_BF16}) - 
.Format({ge::FORMAT_ND, ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND, ge::FORMAT_ND}); + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); this->Attr("group").AttrType(REQUIRED).String(); - this->Attr("isTransA").AttrType(OPTIONAL).Bool(false); - this->Attr("isTransB").AttrType(OPTIONAL).Bool(false); - this->Attr("gatherIndex").AttrType(OPTIONAL).Int(0); - this->Attr("commTurn").AttrType(OPTIONAL).Int(0); - this->Attr("rank_size").AttrType(OPTIONAL).Int(0); - this->Attr("is_gather_out").AttrType(OPTIONAL).Bool(true); - - OpAICoreConfig aicore_config; - aicore_config.DynamicCompileStaticFlag(true) - .DynamicFormatFlag(true) - .DynamicRankSupportFlag(true) - .DynamicShapeSupportFlag(true) - .NeedCheckSupportFlag(false) - .PrecisionReduceFlag(true) - .ExtendCfgInfo("aclnnSupport.value", "support_aclnn") - .ExtendCfgInfo("jitCompile.flag", "static_false") - .ExtendCfgInfo("multiKernelSupportDynamicGraph.value", "multi_kernel"); + this->AICore().SetTiling(AllGatherMatmulCustomTilingFunc); - this->AICore().AddConfig("ascend910b", aicore_config); + this->AICore().AddConfig("ascend910b"); this->MC2().HcclGroup("group"); } }; diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp index c35f7616a..bcdae45b8 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp @@ -16,15 +16,6 @@ using namespace AscendC; -template -struct BiasType { - using type = float; -}; -template <> -struct BiasType { - using type = half; -}; - extern "C" __global__ __aicore__ void all_gather_matmul_custom(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR 
biasGM, GM_ADDR cGM, GM_ADDR gatherOutGM, GM_ADDR workspaceGM, GM_ADDR tilingGM) { @@ -37,7 +28,7 @@ extern "C" __global__ __aicore__ void all_gather_matmul_custom(GM_ADDR aGM, GM_A __gm__ void *mc2CcTiling = (__gm__ void *)(&(tiling->mc2CcTiling)); GET_TILING_DATA(tilingData, tilingGM); - auto &&cfg = tilingData.param; + auto &&cfg = tilingData.cfg; auto &&localTiling = tilingData.localTiling; auto &&tileTiling = tilingData.tileTiling; auto &&tailTiling = tilingData.tailTiling; @@ -59,35 +50,33 @@ extern "C" __global__ __aicore__ void all_gather_matmul_custom(GM_ADDR aGM, GM_A // 下发allgather任务 // 首块 - auto handleId = hccl.AllGather(aGM, gatherOutGM, aTileCnt, HcclDataType(cfg.dataType), aRankCnt, tileNum); + auto handleId = hccl.AllGather(aGM, gatherOutGM, aTileCnt, HcclDataType::HCCL_DATA_TYPE_FP16, aRankCnt, tileNum); // 尾块 auto tailHandleId = hccl.AllGather(aGM + tileNum * aTileOffset, gatherOutGM + tileNum * aTileOffset, aTailCnt, - HcclDataType(cfg.dataType), aRankCnt, tailNum); + HcclDataType::HCCL_DATA_TYPE_FP16, aRankCnt, tailNum); - if (TILING_KEY_IS(100)) { // full mesh + no nd2nz + biasNoNeedCast - using A_TYPE = MatmulType; - using B_TYPE = MatmulType; - using C_TYPE = MatmulType; - using BIAS_TYPE = MatmulType::type>; + using A_TYPE = MatmulType; + using B_TYPE = MatmulType; + using C_TYPE = MatmulType; - // 本卡数据计算 - MatmulKernelLocal(aGM, bGM, biasGM, cGM, cfg, localTiling, hccl); + // 本卡数据计算 + MatmulKernelLocal(aGM, bGM, cGM, cfg, localTiling, hccl); - // tile首块计算 - auto aAddr = gatherOutGM; // gatherOut 作为 mm A矩阵地址 - auto cAddr = cGM; - if (tileNum > 0) { - MatmulKernel(aAddr, bGM, biasGM, cAddr, cfg, tileTiling, hccl, handleId, - tileNum); - } + // tile首块计算 + auto aAddr = gatherOutGM; // gatherOut 作为 mm A矩阵地址 + auto cAddr = cGM; + if (tileNum > 0) { + MatmulKernel(aAddr, bGM, cAddr, cfg, tileTiling, hccl, handleId, + tileNum); + } - // tail尾块计算 - aAddr = gatherOutGM + tileNum * aTileOffset; - cAddr = cGM + tileNum * cTileOffset; - if (tailNum > 0) 
{ - MatmulKernel(aAddr, bGM, biasGM, cAddr, cfg, tailTiling, hccl, tailHandleId, - tailNum); - } + // tail尾块计算 + aAddr = gatherOutGM + tileNum * aTileOffset; + cAddr = cGM + tileNum * cTileOffset; + if (tailNum > 0) { + MatmulKernel(aAddr, bGM, cAddr, cfg, tailTiling, hccl, tailHandleId, + tailNum); } + hccl.Finalize(); } \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom_tiling.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom_tiling.h index de2ba7834..207eb9493 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom_tiling.h +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom_tiling.h @@ -14,22 +14,13 @@ #include #include "kernel_tiling/kernel_tiling.h" -struct AllGatherMatmulRCSTiling { - uint32_t rankDim; +struct AllGatherMatmulTiling { uint32_t rankM; uint32_t rankN; uint32_t rankK; - uint32_t gatherIndex; - uint32_t isTransposeA; - uint32_t isTransposeB; - uint32_t storageGather; - uint64_t cToFloatLen; - uint64_t nd2NzWorkLen; - uint64_t gatherLen; uint32_t tileNum; uint32_t tailM; uint32_t tailNum; - uint32_t dataType; }; class AllGatherMatmulCustomTilingData { @@ -39,7 +30,7 @@ public: TCubeTiling localTiling; TCubeTiling tileTiling; TCubeTiling tailTiling; - AllGatherMatmulRCSTiling param; + AllGatherMatmulTiling cfg; }; #endif //__ALL_GATHER_MATMUL_CUSTOM_TILING_H__ diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h index 891f1082e..b363d8ce2 100644 --- 
a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h @@ -14,8 +14,8 @@ #if defined ASCENDC_CPU_DEBUG #define SET_G_CORE_TYPE_IS_AIV thread_local int g_coreType = 2 #define SET_G_CORE_TYPE_IS_AIC thread_local int g_coreType = 1 -#define DTYPE_X1 half -#define DTYPE_Y half +#define DTYPE_A half +#define DTYPE_C half #else #define SET_G_CORE_TYPE_IS_AIV #define SET_G_CORE_TYPE_IS_AIC @@ -26,13 +26,12 @@ #include "all_gather_matmul_custom_tiling.h" namespace AscendC { -using A_DTYPE = DTYPE_X1; -using B_DTYPE = DTYPE_X1; -using C_DTYPE = DTYPE_Y; -using BIAS_DTYPE = DTYPE_Y; +using A_DTYPE = DTYPE_A; +using B_DTYPE = DTYPE_B; +using C_DTYPE = DTYPE_C; -template -__aicore__ inline void MatmulKernelLocal(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR biasGM, GM_ADDR cGM, AllGatherMatmulRCSTiling &cfg, +template +__aicore__ inline void MatmulKernelLocal(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR cGM, AllGatherMatmulTiling &cfg, TCubeTiling &tiling, Hccl &hccl) { if ASCEND_IS_AIC { @@ -43,17 +42,17 @@ __aicore__ inline void MatmulKernelLocal(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR biasG const auto aRankDataCnt = cfg.rankM * cfg.rankK; const auto cRankDataCnt = cfg.rankM * cfg.rankN; - MatmulCompute mmLocal; + MatmulCompute mmLocal; mmLocal.Init(cfg, tiling); - mmLocal.UpdateWeightBias(bGM, biasGM); + mmLocal.UpdateWeight(bGM); mmLocal.UpdateAddress(aGM, aRankDataCnt, cGM + hccl.GetRankId() * cRankDataCnt * sizeof(C_T), cRankDataCnt); mmLocal.Process(); mmLocal.End(); } } -template -__aicore__ inline void MatmulKernel(GM_ADDR aAddr, GM_ADDR bGM, GM_ADDR biasGM, GM_ADDR cAddr, AllGatherMatmulRCSTiling &cfg, +template +__aicore__ inline void MatmulKernel(GM_ADDR aAddr, GM_ADDR bGM, GM_ADDR cAddr, AllGatherMatmulTiling &cfg, TCubeTiling &tiling, Hccl &hccl, HcclHandle &handleId, uint32_t tileCnt) { if ASCEND_IS_AIC { @@ -73,9 +72,9 @@ 
__aicore__ inline void MatmulKernel(GM_ADDR aAddr, GM_ADDR bGM, GM_ADDR biasGM, const auto aRankOffset = cfg.rankM * cfg.rankK * sizeof(A_T); const auto cRankOffset = cfg.rankM * cfg.rankN * sizeof(C_T); - MatmulCompute mm; + MatmulCompute mm; mm.Init(cfg, tiling); - mm.UpdateWeightBias(bGM, biasGM); + mm.UpdateWeight(bGM); for (uint32_t i = 0; i < tileCnt; i++) { // wait current handle allgather hccl.Wait(handleId); diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h index 5e62fd6f6..00d4322b5 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h @@ -19,15 +19,11 @@ struct BaseBlockOffset { uint64_t offsetA; uint64_t offsetB; uint64_t offsetC; - uint64_t offsetBias; }; struct BaseBlockArguments { bool isRowOrder; - bool isAtomic; - bool isTransA; - bool isTransB; uint32_t singleCoreM; uint32_t singleCoreN; uint32_t mBlockCnt; // M方向的基本块个数 @@ -48,11 +44,10 @@ struct BaseBlockArguments class MatmulBaseBlock { public: __aicore__ inline MatmulBaseBlock() {} - __aicore__ inline void Init(TCubeTiling& tiling, bool isTransA, bool isTransB); + __aicore__ inline void Init(TCubeTiling& tiling); __aicore__ inline void InitBlockWithoutIndex(); __aicore__ inline void UpdateBlockIndex(uint32_t currPos); __aicore__ inline void UpdateBlockParams(int32_t mTileIndex=0, int32_t nTileIndex=0); - template __aicore__ inline void CalcGMOffset(); __aicore__ inline void GetBlockStartIdx(uint32_t startIdx, uint32_t endIdx); @@ -62,7 +57,7 @@ public: TCubeTiling tiling_; }; -__aicore__ inline void MatmulBaseBlock::Init(TCubeTiling& tiling, bool isTransA, bool isTransB) +__aicore__ inline void 
MatmulBaseBlock::Init(TCubeTiling& tiling) { tiling_ = tiling; args_.preCoreStartIdx = 0; @@ -75,12 +70,6 @@ __aicore__ inline void MatmulBaseBlock::Init(TCubeTiling& tiling, bool isTransA, if (tiling_.N > 5 * tiling_.M) { // 5: ratio of rowOrder args_.isRowOrder = false; } - args_.isTransA = isTransA; - args_.isTransB = isTransB; - args_.isAtomic = false; - if (args_.isTransA) { - args_.isAtomic = true; - } } __aicore__ inline void MatmulBaseBlock::InitBlockWithoutIndex() @@ -168,47 +157,11 @@ __aicore__ inline void MatmulBaseBlock::UpdateBlockParams(int32_t mTileIndex, in args_.mCWorkOffset = args_.mBlockOffset; } -template __aicore__ inline void MatmulBaseBlock::CalcGMOffset() { - auto alignedKa = AlignUp(tiling_.Ka, C0_SIZE); - auto alignedKb = AlignUp(tiling_.Kb, C0_SIZE); - - if constexpr (A_TYPE::format == CubeFormat::ND) { - if (args_.isTransA) { - offset_.offsetA = args_.mBlockOffset; - } else { - offset_.offsetA = args_.mBlockOffset * tiling_.Ka; - } - } else if constexpr (A_TYPE::format == CubeFormat::NZ) { - if (args_.isTransA) { - offset_.offsetA = args_.mBlockOffset * alignedKa; - } else { - offset_.offsetA = args_.mBlockOffset * C0_SIZE; - } - } - - if constexpr (B_TYPE::format == CubeFormat::ND) { - if (args_.isTransB) { - offset_.offsetB = args_.nBlockOffset * tiling_.Kb; - } else { - offset_.offsetB = args_.nBlockOffset; - } - } else if constexpr (B_TYPE::format == CubeFormat::NZ) { - if (args_.isTransB) { - offset_.offsetB = args_.nBlockOffset * C0_SIZE; - } else { - offset_.offsetB = args_.nBlockOffset * alignedKb; - } - } - - // C矩阵和BIAS只支持ND - if constexpr (C_TYPE::format == CubeFormat::ND || C_TYPE::format == CubeFormat::ND_ALIGN) { - offset_.offsetC = args_.nBlockOffset + args_.mCWorkOffset * tiling_.N; - } - if constexpr (BIAS_TYPE::format == CubeFormat::ND) { - offset_.offsetBias = args_.nBlockOffset; - } + offset_.offsetA = args_.mBlockOffset * tiling_.Ka; + offset_.offsetB = args_.nBlockOffset; + offset_.offsetC = args_.nBlockOffset + 
args_.mCWorkOffset * tiling_.N; } } // namespace ASCENDC #endif // MC2_MATMUL_BLOCK_H diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h index 23f0a1f18..0bac09100 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h @@ -16,103 +16,69 @@ namespace AscendC { using namespace matmul; -template +template class MatmulCompute { using A_T = typename A_TYPE::T; using B_T = typename B_TYPE::T; using C_T = typename C_TYPE::T; - using BiasT = typename BIAS_TYPE::T; public: __aicore__ inline MatmulCompute() {} - __aicore__ inline void Init(AllGatherMatmulRCSTiling& cfg, TCubeTiling& tiling); - __aicore__ inline void UpdateWeightBias(GM_ADDR bGM, GM_ADDR biasGM); + __aicore__ inline void Init(AllGatherMatmulTiling& cfg, TCubeTiling& tiling); + __aicore__ inline void UpdateWeight(GM_ADDR bGM); __aicore__ inline void UpdateAddress(GM_ADDR aGM, uint32_t aSize, GM_ADDR cGM, uint32_t cSize); __aicore__ inline void Process(); __aicore__ inline void End(); private: - __aicore__ inline void SetOrgShapeAlign(); - -private: - MatmulImpl mm_; + MatmulImpl mm_; GlobalTensor aGlobal; GlobalTensor bGlobal; GlobalTensor cGlobal; - GlobalTensor biasGlobal; MatmulBaseBlock block_; TCubeTiling tiling_; - AllGatherMatmulRCSTiling cfg_; + AllGatherMatmulTiling cfg_; }; -template -__aicore__ inline void MatmulCompute::UpdateWeightBias( - GM_ADDR bGM, GM_ADDR biasGM) +template +__aicore__ inline void MatmulCompute::UpdateWeight(GM_ADDR bGM) { // MC2的计算流中默认B矩阵不变,GM地址无需偏移 bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ B_T *>(bGM), tiling_.Kb * tiling_.N); - biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ BiasT 
*>(biasGM), tiling_.N); } -template -__aicore__ inline void MatmulCompute::UpdateAddress( +template +__aicore__ inline void MatmulCompute::UpdateAddress( GM_ADDR aGM, uint32_t aSize, GM_ADDR cGM, uint32_t cSize) { aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ A_T *>(aGM), aSize); cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ C_T *>(cGM), cSize); } -template -__aicore__ inline void MatmulCompute::Init(AllGatherMatmulRCSTiling& cfg, TCubeTiling& tiling) +template +__aicore__ inline void MatmulCompute::Init(AllGatherMatmulTiling& cfg, TCubeTiling& tiling) { - // MatmulImpl初始化 mm_.SetSubBlockIdx(0); mm_.Init(&tiling, GetTPipePtr()); tiling_ = tiling; cfg_ = cfg; - bool isTransA = cfg.isTransposeA > 0; - bool isTransB = cfg.isTransposeB > 0; - block_.Init(tiling, isTransA, isTransB); - SetOrgShapeAlign(); -} - -template -__aicore__ inline void MatmulCompute::SetOrgShapeAlign() -{ - if constexpr (A_TYPE::format == CubeFormat::NZ && B_TYPE::format == CubeFormat::NZ) { - auto alignKa = AlignUp(tiling_.Ka, C0_SIZE); - auto alignKb = AlignUp(tiling_.Kb, C0_SIZE); - auto alignM = AlignUp(tiling_.M, C0_SIZE); - auto alignN = AlignUp(tiling_.N, C0_SIZE); - mm_.SetOrgShape(alignM, alignN, alignKa, alignKb, cfg_.rankN); - } else if (A_TYPE::format == CubeFormat::NZ) { - auto alignKa = AlignUp(tiling_.Ka, C0_SIZE); - auto alignM = AlignUp(tiling_.M, C0_SIZE); - mm_.SetOrgShape(alignM, tiling_.N, alignKa, tiling_.Kb, cfg_.rankN); - } else if (B_TYPE::format == CubeFormat::NZ) { - auto alignKb = AlignUp(tiling_.Kb, C0_SIZE); - auto alignN = AlignUp(tiling_.N, C0_SIZE); - mm_.SetOrgShape(tiling_.M, alignN, tiling_.Ka, alignKb, cfg_.rankN); - } + block_.Init(tiling); } -template -__aicore__ inline void MatmulCompute::Process() +template +__aicore__ inline void MatmulCompute::Process() { // 每次block循环开始前需要计算初始blockIndex block_.InitBlockWithoutIndex(); for (uint32_t i = 0; i < block_.args_.blockCnt; i++) { - // calculate blcokCurrIndex + // calculate blockCurrIndex 
block_.UpdateBlockIndex(i); if (block_.args_.blockCurrIdx < block_.args_.totalBlockCnt) { block_.UpdateBlockParams(); - block_.template CalcGMOffset(); + block_.CalcGMOffset(); mm_.SetSingleShape(block_.args_.singleCoreM, block_.args_.singleCoreN, tiling_.singleCoreK); - mm_.SetTensorA(aGlobal[block_.offset_.offsetA], block_.args_.isTransA); - mm_.SetTensorB(bGlobal[block_.offset_.offsetB], block_.args_.isTransB); - if (tiling_.isBias) { - mm_.SetBias(biasGlobal[block_.offset_.offsetBias]); - } + mm_.SetTensorA(aGlobal[block_.offset_.offsetA]); + mm_.SetTensorB(bGlobal[block_.offset_.offsetB]); mm_.Iterate(); mm_.GetTensorC(cGlobal[block_.offset_.offsetC]); // 增加M等FIX同步 @@ -123,8 +89,8 @@ __aicore__ inline void MatmulCompute::Process } } -template -__aicore__ inline void MatmulCompute::End() +template +__aicore__ inline void MatmulCompute::End() { mm_.End(); } diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md index c121aac07..03c7430e5 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md @@ -7,21 +7,21 @@ │ ├── AllGatherMatmulCustom // AllGatherMatmulCustom算子工程 │ ├── all_gather_matmul_custom.json // AllGatherMatmulCustom算子的原型定义json文件 │ ├── all_gather_matmul_demo_def.h // AllGatherMatmulCustom算子参数配置 -│ └── install.sh // 脚本,调用msOpGen生成自定义算子工程,并编译 +│ └── install.sh // 脚本,调用msOpGen生成自定义算子工程,并编译 ``` ## 算子描述 -AllGatherMatmul算子实现了AllGather通信操作和Matmul矩阵乘法运算操作的融合,输出AllGather通信操作结果gather\_out和Matmul运算操作结果y,对应的数学表达式为: +AllGatherMatmul算子实现了AllGather通信操作和Matmul矩阵乘法运算操作的融合,输出AllGather通信操作结果gather\_out和Matmul运算操作结果c,对应的数学表达式为: $$ -gather\_out=AllGather(x1) +gather\_out=AllGather(a) $$ $$ -y=gather\_out * x2 +c=gather\_out * b $$ - AllGather() 为集合通信AllGather通信操作。 -- x1、x2为源操作数,x1为左矩阵,形状为\[M, K];x2为右矩阵,形状为\[K, N]。 +- a、b为源操作数,a为左矩阵,形状为\[M, K];b为右矩阵,形状为\[K, N]。 - 
gather\_out为目的操作数,存放AllGather通信结果的矩阵,形状为[M * rankDim, K],其中rankDim为通信域内的节点数。 -- y为目的操作数,存放Matmul运算结果的矩阵,形状为[M * rankDim, N]。 +- c为目的操作数,存放Matmul运算结果的矩阵,形状为[M * rankDim, N]。 备注:集合通信原语AllGather请参考[集合通信用户指南](https://hiascend.com/document/redirect/CannCommunityHcclUg)>集合通信原语。 @@ -30,12 +30,12 @@ $$ 算子类型(OpType)AllGatherMatmulCustom 算子输入nameshapedata typeformat -x1512 * 5120float16ND -x25120 * 640float16ND +a512 * 5120float16ND +b5120 * 640float16ND bias/// -算子输出y4096 * 640float16ND +算子输出c4096 * 640float16ND gather_out4096 * 5120float16ND 核函数名all_gather_matmul_custom diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json index 6c137f335..6ab16e763 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json @@ -3,14 +3,13 @@ "op": "AllGatherMatmulCustom", "input_desc": [ { - "name": "x1", + "name": "a", "param_type": "required", "format": [ "ND" ], "type": [ - "float16", - "bfloat16" + "float16" ] }, { @@ -20,8 +19,7 @@ "ND" ], "type": [ - "float16", - "bfloat16" + "float16" ] }, { @@ -31,21 +29,19 @@ "ND" ], "type": [ - "float16", - "bfloat16" + "float16" ] } ], "output_desc":[ { - "name": "y", + "name": "c", "param_type": "required", "format": [ "ND" ], "type": [ - "float16", - "bfloat16" + "float16" ] }, { @@ -55,8 +51,7 @@ "ND" ], "type": [ - "float16", - "bfloat16" + "float16" ] } ], @@ -66,42 +61,6 @@ "type": "string", "default_value":"", "param_type":"required" - }, - { - "name": "is_trans_a", - "type": "bool", - "default_value":false, - "param_type":"optional" - }, - { - "name": "is_trans_b", - "type": "bool", - "default_value":false, - "param_type":"optional" - }, - { - "name": "gather_index", - "type": "int", - "default_value":0, - "param_type":"optional" - }, - { - "name": 
"comm_turn", - "type": "int", - "default_value":0, - "param_type":"optional" - }, - { - "name": "rank_size", - "type": "int", - "default_value":0, - "param_type":"optional" - }, - { - "name": "is_gather_out", - "type": "bool", - "default_value":true, - "param_type":"optional" } ] } diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h index bea5e9251..1377915f5 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h @@ -15,17 +15,5 @@ constexpr uint32_t RANK_DIM = 8; constexpr uint32_t RANK_M = 512; constexpr uint32_t RANK_K = 5120; constexpr uint32_t RANK_N = 640; -constexpr bool IS_TRANS_A = false; -constexpr bool IS_TRANS_B = false; -constexpr int64_t GATHER_INDEX = 0; -constexpr int64_t COMM_TURN = 0; -constexpr bool IS_GATHER_OUT = true; - -// tiling -constexpr int32_t TILE_M = 448; -constexpr uint32_t HCCL_CMD_ALLGATHER = 6; -constexpr uint32_t FP16_BF16_SIZE = 2; -constexpr uint32_t CUSTOM_TILING_KEY = 100; // full mesh + no nd2nz + no cast bias -constexpr int32_t L1_BUFFER_SIZE = 512 * 1024; #endif \ No newline at end of file -- Gitee From 18e532fd28a77eff379512f3e96cc321a5ed4629 Mon Sep 17 00:00:00 2001 From: renjie Date: Tue, 26 Aug 2025 06:02:57 +0000 Subject: [PATCH 64/97] =?UTF-8?q?!2746=20=E9=80=82=E9=85=8Dtiling=20shape?= =?UTF-8?q?=20Merge=20pull=20request=20!2746=20from=20renjie/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../op_host/add_custom_tiling_sink_tiling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp 
b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp index 24f17126b..abed3a150 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/op_host/add_custom_tiling_sink_tiling.cpp @@ -13,7 +13,7 @@ #include "tiling/platform/platform_ascendc.h" namespace optiling { static constexpr uint32_t BLOCK_DIM = 8; -static constexpr uint32_t TILE_NUM = 8; +static constexpr uint32_t TILE_NUM = 3; static constexpr size_t MAX_WORKSPACE_SIZE = 32; // 算子所需用户workspace空间最大值,AddCustomTilingSink算子本身逻辑无需用户workspace空间,此处设置为固定值仅作为示例 static constexpr size_t DEFAULT_WORKSPACE_SIZE = 0; ge::graphStatus AddCustomSinkTilingFunc(gert::TilingContext *context) -- Gitee From 8420e15a8949bbff424979f2e48893a1ca5219af Mon Sep 17 00:00:00 2001 From: shinoda Date: Tue, 26 Aug 2025 07:48:23 +0000 Subject: [PATCH 65/97] !2747 fix log format * fix log format --- .../0_introduction/0_helloworld/run.sh | 6 ++-- .../AclNNInvocation/run.sh | 20 ++++++------- .../10_matmul_frameworklaunch/install.sh | 4 +-- .../MatmulInvocationNeo/run.sh | 8 +++--- .../AclNNInvocation/run.sh | 20 ++++++------- .../install.sh | 4 +-- .../CppExtensions/run.sh | 4 +-- .../MatmulLeakyReluInvocation/run.sh | 8 +++--- .../MatmulLeakyReluInvocationAsync/run.sh | 8 +++--- .../14_reduce_frameworklaunch/install.sh | 4 +-- .../AclOfflineModel/run.sh | 28 +++++++++---------- .../15_sub_frameworklaunch/install.sh | 4 +-- .../AbsDuplicateKernelInvocation/run.sh | 8 +++--- .../AbsGatherMaskKernelInvocation/run.sh | 8 +++--- .../AbsPadKernelInvocation/run.sh | 8 +++--- .../AbsUnPadKernelInvocation/run.sh | 8 +++--- .../ReduceMinKernelInvocation/run.sh | 8 +++--- .../install.sh | 4 +-- .../WholeReduceSumKernelInvocation/run.sh | 8 +++--- .../AclNNInvocation/run.sh | 20 ++++++------- 
.../AclOfflineModel/run.sh | 28 +++++++++---------- .../AclOnlineModel/run.sh | 20 ++++++------- .../CppExtensionInvocation/build_and_run.sh | 8 +++--- .../PytorchInvocation/run_op_plugin.sh | 12 ++++---- .../AscendCustomToTensorFlowBuildIn/run.sh | 2 +- .../1_add_frameworklaunch/install.sh | 4 +-- .../MmadBiasInvocation/run.sh | 6 ++-- .../MmadInvocation/run.sh | 6 ++-- .../VectorAddMultiCoreWithTiling/run.sh | 8 +++--- .../run.sh | 8 +++--- .../VectorAddSingleCore/run.sh | 8 +++--- .../VectorAddSingleCoreWithTmpbuf/run.sh | 8 +++--- .../BareMixInvocation/run.sh | 8 +++--- .../2_add_frameworklaunchlite/install.sh | 4 +-- .../AddKernelInvocationAcl/run.sh | 8 +++--- .../AddKernelInvocationNeo/run.sh | 8 +++--- .../AddKernelInvocationTilingNeo/run.sh | 8 +++--- .../3_add_kernellaunch/CppExtensions/run.sh | 4 +-- .../AclNNInvocation/run.sh | 20 ++++++------- .../4_addn_frameworklaunch/install.sh | 4 +-- .../0_introduction/5_addn_kernellaunch/run.sh | 8 +++--- .../AclNNInvocation/run.sh | 20 ++++++------- .../6_addtemplate_frameworklaunch/install.sh | 4 +-- .../7_broadcast_frameworklaunch/install.sh | 4 +-- .../static_library/AclNNInvocation/run.sh | 24 ++++++++-------- .../static_library/OpRunner/run.sh | 8 +++--- .../static_library/install_add.sh | 4 +-- .../static_library/install_matmul.sh | 4 +-- .../AclNNInvocation/run.sh | 20 ++++++------- .../9_leakyrelu_frameworklaunch/install.sh | 4 +-- .../FrameworkLaunch/AclNNInvocation/run.sh | 26 ++++++++--------- .../0_printf/FrameworkLaunch/install.sh | 4 +-- .../KernelLaunch/MatmulInvocationNeo/run.sh | 16 +++++------ .../FrameworkLaunch/AclNNInvocation/run.sh | 20 ++++++------- .../3_assert/FrameworkLaunch/install.sh | 4 +-- .../KernelLaunch/MatmulInvocationNeo/run.sh | 6 ++-- .../DumpTensorCube/AclNNInvocation/run.sh | 26 ++++++++--------- .../FrameworkLaunch/DumpTensorCube/install.sh | 4 +-- .../DumpTensorVector/AclNNInvocation/run.sh | 26 ++++++++--------- .../DumpTensorVector/install.sh | 4 +-- 
.../DumpTensorKernelInvocationCube/run.sh | 12 ++++---- .../DumpTensorKernelInvocationVector/run.sh | 14 +++++----- .../12_cube_group/AclNNInvocation/run.sh | 20 ++++++------- .../2_features/12_cube_group/install.sh | 4 +-- .../MatmulABshareInvocation/run.sh | 10 +++---- .../AclNNInvocation/run.sh | 22 +++++++-------- .../14_matmul_api_constant/install.sh | 4 +-- .../16_group_barrier/AclNNInvocation/run.sh | 22 +++++++-------- .../2_features/16_group_barrier/install.sh | 4 +-- .../AddCustomTilingSink/install.sh | 2 +- operator/ascendc/2_features/2_tbufpool/run.sh | 8 +++--- 71 files changed, 365 insertions(+), 365 deletions(-) diff --git a/operator/ascendc/0_introduction/0_helloworld/run.sh b/operator/ascendc/0_introduction/0_helloworld/run.sh index 430d087bf..c08a3f749 100755 --- a/operator/ascendc/0_introduction/0_helloworld/run.sh +++ b/operator/ascendc/0_introduction/0_helloworld/run.sh @@ -21,7 +21,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -39,7 +39,7 @@ else fi fi source $_ASCEND_INSTALL_PATH/bin/setenv.bash -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" rm -rf build mkdir -p build @@ -55,6 +55,6 @@ file_path=output_msg.txt count=$(grep -c "$check_msg" $file_path) if [ $count -ne 8 ]; then - echo "Error, Expected 8 occurrences of $check_msg, but found $count occurrences." + echo "[ERROR]: Expected 8 occurrences of $check_msg, but found $count occurrences." 
exit 1 fi \ No newline at end of file diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/run.sh index 1c6b5ef46..6d7a2c9d5 100755 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,33 +41,33 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_matmul_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" 
return 1 fi } diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/install.sh b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/install.sh index 456a674a2..39d09c983 100755 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh index 38be8f48a..bcb5a15b3 100755 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/run.sh index ab64938f2..c86c7a625 100755 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,33 +41,33 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" 
./execute_matmul_leakyrelu_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi } diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/install.sh b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/install.sh index 0012db2a0..1ca7720b9 100755 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/run.sh index c3bbcb7a6..ae46c5ab1 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/run.sh @@ -21,7 +21,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -39,7 +39,7 @@ else fi fi source $_ASCEND_INSTALL_PATH/bin/setenv.bash -echo 
"Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" set -e pip3 install pybind11 diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh index 38be8f48a..bcb5a15b3 100755 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" exit -1 fi VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh index d66cd3aa4..6725992d5 100755 --- 
a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" exit -1 fi VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/install.sh b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/install.sh index d092dcd78..a30696938 100755 --- a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in 
[$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/run.sh b/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/run.sh index 42b822522..7ba62608a 100755 --- a/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/run.sh +++ b/operator/ascendc/0_introduction/15_sub_frameworklaunch/AclOfflineModel/run.sh @@ -23,7 +23,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -73,7 +73,7 @@ def get_soc_version(): print(get_soc_version()) ''') if [[ ${SOC_VERSION_CONCAT}"x" = "x" ]]; then - echo "ERROR: SOC_VERSION_CONCAT is invalid!" + echo "[ERROR]: SOC_VERSION_CONCAT is invalid!" return 1 fi SOC_FULL_VERSION=$(echo $SOC_VERSION_CONCAT | cut -d ',' -f 1) @@ -82,7 +82,7 @@ print(get_soc_version()) function main { if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" + echo "[ERROR]: IS_DYNAMIC is invalid!" return 1 fi @@ -102,10 +102,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 4. 编译acl可执行文件 cd $CURRENT_DIR @@ -114,37 +114,37 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 5. 运行可执行文件 cd $CURRENT_DIR/output if [ $IS_DYNAMIC == 1 ]; then - echo "INFO: execute dynamic op!" + echo "[INFO]: Execute dynamic op!" 
./execute_sub_op $IS_DYNAMIC 999 else - echo "INFO: execute static op!" + echo "[INFO]: Execute static op!" ./execute_sub_op fi if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 6. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi } diff --git a/operator/ascendc/0_introduction/15_sub_frameworklaunch/install.sh b/operator/ascendc/0_introduction/15_sub_frameworklaunch/install.sh index e98fed73e..9595670fa 100755 --- a/operator/ascendc/0_introduction/15_sub_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/15_sub_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh index 08570fe09..9204f113f 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/run.sh @@ -53,7 +53,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: 
$1" break ;; esac @@ -61,12 +61,12 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" exit -1 fi if [[ " ${!VersionMap[*]} " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [${!VersionMap[*]}]" + echo "[ERROR]: SOC_VERSION should be in [${!VersionMap[*]}]" exit -1 fi _SOC_VERSION=${VersionMap[$SOC_VERSION]} @@ -85,7 +85,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh index 08570fe09..9204f113f 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/run.sh @@ -53,7 +53,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -61,12 +61,12 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi if [[ " ${!VersionMap[*]} " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [${!VersionMap[*]}]" + echo "[ERROR]: SOC_VERSION should be in [${!VersionMap[*]}]" exit -1 fi _SOC_VERSION=${VersionMap[$SOC_VERSION]} @@ -85,7 +85,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh index 08570fe09..9204f113f 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/run.sh @@ -53,7 +53,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -61,12 +61,12 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi if [[ " ${!VersionMap[*]} " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [${!VersionMap[*]}]" + echo "[ERROR]: SOC_VERSION should be in [${!VersionMap[*]}]" exit -1 fi _SOC_VERSION=${VersionMap[$SOC_VERSION]} @@ -85,7 +85,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh index 08570fe09..9204f113f 100755 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsUnPadKernelInvocation/run.sh @@ -53,7 +53,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -61,12 +61,12 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi if [[ " ${!VersionMap[*]} " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [${!VersionMap[*]}]" + echo "[ERROR]: SOC_VERSION should be in [${!VersionMap[*]}]" exit -1 fi _SOC_VERSION=${VersionMap[$SOC_VERSION]} @@ -85,7 +85,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh index 08570fe09..9204f113f 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/run.sh @@ -53,7 +53,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -61,12 +61,12 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi if [[ " ${!VersionMap[*]} " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [${!VersionMap[*]}]" + echo "[ERROR]: SOC_VERSION should be in [${!VersionMap[*]}]" exit -1 fi _SOC_VERSION=${VersionMap[$SOC_VERSION]} @@ -85,7 +85,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/install.sh b/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/install.sh index 7dab3141f..f66ba50c6 100755 --- a/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh index f09ddb475..522470602 100755 --- a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh +++ 
b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/run.sh index a652bf478..00bdad141 100755 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 
编译可执行文件 cd $CURRENT_DIR @@ -41,33 +41,33 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_add_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 精度比对 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi } diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/run.sh b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/run.sh index e2aff0259..7ad4ee9f8 100755 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/run.sh +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOfflineModel/run.sh @@ -23,7 +23,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -73,7 +73,7 @@ def get_soc_version(): print(get_soc_version()) ''') if [[ ${SOC_VERSION_CONCAT}"x" = "x" ]]; then - echo "ERROR: SOC_VERSION_CONCAT is invalid!" + echo "[ERROR]: SOC_VERSION_CONCAT is invalid!" 
return 1 fi SOC_FULL_VERSION=$(echo $SOC_VERSION_CONCAT | cut -d ',' -f 1) @@ -82,7 +82,7 @@ print(get_soc_version()) function main { if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" + echo "[ERROR]: IS_DYNAMIC is invalid!" return 1 fi @@ -102,10 +102,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 4. 编译acl可执行文件 cd $CURRENT_DIR @@ -114,37 +114,37 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 5. 运行可执行文件 cd $CURRENT_DIR/output if [ $IS_DYNAMIC == 1 ]; then - echo "INFO: execute dynamic op!" + echo "[INFO]: Execute dynamic op!" ./execute_add_op $IS_DYNAMIC 2048 else - echo "INFO: execute static op!" + echo "[INFO]: Execute static op!" ./execute_add_op fi if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 6. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" 
return 1 fi } diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/run.sh b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/run.sh index acd409c6f..01ae806ab 100755 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/run.sh +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AclOnlineModel/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,32 +41,32 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_add_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" 
return 1 fi } diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/build_and_run.sh b/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/build_and_run.sh index 497954c8e..0db77830a 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/build_and_run.sh +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/build_and_run.sh @@ -25,13 +25,13 @@ pip3 install *.whl cd ${BASE_DIR}/test python3 test_add_custom.py if [ $? -ne 0 ]; then - echo "ERROR: run add_custom test failed!" + echo "[ERROR]: Run add_custom test failed!" fi -echo "INFO: run add_custom test success!" +echo "[INFO]: Run add_custom test success!" # 运行测试用例 python3 test_add_custom_graph.py if [ $? -ne 0 ]; then - echo "ERROR: run add_custom_graph test failed!" + echo "[ERROR]: Run add_custom_graph test failed!" fi -echo "INFO: run add_custom_graph test success!" +echo "[INFO]: Run add_custom_graph test success!" diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/run_op_plugin.sh b/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/run_op_plugin.sh index 4a47761ee..95302115d 100755 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/run_op_plugin.sh +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/PytorchInvocation/run_op_plugin.sh @@ -18,13 +18,13 @@ source $ASCEND_HOME_DIR/bin/setenv.bash # 当前示例使用Python-3.9版本 PYTHON_VERSION=$(python3 -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1"."$2}') if [ "$PYTHON_VERSION" != "3.9" ]; then - echo "Error: Python3 version is not 3.9" + echo "[ERROR]: Python3 version is not 3.9" exit 1 fi # 当前示例使用Pytorch-2.1.0版本 PYTORCH_VESION=$(pip3 show torch | grep "Version:" | awk '{print $2}' | awk -F '.' 
'{print $1"."$2"."$3}' | awk -F '+' '{print $1}') if [ "$PYTORCH_VESION" != "2.1.0" ]; then - echo "Error: Pytorch version is not 2.1.0" + echo "[ERROR]: Pytorch version is not 2.1.0" exit 1 fi export HI_PYTHON=python${PYTHON_VERSION} @@ -67,17 +67,17 @@ function main() { export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH python3 test_ops_custom.py if [ $? -ne 0 ]; then - echo "ERROR: run custom op failed!" + echo "[ERROR]: Run custom op failed!" return 1 fi - echo "INFO: Ascend C Add Custom SUCCESS" + echo "[INFO]: Ascend C Add Custom SUCCESS" # 6. 执行基于图框架的测试文件 python3 test_ops_custom_register_in_graph.py if [ $? -ne 0 ]; then - echo "ERROR: run custom op in graph failed!" + echo "[ERROR]: Run custom op in graph failed!" return 1 fi - echo "INFO: Ascend C Add Custom in torch.compile graph SUCCESS" + echo "[INFO]: Ascend C Add Custom in torch.compile graph SUCCESS" } main diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run.sh b/operator/ascendc/0_introduction/1_add_frameworklaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run.sh index 0202346ce..1572b14ad 100755 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run.sh +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run.sh @@ -20,5 +20,5 @@ if [[ $TENSORFLOW_VERSION =~ ^1\..* ]]; then elif [[ $TENSORFLOW_VERSION =~ ^2\..* ]]; then python3 run_add_custom_tf2.py else - echo "unknown version $TENSORFLOW_VERSION, or tensorflow not installed" + echo "[ERROR]: Unknown version $TENSORFLOW_VERSION, or tensorflow not installed" fi diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/install.sh b/operator/ascendc/0_introduction/1_add_frameworklaunch/install.sh index 127c9a0f0..c206f4983 100755 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/install.sh +++ 
b/operator/ascendc/0_introduction/1_add_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh index f83616b88..583e4a69f 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadBiasInvocation/run.sh @@ -39,7 +39,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -47,13 +47,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh index f83616b88..583e4a69f 100644 --- a/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh +++ b/operator/ascendc/0_introduction/20_mmad_kernellaunch/MmadInvocation/run.sh @@ -39,7 +39,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -47,13 +47,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh index 5f06bb334..b77f52273 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/run.sh @@ -41,7 +41,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -49,13 +49,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -73,7 +73,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh index 5f06bb334..b77f52273 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/run.sh @@ -41,7 +41,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -49,13 +49,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -73,7 +73,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh index 5f06bb334..b77f52273 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCore/run.sh @@ -41,7 +41,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -49,13 +49,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -73,7 +73,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh index 5f06bb334..b77f52273 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddSingleCoreWithTmpbuf/run.sh @@ -41,7 +41,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -49,13 +49,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -73,7 +73,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh index 74524bd16..60883d357 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/run.sh @@ -39,7 +39,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -47,13 +47,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -71,7 +71,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/2_add_frameworklaunchlite/install.sh b/operator/ascendc/0_introduction/2_add_frameworklaunchlite/install.sh index e04eb1c62..7f30925ac 100755 --- a/operator/ascendc/0_introduction/2_add_frameworklaunchlite/install.sh +++ b/operator/ascendc/0_introduction/2_add_frameworklaunchlite/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh index 6b6d23964..804bef040 100644 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationAcl/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 
+48,13 @@ done RUN_MODE_LIST="cpu npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu or npu!" exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "cpu" ]; then export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh index c6dd79858..9eb5de466 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationNeo/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh index c6dd79858..9eb5de466 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/CppExtensions/run.sh b/operator/ascendc/0_introduction/3_add_kernellaunch/CppExtensions/run.sh index 3c46b63a5..f8c4a01a8 100755 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/CppExtensions/run.sh +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/CppExtensions/run.sh @@ -21,7 +21,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -39,7 +39,7 @@ else fi fi source $_ASCEND_INSTALL_PATH/bin/setenv.bash -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" set -e pip3 install pybind11 diff --git a/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/run.sh index b0de91dfd..2d1bff7a4 100755 --- a/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/4_addn_frameworklaunch/AclNNInvocation/run.sh @@ -31,10 +31,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" 
return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -43,33 +43,33 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_add_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" 
return 1 fi } diff --git a/operator/ascendc/0_introduction/4_addn_frameworklaunch/install.sh b/operator/ascendc/0_introduction/4_addn_frameworklaunch/install.sh index 873e4bdc5..05edc62ca 100755 --- a/operator/ascendc/0_introduction/4_addn_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/4_addn_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh index c6dd79858..9eb5de466 100755 --- a/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh +++ b/operator/ascendc/0_introduction/5_addn_kernellaunch/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/run.sh index 57e8771db..4b29064ca 100755 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,33 +41,33 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 
运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_add_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi } diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh index 41f6be73c..4b74830f0 100755 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/install.sh b/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/install.sh index 2bd031331..cf4f5950b 100755 --- a/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break 
;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh index f39cfda92..49623f804 100644 --- a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/AclNNInvocation/run.sh @@ -39,15 +39,15 @@ function main { cd $CURRENT_DIR python3 scripts_add/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate add input data failed!" + echo "[ERROR]: Generate add input data failed!" return 1 fi python3 scripts_matmul/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate matmul input data failed!" + echo "[ERROR]: Generate matmul input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译可执行文件 cd $CURRENT_DIR @@ -56,39 +56,39 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$NPU_HOST_LIB/:$BASIC_PATH/lib:$DDK_PATH_ADD/lib:$DDK_PATH_MATMUL/lib:$LD_LIBRARY_PATH:./ cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_static_op if [ $? 
-ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 精度比对 cd $CURRENT_DIR python3 scripts_matmul/verify_result.py output/output_z_matmul.bin output/golden_matmul.bin if [ $? -ne 0 ]; then - echo "ERROR: verify matmul result failed!" + echo "[ERROR]: Verify matmul result failed!" return 1 fi python3 scripts_add/verify_result.py output/output_z_add.bin output/golden_add.bin if [ $? -ne 0 ]; then - echo "ERROR: verify add result failed!" + echo "[ERROR]: Verify add result failed!" return 1 fi } diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh index 64bc86a70..4fc65b243 100644 --- a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/OpRunner/run.sh @@ -37,16 +37,16 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" 
cp -rf $CUSTLIB_PATH/* $RELEASE_PATH/lib cp -rf $CUSTLIB_PATH/../inc/* $RELEASE_PATH/include diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh index e701007ef..fb432e7ae 100644 --- a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_add.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh index 314fea9ca..48efe5f45 100644 --- a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/install_matmul.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git 
a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/run.sh index 9c6122e13..ec7c6280c 100755 --- a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,33 +41,33 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_leakyrelu_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_y.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" 
return 1 fi } diff --git a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/install.sh b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/install.sh index 2a37ba800..4db6545fb 100755 --- a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/run.sh b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/run.sh index 50530d46c..09b692487 100755 --- a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,34 +41,34 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 
运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" file_path=output_msg.txt ./execute_matmul_op | tee $file_path if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi # 6. 验证调测结果 @@ -82,17 +82,17 @@ function main { count_uint=$(grep -c "$check_msg_uint" $file_path) if [ $count_half -eq 0 ]; then - echo "Error, $check_msg_half is expected, but not found." + echo "[ERROR]: $check_msg_half is expected, but not found." exit 1 fi if [ $count_int -eq 0 ]; then - echo "Error, $check_msg_int is expected, but not found." + echo "[ERROR]: $check_msg_int is expected, but not found." exit 1 fi if [ $count_uint -eq 0 ]; then - echo "Error, $check_msg_uint is expected, but not found." + echo "[ERROR]: $check_msg_uint is expected, but not found." 
exit 1 fi } diff --git a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/install.sh b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/install.sh index 6467d8345..805dcc861 100755 --- a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/install.sh +++ b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/run.sh b/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/run.sh index 11050aa56..0bb67501e 100755 --- a/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/run.sh +++ b/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/run.sh @@ -36,7 +36,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -44,7 +44,7 @@ done VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -62,7 +62,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash set -e @@ -107,26 +107,26 @@ count_pointer=$(grep -c "$check_msg_pointer" $file_path) count_uint=$(grep -c "$check_msg_uint" $file_path) if [ $count_int -eq 
0 ]; then - echo "Error, $check_msg_int is expected, but not found." + echo "[ERROR]: $check_msg_int is expected, but not found." exit 1 fi if [ $count_bool -eq 0 ]; then - echo "Error, $check_msg_bool is expected, but not found." + echo "[ERROR]: $check_msg_bool is expected, but not found." exit 1 fi if [ $count_half -eq 0 ]; then - echo "Error, $check_msg_half is expected, but not found." + echo "[ERROR]: $check_msg_half is expected, but not found." exit 1 fi if [ $count_pointer -eq 0 ]; then - echo "Error, $check_msg_pointer is expected, but not found." + echo "[ERROR]: $check_msg_pointer is expected, but not found." exit 1 fi if [ $count_uint -eq 0 ]; then - echo "Error, $check_msg_uint is expected, but not found." + echo "[ERROR]: $check_msg_uint is expected, but not found." exit 1 fi diff --git a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/run.sh b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/run.sh index bb72829aa..11c9a1695 100755 --- a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,34 +41,34 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 
运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" file_path=output_msg.txt ./execute_matmul_op | tee $file_path if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi } diff --git a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/install.sh b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/install.sh index 6467d8345..805dcc861 100755 --- a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/install.sh +++ b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/run.sh b/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/run.sh index fcb90ffee..db5b1db08 100755 --- a/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/run.sh +++ b/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/run.sh @@ -36,7 +36,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -44,7 +44,7 @@ done 
VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -62,7 +62,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash set -e diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/run.sh b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/run.sh index 782b395ef..eaf8ebde1 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/run.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -41,34 +41,34 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" file_path=output_msg.txt ./execute_mmad_op | tee $file_path if [ $? 
-ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi @@ -83,17 +83,17 @@ function main { count_offset=$(grep -c "$check_msg_offset" $file_path) if [ $count_gm -eq 0 ]; then - echo "Error, $check_msg_gm is expected, but not found." + echo "[ERROR]: $check_msg_gm is expected, but not found." exit 1 fi if [ $count_shape -eq 0 ]; then - echo "Error, $check_msg_shape is expected, but not found." + echo "[ERROR]: $check_msg_shape is expected, but not found." exit 1 fi if [ $count_offset -eq 0 ]; then - echo "Error, $check_msg_offset is expected, but not found." + echo "[ERROR]: $check_msg_offset is expected, but not found." 
exit 1 fi } diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/install.sh b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/install.sh index 5efbbfe62..67d0f3ad6 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/install.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/run.sh b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/run.sh index 92b089d86..bdecf496f 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/run.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/AclNNInvocation/run.sh @@ -30,10 +30,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -42,34 +42,34 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" 
+ echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" file_path=output_msg.txt ./execute_add_op | tee $file_path if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR python3 scripts/verify_result.py output/output_z.bin output/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi @@ -84,17 +84,17 @@ function main { count_offset=$(grep -c "$check_msg_offset" $file_path) if [ $count_gm -eq 0 ]; then - echo "Error, $check_msg_gm is expected, but not found." + echo "[ERROR]: $check_msg_gm is expected, but not found." exit 1 fi if [ $count_shape -eq 0 ]; then - echo "Error, $check_msg_shape is expected, but not found." + echo "[ERROR]: $check_msg_shape is expected, but not found." exit 1 fi if [ $count_offset -eq 0 ]; then - echo "Error, $check_msg_offset is expected, but not found." + echo "[ERROR]: $check_msg_offset is expected, but not found." 
exit 1 fi } diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/install.sh b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/install.sh index 70f27a954..59dcf4e57 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/install.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorVector/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh index b38325a40..23734a78a 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationCube/run.sh @@ -39,7 +39,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -47,13 +47,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -123,16 +123,16 @@ count_shape=$(grep -c "$check_msg_shape" $file_path) count_offset=$(grep -c "$check_msg_offset" $file_path) if [ $count_gm -eq 0 ]; then - echo "Error, $check_msg_gm is expected, but not found." + echo "[ERROR]: $check_msg_gm is expected, but not found." exit 1 fi if [ $count_shape -eq 0 ]; then - echo "Error, $check_msg_shape is expected, but not found." + echo "[ERROR]: $check_msg_shape is expected, but not found." exit 1 fi if [ $count_offset -eq 0 ]; then - echo "Error, $check_msg_offset is expected, but not found." + echo "[ERROR]: $check_msg_offset is expected, but not found." exit 1 fi \ No newline at end of file diff --git a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh index 7ff642101..d85b7f5f2 100755 --- a/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh +++ b/operator/ascendc/1_utilities/7_dumptensor/KernelLaunch/DumpTensorKernelInvocationVector/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead @@ -125,16 +125,16 @@ count_shape=$(grep -c "$check_msg_shape" $file_path) count_offset=$(grep -c "$check_msg_offset" $file_path) if [ $count_gm -eq 0 ]; then - echo "Error, $check_msg_gm is expected, but not found." + echo "[ERROR]: $check_msg_gm is expected, but not found." exit 1 fi if [ $count_shape -eq 0 ]; then - echo "Error, $check_msg_shape is expected, but not found." + echo "[ERROR]: $check_msg_shape is expected, but not found." exit 1 fi if [ $count_offset -eq 0 ]; then - echo "Error, $check_msg_offset is expected, but not found." + echo "[ERROR]: $check_msg_offset is expected, but not found." exit 1 fi diff --git a/operator/ascendc/2_features/12_cube_group/AclNNInvocation/run.sh b/operator/ascendc/2_features/12_cube_group/AclNNInvocation/run.sh index 3e39eb84e..c8b55cde5 100644 --- a/operator/ascendc/2_features/12_cube_group/AclNNInvocation/run.sh +++ b/operator/ascendc/2_features/12_cube_group/AclNNInvocation/run.sh @@ -27,10 +27,10 @@ function main { cd $CURRENT_DIR/run/out/test_data/data python3 generate_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 
编译acl可执行文件 cd $CURRENT_DIR @@ -39,27 +39,27 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/run/out - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_cube_group_custom_op if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR @@ -67,7 +67,7 @@ function main { $CURRENT_DIR/run/out/result_files/output_0.bin \ $CURRENT_DIR/run/out/test_data/data/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" 
return 1 fi } diff --git a/operator/ascendc/2_features/12_cube_group/install.sh b/operator/ascendc/2_features/12_cube_group/install.sh index bda9aaff6..2dff708d6 100644 --- a/operator/ascendc/2_features/12_cube_group/install.sh +++ b/operator/ascendc/2_features/12_cube_group/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh index b60d42817..480fcdf86 100644 --- a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh +++ b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/run.sh @@ -39,7 +39,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -47,13 +47,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -112,8 +112,8 @@ python3 scripts/gen_data.py ) md5sum output/*.bin -echo "Verify ABshare precision" +echo "[INFO]: Verify ABshare precision" python3 scripts/verify_result.py output/output_ABshare.bin output/golden.bin -echo "Verify noABshare precision" +echo "[INFO]: Verify noABshare precision" python3 scripts/verify_result.py output/output_noABshare.bin output/golden.bin diff --git a/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/run.sh b/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/run.sh index 86998dc6c..17d93f334 100755 --- a/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/run.sh +++ b/operator/ascendc/2_features/14_matmul_api_constant/AclNNInvocation/run.sh @@ -25,7 +25,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -54,10 +54,10 @@ function main { cd $CURRENT_DIR/run/out/test_data/data python3 generate_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 编译acl可执行文件 cd $CURRENT_DIR @@ -66,26 +66,26 @@ function main { cd build cmake ../src if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 cd $CURRENT_DIR/run/out - echo "INFO: execute op!" + echo "[INFO]: Execute op!" ./execute_matmul_api_constant_custom_op if [ $? 
-ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 cd $CURRENT_DIR @@ -93,7 +93,7 @@ function main { $CURRENT_DIR/run/out/result_files/output_0.bin \ $CURRENT_DIR/run/out/test_data/data/golden.bin if [ $? -ne 0 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" return 1 fi } diff --git a/operator/ascendc/2_features/14_matmul_api_constant/install.sh b/operator/ascendc/2_features/14_matmul_api_constant/install.sh index dd9f877e7..0d37d1595 100755 --- a/operator/ascendc/2_features/14_matmul_api_constant/install.sh +++ b/operator/ascendc/2_features/14_matmul_api_constant/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/run.sh b/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/run.sh index d3e54dc31..284a2cf58 100644 --- a/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/run.sh +++ b/operator/ascendc/2_features/16_group_barrier/AclNNInvocation/run.sh @@ -29,10 +29,10 @@ function main { cd $CURRENT_DIR python3 scripts/gen_data.py if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" + echo "[ERROR]: Generate input data failed!" return 1 fi - echo "INFO: generate input data success!" + echo "[INFO]: Generate input data success!" # 3. 
编译acl可执行文件 cd $CURRENT_DIR @@ -41,38 +41,38 @@ function main { cd build cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" + echo "[ERROR]: Cmake failed!" return 1 fi - echo "INFO: cmake success!" + echo "[INFO]: Cmake success!" make if [ $? -ne 0 ]; then - echo "ERROR: make failed!" + echo "[ERROR]: Make failed!" return 1 fi - echo "INFO: make success!" + echo "[INFO]: Make success!" # 4. 运行可执行文件 export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output - echo "INFO: execute op!" + echo "[INFO]: Execute op!" check_msg="OUTPUT = 24" file_path=output_msg.txt ./execute_group_barrier | tee $file_path count=$(grep -c "$check_msg" $file_path) if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" + echo "[ERROR]: Acl executable run failed! please check your project!" return 1 fi - echo "INFO: acl executable run success!" + echo "[INFO]: Acl executable run success!" # 5. 比较真值文件 if [ $count -ne 6 ]; then - echo "ERROR: verify result failed!" + echo "[ERROR]: Verify result failed!" 
return 1 fi - echo "test pass" + echo "[INFO]: Test pass" } main diff --git a/operator/ascendc/2_features/16_group_barrier/install.sh b/operator/ascendc/2_features/16_group_barrier/install.sh index 59b11009b..b65411bb6 100644 --- a/operator/ascendc/2_features/16_group_barrier/install.sh +++ b/operator/ascendc/2_features/16_group_barrier/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -27,7 +27,7 @@ done VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi diff --git a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/install.sh b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/install.sh index d4ee2aa9a..15f3606b6 100644 --- a/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/install.sh +++ b/operator/ascendc/2_features/17_tiling_sink/AddCustomTilingSink/AddCustomTilingSink/install.sh @@ -19,7 +19,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac diff --git a/operator/ascendc/2_features/2_tbufpool/run.sh b/operator/ascendc/2_features/2_tbufpool/run.sh index 04d5fd9fc..3f192dea0 100644 --- a/operator/ascendc/2_features/2_tbufpool/run.sh +++ b/operator/ascendc/2_features/2_tbufpool/run.sh @@ -40,7 +40,7 @@ while :; do break ;; *) - echo "[ERROR] Unexpected option: $1" + echo "[ERROR]: Unexpected option: $1" break ;; esac @@ -48,13 +48,13 @@ done RUN_MODE_LIST="cpu sim npu" if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then - echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + echo "[ERROR]: RUN_MODE error, This sample only support specify cpu, sim or npu!" 
exit -1 fi VERSION_LIST="Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then - echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + echo "[ERROR]: SOC_VERSION should be in [$VERSION_LIST]" exit -1 fi @@ -72,7 +72,7 @@ fi export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} -echo "Current compile soc version is ${SOC_VERSION}" +echo "[INFO]: Current compile soc version is ${SOC_VERSION}" source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash if [ "${RUN_MODE}" = "sim" ]; then # in case of running op in simulator, use stub .so instead -- Gitee From 24211d61e2a4b638b517c21f4361a24dc4998cd7 Mon Sep 17 00:00:00 2001 From: zhanghao0689 Date: Fri, 29 Aug 2025 05:49:19 +0000 Subject: [PATCH 66/97] !2748 opt soc ver to 910B2 * opt soc ver to 910B2 --- operator/ascendc/0_introduction/0_helloworld/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/ascendc/0_introduction/0_helloworld/run.sh b/operator/ascendc/0_introduction/0_helloworld/run.sh index c08a3f749..f2a8cd115 100755 --- a/operator/ascendc/0_introduction/0_helloworld/run.sh +++ b/operator/ascendc/0_introduction/0_helloworld/run.sh @@ -8,7 +8,7 @@ SHORT=v:, LONG=soc-version:, OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") eval set -- "$OPTS" -SOC_VERSION="Ascend310P3" +SOC_VERSION="Ascend910B2" while :; do case "$1" in -- Gitee From 021196386728fc073e6b37dd1e8a7c6992e9ad31 Mon Sep 17 00:00:00 2001 From: ZhouChen Date: Wed, 3 Sep 2025 01:27:05 +0000 Subject: [PATCH 67/97] =?UTF-8?q?!2750=20node=E4=B8=BA=E7=A9=BA=E6=97=B6?= =?UTF-8?q?=E7=9B=B4=E6=8E=A5=E8=BF=94=E5=9B=9E=20Merge=20pull=20request?= =?UTF-8?q?=20!2750=20from=20ZhouChen/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/add_abs_node.hpp | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git 
a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp index eefe13fc2..1127e6e9d 100644 --- a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp +++ b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp @@ -29,6 +29,14 @@ constexpr const char *kOpTypeData = "Data"; constexpr const char *kOpTypFrameworkOp = "FrameworkOp"; int32_t kCount = 0; +inline int CheckGraphStatus(graphStatus ret, const std::string &msg, CustomContext &custom_context) { + if (ret != GRAPH_SUCCESS) { + custom_context.SetErrorMessage(msg); + return -1; + } + return 0; +} + // |o>----------------------------------- // |o> Data Data // |o> | | @@ -46,14 +54,12 @@ graphStatus AddAbsNodeInSubgraph(GraphPtr &graph, CustomPassContext &custom_cont for (auto &node : nodes) { AscendString node_type; ret = node.GetType(node_type); - if (ret != GRAPH_SUCCESS) { - custom_context.SetErrorMessage("Get node type failed."); + if (CheckGraphStatus(ret, "Get node type failed.", custom_context) != 0) { return -1; } AscendString node_name; ret = node.GetName(node_name); - if (ret != GRAPH_SUCCESS) { - custom_context.SetErrorMessage("Get node name failed."); + if (CheckGraphStatus(ret, "Get node name failed.", custom_context) != 0) { return -1; } if (node_type == kOpTypeData) { @@ -66,6 +72,10 @@ graphStatus AddAbsNodeInSubgraph(GraphPtr &graph, CustomPassContext &custom_cont } } // 2. 删除Data和FrameworkOp节点之间的边,如果没有找到目标节点或者目标节点间无连边,返回成功,无改图 + if (src_node == nullptr || dst_node == nullptr) { + cout << "Do not find target src_node or dst_node, stop to add abs node success." << endl; + return GRAPH_SUCCESS; + } ret = graph->RemoveEdge(src_node, 0, dst_node, 0); if (ret != GRAPH_SUCCESS) { cout << "Do not find target nodes or there is no edge between src and dst nodes." 
<< endl; @@ -76,13 +86,11 @@ graphStatus AddAbsNodeInSubgraph(GraphPtr &graph, CustomPassContext &custom_cont auto abs = op::Abs(name.c_str()); GNode node_abs = graph->AddNodeByOp(abs); ret = graph->AddDataEdge(src_node, 0, node_abs, 0); - if (ret != GRAPH_SUCCESS) { - custom_context.SetErrorMessage("Add data edge failed between const1_0 and abs."); + if (CheckGraphStatus(ret, "Add data edge failed between const1_0 and abs.", custom_context) != 0) { return -1; } ret = graph->AddDataEdge(node_abs, 0, dst_node, 0); - if (ret != GRAPH_SUCCESS) { - custom_context.SetErrorMessage("Add data edge failed between abs and const1_RetVal."); + if (CheckGraphStatus(ret, "Add data edge failed between abs and const1_RetVal.", custom_context) != 0) { return -1; } cout << "Add abs node success." << endl; -- Gitee From f02ae91da81d0b7d392748f8c743e671737ec575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E6=99=BA=E6=85=A7?= Date: Thu, 4 Sep 2025 13:03:57 +0000 Subject: [PATCH 68/97] =?UTF-8?q?!2752=20=E6=B7=BB=E5=8A=A0llm=5Fdatadist?= =?UTF-8?q?=20sample=20Merge=20pull=20request=20!2752=20from=20=E8=B5=B5?= =?UTF-8?q?=E6=99=BA=E6=85=A7/zzh=5Fadd=5Fllm=5Fdatadist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../11_llm_data_dist/CMakeLists.txt | 100 ++++++ .../11_llm_data_dist/decoder_sample2.cpp | 30 +- .../11_llm_data_dist/decoder_sample3.cpp | 155 ++++++++++ .../11_llm_data_dist/decoder_sample4.cpp | 286 ++++++++++++++++++ .../11_llm_data_dist/prompt_sample2.cpp | 144 +-------- .../11_llm_data_dist/prompt_sample3.cpp | 241 +++++++++++++++ .../11_llm_data_dist/prompt_sample4.cpp | 262 ++++++++++++++++ .../11_llm_data_dist/readme.md | 37 ++- .../level1_single_api/12_adxl/CMakeLists.txt | 27 +- .../12_adxl/adxl_engine_sample.cpp | 1 + .../12_adxl/adxl_engine_sample2.cpp | 239 +++++++++++++++ cplusplus/level1_single_api/12_adxl/readme.md | 16 +- .../10_llm_data_dist/README.md | 22 +- .../push_blocks_sample.py | 154 ++++++++++ 
.../push_cache_sample.py | 153 ++++++++++ .../switch_role_sample.py | 3 +- 16 files changed, 1694 insertions(+), 176 deletions(-) create mode 100644 cplusplus/level1_single_api/11_llm_data_dist/decoder_sample3.cpp create mode 100644 cplusplus/level1_single_api/11_llm_data_dist/decoder_sample4.cpp create mode 100644 cplusplus/level1_single_api/11_llm_data_dist/prompt_sample3.cpp create mode 100644 cplusplus/level1_single_api/11_llm_data_dist/prompt_sample4.cpp create mode 100644 cplusplus/level1_single_api/12_adxl/adxl_engine_sample2.cpp create mode 100644 python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py create mode 100644 python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_cache_sample.py diff --git a/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt b/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt index 5691c49c5..c4215bdc6 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt +++ b/cplusplus/level1_single_api/11_llm_data_dist/CMakeLists.txt @@ -118,4 +118,104 @@ target_link_libraries(decoder_sample2 PRIVATE llm_datadist graph ascendcl +) + +add_executable(prompt_sample3 "prompt_sample3.cpp") + +target_compile_options(prompt_sample3 PRIVATE + ${common_compile_options} +) + +target_compile_definitions(prompt_sample3 PRIVATE + ${common_compile_definitions} +) + +target_include_directories(prompt_sample3 PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + +target_link_directories(prompt_sample3 PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(prompt_sample3 PRIVATE + llm_datadist + graph + ascendcl +) + +add_executable(decoder_sample3 "decoder_sample3.cpp") + +target_compile_options(decoder_sample3 PRIVATE + ${common_compile_options} +) + +target_compile_definitions(decoder_sample3 PRIVATE + ${common_compile_definitions} +) + +target_include_directories(decoder_sample3 PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + 
+target_link_directories(decoder_sample3 PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(decoder_sample3 PRIVATE + llm_datadist + graph + ascendcl +) + +add_executable(prompt_sample4 "prompt_sample4.cpp") + +target_compile_options(prompt_sample4 PRIVATE + ${common_compile_options} +) + +target_compile_definitions(prompt_sample4 PRIVATE + ${common_compile_definitions} +) + +target_include_directories(prompt_sample4 PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + +target_link_directories(prompt_sample4 PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(prompt_sample4 PRIVATE + llm_datadist + graph + ascendcl +) + +add_executable(decoder_sample4 "decoder_sample4.cpp") + +target_compile_options(decoder_sample4 PRIVATE + ${common_compile_options} +) + +target_compile_definitions(decoder_sample4 PRIVATE + ${common_compile_definitions} +) + +target_include_directories(decoder_sample4 PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + +target_link_directories(decoder_sample4 PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(decoder_sample4 PRIVATE + llm_datadist + graph + ascendcl ) \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp index 41d94f042..798af22f5 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp +++ b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample2.cpp @@ -24,7 +24,6 @@ using namespace llm_datadist; namespace{ constexpr uint16_t PROMPT_LISTEN_PORT = 26000; -constexpr uint16_t DECODER_LISTEN_PORT = 26001; constexpr uint16_t PROMPT_CLUSTER_ID = 0; constexpr uint16_t DECODER_CLUSTER_ID = 1; constexpr uint32_t NUM_TENSORS = 4U; @@ -59,19 +58,6 @@ int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId) return LLM_SUCCESS; } -int32_t SetRole(LlmDataDist &llmDataDist, LlmRole role, const char *localIp) -{ - std::map options; - 
options[OPTION_LISTEN_IP_INFO] = (std::string(localIp) + ":" + std::to_string(DECODER_LISTEN_PORT)).c_str(); - auto ret = llmDataDist.SetRole(role, options); - if (ret != LLM_SUCCESS) { - printf("[ERROR] SetRole failed, ret = %u\n", ret); - return -1; - } - printf("[INFO] SetRole success\n"); - return 0; -} - int Link(LlmDataDist &llmDataDist, const char *localIp, const char *remoteIp) { std::vector rets; @@ -250,21 +236,7 @@ int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char * } linked = false; - // 8. 切换角色 - if (SetRole(llmDataDist, LlmRole::kPrompt, localIp) != 0) { - Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); - return -1; - } - - // 9. 等待prompt push cache,实际业务场景可通过合适方式实现通知 - std::this_thread::sleep_for(std::chrono::seconds(30)); - - if (CheckBuffers(buffers, {4, 5, 6, 7}) != 0) { - Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); - return -1; - } - - // 10. 释放cache与llmDataDist + // 8. 释放cache与llmDataDist llmDataDist.Finalize(); printf("[INFO] Finalize success\n"); printf("[INFO] Decoder Sample end\n"); diff --git a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample3.cpp b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample3.cpp new file mode 100644 index 000000000..f7c778dff --- /dev/null +++ b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample3.cpp @@ -0,0 +1,155 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include "acl/acl.h" +#include "llm_datadist/llm_datadist.h" + +using namespace llm_datadist; +namespace{ +constexpr uint16_t DECODER_LISTEN_PORT = 26001; +constexpr uint16_t DECODER_CLUSTER_ID = 1; +constexpr uint32_t NUM_TENSORS = 4U; +constexpr size_t TENSOR_SIZE = 8 * 16 * sizeof(int32_t); +const std::vector TENSOR_SHAPE = {8, 16}; +constexpr size_t TENSOR_BLOCK_ELEMENT_NUM = 16; +constexpr int32_t WAIT_PROMPT_TIME = 10; +constexpr int32_t EXPECTED_ARG_CNT = 3; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_IP = 2; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId, const std::string &localIp) +{ + std::map options; + options[OPTION_DEVICE_ID] = deviceId.c_str(); + options[OPTION_LISTEN_IP_INFO] = (std::string(localIp) + ":" + std::to_string(DECODER_LISTEN_PORT)).c_str(); + auto ret = llmDataDist.Initialize(options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return LLM_SUCCESS; +} + +int32_t CheckBuffers(const std::vector &buffers, const std::vector &checkIndexList) +{ + for (auto buffer : buffers) { + std::vector hostBuffer(TENSOR_SIZE / sizeof(int32_t)); + CHECK_ACL(aclrtMemcpy(&hostBuffer[0], TENSOR_SIZE, buffer, TENSOR_SIZE, ACL_MEMCPY_DEVICE_TO_HOST)); + for (auto checkIndex : checkIndexList) { + for (size_t i = 0U; i < TENSOR_BLOCK_ELEMENT_NUM; ++i) { + auto expect = checkIndex * TENSOR_BLOCK_ELEMENT_NUM + i; + if (hostBuffer[expect] != expect) { + printf("[ERROR] Buffer check failed, index = %zu, val = %d, expect val = %zu\n", + expect, hostBuffer[expect], expect); + return -1; + } + } + } + } + printf("[INFO] CheckBuffers success\n"); + return 0; +} + 
+void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, const std::vector buffers) +{ + if (cacheId > 0) { + auto ret = llmDataDist.UnregisterKvCache(cacheId); + if (ret != 0) { + printf("[ERROR] UnregisterKvCache failed, ret = %u\n", ret); + } else { + printf("[INFO] UnregisterKvCache success\n"); + } + } + for (auto buffer : buffers) { + aclrtFree(buffer); + } + llmDataDist.Finalize(); +} + +int32_t RunDecoderSample(const char *deviceId, const char *localIp) +{ + printf("[INFO] Decoder Sample start\n"); + // 1. 初始化 + LlmDataDist llmDataDist(DECODER_CLUSTER_ID, LlmRole::kDecoder); + if (Initialize(llmDataDist, deviceId, localIp) != 0) { + return -1; + } + + // 2. 注册内存地址 + CacheDesc cacheDesc{}; + cacheDesc.num_tensors = NUM_TENSORS; + cacheDesc.data_type = DT_INT32; + cacheDesc.shape = TENSOR_SHAPE; + std::vector tensorAddrs; + std::vector buffers; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + int32_t *buffer = nullptr; + CHECK_ACL(aclrtMalloc((void **)&buffer, TENSOR_SIZE, ACL_MEM_MALLOC_HUGE_ONLY)); + tensorAddrs.emplace_back(reinterpret_cast(buffer)); + buffers.emplace_back(reinterpret_cast(buffer)); + } + int64_t cacheId = -1; + auto ret = llmDataDist.RegisterKvCache(cacheDesc, tensorAddrs, {}, cacheId); + if (ret != LLM_SUCCESS) { + printf("[ERROR] RegisterKvCache failed, ret = %u\n", ret); + Finalize(llmDataDist, cacheId, buffers); + return -1; + } + // 3. RegisterKvCache成功后,可以获取cache中各tensor的地址用于后续操作 + printf("[INFO] RegisterKvCache success\n"); + for (size_t i = 0U; i < tensorAddrs.size(); ++i) { + printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); + } + + // 4. 等待prompt写完cache,实际业务场景可通过合适方式实现通知 + std::this_thread::sleep_for(std::chrono::seconds(WAIT_PROMPT_TIME)); + if (CheckBuffers(buffers, {4, 5, 6, 7}) != 0) { + Finalize(llmDataDist, cacheId, buffers); + return -1; + } + + // 10. 
释放cache与llmDataDist
+    Finalize(llmDataDist, cacheId, buffers);
+    printf("[INFO] Decoder Sample end\n");
+    return 0;
+}
+
+int main(int32_t argc, char **argv)
+{
+    if (argc != EXPECTED_ARG_CNT) {
+        printf("[ERROR] expect 2 args(deviceId, localHostIp), but got %d\n", argc - 1);
+        return -1;
+    }
+    const auto deviceId = argv[ARG_INDEX_DEVICE_ID];
+    const auto localIp = argv[ARG_INDEX_LOCAL_IP];
+    printf("[INFO] deviceId = %s, localIp = %s\n", deviceId, localIp);
+    auto ret = RunDecoderSample(deviceId, localIp);
+    return ret;
+}
\ No newline at end of file
diff --git a/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample4.cpp b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample4.cpp
new file mode 100644
index 000000000..41d94f042
--- /dev/null
+++ b/cplusplus/level1_single_api/11_llm_data_dist/decoder_sample4.cpp
@@ -0,0 +1,286 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include +#include +#include +#include +#include "acl/acl.h" +#include "llm_datadist/llm_datadist.h" + +using namespace llm_datadist; +namespace{ +constexpr uint16_t PROMPT_LISTEN_PORT = 26000; +constexpr uint16_t DECODER_LISTEN_PORT = 26001; +constexpr uint16_t PROMPT_CLUSTER_ID = 0; +constexpr uint16_t DECODER_CLUSTER_ID = 1; +constexpr uint32_t NUM_TENSORS = 4U; +constexpr size_t TENSOR_SIZE = 8 * 16 * sizeof(int32_t); +const std::vector TENSOR_SHAPE = {8, 16}; +constexpr size_t TENSOR_BLOCK_ELEMENT_NUM = 16; +constexpr int32_t WAIT_PROMPT_TIME = 5; +constexpr int32_t EXPECTED_ARG_CNT = 4; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_IP = 2; +constexpr uint32_t ARG_INDEX_REMOTE_IP = 3; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId) +{ + std::map options; + options[OPTION_DEVICE_ID] = deviceId.c_str(); + auto ret = llmDataDist.Initialize(options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return LLM_SUCCESS; +} + +int32_t SetRole(LlmDataDist &llmDataDist, LlmRole role, const char *localIp) +{ + std::map options; + options[OPTION_LISTEN_IP_INFO] = (std::string(localIp) + ":" + std::to_string(DECODER_LISTEN_PORT)).c_str(); + auto ret = llmDataDist.SetRole(role, options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] SetRole failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] SetRole success\n"); + return 0; +} + +int Link(LlmDataDist &llmDataDist, const char *localIp, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 0; + IpInfo localIpInfo; + localIpInfo.ip = localIp; + localIpInfo.port = PROMPT_LISTEN_PORT; + 
clusterInfo.local_ip_infos.emplace_back(std::move(localIpInfo)); + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = PROMPT_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.LinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] LinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] LinkLlmClusters success\n"); + return 0; +} + +int Unlink(LlmDataDist &llmDataDist, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 0; + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = PROMPT_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.UnlinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] UnlinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] UnlinkLlmClusters success\n"); + return 0; +} + +int32_t CheckBuffers(const std::vector &buffers, const std::vector &checkIndexList) +{ + for (auto buffer : buffers) { + std::vector hostBuffer(TENSOR_SIZE / sizeof(int32_t)); + CHECK_ACL(aclrtMemcpy(&hostBuffer[0], TENSOR_SIZE, buffer, TENSOR_SIZE, ACL_MEMCPY_DEVICE_TO_HOST)); + for (auto checkIndex : checkIndexList) { + for (size_t i = 0U; i < TENSOR_BLOCK_ELEMENT_NUM; ++i) { + auto expect = checkIndex * TENSOR_BLOCK_ELEMENT_NUM + i; + if (hostBuffer[expect] != expect) { + printf("[ERROR] Buffer check failed, index = %zu, val = %d, expect val = %zu\n", + expect, hostBuffer[expect], expect); + return -1; + } + } + } + } + printf("[INFO] CheckBuffers success\n"); + return 0; +} + +int32_t PullCache(LlmDataDist &llmDataDist, int64_t cacheId) +{ + std::vector promptBlocks {1, 2, 3}; + std::vector decoderBlocks {1, 2, 3}; + CacheIndex cacheIndex{PROMPT_CLUSTER_ID, 1, 
0}; + // 可以使用PullKvBlock拉取多块block的数据 + Cache cache{}; + cache.cache_id = cacheId; + auto ret = llmDataDist.PullKvBlocks(cacheIndex, cache, promptBlocks, decoderBlocks); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PullKvBlocks failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] PullKvBlocks success\n"); + // 也可以使用PullKvCache拉取一个batch中的连续数据 + cacheIndex.batch_index = 0; + ret = llmDataDist.PullKvCache(cacheIndex, cache, 0); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PullKvCache failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] PullKvCache success\n"); + return 0; +} + +void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, bool linked, const char *remoteIp, + const std::vector buffers) +{ + if (linked) { + auto ret = Unlink(llmDataDist, remoteIp); + if (ret != 0) { + printf("[ERROR] Unlink failed, ret = %d\n", ret); + } else { + printf("[INFO] Unlink success\n"); + } + } + if (cacheId > 0) { + auto ret = llmDataDist.UnregisterKvCache(cacheId); + if (ret != 0) { + printf("[ERROR] UnregisterKvCache failed, ret = %u\n", ret); + } else { + printf("[INFO] UnregisterKvCache success\n"); + } + } + for (auto buffer : buffers) { + aclrtFree(buffer); + } + llmDataDist.Finalize(); +} + +int32_t RunDecoderSample(const char *deviceId, const char *localIp, const char *remoteIp) +{ + printf("[INFO] Decoder Sample start\n"); + // 1. 初始化 + LlmDataDist llmDataDist(DECODER_CLUSTER_ID, LlmRole::kDecoder); + if (Initialize(llmDataDist, deviceId) != 0) { + return -1; + } + + // 2. 
注册内存地址 + CacheDesc cacheDesc{}; + cacheDesc.num_tensors = NUM_TENSORS; + cacheDesc.data_type = DT_INT32; + cacheDesc.shape = TENSOR_SHAPE; + std::vector tensorAddrs; + std::vector buffers; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + int32_t *buffer = nullptr; + CHECK_ACL(aclrtMalloc((void **)&buffer, TENSOR_SIZE, ACL_MEM_MALLOC_HUGE_ONLY)); + tensorAddrs.emplace_back(reinterpret_cast(buffer)); + buffers.emplace_back(reinterpret_cast(buffer)); + } + int64_t cacheId = -1; + bool linked = false; + auto ret = llmDataDist.RegisterKvCache(cacheDesc, tensorAddrs, {}, cacheId); + if (ret != LLM_SUCCESS) { + printf("[ERROR] RegisterKvCache failed, ret = %u\n", ret); + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + // 3. RegisterKvCache成功后,可以获取cache中各tensor的地址用于后续操作 + printf("[INFO] RegisterKvCache success\n"); + for (size_t i = 0U; i < tensorAddrs.size(); ++i) { + printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); + } + + // 4. 等待prompt写完cache,实际业务场景可通过合适方式实现通知 + std::this_thread::sleep_for(std::chrono::seconds(WAIT_PROMPT_TIME)); + + // 5. 与prompt建链 + if (Link(llmDataDist, localIp, remoteIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + linked = true; + + // 6. 从prompt拉取cache + if (PullCache(llmDataDist, cacheId) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + if (CheckBuffers(buffers, {0, 1, 2, 3}) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 7. 解除链路 + if (Unlink(llmDataDist, remoteIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + linked = false; + + // 8. 切换角色 + if (SetRole(llmDataDist, LlmRole::kPrompt, localIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 9. 
等待prompt push cache,实际业务场景可通过合适方式实现通知 + std::this_thread::sleep_for(std::chrono::seconds(30)); + + if (CheckBuffers(buffers, {4, 5, 6, 7}) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 10. 释放cache与llmDataDist + llmDataDist.Finalize(); + printf("[INFO] Finalize success\n"); + printf("[INFO] Decoder Sample end\n"); + return 0; +} + +int main(int32_t argc, char **argv) +{ + if (argc != EXPECTED_ARG_CNT) { + printf("[ERROR] expect 3 args(deviceId, localHostIp, remoteHostIp), but got %d\n", argc - 1); + return -1; + } + const auto deviceId = argv[ARG_INDEX_DEVICE_ID]; + const auto localIp = argv[ARG_INDEX_LOCAL_IP]; + const auto remoteIp = argv[ARG_INDEX_REMOTE_IP]; + printf("[INFO] deviceId = %s, localIp = %s, remoteIp = %s\n", deviceId, localIp, remoteIp); + auto ret = RunDecoderSample(deviceId, localIp, remoteIp); + return ret; +} \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp index 83a176d7a..8e2edbe3a 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp +++ b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample2.cpp @@ -24,16 +24,14 @@ using namespace llm_datadist; namespace{ constexpr uint16_t PROMPT_LISTEN_PORT = 26000; -constexpr uint16_t DECODER_LISTEN_PORT = 26001; constexpr uint16_t PROMPT_CLUSTER_ID = 0; constexpr uint32_t NUM_TENSORS = 4U; constexpr size_t TENSOR_SIZE = 8 * 16 * sizeof(int32_t); const std::vector TENSOR_SHAPE = {8, 16}; constexpr int32_t WAIT_TIME = 10; -constexpr int32_t EXPECTED_ARG_CNT = 4; +constexpr int32_t EXPECTED_ARG_CNT = 3; constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; constexpr uint32_t ARG_INDEX_LOCAL_IP = 2; -constexpr uint32_t ARG_INDEX_REMOTE_IP = 3; #define CHECK_ACL(x) \ do { \ @@ -58,110 +56,8 @@ int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId, const std: return LLM_SUCCESS; } -int32_t SetRole(LlmDataDist 
&llmDataDist, LlmRole role) +void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, const std::vector buffers) { - std::map options; - auto ret = llmDataDist.SetRole(role, options); - if (ret != LLM_SUCCESS) { - printf("[ERROR] SetRole failed, ret = %u\n", ret); - return -1; - } - printf("[INFO] SetRole success\n"); - return 0; -} - -int Link(LlmDataDist &llmDataDist, const char *localIp, const char *remoteIp) -{ - std::vector rets; - std::vector clusters; - ClusterInfo clusterInfo; - clusterInfo.remote_cluster_id = 1; - IpInfo localIpInfo; - localIpInfo.ip = localIp; - localIpInfo.port = DECODER_LISTEN_PORT; - clusterInfo.local_ip_infos.emplace_back(std::move(localIpInfo)); - IpInfo remoteIpInfo; - remoteIpInfo.ip = remoteIp; - remoteIpInfo.port = DECODER_LISTEN_PORT; - clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); - clusters.emplace_back(std::move(clusterInfo)); - auto ret = llmDataDist.LinkLlmClusters(clusters, rets); - if (ret != LLM_SUCCESS) { - printf("[ERROR] LinkLlmClusters failed, ret = %u\n", ret); - return -1; - } - printf("[INFO] LinkLlmClusters success\n"); - return 0; -} - -int Unlink(LlmDataDist &llmDataDist, const char *remoteIp) -{ - std::vector rets; - std::vector clusters; - ClusterInfo clusterInfo; - clusterInfo.remote_cluster_id = 1; - IpInfo remoteIpInfo; - remoteIpInfo.ip = remoteIp; - remoteIpInfo.port = DECODER_LISTEN_PORT; - clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); - clusters.emplace_back(std::move(clusterInfo)); - auto ret = llmDataDist.UnlinkLlmClusters(clusters, rets); - if (ret != LLM_SUCCESS) { - printf("[ERROR] UnlinkLlmClusters failed, ret = %u\n", ret); - return -1; - } - printf("[INFO] UnlinkLlmClusters success\n"); - return 0; -} - -int32_t PushCache(LlmDataDist &llmDataDist, int64_t cacheId) -{ - std::vector promptBlocks {5, 6, 7}; - std::vector decoderBlocks {5, 6, 7}; - // 可以使用PushKvBlock推送多块block的数据 - Cache cache{}; - cache.cache_id = cacheId; - auto ret = LLM_SUCCESS; - 
CacheIndex cacheIndex{1, 1}; - for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { - KvCacheExtParam param{}; - param.src_layer_range = std::pair(i, i); - param.dst_layer_range = std::pair(i, i); - param.tensor_num_per_layer = 1; - ret = llmDataDist.PushKvBlocks(cache, cacheIndex, promptBlocks, decoderBlocks, param); - if (ret != LLM_SUCCESS) { - printf("[ERROR] PushKvBlocks failed, ret = %u\n", ret); - return -1; - } - } - printf("[INFO] PushKvBlocks success\n"); - - // 也可以使用PushKvCache推送一个batch中的连续数据 - CacheIndex cacheIndex2{1, 1, 4}; - KvCacheExtParam param2{}; - param2.src_layer_range = std::pair(0, 0); - param2.dst_layer_range = std::pair(0, 0); - param2.tensor_num_per_layer = 4; - ret = llmDataDist.PushKvCache(cache, cacheIndex2, 4, -1, param2); - if (ret != LLM_SUCCESS) { - printf("[ERROR] PushKvCache failed, ret = %u\n", ret); - return -1; - } - printf("[INFO] PushKvCache success\n"); - return 0; -} - -void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, bool linked, const char *remoteIp, - const std::vector buffers) -{ - if (linked) { - auto ret = Unlink(llmDataDist, remoteIp); - if (ret != 0) { - printf("[ERROR] Unlink failed, ret = %d\n", ret); - } else { - printf("[INFO] Unlink success\n"); - } - } if (cacheId > 0) { auto ret = llmDataDist.UnregisterKvCache(cacheId); if (ret != 0) { @@ -176,7 +72,7 @@ void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, bool linked, const char llmDataDist.Finalize(); } -int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *remoteIp) +int32_t RunPromptSample(const char *deviceId, const char *localIp) { printf("[INFO] Prompt Sample start\n"); // 1. 
初始化 @@ -205,11 +101,10 @@ int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *r buffers.emplace_back(reinterpret_cast(buffer)); } int64_t cacheId = -1; - bool linked = false; auto ret = llmDataDist.RegisterKvCache(cacheDesc, tensorAddrs, {}, cacheId); if (ret != LLM_SUCCESS) { printf("[ERROR] RegisterKvCache failed, ret = %u\n", ret); - Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + Finalize(llmDataDist, cacheId, buffers); return -1; } // 3. RegisterKvCache成功后,可以获取cache中各tensor的地址用于后续操作 @@ -221,28 +116,8 @@ int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *r // 4. 等待decoder拉取cache std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); - // 5. 切换角色 - if (SetRole(llmDataDist, LlmRole::kDecoder) != 0) { - Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); - return -1; - } - - // 6. 与decoder建链 - if (Link(llmDataDist, localIp, remoteIp) != 0) { - Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); - return -1; - } - linked = true; - - // 7. 向decoder push cache - if (PushCache(llmDataDist, cacheId) != 0) { - Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); - return -1; - } - - // 8. 释放Cache与llmDataDist - llmDataDist.Finalize(); - printf("[INFO] Finalize success\n"); + // 5. 
释放Cache与llmDataDist + Finalize(llmDataDist, cacheId, buffers); printf("[INFO] Prompt Sample end\n"); return 0; } @@ -250,13 +125,12 @@ int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *r int main(int32_t argc, char **argv) { if (argc != EXPECTED_ARG_CNT) { - printf("[ERROR] expect 3 args(deviceId, localHostIp, remoteHostIp), but got %d\n", argc - 1); + printf("[ERROR] expect 2 args(deviceId, localHostIp), but got %d\n", argc - 1); return -1; } const auto deviceId = argv[ARG_INDEX_DEVICE_ID]; const auto localIp = argv[ARG_INDEX_LOCAL_IP]; - const auto remoteIp = argv[ARG_INDEX_REMOTE_IP]; - printf("[INFO] deviceId = %s, localIp = %s, remoteIp = %s\n", deviceId, localIp, remoteIp); - auto ret = RunPromptSample(deviceId, localIp, remoteIp); + printf("[INFO] deviceId = %s, localIp = %s\n", deviceId, localIp); + auto ret = RunPromptSample(deviceId, localIp); return ret; } \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample3.cpp b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample3.cpp new file mode 100644 index 000000000..f60f3b64f --- /dev/null +++ b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample3.cpp @@ -0,0 +1,241 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include "acl/acl.h" +#include "llm_datadist/llm_datadist.h" + +using namespace llm_datadist; +namespace{ +constexpr uint16_t DECODER_LISTEN_PORT = 26001; +constexpr uint16_t PROMPT_CLUSTER_ID = 0; +constexpr uint32_t NUM_TENSORS = 4U; +constexpr size_t TENSOR_SIZE = 8 * 16 * sizeof(int32_t); +const std::vector TENSOR_SHAPE = {8, 16}; +constexpr int32_t WAIT_TIME = 5; +constexpr int32_t EXPECTED_ARG_CNT = 4; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_IP = 2; +constexpr uint32_t ARG_INDEX_REMOTE_IP = 3; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId, const std::string &localIp) +{ + std::map options; + options[OPTION_DEVICE_ID] = deviceId.c_str(); + auto ret = llmDataDist.Initialize(options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return LLM_SUCCESS; +} + +int Link(LlmDataDist &llmDataDist, const char *localIp, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 1; + IpInfo localIpInfo; + localIpInfo.ip = localIp; + localIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.local_ip_infos.emplace_back(std::move(localIpInfo)); + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.LinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] LinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] LinkLlmClusters success\n"); + return 0; +} + +int Unlink(LlmDataDist 
&llmDataDist, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 1; + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.UnlinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] UnlinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] UnlinkLlmClusters success\n"); + return 0; +} + +int32_t PushCache(LlmDataDist &llmDataDist, int64_t cacheId) +{ + std::vector promptBlocks {5, 6, 7}; + std::vector decoderBlocks {5, 6, 7}; + // 可以使用PushKvBlock推送多块block的数据 + Cache cache{}; + cache.cache_id = cacheId; + auto ret = LLM_SUCCESS; + CacheIndex cacheIndex{1, 1}; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + KvCacheExtParam param{}; + param.src_layer_range = std::pair(i, i); + param.dst_layer_range = std::pair(i, i); + param.tensor_num_per_layer = 1; + ret = llmDataDist.PushKvBlocks(cache, cacheIndex, promptBlocks, decoderBlocks, param); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PushKvBlocks failed, ret = %u\n", ret); + return -1; + } + } + printf("[INFO] PushKvBlocks success\n"); + + // 也可以使用PushKvCache推送一个batch中的连续数据 + CacheIndex cacheIndex2{1, 1, 4}; + KvCacheExtParam param2{}; + param2.src_layer_range = std::pair(0, 0); + param2.dst_layer_range = std::pair(0, 0); + param2.tensor_num_per_layer = 4; + ret = llmDataDist.PushKvCache(cache, cacheIndex2, 4, -1, param2); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PushKvCache failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] PushKvCache success\n"); + return 0; +} + +void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, bool linked, const char *remoteIp, + const std::vector buffers) +{ + if (linked) { + auto ret = Unlink(llmDataDist, remoteIp); + if (ret != 0) { + printf("[ERROR] Unlink failed, ret 
= %d\n", ret); + } else { + printf("[INFO] Unlink success\n"); + } + } + if (cacheId > 0) { + auto ret = llmDataDist.UnregisterKvCache(cacheId); + if (ret != 0) { + printf("[ERROR] UnregisterKvCache failed, ret = %u\n", ret); + } else { + printf("[INFO] UnregisterKvCache success\n"); + } + } + for (auto buffer : buffers) { + aclrtFree(buffer); + } + llmDataDist.Finalize(); +} + +int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *remoteIp) +{ + printf("[INFO] Prompt Sample start\n"); + // 1. 初始化 + LlmDataDist llmDataDist(PROMPT_CLUSTER_ID, LlmRole::kPrompt); + if (Initialize(llmDataDist, deviceId, localIp) != 0) { + printf("[ERROR] Initialize LlmDataDist failed\n"); + return -1; + } + // 2. 注册内存地址 + CacheDesc cacheDesc{}; + cacheDesc.num_tensors = NUM_TENSORS; + cacheDesc.data_type = DT_INT32; + cacheDesc.shape = TENSOR_SHAPE; + std::vector tensorAddrs; + std::vector buffers; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + int32_t *buffer = nullptr; + CHECK_ACL(aclrtMalloc((void **)&buffer, TENSOR_SIZE, ACL_MEM_MALLOC_HUGE_ONLY)); + + // init device buffer + std::vector hostBuffer(TENSOR_SIZE / sizeof(int32_t)); + std::iota(hostBuffer.begin(), hostBuffer.end(), 0); + CHECK_ACL(aclrtMemcpy(buffer, TENSOR_SIZE, &hostBuffer[0], TENSOR_SIZE, ACL_MEMCPY_HOST_TO_DEVICE)); + + tensorAddrs.emplace_back(reinterpret_cast(buffer)); + buffers.emplace_back(reinterpret_cast(buffer)); + } + int64_t cacheId = -1; + bool linked = false; + auto ret = llmDataDist.RegisterKvCache(cacheDesc, tensorAddrs, {}, cacheId); + if (ret != LLM_SUCCESS) { + printf("[ERROR] RegisterKvCache failed, ret = %u\n", ret); + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + // 3. 
RegisterKvCache成功后,可以获取cache中各tensor的地址用于后续操作 + printf("[INFO] RegisterKvCache success\n"); + for (size_t i = 0U; i < tensorAddrs.size(); ++i) { + printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); + } + + // 等待decoder注册完成 + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); + + // 4. 与decoder建链 + if (Link(llmDataDist, localIp, remoteIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + linked = true; + + // 5. 向decoder push cache + if (PushCache(llmDataDist, cacheId) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 6. 释放Cache与llmDataDist + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + printf("[INFO] Prompt Sample end\n"); + return 0; +} + +int main(int32_t argc, char **argv) +{ + if (argc != EXPECTED_ARG_CNT) { + printf("[ERROR] expect 3 args(deviceId, localHostIp, remoteHostIp), but got %d\n", argc - 1); + return -1; + } + const auto deviceId = argv[ARG_INDEX_DEVICE_ID]; + const auto localIp = argv[ARG_INDEX_LOCAL_IP]; + const auto remoteIp = argv[ARG_INDEX_REMOTE_IP]; + printf("[INFO] deviceId = %s, localIp = %s, remoteIp = %s\n", deviceId, localIp, remoteIp); + auto ret = RunPromptSample(deviceId, localIp, remoteIp); + return ret; +} \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample4.cpp b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample4.cpp new file mode 100644 index 000000000..83a176d7a --- /dev/null +++ b/cplusplus/level1_single_api/11_llm_data_dist/prompt_sample4.cpp @@ -0,0 +1,262 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include "acl/acl.h" +#include "llm_datadist/llm_datadist.h" + +using namespace llm_datadist; +namespace{ +constexpr uint16_t PROMPT_LISTEN_PORT = 26000; +constexpr uint16_t DECODER_LISTEN_PORT = 26001; +constexpr uint16_t PROMPT_CLUSTER_ID = 0; +constexpr uint32_t NUM_TENSORS = 4U; +constexpr size_t TENSOR_SIZE = 8 * 16 * sizeof(int32_t); +const std::vector TENSOR_SHAPE = {8, 16}; +constexpr int32_t WAIT_TIME = 10; +constexpr int32_t EXPECTED_ARG_CNT = 4; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_IP = 2; +constexpr uint32_t ARG_INDEX_REMOTE_IP = 3; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(LlmDataDist &llmDataDist, const std::string &deviceId, const std::string &localIp) +{ + std::map options; + options[OPTION_DEVICE_ID] = deviceId.c_str(); + options[OPTION_LISTEN_IP_INFO] = (localIp + ":" + std::to_string(PROMPT_LISTEN_PORT)).c_str(); + auto ret = llmDataDist.Initialize(options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return LLM_SUCCESS; +} + +int32_t SetRole(LlmDataDist &llmDataDist, LlmRole role) +{ + std::map options; + auto ret = llmDataDist.SetRole(role, options); + if (ret != LLM_SUCCESS) { + printf("[ERROR] SetRole failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] 
SetRole success\n"); + return 0; +} + +int Link(LlmDataDist &llmDataDist, const char *localIp, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 1; + IpInfo localIpInfo; + localIpInfo.ip = localIp; + localIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.local_ip_infos.emplace_back(std::move(localIpInfo)); + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.LinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] LinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] LinkLlmClusters success\n"); + return 0; +} + +int Unlink(LlmDataDist &llmDataDist, const char *remoteIp) +{ + std::vector rets; + std::vector clusters; + ClusterInfo clusterInfo; + clusterInfo.remote_cluster_id = 1; + IpInfo remoteIpInfo; + remoteIpInfo.ip = remoteIp; + remoteIpInfo.port = DECODER_LISTEN_PORT; + clusterInfo.remote_ip_infos.emplace_back(std::move(remoteIpInfo)); + clusters.emplace_back(std::move(clusterInfo)); + auto ret = llmDataDist.UnlinkLlmClusters(clusters, rets); + if (ret != LLM_SUCCESS) { + printf("[ERROR] UnlinkLlmClusters failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] UnlinkLlmClusters success\n"); + return 0; +} + +int32_t PushCache(LlmDataDist &llmDataDist, int64_t cacheId) +{ + std::vector promptBlocks {5, 6, 7}; + std::vector decoderBlocks {5, 6, 7}; + // 可以使用PushKvBlock推送多块block的数据 + Cache cache{}; + cache.cache_id = cacheId; + auto ret = LLM_SUCCESS; + CacheIndex cacheIndex{1, 1}; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + KvCacheExtParam param{}; + param.src_layer_range = std::pair(i, i); + param.dst_layer_range = std::pair(i, i); + param.tensor_num_per_layer = 1; + ret = llmDataDist.PushKvBlocks(cache, cacheIndex, promptBlocks, decoderBlocks, 
param); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PushKvBlocks failed, ret = %u\n", ret); + return -1; + } + } + printf("[INFO] PushKvBlocks success\n"); + + // 也可以使用PushKvCache推送一个batch中的连续数据 + CacheIndex cacheIndex2{1, 1, 4}; + KvCacheExtParam param2{}; + param2.src_layer_range = std::pair(0, 0); + param2.dst_layer_range = std::pair(0, 0); + param2.tensor_num_per_layer = 4; + ret = llmDataDist.PushKvCache(cache, cacheIndex2, 4, -1, param2); + if (ret != LLM_SUCCESS) { + printf("[ERROR] PushKvCache failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] PushKvCache success\n"); + return 0; +} + +void Finalize(LlmDataDist &llmDataDist, int64_t cacheId, bool linked, const char *remoteIp, + const std::vector buffers) +{ + if (linked) { + auto ret = Unlink(llmDataDist, remoteIp); + if (ret != 0) { + printf("[ERROR] Unlink failed, ret = %d\n", ret); + } else { + printf("[INFO] Unlink success\n"); + } + } + if (cacheId > 0) { + auto ret = llmDataDist.UnregisterKvCache(cacheId); + if (ret != 0) { + printf("[ERROR] UnregisterKvCache failed, ret = %u\n", ret); + } else { + printf("[INFO] UnregisterKvCache success\n"); + } + } + for (auto buffer : buffers) { + aclrtFree(buffer); + } + llmDataDist.Finalize(); +} + +int32_t RunPromptSample(const char *deviceId, const char *localIp, const char *remoteIp) +{ + printf("[INFO] Prompt Sample start\n"); + // 1. 初始化 + LlmDataDist llmDataDist(PROMPT_CLUSTER_ID, LlmRole::kPrompt); + if (Initialize(llmDataDist, deviceId, localIp) != 0) { + printf("[ERROR] Initialize LlmDataDist failed\n"); + return -1; + } + // 2. 
注册内存地址 + CacheDesc cacheDesc{}; + cacheDesc.num_tensors = NUM_TENSORS; + cacheDesc.data_type = DT_INT32; + cacheDesc.shape = TENSOR_SHAPE; + std::vector tensorAddrs; + std::vector buffers; + for (uint32_t i = 0U; i < NUM_TENSORS; ++i) { + int32_t *buffer = nullptr; + CHECK_ACL(aclrtMalloc((void **)&buffer, TENSOR_SIZE, ACL_MEM_MALLOC_HUGE_ONLY)); + + // init device buffer + std::vector hostBuffer(TENSOR_SIZE / sizeof(int32_t)); + std::iota(hostBuffer.begin(), hostBuffer.end(), 0); + CHECK_ACL(aclrtMemcpy(buffer, TENSOR_SIZE, &hostBuffer[0], TENSOR_SIZE, ACL_MEMCPY_HOST_TO_DEVICE)); + + tensorAddrs.emplace_back(reinterpret_cast(buffer)); + buffers.emplace_back(reinterpret_cast(buffer)); + } + int64_t cacheId = -1; + bool linked = false; + auto ret = llmDataDist.RegisterKvCache(cacheDesc, tensorAddrs, {}, cacheId); + if (ret != LLM_SUCCESS) { + printf("[ERROR] RegisterKvCache failed, ret = %u\n", ret); + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + // 3. RegisterKvCache成功后,可以获取cache中各tensor的地址用于后续操作 + printf("[INFO] RegisterKvCache success\n"); + for (size_t i = 0U; i < tensorAddrs.size(); ++i) { + printf("[INFO] Tensor[%zu] addr = %p\n", i, reinterpret_cast(tensorAddrs[i])); + } + + // 4. 等待decoder拉取cache + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); + + // 5. 切换角色 + if (SetRole(llmDataDist, LlmRole::kDecoder) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 6. 与decoder建链 + if (Link(llmDataDist, localIp, remoteIp) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + linked = true; + + // 7. 向decoder push cache + if (PushCache(llmDataDist, cacheId) != 0) { + Finalize(llmDataDist, cacheId, linked, remoteIp, buffers); + return -1; + } + + // 8. 
释放Cache与llmDataDist + llmDataDist.Finalize(); + printf("[INFO] Finalize success\n"); + printf("[INFO] Prompt Sample end\n"); + return 0; +} + +int main(int32_t argc, char **argv) +{ + if (argc != EXPECTED_ARG_CNT) { + printf("[ERROR] expect 3 args(deviceId, localHostIp, remoteHostIp), but got %d\n", argc - 1); + return -1; + } + const auto deviceId = argv[ARG_INDEX_DEVICE_ID]; + const auto localIp = argv[ARG_INDEX_LOCAL_IP]; + const auto remoteIp = argv[ARG_INDEX_REMOTE_IP]; + printf("[INFO] deviceId = %s, localIp = %s, remoteIp = %s\n", deviceId, localIp, remoteIp); + auto ret = RunPromptSample(deviceId, localIp, remoteIp); + return ret; +} \ No newline at end of file diff --git a/cplusplus/level1_single_api/11_llm_data_dist/readme.md b/cplusplus/level1_single_api/11_llm_data_dist/readme.md index c591fbe8e..4e00425ea 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/readme.md +++ b/cplusplus/level1_single_api/11_llm_data_dist/readme.md @@ -73,6 +73,8 @@ 3.1 执行sample1 + 此样例介绍了libllm_engine.so的decoder向prompt进行pull cache和pull blocks流程 + - 执行prompt_sample, 参数为device_id与local_ip, 其中device_id为prompt要使用的device_id, local_ip为prompt所在device的ip,如: ``` ./prompt_sample 0 10.10.10.1 @@ -85,14 +87,41 @@ 3.2 执行sample2 - 此样例使用了单边操作的方式输出kv, p/d两侧注册kv后,decoder向prompt发起建链,然后pull kv,然后两个切换角色,prompt向decoder发起建链,并向decoder push kv + 此样例介绍了libllm_datadist.so的decoder向prompt进行pull cache和pull blocks流程,其中link和pull的方向与角色无关,可以根据需求更改 - 执行prompt_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为prompt要使用的device_id, local_host_ip为prompt所在host的ip, remote_host_ip为decoder所在host的ip,如: ``` - ./prompt_sample2 0 10.10.170.1 10.170.10.2 + ./prompt_sample2 0 10.10.170.1 ``` - 执行decoder_sample2, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为decoder要使用的device_id, local_host_ip为decoder所在host的ip,remote_host_ip为prompt所在host的ip,如: ``` - ./decoder_sample2 1 10.170.10.2 10.170.10.1 - ``` \ No newline at end of file + ./decoder_sample2 2 10.170.10.2 10.170.10.1 + ``` + + 3.3 执行sample3 
+ + 此样例介绍了libllm_datadist.so的prompt向decoder进行push cache和push blocks流程,其中link和push的方向与角色无关,可以根据需求更改 + + - 执行prompt_sample3, 参数为device_id与local_ip, 其中device_id为prompt要使用的device_id, local_ip为prompt所在host的ip,如: + ``` + ./prompt_sample3 0 10.10.10.1 10.10.10.5 + ``` + + - 执行decoder_sample3, 参数为device_id、local_ip与remote_ip, 其中device_id为decoder要使用的device_id, local_ip为decoder所在host的ip,remote_ip为prompt所在host的ip,如: + ``` + ./decoder_sample3 4 10.10.10.5 + ``` + + 3.4 执行sample4 + + 此样例介绍了libllm_datadist.so的角色切换,并结合pull以及push使用流程 + + - 执行prompt_sample4, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为prompt要使用的device_id, local_host_ip为prompt所在host的ip, remote_host_ip为decoder所在host的ip,如: + ``` + ./prompt_sample4 0 10.10.170.1 10.170.10.2 + ``` + + - 执行decoder_sample4, 参数为device_id、local_host_ip和remote_host_ip, 其中device_id为decoder要使用的device_id, local_host_ip为decoder所在host的ip,remote_host_ip为prompt所在host的ip,如: + ``` + ./decoder_sample4 2 10.170.10.2 10.170.10.1 \ No newline at end of file diff --git a/cplusplus/level1_single_api/12_adxl/CMakeLists.txt b/cplusplus/level1_single_api/12_adxl/CMakeLists.txt index bfc67c317..c7c49fff5 100644 --- a/cplusplus/level1_single_api/12_adxl/CMakeLists.txt +++ b/cplusplus/level1_single_api/12_adxl/CMakeLists.txt @@ -42,7 +42,32 @@ target_link_directories(adxl_engine_sample PRIVATE ) target_link_libraries(adxl_engine_sample PRIVATE - adxl + llm_datadist + graph + ascendcl +) + +add_executable(adxl_engine_sample2 "adxl_engine_sample2.cpp") + +target_compile_options(adxl_engine_sample2 PRIVATE + ${common_compile_options} +) + +target_compile_definitions(adxl_engine_sample2 PRIVATE + ${common_compile_definitions} +) + +target_include_directories(adxl_engine_sample2 PRIVATE + ${INCLUDE_DIR} + ${INCLUDE_DIR}/external/ge_common +) + +target_link_directories(adxl_engine_sample2 PRIVATE + ${ASCEND_PATH}/lib64 +) + +target_link_libraries(adxl_engine_sample2 PRIVATE + llm_datadist graph ascendcl ) \ No newline at end of file diff --git 
a/cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp b/cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp index a2252b8d3..9a9d42c09 100644 --- a/cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp +++ b/cplusplus/level1_single_api/12_adxl/adxl_engine_sample.cpp @@ -256,6 +256,7 @@ int main(int32_t argc, char **argv) } else { printf("[ERROR] client expect 3 args(deviceId, localEngine, remoteEngine), " "server expect 2 args(deviceId, localEngine), but got %d\n", argc - 1); + return -1; } int32_t device = std::stoi(deviceId); CHECK_ACL(aclrtSetDevice(device)); diff --git a/cplusplus/level1_single_api/12_adxl/adxl_engine_sample2.cpp b/cplusplus/level1_single_api/12_adxl/adxl_engine_sample2.cpp new file mode 100644 index 000000000..6f3e965db --- /dev/null +++ b/cplusplus/level1_single_api/12_adxl/adxl_engine_sample2.cpp @@ -0,0 +1,239 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "acl/acl.h" +#include "adxl/adxl_engine.h" + +using namespace adxl; +namespace{ +constexpr int32_t WAIT_TIME = 5; +constexpr int32_t EXPECTED_ARG_CNT = 4; +constexpr uint32_t ARG_INDEX_DEVICE_ID = 1; +constexpr uint32_t ARG_INDEX_LOCAL_ENGINE = 2; +constexpr uint32_t ARG_INDEX_REMOTE_ENGINE = 3; +constexpr uint32_t MAX_ENGINE_NAME_LEN = 30; + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); +} + +int Initialize(AdxlEngine &adxlEngine, const char *localEngine) +{ + std::map options; + auto ret = adxlEngine.Initialize(localEngine, options); + if (ret != SUCCESS) { + printf("[ERROR] Initialize failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Initialize success\n"); + return 0; +} + +int Connect(AdxlEngine &adxlEngine, const char *remoteEngine) +{ + auto ret = adxlEngine.Connect(remoteEngine); + if (ret != SUCCESS) { + printf("[ERROR] Connect failed, ret = %u\n", ret); + return -1; + } + printf("[INFO] Connect success\n"); + return 0; +} + +int32_t Transfer(AdxlEngine &adxlEngine, uint8_t *&buffer, uint8_t *&buffer2, + const char *localEngine, const char *remoteEngine) +{ + uintptr_t remoteAddr; + uintptr_t remoteAddr2; + std::ifstream(remoteEngine) >> std::hex >> remoteAddr >> remoteAddr2; + printf("[INFO] Get remote addr success, addr:%p, add2:%p\n", + reinterpret_cast(remoteAddr), reinterpret_cast(remoteAddr2)); + if (std::string(localEngine) == std::min(std::string(localEngine), std::string(remoteEngine))) { + // init device buffer + printf("[INFO] Local engine test write, write value:%s\n", localEngine); + CHECK_ACL(aclrtMemcpy(buffer, MAX_ENGINE_NAME_LEN, localEngine, strlen(localEngine), + ACL_MEMCPY_HOST_TO_DEVICE)); + TransferOpDesc desc{reinterpret_cast(buffer), remoteAddr, strlen(localEngine)}; + auto ret = 
adxlEngine.TransferSync(remoteEngine, WRITE, {desc}); + if (ret != SUCCESS) { + printf("[ERROR] TransferSync write failed, remoteAddr:%p, ret = %u\n", + reinterpret_cast(remoteAddr), ret); + return -1; + } + printf("[INFO] TransferSync write success, remoteAddr:%p, value:%s\n", + reinterpret_cast(remoteAddr), localEngine); + + // 等待对端读 + CHECK_ACL(aclrtMemcpy(buffer2, MAX_ENGINE_NAME_LEN, localEngine, strlen(localEngine), + ACL_MEMCPY_HOST_TO_DEVICE)); + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); + } else { + // 等待对端写内存完成 + printf("[INFO] Local engine test read, expect read value:%s\n", remoteEngine); + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); + char value[MAX_ENGINE_NAME_LEN] = {}; + CHECK_ACL(aclrtMemcpy(value, MAX_ENGINE_NAME_LEN, buffer, strlen(remoteEngine), ACL_MEMCPY_DEVICE_TO_HOST)); + printf("[INFO] Wait peer TransferSync write end, remoteAddr:%p, value = %s\n", + reinterpret_cast(remoteAddr), value); + if (std::string(remoteEngine) != value) { + printf("[ERROR] Failed to check peer write value:%s, expect:%s\n", value, remoteEngine); + return -1; + } else { + printf("[INFO] Check peer write value success\n"); + } + + TransferOpDesc desc{reinterpret_cast(buffer2), remoteAddr2, strlen(remoteEngine)}; + auto ret = adxlEngine.TransferSync(remoteEngine, READ, {desc}); + if (ret != SUCCESS) { + printf("[ERROR] TransferSync read failed, remoteAddr:%p, ret = %u\n", + reinterpret_cast(remoteAddr2), ret); + return -1; + } + + char value2[MAX_ENGINE_NAME_LEN] = {}; + CHECK_ACL(aclrtMemcpy(value2, MAX_ENGINE_NAME_LEN, buffer2, strlen(remoteEngine), ACL_MEMCPY_DEVICE_TO_HOST)); + printf("[INFO] TransferSync read success, remoteAddr:%p, value = %s\n", + reinterpret_cast(remoteAddr2), value2); + if (std::string(remoteEngine) != value2) { + printf("[ERROR] Failed to check read value:%s, expect:%s\n", value, remoteEngine); + return -1; + } else { + printf("[INFO] Check read value success\n"); + } + } + return 0; +} + +void 
Finalize(AdxlEngine &adxlEngine, bool connected, const char *remoteEngine, + const std::vector handles) +{ + if (connected) { + auto ret = adxlEngine.Disconnect(remoteEngine); + if (ret != 0) { + printf("[ERROR] Disconnect failed, ret = %d\n", ret); + } else { + printf("[INFO] Disconnect success\n"); + } + // 等待对端写disconnect完成, 销毁本地链路后进行解注册 + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); + } + + for (auto handle : handles) { + auto ret = adxlEngine.DeregisterMem(handle); + if (ret != 0) { + printf("[ERROR] DeregisterMem failed, ret = %u\n", ret); + } else { + printf("[INFO] DeregisterMem success\n"); + } + } + adxlEngine.Finalize(); +} + +int32_t Run(const char *localEngine, const char *remoteEngine) +{ + printf("[INFO] run start\n"); + // 1. 初始化 + AdxlEngine adxlEngine; + if (Initialize(adxlEngine, localEngine) != 0) { + printf("[ERROR] Initialize AdxlEngine failed\n"); + return -1; + } + // 2. 注册内存地址 + uint8_t *buffer = nullptr; // 用于write + CHECK_ACL(aclrtMalloc((void **)&buffer, MAX_ENGINE_NAME_LEN, ACL_MEM_MALLOC_HUGE_ONLY)); + uint8_t *buffer2 = nullptr; // 用于read + CHECK_ACL(aclrtMalloc((void **)&buffer2, MAX_ENGINE_NAME_LEN, ACL_MEM_MALLOC_HUGE_ONLY)); + + MemDesc desc{}; + desc.addr = reinterpret_cast(buffer); + desc.len = MAX_ENGINE_NAME_LEN; + MemHandle handle = nullptr; + bool connected = false; + auto ret = adxlEngine.RegisterMem(desc, MEM_DEVICE, handle); + if (ret != SUCCESS) { + printf("[ERROR] RegisterMem failed, ret = %u\n", ret); + Finalize(adxlEngine, connected, remoteEngine, {}); + return -1; + } + MemHandle handle2 = nullptr; + desc.addr = reinterpret_cast(buffer2); + ret = adxlEngine.RegisterMem(desc, MEM_DEVICE, handle2); + if (ret != SUCCESS) { + printf("[ERROR] RegisterMem failed, ret = %u\n", ret); + Finalize(adxlEngine, connected, remoteEngine, {handle}); + return -1; + } + // RegisterMem成功后,将地址保存到本地文件中等待对端读取 + printf("[INFO] RegisterMem success, addr:%p, add2:%p\n", buffer, buffer2); + std::ofstream 
tmp_file(localEngine); // 默认就是 std::ios::out | std::ios::trunc + if (tmp_file) { + tmp_file << reinterpret_cast(buffer) << " " << reinterpret_cast(buffer2) << std::endl; + } + + // 等待对端server注册完成 + std::this_thread::sleep_for(std::chrono::seconds(WAIT_TIME)); + + // 3. 与对端server建链 + if (Connect(adxlEngine, remoteEngine) != 0) { + Finalize(adxlEngine, connected, remoteEngine, {handle, handle2}); + return -1; + } + connected = true; + + // 4. 测试d2d write和read + if (Transfer(adxlEngine, buffer, buffer2, localEngine, remoteEngine) != 0) { + Finalize(adxlEngine, connected, remoteEngine, {handle, handle2}); + return -1; + } + + // 5. 释放Cache与llmDataDist + Finalize(adxlEngine, connected, remoteEngine, {handle, handle2}); + printf("[INFO] run Sample end\n"); + return 0; +} + +int main(int32_t argc, char **argv) +{ + std::string deviceId; + std::string localEngine; + std::string remoteEngine; + if (argc == EXPECTED_ARG_CNT) { + deviceId = argv[ARG_INDEX_DEVICE_ID]; + localEngine = argv[ARG_INDEX_LOCAL_ENGINE]; + remoteEngine = argv[ARG_INDEX_REMOTE_ENGINE]; + printf("[INFO] deviceId = %s, localEngine = %s, remoteEngine = %s\n", + deviceId.c_str(), localEngine.c_str(), remoteEngine.c_str()); + } else { + printf("[ERROR] expect 3 args(deviceId, localEngine, remoteEngine), but got %d\n", argc - 1); + return -1; + } + int32_t device = std::stoi(deviceId); + CHECK_ACL(aclrtSetDevice(device)); + int32_t ret = Run(localEngine.c_str(), remoteEngine.c_str()); + CHECK_ACL(aclrtResetDevice(device)); + return ret; +} \ No newline at end of file diff --git a/cplusplus/level1_single_api/12_adxl/readme.md b/cplusplus/level1_single_api/12_adxl/readme.md index 9fc957373..60ba349da 100644 --- a/cplusplus/level1_single_api/12_adxl/readme.md +++ b/cplusplus/level1_single_api/12_adxl/readme.md @@ -63,7 +63,7 @@ 2. 
在运行环境执行可执行文件。 - 3.1 执行sample + 3.1 执行sample, client-server模式,h2d场景 - 执行client adxl_engine_sample, 参数为device_id、local engine和remote engine, 其中device_id为client要使用的device_id,如: ``` @@ -72,6 +72,18 @@ - 执行server adxl_engine_sample, 参数为device_id、local engine, 其中device_id为server要使用的device_id, 如: ``` - HCCL_INTRA_ROCE_ENABLE=1 ./adxl_engine_sample 1 1 10.10.10.1:16000 + HCCL_INTRA_ROCE_ENABLE=1 ./adxl_engine_sample 1 10.10.10.1:16000 + ``` + + 3.2 执行sample2, 均作为server,d2d场景 + + - 执行server1 adxl_engine_sample2, 参数为device_id、local engine和remote engine, 其中device_id为当前engine要使用的device_id,如: + ``` + HCCL_INTRA_ROCE_ENABLE=1 ./adxl_engine_sample2 0 10.10.10.0:16000 10.10.10.1:16001 + ``` + + - 执行server2 adxl_engine_sample2, 参数为device_id、local engine和remote engine, 其中device_id为当前engine要使用的device_id, 如: + ``` + HCCL_INTRA_ROCE_ENABLE=1 ./adxl_engine_sample2 1 10.10.10.1:16001 10.10.10.0:16000 ``` **注**:HCCL_INTRA_ROCE_ENABLE=1表示使用RDMA进行传输 \ No newline at end of file diff --git a/python/level1_single_api/10_llm_data_dist/README.md b/python/level1_single_api/10_llm_data_dist/README.md index ddddf5e88..6dc0a99d7 100644 --- a/python/level1_single_api/10_llm_data_dist/README.md +++ b/python/level1_single_api/10_llm_data_dist/README.md @@ -73,7 +73,7 @@ - 将PROMPT_IP_LIST中的device_ip修改为Prompt主机的各device_ip。 - 将DECODER_IP_LIST中的device_ip修改为Decoder主机的各device_ip。 - 两台机器脚本保持一致。 - - 执行pull cache样例程序,此样例程序展示了配置内存池场景下,使用allocate_cache并从远端pull_cache: + - 执行pull cache样例程序,此样例程序展示了配置内存池场景下,使用allocate_cache,双向建链,并从远端pull_cache: 分别在Prompt主机与Decoder主机,执行样例程序: ``` # Prompt主机: @@ -81,7 +81,7 @@ # Decoder主机: python pull_cache_sample.py --device_id 0 --cluster_id 2 ``` - - 执行pull blocks样例程序,此样例程序使用torch自行申请内存,并从远端pull_cache: + - 执行pull blocks样例程序,此样例程序使用torch自行申请内存,双向建链,并从远端pull_cache: 分别在Prompt主机与Decoder主机,执行样例程序: ``` # Prompt主机: @@ -97,6 +97,22 @@ # Decoder主机: python pull_from_cache_to_blocks.py --device_id 0 --cluster_id 2 ``` + - push_blocks_sample.py:此样例程序使用单侧建链方式,申请内存并注册blocks, decoder发起建链并push 
blocks + 分别在Prompt主机与Decoder主机,执行样例程序: + ``` + # Prompt主机: + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python push_blocks_sample.py --device_id 0 --role p --local_host_ip 10.170.10.0 --remote_host_ip 10.170.10.1 + # Decoder主机: + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python push_blocks_sample.py --device_id 1 --role d --local_host_ip 10.170.10.1 --remote_host_ip 10.170.10.0 + ``` + - push_cache_sample.py:此样例程序使用单侧建链方式,申请内存并注册cache, decoder发起建链并push cache + 分别在Prompt主机与Decoder主机,执行样例程序: + ``` + # Prompt主机: + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python push_cache_sample.py --device_id 0 --role p --local_host_ip 10.170.10.0 --remote_host_ip 10.170.10.1 + # Decoder主机: + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python push_cache_sample.py --device_id 1 --role d --local_host_ip 10.170.10.1 --remote_host_ip 10.170.10.0 + ``` - switch_role_sample.py:执行switch role样例程序,此样例程序使用单侧建链方式,首先torch自行申请内存并注册blocks, decoder发起建链并pull blocks, 然后两侧切换角色, 并prompt发起建链, decoder进行push_blocks,执行方式如下: 分别在Prompt主机与Decoder主机,执行样例程序: @@ -106,5 +122,5 @@ # Decoder主机: GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 1 --role d --local_host_ip 10.170.10.1 --remote_host_ip 10.170.10.0 ``` - **注**:**GLOO_SOCKET_IFNAME**为本地网卡名,可通过ifconfig查询;**HCCL_INTRA_ROCE_ENABLE=1**代表使用roce方式进行通信; + **注**:**GLOO_SOCKET_IFNAME**为本地网卡名,可通过ifconfig查询;**HCCL_INTRA_ROCE_ENABLE=1**代表使用roce方式进行通信; diff --git a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py new file mode 100644 index 000000000..d4c971629 --- /dev/null +++ b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py @@ -0,0 +1,154 @@ +""" +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import os +import logging +import datetime +from llm_datadist import LLMDataDist, LLMRole, LLMConfig, CacheDesc, DataType, BlocksCacheKey, \ + Placement, LLMClusterInfo, LLMStatusCode +import torch +import torch.distributed as dist +import torch_npu +import torchair + +logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) + +NUM_TENSORS = 2 +BLOCKS_NUM = 3 +KV_SHAPE = 10 +PROMPT_CLUSTER_ID = 0 +DECODER_CLUSTER_ID = 1 + +def init_process_group(rank, world_size, master_ip, backend='gloo'): + os.environ['MASTER_ADDR'] = master_ip + os.environ['MASTER_PORT'] = '29500' + + logging.info(f"init group begin, {rank=}, {world_size=}, {master_ip=}") + dist.init_process_group(backend=backend, rank=rank, world_size=world_size, timeout=datetime.timedelta(seconds=30)) + logging.info(f"init group success, {rank=}, {world_size=}, {master_ip=}") + + +def init_llm_datadist(role: LLMRole, cluster_id, device_id: int, local_host_ip, remote_host_ip) -> LLMDataDist: + init_process_group(cluster_id, 2, min(local_host_ip, remote_host_ip)) + datadist = LLMDataDist(role, cluster_id) + llm_config = LLMConfig() + llm_config.device_id = device_id + llm_config.local_comm_res = "" + if role == LLMRole.PROMPT: + llm_config.listen_ip_info = f"{local_host_ip}:26000" + llm_options = llm_config.generate_options() + datadist.init(llm_options) + logging.info(f"init {role} success, {cluster_id=}") + return datadist + + +def run_prompt_sample(datadist): + # 1. 
注册内存 + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + tensor = torch.ones(BLOCKS_NUM, KV_SHAPE, dtype=torch.float).npu() + tensor2 = torch.ones(BLOCKS_NUM, KV_SHAPE, dtype=torch.float).npu() + addr = int(tensor.data_ptr()) + addr2 = int(tensor2.data_ptr()) + cache = cache_manager.register_blocks_cache(cache_desc, [addr, addr2], BlocksCacheKey(PROMPT_CLUSTER_ID, 0)) + logging.info('register_blocks_cache success') + + dist.barrier() # register end + + # 2. 等decoder pull blocks + dist.barrier() # decoder push blocks end + + # 3. 解链 + cluster = LLMClusterInfo() + cluster.remote_cluster_id = DECODER_CLUSTER_ID + ret, _ = datadist.unlink_clusters([cluster], 5000, force=True) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + + cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + logging.info('[finalize] success') + + +def run_decoder_sample(datadist, local_host_ip, remote_host_ip): + # 1. 注册内存 + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + tensor = torch.full((BLOCKS_NUM, KV_SHAPE), 0, dtype=torch.float).npu() + tensor2 = torch.full((BLOCKS_NUM, KV_SHAPE), 0, dtype=torch.float).npu() + addr = int(tensor.data_ptr()) + addr2 = int(tensor2.data_ptr()) + cache = cache_manager.register_blocks_cache(cache_desc, [addr, addr2], BlocksCacheKey(DECODER_CLUSTER_ID, 0)) + logging.info('register_blocks_cache success') + dist.barrier() # register end + + # 2. 向prompt建链 + cluster = LLMClusterInfo() + cluster.remote_cluster_id = PROMPT_CLUSTER_ID + cluster.append_local_ip_info(local_host_ip, 26000) + cluster.append_remote_ip_info(remote_host_ip, 26000) + ret, _ = datadist.link_clusters([cluster], 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("link failed") + + # 3. 
向prompt push blocks + cache_manager.pull_blocks(BlocksCacheKey(PROMPT_CLUSTER_ID, 0), cache, src_blocks=[0, 1], dst_blocks=[0, 2]) + logging.info(f'after decoder pull, {tensor=}') + logging.info(f'after decoder pull, {tensor2=}') + + dist.barrier() # push_blocks end + + # 4. 断链 + cluster = LLMClusterInfo() + cluster.remote_cluster_id = PROMPT_CLUSTER_ID + ret, _ = datadist.unlink_clusters([cluster], 5000, force=True) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + + cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--device_id", type=int, default=0, help='device id') + parser.add_argument("--role", type=str, default=1, help='role type, support p/d') + parser.add_argument("--local_host_ip", type=str, help='local host ip') + parser.add_argument("--remote_host_ip", type=str, help='remote host ip') + args = parser.parse_args() + if args.role not in ['p', 'd']: + raise RuntimeError("Not supported cluster id") + if args.device_id not in [0, 1, 2, 3, 4, 5, 6, 7]: + raise RuntimeError("Not supported device id") + if args.role == 'd': + if args.local_host_ip is None: + raise RuntimeError("local_host_ip is not set") + if args.remote_host_ip is None: + raise RuntimeError("remote_host_ip is not set") + logging.info(f'Sample start, device_id = {args.device_id}, role = {args.role}') + + torch.npu.set_device(args.device_id) + role = LLMRole.PROMPT if args.role == 'p' else LLMRole.DECODER + cluster_id = PROMPT_CLUSTER_ID if args.role == 'p' else DECODER_CLUSTER_ID + datadist = init_llm_datadist(role, cluster_id, args.device_id, args.local_host_ip, args.remote_host_ip) + if role == LLMRole.PROMPT: + run_prompt_sample(datadist) + else: + run_decoder_sample(datadist, args.local_host_ip, args.remote_host_ip) + logging.info('Sample end') diff --git a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_cache_sample.py 
b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_cache_sample.py new file mode 100644 index 000000000..15e27764e --- /dev/null +++ b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_cache_sample.py @@ -0,0 +1,153 @@ +""" +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import os +import logging +import datetime +from llm_datadist import LLMDataDist, LLMRole, LLMConfig, CacheDesc, DataType, CacheKeyByIdAndIndex, \ + Placement, LLMClusterInfo, LLMStatusCode +import torch +import torch.distributed as dist +import torch_npu +import torchair + +logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) + +NUM_TENSORS = 2 +BLOCKS_NUM = 3 +KV_SHAPE = 10 +PROMPT_CLUSTER_ID = 0 +DECODER_CLUSTER_ID = 1 + +def init_process_group(rank, world_size, master_ip, backend='gloo'): + os.environ['MASTER_ADDR'] = master_ip + os.environ['MASTER_PORT'] = '29500' + + logging.info(f"init group begin, {rank=}, {world_size=}, {master_ip=}") + dist.init_process_group(backend=backend, rank=rank, world_size=world_size, timeout=datetime.timedelta(seconds=30)) + logging.info(f"init group success, {rank=}, {world_size=}, {master_ip=}") + + +def init_llm_datadist(role: LLMRole, cluster_id, device_id: int, local_host_ip, remote_host_ip) -> LLMDataDist: + init_process_group(cluster_id, 2, min(local_host_ip, remote_host_ip)) + datadist = LLMDataDist(role, cluster_id) + 
llm_config = LLMConfig() + llm_config.device_id = device_id + llm_config.local_comm_res = "" + if role == LLMRole.PROMPT: + llm_config.listen_ip_info = f"{local_host_ip}:26000" + llm_options = llm_config.generate_options() + datadist.init(llm_options) + logging.info(f"init {role} success, {cluster_id=}") + return datadist + + +def run_prompt_sample(datadist): + # 1. 注册内存 + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + tensor = torch.full((BLOCKS_NUM, KV_SHAPE), 0, dtype=torch.float).npu() + tensor2 = torch.full((BLOCKS_NUM, KV_SHAPE), 0, dtype=torch.float).npu() + addr = int(tensor.data_ptr()) + addr2 = int(tensor2.data_ptr()) + cache = cache_manager.register_cache(cache_desc, [addr, addr2]) + logging.info('register_blocks_cache success') + + dist.barrier() # register end + + # 2. 等decoder push cache + dist.barrier() # decoder push cache end + + logging.info(f'after decoder push, {tensor=}') + logging.info(f'after decoder push, {tensor2=}') + + dist.barrier() # wait unlink end + + cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + logging.info('[finalize] success') + + +def run_decoder_sample(datadist, local_host_ip, remote_host_ip): + # 1. 注册内存 + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + tensor = torch.full((BLOCKS_NUM, KV_SHAPE), 1, dtype=torch.float).npu() + tensor2 = torch.full((BLOCKS_NUM, KV_SHAPE), 1, dtype=torch.float).npu() + addr = int(tensor.data_ptr()) + addr2 = int(tensor2.data_ptr()) + cache = cache_manager.register_cache(cache_desc, [addr, addr2]) + logging.info('register_blocks_cache success') + dist.barrier() # register end + + # 2. 
向prompt建链 + cluster = LLMClusterInfo() + cluster.remote_cluster_id = PROMPT_CLUSTER_ID + cluster.append_local_ip_info(local_host_ip, 26000) + cluster.append_remote_ip_info(remote_host_ip, 26000) + ret, _ = datadist.link_clusters([cluster], 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("link failed") + + # 3. 向prompt push cache + cache_manager.push_cache(CacheKeyByIdAndIndex(cluster_id=PROMPT_CLUSTER_ID, cache_id=1, batch_index=0), cache, + src_batch_index=0) + + dist.barrier() # push cache end + + # 4. 断链 + cluster = LLMClusterInfo() + cluster.remote_cluster_id = PROMPT_CLUSTER_ID + cluster.append_remote_ip_info(remote_host_ip, 26000) + ret, _ = datadist.unlink_clusters([cluster], 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + dist.barrier() # unlink end + + cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--device_id", type=int, default=0, help='device id') + parser.add_argument("--role", type=str, default=1, help='role type, support p/d') + parser.add_argument("--local_host_ip", type=str, help='local host ip') + parser.add_argument("--remote_host_ip", type=str, help='remote host ip') + args = parser.parse_args() + if args.role not in ['p', 'd']: + raise RuntimeError("Not supported cluster id") + if args.device_id not in [0, 1, 2, 3, 4, 5, 6, 7]: + raise RuntimeError("Not supported device id") + if args.role == 'd': + if args.local_host_ip is None: + raise RuntimeError("local_host_ip is not set") + if args.remote_host_ip is None: + raise RuntimeError("remote_host_ip is not set") + logging.info(f'Sample start, device_id = {args.device_id}, role = {args.role}') + + torch.npu.set_device(args.device_id) + role = LLMRole.PROMPT if args.role == 'p' else LLMRole.DECODER + cluster_id = PROMPT_CLUSTER_ID if args.role == 'p' else DECODER_CLUSTER_ID + datadist = init_llm_datadist(role, cluster_id, args.device_id, 
args.local_host_ip, args.remote_host_ip) + if role == LLMRole.PROMPT: + run_prompt_sample(datadist) + else: + run_decoder_sample(datadist, args.local_host_ip, args.remote_host_ip) + logging.info('Sample end') diff --git a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py index 299f48c99..5b723329d 100644 --- a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py +++ b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/switch_role_sample.py @@ -47,8 +47,7 @@ def init_llm_datadist(role: LLMRole, cluster_id, device_id: int, local_host_ip, datadist = LLMDataDist(role, cluster_id) llm_config = LLMConfig() llm_config.device_id = device_id - llm_config.enable_cache_manager = True - llm_config.enable_remote_cache_accessible = True + llm_config.local_comm_res = "" if role == LLMRole.PROMPT: llm_config.listen_ip_info = f"{local_host_ip}:26000" llm_options = llm_config.generate_options() -- Gitee From dfcbb5c3c9813daf812a66ed33f61032e2cd7009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E6=99=BA=E6=85=A7?= Date: Mon, 8 Sep 2025 01:22:04 +0000 Subject: [PATCH 69/97] =?UTF-8?q?!2753=20fix=20readme=20Merge=20pull=20req?= =?UTF-8?q?uest=20!2753=20from=20=E8=B5=B5=E6=99=BA=E6=85=A7/zzh=5Fadd=5Fl?= =?UTF-8?q?lm=5Fdatadist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../level1_single_api/11_llm_data_dist/readme.md | 12 +++++++++--- cplusplus/level1_single_api/12_adxl/readme.md | 3 ++- cplusplus/level1_single_api/README.md | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cplusplus/level1_single_api/11_llm_data_dist/readme.md b/cplusplus/level1_single_api/11_llm_data_dist/readme.md index 4e00425ea..24ae39e0f 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/readme.md +++ 
b/cplusplus/level1_single_api/11_llm_data_dist/readme.md @@ -15,9 +15,15 @@ ## 目录结构 ``` -├── prompt_sample.cpp // prompt样例main函数 -├── decoder_sample.cpp // decoder样例main函数 -├── CMakeLists.txt // 编译脚本 +├── prompt_sample.cpp // sample1的prompt样例main函数 +├── decoder_sample.cpp // sample1的decoder样例main函数 +├── prompt_sample2.cpp // sample2的prompt样例main函数 +├── decoder_sample2.cpp // sample2的decoder样例main函数 +├── prompt_sample3.cpp // sample3的prompt样例main函数 +├── decoder_sample3.cpp // sample3的decoder样例main函数 +├── prompt_sample4.cpp // sample4的prompt样例main函数 +├── decoder_sample4.cpp // sample4的decoder样例main函数 +├── CMakeLists.txt // 编译脚本 ``` diff --git a/cplusplus/level1_single_api/12_adxl/readme.md b/cplusplus/level1_single_api/12_adxl/readme.md index 60ba349da..8da00a041 100644 --- a/cplusplus/level1_single_api/12_adxl/readme.md +++ b/cplusplus/level1_single_api/12_adxl/readme.md @@ -15,7 +15,8 @@ ## 目录结构 ``` -├── adxl_engine_sample.cpp // adxl_engine样例 +├── adxl_engine_sample.cpp // adxl_engine的sample1样例 +├── adxl_engine_sample2.cpp // adxl_engine的sample2样例 ├── CMakeLists.txt // 编译脚本 ``` diff --git a/cplusplus/level1_single_api/README.md b/cplusplus/level1_single_api/README.md index 82be76df8..1d2856914 100644 --- a/cplusplus/level1_single_api/README.md +++ b/cplusplus/level1_single_api/README.md @@ -18,3 +18,4 @@ This catalog is a sample of a single function interface. 
Each folder corresponds | [9_feature_retrieval](./9_feature_retrieval) | feature vector search interface sample | | [10_aoe_api](./10_aoe_api) | aoe interface sample | | [11_llm_data_dist](./11_llm_data_dist) | LLM-DataDist sample | +| [12_adxl](./12_adxl) | ADXL sample | -- Gitee From e62d0da942d993fe05bd18b9e3b14466c678f98e Mon Sep 17 00:00:00 2001 From: wangyuchen Date: Thu, 11 Sep 2025 12:28:37 +0000 Subject: [PATCH 70/97] !2758 fix sample log Merge pull request !2758 from wangyuchen/fix_sample --- .../3_ir/3_modify_subgraph_pass/readme.md | 12 +- .../src/add_abs_node.hpp | 107 ++++++++---------- 2 files changed, 55 insertions(+), 64 deletions(-) diff --git a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md index ae9ce6dff..66986b664 100644 --- a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md +++ b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md @@ -22,14 +22,14 @@ - 编译器:g++ - 芯片:all - python及依赖的库:python3.7.5、tensorflow1.15.0 -- 已完成昇腾AI软件栈在开发环境上的部署 +- 已完成昇腾AI软件栈在开发环境上的部署(本示例代码基于CANN 8.2.RC1版本) ## 程序编译 1. 根据实际情况修改**CMakeLists.txt**文件中的如下信息。 - - ASCEND_PATH:指定到ATC或FwkACLlib的安装目录,例如/home/HwHiAiUser/Ascend/ascend-toolkit/latest + - ASCEND_PATH:指定到ATC或FwkACLlib的安装目录,例如 ${HOME}/Ascend/ascend-toolkit/latest - target_include_directories:需要包含的头文件,对于本示例,无需修改。如果是用户自行开发的代码,当需要添加头文件时,在示例下方直接增加行即可,注意不要删除原有项目。如果网络中有自定义算子,请增加自定义算子的原型定义头文件。 @@ -95,11 +95,11 @@ ``` ModifySubgraphPass begin. Graph has 2 subgraphs. + Find dst node: cond1cond_false_80/const1_RetVal. Find src node: cond1cond_false_80/const1_0. - Find dst node: . Add abs node success. + Find dst node: cond0cond_true_71/const1_RetVal. Find src node: cond0cond_true_71/const1_0. - Find dst node: . Add abs node success. ModifySubgraphPass end. ``` @@ -109,11 +109,11 @@ ``` ModifySubgraphPass begin. Find cond node. + Find dst node: cond0cond_true_71/const1_RetVal. 
Find src node: cond0cond_true_71/const1_0. - Find dst node: . Add abs node success. + Find dst node: cond1cond_false_80/const1_RetVal. Find src node: cond1cond_false_80/const1_0. - Find dst node: . Add abs node success. ModifySubgraphPass end. ``` diff --git a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp index 1127e6e9d..9647ce0af 100644 --- a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp +++ b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/src/add_abs_node.hpp @@ -18,25 +18,40 @@ #define ADD_ABS_NODE_HPP_ #include -#include "register_custom_pass.h" -#include "all_ops.h" -using namespace std; -using namespace ge; +#include "all_ops.h" +#include "register_custom_pass.h" namespace pass { constexpr const char *kOpTypeData = "Data"; constexpr const char *kOpTypFrameworkOp = "FrameworkOp"; int32_t kCount = 0; -inline int CheckGraphStatus(graphStatus ret, const std::string &msg, CustomContext &custom_context) { - if (ret != GRAPH_SUCCESS) { - custom_context.SetErrorMessage(msg); - return -1; - } - return 0; -} +#define CHECK_STATUS(exp, msg) \ + do { \ + if ((exp) != ge::GRAPH_SUCCESS) { \ + std::cout << "Check (" << #exp << ") failed"; \ + if (std::string(msg).length() > 0) { \ + std::cout << ", error message: " << (msg); \ + } \ + std::cout << std::endl; \ + return ge::GRAPH_FAILED; \ + } \ + } while (0) +ge::graphStatus InsertAbsNode(const ge::GraphPtr &graph, ge::GNode &src_node, const int32_t src_idx, ge::GNode &dst_node, + const int32_t dst_idx) { + // 删除Data和FrameworkOp节点之间的边 + CHECK_STATUS(graph->RemoveEdge(src_node, src_idx, dst_node, dst_idx), "Remove edge failed."); + // 在Data和FrameworkOp节点之间插入Abs节点 + std::string name = "abs_" + std::to_string(kCount++); + auto abs = ge::op::Abs(name.c_str()); + ge::GNode node_abs = graph->AddNodeByOp(abs); + CHECK_STATUS(graph->AddDataEdge(src_node, src_idx, node_abs, 0), "Add 
data edge failed between Data and Abs."); + CHECK_STATUS(graph->AddDataEdge(node_abs, 0, dst_node, dst_idx), "Add data edge failed between Abs and FrameworkOp."); + std::cout << "Add abs node success." << std::endl; + return ge::GRAPH_SUCCESS; +} // |o>----------------------------------- // |o> Data Data // |o> | | @@ -45,57 +60,33 @@ inline int CheckGraphStatus(graphStatus ret, const std::string &msg, CustomConte // |o> FrameworkOp FrameworkOp // |o>----------------------------------- // pass修改子图说明:本例识别上图中左边的Data和FrameworkOp节点,并在中间插入Abs节点得到右图 -graphStatus AddAbsNodeInSubgraph(GraphPtr &graph, CustomPassContext &custom_context) { +ge::graphStatus AddAbsNodeInSubgraph(ge::GraphPtr &graph, ge::CustomPassContext &custom_context) { // 1. 获取子图中的Data和FrameworkOp节点 - GNode src_node; - GNode dst_node; - vector nodes = graph->GetAllNodes(); - graphStatus ret = GRAPH_FAILED; - for (auto &node : nodes) { - AscendString node_type; - ret = node.GetType(node_type); - if (CheckGraphStatus(ret, "Get node type failed.", custom_context) != 0) { - return -1; - } - AscendString node_name; - ret = node.GetName(node_name); - if (CheckGraphStatus(ret, "Get node name failed.", custom_context) != 0) { - return -1; + auto all_nodes = graph->GetAllNodes(); + for (auto &dst_node : all_nodes) { + ge::AscendString dst_type; + CHECK_STATUS(dst_node.GetType(dst_type), "Failed to get the type of the dst_node."); + if (dst_type != kOpTypFrameworkOp) { + continue; } - if (node_type == kOpTypeData) { - src_node = node; - cout << "Find src node: " << node_name.GetString() << "." << endl; - } else if (node_type == kOpTypFrameworkOp) { - AscendString node_name; - dst_node = node; - cout << "Find dst node: " << node_name.GetString() << "." << endl; + ge::AscendString dst_name; + CHECK_STATUS(dst_node.GetName(dst_name), "Failed to get the name of the dst_node"); + std::cout << "Find dst node: " << dst_name.GetString() << "." << std::endl; + // 2. 
找到目标节点FrameworkOp,然后依次获取输入节点 + for (int32_t i = 0; i < dst_node.GetInputsSize(); ++i) { + const auto src_node_and_port = dst_node.GetInDataNodesAndPortIndexs(i); + // 3. 如果没有找到目标节点或者目标节点间无连边,跳过不改图 + if (src_node_and_port.first != nullptr) { + ge::AscendString src_name; + CHECK_STATUS(src_node_and_port.first->GetName(src_name), "Failed to get the name of the src_node"); + std::cout << "Find src node: " << src_name.GetString() << "." << std::endl; + CHECK_STATUS(InsertAbsNode(graph, *src_node_and_port.first, src_node_and_port.second, dst_node, i), + "Failed to insert Abs node"); + } } } - // 2. 删除Data和FrameworkOp节点之间的边,如果没有找到目标节点或者目标节点间无连边,返回成功,无改图 - if (src_node == nullptr || dst_node == nullptr) { - cout << "Do not find target src_node or dst_node, stop to add abs node success." << endl; - return GRAPH_SUCCESS; - } - ret = graph->RemoveEdge(src_node, 0, dst_node, 0); - if (ret != GRAPH_SUCCESS) { - cout << "Do not find target nodes or there is no edge between src and dst nodes." << endl; - return GRAPH_SUCCESS; - } - // 3. 在Data和FrameworkOp节点之间插入Abs节点 - string name = "abs_" + to_string(kCount++); - auto abs = op::Abs(name.c_str()); - GNode node_abs = graph->AddNodeByOp(abs); - ret = graph->AddDataEdge(src_node, 0, node_abs, 0); - if (CheckGraphStatus(ret, "Add data edge failed between const1_0 and abs.", custom_context) != 0) { - return -1; - } - ret = graph->AddDataEdge(node_abs, 0, dst_node, 0); - if (CheckGraphStatus(ret, "Add data edge failed between abs and const1_RetVal.", custom_context) != 0) { - return -1; - } - cout << "Add abs node success." << endl; - return GRAPH_SUCCESS; + return ge::GRAPH_SUCCESS; } -} // namespace pass +} // namespace pass #endif \ No newline at end of file -- Gitee From 1932ccc18497ffa72c7131b1d9ba5b034dcf0886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E4=B8=80=E6=BA=90?= Date: Mon, 15 Sep 2025 03:00:42 +0000 Subject: [PATCH 71/97] !2763 Delete obselote AscendC samples. * Delete obselote AscendC samples. 
--- .../acl_invocation/.gitignore | 6 - .../acl_offline_model/op_verify/inc/common.h | 45 - .../op_verify/inc/op_runner.h | 172 - .../op_verify/inc/operator_desc.h | 59 - .../acl_offline_model/op_verify/run.sh | 80 - .../run/out/test_data/config/acl.json | 1 - .../config/add_custom_dynamic_shape.json | 33 - .../config/add_custom_static_shape.json | 30 - .../run/out/test_data/data/generate_data.py | 16 - .../op_verify/scripts/verify_result.py | 43 - .../op_verify/src/CMakeLists.txt | 59 - .../op_verify/src/common.cpp | 79 - .../acl_offline_model/op_verify/src/main.cpp | 204 - .../op_verify/src/op_runner.cpp | 368 - .../op_verify/src/operator_desc.cpp | 61 - .../acl_invocation/acl_offline_model/run.sh | 168 - .../acl_online_model/op_verify/inc/common.h | 45 - .../op_verify/inc/op_runner.h | 172 - .../op_verify/inc/operator_desc.h | 59 - .../acl_online_model/op_verify/run.sh | 80 - .../run/out/test_data/config/acl.json | 1 - .../run/out/test_data/data/generate_data.py | 16 - .../op_verify/scripts/verify_result.py | 43 - .../op_verify/src/CMakeLists.txt | 59 - .../acl_online_model/op_verify/src/common.cpp | 79 - .../acl_online_model/op_verify/src/main.cpp | 208 - .../op_verify/src/op_runner.cpp | 430 - .../op_verify/src/operator_desc.cpp | 60 - .../acl_invocation/acl_online_model/run.sh | 160 - .../op_verify/inc/common.h | 45 - .../op_verify/inc/op_runner.h | 172 - .../op_verify/inc/operator_desc.h | 59 - .../acl_online_model_unalign/op_verify/run.sh | 80 - .../run/out/test_data/config/acl.json | 1 - .../run/out/test_data/data/generate_data.py | 16 - .../op_verify/scripts/verify_result.py | 43 - .../op_verify/src/CMakeLists.txt | 59 - .../op_verify/src/common.cpp | 79 - .../op_verify/src/main.cpp | 213 - .../op_verify/src/op_runner.cpp | 430 - .../op_verify/src/operator_desc.cpp | 60 - .../acl_online_model_unalign/run.sh | 160 - .../acl_invocation/op_dev/add_custom.json | 52 - .../op_dev/add_custom_unalign.json | 52 - .../op_dev/op_host/add_custom.cpp | 69 - 
.../op_dev/op_host/add_custom_tiling.h | 16 - .../op_dev/op_host/add_custom_unalign.cpp | 97 - .../op_host/add_custom_unalign_tiling.h | 19 - .../op_dev/op_kernel/add_custom.cpp | 87 - .../op_dev/op_kernel/add_custom_unalign.cpp | 95 - .../acl_invocation/op_dev/run.sh | 39 - .../acl_invocation/readme.md | 21 - .../kernel_invocation/.gitignore | 4 - .../kernel_invocation/Add/CMakeLists.txt | 1 - .../kernel_invocation/Add/add_custom.cpp | 111 - .../kernel_invocation/Add/add_custom.py | 28 - .../kernel_invocation/Add/cmake | 1 - .../kernel_invocation/Add/data_utils.h | 1 - .../kernel_invocation/Add/input/.gitkeep | 0 .../kernel_invocation/Add/main.cpp | 79 - .../kernel_invocation/Add/output/.gitkeep | 0 .../kernel_invocation/Add/run.sh | 1 - .../kernel_invocation/Add_tile/CMakeLists.txt | 1 - .../kernel_invocation/Add_tile/add_custom.cpp | 109 - .../kernel_invocation/Add_tile/add_custom.py | 36 - .../Add_tile/add_custom_tiling.h | 45 - .../kernel_invocation/Add_tile/cmake | 1 - .../kernel_invocation/Add_tile/data_utils.h | 1 - .../kernel_invocation/Add_tile/input/.gitkeep | 0 .../kernel_invocation/Add_tile/main.cpp | 108 - .../Add_tile/output/.gitkeep | 0 .../kernel_invocation/Add_tile/run.sh | 1 - .../LeakyReLU/CMakeLists.txt | 10 - .../cmake/Modules/CMakeCCECompiler.cmake.in | 5 - .../cmake/Modules/CMakeCCEFunction.cmake | 20 - .../cmake/Modules/CMakeCCEInformation.cmake | 35 - .../Modules/CMakeDetermineCCECompiler.cmake | 113 - .../cmake/Modules/CMakeTestCCECompiler.cmake | 1 - .../LeakyReLU/cmake/cpu/CMakeLists.txt | 30 - .../LeakyReLU/cmake/npu/CMakeLists.txt | 85 - .../kernel_invocation/LeakyReLU/data_utils.h | 196 - .../LeakyReLU/leakyrelu_custom.cpp | 93 - .../LeakyReLU/leakyrelu_custom.py | 27 - .../LeakyReLU/leakyrelu_custom_tiling.h | 45 - .../kernel_invocation/LeakyReLU/main.cpp | 101 - .../kernel_invocation/LeakyReLU/readme.md | 16 - .../kernel_invocation/LeakyReLU/run.sh | 84 - .../kernel_invocation/MatMul/CMakeLists.txt | 1 - 
.../kernel_invocation/MatMul/data_utils.h | 1 - .../kernel_invocation/MatMul/input/.gitkeep | 0 .../kernel_invocation/MatMul/main.cpp | 80 - .../MatMul/matmul_custom.cpp | 186 - .../kernel_invocation/MatMul/matmul_custom.py | 25 - .../kernel_invocation/MatMul/output/.gitkeep | 0 .../kernel_invocation/MatMul/run.sh | 1 - .../Matmul_high_level/CMakeLists.txt | 14 - .../cmake/Modules/CMakeCCECompiler.cmake.in | 5 - .../cmake/Modules/CMakeCCEFunction.cmake | 20 - .../cmake/Modules/CMakeCCEInformation.cmake | 35 - .../Modules/CMakeDetermineCCECompiler.cmake | 123 - .../cmake/Modules/CMakeTestCCECompiler.cmake | 1 - .../cmake/cpu/CMakeLists.txt | 30 - .../cmake/npu/CMakeLists.txt | 85 - .../cmake/tiling/CMakeLists.txt | 39 - .../Matmul_high_level/custom_tiling/main.cpp | 95 - .../Matmul_high_level/data_utils.h | 196 - .../Matmul_high_level/input/.gitkeep | 0 .../Matmul_high_level/main.cpp | 100 - .../Matmul_high_level/matmul_custom.cpp | 75 - .../Matmul_high_level/matmul_custom.py | 25 - .../Matmul_high_level/readme.md | 16 - .../Matmul_high_level/run.sh | 110 - .../kernel_template/CMakeLists.txt | 10 - .../cmake/Modules/CMakeCCECompiler.cmake.in | 5 - .../cmake/Modules/CMakeCCEFunction.cmake | 14 - .../cmake/Modules/CMakeCCEInformation.cmake | 35 - .../Modules/CMakeDetermineCCECompiler.cmake | 114 - .../cmake/Modules/CMakeTestCCECompiler.cmake | 1 - .../kernel_template/cmake/cpu/CMakeLists.txt | 30 - .../kernel_template/cmake/npu/CMakeLists.txt | 85 - .../kernel_template/data_utils.h | 196 - .../kernel_invocation/kernel_template/run.sh | 83 - .../kernel_invocation/readme.md | 28 - .../LeakyReluCustom/framework/CMakeLists.txt | 11 - .../framework/onnx_plugin/CMakeLists.txt | 8 - .../framework/onnx_plugin/json.hpp | 26137 ---------------- .../onnx_leaky_relu_custom_plugin.cc | 55 - .../LeakyReluCustom/fusion_off.cfg | 14 - .../LeakyReluCustom/leaky_relu_custom.json | 38 - .../LeakyReluCustom/op_host/CMakeLists.txt | 65 - .../op_host/leaky_relu_custom.cpp | 71 - 
.../op_host/leaky_relu_custom_tiling.h | 14 - .../LeakyReluCustom/op_kernel/CMakeLists.txt | 56 - .../op_kernel/leaky_relu_custom.cpp | 104 - .../pytorch_invocation/.gitignore | 15 - .../pytorch_invocation/op_dev | 1 - .../pytorch_patch/AddCustomKernelNpu.cpp | 20 - .../pytorch_patch/npu_native_functions.yaml | 1 - .../pytorch_invocation/readme.md | 5 - .../pytorch_invocation/run.sh | 197 - .../pytorch_invocation/test_ops_custom.py | 28 - .../tensorflow_inference/LeakyRelu/README.md | 30 - .../LeakyRelu/fusion_off.cfg | 14 - .../LeakyRelu/leakyrelu.json | 29 - .../LeakyRelu/op_host/leaky_relu.cpp | 59 - .../LeakyRelu/op_host/leaky_relu_tiling.h | 10 - .../LeakyRelu/op_kernel/kernel_leaky_relu.h | 62 - .../op_kernel/kernel_leaky_relu_tiling.h | 54 - .../LeakyRelu/op_kernel/leaky_relu.cpp | 13 - 149 files changed, 35164 deletions(-) delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/.gitignore delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/common.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/op_runner.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/operator_desc.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/acl.json delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_dynamic_shape.json delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_static_shape.json delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/data/generate_data.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/scripts/verify_result.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/common.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/op_runner.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/operator_desc.cpp delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/common.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/op_runner.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/operator_desc.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/config/acl.json delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/data/generate_data.py delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/scripts/verify_result.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/common.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/op_runner.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/operator_desc.cpp delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/common.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/op_runner.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/operator_desc.h delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/config/acl.json delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/data/generate_data.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/scripts/verify_result.py delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/common.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/op_runner.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/operator_desc.cpp delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom.json delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom_unalign.json delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_tiling.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign_tiling.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom_unalign.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/run.sh delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/readme.md delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/.gitignore delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.py delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/cmake delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/data_utils.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/input/.gitkeep delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/output/.gitkeep delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/run.sh delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom_tiling.h delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/cmake delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/data_utils.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/input/.gitkeep 
delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/output/.gitkeep delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCECompiler.cmake.in delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEFunction.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEInformation.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeDetermineCCECompiler.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeTestCCECompiler.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/cpu/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/npu/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/data_utils.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom_tiling.h delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/readme.md delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/run.sh delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/CMakeLists.txt delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/data_utils.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/input/.gitkeep delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/output/.gitkeep delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCECompiler.cmake.in delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEFunction.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEInformation.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeDetermineCCECompiler.cmake delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeTestCCECompiler.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/cpu/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/npu/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/tiling/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/custom_tiling/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/data_utils.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/input/.gitkeep delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/main.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/readme.md delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCECompiler.cmake.in delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEFunction.cmake delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEInformation.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeDetermineCCECompiler.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeTestCCECompiler.cmake delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/cpu/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/npu/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/data_utils.h delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/readme.md delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/json.hpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/onnx_leaky_relu_custom_plugin.cc delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/fusion_off.cfg delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/leaky_relu_custom.json delete mode 100755 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h delete mode 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/CMakeLists.txt delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/leaky_relu_custom.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/.gitignore delete mode 120000 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/op_dev delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/AddCustomKernelNpu.cpp delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/npu_native_functions.yaml delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/readme.md delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/run.sh delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/test_ops_custom.py delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp delete mode 100644 
cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h delete mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/.gitignore b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/.gitignore deleted file mode 100644 index 8a4e90845..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -/**/custom_op -build -build_out -/**/run/out/* -!/**/run/out/test_data -fusion_result.json diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/common.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/common.h deleted file mode 100644 index 854c5931c..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/common.h +++ /dev/null @@ -1,45 +0,0 @@ -/** -* @file common.h -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#ifndef COMMON_H -#define COMMON_H - -#include -#include -#include -#include -#include - -#include "acl/acl.h" - -#define SUCCESS 0 -#define FAILED 1 - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) 
fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args) - -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize); - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size); - -#endif // COMMON_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/op_runner.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/op_runner.h deleted file mode 100644 index 0dd73397d..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/op_runner.h +++ /dev/null @@ -1,172 +0,0 @@ -/** -* @file op_runner.h -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-*/ -#ifndef OP_RUNNER_H -#define OP_RUNNER_H - -#include "acl/acl.h" -#include "common.h" -#include "operator_desc.h" - -/** - * Op Runner - */ -class OpRunner { -public: - /** - * @brief Constructor - * @param [in] opDesc: op description - */ - explicit OpRunner(OperatorDesc *opDesc); - - /** - * @brief Destructor - */ - virtual ~OpRunner(); - - /** - * @brief Init op runner - */ - bool Init(); - - /** - * @brief Get number of inputs - * @return number of inputs - */ - const size_t NumInputs(); - - /** - * @brief Get number of outputs - * @return number of outputs - */ - const size_t NumOutputs(); - - /** - * @brief Get input size by index - * @param [in] index: input index - * @return size of the input - */ - const size_t GetInputSize(size_t index) const; - - /** - * @brief Get output size by index - * @param [in] index: output index - * @return size of the output - */ - size_t GetOutputSize(size_t index) const; - - /** - * @brief Get input element count by index - * @param i[in] ndex: input index - * @return element count of the input - */ - size_t GetInputElementCount(size_t index) const; - - /** - * @brief Get output element count by index - * @param [in] index: output index - * @return element count of the output - */ - size_t GetOutputElementCount(size_t index) const; - - /** - * @brief Get input shape by index - * @param [in] index: input index - * @return shape of the output - */ - std::vector GetInputShape(size_t index) const; - - /** - * @brief Get output shape by index - * @param [in] index: output index - * @return shape of the output - */ - std::vector GetOutputShape(size_t index) const; - - /** - * @brief Get input buffer(host memory) by index - * @tparam T: data type - * @param [in] index: input index - * @return host address of the input - */ - template - T *GetInputBuffer(size_t index) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); - return nullptr; - } - return reinterpret_cast(hostInputs_[index]); - } - - /** - * @brief Get output buffer(host memory) by index - * @tparam T: data type - * @param [in] index: output index - * @return host address of the output - */ - template - const T *GetOutputBuffer(size_t index) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return nullptr; - } - - return reinterpret_cast(hostOutputs_[index]); - } - - /** - * @brief Print readable input by index - * @param [in] index: input index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintInput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Print readable output by index - * @param [in] index: output index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintOutput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Compile static op - * @return compile result - */ - bool CompileStaticOp(); - - /** - * @brief Compile dynamic op - * @return compile result - */ - bool CompileDynamicOp(); - - /** - * @brief Run op - * @return run result - */ - bool RunOp(); - -private: - size_t numInputs_; - size_t numOutputs_; - - std::vector inputBuffers_; - std::vector outputBuffers_; - - std::vector devInputs_; - std::vector devOutputs_; - - std::vector hostInputs_; - std::vector hostOutputs_; - OperatorDesc *opDesc_; -}; - -#endif // OP_RUNNER_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/operator_desc.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/operator_desc.h deleted file mode 100644 index 8a315e1f8..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/inc/operator_desc.h +++ /dev/null @@ -1,59 +0,0 @@ -/** -* @file operator_desc.h -* 
-* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#ifndef OPERATOR_DESC_H -#define OPERATOR_DESC_H - -#include -#include - -#include "acl/acl.h" - -/** - * Op description - */ -struct OperatorDesc { - /** - * Constructor - * @param [in] opType: op type - */ - explicit OperatorDesc(std::string opType); - - /** - * Destructor - */ - virtual ~OperatorDesc(); - - /** - * Add an input tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - /** - * Add an output tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - std::string opType; - std::vector inputDesc; - std::vector outputDesc; - aclopAttr *opAttr; -}; - -#endif // OPERATOR_DESC_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run.sh deleted file mode 100644 index 1b49051a3..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL=0 - -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -) -cd $CURRENT_DIR - -# 导出环境变量 -IS_DYNAMIC=$1 -if [ ! 
$ASCEND_HOME_DIR ]; then - ASCEND_HOME_DIR=/usr/local/Ascend/latest - source $ASCEND_HOME_DIR/bin/setenv.bash -fi - -export DDK_PATH=$ASCEND_HOME_DIR -arch=$(uname -m) -export NPU_HOST_LIB=$ASCEND_HOME_DIR/${arch}-linux/lib64 - -function main { - if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" - return 1 - fi - - # 1. 生成输入数据和真值数据 - cd $CURRENT_DIR/run/out/test_data/data - python3 generate_data.py - if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" - return 1 - fi - echo "INFO: generate input data success!" - - # 2. 编译acl可执行文件 - cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build - cmake ../src - if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" - return 1 - fi - echo "INFO: cmake success!" - make - if [ $? -ne 0 ]; then - echo "ERROR: make failed!" - return 1 - fi - echo "INFO: make success!" - - # 3. 运行可执行文件 - cd $CURRENT_DIR/run/out - if [ $IS_DYNAMIC == 1 ]; then - echo "INFO: execute dynamic op!" - ./execute_add_op $IS_DYNAMIC 2048 - else - echo "INFO: execute static op!" - ./execute_add_op - fi - if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" - return 1 - fi - echo "INFO: acl executable run success!" - - # 4. 比较真值文件 - cd $CURRENT_DIR - python3 $CURRENT_DIR/scripts/verify_result.py \ - $CURRENT_DIR/run/out/test_data/data/input_0.bin \ - $CURRENT_DIR/run/out/test_data/data/input_1.bin \ - $CURRENT_DIR/run/out/result_files/output_0.bin - if [ $? -ne 0 ]; then - echo "ERROR: compare golden data failed! the result is wrong!" - return 1 - fi - echo "INFO: compare golden data success!" 
-} - -main diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/acl.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/acl.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/acl.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_dynamic_shape.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_dynamic_shape.json deleted file mode 100644 index abba89d74..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_dynamic_shape.json +++ /dev/null @@ -1,33 +0,0 @@ -[ - { - "op": "AddCustom", - "input_desc": [ - { - "name": "x", - "param_type": "required", - "format": "ND", - "shape": [-1, -1], - "shape_range": [[1,-1],[1,-1]], - "type": "float16" - }, - { - "name": "y", - "param_type": "required", - "format":"ND", - "shape": [-1, -1], - "shape_range": [[1,-1],[1,-1]], - "type": "float16" - } - ], - "output_desc": [ - { - "name": "z", - "param_type": "required", - "format": "ND", - "shape": [-1, -1], - "shape_range": [[1,-1],[1,-1]], - "type": "float16" - } - ] - } -] diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_static_shape.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_static_shape.json deleted file mode 100644 index 
0deaae0f7..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/config/add_custom_static_shape.json +++ /dev/null @@ -1,30 +0,0 @@ -[ - { - "op": "AddCustom", - "input_desc": [ - { - "name": "x", - "param_type": "required", - "format": "ND", - "shape": [8, 2048], - "type": "float16" - }, - { - "name": "y", - "param_type": "required", - "format":"ND", - "shape": [8, 2048], - "type": "float16" - } - ], - "output_desc": [ - { - "name": "z", - "param_type": "required", - "format": "ND", - "shape": [8, 2048], - "type": "float16" - } - ] - } -] diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/data/generate_data.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/data/generate_data.py deleted file mode 100644 index 2dbbc0f66..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/run/out/test_data/data/generate_data.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -* @file generate_data.py -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-""" -import numpy as np - -a = np.random.randint(100, size=(8, 2048,)).astype(np.float16) -b = np.random.randint(100, size=(8, 2048,)).astype(np.float16) - -a.tofile('input_0.bin') -b.tofile('input_1.bin') diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/scripts/verify_result.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/scripts/verify_result.py deleted file mode 100644 index 709911259..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/scripts/verify_result.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Copyright (R) @huawei.com, all rights reserved --*- coding:utf-8 -*- -CREATED: 2020-12-17 10:12:13 -""" -import sys -import numpy as np - - -def data_compare(file1, file2, file3): - """ - Verify that the data are the same - """ - input1 = np.fromfile(file1, dtype=np.float16) - print(input1) - input2 = np.fromfile(file2, dtype=np.float16) - print(input2) - golden = input1 + input2 - output = np.fromfile(file3, dtype=np.float16) - print(output) - print("-------------golden is :") - print(golden) - - different_element_results = np.isclose( - output, golden, - rtol=1e-3, - atol=1e-8, - equal_nan=True) - different_element_indexes = np.where( - different_element_results != np.array((True,)))[0] - return 0 if different_element_indexes.size == 0 else 1 - - -if __name__ == '__main__': - intput_file1 = sys.argv[1] - intput_file2 = sys.argv[2] - output_file = sys.argv[3] - cmp_result = data_compare(intput_file1, intput_file2, output_file) - - if cmp_result == 0: - sys.exit(0) - else: - sys.exit(1) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/CMakeLists.txt deleted file mode 100644 index 0299ffa4e..000000000 
--- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. - -# CMake lowest version requirement -cmake_minimum_required(VERSION 3.5.1) - -# project information -project(acl_execute_add) - -# Compile options -add_compile_options(-std=c++11) - -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../run/out") -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../../outputs") - -set(INC_PATH $ENV{DDK_PATH}) - -if (NOT DEFINED ENV{DDK_PATH}) - set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") - message(STATUS "set default INC_PATH: ${INC_PATH}") -else () - message(STATUS "env INC_PATH: ${INC_PATH}") -endif() - -set(LIB_PATH $ENV{NPU_HOST_LIB}) - -# Dynamic libraries in the stub directory can only be used for compilation -if (NOT DEFINED ENV{NPU_HOST_LIB}) - set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") - set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") - message(STATUS "set default LIB_PATH: ${LIB_PATH}") -else () - message(STATUS "env LIB_PATH: ${LIB_PATH}") -endif() - -# Header path -include_directories( - ${INC_PATH}/runtime/include - ${INC_PATH}/atc/include - ../inc -) - -# add host lib path -link_directories( - ${LIB_PATH} - ${LIB_PATH1} -) - -add_executable(execute_add_op - operator_desc.cpp - op_runner.cpp - main.cpp - common.cpp) - -target_link_libraries(execute_add_op - ascendcl - acl_op_compiler - stdc++) - -install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/common.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/common.cpp deleted file mode 100644 index c6d3d0cd8..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/common.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/** -* @file common.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "common.h" - -#include -#include -#include -#include - -extern bool g_isDevice; - -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file %s", filePath.c_str()); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } - - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - std::filebuf *buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); - file.close(); - return false; - } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} - -bool WriteFile(const std::string &filePath, const void *buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. buffer is nullptr"); - return false; - } - - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); - return false; - } - - auto writeSize = write(fd, buffer, size); - (void) close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/main.cpp deleted file mode 100644 index 844ff105b..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/main.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/** -* @file main.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include -#include -#include -#include -#include - -#include "acl/acl.h" -#include "op_runner.h" - -#include "common.h" - -bool g_isDevice = false; -int deviceId = 0; -int isDynamic = 0; -int length = 0; - -OperatorDesc CreateOpDesc() -{ - // define operator - std::vector shape { 8, 2048 }; - std::string opType = "AddCustom"; - if (isDynamic) { - shape = {8, length}; - } - aclDataType dataType = ACL_FLOAT16; - aclFormat format = ACL_FORMAT_ND; - OperatorDesc opDesc(opType); - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); - return opDesc; -} - -bool SetInputData(OpRunner &runner) -{ - for (size_t i = 0; i < runner.NumInputs(); ++i) { - size_t fileSize = 0; - std::string filePath = "test_data/data/input_" + std::to_string(i) + ".bin"; - bool result = ReadFile(filePath, fileSize, - runner.GetInputBuffer(i), runner.GetInputSize(i)); - if (!result) { - 
ERROR_LOG("Read input[%zu] failed", i); - return false; - } - - INFO_LOG("Set input[%zu] from %s success.", i, filePath.c_str()); - } - - return true; -} - -bool ProcessOutputData(OpRunner &runner) -{ - for (size_t i = 0; i < runner.NumOutputs(); ++i) { - std::string filePath = "result_files/output_" + std::to_string(i) + ".bin"; - if (!WriteFile(filePath, runner.GetOutputBuffer(i), runner.GetOutputSize(i))) { - ERROR_LOG("Write output[%zu] failed.", i); - return false; - } - - INFO_LOG("Write output[%zu] success. output file = %s", i, filePath.c_str()); - } - return true; -} - -bool RunOp() -{ - // Create op desc - OperatorDesc opDesc = CreateOpDesc(); - - // Create Runner - OpRunner opRunner(&opDesc); - if (!opRunner.Init()) { - ERROR_LOG("Init OpRunner failed"); - return false; - } - - // Load inputs - if (!SetInputData(opRunner)) { - ERROR_LOG("Set input data failed"); - return false; - } - - // Run op - if (!opRunner.RunOp()) { - ERROR_LOG("Run op failed"); - return false; - } - - // Process output data - if (!ProcessOutputData(opRunner)) { - ERROR_LOG("Process output data failed"); - return false; - } - - INFO_LOG("Run op success"); - return true; -} - -void DestoryResource() -{ - bool flag = false; - if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Reset device %d failed", deviceId); - flag = true; - } - INFO_LOG("Reset Device success"); - if (aclFinalize() != ACL_SUCCESS) { - ERROR_LOG("Finalize acl failed"); - flag = true; - } - if (flag) { - ERROR_LOG("Destory resource failed"); - } else { - INFO_LOG("Destory resource success"); - } -} - -bool InitResource() -{ - std::string output = "./result_files"; - if (access(output.c_str(), 0) == -1) { - int ret = mkdir(output.c_str(), 0700); - if (ret == 0) { - INFO_LOG("Make output directory successfully"); - } - else { - ERROR_LOG("Make output directory fail"); - return false; - } - } - - // acl.json is dump or profiling config file - if (aclInit("test_data/config/acl.json") != ACL_SUCCESS) { - 
ERROR_LOG("acl init failed"); - return false; - } - - if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Set device failed. deviceId is %d", deviceId); - (void)aclFinalize(); - return false; - } - INFO_LOG("Set device[%d] success", deviceId); - - // runMode is ACL_HOST which represents app is running in host - // runMode is ACL_DEVICE which represents app is running in device - aclrtRunMode runMode; - if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { - ERROR_LOG("Get run mode failed"); - DestoryResource(); - return false; - } - g_isDevice = (runMode == ACL_DEVICE); - INFO_LOG("Get RunMode[%d] success", runMode); - - // set model path - if (aclopSetModelDir("op_models") != ACL_SUCCESS) { - std::cerr << "Load single op model failed" << std::endl; - (void) aclFinalize(); - return FAILED; - } - INFO_LOG("aclopSetModelDir op model success", deviceId); - - return true; -} - -int main(int argc, char **argv) -{ - if (argc == 3) { - INFO_LOG("dynamic op will be called"); - isDynamic = atoi(argv[1]); - length = atoi(argv[2]); - } else if (argc == 1) { - INFO_LOG("static op will be called"); - } else { - ERROR_LOG("wrong input parameter number"); - return -1; - } - - if (!InitResource()) { - ERROR_LOG("Init resource failed"); - return FAILED; - } - INFO_LOG("Init resource success"); - - if (!RunOp()) { - DestoryResource(); - return FAILED; - } - - DestoryResource(); - - return SUCCESS; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/op_runner.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/op_runner.cpp deleted file mode 100644 index bcd325cce..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/op_runner.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/** -* @file op_runner.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
-* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "op_runner.h" - -#include -#include -#include "acl/acl_op_compiler.h" -#include "common.h" - -using namespace std; - -extern bool g_isDevice; - -OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) -{ - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); -} - -OpRunner::~OpRunner() -{ - for (size_t i = 0; i < numInputs_; ++i) { - (void)aclDestroyDataBuffer(inputBuffers_[i]); - (void)aclrtFree(devInputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostInputs_[i]); - } else { - (void)aclrtFreeHost(hostInputs_[i]); - } - } - - for (size_t i = 0; i < numOutputs_; ++i) { - (void)aclDestroyDataBuffer(outputBuffers_[i]); - (void)aclrtFree(devOutputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostOutputs_[i]); - } else { - (void)aclrtFreeHost(hostOutputs_[i]); - } - } -} - -bool OpRunner::Init() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostMem = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostMem, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } - if (hostMem == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return false; - } - hostInputs_.emplace_back(hostMem); - } - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - void 
*devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - devOutputs_.emplace_back(devMem); - outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return false; - } - hostOutputs_.emplace_back(hostOutput); - } - - return true; -} - -const size_t OpRunner::NumInputs() -{ - return numInputs_; -} - -const size_t OpRunner::NumOutputs() -{ - return numOutputs_; -} - -const size_t OpRunner::GetInputSize(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->inputDesc[index]); -} - -std::vector OpRunner::GetInputShape(size_t index) const -{ - std::vector ret; - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ret; - } - - auto desc = opDesc_->inputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - - return ret; -} - -std::vector OpRunner::GetOutputShape(size_t index) const -{ - std::vector ret; - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return ret; - } - - auto desc = opDesc_->outputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - return ret; -} - -size_t OpRunner::GetInputElementCount(size_t index) const -{ - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); -} - -size_t OpRunner::GetOutputElementCount(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); -} - -size_t OpRunner::GetOutputSize(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->outputDesc[index]); -} - -bool OpRunner::RunOp() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { - ERROR_LOG("Copy input[%zu] failed", i); - return false; - } - INFO_LOG("Copy input[%zu] success", i); - } - - aclrtStream stream = nullptr; - if (aclrtCreateStream(&stream) != ACL_SUCCESS) { - ERROR_LOG("Create stream failed"); - return false; - } - INFO_LOG("Create stream success"); - - auto ret = aclopExecuteV2(opDesc_->opType.c_str(), - numInputs_, - opDesc_->inputDesc.data(), - inputBuffers_.data(), - numOutputs_, - opDesc_->outputDesc.data(), - outputBuffers_.data(), - opDesc_->opAttr, - stream); - if (ret == ACL_ERROR_OP_TYPE_NOT_MATCH || ret == ACL_ERROR_OP_INPUT_NOT_MATCH || - ret == ACL_ERROR_OP_OUTPUT_NOT_MATCH || ret == ACL_ERROR_OP_ATTR_NOT_MATCH) { - ERROR_LOG("[%s] op with the given description is not compiled. Please run atc first, errorCode is %d", - opDesc_->opType.c_str(), static_cast(ret)); - (void)aclrtDestroyStream(stream); - return false; - } else if (ret != ACL_SUCCESS) { - (void)aclrtDestroyStream(stream); - ERROR_LOG("Execute %s failed. 
errorCode is %d", opDesc_->opType.c_str(), static_cast(ret)); - return false; - } - INFO_LOG("Execute %s success", opDesc_->opType.c_str()); - - if (aclrtSynchronizeStream(stream) != ACL_SUCCESS) { - ERROR_LOG("Synchronize stream failed"); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Synchronize stream success"); - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { - INFO_LOG("Copy output[%zu] success", i); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Copy output[%zu] success", i); - } - - (void)aclrtDestroyStream(stream); - return true; -} - - -template -void DoPrintData(const T *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. 
data is nullptr"); - return; - } - - switch (dataType) { - case ACL_BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT16: - DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } -} - -void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); - return; - } - - auto desc = opDesc_->inputDesc[index]; - PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} - -void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return; - } - - auto desc = opDesc_->outputDesc[index]; - PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/operator_desc.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/operator_desc.cpp deleted file mode 100644 index 484da75bd..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/op_verify/src/operator_desc.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/** -* @file operator_desc.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "common.h" -#include "operator_desc.h" - -using namespace std; - -OperatorDesc::OperatorDesc(std::string opType) : opType(std::move(opType)) -{ - opAttr = aclopCreateAttr(); -} - -OperatorDesc::~OperatorDesc() -{ - for (auto *desc : inputDesc) { - aclDestroyTensorDesc(desc); - } - - for (auto *desc : outputDesc) { - aclDestroyTensorDesc(desc); - } - - aclopDestroyAttr(opAttr); -} - -OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - - inputDesc.emplace_back(desc); - return *this; -} - -OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create 
tensor failed"); - return *this; - } - - outputDesc.emplace_back(desc); - return *this; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/run.sh deleted file mode 100755 index bb5e25093..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_offline_model/run.sh +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash -clear;clear -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -); cd $CURRENT_DIR - -# 导出环境变量 -DTYPE="float16" - -SHORT=m:,t:,v:, -LONG=is-dynamic:,replay-mode:,dtype:, -OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") -eval set -- "$OPTS" -while : -do - case "$1" in - # IS_DYNAMIC 0: static op - # IS_DYNAMIC 1: dynamic op - (-m | --is-dynamic) - IS_DYNAMIC="$2" - shift 2;; - # batch, iterator - (-t | --replay-mode) - REPLAY_MODE ="$2" - shift 2;; - # float16, float, int32 - (-v | --dtype) - DTYPE="$2" - shift 2;; - (--) - shift; - break;; - (*) - echo "[ERROR] Unexpected option: $1"; - break;; - esac -done - -if [ ! $ASCEND_HOME_DIR ]; then - export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - -PYTHON_VERSION=`python3 -V 2>&1 | awk '{print $2}' | awk -F '.' 
'{print $1"."$2}'` -export HI_PYTHON=python${PYTHON_VERSION} -export PYTHONPATH=$ASCEND_HOME_DIR/python/site-packages:$PYTHONPATH -export PATH=$ASCEND_HOME_DIR/python/site-packages/bin:$PATH - -# 检查当前昇腾芯片的类型 -function check_soc_version() { - SOC_VERSION_CONCAT=`python3 -c ''' -import ctypes, os -def get_soc_version(): - max_len = 256 - rtsdll = ctypes.CDLL(f"libruntime.so") - c_char_t = ctypes.create_string_buffer(b"\xff" * max_len, max_len) - rtsdll.rtGetSocVersion.restype = ctypes.c_uint64 - rt_error = rtsdll.rtGetSocVersion(c_char_t, ctypes.c_uint32(max_len)) - if rt_error: - print("rt_error:", rt_error) - return "" - soc_full_name = c_char_t.value.decode("utf-8") - find_str = "Short_SoC_version=" - ascend_home_dir = os.environ.get("ASCEND_HOME_DIR") - with open(f"{ascend_home_dir}/compiler/data/platform_config/{soc_full_name}.ini", "r") as f: - for line in f: - if find_str in line: - start_index = line.find(find_str) - result = line[start_index + len(find_str):].strip() - return "{},{}".format(soc_full_name, result.lower()) - return "" -print(get_soc_version()) - '''` - if [[ ${SOC_VERSION_CONCAT}"x" = "x" ]]; then - echo "ERROR: SOC_VERSION_CONCAT is invalid!" - return 1 - fi - SOC_FULL_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 1` - SOC_SHORT_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 2` -} - -function main() { - if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" - return 1 - fi - - if [[ ${REPLAY_MODE}"x" = "x" || ${REPLAY_MODE} = "batch" || ${REPLAY_MODE} = "iterator" ]]; then - echo "INFO: REPLAY_MODE valid : ${REPLAY_MODE}" - else - echo "ERROR: REPLAY_MODE is invalid!" 
- return 1 - fi - - # 清除遗留生成文件和日志文件 - rm -rf $HOME/ascend/log/* - rm -rf $ASCEND_OPP_PATH/vendors/* - rm -rf custom_op - rm -rf op_verify/run/out/op_models/*.om - - # 生成自定义算子工程样例 - JSON_NAME=add_custom - CAMEL_JSON_NAME=`echo $JSON_NAME | sed -r 's/(^|-|_)(\w)/\U\2/g'` - msopgen gen -i ../op_dev/${JSON_NAME}.json -f tf -c ai_core-${SOC_SHORT_VERSION} -lan cpp -out ./custom_op - if [ $? -ne 0 ]; then - echo "ERROR: msopgen custom op sample failed!" - return 1 - fi - echo "INFO: msopgen custom op sample success!" - - cp -rf ../op_dev/* custom_op - if [ $? -ne 0 ]; then - echo "ERROR: copy custom op files failed!" - return 1 - fi - if [[ $IS_DYNAMIC != 1 ]]; then - if [[ $REPLAY_MODE = "batch" ]]; then - sed -i "s/set(BATCH_MODE_REPLAY_LIST/set(BATCH_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(BATCH_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - elif [[ $REPLAY_MODE = "iterator" ]]; then - sed -i "s/set(ITERATOR_MODE_REPLAY_LIST/set(ITERATOR_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(ITERATOR_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - fi - fi - sed -i "s#/usr/local/Ascend/latest#$ASCEND_HOME_DIR#g" `grep "/usr/local/Ascend/latest" -rl custom_op/CMakePresets.json` - - # 测试不同输入数据类型, 修改对应代码 - if [[ ${DTYPE} == "float16" ]]; then - sed -i "s/.astype(.*)/.astype(np.float16)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_FLOAT16;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.float16)/g" `grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - elif [[ ${DTYPE} == "float" ]]; then - sed -i "s/.astype(.*)/.astype(np.float32)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_FLOAT;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.float32)/g" `grep 
"dtype=.*)" -rl op_verify/scripts/verify_result.py` - elif [[ ${DTYPE} == "int32" ]]; then - sed -i "s/.astype(.*)/.astype(np.int32)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_INT32;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.int32)/g" `grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - else - echo "ERROR: DTYPE is invalid!" - return 1 - fi - # 构建自定义算子包并安装 - bash custom_op/run.sh - if [ $? -ne 0 ]; then - echo "ERROR: build and install custom op run package failed!" - return 1 - fi - echo "INFO: build and install custom op run package success!" - - # 编译离线om模型 - cd op_verify - if [ $IS_DYNAMIC == 1 ]; then - atc --singleop=run/out/test_data/config/${JSON_NAME}_dynamic_shape.json --output=run/out/op_models/ --soc_version=${SOC_FULL_VERSION} - else - atc --singleop=run/out/test_data/config/${JSON_NAME}_static_shape.json --output=run/out/op_models/ --soc_version=${SOC_FULL_VERSION} - fi - # 编译acl可执行文件并运行 - bash run.sh $IS_DYNAMIC - if [ $? -ne 0 ]; then - echo "ERROR: execute acl single op sample failed!" - return 1 - fi - echo "INFO: execute acl single op sample success!" -} - -check_soc_version -main diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/common.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/common.h deleted file mode 100644 index 854c5931c..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/common.h +++ /dev/null @@ -1,45 +0,0 @@ -/** -* @file common.h -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
-* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#ifndef COMMON_H -#define COMMON_H - -#include -#include -#include -#include -#include - -#include "acl/acl.h" - -#define SUCCESS 0 -#define FAILED 1 - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args) - -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize); - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size); - -#endif // COMMON_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/op_runner.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/op_runner.h deleted file mode 100644 index 0dd73397d..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/op_runner.h +++ /dev/null @@ -1,172 +0,0 @@ -/** -* @file op_runner.h -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-*/ -#ifndef OP_RUNNER_H -#define OP_RUNNER_H - -#include "acl/acl.h" -#include "common.h" -#include "operator_desc.h" - -/** - * Op Runner - */ -class OpRunner { -public: - /** - * @brief Constructor - * @param [in] opDesc: op description - */ - explicit OpRunner(OperatorDesc *opDesc); - - /** - * @brief Destructor - */ - virtual ~OpRunner(); - - /** - * @brief Init op runner - */ - bool Init(); - - /** - * @brief Get number of inputs - * @return number of inputs - */ - const size_t NumInputs(); - - /** - * @brief Get number of outputs - * @return number of outputs - */ - const size_t NumOutputs(); - - /** - * @brief Get input size by index - * @param [in] index: input index - * @return size of the input - */ - const size_t GetInputSize(size_t index) const; - - /** - * @brief Get output size by index - * @param [in] index: output index - * @return size of the output - */ - size_t GetOutputSize(size_t index) const; - - /** - * @brief Get input element count by index - * @param i[in] ndex: input index - * @return element count of the input - */ - size_t GetInputElementCount(size_t index) const; - - /** - * @brief Get output element count by index - * @param [in] index: output index - * @return element count of the output - */ - size_t GetOutputElementCount(size_t index) const; - - /** - * @brief Get input shape by index - * @param [in] index: input index - * @return shape of the output - */ - std::vector GetInputShape(size_t index) const; - - /** - * @brief Get output shape by index - * @param [in] index: output index - * @return shape of the output - */ - std::vector GetOutputShape(size_t index) const; - - /** - * @brief Get input buffer(host memory) by index - * @tparam T: data type - * @param [in] index: input index - * @return host address of the input - */ - template - T *GetInputBuffer(size_t index) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); - return nullptr; - } - return reinterpret_cast(hostInputs_[index]); - } - - /** - * @brief Get output buffer(host memory) by index - * @tparam T: data type - * @param [in] index: output index - * @return host address of the output - */ - template - const T *GetOutputBuffer(size_t index) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return nullptr; - } - - return reinterpret_cast(hostOutputs_[index]); - } - - /** - * @brief Print readable input by index - * @param [in] index: input index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintInput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Print readable output by index - * @param [in] index: output index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintOutput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Compile static op - * @return compile result - */ - bool CompileStaticOp(); - - /** - * @brief Compile dynamic op - * @return compile result - */ - bool CompileDynamicOp(); - - /** - * @brief Run op - * @return run result - */ - bool RunOp(); - -private: - size_t numInputs_; - size_t numOutputs_; - - std::vector inputBuffers_; - std::vector outputBuffers_; - - std::vector devInputs_; - std::vector devOutputs_; - - std::vector hostInputs_; - std::vector hostOutputs_; - OperatorDesc *opDesc_; -}; - -#endif // OP_RUNNER_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/operator_desc.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/operator_desc.h deleted file mode 100644 index 8a315e1f8..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/inc/operator_desc.h +++ /dev/null @@ -1,59 +0,0 @@ -/** -* @file operator_desc.h -* -* 
Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#ifndef OPERATOR_DESC_H -#define OPERATOR_DESC_H - -#include -#include - -#include "acl/acl.h" - -/** - * Op description - */ -struct OperatorDesc { - /** - * Constructor - * @param [in] opType: op type - */ - explicit OperatorDesc(std::string opType); - - /** - * Destructor - */ - virtual ~OperatorDesc(); - - /** - * Add an input tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - /** - * Add an output tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - std::string opType; - std::vector inputDesc; - std::vector outputDesc; - aclopAttr *opAttr; -}; - -#endif // OPERATOR_DESC_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run.sh deleted file mode 100644 index 1b49051a3..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL=0 - -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -) -cd $CURRENT_DIR - -# 导出环境变量 -IS_DYNAMIC=$1 -if [ ! 
$ASCEND_HOME_DIR ]; then - ASCEND_HOME_DIR=/usr/local/Ascend/latest - source $ASCEND_HOME_DIR/bin/setenv.bash -fi - -export DDK_PATH=$ASCEND_HOME_DIR -arch=$(uname -m) -export NPU_HOST_LIB=$ASCEND_HOME_DIR/${arch}-linux/lib64 - -function main { - if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" - return 1 - fi - - # 1. 生成输入数据和真值数据 - cd $CURRENT_DIR/run/out/test_data/data - python3 generate_data.py - if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" - return 1 - fi - echo "INFO: generate input data success!" - - # 2. 编译acl可执行文件 - cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build - cmake ../src - if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" - return 1 - fi - echo "INFO: cmake success!" - make - if [ $? -ne 0 ]; then - echo "ERROR: make failed!" - return 1 - fi - echo "INFO: make success!" - - # 3. 运行可执行文件 - cd $CURRENT_DIR/run/out - if [ $IS_DYNAMIC == 1 ]; then - echo "INFO: execute dynamic op!" - ./execute_add_op $IS_DYNAMIC 2048 - else - echo "INFO: execute static op!" - ./execute_add_op - fi - if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" - return 1 - fi - echo "INFO: acl executable run success!" - - # 4. 比较真值文件 - cd $CURRENT_DIR - python3 $CURRENT_DIR/scripts/verify_result.py \ - $CURRENT_DIR/run/out/test_data/data/input_0.bin \ - $CURRENT_DIR/run/out/test_data/data/input_1.bin \ - $CURRENT_DIR/run/out/result_files/output_0.bin - if [ $? -ne 0 ]; then - echo "ERROR: compare golden data failed! the result is wrong!" - return 1 - fi - echo "INFO: compare golden data success!" 
-} - -main diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/config/acl.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/config/acl.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/config/acl.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/data/generate_data.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/data/generate_data.py deleted file mode 100644 index 2dbbc0f66..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/run/out/test_data/data/generate_data.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -* @file generate_data.py -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-""" -import numpy as np - -a = np.random.randint(100, size=(8, 2048,)).astype(np.float16) -b = np.random.randint(100, size=(8, 2048,)).astype(np.float16) - -a.tofile('input_0.bin') -b.tofile('input_1.bin') diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/scripts/verify_result.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/scripts/verify_result.py deleted file mode 100644 index 709911259..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/scripts/verify_result.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Copyright (R) @huawei.com, all rights reserved --*- coding:utf-8 -*- -CREATED: 2020-12-17 10:12:13 -""" -import sys -import numpy as np - - -def data_compare(file1, file2, file3): - """ - Verify that the data are the same - """ - input1 = np.fromfile(file1, dtype=np.float16) - print(input1) - input2 = np.fromfile(file2, dtype=np.float16) - print(input2) - golden = input1 + input2 - output = np.fromfile(file3, dtype=np.float16) - print(output) - print("-------------golden is :") - print(golden) - - different_element_results = np.isclose( - output, golden, - rtol=1e-3, - atol=1e-8, - equal_nan=True) - different_element_indexes = np.where( - different_element_results != np.array((True,)))[0] - return 0 if different_element_indexes.size == 0 else 1 - - -if __name__ == '__main__': - intput_file1 = sys.argv[1] - intput_file2 = sys.argv[2] - output_file = sys.argv[3] - cmp_result = data_compare(intput_file1, intput_file2, output_file) - - if cmp_result == 0: - sys.exit(0) - else: - sys.exit(1) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/CMakeLists.txt deleted file mode 100644 index 0299ffa4e..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. - -# CMake lowest version requirement -cmake_minimum_required(VERSION 3.5.1) - -# project information -project(acl_execute_add) - -# Compile options -add_compile_options(-std=c++11) - -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../run/out") -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../../outputs") - -set(INC_PATH $ENV{DDK_PATH}) - -if (NOT DEFINED ENV{DDK_PATH}) - set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") - message(STATUS "set default INC_PATH: ${INC_PATH}") -else () - message(STATUS "env INC_PATH: ${INC_PATH}") -endif() - -set(LIB_PATH $ENV{NPU_HOST_LIB}) - -# Dynamic libraries in the stub directory can only be used for compilation -if (NOT DEFINED ENV{NPU_HOST_LIB}) - set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") - set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") - message(STATUS "set default LIB_PATH: ${LIB_PATH}") -else () - message(STATUS "env LIB_PATH: ${LIB_PATH}") -endif() - -# Header path -include_directories( - ${INC_PATH}/runtime/include - ${INC_PATH}/atc/include - ../inc -) - -# add host lib path -link_directories( - ${LIB_PATH} - ${LIB_PATH1} -) - -add_executable(execute_add_op - operator_desc.cpp - op_runner.cpp - main.cpp - common.cpp) - -target_link_libraries(execute_add_op - ascendcl - acl_op_compiler - stdc++) - -install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/common.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/common.cpp deleted file mode 100644 index c6d3d0cd8..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/common.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/** -* @file common.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "common.h" - -#include -#include -#include -#include - -extern bool g_isDevice; - -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file %s", filePath.c_str()); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } - - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - std::filebuf *buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); - file.close(); - return false; - } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} - -bool WriteFile(const std::string &filePath, const void *buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. buffer is nullptr"); - return false; - } - - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); - return false; - } - - auto writeSize = write(fd, buffer, size); - (void) close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/main.cpp deleted file mode 100644 index 95ad599cc..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/main.cpp +++ /dev/null @@ -1,208 +0,0 @@ -/** -* @file main.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include -#include -#include -#include -#include - -#include "acl/acl.h" -#include "op_runner.h" - -#include "common.h" - -bool g_isDevice = false; -int deviceId = 0; -int isDynamic = 0; -int length = 0; - -OperatorDesc CreateOpDesc() -{ - // define operator - std::vector shape { 8, 2048 }; - std::string opType = "AddCustom"; - if (isDynamic) { - shape = {8, length}; - } - aclDataType dataType = ACL_FLOAT16; - aclFormat format = ACL_FORMAT_ND; - OperatorDesc opDesc(opType); - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); - return opDesc; -} - -bool SetInputData(OpRunner &runner) -{ - for (size_t i = 0; i < runner.NumInputs(); ++i) { - size_t fileSize = 0; - std::string filePath = "test_data/data/input_" + std::to_string(i) + ".bin"; - bool result = ReadFile(filePath, fileSize, - runner.GetInputBuffer(i), runner.GetInputSize(i)); - if (!result) { - 
ERROR_LOG("Read input[%zu] failed", i); - return false; - } - - INFO_LOG("Set input[%zu] from %s success.", i, filePath.c_str()); - } - - return true; -} - -bool ProcessOutputData(OpRunner &runner) -{ - for (size_t i = 0; i < runner.NumOutputs(); ++i) { - std::string filePath = "result_files/output_" + std::to_string(i) + ".bin"; - if (!WriteFile(filePath, runner.GetOutputBuffer(i), runner.GetOutputSize(i))) { - ERROR_LOG("Write output[%zu] failed.", i); - return false; - } - - INFO_LOG("Write output[%zu] success. output file = %s", i, filePath.c_str()); - } - return true; -} - -bool CompileAndRunOp() -{ - // create op desc - OperatorDesc opDesc = CreateOpDesc(); - - // create Runner - OpRunner opRunner(&opDesc); - if (!opRunner.Init()) { - ERROR_LOG("Init OpRunner failed"); - return false; - } - - // Load inputs - if (!SetInputData(opRunner)) { - ERROR_LOG("Set input data failed"); - return false; - } - - if (isDynamic) { - if (!opRunner.CompileDynamicOp()) { - ERROR_LOG("compile dynamic op failed"); - return false; - } - } else { - if (!opRunner.CompileStaticOp()) { - ERROR_LOG("compile static op failed"); - return false; - } - } - - // Run op - if (!opRunner.RunOp()) { - ERROR_LOG("Run op failed"); - return false; - } - - // process output data - if (!ProcessOutputData(opRunner)) { - ERROR_LOG("Process output data failed"); - return false; - } - - INFO_LOG("Run op success"); - return true; -} - -void DestoryResource() -{ - bool flag = false; - if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Reset device %d failed", deviceId); - flag = true; - } - INFO_LOG("Reset Device success"); - if (aclFinalize() != ACL_SUCCESS) { - ERROR_LOG("Finalize acl failed"); - flag = true; - } - if (flag) { - ERROR_LOG("Destory resource failed"); - } else { - INFO_LOG("Destory resource success"); - } -} - -bool InitResource() -{ - std::string output = "./result_files"; - if (access(output.c_str(), 0) == -1) { - int ret = mkdir(output.c_str(), 0700); - if (ret == 0) { - 
INFO_LOG("Make output directory successfully"); - } - else { - ERROR_LOG("Make output directory fail"); - return false; - } - } - - // acl.json is dump or profiling config file - if (aclInit("test_data/config/acl.json") != ACL_SUCCESS) { - ERROR_LOG("acl init failed"); - return false; - } - - if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Set device failed. deviceId is %d", deviceId); - (void)aclFinalize(); - return false; - } - INFO_LOG("Set device[%d] success", deviceId); - - // runMode is ACL_HOST which represents app is running in host - // runMode is ACL_DEVICE which represents app is running in device - aclrtRunMode runMode; - if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { - ERROR_LOG("Get run mode failed"); - DestoryResource(); - return false; - } - g_isDevice = (runMode == ACL_DEVICE); - INFO_LOG("Get RunMode[%d] success", runMode); - - return true; -} - -int main(int argc, char **argv) -{ - if (argc == 3) { - INFO_LOG("dynamic op will be called"); - isDynamic = atoi(argv[1]); - length = atoi(argv[2]); - } else if (argc == 1) { - INFO_LOG("static op will be called"); - } else { - ERROR_LOG("wrong input parameter number"); - return -1; - } - - if (!InitResource()) { - ERROR_LOG("Init resource failed"); - return FAILED; - } - INFO_LOG("Init resource success"); - - if (!CompileAndRunOp()) { - DestoryResource(); - return FAILED; - } - - DestoryResource(); - - return SUCCESS; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/op_runner.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/op_runner.cpp deleted file mode 100644 index bde1d96a2..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/op_runner.cpp +++ /dev/null @@ -1,430 +0,0 @@ -/** -* @file op_runner.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
-* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "op_runner.h" - -#include -#include -#include "acl/acl_op_compiler.h" -#include "common.h" - -using namespace std; - -extern bool g_isDevice; - -OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) -{ - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); -} - -OpRunner::~OpRunner() -{ - for (size_t i = 0; i < numInputs_; ++i) { - (void)aclDestroyDataBuffer(inputBuffers_[i]); - (void)aclrtFree(devInputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostInputs_[i]); - } else { - (void)aclrtFreeHost(hostInputs_[i]); - } - } - - for (size_t i = 0; i < numOutputs_; ++i) { - (void)aclDestroyDataBuffer(outputBuffers_[i]); - (void)aclrtFree(devOutputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostOutputs_[i]); - } else { - (void)aclrtFreeHost(hostOutputs_[i]); - } - } -} - -bool OpRunner::Init() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostMem = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostMem, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } - if (hostMem == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return false; - } - hostInputs_.emplace_back(hostMem); - } - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - void 
*devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - devOutputs_.emplace_back(devMem); - outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return false; - } - hostOutputs_.emplace_back(hostOutput); - } - - return true; -} - -const size_t OpRunner::NumInputs() -{ - return numInputs_; -} - -const size_t OpRunner::NumOutputs() -{ - return numOutputs_; -} - -const size_t OpRunner::GetInputSize(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->inputDesc[index]); -} - -std::vector OpRunner::GetInputShape(size_t index) const -{ - std::vector ret; - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ret; - } - - auto desc = opDesc_->inputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - - return ret; -} - -std::vector OpRunner::GetOutputShape(size_t index) const -{ - std::vector ret; - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return ret; - } - - auto desc = opDesc_->outputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - return ret; -} - -size_t OpRunner::GetInputElementCount(size_t index) const -{ - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); -} - -size_t OpRunner::GetOutputElementCount(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); -} - -size_t OpRunner::GetOutputSize(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->outputDesc[index]); -} - -bool OpRunner::CompileStaticOp() -{ - auto ret = aclopCompile(opDesc_->opType.c_str(), - numInputs_, - opDesc_->inputDesc.data(), - numOutputs_, - opDesc_->outputDesc.data(), - opDesc_->opAttr, - ACL_ENGINE_SYS, - ACL_COMPILE_SYS, - nullptr); - if (ret != ACL_SUCCESS) { - ERROR_LOG("compile static op %s failed. 
errorCode is %d", opDesc_->opType.c_str(), static_cast(ret)); - return false; - } - INFO_LOG("compile static op %s success", opDesc_->opType.c_str()); - return true; -} - -bool OpRunner::CompileDynamicOp() -{ - std::vector shape = { 8, -1 }; - std::vector inputDesc; - std::vector outputDesc; - int64_t rangeStatic[2][2] = {{8, 8}, {0, 10240}}; - for (size_t i = 0; i < opDesc_->inputDesc.size(); ++i) { - aclDataType dataType = aclGetTensorDescType(opDesc_->inputDesc[i]); - aclFormat format = aclGetTensorDescFormat(opDesc_->inputDesc[i]); - aclTensorDesc *desc = aclCreateTensorDesc(dataType, shape.size(), shape.data(), format); - aclSetTensorShapeRange(desc, 2, rangeStatic); - if (desc == nullptr) { - return false; - } - inputDesc.emplace_back(desc); - } - for (size_t i = 0; i < opDesc_->outputDesc.size(); ++i) { - aclDataType dataType = aclGetTensorDescType(opDesc_->outputDesc[i]); - aclFormat format = aclGetTensorDescFormat(opDesc_->outputDesc[i]); - aclTensorDesc *desc = aclCreateTensorDesc(dataType, shape.size(), shape.data(), format); - aclSetTensorShapeRange(desc, 2, rangeStatic); - if (desc == nullptr) { - return false; - } - outputDesc.emplace_back(desc); - } - auto ret = aclopCompile(opDesc_->opType.c_str(), - numInputs_, - inputDesc.data(), - numOutputs_, - outputDesc.data(), - opDesc_->opAttr, - ACL_ENGINE_SYS, - ACL_COMPILE_SYS, - nullptr); - if (ret != ACL_SUCCESS) { - ERROR_LOG("compile dynamic op %s failed. 
errorCode is %d", opDesc_->opType.c_str(), static_cast(ret)); - return false; - } - INFO_LOG("compile dynamic op %s success", opDesc_->opType.c_str()); - return true; -} - -bool OpRunner::RunOp() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { - ERROR_LOG("Copy input[%zu] failed", i); - return false; - } - INFO_LOG("Copy input[%zu] success", i); - } - - aclrtStream stream = nullptr; - if (aclrtCreateStream(&stream) != ACL_SUCCESS) { - ERROR_LOG("Create stream failed"); - return false; - } - INFO_LOG("Create stream success"); - - auto ret = aclopExecuteV2(opDesc_->opType.c_str(), - numInputs_, - opDesc_->inputDesc.data(), - inputBuffers_.data(), - numOutputs_, - opDesc_->outputDesc.data(), - outputBuffers_.data(), - opDesc_->opAttr, - stream); - if (ret == ACL_ERROR_OP_TYPE_NOT_MATCH || ret == ACL_ERROR_OP_INPUT_NOT_MATCH || - ret == ACL_ERROR_OP_OUTPUT_NOT_MATCH || ret == ACL_ERROR_OP_ATTR_NOT_MATCH) { - ERROR_LOG("[%s] op with the given description is not compiled. Please run atc first, errorCode is %d", - opDesc_->opType.c_str(), static_cast(ret)); - (void)aclrtDestroyStream(stream); - return false; - } else if (ret != ACL_SUCCESS) { - (void)aclrtDestroyStream(stream); - ERROR_LOG("Execute %s failed. 
errorCode is %d", opDesc_->opType.c_str(), static_cast(ret)); - return false; - } - INFO_LOG("Execute %s success", opDesc_->opType.c_str()); - - if (aclrtSynchronizeStream(stream) != ACL_SUCCESS) { - ERROR_LOG("Synchronize stream failed"); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Synchronize stream success"); - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { - INFO_LOG("Copy output[%zu] success", i); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Copy output[%zu] success", i); - } - - (void)aclrtDestroyStream(stream); - return true; -} - - -template -void DoPrintData(const T *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. 
data is nullptr"); - return; - } - - switch (dataType) { - case ACL_BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT16: - DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } -} - -void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); - return; - } - - auto desc = opDesc_->inputDesc[index]; - PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} - -void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return; - } - - auto desc = opDesc_->outputDesc[index]; - PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/operator_desc.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/operator_desc.cpp deleted file mode 100644 index edb876575..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/op_verify/src/operator_desc.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/** -* @file operator_desc.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "common.h" -#include "operator_desc.h" - -using namespace std; - -OperatorDesc::OperatorDesc(std::string opType) : opType(std::move(opType)) -{ - opAttr = aclopCreateAttr(); -} - -OperatorDesc::~OperatorDesc() -{ - for (auto *desc : inputDesc) { - aclDestroyTensorDesc(desc); - } - - for (auto *desc : outputDesc) { - aclDestroyTensorDesc(desc); - } - - aclopDestroyAttr(opAttr); -} - -OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - inputDesc.emplace_back(desc); - return *this; -} - -OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor 
failed"); - return *this; - } - - outputDesc.emplace_back(desc); - return *this; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/run.sh deleted file mode 100755 index 9c5d22523..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model/run.sh +++ /dev/null @@ -1,160 +0,0 @@ -#!/bin/bash -clear;clear -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -); cd $CURRENT_DIR - -# 导出环境变量 -DTYPE="float16" - -SHORT=m:,t:,v:, -LONG=is-dynamic:,replay-mode:,dtype:, -OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") -eval set -- "$OPTS" -while : -do - case "$1" in - # IS_DYNAMIC 0: static op - # IS_DYNAMIC 1: dynamic op - (-m | --is-dynamic) - IS_DYNAMIC="$2" - shift 2;; - # batch, iterator - (-t | --replay-mode) - REPLAY_MODE ="$2" - shift 2;; - # float16, float, int32 - (-v | --dtype) - DTYPE="$2" - shift 2;; - (--) - shift; - break;; - (*) - echo "[ERROR] Unexpected option: $1"; - break;; - esac -done - -if [ ! $ASCEND_HOME_DIR ]; then - export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - -PYTHON_VERSION=`python3 -V 2>&1 | awk '{print $2}' | awk -F '.' 
'{print $1"."$2}'` -export HI_PYTHON=python${PYTHON_VERSION} -export PYTHONPATH=$ASCEND_HOME_DIR/python/site-packages:$PYTHONPATH -export PATH=$ASCEND_HOME_DIR/python/site-packages/bin:$PATH - -# 检查当前昇腾芯片的类型 -function check_soc_version() { - SOC_VERSION_CONCAT=`python3 -c ''' -import ctypes, os -def get_soc_version(): - max_len = 256 - rtsdll = ctypes.CDLL(f"libruntime.so") - c_char_t = ctypes.create_string_buffer(b"\xff" * max_len, max_len) - rtsdll.rtGetSocVersion.restype = ctypes.c_uint64 - rt_error = rtsdll.rtGetSocVersion(c_char_t, ctypes.c_uint32(max_len)) - if rt_error: - print("rt_error:", rt_error) - return "" - soc_full_name = c_char_t.value.decode("utf-8") - find_str = "Short_SoC_version=" - ascend_home_dir = os.environ.get("ASCEND_HOME_DIR") - with open(f"{ascend_home_dir}/compiler/data/platform_config/{soc_full_name}.ini", "r") as f: - for line in f: - if find_str in line: - start_index = line.find(find_str) - result = line[start_index + len(find_str):].strip() - return "{},{}".format(soc_full_name, result.lower()) - return "" -print(get_soc_version()) - '''` - if [[ ${SOC_VERSION_CONCAT}"x" = "x" ]]; then - echo "ERROR: SOC_VERSION_CONCAT is invalid!" - return 1 - fi - SOC_FULL_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 1` - SOC_SHORT_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 2` -} - -function main() { - if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" - return 1 - fi - - if [[ ${REPLAY_MODE}"x" = "x" || ${REPLAY_MODE} = "batch" || ${REPLAY_MODE} = "iterator" ]]; then - echo "INFO: REPLAY_MODE valid : ${REPLAY_MODE}" - else - echo "ERROR: REPLAY_MODE is invalid!" - return 1 - fi - - # 清除遗留生成文件和日志文件 - rm -rf $HOME/ascend/log/* - rm -rf $ASCEND_OPP_PATH/vendors/* - rm -rf custom_op - - # 生成自定义算子工程样例 - JSON_NAME=add_custom - CAMEL_JSON_NAME=`echo $JSON_NAME | sed -r 's/(^|-|_)(\w)/\U\2/g'` - msopgen gen -i ../op_dev/${JSON_NAME}.json -f tf -c ai_core-${SOC_SHORT_VERSION} -lan cpp -out ./custom_op - if [ $? 
-ne 0 ]; then - echo "ERROR: msopgen custom op sample failed!" - return 1 - fi - echo "INFO: msopgen custom op sample success!" - - cp -rf ../op_dev/* custom_op - if [ $? -ne 0 ]; then - echo "ERROR: copy custom op files failed!" - return 1 - fi - if [[ $IS_DYNAMIC != 1 ]]; then - if [[ $REPLAY_MODE = "batch" ]]; then - sed -i "s/set(BATCH_MODE_REPLAY_LIST/set(BATCH_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(BATCH_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - elif [[ $REPLAY_MODE = "iterator" ]]; then - sed -i "s/set(ITERATOR_MODE_REPLAY_LIST/set(ITERATOR_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(ITERATOR_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - fi - fi - sed -i "s#/usr/local/Ascend/latest#$ASCEND_HOME_DIR#g" `grep "/usr/local/Ascend/latest" -rl custom_op/CMakePresets.json` - - # 测试不同输入数据类型, 修改对应代码 - if [[ ${DTYPE} == "float16" ]]; then - sed -i "s/.astype(.*)/.astype(np.float16)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_FLOAT16;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.float16)/g" `grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - elif [[ ${DTYPE} == "float" ]]; then - sed -i "s/.astype(.*)/.astype(np.float32)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_FLOAT;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.float32)/g" `grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - elif [[ ${DTYPE} == "int32" ]]; then - sed -i "s/.astype(.*)/.astype(np.int32)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_INT32;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.int32)/g" 
`grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - else - echo "ERROR: DTYPE is invalid!" - return 1 - fi - # 构建自定义算子包并安装 - bash custom_op/run.sh - if [ $? -ne 0 ]; then - echo "ERROR: build and install custom op run package failed!" - return 1 - fi - echo "INFO: build and install custom op run package success!" - - # 编译acl可执行文件并运行 - bash op_verify/run.sh $IS_DYNAMIC - if [ $? -ne 0 ]; then - echo "ERROR: execute acl single op sample failed!" - return 1 - fi - echo "INFO: execute acl single op sample success!" -} - -check_soc_version -main diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/common.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/common.h deleted file mode 100644 index 854c5931c..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/common.h +++ /dev/null @@ -1,45 +0,0 @@ -/** -* @file common.h -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#ifndef COMMON_H -#define COMMON_H - -#include -#include -#include -#include -#include - -#include "acl/acl.h" - -#define SUCCESS 0 -#define FAILED 1 - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) 
fprintf(stderr, "[ERROR] " fmt "\n", ##args) - -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize); - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size); - -#endif // COMMON_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/op_runner.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/op_runner.h deleted file mode 100644 index 0dd73397d..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/op_runner.h +++ /dev/null @@ -1,172 +0,0 @@ -/** -* @file op_runner.h -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-*/ -#ifndef OP_RUNNER_H -#define OP_RUNNER_H - -#include "acl/acl.h" -#include "common.h" -#include "operator_desc.h" - -/** - * Op Runner - */ -class OpRunner { -public: - /** - * @brief Constructor - * @param [in] opDesc: op description - */ - explicit OpRunner(OperatorDesc *opDesc); - - /** - * @brief Destructor - */ - virtual ~OpRunner(); - - /** - * @brief Init op runner - */ - bool Init(); - - /** - * @brief Get number of inputs - * @return number of inputs - */ - const size_t NumInputs(); - - /** - * @brief Get number of outputs - * @return number of outputs - */ - const size_t NumOutputs(); - - /** - * @brief Get input size by index - * @param [in] index: input index - * @return size of the input - */ - const size_t GetInputSize(size_t index) const; - - /** - * @brief Get output size by index - * @param [in] index: output index - * @return size of the output - */ - size_t GetOutputSize(size_t index) const; - - /** - * @brief Get input element count by index - * @param i[in] ndex: input index - * @return element count of the input - */ - size_t GetInputElementCount(size_t index) const; - - /** - * @brief Get output element count by index - * @param [in] index: output index - * @return element count of the output - */ - size_t GetOutputElementCount(size_t index) const; - - /** - * @brief Get input shape by index - * @param [in] index: input index - * @return shape of the output - */ - std::vector GetInputShape(size_t index) const; - - /** - * @brief Get output shape by index - * @param [in] index: output index - * @return shape of the output - */ - std::vector GetOutputShape(size_t index) const; - - /** - * @brief Get input buffer(host memory) by index - * @tparam T: data type - * @param [in] index: input index - * @return host address of the input - */ - template - T *GetInputBuffer(size_t index) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_); - return nullptr; - } - return reinterpret_cast(hostInputs_[index]); - } - - /** - * @brief Get output buffer(host memory) by index - * @tparam T: data type - * @param [in] index: output index - * @return host address of the output - */ - template - const T *GetOutputBuffer(size_t index) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return nullptr; - } - - return reinterpret_cast(hostOutputs_[index]); - } - - /** - * @brief Print readable input by index - * @param [in] index: input index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintInput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Print readable output by index - * @param [in] index: output index - * @param [in] elementsPerRow: number of elements per row - */ - void PrintOutput(size_t index, size_t elementsPerRow = 16); - - /** - * @brief Compile static op - * @return compile result - */ - bool CompileStaticOp(); - - /** - * @brief Compile dynamic op - * @return compile result - */ - bool CompileDynamicOp(); - - /** - * @brief Run op - * @return run result - */ - bool RunOp(); - -private: - size_t numInputs_; - size_t numOutputs_; - - std::vector inputBuffers_; - std::vector outputBuffers_; - - std::vector devInputs_; - std::vector devOutputs_; - - std::vector hostInputs_; - std::vector hostOutputs_; - OperatorDesc *opDesc_; -}; - -#endif // OP_RUNNER_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/operator_desc.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/operator_desc.h deleted file mode 100644 index 8a315e1f8..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/inc/operator_desc.h +++ /dev/null @@ -1,59 +0,0 @@ -/** -* @file 
operator_desc.h -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#ifndef OPERATOR_DESC_H -#define OPERATOR_DESC_H - -#include -#include - -#include "acl/acl.h" - -/** - * Op description - */ -struct OperatorDesc { - /** - * Constructor - * @param [in] opType: op type - */ - explicit OperatorDesc(std::string opType); - - /** - * Destructor - */ - virtual ~OperatorDesc(); - - /** - * Add an input tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - /** - * Add an output tensor description - * @param [in] dataType: data type - * @param [in] numDims: number of dims - * @param [in] dims: dims - * @param [in] format: format - * @return OperatorDesc - */ - OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); - - std::string opType; - std::vector inputDesc; - std::vector outputDesc; - aclopAttr *opAttr; -}; - -#endif // OPERATOR_DESC_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run.sh deleted file mode 100755 index 1b49051a3..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL=0 - -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -) -cd $CURRENT_DIR - -# 导出环境变量 
-IS_DYNAMIC=$1 -if [ ! $ASCEND_HOME_DIR ]; then - ASCEND_HOME_DIR=/usr/local/Ascend/latest - source $ASCEND_HOME_DIR/bin/setenv.bash -fi - -export DDK_PATH=$ASCEND_HOME_DIR -arch=$(uname -m) -export NPU_HOST_LIB=$ASCEND_HOME_DIR/${arch}-linux/lib64 - -function main { - if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" - return 1 - fi - - # 1. 生成输入数据和真值数据 - cd $CURRENT_DIR/run/out/test_data/data - python3 generate_data.py - if [ $? -ne 0 ]; then - echo "ERROR: generate input data failed!" - return 1 - fi - echo "INFO: generate input data success!" - - # 2. 编译acl可执行文件 - cd $CURRENT_DIR; rm -rf build; mkdir -p build; cd build - cmake ../src - if [ $? -ne 0 ]; then - echo "ERROR: cmake failed!" - return 1 - fi - echo "INFO: cmake success!" - make - if [ $? -ne 0 ]; then - echo "ERROR: make failed!" - return 1 - fi - echo "INFO: make success!" - - # 3. 运行可执行文件 - cd $CURRENT_DIR/run/out - if [ $IS_DYNAMIC == 1 ]; then - echo "INFO: execute dynamic op!" - ./execute_add_op $IS_DYNAMIC 2048 - else - echo "INFO: execute static op!" - ./execute_add_op - fi - if [ $? -ne 0 ]; then - echo "ERROR: acl executable run failed! please check your project!" - return 1 - fi - echo "INFO: acl executable run success!" - - # 4. 比较真值文件 - cd $CURRENT_DIR - python3 $CURRENT_DIR/scripts/verify_result.py \ - $CURRENT_DIR/run/out/test_data/data/input_0.bin \ - $CURRENT_DIR/run/out/test_data/data/input_1.bin \ - $CURRENT_DIR/run/out/result_files/output_0.bin - if [ $? -ne 0 ]; then - echo "ERROR: compare golden data failed! the result is wrong!" - return 1 - fi - echo "INFO: compare golden data success!" 
-} - -main diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/config/acl.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/config/acl.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/config/acl.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/data/generate_data.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/data/generate_data.py deleted file mode 100644 index 044eaa917..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/run/out/test_data/data/generate_data.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -* @file generate_data.py -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-""" -import numpy as np - -a = np.random.randint(100, size=(1, 999,)).astype(np.float16) -b = np.random.randint(100, size=(1, 999,)).astype(np.float16) - -a.tofile('input_0.bin') -b.tofile('input_1.bin') diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/scripts/verify_result.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/scripts/verify_result.py deleted file mode 100644 index 709911259..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/scripts/verify_result.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Copyright (R) @huawei.com, all rights reserved --*- coding:utf-8 -*- -CREATED: 2020-12-17 10:12:13 -""" -import sys -import numpy as np - - -def data_compare(file1, file2, file3): - """ - Verify that the data are the same - """ - input1 = np.fromfile(file1, dtype=np.float16) - print(input1) - input2 = np.fromfile(file2, dtype=np.float16) - print(input2) - golden = input1 + input2 - output = np.fromfile(file3, dtype=np.float16) - print(output) - print("-------------golden is :") - print(golden) - - different_element_results = np.isclose( - output, golden, - rtol=1e-3, - atol=1e-8, - equal_nan=True) - different_element_indexes = np.where( - different_element_results != np.array((True,)))[0] - return 0 if different_element_indexes.size == 0 else 1 - - -if __name__ == '__main__': - intput_file1 = sys.argv[1] - intput_file2 = sys.argv[2] - output_file = sys.argv[3] - cmp_result = data_compare(intput_file1, intput_file2, output_file) - - if cmp_result == 0: - sys.exit(0) - else: - sys.exit(1) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/CMakeLists.txt deleted file mode 
100644 index 0299ffa4e..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. - -# CMake lowest version requirement -cmake_minimum_required(VERSION 3.5.1) - -# project information -project(acl_execute_add) - -# Compile options -add_compile_options(-std=c++11) - -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../run/out") -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../../outputs") - -set(INC_PATH $ENV{DDK_PATH}) - -if (NOT DEFINED ENV{DDK_PATH}) - set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest") - message(STATUS "set default INC_PATH: ${INC_PATH}") -else () - message(STATUS "env INC_PATH: ${INC_PATH}") -endif() - -set(LIB_PATH $ENV{NPU_HOST_LIB}) - -# Dynamic libraries in the stub directory can only be used for compilation -if (NOT DEFINED ENV{NPU_HOST_LIB}) - set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") - set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") - message(STATUS "set default LIB_PATH: ${LIB_PATH}") -else () - message(STATUS "env LIB_PATH: ${LIB_PATH}") -endif() - -# Header path -include_directories( - ${INC_PATH}/runtime/include - ${INC_PATH}/atc/include - ../inc -) - -# add host lib path -link_directories( - ${LIB_PATH} - ${LIB_PATH1} -) - -add_executable(execute_add_op - operator_desc.cpp - op_runner.cpp - main.cpp - common.cpp) - -target_link_libraries(execute_add_op - ascendcl - acl_op_compiler - stdc++) - -install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/common.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/common.cpp deleted file mode 100644 index c6d3d0cd8..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/common.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/** -* @file common.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "common.h" - -#include -#include -#include -#include - -extern bool g_isDevice; - -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file %s", filePath.c_str()); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } - - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - std::filebuf *buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); - file.close(); - return false; - } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} - -bool WriteFile(const std::string &filePath, const void *buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. buffer is nullptr"); - return false; - } - - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); - return false; - } - - auto writeSize = write(fd, buffer, size); - (void) close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/main.cpp deleted file mode 100644 index 990afa969..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/main.cpp +++ /dev/null @@ -1,213 +0,0 @@ -/** -* @file main.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include -#include -#include -#include -#include - -#include "acl/acl.h" -#include "op_runner.h" - -#include "common.h" - -bool g_isDevice = false; -int deviceId = 0; -int isDynamic = 0; -int length = 0; - -OperatorDesc CreateOpDesc() -{ - // define operator - std::vector shape { 1, 999 }; - std::string opType = "AddCustomUnalign"; - if (isDynamic) { - shape = {8, length}; - } - aclDataType dataType = ACL_FLOAT16; - aclFormat format = ACL_FORMAT_ND; - OperatorDesc opDesc(opType); - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); - aclopSetAttrInt(opDesc.opAttr, "testAttr2", 8); - return opDesc; -} - -bool SetInputData(OpRunner &runner) -{ - for (size_t i = 0; i < runner.NumInputs(); ++i) { - size_t fileSize = 0; - std::string filePath = "test_data/data/input_" + std::to_string(i) + ".bin"; - bool result = ReadFile(filePath, 
fileSize, - runner.GetInputBuffer(i), runner.GetInputSize(i)); - if (!result) { - ERROR_LOG("Read input[%zu] failed", i); - return false; - } - - INFO_LOG("Set input[%zu] from %s success.", i, filePath.c_str()); - } - - return true; -} - -bool ProcessOutputData(OpRunner &runner) -{ - for (size_t i = 0; i < runner.NumOutputs(); ++i) { - std::string filePath = "result_files/output_" + std::to_string(i) + ".bin"; - if (!WriteFile(filePath, runner.GetOutputBuffer(i), runner.GetOutputSize(i))) { - ERROR_LOG("Write output[%zu] failed.", i); - return false; - } - - INFO_LOG("Write output[%zu] success. output file = %s", i, filePath.c_str()); - } - return true; -} - -bool CompileAndRunOp() -{ - // create op desc - OperatorDesc opDesc = CreateOpDesc(); - - // create Runner - OpRunner opRunner(&opDesc); - if (!opRunner.Init()) { - ERROR_LOG("Init OpRunner failed"); - return false; - } - - // Load inputs - if (!SetInputData(opRunner)) { - ERROR_LOG("Set input data failed"); - return false; - } - - if (isDynamic) { - if (!opRunner.CompileDynamicOp()) { - ERROR_LOG("compile dynamic op failed"); - return false; - } - } else { - if (!opRunner.CompileStaticOp()) { - ERROR_LOG("compile static op failed"); - return false; - } - } - - // Run op - if (!opRunner.RunOp()) { - ERROR_LOG("Run op failed"); - return false; - } - - // process output data - if (!ProcessOutputData(opRunner)) { - ERROR_LOG("Process output data failed"); - return false; - } - - INFO_LOG("Run op success"); - return true; -} - -void DestoryResource() -{ - bool flag = false; - if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Reset device %d failed", deviceId); - flag = true; - } - INFO_LOG("Reset Device success"); - if (aclFinalize() != ACL_SUCCESS) { - ERROR_LOG("Finalize acl failed"); - flag = true; - } - if (flag) { - ERROR_LOG("Destory resource failed"); - } else { - INFO_LOG("Destory resource success"); - } -} - -bool InitResource() -{ - std::string output = "./result_files"; - if 
(access(output.c_str(), 0) == -1) { - int ret = mkdir(output.c_str(), 0700); - if (ret == 0) { - INFO_LOG("Make output directory successfully"); - } - else { - ERROR_LOG("Make output directory fail"); - return false; - } - } - - // acl.json is dump or profiling config file - if (aclInit("test_data/config/acl.json") != ACL_SUCCESS) { - ERROR_LOG("acl init failed"); - return false; - } - - if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Set device failed. deviceId is %d", deviceId); - (void)aclFinalize(); - return false; - } - INFO_LOG("Set device[%d] success", deviceId); - - // runMode is ACL_HOST which represents app is running in host - // runMode is ACL_DEVICE which represents app is running in device - aclrtRunMode runMode; - if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { - ERROR_LOG("Get run mode failed"); - DestoryResource(); - return false; - } - g_isDevice = (runMode == ACL_DEVICE); - INFO_LOG("Get RunMode[%d] success", runMode); - - return true; -} - -int main(int argc, char **argv) -{ - int DYNAMIC_FLAG = 3; - int STATIC_FLAG = 1; - if (argc == DYNAMIC_FLAG) { - int is_dynamic_idx = 1; - int length_idx = 2; - INFO_LOG("dynamic op will be called"); - isDynamic = atoi(argv[is_dynamic_idx]); - length = atoi(argv[length_idx]); - } else if (argc == STATIC_FLAG) { - INFO_LOG("static op will be called"); - } else { - ERROR_LOG("wrong input parameter number"); - return -1; - } - - if (!InitResource()) { - ERROR_LOG("Init resource failed"); - return FAILED; - } - INFO_LOG("Init resource success"); - - if (!CompileAndRunOp()) { - DestoryResource(); - return FAILED; - } - - DestoryResource(); - - return SUCCESS; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/op_runner.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/op_runner.cpp deleted file mode 100644 index e5bf76518..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/op_runner.cpp +++ /dev/null @@ -1,430 +0,0 @@ -/** -* @file op_runner.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -#include "op_runner.h" - -#include -#include -#include "acl/acl_op_compiler.h" -#include "common.h" - -using namespace std; - -extern bool g_isDevice; - -OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) -{ - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); -} - -OpRunner::~OpRunner() -{ - for (size_t i = 0; i < numInputs_; ++i) { - (void)aclDestroyDataBuffer(inputBuffers_[i]); - (void)aclrtFree(devInputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostInputs_[i]); - } else { - (void)aclrtFreeHost(hostInputs_[i]); - } - } - - for (size_t i = 0; i < numOutputs_; ++i) { - (void)aclDestroyDataBuffer(outputBuffers_[i]); - (void)aclrtFree(devOutputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostOutputs_[i]); - } else { - (void)aclrtFreeHost(hostOutputs_[i]); - } - } -} - -bool OpRunner::Init() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostMem = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostMem, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] 
failed", i); - return false; - } - } - if (hostMem == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return false; - } - hostInputs_.emplace_back(hostMem); - } - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - devOutputs_.emplace_back(devMem); - outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_NORMAL_ONLY) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return false; - } - hostOutputs_.emplace_back(hostOutput); - } - - return true; -} - -const size_t OpRunner::NumInputs() -{ - return numInputs_; -} - -const size_t OpRunner::NumOutputs() -{ - return numOutputs_; -} - -const size_t OpRunner::GetInputSize(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->inputDesc[index]); -} - -std::vector OpRunner::GetInputShape(size_t index) const -{ - std::vector ret; - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ret; - } - - auto desc = opDesc_->inputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. 
dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - - return ret; -} - -std::vector OpRunner::GetOutputShape(size_t index) const -{ - std::vector ret; - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ret; - } - - auto desc = opDesc_->outputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - return ret; -} - -size_t OpRunner::GetInputElementCount(size_t index) const -{ - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->inputDesc[index]); -} - -size_t OpRunner::GetOutputElementCount(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescElementCount(opDesc_->outputDesc[index]); -} - -size_t OpRunner::GetOutputSize(size_t index) const -{ - if (index >= opDesc_->outputDesc.size()) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->outputDesc[index]); -} - -bool OpRunner::CompileStaticOp() -{ - auto ret = aclopCompile(opDesc_->opType.c_str(), - numInputs_, - opDesc_->inputDesc.data(), - numOutputs_, - opDesc_->outputDesc.data(), - opDesc_->opAttr, - ACL_ENGINE_SYS, - ACL_COMPILE_SYS, - nullptr); - if (ret != ACL_SUCCESS) { - ERROR_LOG("compile static op %s failed. 
errorCode is %d", opDesc_->opType.c_str(), static_cast(ret)); - return false; - } - INFO_LOG("compile static op %s success", opDesc_->opType.c_str()); - return true; -} - -bool OpRunner::CompileDynamicOp() -{ - std::vector shape = { 8, -1 }; - std::vector inputDesc; - std::vector outputDesc; - int64_t rangeStatic[2][2] = {{8, 8}, {0, 10240}}; - for (size_t i = 0; i < opDesc_->inputDesc.size(); ++i) { - aclDataType dataType = aclGetTensorDescType(opDesc_->inputDesc[i]); - aclFormat format = aclGetTensorDescFormat(opDesc_->inputDesc[i]); - aclTensorDesc *desc = aclCreateTensorDesc(dataType, shape.size(), shape.data(), format); - aclSetTensorShapeRange(desc, 2U, rangeStatic); - if (desc == nullptr) { - return false; - } - inputDesc.emplace_back(desc); - } - for (size_t i = 0; i < opDesc_->outputDesc.size(); ++i) { - aclDataType dataType = aclGetTensorDescType(opDesc_->outputDesc[i]); - aclFormat format = aclGetTensorDescFormat(opDesc_->outputDesc[i]); - aclTensorDesc *desc = aclCreateTensorDesc(dataType, shape.size(), shape.data(), format); - aclSetTensorShapeRange(desc, 2U, rangeStatic); - if (desc == nullptr) { - return false; - } - outputDesc.emplace_back(desc); - } - auto ret = aclopCompile(opDesc_->opType.c_str(), - numInputs_, - inputDesc.data(), - numOutputs_, - outputDesc.data(), - opDesc_->opAttr, - ACL_ENGINE_SYS, - ACL_COMPILE_SYS, - nullptr); - if (ret != ACL_SUCCESS) { - ERROR_LOG("compile dynamic op %s failed. 
errorCode is %d", opDesc_->opType.c_str(), static_cast(ret)); - return false; - } - INFO_LOG("compile dynamic op %s success", opDesc_->opType.c_str()); - return true; -} - -bool OpRunner::RunOp() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) { - ERROR_LOG("Copy input[%zu] failed", i); - return false; - } - INFO_LOG("Copy input[%zu] success", i); - } - - aclrtStream stream = nullptr; - if (aclrtCreateStream(&stream) != ACL_SUCCESS) { - ERROR_LOG("Create stream failed"); - return false; - } - INFO_LOG("Create stream success"); - - auto ret = aclopExecuteV2(opDesc_->opType.c_str(), - numInputs_, - opDesc_->inputDesc.data(), - inputBuffers_.data(), - numOutputs_, - opDesc_->outputDesc.data(), - outputBuffers_.data(), - opDesc_->opAttr, - stream); - if (ret == ACL_ERROR_OP_TYPE_NOT_MATCH || ret == ACL_ERROR_OP_INPUT_NOT_MATCH || - ret == ACL_ERROR_OP_OUTPUT_NOT_MATCH || ret == ACL_ERROR_OP_ATTR_NOT_MATCH) { - ERROR_LOG("[%s] op with the given description is not compiled. Please run atc first, errorCode is %d", - opDesc_->opType.c_str(), static_cast(ret)); - (void)aclrtDestroyStream(stream); - return false; - } else if (ret != ACL_SUCCESS) { - (void)aclrtDestroyStream(stream); - ERROR_LOG("Execute %s failed. 
errorCode is %d", opDesc_->opType.c_str(), static_cast(ret)); - return false; - } - INFO_LOG("Execute %s success", opDesc_->opType.c_str()); - - if (aclrtSynchronizeStream(stream) != ACL_SUCCESS) { - ERROR_LOG("Synchronize stream failed"); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Synchronize stream success"); - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST; - if (g_isDevice) { - kind = ACL_MEMCPY_DEVICE_TO_DEVICE; - } - if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) { - INFO_LOG("Copy output[%zu] success", i); - (void)aclrtDestroyStream(stream); - return false; - } - INFO_LOG("Copy output[%zu] success", i); - } - - (void)aclrtDestroyStream(stream); - return true; -} - - -template -void DoPrintData(const T *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10U) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10U) << std::setprecision(4U) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. 
data is nullptr"); - return; - } - - switch (dataType) { - case ACL_BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT16: - DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } -} - -void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); - return; - } - - auto desc = opDesc_->inputDesc[index]; - PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} - -void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
index = %zu, numOutputs = %zu", index, numOutputs_); - return; - } - - auto desc = opDesc_->outputDesc[index]; - PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/operator_desc.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/operator_desc.cpp deleted file mode 100644 index edb876575..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/op_verify/src/operator_desc.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/** -* @file operator_desc.cpp -* -* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-*/ -#include "common.h" -#include "operator_desc.h" - -using namespace std; - -OperatorDesc::OperatorDesc(std::string opType) : opType(std::move(opType)) -{ - opAttr = aclopCreateAttr(); -} - -OperatorDesc::~OperatorDesc() -{ - for (auto *desc : inputDesc) { - aclDestroyTensorDesc(desc); - } - - for (auto *desc : outputDesc) { - aclDestroyTensorDesc(desc); - } - - aclopDestroyAttr(opAttr); -} - -OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - inputDesc.emplace_back(desc); - return *this; -} - -OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, - int numDims, - const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - - outputDesc.emplace_back(desc); - return *this; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/run.sh deleted file mode 100755 index 38317caeb..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/acl_online_model_unalign/run.sh +++ /dev/null @@ -1,160 +0,0 @@ -#!/bin/bash -clear;clear -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -); cd $CURRENT_DIR - -# 导出环境变量 -DTYPE="float16" - -SHORT=m:,t:,v:, -LONG=is-dynamic:,replay-mode:,dtype:, -OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") -eval set -- "$OPTS" -while : -do - case "$1" in - # IS_DYNAMIC 0: static op - # IS_DYNAMIC 1: dynamic op - (-m | --is-dynamic) - IS_DYNAMIC="$2" - shift 2;; - # batch, iterator - (-t | --replay-mode) - REPLAY_MODE ="$2" - shift 2;; - # 
float16, float, int32 - (-v | --dtype) - DTYPE="$2" - shift 2;; - (--) - shift; - break;; - (*) - echo "[ERROR] Unexpected option: $1"; - break;; - esac -done - -if [ ! $ASCEND_HOME_DIR ]; then - export ASCEND_HOME_DIR=/usr/local/Ascend/latest -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - -PYTHON_VERSION=`python3 -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1"."$2}'` -export HI_PYTHON=python${PYTHON_VERSION} -export PYTHONPATH=$ASCEND_HOME_DIR/python/site-packages:$PYTHONPATH -export PATH=$ASCEND_HOME_DIR/python/site-packages/bin:$PATH - -# 检查当前昇腾芯片的类型 -function check_soc_version() { - SOC_VERSION_CONCAT=`python3 -c ''' -import ctypes, os -def get_soc_version(): - max_len = 256 - rtsdll = ctypes.CDLL(f"libruntime.so") - c_char_t = ctypes.create_string_buffer(b"\xff" * max_len, max_len) - rtsdll.rtGetSocVersion.restype = ctypes.c_uint64 - rt_error = rtsdll.rtGetSocVersion(c_char_t, ctypes.c_uint32(max_len)) - if rt_error: - print("rt_error:", rt_error) - return "" - soc_full_name = c_char_t.value.decode("utf-8") - find_str = "Short_SoC_version=" - ascend_home_dir = os.environ.get("ASCEND_HOME_DIR") - with open(f"{ascend_home_dir}/compiler/data/platform_config/{soc_full_name}.ini", "r") as f: - for line in f: - if find_str in line: - start_index = line.find(find_str) - result = line[start_index + len(find_str):].strip() - return "{},{}".format(soc_full_name, result.lower()) - return "" -print(get_soc_version()) - '''` - if [[ ${SOC_VERSION_CONCAT}"x" = "x" ]]; then - echo "ERROR: SOC_VERSION_CONCAT is invalid!" - return 1 - fi - SOC_FULL_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 1` - SOC_SHORT_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 2` -} - -function main() { - if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" 
- return 1 - fi - - if [[ ${REPLAY_MODE}"x" = "x" || ${REPLAY_MODE} = "batch" || ${REPLAY_MODE} = "iterator" ]]; then - echo "INFO: REPLAY_MODE valid : ${REPLAY_MODE}" - else - echo "ERROR: REPLAY_MODE is invalid!" - return 1 - fi - - # 清除遗留生成文件和日志文件 - rm -rf $HOME/ascend/log/* - rm -rf $ASCEND_OPP_PATH/vendors/* - rm -rf custom_op - - # 生成自定义算子工程样例 - JSON_NAME=add_custom_unalign - CAMEL_JSON_NAME=`echo $JSON_NAME | sed -r 's/(^|-|_)(\w)/\U\2/g'` - msopgen gen -i ../op_dev/${JSON_NAME}.json -f tf -c ai_core-${SOC_SHORT_VERSION} -lan cpp -out ./custom_op - if [ $? -ne 0 ]; then - echo "ERROR: msopgen custom op sample failed!" - return 1 - fi - echo "INFO: msopgen custom op sample success!" - - cp -rf ../op_dev/* custom_op - if [ $? -ne 0 ]; then - echo "ERROR: copy custom op files failed!" - return 1 - fi - if [[ $IS_DYNAMIC != 1 ]]; then - if [[ $REPLAY_MODE = "batch" ]]; then - sed -i "s/set(BATCH_MODE_REPLAY_LIST/set(BATCH_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(BATCH_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - elif [[ $REPLAY_MODE = "iterator" ]]; then - sed -i "s/set(ITERATOR_MODE_REPLAY_LIST/set(ITERATOR_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(ITERATOR_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - fi - fi - sed -i "s#/usr/local/Ascend/latest#$ASCEND_HOME_DIR#g" `grep "/usr/local/Ascend/latest" -rl custom_op/CMakePresets.json` - - # 测试不同输入数据类型, 修改对应代码 - if [[ ${DTYPE} == "float16" ]]; then - sed -i "s/.astype(.*)/.astype(np.float16)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_FLOAT16;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.float16)/g" `grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - elif [[ ${DTYPE} == "float" ]]; then - sed -i "s/.astype(.*)/.astype(np.float32)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i 
"s/aclDataType dataType =.*;/aclDataType dataType = ACL_FLOAT;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.float32)/g" `grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - elif [[ ${DTYPE} == "int32" ]]; then - sed -i "s/.astype(.*)/.astype(np.int32)/g" `grep ".astype(.*)" -rl op_verify/run/out/test_data/data/generate_data.py` - sed -i "s/aclDataType dataType =.*;/aclDataType dataType = ACL_INT32;/g" `grep "aclDataType dataType =.*;" -rl op_verify/src/main.cpp` - sed -i "s/dtype=.*)/dtype=np.int32)/g" `grep "dtype=.*)" -rl op_verify/scripts/verify_result.py` - else - echo "ERROR: DTYPE is invalid!" - return 1 - fi - # 构建自定义算子包并安装 - bash custom_op/run.sh - if [ $? -ne 0 ]; then - echo "ERROR: build and install custom op run package failed!" - return 1 - fi - echo "INFO: build and install custom op run package success!" - - # 编译acl可执行文件并运行 - bash op_verify/run.sh $IS_DYNAMIC - if [ $? -ne 0 ]; then - echo "ERROR: execute acl single op sample failed!" - return 1 - fi - echo "INFO: execute acl single op sample success!" 
-} - -check_soc_version -main diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom.json deleted file mode 100644 index 5ffab9266..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom.json +++ /dev/null @@ -1,52 +0,0 @@ -[ - { - "op": "AddCustom", - "language": "cpp", - "input_desc": [ - { - "name": "x", - "param_type": "required", - "format": [ - "ND", - "ND", - "ND" - ], - "type": [ - "fp16", - "float", - "int32" - ] - }, - { - "name": "y", - "param_type": "required", - "format": [ - "ND", - "ND", - "ND" - ], - "type": [ - "fp16", - "float", - "int32" - ] - } - ], - "output_desc": [ - { - "name": "z", - "param_type": "required", - "format": [ - "ND", - "ND", - "ND" - ], - "type": [ - "fp16", - "float", - "int32" - ] - } - ] - } -] \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom_unalign.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom_unalign.json deleted file mode 100644 index 68f9b22e6..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/add_custom_unalign.json +++ /dev/null @@ -1,52 +0,0 @@ -[ - { - "op": "AddCustomUnalign", - "language": "cpp", - "input_desc": [ - { - "name": "x", - "param_type": "required", - "format": [ - "ND", - "ND", - "ND" - ], - "type": [ - "fp16", - "float", - "int32" - ] - }, - { - "name": "y", - "param_type": "required", - "format": [ - "ND", - "ND", - "ND" - ], - "type": [ - "fp16", - "float", - "int32" - ] - } - ], - "output_desc": [ - { - "name": "z", - "param_type": "required", - "format": [ - "ND", - "ND", - "ND" - ], - "type": [ - "fp16", - "float", - "int32" - ] - } - ] - } -] \ No newline at end of file diff --git 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom.cpp deleted file mode 100644 index 5eeeea85a..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. - */ -#include "add_custom_tiling.h" -#include "register/op_def_registry.h" - -namespace optiling { -const uint32_t BLOCK_DIM = 8; -const uint32_t TILE_NUM = 8; -static ge::graphStatus TilingFunc(gert::TilingContext* context) -{ - TilingData tiling; - uint32_t totalLength = context->GetInputTensor(0)->GetShapeSize(); - context->SetBlockDim(BLOCK_DIM); - tiling.set_totalLength(totalLength); - tiling.set_tileNum(TILE_NUM); - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - context->SetTilingKey(1); - size_t *currentWorkspace = context->GetWorkspaceSizes(1); - currentWorkspace[0] = 0; - return ge::GRAPH_SUCCESS; -} -} // namespace optiling - -namespace ge { -static graphStatus InferShape(gert::InferShapeContext* context) -{ - const auto inputShape = context->GetInputShape(0); - auto outputShape = context->GetOutputShape(0); - *outputShape = *inputShape; - return GRAPH_SUCCESS; -} -} // namespace ge - -namespace ops { -class AddCustom : public OpDef { -public: - explicit AddCustom(const char* name) : OpDef(name) - { - this->Input("x") - .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16, ge::DT_FLOAT, ge::DT_INT32 }) - .Format({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }); - this->Input("y") - .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16, ge::DT_FLOAT, ge::DT_INT32 }) - .Format({ ge::FORMAT_ND, 
ge::FORMAT_ND, ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }); - this->Output("z") - .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16, ge::DT_FLOAT, ge::DT_INT32 }) - .Format({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }); - - this->SetInferShape(ge::InferShape); - - this->AICore() - .SetTiling(optiling::TilingFunc); - - this->AICore().AddConfig("ascend910"); - this->AICore().AddConfig("ascend310p"); - this->AICore().AddConfig("ascend910b"); - } -}; - -OP_ADD(AddCustom); -} // namespace ops diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_tiling.h deleted file mode 100644 index 4bb6d940f..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_tiling.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - */ -#ifndef ADD_CUSTOM_TILING_H -#define ADD_CUSTOM_TILING_H -#include "register/tilingdata_base.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(TilingData) - TILING_DATA_FIELD_DEF(uint32_t, totalLength); - TILING_DATA_FIELD_DEF(uint32_t, tileNum); -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(AddCustom, TilingData) -} -#endif // ADD_CUSTOM_TILING_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign.cpp deleted file mode 100644 index eeb1b05c3..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. 
- */ -#include "add_custom_unalign_tiling.h" -#include "register/op_def_registry.h" - -namespace optiling { -constexpr uint32_t BLOCK_DIM = 8; -constexpr uint32_t SIZE_OF_HALF = 2; -constexpr uint32_t BLOCK_SIZE = 32; -// shape需要对齐到的最小单位 -constexpr uint32_t ALIGN_NUM = BLOCK_SIZE / SIZE_OF_HALF; - -static ge::graphStatus TilingFunc(gert::TilingContext *context) -{ - TilingDataUnalign tiling; - uint32_t totalLength = context->GetInputTensor(0)->GetShapeSize(); - context->SetBlockDim(BLOCK_DIM); - auto attrs = context->GetAttrs(); - const uint32_t* testAttrs = attrs->GetAttrPointer(1); - - // 如果是非对齐的shape,需要向上对齐到最小单位 - uint32_t totalLengthAligned = ((totalLength + ALIGN_NUM - 1) / ALIGN_NUM) * ALIGN_NUM; - // 把所有的数据尽可能均匀地分配到每个核上,如果不能均分的话,那么会有部分核多算一个最小单位ALIGN_NUM - // 通过模的计算,可以得到多算一个最小单位的核的数量,也可以得到少算一个最小单位的核的数量 - // eg:对齐后的总数据量为160,核心数为8,数据块的最小单位是16,那么: - // 1、最小单位数据块的总数:160 / 16 = 10 - // 2、有2个核会分到2个最小单位的数据块:10 % 8 =2,可以称之为整块 - // 3、有6个核会分到1个最小单位的数据块:8 - 2 = 6,可以称之为尾块 - uint32_t formerNum = (totalLengthAligned / ALIGN_NUM) % BLOCK_DIM; - uint32_t tailNum = BLOCK_DIM - formerNum; - // 计算整块和尾块的数据量 - uint32_t formerLength = ((totalLengthAligned / BLOCK_DIM + ALIGN_NUM - 1) / ALIGN_NUM) * ALIGN_NUM; - uint32_t tailLength = (totalLengthAligned / BLOCK_DIM / ALIGN_NUM) * ALIGN_NUM; - - tiling.set_formerNum(formerNum); - tiling.set_tailNum(tailNum); - tiling.set_formerLength(formerLength); - tiling.set_tailLength(tailLength); - tiling.set_alignNum(ALIGN_NUM); - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - context->SetTilingKey(1); - size_t *currentWorkspace = context->GetWorkspaceSizes(1); - currentWorkspace[0] = 0; - return ge::GRAPH_SUCCESS; -} -} // namespace optiling - -namespace ge { -static graphStatus InferShape(gert::InferShapeContext *context) -{ - const auto inputShape = context->GetInputShape(0); - auto outputShape = 
context->GetOutputShape(0); - *outputShape = *inputShape; - return GRAPH_SUCCESS; -} -} // namespace ge - -namespace ops { -class AddCustomUnalign : public OpDef { -public: - explicit AddCustomUnalign(const char *name) : OpDef(name) - { - this->Input("x") - .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16, ge::DT_FLOAT, ge::DT_INT32 }) - .Format({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }); - this->Input("y") - .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16, ge::DT_FLOAT, ge::DT_INT32 }) - .Format({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }); - this->Output("z") - .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16, ge::DT_FLOAT, ge::DT_INT32 }) - .Format({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND, ge::FORMAT_ND, ge::FORMAT_ND }); - this->Attr("testAttr1") - .AttrType(OPTIONAL) - .Float(8.0); - this->Attr("testAttr2") - .AttrType(REQUIRED) - .Int(); - this->SetInferShape(ge::InferShape); - - this->AICore().SetTiling(optiling::TilingFunc); - - this->AICore().AddConfig("ascend910"); - this->AICore().AddConfig("ascend310p"); - this->AICore().AddConfig("ascend910b"); - } -}; - -OP_ADD(AddCustomUnalign); -} // namespace ops diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign_tiling.h deleted file mode 100644 index 350b59f21..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_host/add_custom_unalign_tiling.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. 
- */ -#ifndef ADD_CUSTOM_UNALIGN_TILING_H -#define ADD_CUSTOM_UNALIGN_TILING_H -#include "register/tilingdata_base.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(TilingDataUnalign) - TILING_DATA_FIELD_DEF(uint32_t, formerNum); - TILING_DATA_FIELD_DEF(uint32_t, tailNum); - TILING_DATA_FIELD_DEF(uint32_t, formerLength); - TILING_DATA_FIELD_DEF(uint32_t, tailLength); - TILING_DATA_FIELD_DEF(uint32_t, alignNum); -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(AddCustomUnalign, TilingDataUnalign) -} -#endif // ADD_CUSTOM_UNALIGN_TILING_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom.cpp deleted file mode 100644 index 89cd6e599..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * - * Function : z = x + y - * This sample is a very basic sample that implements vector add on Ascend plaform. 
- */ -#include "kernel_operator.h" -using namespace AscendC; - -constexpr int32_t BUFFER_NUM = 2; - -class KernelAdd { -public: - __aicore__ inline KernelAdd() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) - { - ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); - this->blockLength = totalLength / GetBlockNum(); - this->tileNum = tileNum; - ASSERT(tileNum != 0 && "tile num can not be zero!"); - this->tileLength = this->blockLength / tileNum / BUFFER_NUM; - - xGm.SetGlobalBuffer((__gm__ DTYPE_X*)x + this->blockLength * GetBlockIdx(), this->blockLength); - yGm.SetGlobalBuffer((__gm__ DTYPE_Y*)y + this->blockLength * GetBlockIdx(), this->blockLength); - zGm.SetGlobalBuffer((__gm__ DTYPE_Z*)z + this->blockLength * GetBlockIdx(), this->blockLength); - pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(DTYPE_X)); - pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Y)); - pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Z)); - } - __aicore__ inline void Process() - { - int32_t loopCount = this->tileNum * BUFFER_NUM; - for (int32_t i = 0; i < loopCount; i++) { - CopyIn(i); - Compute(i); - CopyOut(i); - } - } - -private: - __aicore__ inline void CopyIn(int32_t progress) - { - LocalTensor xLocal = inQueueX.AllocTensor(); - LocalTensor yLocal = inQueueY.AllocTensor(); - DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); - DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); - inQueueX.EnQue(xLocal); - inQueueY.EnQue(yLocal); - } - __aicore__ inline void Compute(int32_t progress) - { - LocalTensor xLocal = inQueueX.DeQue(); - LocalTensor yLocal = inQueueY.DeQue(); - LocalTensor zLocal = outQueueZ.AllocTensor(); - Add(zLocal, xLocal, yLocal, this->tileLength); - outQueueZ.EnQue(zLocal); - inQueueX.FreeTensor(xLocal); - inQueueY.FreeTensor(yLocal); - } - __aicore__ inline void CopyOut(int32_t progress) - { - LocalTensor 
zLocal = outQueueZ.DeQue(); - DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); - outQueueZ.FreeTensor(zLocal); - } - -private: - TPipe pipe; - TQue inQueueX, inQueueY; - TQue outQueueZ; - GlobalTensor xGm; - GlobalTensor yGm; - GlobalTensor zGm; - uint32_t blockLength; - uint32_t tileNum; - uint32_t tileLength; -}; - -extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) -{ - GET_TILING_DATA(tilingData, tiling); - KernelAdd op; - op.Init(x, y, z, tilingData.totalLength, tilingData.tileNum); - if (TILING_KEY_IS(1)) { - op.Process(); - } -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom_unalign.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom_unalign.cpp deleted file mode 100644 index 06cee0bf1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/op_kernel/add_custom_unalign.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * - * Function : z = x + y - * This sample is a very basic sample that implements vector add on Ascend plaform. 
- */ -#include "kernel_operator.h" -using namespace AscendC; - -constexpr int32_t BUFFER_NUM = 2; - -class KernelAdd { -public: - __aicore__ inline KernelAdd() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t formerNum, - uint32_t tailNum, uint32_t formerLength, uint32_t tailLength, uint32_t alignNum) - { - if (GetBlockIdx() < formerNum) { - this->tileLength = formerLength; - xGm.SetGlobalBuffer((__gm__ DTYPE_X *)x + formerLength * GetBlockIdx(), formerLength); - yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)y + formerLength * GetBlockIdx(), formerLength); - zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)z + formerLength * GetBlockIdx(), formerLength); - } else { - this->tileLength = tailLength; - xGm.SetGlobalBuffer( - (__gm__ DTYPE_X *)x + formerLength * formerNum + tailLength * (GetBlockIdx() - formerNum), tailLength); - yGm.SetGlobalBuffer( - (__gm__ DTYPE_Y *)y + formerLength * formerNum + tailLength * (GetBlockIdx() - formerNum), tailLength); - zGm.SetGlobalBuffer( - (__gm__ DTYPE_Z *)z + formerLength * formerNum + tailLength * (GetBlockIdx() - formerNum), tailLength); - } - ASSERT(alignNum != 0 && "align num can not be zero!"); - pipe.InitBuffer(inQueueX, BUFFER_NUM, - (((this->tileLength + alignNum - 1) / alignNum) * alignNum) * sizeof(half)); - pipe.InitBuffer(inQueueY, BUFFER_NUM, - (((this->tileLength + alignNum - 1) / alignNum) * alignNum) * sizeof(half)); - pipe.InitBuffer(outQueueZ, BUFFER_NUM, - (((this->tileLength + alignNum - 1) / alignNum) * alignNum) * sizeof(half)); - } - __aicore__ inline void Process() - { - CopyIn(); - Compute(); - CopyOut(); - } - -private: - __aicore__ inline void CopyIn() - { - LocalTensor xLocal = inQueueX.AllocTensor(); - LocalTensor yLocal = inQueueY.AllocTensor(); - DataCopy(xLocal, xGm, this->tileLength); - DataCopy(yLocal, yGm, this->tileLength); - inQueueX.EnQue(xLocal); - inQueueY.EnQue(yLocal); - } - __aicore__ inline void Compute() - { - LocalTensor xLocal = inQueueX.DeQue(); - LocalTensor yLocal = 
inQueueY.DeQue(); - LocalTensor zLocal = outQueueZ.AllocTensor(); - Add(zLocal, xLocal, yLocal, this->tileLength); - outQueueZ.EnQue(zLocal); - inQueueX.FreeTensor(xLocal); - inQueueY.FreeTensor(yLocal); - } - __aicore__ inline void CopyOut() - { - LocalTensor zLocal = outQueueZ.DeQue(); - DataCopy(zGm, zLocal, this->tileLength); - outQueueZ.FreeTensor(zLocal); - } - -private: - TPipe pipe; - TQue inQueueX, inQueueY; - TQue outQueueZ; - GlobalTensor xGm; - GlobalTensor yGm; - GlobalTensor zGm; - uint32_t blockLength; - uint32_t tileLength; -}; - -extern "C" __global__ __aicore__ void add_custom_unalign(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, - GM_ADDR tiling) -{ - GET_TILING_DATA(tilingData, tiling); - KernelAdd op; - op.Init(x, y, z, tilingData.formerNum, tilingData.tailNum, tilingData.formerLength, - tilingData.tailLength, tilingData.alignNum); - if (TILING_KEY_IS(1)) { - op.Process(); - } -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/run.sh deleted file mode 100644 index 5d650ed4e..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/op_dev/run.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -) -cd $CURRENT_DIR - -# 导出环境变量 -if [ ! $ASCEND_HOME_DIR ]; then - ASCEND_HOME_DIR=/usr/local/Ascend/latest - source $ASCEND_HOME_DIR/bin/setenv.bash -fi - -export ASCEND_TENSOR_COMPILER_INCLUDE=$ASCEND_HOME_DIR/compiler/include - -function main { - # 1. 构建自定义算子包 - rm -rf build_out - bash build.sh - if [ $? -ne 0 ]; then - echo "ERROR: build custom op run package failed!" - return 1 - fi - echo "INFO: build custom op run package success!" - - # 2. 
安装自定义算子包 - cd build_out - OS_ID=$(cat /etc/os-release | grep "^ID=" | awk -F= '{print $2}') - OS_ID=$(echo $OS_ID | sed -e 's/^"//' -e 's/"$//') - arch=$(uname -m) - ./custom_opp_${OS_ID}_${arch}.run --quiet - if [ $? -ne 0 ]; then - echo "ERROR: install custom op run package failed!" - return 1 - fi - echo "INFO: install custom op run package success!" -} - -main \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/readme.md b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/readme.md deleted file mode 100644 index 1644d2dbf..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/acl_invocation/readme.md +++ /dev/null @@ -1,21 +0,0 @@ -# acl samples -```shell -cd acl_offline_model/acl_online_model -bash run_torch.sh ${is_dynamic}(0/1) ${replay_mode}(/batch/iterator) -``` - -# run static op (depend on chip version) -```shell -(cd acl_offline_model; bash run.sh --is-dynamic 0) - -(cd acl_online_model; bash run.sh --is-dynamic 0) - -(cd acl_online_model_unalign; bash run.sh --is-dynamic 0) -``` - -# run dynamic op (depend on chip version) -```shell -(cd acl_offline_model; bash run.sh --is-dynamic 1) - -(cd acl_online_model; bash run.sh --is-dynamic 1) -``` diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/.gitignore b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/.gitignore deleted file mode 100644 index 249635946..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -*.cce -*.dump -*_cpu -*_npu \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/CMakeLists.txt deleted file mode 120000 index c6a720352..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/CMakeLists.txt \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.cpp deleted file mode 100644 index 31ac8def1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * - * Function : z = x + y - * This sample is a very basic sample that implements vector add on Ascend plaform. - * In this sample: - * Length of x / y / z is 8*2048. - * Num of vector core used in sample is 8. - * Length for each core to compute is 2048. - * Tiles for each core is 8 which means we add 2048/8=256 elements in one loop. - * - * This is just a tile strategy for demonstration, in fact we can compute at most 128*255 - * elements in one loop for b16 type. 
- */ -#include "kernel_operator.h" -using namespace AscendC; - -constexpr int32_t TOTAL_LENGTH = 8 * 2048; // total length of data -constexpr int32_t USE_CORE_NUM = 8; // num of core used -constexpr int32_t BLOCK_LENGTH = TOTAL_LENGTH / USE_CORE_NUM; // length computed of each core -constexpr int32_t TILE_NUM = 8; // split data into 8 tiles for each core -constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue -constexpr int32_t TILE_LENGTH = BLOCK_LENGTH / TILE_NUM / BUFFER_NUM; // seperate to 2 parts, due to double buffer - -class KernelAdd { -public: - __aicore__ inline KernelAdd() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z) - { - // get start index for current core, core parallel - xGm.SetGlobalBuffer((__gm__ half*)x + BLOCK_LENGTH * GetBlockIdx(), BLOCK_LENGTH); - yGm.SetGlobalBuffer((__gm__ half*)y + BLOCK_LENGTH * GetBlockIdx(), BLOCK_LENGTH); - zGm.SetGlobalBuffer((__gm__ half*)z + BLOCK_LENGTH * GetBlockIdx(), BLOCK_LENGTH); - // pipe alloc memory to queue, the unit is Bytes - pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_LENGTH * sizeof(half)); - pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_LENGTH * sizeof(half)); - pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_LENGTH * sizeof(half)); - } - __aicore__ inline void Process() - { - // loop count need to be doubled, due to double buffer - constexpr int32_t loopCount = TILE_NUM * BUFFER_NUM; - // tiling strategy, pipeline parallel - for (int32_t i = 0; i < loopCount; i++) { - CopyIn(i); - Compute(i); - CopyOut(i); - } - } - -private: - __aicore__ inline void CopyIn(int32_t progress) - { - // alloc tensor from queue memory - LocalTensor xLocal = inQueueX.AllocTensor(); - LocalTensor yLocal = inQueueY.AllocTensor(); - // copy progress_th tile from global tensor to local tensor - DataCopy(xLocal, xGm[progress * TILE_LENGTH], TILE_LENGTH); - DataCopy(yLocal, yGm[progress * TILE_LENGTH], TILE_LENGTH); - // enque input tensors to VECIN queue - inQueueX.EnQue(xLocal); - inQueueY.EnQue(yLocal); - 
} - __aicore__ inline void Compute(int32_t progress) - { - // deque input tensors from VECIN queue - LocalTensor xLocal = inQueueX.DeQue(); - LocalTensor yLocal = inQueueY.DeQue(); - LocalTensor zLocal = outQueueZ.AllocTensor(); - // call Add instr for computation - Add(zLocal, xLocal, yLocal, TILE_LENGTH); - // enque the output tensor to VECOUT queue - outQueueZ.EnQue(zLocal); - // free input tensors for reuse - inQueueX.FreeTensor(xLocal); - inQueueY.FreeTensor(yLocal); - } - __aicore__ inline void CopyOut(int32_t progress) - { - // deque output tensor from VECOUT queue - LocalTensor zLocal = outQueueZ.DeQue(); - // copy progress_th tile from local tensor to global tensor - DataCopy(zGm[progress * TILE_LENGTH], zLocal, TILE_LENGTH); - // free output tensor for reuse - outQueueZ.FreeTensor(zLocal); - } - -private: - TPipe pipe; - // create queues for input, in this case depth is equal to buffer num - TQue inQueueX, inQueueY; - // create queue for output, in this case depth is equal to buffer num - TQue outQueueZ; - GlobalTensor xGm, yGm, zGm; -}; - -// implementation of kernel function -extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z) -{ - KernelAdd op; - op.Init(x, y, z); - op.Process(); -} - -#ifndef __CCE_KT_TEST__ -// call of kernel function -void add_custom_do(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* x, uint8_t* y, uint8_t* z) -{ - add_custom<<>>(x, y, z); -} -#endif diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.py deleted file mode 100644 index 3d5f309c9..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/add_custom.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/python3 -# -*- coding:utf-8 -*- -# Copyright 2022-2023 Huawei Technologies Co., Ltd -import numpy as np - - -def gen_golden_data(params, data_dir): - input_x = 
np.random.uniform(-100, 100, params[0].shape).astype(params[0].np_dtype) - input_y = np.random.uniform(-100, 100, params[1].shape).astype(params[1].np_dtype) - golden = (input_x + input_y).astype(params[2].np_dtype) - - input_x.tofile(str(data_dir / params[0].data_path)) - input_y.tofile(str(data_dir / params[1].data_path)) - golden.tofile(str(data_dir / params[2].golden_path)) - - -def gen_golden_data_simple(): - input_x = np.random.uniform(-100, 100, [8, 2048]).astype(np.float16) - input_y = np.random.uniform(-100, 100, [8, 2048]).astype(np.float16) - golden = (input_x + input_y).astype(np.float16) - - input_x.tofile("./input/input_x.bin") - input_y.tofile("./input/input_y.bin") - golden.tofile("./output/golden.bin") - - -if __name__ == "__main__": - gen_golden_data_simple() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/cmake deleted file mode 120000 index 64ed04a5e..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/cmake +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/cmake \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/data_utils.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/data_utils.h deleted file mode 120000 index 2cdb090e1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/data_utils.h +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/data_utils.h \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/input/.gitkeep b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/input/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/main.cpp 
b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/main.cpp deleted file mode 100644 index 99ae83747..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/main.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * This file constains code of cpu debug and npu code.We read data from bin file - * and write result to file. - */ -#include "data_utils.h" -#ifndef __CCE_KT_TEST__ -#include "acl/acl.h" -extern void add_custom_do(uint32_t coreDim, void* l2ctrl, void* stream, uint8_t* x, uint8_t* y, uint8_t* z); -#else -#include "tikicpulib.h" -extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z); -#endif - -int32_t main(int32_t argc, char* argv[]) -{ - size_t inputByteSize = 8 * 2048 * sizeof(uint16_t); // uint16_t represent half - size_t outputByteSize = 8 * 2048 * sizeof(uint16_t); // uint16_t represent half - uint32_t blockDim = 8; - -#ifdef __CCE_KT_TEST__ - uint8_t* x = (uint8_t*)AscendC::GmAlloc(inputByteSize); - uint8_t* y = (uint8_t*)AscendC::GmAlloc(inputByteSize); - uint8_t* z = (uint8_t*)AscendC::GmAlloc(outputByteSize); - - ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); - ReadFile("./input/input_y.bin", inputByteSize, y, inputByteSize); - - AscendC::SetKernelMode(KernelMode::AIV_MODE); - ICPU_RUN_KF(add_custom, blockDim, x, y, z); // use this macro for cpu debug - - WriteFile("./output/output_z.bin", z, outputByteSize); - - AscendC::GmFree((void *)x); - AscendC::GmFree((void *)y); - AscendC::GmFree((void *)z); -#else - CHECK_ACL(aclInit(nullptr)); - aclrtContext context; - int32_t deviceId = 0; - CHECK_ACL(aclrtSetDevice(deviceId)); - CHECK_ACL(aclrtCreateContext(&context, deviceId)); - aclrtStream stream = nullptr; - CHECK_ACL(aclrtCreateStream(&stream)); - - uint8_t *xHost, *yHost, *zHost; - uint8_t *xDevice, *yDevice, *zDevice; - CHECK_ACL(aclrtMallocHost((void**)(&xHost), 
inputByteSize)); - CHECK_ACL(aclrtMallocHost((void**)(&yHost), inputByteSize)); - CHECK_ACL(aclrtMallocHost((void**)(&zHost), outputByteSize)); - CHECK_ACL(aclrtMalloc((void**)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - - ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); - ReadFile("./input/input_y.bin", inputByteSize, yHost, inputByteSize); - CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); - CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize, yHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - add_custom_do(blockDim, nullptr, stream, xDevice, yDevice, zDevice); - CHECK_ACL(aclrtSynchronizeStream(stream)); - - CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); - WriteFile("./output/output_z.bin", zHost, outputByteSize); - - CHECK_ACL(aclrtFree(xDevice)); - CHECK_ACL(aclrtFree(yDevice)); - CHECK_ACL(aclrtFree(zDevice)); - CHECK_ACL(aclrtFreeHost(xHost)); - CHECK_ACL(aclrtFreeHost(yHost)); - CHECK_ACL(aclrtFreeHost(zHost)); - - CHECK_ACL(aclrtDestroyStream(stream)); - CHECK_ACL(aclrtDestroyContext(context)); - CHECK_ACL(aclrtResetDevice(deviceId)); - CHECK_ACL(aclFinalize()); -#endif - return 0; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/output/.gitkeep b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/output/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/run.sh deleted file mode 120000 index c9ece8cbc..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add/run.sh +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/run.sh \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/CMakeLists.txt deleted file mode 120000 index c6a720352..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/CMakeLists.txt \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.cpp deleted file mode 100644 index b13f30432..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * - * Function : z = x + y - * This sample is a very basic sample that implements vector add on Ascend plaform. 
- */ -#include "kernel_operator.h" -#include "add_custom_tiling.h" -using namespace AscendC; - -constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue - -class KernelAdd { -public: - __aicore__ inline KernelAdd() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) - { - ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); - this->blockLength = totalLength / GetBlockNum(); - this->tileNum = tileNum; - ASSERT(tileNum != 0 && "tile num can not be zero!"); - this->tileLength = this->blockLength / tileNum / BUFFER_NUM; - - // get start index for current core, core parallel - xGm.SetGlobalBuffer((__gm__ half*)x + this->blockLength * GetBlockIdx(), this->blockLength); - yGm.SetGlobalBuffer((__gm__ half*)y + this->blockLength * GetBlockIdx(), this->blockLength); - zGm.SetGlobalBuffer((__gm__ half*)z + this->blockLength * GetBlockIdx(), this->blockLength); - // pipe alloc memory to queue, the unit is Bytes - pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(half)); - pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(half)); - pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(half)); - } - __aicore__ inline void Process() - { - // loop count need to be doubled, due to double buffer - int32_t loopCount = this->tileNum * BUFFER_NUM; - // tiling strategy, pipeline parallel - for (int32_t i = 0; i < loopCount; i++) { - CopyIn(i); - Compute(i); - CopyOut(i); - } - } - -private: - __aicore__ inline void CopyIn(int32_t progress) - { - // alloc tensor from queue memory - LocalTensor xLocal = inQueueX.AllocTensor(); - LocalTensor yLocal = inQueueY.AllocTensor(); - // copy progress_th tile from global tensor to local tensor - DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); - DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); - // enque input tensors to VECIN queue - inQueueX.EnQue(xLocal); - inQueueY.EnQue(yLocal); - } - __aicore__ 
inline void Compute(int32_t progress) - { - // deque input tensors from VECIN queue - LocalTensor xLocal = inQueueX.DeQue(); - LocalTensor yLocal = inQueueY.DeQue(); - LocalTensor zLocal = outQueueZ.AllocTensor(); - // call Add instr for computation - Add(zLocal, xLocal, yLocal, this->tileLength); - // enque the output tensor to VECOUT queue - outQueueZ.EnQue(zLocal); - // free input tensors for reuse - inQueueX.FreeTensor(xLocal); - inQueueY.FreeTensor(yLocal); - } - __aicore__ inline void CopyOut(int32_t progress) - { - // deque output tensor from VECOUT queue - LocalTensor zLocal = outQueueZ.DeQue(); - // copy progress_th tile from local tensor to global tensor - DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); - // free output tensor for reuse - outQueueZ.FreeTensor(zLocal); - } - -private: - TPipe pipe; - // create queues for input, in this case depth is equal to buffer num - TQue inQueueX, inQueueY; - // create queue for output, in this case depth is equal to buffer num - TQue outQueueZ; - GlobalTensor xGm, yGm, zGm; - uint32_t blockLength; // number of calculations on each core - uint32_t tileNum; // number of tiles on each core - uint32_t tileLength; // number of calculations in each tile -}; - -extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) -{ - GET_TILING_DATA(tilingData, tiling); - KernelAdd op; - op.Init(x, y, z, tilingData.totalLength, tilingData.tileNum); - op.Process(); -} - -#ifndef __CCE_KT_TEST__ -// call of kernel function -void add_custom_do(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* x, uint8_t* y, uint8_t* z, - uint8_t* workspace, uint8_t* tiling) -{ - add_custom<<>>(x, y, z, workspace, tiling); -} -#endif diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.py deleted file mode 100644 
index 0af984a27..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python3 -# -*- coding:utf-8 -*- -# Copyright 2022-2023 Huawei Technologies Co., Ltd -import os -import stat -import numpy as np -OPEN_FILE_MODES_640 = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP -WRITE_FILE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC - - -def gen_golden_data_simple(): - one_repeat_calcount = 128 # fixed - block_dim_imm = 8 - tile_num_imm = 8 - double_buffer_imm = 2 # fixed - total_length_imm = block_dim_imm * \ - one_repeat_calcount * tile_num_imm * double_buffer_imm - - total_length = np.array(total_length_imm, dtype=np.uint32) - tile_num = np.array(tile_num_imm, dtype=np.uint32) - tiling = (total_length, tile_num) - tiling_data = b''.join(x.tobytes() for x in tiling) - with os.fdopen(os.open('./input/tiling.bin', WRITE_FILE_FLAGS, OPEN_FILE_MODES_640), 'wb') as f: - f.write(tiling_data) - - input_x = np.random.uniform(-100, 100, [total_length_imm]).astype(np.float16) - input_y = np.random.uniform(-100, 100, [total_length_imm]).astype(np.float16) - golden = (input_x + input_y).astype(np.float16) - - input_x.tofile("./input/input_x.bin") - input_y.tofile("./input/input_y.bin") - golden.tofile("./output/golden.bin") - - -if __name__ == "__main__": - gen_golden_data_simple() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom_tiling.h deleted file mode 100644 index 1258565fc..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/add_custom_tiling.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. 
- */ -#ifndef ADD_CUSTOM_TILING_H -#define ADD_CUSTOM_TILING_H - -#ifdef __CCE_KT_TEST__ -#define __aicore__ -#else -#define __aicore__ [aicore] -#endif - -inline __aicore__ int32_t AlignDiv32(int32_t n) -{ - return ((n + 31) & ~31) / 32; -} - -struct AddCustomTilingData { - uint32_t totalLength; - uint32_t tileNum; -}; - -#define CONVERT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer) \ - __ubuf__ tilingStruct *tilingDataPointer = \ - reinterpret_cast<__ubuf__ tilingStruct *>((__ubuf__ uint8_t *)(tilingPointer)); - -#ifdef __CCE_KT_TEST__ -#define INIT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer) \ - CONVERT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer); -#else -#define INIT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer) \ - __ubuf__ uint8_t* tilingUbPointer = (__ubuf__ uint8_t*)get_imm(0); \ - copy_gm_to_ubuf(((__ubuf__ uint8_t*)(tilingUbPointer)), ((__gm__ uint8_t*)(tilingPointer)), \ - 0, 1, AlignDiv32(sizeof(tilingStruct)), 0, 0); \ - CONVERT_TILING_DATA(tilingStruct, tilingDataPointer, tilingUbPointer); \ - pipe_barrier(PIPE_ALL); -#endif - -// stub func, used to enable cpu mode in this code, will be deprecated soon -#define GET_TILING_DATA(tilingData, tilingPointer) \ - AddCustomTilingData tilingData; \ - INIT_TILING_DATA(AddCustomTilingData, tilingDataPointer, tilingPointer); \ - (tilingData).totalLength = tilingDataPointer->totalLength; \ - (tilingData).tileNum = tilingDataPointer->tileNum; -#endif // ADD_CUSTOM_TILING_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/cmake deleted file mode 120000 index 64ed04a5e..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/cmake +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/cmake \ No newline at end of file diff --git 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/data_utils.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/data_utils.h deleted file mode 120000 index 2cdb090e1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/data_utils.h +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/data_utils.h \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/input/.gitkeep b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/input/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/main.cpp deleted file mode 100644 index 2677f5ea2..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/main.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * This file constains code of cpu debug and npu code.We read data from bin file - * and write result to file. 
- */ -#include "data_utils.h" -#ifndef __CCE_KT_TEST__ -#include "acl/acl.h" -extern void add_custom_do(uint32_t coreDim, void* l2ctrl, void* stream, uint8_t* x, uint8_t* y, uint8_t* z, - uint8_t* workspace, uint8_t* tiling); -#else -#include "tikicpulib.h" -extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling); -#endif - -int32_t main(int32_t argc, char* argv[]) -{ - size_t tilingSize = 2 * sizeof(uint32_t); - size_t usrWorkspaceSize = 4096; - size_t sysWorkspaceSize = 16 * 1024 * 1024; - uint32_t blockDim = 8; -#ifdef __CCE_KT_TEST__ - uint8_t* usrWorkSpace = (uint8_t*)AscendC::GmAlloc(usrWorkspaceSize); - uint8_t* tiling = (uint8_t*)AscendC::GmAlloc(tilingSize); - ReadFile("./input/tiling.bin", tilingSize, tiling, tilingSize); - - size_t inputByteSize = blockDim * 2048 * sizeof(uint16_t); // uint16_t represent half - size_t outputByteSize = blockDim * 2048 * sizeof(uint16_t); // uint16_t represent half - - uint8_t* x = (uint8_t*)AscendC::GmAlloc(inputByteSize); - uint8_t* y = (uint8_t*)AscendC::GmAlloc(inputByteSize); - uint8_t* z = (uint8_t*)AscendC::GmAlloc(outputByteSize); - - ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); - ReadFile("./input/input_y.bin", inputByteSize, y, inputByteSize); - - AscendC::SetKernelMode(KernelMode::AIV_MODE); - ICPU_RUN_KF(add_custom, blockDim, x, y, z, usrWorkSpace, tiling); // use this macro for cpu debug - - WriteFile("./output/output_z.bin", z, outputByteSize); - - AscendC::GmFree((void *)x); - AscendC::GmFree((void *)y); - AscendC::GmFree((void *)z); - AscendC::GmFree((void *)usrWorkSpace); - AscendC::GmFree((void *)tiling); -#else - CHECK_ACL(aclInit(nullptr)); - aclrtContext context; - int32_t deviceId = 0; - CHECK_ACL(aclrtSetDevice(deviceId)); - CHECK_ACL(aclrtCreateContext(&context, deviceId)); - aclrtStream stream = nullptr; - CHECK_ACL(aclrtCreateStream(&stream)); - - uint8_t *xHost, *yHost, *zHost, *tilingHost, *workspaceHost; - uint8_t 
*xDevice, *yDevice, *zDevice, *tilingDevice, *workspaceDevice; - - CHECK_ACL(aclrtMallocHost((void**)(&tilingHost), tilingSize)); - ReadFile("./input/tiling.bin", tilingSize, tilingHost, tilingSize); - - CHECK_ACL(aclrtMallocHost((void**)(&workspaceHost), tilingSize)); - - size_t inputByteSize = blockDim * 2048 * sizeof(uint16_t); // uint16_t represent half - size_t outputByteSize = blockDim * 2048 * sizeof(uint16_t); // uint16_t represent half - size_t workspaceByteSize = sysWorkspaceSize + usrWorkspaceSize; - - CHECK_ACL(aclrtMallocHost((void**)(&xHost), inputByteSize)); - CHECK_ACL(aclrtMallocHost((void**)(&yHost), inputByteSize)); - CHECK_ACL(aclrtMallocHost((void**)(&zHost), outputByteSize)); - CHECK_ACL(aclrtMallocHost((void**)(&workspaceHost), workspaceByteSize)); - CHECK_ACL(aclrtMalloc((void**)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&tilingDevice, tilingSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&workspaceDevice, workspaceByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - - ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); - ReadFile("./input/input_y.bin", inputByteSize, yHost, inputByteSize); - - CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); - CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize, yHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); - CHECK_ACL(aclrtMemcpy(tilingDevice, tilingSize, tilingHost, tilingSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - add_custom_do(blockDim, nullptr, stream, xDevice, yDevice, zDevice, workspaceDevice, tilingDevice); - CHECK_ACL(aclrtSynchronizeStream(stream)); - - CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); - WriteFile("./output/output_z.bin", zHost, outputByteSize); - - 
CHECK_ACL(aclrtFree(xDevice)); - CHECK_ACL(aclrtFree(yDevice)); - CHECK_ACL(aclrtFree(zDevice)); - CHECK_ACL(aclrtFree(workspaceDevice)); - CHECK_ACL(aclrtFree(tilingDevice)); - CHECK_ACL(aclrtFreeHost(xHost)); - CHECK_ACL(aclrtFreeHost(yHost)); - CHECK_ACL(aclrtFreeHost(zHost)); - CHECK_ACL(aclrtFreeHost(workspaceHost)); - CHECK_ACL(aclrtFreeHost(tilingHost)); - - CHECK_ACL(aclrtDestroyStream(stream)); - CHECK_ACL(aclrtDestroyContext(context)); - CHECK_ACL(aclrtResetDevice(deviceId)); - CHECK_ACL(aclFinalize()); -#endif - return 0; -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/output/.gitkeep b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/output/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/run.sh deleted file mode 120000 index c9ece8cbc..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Add_tile/run.sh +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/run.sh \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/CMakeLists.txt deleted file mode 100644 index 5dafbdc1d..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -set(CMAKE_SYSTEM_NAME Linux) -set(CMAKE_CXX_STANDARD 17) - -set(CCE_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) -list(APPEND CMAKE_MODULE_PATH ${CCE_CMAKE_PATH}) -project(kernel_samples LANGUAGES CCE CXX) - -add_subdirectory(cmake/cpu) -add_subdirectory(cmake/npu) diff --git 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCECompiler.cmake.in b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCECompiler.cmake.in deleted file mode 100644 index a9b5688ff..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCECompiler.cmake.in +++ /dev/null @@ -1,5 +0,0 @@ -set(CMAKE_CCE_COMPILER "@CMAKE_CCE_COMPILER@") -set(CMAKE_CCE_COMPILER_LOADED 1) -set(CMAKE_CCE_SOURCE_FILE_EXTENSIONS @CMAKE_CCE_SOURCE_FILE_EXTENSIONS@) -set(CMAKE_CCE_OUTPUT_EXTENSION @CMAKE_CCE_OUTPUT_EXTENSION@) -set(CMAKE_CCE_COMPILER_ENV_VAR "@CMAKE_CCE_COMPILER_ENV_VAR@") diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEFunction.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEFunction.cmake deleted file mode 100644 index 246ce9022..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEFunction.cmake +++ /dev/null @@ -1,20 +0,0 @@ -function(product_dir str newstr) - if ("x${str}" STREQUAL "xascend610") - set(${newstr} "Ascend610" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend910") - set(${newstr} "Ascend910A" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend310") - set(${newstr} "Ascend310" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend310p") - set(${newstr} "Ascend310P1" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend920") - set(${newstr} "Ascend920A" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend910b") - set(${newstr} "Ascend910B1" PARENT_SCOPE) - else() - string(SUBSTRING ${str} 0 1 _headlower) - string(SUBSTRING ${str} 1 -1 _leftstr) - string(TOUPPER ${_headlower} _headupper) - set(${newstr} "${_headupper}${_leftstr}" PARENT_SCOPE) - endif() -endfunction() diff --git 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEInformation.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEInformation.cmake deleted file mode 100644 index 0eaef0348..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeCCEInformation.cmake +++ /dev/null @@ -1,35 +0,0 @@ -include(CMakeCommonLanguageInclude) - -set(CMAKE_INCLUDE_FLAG_CCE "-I") - -if(UNIX) - set(CMAKE_CCE_OUTPUT_EXTENSION .o) -else() - set(CMAKE_CCE_OUTPUT_EXTENSION .obj) -endif() - -set(_INCLUDED_FILE 0) -set(CMAKE_SHARED_LIBRARY_CCE_FLAGS -fPIC) -set(CMAKE_SHARED_LIBRARY_CREATE_CCE_FLAGS -shared) -set(CMAKE_LIBRARY_CREATE_CCE_FLAGS "--cce-fatobj-link ${_CMAKE_COMPILE_AS_CCE_FLAG}") - -if(NOT CMAKE_CCE_COMPILE_OBJECT) - set(CMAKE_CCE_COMPILE_OBJECT - " -xcce ${__IMPLICIT_INCLUDES} ${_CMAKE_CCE_BUILTIN_INCLUDE_PATH} ${_CMAKE_COMPILE_AS_CCE_FLAG} ${_CMAKE_CCE_COMPILE_OPTIONS} ${_CMAKE_CCE_COMMON_COMPILE_OPTIONS} -pthread -o -c ") -endif() - -if(NOT CMAKE_CCE_CREATE_SHARED_LIBRARY) - set(CMAKE_CCE_CREATE_SHARED_LIBRARY - " ${CMAKE_LIBRARY_CREATE_CCE_FLAGS} -o ") -endif() - -if(NOT CMAKE_CCE_CREATE_SHARED_MODULE) - set(CMAKE_CCE_CREATE_SHARED_MODULE ${CMAKE_CCE_CREATE_SHARED_LIBRARY}) -endif() - -if(NOT CMAKE_CCE_LINK_EXECUTABLE) - set(CMAKE_CCE_LINK_EXECUTABLE - " ${CMAKE_LIBRARY_CREATE_CCE_FLAGS} -o ${__IMPLICIT_LINKS}") -endif() - -set(CMAKE_CCE_INFORMATION_LOADED 1) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeDetermineCCECompiler.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeDetermineCCECompiler.cmake deleted file mode 100644 index ee3aa7deb..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeDetermineCCECompiler.cmake +++ /dev/null @@ -1,113 +0,0 @@ -find_program(CMAKE_CCE_COMPILER NAMES "ccec" PATHS "$ENV{PATH}" DOC "CCE Compiler") -include(CMakeCCEFunction) - -mark_as_advanced(CMAKE_CCE_COMPILER) - -message(STATUS "CMAKE_CCE_COMPILER: " ${CMAKE_CCE_COMPILER}) -set(CMAKE_CCE_SOURCE_FILE_EXTENSIONS cce;cpp) -set(CMAKE_CCE_COMPILER_ENV_VAR "CCE") -message(STATUS "CMAKE_CURRENT_LIST_DIR: " ${CMAKE_CURRENT_LIST_DIR}) - -# configure all variables set in this file -configure_file(${CMAKE_CURRENT_LIST_DIR}/CMakeCCECompiler.cmake.in - ${CMAKE_PLATFORM_INFO_DIR}/CMakeCCECompiler.cmake - @ONLY -) - -message(STATUS "ASCEND_PRODUCT_TYPE:\n" " ${ASCEND_PRODUCT_TYPE}") -message(STATUS "ASCEND_CORE_TYPE:\n" " ${ASCEND_CORE_TYPE}") -message(STATUS "ASCEND_INSTALL_PATH:\n" " ${ASCEND_INSTALL_PATH}") - -if(DEFINED ASCEND_INSTALL_PATH) - set(_CMAKE_ASCEND_INSTALL_PATH ${ASCEND_INSTALL_PATH}) -else() - message(FATAL_ERROR - "no, installation path found, should passing -DASCEND_INSTALL_PATH= in cmake" - ) - set(_CMAKE_ASCEND_INSTALL_PATH) -endif() - -if(DEFINED ASCEND_PRODUCT_TYPE) - set(_CMAKE_CCE_COMMON_COMPILE_OPTIONS "--cce-auto-sync") - if(ASCEND_PRODUCT_TYPE STREQUAL "") - message(FATAL_ERROR "ASCEND_PRODUCT_TYPE must be non-empty if set.") - elseif(ASCEND_PRODUCT_TYPE AND NOT ASCEND_PRODUCT_TYPE MATCHES "^ascend[0-9][0-9][0-9][a-zA-Z]?[1-9]?$") - message(FATAL_ERROR - "ASCEND_PRODUCT_TYPE: ${ASCEND_PRODUCT_TYPE}\n" - "is not one of the following: ascend910, ascend310p, ascend910B1" - ) - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend910") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c100") - else() - message(FATAL_ERROR, "only AiCore inside") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS) - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend310p") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG 
"--cce-aicore-arch=dav-m200") - elseif(ASCEND_CORE_TYPE STREQUAL "VectorCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-m200-vec") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS - "-mllvm -cce-aicore-function-stack-size=16000 -mllvm -cce-aicore-fp-ceiling=2 -mllvm -cce-aicore-record-overflow=false") - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend910B1") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c220-cube") - elseif(ASCEND_CORE_TYPE STREQUAL "VectorCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c220-vec") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS - "-mllvm -cce-aicore-function-stack-size=16000 -mllvm -cce-aicore-record-overflow=false -mllvm -cce-aicore-addr-transform" - ) - endif() -endif() - -product_dir(${ASCEND_PRODUCT_TYPE} PRODUCT_UPPER) -set(_CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES - ${_CMAKE_ASCEND_INSTALL_PATH}/runtime/lib64 - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/simulator/${PRODUCT_UPPER}/lib - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${PRODUCT_UPPER} -) - -# link library -set(_CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES stdc++) -if(ASCEND_RUN_MODE STREQUAL "ONBOARD") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES runtime) -elseif(ASCEND_RUN_MODE STREQUAL "SIMULATOR") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES ) - if(ASCEND_PRODUCT_TYPE STREQUAL "ascend910") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES pem_davinci) - endif() - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES runtime_camodel) -else() - message(FATAL_ERROR - "ASCEND_RUN_MODE: ${ASCEND_RUN_MODE}\n" - "ASCEND_RUN_MODE must be one of the following: ONBOARD or SIMULATOR" - ) -endif() -list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES ascendcl) - -set(__IMPLICIT_LINKS) -foreach(dir ${_CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES}) - string(APPEND __IMPLICIT_LINKS " -L\"${dir}\"") -endforeach() -foreach(lib ${_CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES}) - if(${lib} MATCHES "/") - 
string(APPEND __IMPLICIT_LINKS " \"${lib}\"") - else() - string(APPEND __IMPLICIT_LINKS " -l${lib}") - endif() -endforeach() - -set(_CMAKE_CCE_HOST_IMPLICIT_INCLUDE_DIRECTORIES - ${_CMAKE_ASCEND_INSTALL_PATH}/acllib/include - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw/impl - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw/interface - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/include -) -set(__IMPLICIT_INCLUDES) -foreach(inc ${_CMAKE_CCE_HOST_IMPLICIT_INCLUDE_DIRECTORIES}) - string(APPEND __IMPLICIT_INCLUDES " -I\"${inc}\"") -endforeach() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeTestCCECompiler.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeTestCCECompiler.cmake deleted file mode 100644 index f00f227c1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/Modules/CMakeTestCCECompiler.cmake +++ /dev/null @@ -1 +0,0 @@ -set(CMAKE_CCE_COMPILER_WORKS 1 CACHE INTERNAL "") diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/cpu/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/cpu/CMakeLists.txt deleted file mode 100644 index 326e1f546..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/cpu/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# cpu -if (NOT DEFINED ENV{CMAKE_PREFIX_PATH}) - set(CMAKE_PREFIX_PATH ${ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/cmake) -endif() -find_package(tikicpulib REQUIRED) - -file(GLOB SRC_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -add_executable(${smoke_testcase}_cpu - ${SRC_FILES} -) - -target_include_directories(${smoke_testcase}_cpu PRIVATE - ${ASCEND_INSTALL_PATH}/acllib/include -) - 
-target_link_libraries(${smoke_testcase}_cpu PRIVATE - tikicpulib::${ASCEND_PRODUCT_TYPE} - ascendcl -) - -target_compile_options(${smoke_testcase}_cpu PRIVATE - -g -) - -set_target_properties(${smoke_testcase}_cpu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_cpu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/npu/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/npu/CMakeLists.txt deleted file mode 100644 index cb86b9708..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/cmake/npu/CMakeLists.txt +++ /dev/null @@ -1,85 +0,0 @@ -# npu -file(GLOB KERNEL_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -list(REMOVE_ITEM KERNEL_FILES "${CMAKE_SOURCE_DIR}/main.cpp") -set_source_files_properties(${KERNEL_FILES} PROPERTIES LANGUAGE CCE) - -file(GLOB MAIN_FILES - ${CMAKE_SOURCE_DIR}/main.cpp -) -set_source_files_properties(${MAIN_FILES} PROPERTIES LANGUAGE CCE) - -# =================================================================== -# exe mode: build a executable directly -add_executable(${smoke_testcase}_npu - ${KERNEL_FILES} - ${MAIN_FILES} -) - -target_compile_options(${smoke_testcase}_npu PRIVATE - -O2 - -std=c++17 -) - -target_compile_definitions(${smoke_testcase}_npu PRIVATE - TILING_KEY_VAR=0 -) - -set_target_properties(${smoke_testcase}_npu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_npu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) - -# =================================================================== -# so mode: build a shared library first, and dynamic link to build a executable -file(GLOB KERNEL_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -list(REMOVE_ITEM KERNEL_FILES "${CMAKE_SOURCE_DIR}/main.cpp") - -add_library(ascendc_kernels SHARED - ${KERNEL_FILES} -) - -target_compile_definitions(ascendc_kernels PRIVATE - TILING_KEY_VAR=0 -) - 
-target_compile_options(ascendc_kernels PRIVATE - -O2 - -std=c++17 -) - -set_target_properties(ascendc_kernels PROPERTIES - OUTPUT_NAME ascendc_kernels - LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -) -install(TARGETS ascendc_kernels - LIBRARY DESTINATION ${CMAKE_SOURCE_DIR} -) - -# =================================================================== -add_executable(${smoke_testcase}_lib_npu - ${MAIN_FILES} -) - -target_compile_options(${smoke_testcase}_lib_npu PRIVATE - -O2 - -std=c++17 -) - -target_link_directories(${smoke_testcase}_lib_npu PRIVATE - ${CMAKE_SOURCE_DIR} -) - -target_link_libraries(${smoke_testcase}_lib_npu PRIVATE - ascendc_kernels - -Wl,--as-needed -) - -# add_dependencies(${smoke_testcase}_lib_npu ${smoke_testcase}) -set_target_properties(${smoke_testcase}_lib_npu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_npu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/data_utils.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/data_utils.h deleted file mode 100644 index 042de8bb0..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/data_utils.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - */ -#ifndef DATA_UTILS_H -#define DATA_UTILS_H -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "acl/acl.h" - -typedef enum { - DT_UNDEFINED = -1, - FLOAT = 0, - HALF = 1, - INT8_T = 2, - INT32_T = 3, - UINT8_T = 4, - INT16_T = 6, - UINT16_T = 7, - UINT32_T = 8, - INT64_T = 9, - UINT64_T = 10, - DOUBLE = 11, - BOOL = 12, - STRING = 13, - COMPLEX64 = 16, - COMPLEX128 = 17, - BF16 = 27 -} printDataType; - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) 
fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) -#define CHECK_ACL(x) \ - do { \ - aclError __ret = x; \ - if (__ret != ACL_ERROR_NONE) { \ - std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ - } \ - } while (0); - -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file"); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } - - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - std::filebuf *buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); - file.close(); - return false; - } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. buffer is nullptr"); - return false; - } - - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); - return false; - } - - auto writeSize = write(fd, buffer, size); - (void) close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} - -template -void DoPrintData(const T *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow=16) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. data is nullptr"); - return; - } - - switch (dataType) { - case BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT8_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT8_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT16_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT16_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT32_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT32_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT64_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT64_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case HALF: - DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); - break; - case FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - 
break; - case DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } - std::cout << std::endl; -} -#endif // DATA_UTILS_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.cpp deleted file mode 100644 index 015231239..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include "kernel_operator.h" -#include "leakyrelu_custom_tiling.h" -using namespace AscendC; - -constexpr int32_t BUFFER_NUM = 2; - - -class KernelLeakyRelu { -public: - __aicore__ inline KernelLeakyRelu() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, uint32_t totalLength, uint32_t tileNum, float scalar) - { - ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); - this->blockLength = totalLength / GetBlockNum(); - this->tileNum = tileNum; - this->scalar = static_cast(scalar); - ASSERT(tileNum != 0 && "tile num can not be zero!"); - this->tileLength = this->blockLength / tileNum / BUFFER_NUM; - - xGm.SetGlobalBuffer((__gm__ half*)x + this->blockLength * GetBlockIdx(), this->blockLength); - yGm.SetGlobalBuffer((__gm__ half*)y + this->blockLength * GetBlockIdx(), this->blockLength); - - // pipe alloc memory to queue, the unit is Bytes - pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(half)); - pipe.InitBuffer(outQueueY, BUFFER_NUM, this->tileLength * sizeof(half)); - } - __aicore__ inline void Process() - { - // loop count need to be doubled, due to double buffer - int32_t loopCount = this->tileNum * BUFFER_NUM; - // tiling strategy, pipeline parallel - for (int32_t i = 0; i < loopCount; i++) { - CopyIn(i); - Compute(i); - CopyOut(i); - } - } - -private: - __aicore__ inline void CopyIn(int32_t progress) - { - 
LocalTensor xLocal = inQueueX.AllocTensor(); - DataCopy(xLocal, xGm[progress * tileLength], tileLength); - inQueueX.EnQue(xLocal); - } - __aicore__ inline void Compute(int32_t progress) - { - LocalTensor xLocal = inQueueX.DeQue(); - LocalTensor yLocal = outQueueY.AllocTensor(); - LeakyRelu(yLocal, xLocal, scalar, tileLength); - outQueueY.EnQue(yLocal); - inQueueX.FreeTensor(xLocal); - } - __aicore__ inline void CopyOut(int32_t progress) - { - // deque output tensor from VECOUT queue - LocalTensor yLocal = outQueueY.DeQue(); - // copy progress_th tile from local tensor to global tensor - DataCopy(yGm[progress * tileLength], yLocal, tileLength); - // free output tensor for reuse - outQueueY.FreeTensor(yLocal); - } - -private: - TPipe pipe; - // create queues for input, in this case depth is equal to buffer num - TQue inQueueX; - // create queue for output, in this case depth is equal to buffer num - TQue outQueueY; - GlobalTensor xGm, yGm; - uint32_t blockLength; - uint32_t tileNum; - uint32_t tileLength; - half scalar; -}; - -// implementation of kernel function -extern "C" __global__ __aicore__ void leakyrelu_custom(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) -{ - GET_TILING_DATA(tilingData, tiling); - KernelLeakyRelu op; - op.Init(x, y, tilingData.totalLength, tilingData.tileNum, tilingData.scalar); - op.Process(); -} - -#ifndef __CCE_KT_TEST__ -// call of kernel function -void leakyrelu_custom_do(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* x, uint8_t* y, - uint8_t* workspace, uint8_t* tiling) -{ - leakyrelu_custom<<>>(x, y, workspace, tiling); -} -#endif diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.py deleted file mode 100644 index 318ef2913..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom.py +++ 
/dev/null @@ -1,27 +0,0 @@ -import os -import stat -import numpy as np -OPEN_FILE_MODES_640 = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP -WRITE_FILE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC -np.random.seed(0) - -def gen_golden_data_simple(): - total_length_imm = 8 * 200 * 1024 - tile_num_imm = 8 - - total_length = np.array(total_length_imm, dtype=np.uint32) - tile_num = np.array(tile_num_imm, dtype=np.uint32) - scalar = np.array(0.1,dtype=np.float32) - tiling = (total_length,tile_num,scalar) - tiling_data = b''.join(x.tobytes() for x in tiling) - - with os.fdopen(os.open('./input/tiling.bin',WRITE_FILE_FLAGS, OPEN_FILE_MODES_640),"wb") as f: - f.write(tiling_data) - input_x = np.random.uniform(-100, 100, [8, 200, 1024]).astype(np.float16) - golden = np.where(input_x > 0, input_x, input_x * scalar).astype(np.float16) - input_x.tofile("./input/input_x.bin") - golden.tofile("./output/golden.bin") - - -if __name__ == '__main__': - gen_golden_data_simple() \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom_tiling.h deleted file mode 100644 index bdf7bc344..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/leakyrelu_custom_tiling.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef LEAKYRELU_CUSTOM_TILING_H -#define LEAKYRELU_CUSTOM_TILING_H - -#ifdef __CCE_KT_TEST__ -#define __aicore__ -#else -#define __aicore__ [aicore] -#endif - -inline __aicore__ int32_t AlignDiv32(int32_t n) -{ - return ((n + 31) & ~31) / 32; -} - -struct LeakyReluCustomTilingData -{ - uint32_t totalLength; - uint32_t tileNum; - float scalar; -}; - -#define CONVERT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer) \ - __ubuf__ tilingStruct *tilingDataPointer = \ - reinterpret_cast<__ubuf__ tilingStruct *>((__ubuf__ uint8_t *)(tilingPointer)); - 
-#ifdef __CCE_KT_TEST__ -#define INIT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer) \ - CONVERT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer); -#else - -#define INIT_TILING_DATA(tilingStruct, tilingDataPointer, tilingPointer) \ - __ubuf__ uint8_t *tilingUbPointer = (__ubuf__ uint8_t *)get_imm(0); \ - copy_gm_to_ubuf(((__ubuf__ uint8_t *)(tilingUbPointer)), ((__gm__ uint8_t*)(tilingPointer)), \ - 0, 1, AlignDiv32(sizeof(tilingStruct)), 0, 0); \ - CONVERT_TILING_DATA(tilingStruct, tilingDataPointer, tilingUbPointer); \ - pipe_barrier(PIPE_ALL); -#endif - -#define GET_TILING_DATA(tilingData, tilingPointer) \ - LeakyReluCustomTilingData tilingData; \ - INIT_TILING_DATA(LeakyReluCustomTilingData, tilingDataPointer, tilingPointer); \ - (tilingData).totalLength = tilingDataPointer->totalLength; \ - (tilingData).tileNum = tilingDataPointer->tileNum; \ - (tilingData).scalar = tilingDataPointer->scalar; -#endif diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/main.cpp deleted file mode 100644 index 49601d50a..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/main.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * This file constains code of cpu debug and npu code.We read data from bin file - * and write result to file. 
- */ -#include "data_utils.h" -#include "leakyrelu_custom_tiling.h" -#ifndef __CCE_KT_TEST__ -#include "acl/acl.h" -extern void leakyrelu_custom_do(uint32_t coreDim, void* l2ctrl, void* stream, - uint8_t *x, uint8_t *y, uint8_t *workspace, uint8_t *tiling); -#else -#include "tikicpulib.h" -extern "C" __global__ __aicore__ void leakyrelu_custom(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling); -#endif - -int32_t main(int32_t argc, char* argv[]) -{ - size_t tilingSize = sizeof(LeakyReluCustomTilingData); - size_t usrWorkSpaceSize = 4096; - size_t sysWorkSpaceSize = 16 * 1024 * 1024; - uint32_t blockDim = 8; - -#ifdef __CCE_KT_TEST__ - uint8_t *usrWorkSpace = (uint8_t *)AscendC::GmAlloc(usrWorkSpaceSize); - uint8_t *tiling = (uint8_t *)AscendC::GmAlloc(tilingSize); - ReadFile("./input/tiling.bin", tilingSize, tiling, tilingSize); - - size_t inputByteSize = blockDim * 200 *1024 * sizeof(u_int16_t); - size_t outputByteSize = blockDim * 200 *1024 * sizeof(u_int16_t); - - uint8_t *x = (uint8_t *)AscendC::GmAlloc(inputByteSize); - uint8_t *y = (uint8_t *)AscendC::GmAlloc(outputByteSize); - - ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); - - AscendC::SetKernelMode(KernelMode::AIV_MODE); - - - ICPU_RUN_KF(leakyrelu_custom, blockDim, x, y, usrWorkSpace, tiling); - - WriteFile("./output/output_y.bin", y, outputByteSize); - - AscendC::GmFree((void *)x); - AscendC::GmFree((void *)y); - AscendC::GmFree((void *)usrWorkSpace); - AscendC::GmFree((void *)tiling); -#else - CHECK_ACL(aclInit(nullptr)); - aclrtContext context; - int32_t deviceId = 0; - CHECK_ACL(aclrtSetDevice(deviceId)); - CHECK_ACL(aclrtCreateContext(&context, deviceId)); - aclrtStream stream = nullptr; - CHECK_ACL(aclrtCreateStream(&stream)); - - uint8_t *xHost, *yHost, *tilingHost, *workspaceHost; - uint8_t *xDevice, *yDevice, *tilingDevice, *workspaceDevice; - - - CHECK_ACL(aclrtMallocHost((void**)(&tilingHost), tilingSize)); - ReadFile("./input/tiling.bin", tilingSize, tilingHost, 
tilingSize); - CHECK_ACL(aclrtMallocHost((void**)(&workspaceHost), tilingSize)); - - size_t inputByteSize = blockDim * 200 *1024 * sizeof(u_int16_t); - size_t outputByteSize = blockDim * 200 *1024 * sizeof(u_int16_t); - size_t workSpaceByteSize = sysWorkSpaceSize + usrWorkSpaceSize; - - CHECK_ACL(aclrtMallocHost((void**)(&xHost), inputByteSize)); - CHECK_ACL(aclrtMallocHost((void**)(&yHost), inputByteSize)); - CHECK_ACL(aclrtMallocHost((void**)(&workspaceHost), workSpaceByteSize)); - CHECK_ACL(aclrtMalloc((void**)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&tilingDevice, tilingSize, ACL_MEM_MALLOC_HUGE_FIRST)); - CHECK_ACL(aclrtMalloc((void**)&workspaceDevice, workSpaceByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); - - ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); - CHECK_ACL(aclrtMemcpy(xDevice, inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); - CHECK_ACL(aclrtMemcpy(tilingDevice, tilingSize, tilingHost, tilingSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - leakyrelu_custom_do(blockDim, nullptr, stream,xDevice,yDevice,workspaceDevice,tilingDevice); - CHECK_ACL(aclrtSynchronizeStream(stream)); - CHECK_ACL(aclrtMemcpy(yHost, outputByteSize, yDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); - WriteFile("./output/output_y.bin", yHost, outputByteSize); - - CHECK_ACL(aclrtFree(xDevice)); - CHECK_ACL(aclrtFree(yDevice)); - CHECK_ACL(aclrtFree(workspaceDevice)); - CHECK_ACL(aclrtFree(tilingDevice)); - CHECK_ACL(aclrtFreeHost(xHost)); - CHECK_ACL(aclrtFreeHost(yHost)); - CHECK_ACL(aclrtFreeHost(workspaceHost)); - CHECK_ACL(aclrtFreeHost(tilingHost)); - - CHECK_ACL(aclrtDestroyStream(stream)); - CHECK_ACL(aclrtDestroyContext(context)); - CHECK_ACL(aclrtResetDevice(deviceId)); - CHECK_ACL(aclFinalize()); -#endif - return 0; -} \ No newline at end of file diff --git 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/readme.md b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/readme.md deleted file mode 100644 index 69f98bd61..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/readme.md +++ /dev/null @@ -1,16 +0,0 @@ -1、环境变量依赖 - 需要配置ASCEND_HOME_DIR,值为CANN包安装路径,例如export ASCEND_HOME_DIR=~/Ascend/ascend-toolkit/latest - -2、CPU调试 - 测试命令 - ``` - chmod +x run.sh - ./run.sh leakyrelu_custom ascend910 VectorCore cpu - ``` - -3、NPU调试 - 测试命令 - ``` - chmod +x run.sh - ./run.sh leakyrelu_custom ascend910 VectorCore npu - ``` diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/run.sh deleted file mode 100755 index f57666c5e..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/LeakyReLU/run.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/bash -clear;clear -# 清除之前遗留的文件 -rm -rf *.vcd *.dump *.log *.bin *.o *.so *pu build output/*.bin input/*.bin core*_summary_log profile_.*.toml stub_reg.log sim_log/ *.toml -# 不需要TIK打印出内存信息 -export PRINT_TIK_MEM_ACCESS=FALSE - -# 获取当前的目录 -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -); cd $CURRENT_DIR - -declare -A VersionMap -VersionMap["ascend910"]="Ascend910A" -VersionMap["ascend310p"]="Ascend310P1" -VersionMap["ascend610"]="Ascend610" -VersionMap["ascend910B1"]="Ascend910B1" - -# 指向昇腾软件包安装地址,导出环境变量 -if [ ! $ASCEND_HOME_DIR ]; then - export ASCEND_HOME_DIR=/usr/local/Ascend/latest -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - - -# 指定当前sample的算子文件名 -FILE_NAME=$1 - -# 指定芯片版本: ascend910, ascend610, ascend310p -SOC_VERSION=$2 -if [ ${SOC_VERSION}"x" = "x" ]; then - echo "ERROR: SOC_VERSION is not specified! please specify ascend910, ascend610, ascend310p or ascend910B1!" 
- exit -1 -fi - -# 指定运行的核: AiCore, VectorCore -CORE_TYPE=$3 -if [ ${CORE_TYPE}"x" = "x" ]; then - echo "WARNING: CORE_TYPE is not specified, using AiCore as default." - CORE_TYPE=AiCore -fi - -# 指定运行模式: cpu, npu -RUN_MODE=$4 -if [ ${RUN_MODE}"x" = "x" ]; then - echo "WARNING: RUN_MODE is not specified, using cpu as default." - RUN_MODE=cpu -fi - -# 生成计算输入数据和对比用的真值数据 -python3 $FILE_NAME.py - -function compile_and_execute() { - # 使用cmake编译cpu侧或者npu侧算子, SIMULATOR or ONBOARD - mkdir -p build; cd build; \ - cmake .. \ - -Dsmoke_testcase=$1 \ - -DASCEND_PRODUCT_TYPE=$2 \ - -DASCEND_CORE_TYPE=$3 \ - -DASCEND_RUN_MODE="SIMULATOR" \ - -DASCEND_INSTALL_PATH=$ASCEND_HOME_DIR - VERBOSE=1 cmake --build . --target ${1}_${4} - - cd - - - if [ $? -ne 0 ]; then - echo "ERROR: compile op on failed!" - return 1 - fi - echo "INFO: compile op on ${RUN_MODE} succeed!" - - # 执行生成的可执行文件 - (export LD_LIBRARY_PATH=`pwd`:$ASCEND_HOME_DIR/tools/simulator/${VersionMap[$SOC_VERSION]}/lib:$LD_LIBRARY_PATH && ./${1}_${4}) - if [ $? -ne 0 ]; then - echo "ERROR: execute op on ${RUN_MODE} failed!" - return 1 - fi - echo "INFO: execute op on ${RUN_MODE} succeed!" 
-} -compile_and_execute $FILE_NAME $SOC_VERSION $CORE_TYPE $RUN_MODE - -# 验证计算结果 -echo "md5sum: ";md5sum output/*.bin diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/CMakeLists.txt deleted file mode 120000 index c6a720352..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/CMakeLists.txt \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/data_utils.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/data_utils.h deleted file mode 120000 index 2cdb090e1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/data_utils.h +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/data_utils.h \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/input/.gitkeep b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/input/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/main.cpp deleted file mode 100644 index b03624617..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/main.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * This file constains code of cpu debug and npu code.We read data from bin file - * and write result to file. 
- */ -#include "data_utils.h" -#ifndef __CCE_KT_TEST__ -#include "acl/acl.h" -extern void matmul_custom_do(uint32_t coreDim, void* l2ctrl, void* stream, - uint8_t *param1, uint8_t *param2, uint8_t *param3); -#else -#include "tikicpulib.h" -extern "C" void matmul_custom(uint8_t *param1, uint8_t *param2, uint8_t *param3); -#endif - -int32_t main(int32_t argc, char* argv[]) -{ - size_t param1FileSize = 1024 * sizeof(uint16_t); // uint16_t represent half - size_t param2FileSize = 1024 * sizeof(uint16_t); // uint16_t represent half - size_t param3FileSize = 1024 * sizeof(float); - uint32_t blockDim = 1; - -#ifdef __CCE_KT_TEST__ - uint8_t *param1 = (uint8_t *)AscendC::GmAlloc(param1FileSize); - uint8_t *param2 = (uint8_t *)AscendC::GmAlloc(param2FileSize); - uint8_t *param3 = (uint8_t *)AscendC::GmAlloc(param3FileSize); - - ReadFile("./input/x1_gm.bin", param1FileSize, param1, param1FileSize); - ReadFile("./input/x2_gm.bin", param2FileSize, param2, param2FileSize); - - ICPU_RUN_KF(matmul_custom, blockDim, param1, param2, param3); - - WriteFile("./output/output.bin", param3, param3FileSize); - - AscendC::GmFree((void *)param1); - AscendC::GmFree((void *)param2); - AscendC::GmFree((void *)param3); -#else - CHECK_ACL(aclInit(nullptr)); - aclrtContext context; - int32_t deviceId = 0; - CHECK_ACL(aclrtSetDevice(deviceId)); - CHECK_ACL(aclrtCreateContext(&context, deviceId)); - aclrtStream stream = nullptr; - CHECK_ACL(aclrtCreateStream(&stream)); - - uint8_t *param1Host; - uint8_t *param1Device; - CHECK_ACL(aclrtMallocHost((void**)(¶m1Host), param1FileSize)); - CHECK_ACL(aclrtMalloc((void**)¶m1Device, param1FileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("./input/x1_gm.bin", param1FileSize, param1Host, param1FileSize); - CHECK_ACL(aclrtMemcpy(param1Device, param1FileSize, param1Host, param1FileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - uint8_t *param2Host; - uint8_t *param2Device; - CHECK_ACL(aclrtMallocHost((void**)(¶m2Host), param2FileSize)); - 
CHECK_ACL(aclrtMalloc((void**)¶m2Device, param2FileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("./input/x2_gm.bin", param2FileSize, param2Host, param2FileSize); - CHECK_ACL(aclrtMemcpy(param2Device, param2FileSize, param2Host, param2FileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - uint8_t *param3Host; - uint8_t *param3Device; - CHECK_ACL(aclrtMallocHost((void**)(¶m3Host), param3FileSize)); - CHECK_ACL(aclrtMalloc((void**)¶m3Device, param3FileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - - matmul_custom_do(blockDim, nullptr, stream, param1Device, param2Device, param3Device); - CHECK_ACL(aclrtSynchronizeStream(stream)); - - CHECK_ACL(aclrtMemcpy(param3Host, param3FileSize, param3Device, param3FileSize, ACL_MEMCPY_DEVICE_TO_HOST)); - WriteFile("./output/output.bin", param3Host, param3FileSize); - CHECK_ACL(aclrtFree(param3Device)); - CHECK_ACL(aclrtFreeHost(param3Host)); - - CHECK_ACL(aclrtDestroyStream(stream)); - CHECK_ACL(aclrtDestroyContext(context)); - CHECK_ACL(aclrtResetDevice(deviceId)); - CHECK_ACL(aclFinalize()); -#endif - return 0; -} \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.cpp deleted file mode 100644 index 663dcdd69..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * - * Function : c = a * b (matrix multiplication) - * This sample is a very basic sample that implements Matmul on Ascend plaform. 
- * In this sample: - * Shape of matrix a is [m, k]: [32, 32] - * Shape of matrix b is [k, n]: [32, 32] - * Shape of matrix c is [m, n]: [32, 32] - */ - -#include "kernel_operator.h" -using namespace AscendC; - -class KernelMatmul { -public: - __aicore__ inline KernelMatmul() - { - aSize = m * k; - bSize = k * n; - cSize = m * m; - mBlocks = m / 16; - nBlocks = n / 16; - kBlocks = k / 16; - } - __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR c) - { - aGM.SetGlobalBuffer((__gm__ half*)a); - bGM.SetGlobalBuffer((__gm__ half*)b); - cGM.SetGlobalBuffer((__gm__ float*)c); - pipe.InitBuffer(inQueueA1, 1, aSize * sizeof(half)); - pipe.InitBuffer(inQueueA2, 1, aSize * sizeof(half)); - pipe.InitBuffer(inQueueB1, 1, bSize * sizeof(half)); - pipe.InitBuffer(inQueueB2, 2, bSize * sizeof(half) / 2); - pipe.InitBuffer(outQueueCO1, 2, cSize * sizeof(float) / 2); - pipe.InitBuffer(outQueueCO2, 1, cSize * sizeof(float)); - } - __aicore__ inline void Process() - { - CopyIn(); - SplitA(); - - LocalTensor b1Local = inQueueB1.DeQue(); - LocalTensor a2Local = inQueueA2.DeQue(); - LocalTensor c2Local = outQueueCO2.AllocTensor(); - // split matrix b into 2 parts, [32, 16] and [32, 16] - for (int i = 0; i < 2; ++i) { - SplitB(b1Local, i); - Compute(a2Local); - Aggregate(c2Local, i); - } - inQueueB1.FreeTensor(b1Local); - inQueueA2.FreeTensor(a2Local); - outQueueCO2.EnQue(c2Local); - - CopyOut(); - } - -private: - __aicore__ inline void CopyND2NZ(const LocalTensor& dst, const GlobalTensor& src, const uint16_t height, - const uint16_t width) - { - for (int i = 0; i < width / 16; ++i) { - int srcOffset = i * 16; - int dstOffset = i * 16 * height; - DataCopy(dst[dstOffset], src[srcOffset], { height, 1, uint16_t(width / 16 - 1), 0 }); - } - } - __aicore__ inline void CopyIn() - { - LocalTensor a1Local = inQueueA1.AllocTensor(); - LocalTensor b1Local = inQueueB1.AllocTensor(); - - CopyND2NZ(a1Local, aGM, m, k); - CopyND2NZ(b1Local, bGM, k, n); - - inQueueA1.EnQue(a1Local); - 
inQueueB1.EnQue(b1Local); - } - __aicore__ inline void SplitA() - { - int srcOffset = 0; - int dstOffset = 0; - LocalTensor a1Local = inQueueA1.DeQue(); - LocalTensor a2Local = inQueueA2.AllocTensor(); - - // transform nz to zz - for (int i = 0; i < mBlocks; ++i) { - LoadData2dParams loadDataParams; - loadDataParams.repeatTimes = kBlocks; - loadDataParams.srcStride = mBlocks; - loadDataParams.ifTranspose = false; - - LoadData(a2Local[dstOffset], a1Local[srcOffset], loadDataParams); - - srcOffset += 16 * 16; - dstOffset += k * 16; - } - - inQueueA2.EnQue(a2Local); - inQueueA1.FreeTensor(a1Local); - } - __aicore__ inline void SplitB(const LocalTensor& b1Local, const int bSplitIdx) - { - LocalTensor b2Local = inQueueB2.AllocTensor(); - - // transform nz to zn - LoadData2dParams loadDataParams; - loadDataParams.repeatTimes = kBlocks; - loadDataParams.srcStride = 1; - loadDataParams.ifTranspose = true; - - LoadData(b2Local, b1Local[bSplitIdx * bSize / 2], loadDataParams); - - inQueueB2.EnQue(b2Local); - } - __aicore__ inline void Compute(const LocalTensor& a2Local) - { - LocalTensor b2Local = inQueueB2.DeQue(); - LocalTensor c1Local = outQueueCO1.AllocTensor(); - - Mmad(c1Local, a2Local, b2Local, { m, uint16_t(n / 2), k, false, 0, false, false, false }); - - outQueueCO1.EnQue(c1Local); - inQueueB2.FreeTensor(b2Local); - } - __aicore__ inline void Aggregate(const LocalTensor& c2Local, const int bSplitIdx) - { - LocalTensor c1Local = outQueueCO1.DeQue(); - - DataCopyParams dataCopyParams; - dataCopyParams.blockCount = 1; - dataCopyParams.blockLen = 2; - DataCopyEnhancedParams enhancedParams; - enhancedParams.blockMode = BlockMode::BLOCK_MODE_MATRIX; - DataCopy(c2Local[bSplitIdx * cSize / 2], c1Local, dataCopyParams, enhancedParams); - - outQueueCO1.FreeTensor(c1Local); - } - __aicore__ inline void CopyOut() - { - LocalTensor c2Local = outQueueCO2.DeQue(); - - // transform nz to nd - for (int i = 0; i < nBlocks; ++i) { - DataCopy(cGM[i * 16], c2Local[i * m * 16], { m, 2, 
0, uint16_t((nBlocks - 1) * 2) }); - } - - outQueueCO2.FreeTensor(c2Local); - } - -private: - TPipe pipe; - - TQue inQueueA1; - TQue inQueueA2; - TQue inQueueB1; - TQue inQueueB2; - // dst queue - TQue outQueueCO1; - TQue outQueueCO2; - - GlobalTensor aGM, bGM; - GlobalTensor cGM; - - uint16_t m = 32; - uint16_t n = 32; - uint16_t k = 32; - - uint16_t aSize, bSize, cSize, mBlocks, nBlocks, kBlocks; -}; - -extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADDR c) -{ - KernelMatmul op; - op.Init(a, b, c); - op.Process(); -} - -#ifndef __CCE_KT_TEST__ -// call of kernel function -void matmul_custom_do(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* a, uint8_t* b, uint8_t* c) -{ - matmul_custom<<>>(a, b, c); -} -#endif diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.py deleted file mode 100644 index 7203e4af3..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/matmul_custom.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/python3 -# -*- coding:utf-8 -*- -# Copyright 2022-2023 Huawei Technologies Co., Ltd -import numpy as np - - -def gen_golden_data(): - x1_gm_type = np.float16 - x2_gm_type = np.float16 - - M = 32 - N = 32 - K = 32 - - x1_gm = np.random.randint(1, 10, [M, K]).astype(x1_gm_type) - x2_gm = np.random.randint(1, 10, [K, N]).astype(x2_gm_type) - golden = np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32)).astype(np.float32) - - x1_gm.tofile("./input/x1_gm.bin") - x2_gm.tofile("./input/x2_gm.bin") - golden.tofile("./output/golden.bin") - - -if __name__ == "__main__": - gen_golden_data() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/output/.gitkeep b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/output/.gitkeep deleted file mode 
100644 index e69de29bb..000000000 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/run.sh deleted file mode 120000 index c9ece8cbc..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/MatMul/run.sh +++ /dev/null @@ -1 +0,0 @@ -../kernel_template/run.sh \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/CMakeLists.txt deleted file mode 100644 index 58b490129..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -set(CMAKE_SYSTEM_NAME Linux) -set(CMAKE_CXX_STANDARD 17) - -set(CCE_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) -list(APPEND CMAKE_MODULE_PATH ${CCE_CMAKE_PATH}) -project(kernel_samples LANGUAGES CCE CXX) - -add_subdirectory(cmake/cpu) -add_subdirectory(cmake/npu) - -if(ASCEND_CUSTOM_TILING STREQUAL "CUSTOM_TILING") - add_subdirectory(cmake/tiling) -endif() \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCECompiler.cmake.in b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCECompiler.cmake.in deleted file mode 100644 index a9b5688ff..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCECompiler.cmake.in +++ /dev/null @@ -1,5 +0,0 @@ -set(CMAKE_CCE_COMPILER "@CMAKE_CCE_COMPILER@") -set(CMAKE_CCE_COMPILER_LOADED 1) -set(CMAKE_CCE_SOURCE_FILE_EXTENSIONS @CMAKE_CCE_SOURCE_FILE_EXTENSIONS@) 
-set(CMAKE_CCE_OUTPUT_EXTENSION @CMAKE_CCE_OUTPUT_EXTENSION@) -set(CMAKE_CCE_COMPILER_ENV_VAR "@CMAKE_CCE_COMPILER_ENV_VAR@") diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEFunction.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEFunction.cmake deleted file mode 100644 index 246ce9022..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEFunction.cmake +++ /dev/null @@ -1,20 +0,0 @@ -function(product_dir str newstr) - if ("x${str}" STREQUAL "xascend610") - set(${newstr} "Ascend610" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend910") - set(${newstr} "Ascend910A" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend310") - set(${newstr} "Ascend310" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend310p") - set(${newstr} "Ascend310P1" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend920") - set(${newstr} "Ascend920A" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend910b") - set(${newstr} "Ascend910B1" PARENT_SCOPE) - else() - string(SUBSTRING ${str} 0 1 _headlower) - string(SUBSTRING ${str} 1 -1 _leftstr) - string(TOUPPER ${_headlower} _headupper) - set(${newstr} "${_headupper}${_leftstr}" PARENT_SCOPE) - endif() -endfunction() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEInformation.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEInformation.cmake deleted file mode 100644 index 0eaef0348..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeCCEInformation.cmake +++ /dev/null @@ -1,35 +0,0 @@ -include(CMakeCommonLanguageInclude) - -set(CMAKE_INCLUDE_FLAG_CCE "-I") - -if(UNIX) - 
set(CMAKE_CCE_OUTPUT_EXTENSION .o) -else() - set(CMAKE_CCE_OUTPUT_EXTENSION .obj) -endif() - -set(_INCLUDED_FILE 0) -set(CMAKE_SHARED_LIBRARY_CCE_FLAGS -fPIC) -set(CMAKE_SHARED_LIBRARY_CREATE_CCE_FLAGS -shared) -set(CMAKE_LIBRARY_CREATE_CCE_FLAGS "--cce-fatobj-link ${_CMAKE_COMPILE_AS_CCE_FLAG}") - -if(NOT CMAKE_CCE_COMPILE_OBJECT) - set(CMAKE_CCE_COMPILE_OBJECT - " -xcce ${__IMPLICIT_INCLUDES} ${_CMAKE_CCE_BUILTIN_INCLUDE_PATH} ${_CMAKE_COMPILE_AS_CCE_FLAG} ${_CMAKE_CCE_COMPILE_OPTIONS} ${_CMAKE_CCE_COMMON_COMPILE_OPTIONS} -pthread -o -c ") -endif() - -if(NOT CMAKE_CCE_CREATE_SHARED_LIBRARY) - set(CMAKE_CCE_CREATE_SHARED_LIBRARY - " ${CMAKE_LIBRARY_CREATE_CCE_FLAGS} -o ") -endif() - -if(NOT CMAKE_CCE_CREATE_SHARED_MODULE) - set(CMAKE_CCE_CREATE_SHARED_MODULE ${CMAKE_CCE_CREATE_SHARED_LIBRARY}) -endif() - -if(NOT CMAKE_CCE_LINK_EXECUTABLE) - set(CMAKE_CCE_LINK_EXECUTABLE - " ${CMAKE_LIBRARY_CREATE_CCE_FLAGS} -o ${__IMPLICIT_LINKS}") -endif() - -set(CMAKE_CCE_INFORMATION_LOADED 1) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeDetermineCCECompiler.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeDetermineCCECompiler.cmake deleted file mode 100644 index f1ca44350..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeDetermineCCECompiler.cmake +++ /dev/null @@ -1,123 +0,0 @@ -find_program(CMAKE_CCE_COMPILER NAMES "ccec" PATHS "$ENV{PATH}" DOC "CCE Compiler") -include(CMakeCCEFunction) - -mark_as_advanced(CMAKE_CCE_COMPILER) - -message(STATUS "CMAKE_CCE_COMPILER: " ${CMAKE_CCE_COMPILER}) -set(CMAKE_CCE_SOURCE_FILE_EXTENSIONS cce;cpp) -set(CMAKE_CCE_COMPILER_ENV_VAR "CCE") -message(STATUS "CMAKE_CURRENT_LIST_DIR: " ${CMAKE_CURRENT_LIST_DIR}) - -# configure all variables set in this file 
-configure_file(${CMAKE_CURRENT_LIST_DIR}/CMakeCCECompiler.cmake.in - ${CMAKE_PLATFORM_INFO_DIR}/CMakeCCECompiler.cmake - @ONLY -) - -message(STATUS "ASCEND_PRODUCT_TYPE:\n" " ${ASCEND_PRODUCT_TYPE}") -message(STATUS "ASCEND_CORE_TYPE:\n" " ${ASCEND_CORE_TYPE}") -message(STATUS "ASCEND_INSTALL_PATH:\n" " ${ASCEND_INSTALL_PATH}") - -if(DEFINED ASCEND_INSTALL_PATH) - set(_CMAKE_ASCEND_INSTALL_PATH ${ASCEND_INSTALL_PATH}) -else() - message(FATAL_ERROR - "no, installation path found, should passing -DASCEND_INSTALL_PATH= in cmake" - ) - set(_CMAKE_ASCEND_INSTALL_PATH) -endif() - - -if(DEFINED ASCEND_PRODUCT_TYPE) - set(_CMAKE_CCE_COMMON_COMPILE_OPTIONS "--cce-auto-sync") - if(ASCEND_PRODUCT_TYPE STREQUAL "") - message(FATAL_ERROR "ASCEND_PRODUCT_TYPE must be non-empty if set.") - elseif(ASCEND_PRODUCT_TYPE AND NOT ASCEND_PRODUCT_TYPE MATCHES "^ascend[0-9][0-9][0-9][a-zA-Z]?[1-9]?$") - message(FATAL_ERROR - "ASCEND_PRODUCT_TYPE: ${ASCEND_PRODUCT_TYPE}\n" - "is not one of the following: ascend910, ascend310p, ascend910B1" - ) - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend910") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c100") - else() - message(FATAL_ERROR, "only AiCore inside") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS) - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend310p") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-m200") - elseif(ASCEND_CORE_TYPE STREQUAL "VectorCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-m200-vec") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS - "-mllvm -cce-aicore-function-stack-size=16000 -mllvm -cce-aicore-fp-ceiling=2 -mllvm -cce-aicore-record-overflow=false") - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend910B1") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c220-cube") - elseif(ASCEND_CORE_TYPE STREQUAL "VectorCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG 
"--cce-aicore-arch=dav-c220-vec") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS - "-mllvm -cce-aicore-function-stack-size=16000 -mllvm -cce-aicore-record-overflow=false -mllvm -cce-aicore-addr-transform" - ) - endif() -endif() - -product_dir(${ASCEND_PRODUCT_TYPE} PRODUCT_UPPER) -set(_CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES - ${_CMAKE_ASCEND_INSTALL_PATH}/runtime/lib64 - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/simulator/${PRODUCT_UPPER}/lib - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${PRODUCT_UPPER} -) - -# link library -set(_CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES stdc++) -if(ASCEND_RUN_MODE STREQUAL "ONBOARD") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES runtime) -elseif(ASCEND_RUN_MODE STREQUAL "SIMULATOR") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES ) - if(ASCEND_PRODUCT_TYPE STREQUAL "ascend910") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES pem_davinci) - endif() - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES runtime_camodel) -else() - message(FATAL_ERROR - "ASCEND_RUN_MODE: ${ASCEND_RUN_MODE}\n" - "ASCEND_RUN_MODE must be one of the following: ONBOARD or SIMULATOR" - ) -endif() -list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES ascendcl) - -if(ASCEND_CUSTOM_TILING STREQUAL "CUSTOM_TILING") -elseif(ASCEND_CUSTOM_TILING STREQUAL "NO_CUSTOM_TILING") -else() - message(FATAL_ERROR - "ASCEND_CUSTOM_TILING: ${ASCEND_CUSTOM_TILING}\n" - "ASCEND_CUSTOM_TILING must be one of the following: CUSTOM_TILING or NO_CUSTOM_TILING" - ) -endif() - -set(__IMPLICIT_LINKS) -foreach(dir ${_CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES}) - string(APPEND __IMPLICIT_LINKS " -L\"${dir}\"") -endforeach() -foreach(lib ${_CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES}) - if(${lib} MATCHES "/") - string(APPEND __IMPLICIT_LINKS " \"${lib}\"") - else() - string(APPEND __IMPLICIT_LINKS " -l${lib}") - endif() -endforeach() - -set(_CMAKE_CCE_HOST_IMPLICIT_INCLUDE_DIRECTORIES - ${_CMAKE_ASCEND_INSTALL_PATH}/acllib/include - 
${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw/impl - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw/interface - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/include -) -set(__IMPLICIT_INCLUDES) -foreach(inc ${_CMAKE_CCE_HOST_IMPLICIT_INCLUDE_DIRECTORIES}) - string(APPEND __IMPLICIT_INCLUDES " -I\"${inc}\"") -endforeach() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeTestCCECompiler.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeTestCCECompiler.cmake deleted file mode 100644 index f00f227c1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/Modules/CMakeTestCCECompiler.cmake +++ /dev/null @@ -1 +0,0 @@ -set(CMAKE_CCE_COMPILER_WORKS 1 CACHE INTERNAL "") diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/cpu/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/cpu/CMakeLists.txt deleted file mode 100644 index 326e1f546..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/cpu/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# cpu -if (NOT DEFINED ENV{CMAKE_PREFIX_PATH}) - set(CMAKE_PREFIX_PATH ${ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/cmake) -endif() -find_package(tikicpulib REQUIRED) - -file(GLOB SRC_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -add_executable(${smoke_testcase}_cpu - ${SRC_FILES} -) - -target_include_directories(${smoke_testcase}_cpu PRIVATE - ${ASCEND_INSTALL_PATH}/acllib/include -) - -target_link_libraries(${smoke_testcase}_cpu PRIVATE - tikicpulib::${ASCEND_PRODUCT_TYPE} - ascendcl -) - -target_compile_options(${smoke_testcase}_cpu PRIVATE - -g -) - 
-set_target_properties(${smoke_testcase}_cpu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_cpu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/npu/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/npu/CMakeLists.txt deleted file mode 100644 index cb86b9708..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/npu/CMakeLists.txt +++ /dev/null @@ -1,85 +0,0 @@ -# npu -file(GLOB KERNEL_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -list(REMOVE_ITEM KERNEL_FILES "${CMAKE_SOURCE_DIR}/main.cpp") -set_source_files_properties(${KERNEL_FILES} PROPERTIES LANGUAGE CCE) - -file(GLOB MAIN_FILES - ${CMAKE_SOURCE_DIR}/main.cpp -) -set_source_files_properties(${MAIN_FILES} PROPERTIES LANGUAGE CCE) - -# =================================================================== -# exe mode: build a executable directly -add_executable(${smoke_testcase}_npu - ${KERNEL_FILES} - ${MAIN_FILES} -) - -target_compile_options(${smoke_testcase}_npu PRIVATE - -O2 - -std=c++17 -) - -target_compile_definitions(${smoke_testcase}_npu PRIVATE - TILING_KEY_VAR=0 -) - -set_target_properties(${smoke_testcase}_npu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_npu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) - -# =================================================================== -# so mode: build a shared library first, and dynamic link to build a executable -file(GLOB KERNEL_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -list(REMOVE_ITEM KERNEL_FILES "${CMAKE_SOURCE_DIR}/main.cpp") - -add_library(ascendc_kernels SHARED - ${KERNEL_FILES} -) - -target_compile_definitions(ascendc_kernels PRIVATE - TILING_KEY_VAR=0 -) - -target_compile_options(ascendc_kernels PRIVATE - -O2 - -std=c++17 -) - -set_target_properties(ascendc_kernels PROPERTIES - OUTPUT_NAME ascendc_kernels - 
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -) -install(TARGETS ascendc_kernels - LIBRARY DESTINATION ${CMAKE_SOURCE_DIR} -) - -# =================================================================== -add_executable(${smoke_testcase}_lib_npu - ${MAIN_FILES} -) - -target_compile_options(${smoke_testcase}_lib_npu PRIVATE - -O2 - -std=c++17 -) - -target_link_directories(${smoke_testcase}_lib_npu PRIVATE - ${CMAKE_SOURCE_DIR} -) - -target_link_libraries(${smoke_testcase}_lib_npu PRIVATE - ascendc_kernels - -Wl,--as-needed -) - -# add_dependencies(${smoke_testcase}_lib_npu ${smoke_testcase}) -set_target_properties(${smoke_testcase}_lib_npu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_npu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/tiling/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/tiling/CMakeLists.txt deleted file mode 100644 index 2e5246345..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/cmake/tiling/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -cmake_minimum_required(VERSION 2.8) -# 声明一个 cmake 工程 -project(${smoke_testcase}_tiling) - -add_executable(${smoke_testcase}_tiling - ${CMAKE_SOURCE_DIR}/custom_tiling/main.cpp -) - -message(STATUS "Ascend install path is : ${ASCEND_INSTALL_PATH}") - -target_include_directories(${smoke_testcase}_tiling PRIVATE - ${ASCEND_INSTALL_PATH}/include/ - ${ASCEND_INSTALL_PATH}/runtime/include/ - ${ASCEND_INSTALL_PATH}/runtime/include/tiling/ -) - -target_link_directories(${smoke_testcase}_tiling PRIVATE - ${ASCEND_INSTALL_PATH}/lib64/ - ${ASCEND_INSTALL_PATH}/runtime/lib64/ -) - -target_compile_options(${smoke_testcase}_tiling PRIVATE - -g - -std=c++17 -) - -target_link_libraries(${smoke_testcase}_tiling PRIVATE - tiling_api - -Wl,--no-as-needed - c_sec - graph - register - 
-Wl,--as-needed -) - -set_target_properties(${smoke_testcase}_tiling PROPERTIES - OUTPUT_NAME ${smoke_testcase}_tiling - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/custom_tiling/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/custom_tiling/main.cpp deleted file mode 100644 index 697119497..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/custom_tiling/main.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include "tiling_api.h" -#include -#include -#include -#include -#include -using namespace matmul_tiling; -using namespace std; - -void WriteTilingFile(optiling::TCubeTiling *tilingData) { - uint32_t tilingSize = tilingData->GetDataSize(); - char *buf = (char *)malloc(tilingSize); - tilingData->SaveToBuffer(buf, tilingSize); - ofstream outfile("input/tiling.bin", ios::out | ios::binary); - if (!outfile) { - cout << "Failed to open file." 
<< endl; - return; - } - - outfile.write(buf, tilingSize); - outfile.close(); -} - -int main(int argc, char *argv[]) -{ - int M = 512; - int N = 1024; - int K = 512; - TPosition leftPos = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; - int transposeA = 0; - - TPosition rightPos = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; - int transposeB = 0; - - TPosition resPos = TPosition::GM; - CubeFormat resFormat = CubeFormat::ND; - DataType resDtype = DataType::DT_FLOAT; - - TPosition biasPos = TPosition::GM; - CubeFormat biasFormat = CubeFormat::ND; - DataType biasDtype = DataType::DT_FLOAT; - int isBias = 0; - - int usedCoreNum = 1; - int runMode = 0; - - // single core mode: runMode = 0 - // multi core mode: runMode = 1 - if (runMode == 0) { - optiling::TCubeTiling tilingData; - tilingData.set_usedCoreNum(usedCoreNum); - MatmulApiTiling tilingApi; - tilingApi.SetAType(leftPos, leftFormat, leftDtype, bool(transposeA)); - tilingApi.SetBType(rightPos, rightFormat, rightDtype, bool(transposeB)); - tilingApi.SetCType(resPos, resFormat, resDtype); - tilingApi.SetBiasType(biasPos, biasFormat, biasDtype); - - tilingApi.SetShape(M, N, K); - tilingApi.SetOrgShape(M, N, K); - tilingApi.SetBias(bool(isBias)); - - tilingApi.SetBufferSpace(-1, -1, -1); - int64_t res = tilingApi.GetTiling(tilingData); - if (res == -1) { - std::cout << "gen tiling failed" << std::endl; - } else { - WriteTilingFile(&tilingData); - } - } else if (runMode = 1) { - optiling::TCubeTiling tilingData; - tilingData.set_usedCoreNum(usedCoreNum); - MultiCoreMatmulTiling tilingApi; - tilingApi.SetDim(usedCoreNum); - tilingApi.SetAType(leftPos, leftFormat, leftDtype, bool(transposeA)); - tilingApi.SetBType(rightPos, rightFormat, rightDtype, bool(transposeB)); - tilingApi.SetCType(resPos, resFormat, resDtype); - tilingApi.SetBiasType(biasPos, biasFormat, biasDtype); - - tilingApi.SetOrgShape(M, N, K); 
- tilingApi.SetShape(M, N, K); - tilingApi.SetBias(bool(isBias)); - - tilingApi.SetBufferSpace(-1, -1, -1); - int64_t res = tilingApi.GetTiling(tilingData); - if (res == -1) { - std::cout << "gen tiling failed" << std::endl; - } else { - WriteTilingFile(&tilingData); - } - } -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/data_utils.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/data_utils.h deleted file mode 100644 index 042de8bb0..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/data_utils.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - */ -#ifndef DATA_UTILS_H -#define DATA_UTILS_H -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "acl/acl.h" - -typedef enum { - DT_UNDEFINED = -1, - FLOAT = 0, - HALF = 1, - INT8_T = 2, - INT32_T = 3, - UINT8_T = 4, - INT16_T = 6, - UINT16_T = 7, - UINT32_T = 8, - INT64_T = 9, - UINT64_T = 10, - DOUBLE = 11, - BOOL = 12, - STRING = 13, - COMPLEX64 = 16, - COMPLEX128 = 17, - BF16 = 27 -} printDataType; - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) 
fprintf(stdout, "[ERROR] " fmt "\n", ##args) -#define CHECK_ACL(x) \ - do { \ - aclError __ret = x; \ - if (__ret != ACL_ERROR_NONE) { \ - std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ - } \ - } while (0); - -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file"); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } - - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - std::filebuf *buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); - file.close(); - return false; - } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. buffer is nullptr"); - return false; - } - - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); - return false; - } - - auto writeSize = write(fd, buffer, size); - (void) close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} - -template -void DoPrintData(const T *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow=16) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. data is nullptr"); - return; - } - - switch (dataType) { - case BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT8_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT8_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT16_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT16_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT32_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT32_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT64_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT64_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case HALF: - DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); - break; - case FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - 
break; - case DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } - std::cout << std::endl; -} -#endif // DATA_UTILS_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/input/.gitkeep b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/input/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/main.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/main.cpp deleted file mode 100644 index eb4b026f8..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/main.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - * This file constains code of cpu debug and npu code.We read data from bin file - * and write result to file. 
- */ -#include "data_utils.h" -#include -#ifndef __CCE_KT_TEST__ -#include "acl/acl.h" -extern void matmul_custom_do(uint32_t coreDim, void* l2ctrl, void* stream, - uint8_t *param1, uint8_t *param2, uint8_t *param3, uint8_t *param4); -#else -#include "tikicpulib.h" -extern "C" void matmul_custom(uint8_t *param1, uint8_t *param2, uint8_t *param3, uint8_t *param4); -#endif - -int32_t main(int32_t argc, char* argv[]) -{ - size_t param1FileSize = 512 * 512 * sizeof(uint16_t); // uint16_t represent half - size_t param2FileSize = 512 * 1024 * sizeof(uint16_t); // uint16_t represent half - size_t param3FileSize = 512 * 1024 * sizeof(float); - size_t param4FileSize = 28 * sizeof(uint32_t); - uint32_t blockDim = 1; - -#ifdef __CCE_KT_TEST__ - uint8_t *param1 = (uint8_t *)AscendC::GmAlloc(param1FileSize); - uint8_t *param2 = (uint8_t *)AscendC::GmAlloc(param2FileSize); - uint8_t *param3 = (uint8_t *)AscendC::GmAlloc(param3FileSize); - uint8_t *param4 = (uint8_t *)AscendC::GmAlloc(param4FileSize); - - ReadFile("./input/x1_gm.bin", param1FileSize, param1, param1FileSize); - // PrintData(param1, 16, printDataType::HALF); - ReadFile("./input/x2_gm.bin", param2FileSize, param2, param2FileSize); - // PrintData(param2, 16, printDataType::HALF); - ReadFile("./input/tiling.bin", param4FileSize, param4, param4FileSize); - // PrintData(param4, 16, printDataType::UINT32_T); - - ICPU_RUN_KF(matmul_custom, blockDim, param1, param2, param3, param4); - - // PrintData(param3, 16, printDataType::FLOAT); - WriteFile("./output/output.bin", param3, param3FileSize); - - AscendC::GmFree((void *)param1); - AscendC::GmFree((void *)param2); - AscendC::GmFree((void *)param3); - AscendC::GmFree((void *)param4); -#else - CHECK_ACL(aclInit(nullptr)); - aclrtContext context; - int32_t deviceId = 0; - CHECK_ACL(aclrtSetDevice(deviceId)); - CHECK_ACL(aclrtCreateContext(&context, deviceId)); - aclrtStream stream = nullptr; - CHECK_ACL(aclrtCreateStream(&stream)); - - uint8_t *param1Host; - uint8_t 
*param1Device; - CHECK_ACL(aclrtMallocHost((void**)(¶m1Host), param1FileSize)); - CHECK_ACL(aclrtMalloc((void**)¶m1Device, param1FileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("./input/x1_gm.bin", param1FileSize, param1Host, param1FileSize); - // PrintData(param1Host, 16, printDataType::HALF); - CHECK_ACL(aclrtMemcpy(param1Device, param1FileSize, param1Host, param1FileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - uint8_t *param2Host; - uint8_t *param2Device; - CHECK_ACL(aclrtMallocHost((void**)(¶m2Host), param2FileSize)); - CHECK_ACL(aclrtMalloc((void**)¶m2Device, param2FileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("./input/x2_gm.bin", param2FileSize, param2Host, param2FileSize); - // PrintData(param2Host, 16, printDataType::HALF); - CHECK_ACL(aclrtMemcpy(param2Device, param2FileSize, param2Host, param2FileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - uint8_t *param4Host; - uint8_t *param4Device; - CHECK_ACL(aclrtMallocHost((void**)(¶m4Host), param4FileSize)); - CHECK_ACL(aclrtMalloc((void**)¶m4Device, param4FileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - ReadFile("./input/tiling.bin", param4FileSize, param4Host, param4FileSize); - // PrintData(param4Host, 16, printDataType::UINT32_T); - CHECK_ACL(aclrtMemcpy(param4Device, param4FileSize, param4Host, param4FileSize, ACL_MEMCPY_HOST_TO_DEVICE)); - - uint8_t *param3Host; - uint8_t *param3Device; - CHECK_ACL(aclrtMallocHost((void**)(¶m3Host), param3FileSize)); - CHECK_ACL(aclrtMalloc((void**)¶m3Device, param3FileSize, ACL_MEM_MALLOC_HUGE_FIRST)); - - matmul_custom_do(blockDim, nullptr, stream, param1Device, param2Device, param3Device, param4Device); - CHECK_ACL(aclrtSynchronizeStream(stream)); - - CHECK_ACL(aclrtMemcpy(param3Host, param3FileSize, param3Device, param3FileSize, ACL_MEMCPY_DEVICE_TO_HOST)); - // PrintData(param3Host, 16, printDataType::FLOAT); - WriteFile("./output/output.bin", param3Host, param3FileSize); - CHECK_ACL(aclrtFree(param3Device)); - CHECK_ACL(aclrtFreeHost(param3Host)); - - 
CHECK_ACL(aclrtDestroyStream(stream)); - CHECK_ACL(aclrtDestroyContext(context)); - CHECK_ACL(aclrtResetDevice(deviceId)); - CHECK_ACL(aclFinalize()); -#endif - return 0; -} \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.cpp deleted file mode 100644 index b0e092a2f..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - */ - -#include "kernel_operator.h" -#include "lib/matrix/matmul/matmul.h" -using namespace AscendC; -using namespace matmul; - -__aicore__ inline void CopyTiling(TCubeTiling* tiling, GM_ADDR tilingGM) -{ - uint32_t* ptr = reinterpret_cast(tiling); - auto tiling32 = reinterpret_cast<__gm__ uint32_t*>(tilingGM); - - for (int i = 0; i < sizeof(TCubeTiling) / sizeof(uint32_t); i++, ptr++) { - *ptr = *(tiling32 + i); - } - return; -} - -extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADDR c, GM_ADDR tilingGm) -{ - // cube core cases, ignore vector core - if (g_coreType == AIV) { - return; - } - using A_T = half; - using B_T = half; - using C_T = float; - using BiasT = float; - - TPipe que; - TCubeTiling tiling; - CopyTiling(&tiling, tilingGm); - - if (GetBlockIdx() >= tiling.usedCoreNum) { - return; - } - - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - - aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ A_T*>(a), tiling.M * tiling.K); - bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ B_T*>(b), tiling.K * tiling.N); - cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ C_T*>(c), tiling.M * tiling.N); - - int offsetA = 0; - int offsetB = 0; - int offsetC = 0; - - auto gmA = aGlobal[offsetA]; - auto gmB = 
bGlobal[offsetB]; - auto gmC = cGlobal[offsetC]; - - typedef MatmulType aType; - typedef MatmulType bType; - typedef MatmulType cType; - typedef MatmulType biasType; - MatmulImpl mm; - mm.SetSubBlockIdx(0); - mm.Init(&tiling, &que); - - mm.SetTensorA(gmA); - mm.SetTensorB(gmB); - mm.IterateAll(gmC); -} - -#ifndef __CCE_KT_TEST__ -// call of kernel function -void matmul_custom_do(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* a, uint8_t* b, uint8_t* c, uint8_t* tilingGm) -{ - matmul_custom<<>>(a, b, c, tilingGm); -} -#endif diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.py deleted file mode 100644 index 059574bd3..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/matmul_custom.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/python3 -# -*- coding:utf-8 -*- -# Copyright 2022-2023 Huawei Technologies Co., Ltd -import numpy as np - - -def gen_golden_data(): - x1_gm_type = np.float16 - x2_gm_type = np.float16 - - M = 512 - N = 1024 - K = 512 - - x1_gm = np.random.randint(1, 10, [M, K]).astype(x1_gm_type) - x2_gm = np.random.randint(1, 10, [K, N]).astype(x2_gm_type) - golden = np.matmul(x1_gm.astype(np.float32), x2_gm.astype(np.float32)).astype(np.float32) - - x1_gm.tofile("./input/x1_gm.bin") - x2_gm.tofile("./input/x2_gm.bin") - golden.tofile("./output/golden.bin") - - -if __name__ == "__main__": - gen_golden_data() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/readme.md b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/readme.md deleted file mode 100644 index ae72ca216..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/readme.md +++ /dev/null @@ -1,16 +0,0 @@ 
-1、环境变量依赖 - 需要配置ASCEND_HOME_DIR,值为CANN包安装路径,例如export ASCEND_HOME_DIR=~/Ascend/ascend-toolkit/latest - -2、CPU调试 - 测试命令 - ``` - chmod +x run.sh - ./run.sh matmul_custom ascend910 AiCore cpu ONBOARD CUSTOM_TILING - ``` - -3、NPU调试 - 测试命令 - ``` - chmod +x run.sh - ./run.sh matmul_custom ascend910 AiCore npu SIMULATOR CUSTOM_TILING - ``` diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/run.sh deleted file mode 100755 index 30a5f5181..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/Matmul_high_level/run.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash -clear;clear -# 清除之前遗留的文件 -rm -rf *.vcd *.dump *.log *.bin *.o *.so *pu build output/*.bin input/*.bin -# 不需要TIK打印出内存信息 -export PRINT_TIK_MEM_ACCESS=FALSE - -# 获取当前的目录 -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -); cd $CURRENT_DIR - -declare -A VersionMap -VersionMap["ascend910"]="Ascend910A" -VersionMap["ascend310p"]="Ascend310P1" -VersionMap["ascend910B1"]="Ascend910B1" - -# 指向昇腾软件包安装地址,导出环境变量 - -if [ ! $ASCEND_HOME_DIR ]; then - export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - - -# 指定当前sample的算子文件名 -FILE_NAME=$1 - -# 指定芯片版本: ascend910, ascend310p -SOC_VERSION=$2 -if [ ${SOC_VERSION}"x" = "x" ]; then - echo "ERROR: SOC_VERSION is not specified! please specify ascend910, ascend310p or ascend910B1!" - exit -1 -fi - -# 指定运行的核: AiCore, VectorCore -CORE_TYPE=$3 -if [ ${CORE_TYPE}"x" = "x" ]; then - echo "WARNING: CORE_TYPE is not specified, using AiCore as default." - CORE_TYPE=AiCore -fi - -# 指定运行模式: cpu, npu -RUN_MODE=$4 -if [ ${RUN_MODE}"x" = "x" ]; then - echo "WARNING: RUN_MODE is not specified, using cpu as default." 
- RUN_MODE=cpu -fi - - -NPU_RUN_MODE=$5 -if [ ${NPU_RUN_MODE}"x" = "x" ]; then - echo "WARNING: NPU_RUN_MODE is not specified, using ONBOARD as default." - NPU_RUN_MODE=ONBOARD -fi - -if [ ${RUN_MODE} = "cpu" ] && [ ${NPU_RUN_MODE} = "SIMULATOR" ]; then - echo "Error: cpu do not support SIMULATOR mode." - exit 1 -fi - -CUSTOM_TILING=$6 -if [ ${CUSTOM_TILING}"x" = "x" ]; then - echo "WARNING: CUSTOM_TILING is not specified, using NO_CUSTOM_TILING as default." - CUSTOM_TILING=NO_CUSTOM_TILING -fi - -# 生成计算输入数据和对比用的真值数据 -python3 $FILE_NAME.py - -function compile_and_execute() { - # 使用cmake编译cpu侧或者npu侧算子, SIMULATOR or ONBOARD - mkdir -p build; cd build; \ - cmake .. \ - -Dsmoke_testcase=$1 \ - -DASCEND_PRODUCT_TYPE=$2 \ - -DASCEND_CORE_TYPE=$3 \ - -DASCEND_RUN_MODE=${NPU_RUN_MODE} \ - -DASCEND_CUSTOM_TILING=$CUSTOM_TILING \ - -DASCEND_INSTALL_PATH=$ASCEND_HOME_DIR - cmake --build . --target ${1}_${4} - # cmake --build . --target ascendc_kernels && cmake --install . && cmake --build . --target ${1}_lib_${4} - if [ ${CUSTOM_TILING} = "CUSTOM_TILING" ]; then - cmake --build . --target ${1}_tiling - fi - cd - - - if [ ${CUSTOM_TILING} = "CUSTOM_TILING" ]; then - ./${1}_tiling - fi - - - if [ $? -ne 0 ]; then - echo "ERROR: compile op on failed!" - return 1 - fi - echo "INFO: compile op on ${RUN_MODE} succeed!" - # 执行生成的可执行文件 - (export LD_LIBRARY_PATH=`pwd`:$ASCEND_HOME_DIR/tools/simulator/${VersionMap[$SOC_VERSION]}/lib:$LD_LIBRARY_PATH && ./${1}_${4}) - if [ $? -ne 0 ]; then - echo "ERROR: execute op on ${RUN_MODE} failed!" - return 1 - fi - echo "INFO: execute op on ${RUN_MODE} succeed!" 
-} -compile_and_execute $FILE_NAME $SOC_VERSION $CORE_TYPE $RUN_MODE - -# 验证计算结果 -echo "md5sum: ";md5sum output/*.bin diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/CMakeLists.txt deleted file mode 100644 index fd87c7620..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -set(CMAKE_SYSTEM_NAME Linux) -set(CMAKE_CXX_STANDARD 17) - -set(CCE_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) -list(APPEND CMAKE_MODULE_PATH ${CCE_CMAKE_PATH}) -project(kernel_samples LANGUAGES CCE CXX) - -add_subdirectory(cmake/cpu) -add_subdirectory(cmake/npu) \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCECompiler.cmake.in b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCECompiler.cmake.in deleted file mode 100644 index a9b5688ff..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCECompiler.cmake.in +++ /dev/null @@ -1,5 +0,0 @@ -set(CMAKE_CCE_COMPILER "@CMAKE_CCE_COMPILER@") -set(CMAKE_CCE_COMPILER_LOADED 1) -set(CMAKE_CCE_SOURCE_FILE_EXTENSIONS @CMAKE_CCE_SOURCE_FILE_EXTENSIONS@) -set(CMAKE_CCE_OUTPUT_EXTENSION @CMAKE_CCE_OUTPUT_EXTENSION@) -set(CMAKE_CCE_COMPILER_ENV_VAR "@CMAKE_CCE_COMPILER_ENV_VAR@") diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEFunction.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEFunction.cmake deleted file mode 100644 index 60543737c..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEFunction.cmake +++ /dev/null @@ -1,14 +0,0 @@ -function(product_dir str newstr) - if ("x${str}" STREQUAL "xascend910") - set(${newstr} "Ascend910A" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend310p") - set(${newstr} "Ascend310P1" PARENT_SCOPE) - elseif("x${str}" STREQUAL "xascend910b") - set(${newstr} "Ascend910B1" PARENT_SCOPE) - else() - string(SUBSTRING ${str} 0 1 _headlower) - string(SUBSTRING ${str} 1 -1 _leftstr) - string(TOUPPER ${_headlower} _headupper) - set(${newstr} "${_headupper}${_leftstr}" PARENT_SCOPE) - endif() -endfunction() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEInformation.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEInformation.cmake deleted file mode 100644 index 0eaef0348..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeCCEInformation.cmake +++ /dev/null @@ -1,35 +0,0 @@ -include(CMakeCommonLanguageInclude) - -set(CMAKE_INCLUDE_FLAG_CCE "-I") - -if(UNIX) - set(CMAKE_CCE_OUTPUT_EXTENSION .o) -else() - set(CMAKE_CCE_OUTPUT_EXTENSION .obj) -endif() - -set(_INCLUDED_FILE 0) -set(CMAKE_SHARED_LIBRARY_CCE_FLAGS -fPIC) -set(CMAKE_SHARED_LIBRARY_CREATE_CCE_FLAGS -shared) -set(CMAKE_LIBRARY_CREATE_CCE_FLAGS "--cce-fatobj-link ${_CMAKE_COMPILE_AS_CCE_FLAG}") - -if(NOT CMAKE_CCE_COMPILE_OBJECT) - set(CMAKE_CCE_COMPILE_OBJECT - " -xcce ${__IMPLICIT_INCLUDES} ${_CMAKE_CCE_BUILTIN_INCLUDE_PATH} ${_CMAKE_COMPILE_AS_CCE_FLAG} ${_CMAKE_CCE_COMPILE_OPTIONS} ${_CMAKE_CCE_COMMON_COMPILE_OPTIONS} -pthread -o -c ") -endif() - -if(NOT CMAKE_CCE_CREATE_SHARED_LIBRARY) - set(CMAKE_CCE_CREATE_SHARED_LIBRARY - " ${CMAKE_LIBRARY_CREATE_CCE_FLAGS} -o ") -endif() - -if(NOT CMAKE_CCE_CREATE_SHARED_MODULE) - 
set(CMAKE_CCE_CREATE_SHARED_MODULE ${CMAKE_CCE_CREATE_SHARED_LIBRARY}) -endif() - -if(NOT CMAKE_CCE_LINK_EXECUTABLE) - set(CMAKE_CCE_LINK_EXECUTABLE - " ${CMAKE_LIBRARY_CREATE_CCE_FLAGS} -o ${__IMPLICIT_LINKS}") -endif() - -set(CMAKE_CCE_INFORMATION_LOADED 1) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeDetermineCCECompiler.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeDetermineCCECompiler.cmake deleted file mode 100644 index 995937bd9..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeDetermineCCECompiler.cmake +++ /dev/null @@ -1,114 +0,0 @@ -find_program(CMAKE_CCE_COMPILER NAMES "ccec" PATHS "$ENV{PATH}" DOC "CCE Compiler") -include(CMakeCCEFunction) - -mark_as_advanced(CMAKE_CCE_COMPILER) - -message(STATUS "CMAKE_CCE_COMPILER: " ${CMAKE_CCE_COMPILER}) -set(CMAKE_CCE_SOURCE_FILE_EXTENSIONS cce;cpp) -set(CMAKE_CCE_COMPILER_ENV_VAR "CCE") -message(STATUS "CMAKE_CURRENT_LIST_DIR: " ${CMAKE_CURRENT_LIST_DIR}) - -# configure all variables set in this file -configure_file(${CMAKE_CURRENT_LIST_DIR}/CMakeCCECompiler.cmake.in - ${CMAKE_PLATFORM_INFO_DIR}/CMakeCCECompiler.cmake - @ONLY -) - -message(STATUS "ASCEND_PRODUCT_TYPE:\n" " ${ASCEND_PRODUCT_TYPE}") -message(STATUS "ASCEND_CORE_TYPE:\n" " ${ASCEND_CORE_TYPE}") -message(STATUS "ASCEND_INSTALL_PATH:\n" " ${ASCEND_INSTALL_PATH}") - -if(DEFINED ASCEND_INSTALL_PATH) - set(_CMAKE_ASCEND_INSTALL_PATH ${ASCEND_INSTALL_PATH}) -else() - message(FATAL_ERROR - "no, installation path found, should passing -DASCEND_INSTALL_PATH= in cmake" - ) - set(_CMAKE_ASCEND_INSTALL_PATH) -endif() - - -if(DEFINED ASCEND_PRODUCT_TYPE) - set(_CMAKE_CCE_COMMON_COMPILE_OPTIONS "--cce-auto-sync") - if(ASCEND_PRODUCT_TYPE STREQUAL "") - message(FATAL_ERROR "ASCEND_PRODUCT_TYPE must be non-empty if set.") - 
elseif(ASCEND_PRODUCT_TYPE AND NOT ASCEND_PRODUCT_TYPE MATCHES "^ascend[0-9][0-9][0-9][a-zA-Z]?[1-9]?$") - message(FATAL_ERROR - "ASCEND_PRODUCT_TYPE: ${ASCEND_PRODUCT_TYPE}\n" - "is not one of the following: ascend910, ascend310p, ascend910B1" - ) - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend910") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c100") - else() - message(FATAL_ERROR, "only AiCore inside") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS) - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend310p") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-m200") - elseif(ASCEND_CORE_TYPE STREQUAL "VectorCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-m200-vec") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS - "-mllvm -cce-aicore-function-stack-size=16000 -mllvm -cce-aicore-fp-ceiling=2 -mllvm -cce-aicore-record-overflow=false") - elseif(ASCEND_PRODUCT_TYPE STREQUAL "ascend910B1") - if (ASCEND_CORE_TYPE STREQUAL "AiCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c220-cube") - elseif(ASCEND_CORE_TYPE STREQUAL "VectorCore") - set(_CMAKE_COMPILE_AS_CCE_FLAG "--cce-aicore-arch=dav-c220-vec") - endif() - set(_CMAKE_CCE_COMPILE_OPTIONS - "-mllvm -cce-aicore-function-stack-size=16000 -mllvm -cce-aicore-record-overflow=false -mllvm -cce-aicore-addr-transform" - ) - endif() -endif() - -product_dir(${ASCEND_PRODUCT_TYPE} PRODUCT_UPPER) -set(_CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES - ${_CMAKE_ASCEND_INSTALL_PATH}/runtime/lib64 - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/simulator/${PRODUCT_UPPER}/lib - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${PRODUCT_UPPER} -) - -# link library -set(_CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES stdc++) -if(ASCEND_RUN_MODE STREQUAL "ONBOARD") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES runtime) -elseif(ASCEND_RUN_MODE STREQUAL "SIMULATOR") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES ) - 
if(ASCEND_PRODUCT_TYPE STREQUAL "ascend910") - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES pem_davinci) - endif() - list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES runtime_camodel) -else() - message(FATAL_ERROR - "ASCEND_RUN_MODE: ${ASCEND_RUN_MODE}\n" - "ASCEND_RUN_MODE must be one of the following: ONBOARD or SIMULATOR" - ) -endif() -list(APPEND _CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES ascendcl) - -set(__IMPLICIT_LINKS) -foreach(dir ${_CMAKE_CCE_HOST_IMPLICIT_LINK_DIRECTORIES}) - string(APPEND __IMPLICIT_LINKS " -L\"${dir}\"") -endforeach() -foreach(lib ${_CMAKE_CCE_HOST_IMPLICIT_LINK_LIBRARIES}) - if(${lib} MATCHES "/") - string(APPEND __IMPLICIT_LINKS " \"${lib}\"") - else() - string(APPEND __IMPLICIT_LINKS " -l${lib}") - endif() -endforeach() - -set(_CMAKE_CCE_HOST_IMPLICIT_INCLUDE_DIRECTORIES - ${_CMAKE_ASCEND_INSTALL_PATH}/acllib/include - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw/impl - ${_CMAKE_ASCEND_INSTALL_PATH}/compiler/tikcpp/tikcfw/interface - ${_CMAKE_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/include -) -set(__IMPLICIT_INCLUDES) -foreach(inc ${_CMAKE_CCE_HOST_IMPLICIT_INCLUDE_DIRECTORIES}) - string(APPEND __IMPLICIT_INCLUDES " -I\"${inc}\"") -endforeach() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeTestCCECompiler.cmake b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeTestCCECompiler.cmake deleted file mode 100644 index f00f227c1..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/Modules/CMakeTestCCECompiler.cmake +++ /dev/null @@ -1 +0,0 @@ -set(CMAKE_CCE_COMPILER_WORKS 1 CACHE INTERNAL "") diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/cpu/CMakeLists.txt 
b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/cpu/CMakeLists.txt deleted file mode 100644 index 326e1f546..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/cpu/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# cpu -if (NOT DEFINED ENV{CMAKE_PREFIX_PATH}) - set(CMAKE_PREFIX_PATH ${ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/cmake) -endif() -find_package(tikicpulib REQUIRED) - -file(GLOB SRC_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -add_executable(${smoke_testcase}_cpu - ${SRC_FILES} -) - -target_include_directories(${smoke_testcase}_cpu PRIVATE - ${ASCEND_INSTALL_PATH}/acllib/include -) - -target_link_libraries(${smoke_testcase}_cpu PRIVATE - tikicpulib::${ASCEND_PRODUCT_TYPE} - ascendcl -) - -target_compile_options(${smoke_testcase}_cpu PRIVATE - -g -) - -set_target_properties(${smoke_testcase}_cpu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_cpu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/npu/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/npu/CMakeLists.txt deleted file mode 100644 index cb86b9708..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/cmake/npu/CMakeLists.txt +++ /dev/null @@ -1,85 +0,0 @@ -# npu -file(GLOB KERNEL_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -list(REMOVE_ITEM KERNEL_FILES "${CMAKE_SOURCE_DIR}/main.cpp") -set_source_files_properties(${KERNEL_FILES} PROPERTIES LANGUAGE CCE) - -file(GLOB MAIN_FILES - ${CMAKE_SOURCE_DIR}/main.cpp -) -set_source_files_properties(${MAIN_FILES} PROPERTIES LANGUAGE CCE) - -# =================================================================== -# exe mode: build a executable directly -add_executable(${smoke_testcase}_npu - ${KERNEL_FILES} - ${MAIN_FILES} -) - 
-target_compile_options(${smoke_testcase}_npu PRIVATE - -O2 - -std=c++17 -) - -target_compile_definitions(${smoke_testcase}_npu PRIVATE - TILING_KEY_VAR=0 -) - -set_target_properties(${smoke_testcase}_npu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_npu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) - -# =================================================================== -# so mode: build a shared library first, and dynamic link to build a executable -file(GLOB KERNEL_FILES - ${CMAKE_SOURCE_DIR}/*.cpp -) -list(REMOVE_ITEM KERNEL_FILES "${CMAKE_SOURCE_DIR}/main.cpp") - -add_library(ascendc_kernels SHARED - ${KERNEL_FILES} -) - -target_compile_definitions(ascendc_kernels PRIVATE - TILING_KEY_VAR=0 -) - -target_compile_options(ascendc_kernels PRIVATE - -O2 - -std=c++17 -) - -set_target_properties(ascendc_kernels PROPERTIES - OUTPUT_NAME ascendc_kernels - LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -) -install(TARGETS ascendc_kernels - LIBRARY DESTINATION ${CMAKE_SOURCE_DIR} -) - -# =================================================================== -add_executable(${smoke_testcase}_lib_npu - ${MAIN_FILES} -) - -target_compile_options(${smoke_testcase}_lib_npu PRIVATE - -O2 - -std=c++17 -) - -target_link_directories(${smoke_testcase}_lib_npu PRIVATE - ${CMAKE_SOURCE_DIR} -) - -target_link_libraries(${smoke_testcase}_lib_npu PRIVATE - ascendc_kernels - -Wl,--as-needed -) - -# add_dependencies(${smoke_testcase}_lib_npu ${smoke_testcase}) -set_target_properties(${smoke_testcase}_lib_npu PROPERTIES - OUTPUT_NAME ${smoke_testcase}_npu - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR} -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/data_utils.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/data_utils.h deleted file mode 100644 index 042de8bb0..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/data_utils.h 
+++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - */ -#ifndef DATA_UTILS_H -#define DATA_UTILS_H -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "acl/acl.h" - -typedef enum { - DT_UNDEFINED = -1, - FLOAT = 0, - HALF = 1, - INT8_T = 2, - INT32_T = 3, - UINT8_T = 4, - INT16_T = 6, - UINT16_T = 7, - UINT32_T = 8, - INT64_T = 9, - UINT64_T = 10, - DOUBLE = 11, - BOOL = 12, - STRING = 13, - COMPLEX64 = 16, - COMPLEX128 = 17, - BF16 = 27 -} printDataType; - -#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) -#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) -#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) -#define CHECK_ACL(x) \ - do { \ - aclError __ret = x; \ - if (__ret != ACL_ERROR_NONE) { \ - std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ - } \ - } while (0); - -/** - * @brief Read data from file - * @param [in] filePath: file path - * @param [out] fileSize: file size - * @return read result - */ -bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) -{ - struct stat sBuf; - int fileStatus = stat(filePath.data(), &sBuf); - if (fileStatus == -1) { - ERROR_LOG("failed to get file"); - return false; - } - if (S_ISREG(sBuf.st_mode) == 0) { - ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); - return false; - } - - std::ifstream file; - file.open(filePath, std::ios::binary); - if (!file.is_open()) { - ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); - return false; - } - - std::filebuf *buf = file.rdbuf(); - size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); - if (size == 0) { - ERROR_LOG("file size is 0"); - file.close(); - return false; - } - if (size > bufferSize) { - ERROR_LOG("file size is larger than buffer size"); - file.close(); - return false; - } - buf->pubseekpos(0, std::ios::in); - buf->sgetn(static_cast(buffer), size); - fileSize = size; - file.close(); - return true; -} - -/** - * @brief Write data to file - * @param [in] filePath: file path - * @param [in] buffer: data to write to file - * @param [in] size: size to write - * @return write result - */ -bool WriteFile(const std::string &filePath, const void *buffer, size_t size) -{ - if (buffer == nullptr) { - ERROR_LOG("Write file failed. buffer is nullptr"); - return false; - } - - int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); - if (fd < 0) { - ERROR_LOG("Open file failed. path = %s", filePath.c_str()); - return false; - } - - auto writeSize = write(fd, buffer, size); - (void) close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} - -template -void DoPrintData(const T *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << data[i]; - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) -{ - assert(elementsPerRow != 0); - for (size_t i = 0; i < count; ++i) { - std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); - if (i % elementsPerRow == elementsPerRow - 1) { - std::cout << std::endl; - } - } -} - -void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow=16) -{ - if (data == nullptr) { - ERROR_LOG("Print data failed. 
data is nullptr"); - return; - } - - switch (dataType) { - case BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT8_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT8_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT16_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT16_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT32_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT32_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case INT64_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case UINT64_T: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case HALF: - DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); - break; - case FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } - std::cout << std::endl; -} -#endif // DATA_UTILS_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/run.sh deleted file mode 100755 index 11e53e6ae..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/kernel_template/run.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash -clear;clear -# 清除之前遗留的文件 -rm -rf *.vcd *.dump *.log *.bin *.o *.so *pu build output/*.bin input/*.bin -# 不需要TIK打印出内存信息 -export PRINT_TIK_MEM_ACCESS=FALSE - -# 获取当前的目录 -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -); cd $CURRENT_DIR - -declare -A VersionMap -VersionMap["ascend910"]="Ascend910A" -VersionMap["ascend310p"]="Ascend310P1" 
-VersionMap["ascend910B1"]="Ascend910B1" - -# 指向昇腾软件包安装地址,导出环境变量 -if [ ! $ASCEND_HOME_DIR ]; then - export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - - -# 指定当前sample的算子文件名 -FILE_NAME=$1 - -# 指定芯片版本: ascend910, ascend310p -SOC_VERSION=$2 -if [ ${SOC_VERSION}"x" = "x" ]; then - echo "ERROR: SOC_VERSION is not specified! please specify ascend910, ascend310p or ascend910B1!" - exit -1 -fi - -# 指定运行的核: AiCore, VectorCore -CORE_TYPE=$3 -if [ ${CORE_TYPE}"x" = "x" ]; then - echo "WARNING: CORE_TYPE is not specified, using AiCore as default." - CORE_TYPE=AiCore -fi - -# 指定运行模式: cpu, npu -RUN_MODE=$4 -if [ ${RUN_MODE}"x" = "x" ]; then - echo "WARNING: RUN_MODE is not specified, using cpu as default." - RUN_MODE=cpu -fi - -# 生成计算输入数据和对比用的真值数据 -python3 $FILE_NAME.py - -function compile_and_execute() { - # 使用cmake编译cpu侧或者npu侧算子, SIMULATOR or ONBOARD - mkdir -p build; cd build; \ - cmake .. \ - -Dsmoke_testcase=$1 \ - -DASCEND_PRODUCT_TYPE=$2 \ - -DASCEND_CORE_TYPE=$3 \ - -DASCEND_RUN_MODE="ONBOARD" \ - -DASCEND_INSTALL_PATH=$ASCEND_HOME_DIR - cmake --build . --target ${1}_${4} - # cmake --build . --target ascendc_kernels && cmake --install . && cmake --build . --target ${1}_lib_${4} - cd - - - if [ $? -ne 0 ]; then - echo "ERROR: compile op on failed!" - return 1 - fi - echo "INFO: compile op on ${RUN_MODE} succeed!" - - # 执行生成的可执行文件 - (export LD_LIBRARY_PATH=`pwd`:$ASCEND_HOME_DIR/tools/simulator/${VersionMap[$SOC_VERSION]}/lib:$LD_LIBRARY_PATH && ./${1}_${4}) - if [ $? -ne 0 ]; then - echo "ERROR: execute op on ${RUN_MODE} failed!" - return 1 - fi - echo "INFO: execute op on ${RUN_MODE} succeed!" 
-} -compile_and_execute $FILE_NAME $SOC_VERSION $CORE_TYPE $RUN_MODE - -# 验证计算结果 -echo "md5sum: ";md5sum output/*.bin diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/readme.md b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/readme.md deleted file mode 100644 index 17a036967..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/kernel_invocation/readme.md +++ /dev/null @@ -1,28 +0,0 @@ -### Demostration -```shell -bash run.sh [KERNEL_NAME](add_custom/matmul_custom/topk_custom) [SOC_VERSION](ascend910/ascend310p) [CORE_TYPE](AiCore/VectorCore) [RUN_MODE](cpu/npu) -``` -### NOTICE -THEY ARE JUST DEMOS, NO DFX DEFENSE, do not type invalid command!!! -actually all that you can run: -#### On ascend910 -```shell -(cd Add; bash run.sh add_custom ascend910 AiCore cpu) -(cd Add; bash run.sh add_custom ascend910 AiCore npu) - -(cd Add_tile; bash run.sh add_custom ascend910 AiCore cpu) -(cd Add_tile; bash run.sh add_custom ascend910 AiCore npu) - -(cd MatMul; bash run.sh matmul_custom ascend910 AiCore cpu) -(cd MatMul; bash run.sh matmul_custom ascend910 AiCore npu) -``` - -#### On ascend310p -```shell -(cd Add; bash run.sh add_custom ascend310p AiCore cpu) -(cd Add; bash run.sh add_custom ascend310p AiCore npu) - -(cd MatMul; bash run.sh matmul_custom ascend310p AiCore cpu) -(cd MatMul; bash run.sh matmul_custom ascend310p AiCore npu) -``` - diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/CMakeLists.txt deleted file mode 100755 index b6be9b492..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mindspore") - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/caffe_plugin") - 
add_subdirectory(caffe_plugin) - endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tf_plugin") - add_subdirectory(tf_plugin) - endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/onnx_plugin") - add_subdirectory(onnx_plugin) - endif() -endif() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/CMakeLists.txt deleted file mode 100644 index 5a9a4f996..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ - -aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} plugin_srcs) -add_library(cust_onnx_parsers SHARED ${plugin_srcs}) -target_compile_definitions(cust_onnx_parsers PRIVATE google=ascend_private) -target_link_libraries(cust_onnx_parsers PRIVATE intf_pub graph) -install(TARGETS cust_onnx_parsers - LIBRARY DESTINATION packages/vendors/${vendor_name}/framework/tensorflow -) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/json.hpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/json.hpp deleted file mode 100644 index 2716e1e7b..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/json.hpp +++ /dev/null @@ -1,26137 +0,0 @@ -/* - __ _____ _____ _____ - __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 3.9.1 -|_____|_____|_____|_|___| https://github.com/nlohmann/json - -Licensed under the MIT License . -SPDX-License-Identifier: MIT -Copyright (c) 2013-2019 Niels Lohmann . 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-*/ - -#ifndef INCLUDE_NLOHMANN_JSON_HPP_ -#define INCLUDE_NLOHMANN_JSON_HPP_ - -#define NLOHMANN_JSON_VERSION_MAJOR 3 -#define NLOHMANN_JSON_VERSION_MINOR 9 -#define NLOHMANN_JSON_VERSION_PATCH 1 - -#include // all_of, find, for_each -#include // nullptr_t, ptrdiff_t, size_t -#include // hash, less -#include // initializer_list -#include // istream, ostream -#include // random_access_iterator_tag -#include // unique_ptr -#include // accumulate -#include // string, stoi, to_string -#include // declval, forward, move, pair, swap -#include // vector - -// #include - - -#include -#include - -// #include - - -#include // transform -#include // array -#include // forward_list -#include // inserter, front_inserter, end -#include // map -#include // string -#include // tuple, make_tuple -#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible -#include // unordered_map -#include // pair, declval -#include // valarray - -// #include - - -#include // exception -#include // runtime_error -#include // to_string -#include // vector - -// #include - - -#include // array -#include // size_t -#include // uint8_t -#include // string - -namespace nlohmann -{ -namespace detail -{ -/////////////////////////// -// JSON type enumeration // -/////////////////////////// - -/*! -@brief the JSON type enumeration - -This enumeration collects the different JSON types. It is internally used to -distinguish the stored values, and the functions @ref basic_json::is_null(), -@ref basic_json::is_object(), @ref basic_json::is_array(), -@ref basic_json::is_string(), @ref basic_json::is_boolean(), -@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), -@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), -@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and -@ref basic_json::is_structured() rely on it. 
- -@note There are three enumeration entries (number_integer, number_unsigned, and -number_float), because the library distinguishes these three types for numbers: -@ref basic_json::number_unsigned_t is used for unsigned integers, -@ref basic_json::number_integer_t is used for signed integers, and -@ref basic_json::number_float_t is used for floating-point numbers or to -approximate integers which do not fit in the limits of their respective type. - -@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON -value with the default value for a given type - -@since version 1.0.0 -*/ -enum class value_t : std::uint8_t -{ - null, ///< null value - object, ///< object (unordered set of name/value pairs) - array, ///< array (ordered collection of values) - string, ///< string value - boolean, ///< boolean value - number_integer, ///< number value (signed integer) - number_unsigned, ///< number value (unsigned integer) - number_float, ///< number value (floating-point) - binary, ///< binary array (ordered collection of bytes) - discarded ///< discarded by the parser callback function -}; - -/*! -@brief comparison operator for JSON types - -Returns an ordering that is similar to Python: -- order: null < boolean < number < object < array < string < binary -- furthermore, each type is not smaller than itself -- discarded values are not comparable -- binary is represented as a b"" string in python and directly comparable to a - string; however, making a binary array directly comparable with a string would - be surprising behavior in a JSON file. 
- -@since version 1.0.0 -*/ -inline bool operator<(const value_t lhs, const value_t rhs) noexcept -{ - static constexpr std::array order = {{ - 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, - 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, - 6 /* binary */ - } - }; - - const auto l_index = static_cast(lhs); - const auto r_index = static_cast(rhs); - return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index]; -} -} // namespace detail -} // namespace nlohmann - -// #include - - -#include -// #include - - -#include // pair -// #include - - -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) -#if defined(JSON_HEDLEY_VERSION) - #undef JSON_HEDLEY_VERSION -#endif -#define JSON_HEDLEY_VERSION 15 - -#if defined(JSON_HEDLEY_STRINGIFY_EX) - #undef JSON_HEDLEY_STRINGIFY_EX -#endif -#define JSON_HEDLEY_STRINGIFY_EX(x) #x - -#if defined(JSON_HEDLEY_STRINGIFY) - #undef JSON_HEDLEY_STRINGIFY -#endif -#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) - -#if defined(JSON_HEDLEY_CONCAT_EX) - #undef JSON_HEDLEY_CONCAT_EX -#endif -#define JSON_HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(JSON_HEDLEY_CONCAT) - #undef JSON_HEDLEY_CONCAT -#endif -#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) - -#if defined(JSON_HEDLEY_CONCAT3_EX) - #undef JSON_HEDLEY_CONCAT3_EX -#endif -#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(JSON_HEDLEY_CONCAT3) - #undef JSON_HEDLEY_CONCAT3 -#endif -#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(JSON_HEDLEY_VERSION_ENCODE) - #undef 
JSON_HEDLEY_VERSION_ENCODE -#endif -#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) - #undef JSON_HEDLEY_VERSION_DECODE_MAJOR -#endif -#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) - #undef JSON_HEDLEY_VERSION_DECODE_MINOR -#endif -#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) - #undef JSON_HEDLEY_VERSION_DECODE_REVISION -#endif -#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(JSON_HEDLEY_GNUC_VERSION) - #undef JSON_HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) - #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) - #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) - #undef JSON_HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_GNUC_VERSION) - #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_MSVC_VERSION) - #undef JSON_HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION 
JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) - #undef JSON_HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(JSON_HEDLEY_MSVC_VERSION) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(JSON_HEDLEY_INTEL_VERSION) - #undef JSON_HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) - #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) - #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) - #undef JSON_HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_INTEL_VERSION) - #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_INTEL_CL_VERSION) - #undef JSON_HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) - #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) - #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if 
defined(JSON_HEDLEY_INTEL_CL_VERSION) - #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_PGI_VERSION) - #undef JSON_HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) - #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) - #undef JSON_HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_PGI_VERSION) - #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_SUNPRO_VERSION) - #undef JSON_HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) - #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if 
defined(JSON_HEDLEY_SUNPRO_VERSION) - #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) - #undef JSON_HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) - #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) - #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) - #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_ARM_VERSION) - #undef JSON_HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) - #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) - #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) - #undef JSON_HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_ARM_VERSION) - #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_IBM_VERSION) - #undef JSON_HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, 
__ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) - #undef JSON_HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_IBM_VERSION) - #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_VERSION) - #undef JSON_HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -#if (__TI_COMPILER_VERSION__ >= 16000000) - #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif -#endif - -#if defined(JSON_HEDLEY_TI_VERSION_CHECK) - #undef JSON_HEDLEY_TI_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_VERSION) - #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL2000_VERSION) - #undef JSON_HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) - #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL2000_VERSION) - #define 
JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL430_VERSION) - #undef JSON_HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) - #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL430_VERSION) - #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) - #undef JSON_HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) - #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) - #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) - #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL6X_VERSION) - #undef JSON_HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) - #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 
1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL6X_VERSION) - #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL7X_VERSION) - #undef JSON_HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) - #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL7X_VERSION) - #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) - #undef JSON_HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) - #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) - #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_CRAY_VERSION) - #undef JSON_HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) - #if defined(_RELEASE_PATCHLEVEL) - #define JSON_HEDLEY_CRAY_VERSION 
JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) - #else - #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) - #endif -#endif - -#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) - #undef JSON_HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_CRAY_VERSION) - #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_IAR_VERSION) - #undef JSON_HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) - #if __VER__ > 1000 - #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) - #else - #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) - #endif -#endif - -#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) - #undef JSON_HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_IAR_VERSION) - #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TINYC_VERSION) - #undef JSON_HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) - #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK) - #undef JSON_HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TINYC_VERSION) - #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_DMC_VERSION) - #undef JSON_HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) - #define 
JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) - #undef JSON_HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_DMC_VERSION) - #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_COMPCERT_VERSION) - #undef JSON_HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) - #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) - #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_COMPCERT_VERSION) - #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_PELLES_VERSION) - #undef JSON_HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) - #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) - #undef JSON_HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_PELLES_VERSION) - #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_MCST_LCC_VERSION) - #undef JSON_HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) - #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if 
defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) - #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_MCST_LCC_VERSION) - #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_GCC_VERSION) - #undef JSON_HEDLEY_GCC_VERSION -#endif -#if \ - defined(JSON_HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(JSON_HEDLEY_INTEL_VERSION) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_ARM_VERSION) && \ - !defined(JSON_HEDLEY_CRAY_VERSION) && \ - !defined(JSON_HEDLEY_TI_VERSION) && \ - !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ - !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(JSON_HEDLEY_MCST_LCC_VERSION) - #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION -#endif - -#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) - #undef JSON_HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_GCC_VERSION) - #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) - #define 
JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) -#else - #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) - #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) - #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_IAR_VERSION) && \ - (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) - #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else - #define 
JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) - #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_BUILTIN) - #undef JSON_HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else - #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) - #undef JSON_HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else - #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) - #undef JSON_HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else - #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_FEATURE) - #undef JSON_HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else - #define JSON_HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) - #undef JSON_HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else - #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) 
-#endif - -#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) - #undef JSON_HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else - #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_EXTENSION) - #undef JSON_HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else - #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) - #undef JSON_HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else - #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) - #undef JSON_HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else - #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) 
JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_WARNING) - #undef JSON_HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else - #define JSON_HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) - #undef JSON_HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else - #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_WARNING) - #undef JSON_HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else - #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - 
JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) - #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_PRAGMA(value) __pragma(value) -#else - #define JSON_HEDLEY_PRAGMA(value) -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) - #undef JSON_HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) - #undef JSON_HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) - #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) - #define 
JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else - #define JSON_HEDLEY_DIAGNOSTIC_PUSH - #define JSON_HEDLEY_DIAGNOSTIC_POP -#endif - -/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") -# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(JSON_HEDLEY_CONST_CAST) - #undef JSON_HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif \ - JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - 
JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - JSON_HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_REINTERPRET_CAST) - #undef JSON_HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else - #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_STATIC_CAST) - #undef JSON_HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else - #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_CPP_CAST) - #undef JSON_HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") -# define JSON_HEDLEY_CPP_CAST(T, expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define JSON_HEDLEY_CPP_CAST(T, expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) - #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif 
JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if 
defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if 
defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(JSON_HEDLEY_DEPRECATED) - #undef JSON_HEDLEY_DEPRECATED -#endif -#if defined(JSON_HEDLEY_DEPRECATED_FOR) - #undef JSON_HEDLEY_DEPRECATED_FOR -#endif -#if \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ - 
JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) - #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - 
JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else - #define JSON_HEDLEY_DEPRECATED(since) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(JSON_HEDLEY_UNAVAILABLE) - #undef JSON_HEDLEY_UNAVAILABLE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else - #define JSON_HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) - #undef JSON_HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) - #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && 
defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) - #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) - #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ - #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else - #define JSON_HEDLEY_WARN_UNUSED_RESULT - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(JSON_HEDLEY_SENTINEL) - #undef JSON_HEDLEY_SENTINEL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else - #define JSON_HEDLEY_SENTINEL(position) -#endif - -#if defined(JSON_HEDLEY_NO_RETURN) - #undef 
JSON_HEDLEY_NO_RETURN -#endif -#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_NO_RETURN __noreturn -#elif \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L - #define JSON_HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) - #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) - #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif 
JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) - #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) - #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) -#else - #define JSON_HEDLEY_NO_RETURN -#endif - -#if defined(JSON_HEDLEY_NO_ESCAPE) - #undef JSON_HEDLEY_NO_ESCAPE -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) - #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else - #define JSON_HEDLEY_NO_ESCAPE -#endif - -#if defined(JSON_HEDLEY_UNREACHABLE) - #undef JSON_HEDLEY_UNREACHABLE -#endif -#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) - #undef JSON_HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(JSON_HEDLEY_ASSUME) - #undef JSON_HEDLEY_ASSUME -#endif -#if \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_ASSUME(expr) __assume(expr) -#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) - #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) - #if defined(__cplusplus) - #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) - #else - #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) - #endif -#endif -#if \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(JSON_HEDLEY_ASSUME) - #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) -#endif -#if !defined(JSON_HEDLEY_ASSUME) - #if defined(JSON_HEDLEY_UNREACHABLE) - #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (JSON_HEDLEY_UNREACHABLE(), 1))) - #else - #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) - #endif -#endif -#if defined(JSON_HEDLEY_UNREACHABLE) - #if \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) - #else - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() - #endif -#else - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(JSON_HEDLEY_UNREACHABLE) - #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) -#endif - -JSON_HEDLEY_DIAGNOSTIC_PUSH -#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") - #pragma clang diagnostic ignored "-Wpedantic" -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) - #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) - #if defined(__clang__) - #pragma clang diagnostic ignored "-Wvariadic-macros" - #elif defined(JSON_HEDLEY_GCC_VERSION) - #pragma GCC diagnostic ignored "-Wvariadic-macros" - #endif -#endif -#if defined(JSON_HEDLEY_NON_NULL) - #undef JSON_HEDLEY_NON_NULL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else - #define JSON_HEDLEY_NON_NULL(...) 
-#endif -JSON_HEDLEY_DIAGNOSTIC_POP - -#if defined(JSON_HEDLEY_PRINTF_FORMAT) - #undef JSON_HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) -#else - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(JSON_HEDLEY_CONSTEXPR) - 
#undef JSON_HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) - #if __cplusplus >= 201103L - #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) - #endif -#endif -#if !defined(JSON_HEDLEY_CONSTEXPR) - #define JSON_HEDLEY_CONSTEXPR -#endif - -#if defined(JSON_HEDLEY_PREDICT) - #undef JSON_HEDLEY_PREDICT -#endif -#if defined(JSON_HEDLEY_LIKELY) - #undef JSON_HEDLEY_LIKELY -#endif -#if defined(JSON_HEDLEY_UNLIKELY) - #undef JSON_HEDLEY_UNLIKELY -#endif -#if defined(JSON_HEDLEY_UNPREDICTABLE) - #undef JSON_HEDLEY_UNPREDICTABLE -#endif -#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) - #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - 
JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) -# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(JSON_HEDLEY_UNPREDICTABLE) - #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(JSON_HEDLEY_MALLOC) - #undef JSON_HEDLEY_MALLOC -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - 
JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_MALLOC __declspec(restrict) -#else - #define JSON_HEDLEY_MALLOC -#endif - -#if defined(JSON_HEDLEY_PURE) - #undef JSON_HEDLEY_PURE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && 
defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PURE __attribute__((__pure__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define JSON_HEDLEY_PURE -#endif - -#if defined(JSON_HEDLEY_CONST) - #undef JSON_HEDLEY_CONST -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_CONST __attribute__((__const__)) -#elif \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define 
JSON_HEDLEY_CONST _Pragma("no_side_effect") -#else - #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE -#endif - -#if defined(JSON_HEDLEY_RESTRICT) - #undef JSON_HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) - #define JSON_HEDLEY_RESTRICT restrict -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_RESTRICT __restrict -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) - #define JSON_HEDLEY_RESTRICT _Restrict -#else - #define JSON_HEDLEY_RESTRICT -#endif - -#if defined(JSON_HEDLEY_INLINE) - #undef JSON_HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) - #define JSON_HEDLEY_INLINE inline -#elif \ - defined(JSON_HEDLEY_GCC_VERSION) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) - #define JSON_HEDLEY_INLINE __inline__ -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - 
JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_INLINE __inline -#else - #define JSON_HEDLEY_INLINE -#endif - -#if defined(JSON_HEDLEY_ALWAYS_INLINE) - #undef JSON_HEDLEY_ALWAYS_INLINE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif 
JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE -#endif - -#if defined(JSON_HEDLEY_NEVER_INLINE) - #undef JSON_HEDLEY_NEVER_INLINE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) - #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) 
-#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) - #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) -#else - #define JSON_HEDLEY_NEVER_INLINE -#endif - -#if defined(JSON_HEDLEY_PRIVATE) - #undef JSON_HEDLEY_PRIVATE -#endif -#if defined(JSON_HEDLEY_PUBLIC) - #undef JSON_HEDLEY_PUBLIC -#endif -#if defined(JSON_HEDLEY_IMPORT) - #undef JSON_HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define JSON_HEDLEY_PRIVATE -# define JSON_HEDLEY_PUBLIC __declspec(dllexport) -# define JSON_HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define JSON_HEDLEY_PRIVATE -# define JSON_HEDLEY_PUBLIC -# endif -# define JSON_HEDLEY_IMPORT extern -#endif - -#if defined(JSON_HEDLEY_NO_THROW) - #undef JSON_HEDLEY_NO_THROW -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define JSON_HEDLEY_NO_THROW __declspec(nothrow) -#else - #define JSON_HEDLEY_NO_THROW -#endif - -#if defined(JSON_HEDLEY_FALL_THROUGH) - #undef JSON_HEDLEY_FALL_THROUGH -#endif -#if \ - 
JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) - #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) - #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ - #define JSON_HEDLEY_FALL_THROUGH __fallthrough -#else - #define JSON_HEDLEY_FALL_THROUGH -#endif - -#if defined(JSON_HEDLEY_RETURNS_NON_NULL) - #undef JSON_HEDLEY_RETURNS_NON_NULL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ - #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else - #define JSON_HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(JSON_HEDLEY_ARRAY_PARAM) - #undef JSON_HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_TINYC_VERSION) - #define JSON_HEDLEY_ARRAY_PARAM(name) (name) -#else - #define JSON_HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(JSON_HEDLEY_IS_CONSTANT) - #undef JSON_HEDLEY_IS_CONSTANT -#endif -#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) - #undef JSON_HEDLEY_REQUIRE_CONSTEXPR -#endif -/* JSON_HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ -#if defined(JSON_HEDLEY_IS_CONSTEXPR_) - #undef JSON_HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) -#if defined(__INTPTR_TYPE__) - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -#else - #include - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -#endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_IAR_VERSION)) || \ - (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) -#if defined(__INTPTR_TYPE__) - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? 
(void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -#else - #include - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) -#endif -# elif \ - defined(JSON_HEDLEY_GCC_VERSION) || \ - defined(JSON_HEDLEY_INTEL_VERSION) || \ - defined(JSON_HEDLEY_TINYC_VERSION) || \ - defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ - defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ - defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ - defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ -((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(JSON_HEDLEY_IS_CONSTEXPR_) - #if !defined(JSON_HEDLEY_IS_CONSTANT) - #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) - #endif - #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) -#else - #if !defined(JSON_HEDLEY_IS_CONSTANT) - #define JSON_HEDLEY_IS_CONSTANT(expr) (0) - #endif - #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(JSON_HEDLEY_BEGIN_C_DECLS) - #undef JSON_HEDLEY_BEGIN_C_DECLS -#endif -#if defined(JSON_HEDLEY_END_C_DECLS) - #undef JSON_HEDLEY_END_C_DECLS -#endif -#if defined(JSON_HEDLEY_C_DECL) - #undef JSON_HEDLEY_C_DECL -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { - #define JSON_HEDLEY_END_C_DECLS } - #define JSON_HEDLEY_C_DECL extern "C" -#else - #define JSON_HEDLEY_BEGIN_C_DECLS - #define JSON_HEDLEY_END_C_DECLS - #define JSON_HEDLEY_C_DECL -#endif - -#if defined(JSON_HEDLEY_STATIC_ASSERT) - #undef JSON_HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(JSON_HEDLEY_NULL) - #undef JSON_HEDLEY_NULL -#endif -#if defined(__cplusplus) - #if __cplusplus >= 201103L - #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) - #elif defined(NULL) - #define JSON_HEDLEY_NULL NULL - #else - #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0) - #endif -#elif defined(NULL) - #define JSON_HEDLEY_NULL NULL -#else - #define JSON_HEDLEY_NULL ((void*) 0) -#endif - -#if 
defined(JSON_HEDLEY_MESSAGE) - #undef JSON_HEDLEY_MESSAGE -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define JSON_HEDLEY_MESSAGE(msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - JSON_HEDLEY_PRAGMA(message msg) \ - JSON_HEDLEY_DIAGNOSTIC_POP -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) -#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#else -# define JSON_HEDLEY_MESSAGE(msg) -#endif - -#if defined(JSON_HEDLEY_WARNING) - #undef JSON_HEDLEY_WARNING -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define JSON_HEDLEY_WARNING(msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - JSON_HEDLEY_PRAGMA(clang warning msg) \ - JSON_HEDLEY_DIAGNOSTIC_POP -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#else -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) -#endif - -#if defined(JSON_HEDLEY_REQUIRE) - #undef JSON_HEDLEY_REQUIRE -#endif -#if defined(JSON_HEDLEY_REQUIRE_MSG) - #undef JSON_HEDLEY_REQUIRE_MSG -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") -# define JSON_HEDLEY_REQUIRE(expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define JSON_HEDLEY_REQUIRE(expr) -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(JSON_HEDLEY_FLAGS) - #undef JSON_HEDLEY_FLAGS -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) - #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else - #define JSON_HEDLEY_FLAGS -#endif - -#if defined(JSON_HEDLEY_FLAGS_CAST) - #undef JSON_HEDLEY_FLAGS_CAST -#endif -#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - JSON_HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(JSON_HEDLEY_EMPTY_BASES) - #undef JSON_HEDLEY_EMPTY_BASES -#endif -#if \ - (JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else - #define JSON_HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. 
*/ - -#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) - #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) - #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else - #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) - #undef JSON_HEDLEY_CLANG_HAS_BUILTIN -#endif -#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) - -#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) - #undef JSON_HEDLEY_CLANG_HAS_FEATURE -#endif -#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) - -#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) - #undef JSON_HEDLEY_CLANG_HAS_EXTENSION -#endif -#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) - -#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) - #undef JSON_HEDLEY_CLANG_HAS_WARNING -#endif -#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ - - -// This file contains all internal macro definitions -// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them - -// exclude unsupported compilers -#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) - #if 
defined(__clang__) - #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 - #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 - #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #endif -#endif - -// C++ language standard detection -// if the user manually specified the used c++ version this is skipped -#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) - #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) - #define JSON_HAS_CPP_20 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) - #define JSON_HAS_CPP_14 - #endif - // the cpp 11 flag is always specified because it is the minimal required version - #define JSON_HAS_CPP_11 -#endif - -// disable documentation warnings on clang -#if defined(__clang__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wdocumentation" -#endif - -// allow to disable exceptions -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) - #define JSON_THROW(exception) throw exception - #define JSON_TRY try - #define JSON_CATCH(exception) catch(exception) - #define JSON_INTERNAL_CATCH(exception) catch(exception) -#else - #include - #define JSON_THROW(exception) std::abort() - #define JSON_TRY if(true) - #define JSON_CATCH(exception) if(false) - #define JSON_INTERNAL_CATCH(exception) if(false) -#endif 
- -// override exception macros -#if defined(JSON_THROW_USER) - #undef JSON_THROW - #define JSON_THROW JSON_THROW_USER -#endif -#if defined(JSON_TRY_USER) - #undef JSON_TRY - #define JSON_TRY JSON_TRY_USER -#endif -#if defined(JSON_CATCH_USER) - #undef JSON_CATCH - #define JSON_CATCH JSON_CATCH_USER - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_CATCH_USER -#endif -#if defined(JSON_INTERNAL_CATCH_USER) - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER -#endif - -// allow to override assert -#if !defined(JSON_ASSERT) - #include // assert - #define JSON_ASSERT(x) assert(x) -#endif - -// allow to access some private functions (needed by the test suite) -#if defined(JSON_TESTS_PRIVATE) - #define JSON_PRIVATE_UNLESS_TESTED public -#else - #define JSON_PRIVATE_UNLESS_TESTED private -#endif - -/*! -@brief macro to briefly define a mapping between an enum and JSON -@def NLOHMANN_JSON_SERIALIZE_ENUM -@since version 3.4.0 -*/ -#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ - template \ - inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [e](const std::pair& ej_pair) -> bool \ - { \ - return ej_pair.first == e; \ - }); \ - j = ((it != std::end(m)) ? it : std::begin(m))->second; \ - } \ - template \ - inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [&j](const std::pair& ej_pair) -> bool \ - { \ - return ej_pair.second == j; \ - }); \ - e = ((it != std::end(m)) ? it : std::begin(m))->first; \ - } - -// Ugly macros to avoid uglier copy-paste when specializing basic_json. They -// may be removed in the future once the class is split. 
- -#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ - template class ObjectType, \ - template class ArrayType, \ - class StringType, class BooleanType, class NumberIntegerType, \ - class NumberUnsignedType, class NumberFloatType, \ - template class AllocatorType, \ - template class JSONSerializer, \ - class BinaryType> - -#define NLOHMANN_BASIC_JSON_TPL \ - basic_json - -// Macros to simplify conversion from/to types - -#define NLOHMANN_JSON_EXPAND( x ) x -#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME -#define NLOHMANN_JSON_PASTE(...) NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ - NLOHMANN_JSON_PASTE64, \ - NLOHMANN_JSON_PASTE63, \ - NLOHMANN_JSON_PASTE62, \ - NLOHMANN_JSON_PASTE61, \ - NLOHMANN_JSON_PASTE60, \ - NLOHMANN_JSON_PASTE59, \ - NLOHMANN_JSON_PASTE58, \ - NLOHMANN_JSON_PASTE57, \ - NLOHMANN_JSON_PASTE56, \ - NLOHMANN_JSON_PASTE55, \ - NLOHMANN_JSON_PASTE54, \ - NLOHMANN_JSON_PASTE53, \ - NLOHMANN_JSON_PASTE52, \ - NLOHMANN_JSON_PASTE51, \ - NLOHMANN_JSON_PASTE50, \ - NLOHMANN_JSON_PASTE49, \ - NLOHMANN_JSON_PASTE48, \ - NLOHMANN_JSON_PASTE47, \ - NLOHMANN_JSON_PASTE46, \ - NLOHMANN_JSON_PASTE45, \ - NLOHMANN_JSON_PASTE44, \ - NLOHMANN_JSON_PASTE43, \ - NLOHMANN_JSON_PASTE42, \ - NLOHMANN_JSON_PASTE41, \ - NLOHMANN_JSON_PASTE40, \ - NLOHMANN_JSON_PASTE39, \ - NLOHMANN_JSON_PASTE38, \ - NLOHMANN_JSON_PASTE37, \ - NLOHMANN_JSON_PASTE36, \ - NLOHMANN_JSON_PASTE35, \ - NLOHMANN_JSON_PASTE34, \ - NLOHMANN_JSON_PASTE33, \ - NLOHMANN_JSON_PASTE32, \ - NLOHMANN_JSON_PASTE31, \ - NLOHMANN_JSON_PASTE30, \ - NLOHMANN_JSON_PASTE29, \ - NLOHMANN_JSON_PASTE28, \ - NLOHMANN_JSON_PASTE27, \ - NLOHMANN_JSON_PASTE26, \ - NLOHMANN_JSON_PASTE25, \ - 
NLOHMANN_JSON_PASTE24, \ - NLOHMANN_JSON_PASTE23, \ - NLOHMANN_JSON_PASTE22, \ - NLOHMANN_JSON_PASTE21, \ - NLOHMANN_JSON_PASTE20, \ - NLOHMANN_JSON_PASTE19, \ - NLOHMANN_JSON_PASTE18, \ - NLOHMANN_JSON_PASTE17, \ - NLOHMANN_JSON_PASTE16, \ - NLOHMANN_JSON_PASTE15, \ - NLOHMANN_JSON_PASTE14, \ - NLOHMANN_JSON_PASTE13, \ - NLOHMANN_JSON_PASTE12, \ - NLOHMANN_JSON_PASTE11, \ - NLOHMANN_JSON_PASTE10, \ - NLOHMANN_JSON_PASTE9, \ - NLOHMANN_JSON_PASTE8, \ - NLOHMANN_JSON_PASTE7, \ - NLOHMANN_JSON_PASTE6, \ - NLOHMANN_JSON_PASTE5, \ - NLOHMANN_JSON_PASTE4, \ - NLOHMANN_JSON_PASTE3, \ - NLOHMANN_JSON_PASTE2, \ - NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) -#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) -#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) -#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) -#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) -#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) -#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) -#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) -#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) -#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) -#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) -#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, 
v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) -#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) -#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) -#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) -#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) -#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) -#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) -#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) -#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) -#define NLOHMANN_JSON_PASTE21(func, 
v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) -#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) -#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) -#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) -#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) -#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) -#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, 
v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) -#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) -#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) -#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) -#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) -#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) -#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, 
v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) -#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) -#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) -#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) -#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) -#define 
NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) -#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) -#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) -#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) -#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, 
v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) -#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) -#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) -#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) -#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, 
v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) -#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) -#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) -#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) -#define 
NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) -#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) -#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) -#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, 
v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) -#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) -#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) -#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, 
v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) -#define NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) -#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) -#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, 
v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) -#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) -#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) -#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, 
v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) -#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) -#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) - -#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; -#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); - -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_INTRUSIVE -@since version 3.9.0 -*/ -#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) 
\ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE -@since version 3.9.0 -*/ -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) \ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -#ifndef JSON_USE_IMPLICIT_CONVERSIONS - #define JSON_USE_IMPLICIT_CONVERSIONS 1 -#endif - -#if JSON_USE_IMPLICIT_CONVERSIONS - #define JSON_EXPLICIT -#else - #define JSON_EXPLICIT explicit -#endif - - -namespace nlohmann -{ -namespace detail -{ - -/*! -@brief replace all occurrences of a substring by another string - -@param[in,out] s the string to manipulate; changed so that all - occurrences of @a f are replaced with @a t -@param[in] f the substring to replace with @a t -@param[in] t the string to replace @a f - -@pre The search string @a f must not be empty. **This precondition is -enforced with an assertion.** - -@since version 2.0.0 -*/ -inline void replace_substring(std::string& s, const std::string& f, - const std::string& t) -{ - JSON_ASSERT(!f.empty()); - for (auto pos = s.find(f); // find first occurrence of f - pos != std::string::npos; // make sure f was found - s.replace(pos, f.size(), t), // replace with t, and - pos = s.find(f, pos + t.size())) // find next occurrence of f - {} -} - -/*! - * @brief string escaping as described in RFC 6901 (Sect. 
4) - * @param[in] s string to escape - * @return escaped string - * - * Note the order of escaping "~" to "~0" and "/" to "~1" is important. - */ -inline std::string escape(std::string s) -{ - replace_substring(s, "~", "~0"); - replace_substring(s, "/", "~1"); - return s; -} - -/*! - * @brief string unescaping as described in RFC 6901 (Sect. 4) - * @param[in] s string to unescape - * @return unescaped string - * - * Note the order of escaping "~1" to "/" and "~0" to "~" is important. - */ -static void unescape(std::string& s) -{ - replace_substring(s, "~1", "/"); - replace_substring(s, "~0", "~"); -} - -} // namespace detail -} // namespace nlohmann - -// #include - - -#include // size_t - -namespace nlohmann -{ -namespace detail -{ -/// struct to capture the start position of the current token -struct position_t -{ - /// the total number of characters read - std::size_t chars_read_total = 0; - /// the number of characters read in the current line - std::size_t chars_read_current_line = 0; - /// the number of lines read - std::size_t lines_read = 0; - - /// conversion to size_t to preserve SAX interface - constexpr operator size_t() const - { - return chars_read_total; - } -}; - -} // namespace detail -} // namespace nlohmann - -// #include - - -namespace nlohmann -{ -namespace detail -{ -//////////////// -// exceptions // -//////////////// - -/*! -@brief general exception of the @ref basic_json class - -This class is an extension of `std::exception` objects with a member @a id for -exception ids. It is used as the base class for all exceptions thrown by the -@ref basic_json class. This class can hence be used as "wildcard" to catch -exceptions. 
- -Subclasses: -- @ref parse_error for exceptions indicating a parse error -- @ref invalid_iterator for exceptions indicating errors with iterators -- @ref type_error for exceptions indicating executing a member function with - a wrong type -- @ref out_of_range for exceptions indicating access out of the defined range -- @ref other_error for exceptions indicating other library errors - -@internal -@note To have nothrow-copy-constructible exceptions, we internally use - `std::runtime_error` which can cope with arbitrary-length error messages. - Intermediate strings are built with static functions and then passed to - the actual constructor. -@endinternal - -@liveexample{The following code shows how arbitrary library exceptions can be -caught.,exception} - -@since version 3.0.0 -*/ -class exception : public std::exception -{ - public: - /// returns the explanatory string - const char* what() const noexcept override - { - return m.what(); - } - - /// the id of the exception - const int id; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes) - - protected: - JSON_HEDLEY_NON_NULL(3) - exception(int id_, const char* what_arg) : id(id_), m(what_arg) {} - - static std::string name(const std::string& ename, int id_) - { - return "[json.exception." + ename + "." 
+ std::to_string(id_) + "] "; - } - - template - static std::string diagnostics(const BasicJsonType& leaf_element) - { -#if JSON_DIAGNOSTICS - std::vector tokens; - for (const auto* current = &leaf_element; current->m_parent != nullptr; current = current->m_parent) - { - switch (current->m_parent->type()) - { - case value_t::array: - { - for (std::size_t i = 0; i < current->m_parent->m_value.array->size(); ++i) - { - if (¤t->m_parent->m_value.array->operator[](i) == current) - { - tokens.emplace_back(std::to_string(i)); - break; - } - } - break; - } - - case value_t::object: - { - for (const auto& element : *current->m_parent->m_value.object) - { - if (&element.second == current) - { - tokens.emplace_back(element.first.c_str()); - break; - } - } - break; - } - - default: // LCOV_EXCL_LINE - break; // LCOV_EXCL_LINE - } - } - - if (tokens.empty()) - { - return ""; - } - - return "(" + std::accumulate(tokens.rbegin(), tokens.rend(), std::string{}, - [](const std::string & a, const std::string & b) - { - return a + "/" + detail::escape(b); - }) + ") "; -#else - static_cast(leaf_element); - return ""; -#endif - } - - private: - /// an exception object as storage for error messages - std::runtime_error m; -}; - -/*! -@brief exception indicating a parse error - -This exception is thrown by the library when a parse error occurs. Parse errors -can occur during the deserialization of JSON text, CBOR, MessagePack, as well -as when using JSON Patch. - -Member @a byte holds the byte index of the last read character in the input -file. - -Exceptions have ids 1xx. - -name / id | example message | description ------------------------------- | --------------- | ------------------------- -json.exception.parse_error.101 | parse error at 2: unexpected end of input; expected string literal | This error indicates a syntax error while deserializing a JSON text. 
The error message describes that an unexpected token (character) was encountered, and the member @a byte indicates the error position. -json.exception.parse_error.102 | parse error at 14: missing or wrong low surrogate | JSON uses the `\uxxxx` format to describe Unicode characters. Code points above above 0xFFFF are split into two `\uxxxx` entries ("surrogate pairs"). This error indicates that the surrogate pair is incomplete or contains an invalid code point. -json.exception.parse_error.103 | parse error: code points above 0x10FFFF are invalid | Unicode supports code points up to 0x10FFFF. Code points above 0x10FFFF are invalid. -json.exception.parse_error.104 | parse error: JSON patch must be an array of objects | [RFC 6902](https://tools.ietf.org/html/rfc6902) requires a JSON Patch document to be a JSON document that represents an array of objects. -json.exception.parse_error.105 | parse error: operation must have string member 'op' | An operation of a JSON Patch document must contain exactly one "op" member, whose value indicates the operation to perform. Its value must be one of "add", "remove", "replace", "move", "copy", or "test"; other values are errors. -json.exception.parse_error.106 | parse error: array index '01' must not begin with '0' | An array index in a JSON Pointer ([RFC 6901](https://tools.ietf.org/html/rfc6901)) may be `0` or any number without a leading `0`. -json.exception.parse_error.107 | parse error: JSON pointer must be empty or begin with '/' - was: 'foo' | A JSON Pointer must be a Unicode string containing a sequence of zero or more reference tokens, each prefixed by a `/` character. -json.exception.parse_error.108 | parse error: escape character '~' must be followed with '0' or '1' | In a JSON Pointer, only `~0` and `~1` are valid escape sequences. -json.exception.parse_error.109 | parse error: array index 'one' is not a number | A JSON Pointer array index must be a number. 
-json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. -json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. -json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. -json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). -json.exception.parse_error.115 | parse error at byte 5: syntax error while parsing UBJSON high-precision number: invalid number text: 1A | A UBJSON high-precision number could not be parsed. - -@note For an input with n bytes, 1 is the index of the first character and n+1 - is the index of the terminating null byte or the end of file. This also - holds true when reading a byte vector (CBOR or MessagePack). - -@liveexample{The following code shows how a `parse_error` exception can be -caught.,parse_error} - -@sa - @ref exception for the base class of the library exceptions -@sa - @ref invalid_iterator for exceptions indicating errors with iterators -@sa - @ref type_error for exceptions indicating executing a member function with - a wrong type -@sa - @ref out_of_range for exceptions indicating access out of the defined range -@sa - @ref other_error for exceptions indicating other library errors - -@since version 3.0.0 -*/ -class parse_error : public exception -{ - public: - /*! 
- @brief create a parse error exception - @param[in] id_ the id of the exception - @param[in] pos the position where the error occurred (or with - chars_read_total=0 if the position cannot be - determined) - @param[in] what_arg the explanatory string - @return parse_error object - */ - template - static parse_error create(int id_, const position_t& pos, const std::string& what_arg, const BasicJsonType& context) - { - std::string w = exception::name("parse_error", id_) + "parse error" + - position_string(pos) + ": " + exception::diagnostics(context) + what_arg; - return parse_error(id_, pos.chars_read_total, w.c_str()); - } - - template - static parse_error create(int id_, std::size_t byte_, const std::string& what_arg, const BasicJsonType& context) - { - std::string w = exception::name("parse_error", id_) + "parse error" + - (byte_ != 0 ? (" at byte " + std::to_string(byte_)) : "") + - ": " + exception::diagnostics(context) + what_arg; - return parse_error(id_, byte_, w.c_str()); - } - - /*! - @brief byte index of the parse error - - The byte index of the last read character in the input file. - - @note For an input with n bytes, 1 is the index of the first character and - n+1 is the index of the terminating null byte or the end of file. - This also holds true when reading a byte vector (CBOR or MessagePack). - */ - const std::size_t byte; - - private: - parse_error(int id_, std::size_t byte_, const char* what_arg) - : exception(id_, what_arg), byte(byte_) {} - - static std::string position_string(const position_t& pos) - { - return " at line " + std::to_string(pos.lines_read + 1) + - ", column " + std::to_string(pos.chars_read_current_line); - } -}; - -/*! -@brief exception indicating errors with iterators - -This exception is thrown if iterators passed to a library function do not match -the expected semantics. - -Exceptions have ids 2xx. 
- -name / id | example message | description ------------------------------------ | --------------- | ------------------------- -json.exception.invalid_iterator.201 | iterators are not compatible | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. -json.exception.invalid_iterator.202 | iterator does not fit current value | In an erase or insert function, the passed iterator @a pos does not belong to the JSON value for which the function was called. It hence does not define a valid position for the deletion/insertion. -json.exception.invalid_iterator.203 | iterators do not fit current value | Either iterator passed to function @ref erase(IteratorType first, IteratorType last) does not belong to the JSON value from which values shall be erased. It hence does not define a valid range to delete values from. -json.exception.invalid_iterator.204 | iterators out of range | When an iterator range for a primitive type (number, boolean, or string) is passed to a constructor or an erase function, this range has to be exactly (@ref begin(), @ref end()), because this is the only way the single stored value is expressed. All other ranges are invalid. -json.exception.invalid_iterator.205 | iterator out of range | When an iterator for a primitive type (number, boolean, or string) is passed to an erase function, the iterator has to be the @ref begin() iterator, because it is the only way to address the stored value. All other iterators are invalid. -json.exception.invalid_iterator.206 | cannot construct with iterators from null | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) belong to a JSON null value and hence to not define a valid range. 
-json.exception.invalid_iterator.207 | cannot use key() for non-object iterators | The key() member function can only be used on iterators belonging to a JSON object, because other types do not have a concept of a key. -json.exception.invalid_iterator.208 | cannot use operator[] for object iterators | The operator[] to specify a concrete offset cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. -json.exception.invalid_iterator.209 | cannot use offsets with object iterators | The offset operators (+, -, +=, -=) cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. -json.exception.invalid_iterator.210 | iterators do not fit | The iterator range passed to the insert function are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. -json.exception.invalid_iterator.211 | passed iterators may not belong to container | The iterator range passed to the insert function must not be a subrange of the container to insert to. -json.exception.invalid_iterator.212 | cannot compare iterators of different containers | When two iterators are compared, they must belong to the same container. -json.exception.invalid_iterator.213 | cannot compare order of object iterators | The order of object iterators cannot be compared, because JSON objects are unordered. -json.exception.invalid_iterator.214 | cannot get value | Cannot get value for iterator: Either the iterator belongs to a null value or it is an iterator to a primitive type (number, boolean, or string), but the iterator is different to @ref begin(). 
- -@liveexample{The following code shows how an `invalid_iterator` exception can be -caught.,invalid_iterator} - -@sa - @ref exception for the base class of the library exceptions -@sa - @ref parse_error for exceptions indicating a parse error -@sa - @ref type_error for exceptions indicating executing a member function with - a wrong type -@sa - @ref out_of_range for exceptions indicating access out of the defined range -@sa - @ref other_error for exceptions indicating other library errors - -@since version 3.0.0 -*/ -class invalid_iterator : public exception -{ - public: - template - static invalid_iterator create(int id_, const std::string& what_arg, const BasicJsonType& context) - { - std::string w = exception::name("invalid_iterator", id_) + exception::diagnostics(context) + what_arg; - return invalid_iterator(id_, w.c_str()); - } - - private: - JSON_HEDLEY_NON_NULL(3) - invalid_iterator(int id_, const char* what_arg) - : exception(id_, what_arg) {} -}; - -/*! -@brief exception indicating executing a member function with a wrong type - -This exception is thrown in case of a type error; that is, a library function is -executed on a JSON value whose type does not match the expected semantics. - -Exceptions have ids 3xx. - -name / id | example message | description ------------------------------ | --------------- | ------------------------- -json.exception.type_error.301 | cannot create object from initializer list | To create an object from an initializer list, the initializer list must consist only of a list of pairs whose first element is a string. When this constraint is violated, an array is created instead. -json.exception.type_error.302 | type must be object, but is array | During implicit or explicit value conversion, the JSON type must be compatible to the target type. For instance, a JSON string can only be converted into string types, but not into numbers or boolean types. 
-json.exception.type_error.303 | incompatible ReferenceType for get_ref, actual type is object | To retrieve a reference to a value stored in a @ref basic_json object with @ref get_ref, the type of the reference must match the value type. For instance, for a JSON array, the @a ReferenceType must be @ref array_t &. -json.exception.type_error.304 | cannot use at() with string | The @ref at() member functions can only be executed for certain JSON types. -json.exception.type_error.305 | cannot use operator[] with string | The @ref operator[] member functions can only be executed for certain JSON types. -json.exception.type_error.306 | cannot use value() with string | The @ref value() member functions can only be executed for certain JSON types. -json.exception.type_error.307 | cannot use erase() with string | The @ref erase() member functions can only be executed for certain JSON types. -json.exception.type_error.308 | cannot use push_back() with string | The @ref push_back() and @ref operator+= member functions can only be executed for certain JSON types. -json.exception.type_error.309 | cannot use insert() with | The @ref insert() member functions can only be executed for certain JSON types. -json.exception.type_error.310 | cannot use swap() with number | The @ref swap() member functions can only be executed for certain JSON types. -json.exception.type_error.311 | cannot use emplace_back() with string | The @ref emplace_back() member function can only be executed for certain JSON types. -json.exception.type_error.312 | cannot use update() with string | The @ref update() member functions can only be executed for certain JSON types. -json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten function converts an object whose keys are JSON Pointers back into an arbitrary nested JSON value. The JSON Pointers must not overlap, because then the resulting value would not be well defined. 
-json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. -json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. -json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | -json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | - -@liveexample{The following code shows how a `type_error` exception can be -caught.,type_error} - -@sa - @ref exception for the base class of the library exceptions -@sa - @ref parse_error for exceptions indicating a parse error -@sa - @ref invalid_iterator for exceptions indicating errors with iterators -@sa - @ref out_of_range for exceptions indicating access out of the defined range -@sa - @ref other_error for exceptions indicating other library errors - -@since version 3.0.0 -*/ -class type_error : public exception -{ - public: - template - static type_error create(int id_, const std::string& what_arg, const BasicJsonType& context) - { - std::string w = exception::name("type_error", id_) + exception::diagnostics(context) + what_arg; - return type_error(id_, w.c_str()); - } - - private: - JSON_HEDLEY_NON_NULL(3) - type_error(int id_, const char* what_arg) : exception(id_, what_arg) {} -}; - -/*! -@brief exception indicating access out of the defined range - -This exception is thrown in case a library function is called on an input -parameter that exceeds the expected range, for instance in case of array -indices or nonexisting object keys. - -Exceptions have ids 4xx. 
- -name / id | example message | description -------------------------------- | --------------- | ------------------------- -json.exception.out_of_range.401 | array index 3 is out of range | The provided array index @a i is larger than @a size-1. -json.exception.out_of_range.402 | array index '-' (3) is out of range | The special array index `-` in a JSON Pointer never describes a valid element of the array, but the index past the end. That is, it can only be used to add elements at this position, but not to read it. -json.exception.out_of_range.403 | key 'foo' not found | The provided key was not found in the JSON object. -json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. -json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. -json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. (until version 3.8.0) | -json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. 
| -json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | - -@liveexample{The following code shows how an `out_of_range` exception can be -caught.,out_of_range} - -@sa - @ref exception for the base class of the library exceptions -@sa - @ref parse_error for exceptions indicating a parse error -@sa - @ref invalid_iterator for exceptions indicating errors with iterators -@sa - @ref type_error for exceptions indicating executing a member function with - a wrong type -@sa - @ref other_error for exceptions indicating other library errors - -@since version 3.0.0 -*/ -class out_of_range : public exception -{ - public: - template - static out_of_range create(int id_, const std::string& what_arg, const BasicJsonType& context) - { - std::string w = exception::name("out_of_range", id_) + exception::diagnostics(context) + what_arg; - return out_of_range(id_, w.c_str()); - } - - private: - JSON_HEDLEY_NON_NULL(3) - out_of_range(int id_, const char* what_arg) : exception(id_, what_arg) {} -}; - -/*! -@brief exception indicating other library errors - -This exception is thrown in case of errors that cannot be classified with the -other exception types. - -Exceptions have ids 5xx. - -name / id | example message | description ------------------------------- | --------------- | ------------------------- -json.exception.other_error.501 | unsuccessful: {"op":"test","path":"/baz", "value":"bar"} | A JSON Patch operation 'test' failed. The unsuccessful operation is also printed. 
- -@sa - @ref exception for the base class of the library exceptions -@sa - @ref parse_error for exceptions indicating a parse error -@sa - @ref invalid_iterator for exceptions indicating errors with iterators -@sa - @ref type_error for exceptions indicating executing a member function with - a wrong type -@sa - @ref out_of_range for exceptions indicating access out of the defined range - -@liveexample{The following code shows how an `other_error` exception can be -caught.,other_error} - -@since version 3.0.0 -*/ -class other_error : public exception -{ - public: - template - static other_error create(int id_, const std::string& what_arg, const BasicJsonType& context) - { - std::string w = exception::name("other_error", id_) + exception::diagnostics(context) + what_arg; - return other_error(id_, w.c_str()); - } - - private: - JSON_HEDLEY_NON_NULL(3) - other_error(int id_, const char* what_arg) : exception(id_, what_arg) {} -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - - -#include // size_t -#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type -#include // index_sequence, make_index_sequence, index_sequence_for - -// #include - - -namespace nlohmann -{ -namespace detail -{ - -template -using uncvref_t = typename std::remove_cv::type>::type; - -#ifdef JSON_HAS_CPP_14 - -// the following utilities are natively available in C++14 -using std::enable_if_t; -using std::index_sequence; -using std::make_index_sequence; -using std::index_sequence_for; - -#else - -// alias templates to reduce boilerplate -template -using enable_if_t = typename std::enable_if::type; - -// The following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h -// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. 
- -//// START OF CODE FROM GOOGLE ABSEIL - -// integer_sequence -// -// Class template representing a compile-time integer sequence. An instantiation -// of `integer_sequence` has a sequence of integers encoded in its -// type through its template arguments (which is a common need when -// working with C++11 variadic templates). `absl::integer_sequence` is designed -// to be a drop-in replacement for C++14's `std::integer_sequence`. -// -// Example: -// -// template< class T, T... Ints > -// void user_function(integer_sequence); -// -// int main() -// { -// // user_function's `T` will be deduced to `int` and `Ints...` -// // will be deduced to `0, 1, 2, 3, 4`. -// user_function(make_integer_sequence()); -// } -template -struct integer_sequence -{ - using value_type = T; - static constexpr std::size_t size() noexcept - { - return sizeof...(Ints); - } -}; - -// index_sequence -// -// A helper template for an `integer_sequence` of `size_t`, -// `absl::index_sequence` is designed to be a drop-in replacement for C++14's -// `std::index_sequence`. -template -using index_sequence = integer_sequence; - -namespace utility_internal -{ - -template -struct Extend; - -// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. -template -struct Extend, SeqSize, 0> -{ - using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; -}; - -template -struct Extend, SeqSize, 1> -{ - using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; -}; - -// Recursion helper for 'make_integer_sequence'. -// 'Gen::type' is an alias for 'integer_sequence'. 
-template -struct Gen -{ - using type = - typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; -}; - -template -struct Gen -{ - using type = integer_sequence; -}; - -} // namespace utility_internal - -// Compile-time sequences of integers - -// make_integer_sequence -// -// This template alias is equivalent to -// `integer_sequence`, and is designed to be a drop-in -// replacement for C++14's `std::make_integer_sequence`. -template -using make_integer_sequence = typename utility_internal::Gen::type; - -// make_index_sequence -// -// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, -// and is designed to be a drop-in replacement for C++14's -// `std::make_index_sequence`. -template -using make_index_sequence = make_integer_sequence; - -// index_sequence_for -// -// Converts a typename pack into an index sequence of the same length, and -// is designed to be a drop-in replacement for C++14's -// `std::index_sequence_for()` -template -using index_sequence_for = make_index_sequence; - -//// END OF CODE FROM GOOGLE ABSEIL - -#endif - -// dispatch utility (taken from ranges-v3) -template struct priority_tag : priority_tag < N - 1 > {}; -template<> struct priority_tag<0> {}; - -// taken from ranges-v3 -template -struct static_const -{ - static constexpr T value{}; -}; - -template -constexpr T static_const::value; - -} // namespace detail -} // namespace nlohmann - -// #include - - -namespace nlohmann -{ -namespace detail -{ -// dispatching helper struct -template struct identity_tag {}; -} // namespace detail -} // namespace nlohmann - -// #include - - -#include // numeric_limits -#include // false_type, is_constructible, is_integral, is_same, true_type -#include // declval -#include // tuple - -// #include - - -#include // random_access_iterator_tag - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template struct make_void -{ - using type = void; -}; -template using void_t = typename make_void::type; -} // 
namespace detail -} // namespace nlohmann - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template -struct iterator_types {}; - -template -struct iterator_types < - It, - void_t> -{ - using difference_type = typename It::difference_type; - using value_type = typename It::value_type; - using pointer = typename It::pointer; - using reference = typename It::reference; - using iterator_category = typename It::iterator_category; -}; - -// This is required as some compilers implement std::iterator_traits in a way that -// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. -template -struct iterator_traits -{ -}; - -template -struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> - : iterator_types -{ -}; - -template -struct iterator_traits::value>> -{ - using iterator_category = std::random_access_iterator_tag; - using value_type = T; - using difference_type = ptrdiff_t; - using pointer = T*; - using reference = T&; -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - -// #include - - -#include - -// #include - - -// https://en.cppreference.com/w/cpp/experimental/is_detected -namespace nlohmann -{ -namespace detail -{ -struct nonesuch -{ - nonesuch() = delete; - ~nonesuch() = delete; - nonesuch(nonesuch const&) = delete; - nonesuch(nonesuch const&&) = delete; - void operator=(nonesuch const&) = delete; - void operator=(nonesuch&&) = delete; -}; - -template class Op, - class... Args> -struct detector -{ - using value_t = std::false_type; - using type = Default; -}; - -template class Op, class... Args> -struct detector>, Op, Args...> -{ - using value_t = std::true_type; - using type = Op; -}; - -template class Op, class... Args> -using is_detected = typename detector::value_t; - -template class Op, class... Args> -using detected_t = typename detector::type; - -template class Op, class... Args> -using detected_or = detector; - -template class Op, class... 
Args> -using detected_or_t = typename detected_or::type; - -template class Op, class... Args> -using is_detected_exact = std::is_same>; - -template class Op, class... Args> -using is_detected_convertible = - std::is_convertible, To>; -} // namespace detail -} // namespace nlohmann - -// #include -#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ -#define INCLUDE_NLOHMANN_JSON_FWD_HPP_ - -#include // int64_t, uint64_t -#include // map -#include // allocator -#include // string -#include // vector - -/*! -@brief namespace for Niels Lohmann -@see https://github.com/nlohmann -@since version 1.0.0 -*/ -namespace nlohmann -{ -/*! -@brief default JSONSerializer template argument - -This serializer ignores the template arguments and uses ADL -([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) -for serialization. -*/ -template -struct adl_serializer; - -template class ObjectType = - std::map, - template class ArrayType = std::vector, - class StringType = std::string, class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = - adl_serializer, - class BinaryType = std::vector> -class basic_json; - -/*! -@brief JSON Pointer - -A JSON pointer defines a string syntax for identifying a specific value -within a JSON document. It can be used with functions `at` and -`operator[]`. Furthermore, JSON pointers are the base for JSON patches. - -@sa [RFC 6901](https://tools.ietf.org/html/rfc6901) - -@since version 2.0.0 -*/ -template -class json_pointer; - -/*! -@brief default JSON class - -This type is the default specialization of the @ref basic_json class which -uses the standard template types. - -@since version 1.0.0 -*/ -using json = basic_json<>; - -template -struct ordered_map; - -/*! -@brief ordered JSON class - -This type preserves the insertion order of object keys. 
- -@since version 3.9.0 -*/ -using ordered_json = basic_json; - -} // namespace nlohmann - -#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_ - - -namespace nlohmann -{ -/*! -@brief detail namespace with internal helper functions - -This namespace collects functions that should not be exposed, -implementations of some @ref basic_json methods, and meta-programming helpers. - -@since version 2.1.0 -*/ -namespace detail -{ -///////////// -// helpers // -///////////// - -// Note to maintainers: -// -// Every trait in this file expects a non CV-qualified type. -// The only exceptions are in the 'aliases for detected' section -// (i.e. those of the form: decltype(T::member_function(std::declval()))) -// -// In this case, T has to be properly CV-qualified to constraint the function arguments -// (e.g. to_json(BasicJsonType&, const T&)) - -template struct is_basic_json : std::false_type {}; - -NLOHMANN_BASIC_JSON_TPL_DECLARATION -struct is_basic_json : std::true_type {}; - -////////////////////// -// json_ref helpers // -////////////////////// - -template -class json_ref; - -template -struct is_json_ref : std::false_type {}; - -template -struct is_json_ref> : std::true_type {}; - -////////////////////////// -// aliases for detected // -////////////////////////// - -template -using mapped_type_t = typename T::mapped_type; - -template -using key_type_t = typename T::key_type; - -template -using value_type_t = typename T::value_type; - -template -using difference_type_t = typename T::difference_type; - -template -using pointer_t = typename T::pointer; - -template -using reference_t = typename T::reference; - -template -using iterator_category_t = typename T::iterator_category; - -template -using iterator_t = typename T::iterator; - -template -using to_json_function = decltype(T::to_json(std::declval()...)); - -template -using from_json_function = decltype(T::from_json(std::declval()...)); - -template -using get_template_function = decltype(std::declval().template get()); - -// trait 
checking if JSONSerializer::from_json(json const&, udt&) exists -template -struct has_from_json : std::false_type {}; - -// trait checking if j.get is valid -// use this trait instead of std::is_constructible or std::is_convertible, -// both rely on, or make use of implicit conversions, and thus fail when T -// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958) -template -struct is_getable -{ - static constexpr bool value = is_detected::value; -}; - -template -struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -// This trait checks if JSONSerializer::from_json(json const&) exists -// this overload is used for non-default-constructible user-defined-types -template -struct has_non_default_from_json : std::false_type {}; - -template -struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -// This trait checks if BasicJsonType::json_serializer::to_json exists -// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. 
-template -struct has_to_json : std::false_type {}; - -template -struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - - -/////////////////// -// is_ functions // -/////////////////// - -// https://en.cppreference.com/w/cpp/types/conjunction -template struct conjunction : std::true_type { }; -template struct conjunction : B1 { }; -template -struct conjunction -: std::conditional, B1>::type {}; - -// Reimplementation of is_constructible and is_default_constructible, due to them being broken for -// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367). -// This causes compile errors in e.g. clang 3.5 or gcc 4.9. -template -struct is_default_constructible : std::is_default_constructible {}; - -template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; - -template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; - -template -struct is_default_constructible> - : conjunction...> {}; - -template -struct is_default_constructible> - : conjunction...> {}; - - -template -struct is_constructible : std::is_constructible {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - - -template -struct is_iterator_traits : std::false_type {}; - -template -struct is_iterator_traits> -{ - private: - using traits = iterator_traits; - - public: - static constexpr auto value = - is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value; -}; - -// The following implementation of is_complete_type is taken from -// 
https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ -// and is written by Xiang Fan who agreed to using it in this library. - -template -struct is_complete_type : std::false_type {}; - -template -struct is_complete_type : std::true_type {}; - -template -struct is_compatible_object_type_impl : std::false_type {}; - -template -struct is_compatible_object_type_impl < - BasicJsonType, CompatibleObjectType, - enable_if_t < is_detected::value&& - is_detected::value >> -{ - using object_t = typename BasicJsonType::object_t; - - // macOS's is_constructible does not play well with nonesuch... - static constexpr bool value = - is_constructible::value && - is_constructible::value; -}; - -template -struct is_compatible_object_type - : is_compatible_object_type_impl {}; - -template -struct is_constructible_object_type_impl : std::false_type {}; - -template -struct is_constructible_object_type_impl < - BasicJsonType, ConstructibleObjectType, - enable_if_t < is_detected::value&& - is_detected::value >> -{ - using object_t = typename BasicJsonType::object_t; - - static constexpr bool value = - (is_default_constructible::value && - (std::is_move_assignable::value || - std::is_copy_assignable::value) && - (is_constructible::value && - std::is_same < - typename object_t::mapped_type, - typename ConstructibleObjectType::mapped_type >::value)) || - (has_from_json::value || - has_non_default_from_json < - BasicJsonType, - typename ConstructibleObjectType::mapped_type >::value); -}; - -template -struct is_constructible_object_type - : is_constructible_object_type_impl {}; - -template -struct is_compatible_string_type_impl : std::false_type {}; - -template -struct is_compatible_string_type_impl < - BasicJsonType, CompatibleStringType, - enable_if_t::value >> -{ - static constexpr auto value = - is_constructible::value; -}; - -template -struct is_compatible_string_type - : is_compatible_string_type_impl {}; - -template -struct 
is_constructible_string_type_impl : std::false_type {}; - -template -struct is_constructible_string_type_impl < - BasicJsonType, ConstructibleStringType, - enable_if_t::value >> -{ - static constexpr auto value = - is_constructible::value; -}; - -template -struct is_constructible_string_type - : is_constructible_string_type_impl {}; - -template -struct is_compatible_array_type_impl : std::false_type {}; - -template -struct is_compatible_array_type_impl < - BasicJsonType, CompatibleArrayType, - enable_if_t < is_detected::value&& - is_detected::value&& -// This is needed because json_reverse_iterator has a ::iterator type... -// Therefore it is detected as a CompatibleArrayType. -// The real fix would be to have an Iterable concept. - !is_iterator_traits < - iterator_traits>::value >> -{ - static constexpr bool value = - is_constructible::value; -}; - -template -struct is_compatible_array_type - : is_compatible_array_type_impl {}; - -template -struct is_constructible_array_type_impl : std::false_type {}; - -template -struct is_constructible_array_type_impl < - BasicJsonType, ConstructibleArrayType, - enable_if_t::value >> - : std::true_type {}; - -template -struct is_constructible_array_type_impl < - BasicJsonType, ConstructibleArrayType, - enable_if_t < !std::is_same::value&& - is_default_constructible::value&& -(std::is_move_assignable::value || - std::is_copy_assignable::value)&& -is_detected::value&& -is_detected::value&& -is_complete_type < -detected_t>::value >> -{ - static constexpr bool value = - // This is needed because json_reverse_iterator has a ::iterator type, - // furthermore, std::back_insert_iterator (and other iterators) have a - // base class `iterator`... Therefore it is detected as a - // ConstructibleArrayType. The real fix would be to have an Iterable - // concept. 
- !is_iterator_traits>::value && - - (std::is_same::value || - has_from_json::value || - has_non_default_from_json < - BasicJsonType, typename ConstructibleArrayType::value_type >::value); -}; - -template -struct is_constructible_array_type - : is_constructible_array_type_impl {}; - -template -struct is_compatible_integer_type_impl : std::false_type {}; - -template -struct is_compatible_integer_type_impl < - RealIntegerType, CompatibleNumberIntegerType, - enable_if_t < std::is_integral::value&& - std::is_integral::value&& - !std::is_same::value >> -{ - // is there an assert somewhere on overflows? - using RealLimits = std::numeric_limits; - using CompatibleLimits = std::numeric_limits; - - static constexpr auto value = - is_constructible::value && - CompatibleLimits::is_integer && - RealLimits::is_signed == CompatibleLimits::is_signed; -}; - -template -struct is_compatible_integer_type - : is_compatible_integer_type_impl {}; - -template -struct is_compatible_type_impl: std::false_type {}; - -template -struct is_compatible_type_impl < - BasicJsonType, CompatibleType, - enable_if_t::value >> -{ - static constexpr bool value = - has_to_json::value; -}; - -template -struct is_compatible_type - : is_compatible_type_impl {}; - -template -struct is_constructible_tuple : std::false_type {}; - -template -struct is_constructible_tuple> : conjunction...> {}; -} // namespace detail -} // namespace nlohmann - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template -void from_json(const BasicJsonType& j, typename std::nullptr_t& n) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_null())) - { - JSON_THROW(type_error::create(302, "type must be null, but is " + std::string(j.type_name()), j)); - } - n = nullptr; -} - -// overloads for basic_json template parameters -template < typename BasicJsonType, typename ArithmeticType, - enable_if_t < std::is_arithmetic::value&& - !std::is_same::value, - int > = 0 > -void get_arithmetic_value(const BasicJsonType& j, ArithmeticType& val) -{ 
- switch (static_cast(j)) - { - case value_t::number_unsigned: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::number_integer: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::number_float: - { - val = static_cast(*j.template get_ptr()); - break; - } - - default: - JSON_THROW(type_error::create(302, "type must be number, but is " + std::string(j.type_name()), j)); - } -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_boolean())) - { - JSON_THROW(type_error::create(302, "type must be boolean, but is " + std::string(j.type_name()), j)); - } - b = *j.template get_ptr(); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_string())) - { - JSON_THROW(type_error::create(302, "type must be string, but is " + std::string(j.type_name()), j)); - } - s = *j.template get_ptr(); -} - -template < - typename BasicJsonType, typename ConstructibleStringType, - enable_if_t < - is_constructible_string_type::value&& - !std::is_same::value, - int > = 0 > -void from_json(const BasicJsonType& j, ConstructibleStringType& s) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_string())) - { - JSON_THROW(type_error::create(302, "type must be string, but is " + std::string(j.type_name()), j)); - } - - s = *j.template get_ptr(); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::number_float_t& val) -{ - get_arithmetic_value(j, val); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::number_unsigned_t& val) -{ - get_arithmetic_value(j, val); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::number_integer_t& val) -{ - get_arithmetic_value(j, val); -} - -template::value, int> = 0> -void from_json(const BasicJsonType& j, EnumType& e) -{ - typename std::underlying_type::type val; - get_arithmetic_value(j, val); - e = 
static_cast(val); -} - -// forward_list doesn't have an insert method -template::value, int> = 0> -void from_json(const BasicJsonType& j, std::forward_list& l) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()), j)); - } - l.clear(); - std::transform(j.rbegin(), j.rend(), - std::front_inserter(l), [](const BasicJsonType & i) - { - return i.template get(); - }); -} - -// valarray doesn't have an insert method -template::value, int> = 0> -void from_json(const BasicJsonType& j, std::valarray& l) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()), j)); - } - l.resize(j.size()); - std::transform(j.begin(), j.end(), std::begin(l), - [](const BasicJsonType & elem) - { - return elem.template get(); - }); -} - -template -auto from_json(const BasicJsonType& j, T (&arr)[N]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) --> decltype(j.template get(), void()) -{ - for (std::size_t i = 0; i < N; ++i) - { - arr[i] = j.at(i).template get(); - } -} - -template -void from_json_array_impl(const BasicJsonType& j, typename BasicJsonType::array_t& arr, priority_tag<3> /*unused*/) -{ - arr = *j.template get_ptr(); -} - -template -auto from_json_array_impl(const BasicJsonType& j, std::array& arr, - priority_tag<2> /*unused*/) --> decltype(j.template get(), void()) -{ - for (std::size_t i = 0; i < N; ++i) - { - arr[i] = j.at(i).template get(); - } -} - -template::value, - int> = 0> -auto from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr, priority_tag<1> /*unused*/) --> decltype( - arr.reserve(std::declval()), - j.template get(), - void()) -{ - using std::end; - - ConstructibleArrayType ret; - ret.reserve(j.size()); - std::transform(j.begin(), j.end(), - std::inserter(ret, end(ret)), [](const BasicJsonType & i) - { - // get() returns 
*this, this won't call a from_json - // method when value_type is BasicJsonType - return i.template get(); - }); - arr = std::move(ret); -} - -template::value, - int> = 0> -void from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr, - priority_tag<0> /*unused*/) -{ - using std::end; - - ConstructibleArrayType ret; - std::transform( - j.begin(), j.end(), std::inserter(ret, end(ret)), - [](const BasicJsonType & i) - { - // get() returns *this, this won't call a from_json - // method when value_type is BasicJsonType - return i.template get(); - }); - arr = std::move(ret); -} - -template < typename BasicJsonType, typename ConstructibleArrayType, - enable_if_t < - is_constructible_array_type::value&& - !is_constructible_object_type::value&& - !is_constructible_string_type::value&& - !std::is_same::value&& - !is_basic_json::value, - int > = 0 > -auto from_json(const BasicJsonType& j, ConstructibleArrayType& arr) --> decltype(from_json_array_impl(j, arr, priority_tag<3> {}), -j.template get(), -void()) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()), j)); - } - - from_json_array_impl(j, arr, priority_tag<3> {}); -} - -template < typename BasicJsonType, typename T, std::size_t... Idx > -std::array from_json_inplace_array_impl(BasicJsonType&& j, - identity_tag> /*unused*/, index_sequence /*unused*/) -{ - return { { std::forward(j).at(Idx).template get()... 
} }; -} - -template < typename BasicJsonType, typename T, std::size_t N > -auto from_json(BasicJsonType&& j, identity_tag> tag) --> decltype(from_json_inplace_array_impl(std::forward(j), tag, make_index_sequence {})) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()), j)); - } - - return from_json_inplace_array_impl(std::forward(j), tag, make_index_sequence {}); -} - -template -void from_json(const BasicJsonType& j, typename BasicJsonType::binary_t& bin) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_binary())) - { - JSON_THROW(type_error::create(302, "type must be binary, but is " + std::string(j.type_name()), j)); - } - - bin = *j.template get_ptr(); -} - -template::value, int> = 0> -void from_json(const BasicJsonType& j, ConstructibleObjectType& obj) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_object())) - { - JSON_THROW(type_error::create(302, "type must be object, but is " + std::string(j.type_name()), j)); - } - - ConstructibleObjectType ret; - const auto* inner_object = j.template get_ptr(); - using value_type = typename ConstructibleObjectType::value_type; - std::transform( - inner_object->begin(), inner_object->end(), - std::inserter(ret, ret.begin()), - [](typename BasicJsonType::object_t::value_type const & p) - { - return value_type(p.first, p.second.template get()); - }); - obj = std::move(ret); -} - -// overload for arithmetic types, not chosen for basic_json template arguments -// (BooleanType, etc..); note: Is it really necessary to provide explicit -// overloads for boolean_t etc. in case of a custom BooleanType which is not -// an arithmetic type? 
-template < typename BasicJsonType, typename ArithmeticType, - enable_if_t < - std::is_arithmetic::value&& - !std::is_same::value&& - !std::is_same::value&& - !std::is_same::value&& - !std::is_same::value, - int > = 0 > -void from_json(const BasicJsonType& j, ArithmeticType& val) -{ - switch (static_cast(j)) - { - case value_t::number_unsigned: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::number_integer: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::number_float: - { - val = static_cast(*j.template get_ptr()); - break; - } - case value_t::boolean: - { - val = static_cast(*j.template get_ptr()); - break; - } - - default: - JSON_THROW(type_error::create(302, "type must be number, but is " + std::string(j.type_name()), j)); - } -} - -template -std::tuple from_json_tuple_impl_base(BasicJsonType&& j, index_sequence /*unused*/) -{ - return std::make_tuple(std::forward(j).at(Idx).template get()...); -} - -template < typename BasicJsonType, class A1, class A2 > -std::pair from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<0> /*unused*/) -{ - return {std::forward(j).at(0).template get(), - std::forward(j).at(1).template get()}; -} - -template -void from_json_tuple_impl(BasicJsonType&& j, std::pair& p, priority_tag<1> /*unused*/) -{ - p = from_json_tuple_impl(std::forward(j), identity_tag> {}, priority_tag<0> {}); -} - -template -std::tuple from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<2> /*unused*/) -{ - return from_json_tuple_impl_base(std::forward(j), index_sequence_for {}); -} - -template -void from_json_tuple_impl(BasicJsonType&& j, std::tuple& t, priority_tag<3> /*unused*/) -{ - t = from_json_tuple_impl_base(std::forward(j), index_sequence_for {}); -} - -template -auto from_json(BasicJsonType&& j, TupleRelated&& t) --> decltype(from_json_tuple_impl(std::forward(j), std::forward(t), priority_tag<3> {})) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_array())) 
- { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()), j)); - } - - return from_json_tuple_impl(std::forward(j), std::forward(t), priority_tag<3> {}); -} - -template < typename BasicJsonType, typename Key, typename Value, typename Compare, typename Allocator, - typename = enable_if_t < !std::is_constructible < - typename BasicJsonType::string_t, Key >::value >> -void from_json(const BasicJsonType& j, std::map& m) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()), j)); - } - m.clear(); - for (const auto& p : j) - { - if (JSON_HEDLEY_UNLIKELY(!p.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(p.type_name()), j)); - } - m.emplace(p.at(0).template get(), p.at(1).template get()); - } -} - -template < typename BasicJsonType, typename Key, typename Value, typename Hash, typename KeyEqual, typename Allocator, - typename = enable_if_t < !std::is_constructible < - typename BasicJsonType::string_t, Key >::value >> -void from_json(const BasicJsonType& j, std::unordered_map& m) -{ - if (JSON_HEDLEY_UNLIKELY(!j.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()), j)); - } - m.clear(); - for (const auto& p : j) - { - if (JSON_HEDLEY_UNLIKELY(!p.is_array())) - { - JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(p.type_name()), j)); - } - m.emplace(p.at(0).template get(), p.at(1).template get()); - } -} - -struct from_json_fn -{ - template - auto operator()(const BasicJsonType& j, T&& val) const - noexcept(noexcept(from_json(j, std::forward(val)))) - -> decltype(from_json(j, std::forward(val))) - { - return from_json(j, std::forward(val)); - } -}; -} // namespace detail - -/// namespace to hold default `from_json` function -/// to see why this is required: -/// 
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html -namespace // NOLINT(cert-dcl59-cpp,fuchsia-header-anon-namespaces,google-build-namespaces) -{ -constexpr const auto& from_json = detail::static_const::value; // NOLINT(misc-definitions-in-headers) -} // namespace -} // namespace nlohmann - -// #include - - -#include // copy -#include // begin, end -#include // string -#include // tuple, get -#include // is_same, is_constructible, is_floating_point, is_enum, underlying_type -#include // move, forward, declval, pair -#include // valarray -#include // vector - -// #include - - -#include // size_t -#include // input_iterator_tag -#include // string, to_string -#include // tuple_size, get, tuple_element -#include // move - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template -void int_to_string( string_type& target, std::size_t value ) -{ - // For ADL - using std::to_string; - target = to_string(value); -} -template class iteration_proxy_value -{ - public: - using difference_type = std::ptrdiff_t; - using value_type = iteration_proxy_value; - using pointer = value_type * ; - using reference = value_type & ; - using iterator_category = std::input_iterator_tag; - using string_type = typename std::remove_cv< typename std::remove_reference().key() ) >::type >::type; - - private: - /// the iterator - IteratorType anchor; - /// an index for arrays (used to create key names) - std::size_t array_index = 0; - /// last stringified array index - mutable std::size_t array_index_last = 0; - /// a string representation of the array index - mutable string_type array_index_str = "0"; - /// an empty string (to return a reference for primitive values) - const string_type empty_str{}; - - public: - explicit iteration_proxy_value(IteratorType it) noexcept - : anchor(std::move(it)) - {} - - /// dereference operator (needed for range-based for) - iteration_proxy_value& operator*() - { - return *this; - } - - /// increment operator (needed for 
range-based for) - iteration_proxy_value& operator++() - { - ++anchor; - ++array_index; - - return *this; - } - - /// equality operator (needed for InputIterator) - bool operator==(const iteration_proxy_value& o) const - { - return anchor == o.anchor; - } - - /// inequality operator (needed for range-based for) - bool operator!=(const iteration_proxy_value& o) const - { - return anchor != o.anchor; - } - - /// return key of the iterator - const string_type& key() const - { - JSON_ASSERT(anchor.m_object != nullptr); - - switch (anchor.m_object->type()) - { - // use integer array index as key - case value_t::array: - { - if (array_index != array_index_last) - { - int_to_string( array_index_str, array_index ); - array_index_last = array_index; - } - return array_index_str; - } - - // use key from the object - case value_t::object: - return anchor.key(); - - // use an empty key for all primitive types - default: - return empty_str; - } - } - - /// return value of the iterator - typename IteratorType::reference value() const - { - return anchor.value(); - } -}; - -/// proxy class for the items() function -template class iteration_proxy -{ - private: - /// the container to iterate - typename IteratorType::reference container; - - public: - /// construct iteration proxy from a container - explicit iteration_proxy(typename IteratorType::reference cont) noexcept - : container(cont) {} - - /// return iterator begin (needed for range-based for) - iteration_proxy_value begin() noexcept - { - return iteration_proxy_value(container.begin()); - } - - /// return iterator end (needed for range-based for) - iteration_proxy_value end() noexcept - { - return iteration_proxy_value(container.end()); - } -}; -// Structured Bindings Support -// For further reference see https://blog.tartanllama.xyz/structured-bindings/ -// And see https://github.com/nlohmann/json/pull/1391 -template = 0> -auto get(const nlohmann::detail::iteration_proxy_value& i) -> decltype(i.key()) -{ - return i.key(); 
-} -// Structured Bindings Support -// For further reference see https://blog.tartanllama.xyz/structured-bindings/ -// And see https://github.com/nlohmann/json/pull/1391 -template = 0> -auto get(const nlohmann::detail::iteration_proxy_value& i) -> decltype(i.value()) -{ - return i.value(); -} -} // namespace detail -} // namespace nlohmann - -// The Addition to the STD Namespace is required to add -// Structured Bindings Support to the iteration_proxy_value class -// For further reference see https://blog.tartanllama.xyz/structured-bindings/ -// And see https://github.com/nlohmann/json/pull/1391 -namespace std -{ -#if defined(__clang__) - // Fix: https://github.com/nlohmann/json/issues/1401 - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wmismatched-tags" -#endif -template -class tuple_size<::nlohmann::detail::iteration_proxy_value> - : public std::integral_constant {}; - -template -class tuple_element> -{ - public: - using type = decltype( - get(std::declval < - ::nlohmann::detail::iteration_proxy_value> ())); -}; -#if defined(__clang__) - #pragma clang diagnostic pop -#endif -} // namespace std - -// #include - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -////////////////// -// constructors // -////////////////// - -template struct external_constructor; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::boolean_t b) noexcept - { - j.m_type = value_t::boolean; - j.m_value = b; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, const typename BasicJsonType::string_t& s) - { - j.m_type = value_t::string; - j.m_value = s; - j.assert_invariant(); - } - - template - static void construct(BasicJsonType& j, typename BasicJsonType::string_t&& s) - { - j.m_type = value_t::string; - j.m_value = std::move(s); - j.assert_invariant(); - } - - template < typename 
BasicJsonType, typename CompatibleStringType, - enable_if_t < !std::is_same::value, - int > = 0 > - static void construct(BasicJsonType& j, const CompatibleStringType& str) - { - j.m_type = value_t::string; - j.m_value.string = j.template create(str); - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, const typename BasicJsonType::binary_t& b) - { - j.m_type = value_t::binary; - j.m_value = typename BasicJsonType::binary_t(b); - j.assert_invariant(); - } - - template - static void construct(BasicJsonType& j, typename BasicJsonType::binary_t&& b) - { - j.m_type = value_t::binary; - j.m_value = typename BasicJsonType::binary_t(std::move(b));; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::number_float_t val) noexcept - { - j.m_type = value_t::number_float; - j.m_value = val; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::number_unsigned_t val) noexcept - { - j.m_type = value_t::number_unsigned; - j.m_value = val; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, typename BasicJsonType::number_integer_t val) noexcept - { - j.m_type = value_t::number_integer; - j.m_value = val; - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, const typename BasicJsonType::array_t& arr) - { - j.m_type = value_t::array; - j.m_value = arr; - j.set_parents(); - j.assert_invariant(); - } - - template - static void construct(BasicJsonType& j, typename BasicJsonType::array_t&& arr) - { - j.m_type = value_t::array; - j.m_value = std::move(arr); - j.set_parents(); - j.assert_invariant(); - } - - template < typename BasicJsonType, typename 
CompatibleArrayType, - enable_if_t < !std::is_same::value, - int > = 0 > - static void construct(BasicJsonType& j, const CompatibleArrayType& arr) - { - using std::begin; - using std::end; - j.m_type = value_t::array; - j.m_value.array = j.template create(begin(arr), end(arr)); - j.set_parents(); - j.assert_invariant(); - } - - template - static void construct(BasicJsonType& j, const std::vector& arr) - { - j.m_type = value_t::array; - j.m_value = value_t::array; - j.m_value.array->reserve(arr.size()); - for (const bool x : arr) - { - j.m_value.array->push_back(x); - j.set_parent(j.m_value.array->back()); - } - j.assert_invariant(); - } - - template::value, int> = 0> - static void construct(BasicJsonType& j, const std::valarray& arr) - { - j.m_type = value_t::array; - j.m_value = value_t::array; - j.m_value.array->resize(arr.size()); - if (arr.size() > 0) - { - std::copy(std::begin(arr), std::end(arr), j.m_value.array->begin()); - } - j.set_parents(); - j.assert_invariant(); - } -}; - -template<> -struct external_constructor -{ - template - static void construct(BasicJsonType& j, const typename BasicJsonType::object_t& obj) - { - j.m_type = value_t::object; - j.m_value = obj; - j.set_parents(); - j.assert_invariant(); - } - - template - static void construct(BasicJsonType& j, typename BasicJsonType::object_t&& obj) - { - j.m_type = value_t::object; - j.m_value = std::move(obj); - j.set_parents(); - j.assert_invariant(); - } - - template < typename BasicJsonType, typename CompatibleObjectType, - enable_if_t < !std::is_same::value, int > = 0 > - static void construct(BasicJsonType& j, const CompatibleObjectType& obj) - { - using std::begin; - using std::end; - - j.m_type = value_t::object; - j.m_value.object = j.template create(begin(obj), end(obj)); - j.set_parents(); - j.assert_invariant(); - } -}; - -///////////// -// to_json // -///////////// - -template::value, int> = 0> -void to_json(BasicJsonType& j, T b) noexcept -{ - external_constructor::construct(j, b); -} 
- -template::value, int> = 0> -void to_json(BasicJsonType& j, const CompatibleString& s) -{ - external_constructor::construct(j, s); -} - -template -void to_json(BasicJsonType& j, typename BasicJsonType::string_t&& s) -{ - external_constructor::construct(j, std::move(s)); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, FloatType val) noexcept -{ - external_constructor::construct(j, static_cast(val)); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, CompatibleNumberUnsignedType val) noexcept -{ - external_constructor::construct(j, static_cast(val)); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, CompatibleNumberIntegerType val) noexcept -{ - external_constructor::construct(j, static_cast(val)); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, EnumType e) noexcept -{ - using underlying_type = typename std::underlying_type::type; - external_constructor::construct(j, static_cast(e)); -} - -template -void to_json(BasicJsonType& j, const std::vector& e) -{ - external_constructor::construct(j, e); -} - -template < typename BasicJsonType, typename CompatibleArrayType, - enable_if_t < is_compatible_array_type::value&& - !is_compatible_object_type::value&& - !is_compatible_string_type::value&& - !std::is_same::value&& - !is_basic_json::value, - int > = 0 > -void to_json(BasicJsonType& j, const CompatibleArrayType& arr) -{ - external_constructor::construct(j, arr); -} - -template -void to_json(BasicJsonType& j, const typename BasicJsonType::binary_t& bin) -{ - external_constructor::construct(j, bin); -} - -template::value, int> = 0> -void to_json(BasicJsonType& j, const std::valarray& arr) -{ - external_constructor::construct(j, std::move(arr)); -} - -template -void to_json(BasicJsonType& j, typename BasicJsonType::array_t&& arr) -{ - external_constructor::construct(j, std::move(arr)); -} - -template < typename BasicJsonType, typename CompatibleObjectType, - enable_if_t < is_compatible_object_type::value&& 
!is_basic_json::value, int > = 0 > -void to_json(BasicJsonType& j, const CompatibleObjectType& obj) -{ - external_constructor::construct(j, obj); -} - -template -void to_json(BasicJsonType& j, typename BasicJsonType::object_t&& obj) -{ - external_constructor::construct(j, std::move(obj)); -} - -template < - typename BasicJsonType, typename T, std::size_t N, - enable_if_t < !std::is_constructible::value, // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) - int > = 0 > -void to_json(BasicJsonType& j, const T(&arr)[N]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) -{ - external_constructor::construct(j, arr); -} - -template < typename BasicJsonType, typename T1, typename T2, enable_if_t < std::is_constructible::value&& std::is_constructible::value, int > = 0 > -void to_json(BasicJsonType& j, const std::pair& p) -{ - j = { p.first, p.second }; -} - -// for https://github.com/nlohmann/json/pull/1134 -template>::value, int> = 0> -void to_json(BasicJsonType& j, const T& b) -{ - j = { {b.key(), b.value()} }; -} - -template -void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence /*unused*/) -{ - j = { std::get(t)... 
}; -} - -template::value, int > = 0> -void to_json(BasicJsonType& j, const T& t) -{ - to_json_tuple_impl(j, t, make_index_sequence::value> {}); -} - -struct to_json_fn -{ - template - auto operator()(BasicJsonType& j, T&& val) const noexcept(noexcept(to_json(j, std::forward(val)))) - -> decltype(to_json(j, std::forward(val)), void()) - { - return to_json(j, std::forward(val)); - } -}; -} // namespace detail - -/// namespace to hold default `to_json` function -/// to see why this is required: -/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html -namespace // NOLINT(cert-dcl59-cpp,fuchsia-header-anon-namespaces,google-build-namespaces) -{ -constexpr const auto& to_json = detail::static_const::value; // NOLINT(misc-definitions-in-headers) -} // namespace -} // namespace nlohmann - -// #include - -// #include - - -namespace nlohmann -{ - -template -struct adl_serializer -{ - /*! - @brief convert a JSON value to any value type - - This function is usually called by the `get()` function of the - @ref basic_json class (either explicit or via conversion operators). - - @note This function is chosen for default-constructible value types. - - @param[in] j JSON value to read from - @param[in,out] val value to write to - */ - template - static auto from_json(BasicJsonType && j, TargetType& val) noexcept( - noexcept(::nlohmann::from_json(std::forward(j), val))) - -> decltype(::nlohmann::from_json(std::forward(j), val), void()) - { - ::nlohmann::from_json(std::forward(j), val); - } - - /*! - @brief convert a JSON value to any value type - - This function is usually called by the `get()` function of the - @ref basic_json class (either explicit or via conversion operators). - - @note This function is chosen for value types which are not default-constructible. 
- - @param[in] j JSON value to read from - - @return copy of the JSON value, converted to @a ValueType - */ - template - static auto from_json(BasicJsonType && j) noexcept( - noexcept(::nlohmann::from_json(std::forward(j), detail::identity_tag {}))) - -> decltype(::nlohmann::from_json(std::forward(j), detail::identity_tag {})) - { - return ::nlohmann::from_json(std::forward(j), detail::identity_tag {}); - } - - /*! - @brief convert any value type to a JSON value - - This function is usually called by the constructors of the @ref basic_json - class. - - @param[in,out] j JSON value to write to - @param[in] val value to read from - */ - template - static auto to_json(BasicJsonType& j, TargetType && val) noexcept( - noexcept(::nlohmann::to_json(j, std::forward(val)))) - -> decltype(::nlohmann::to_json(j, std::forward(val)), void()) - { - ::nlohmann::to_json(j, std::forward(val)); - } -}; -} // namespace nlohmann - -// #include - - -#include // uint8_t -#include // tie -#include // move - -namespace nlohmann -{ - -/*! -@brief an internal type for a backed binary type - -This type extends the template parameter @a BinaryType provided to `basic_json` -with a subtype used by BSON and MessagePack. This type exists so that the user -does not have to specify a type themselves with a specific naming scheme in -order to override the binary type. 
- -@tparam BinaryType container to store bytes (`std::vector` by - default) - -@since version 3.8.0 -*/ -template -class byte_container_with_subtype : public BinaryType -{ - public: - /// the type of the underlying container - using container_type = BinaryType; - - byte_container_with_subtype() noexcept(noexcept(container_type())) - : container_type() - {} - - byte_container_with_subtype(const container_type& b) noexcept(noexcept(container_type(b))) - : container_type(b) - {} - - byte_container_with_subtype(container_type&& b) noexcept(noexcept(container_type(std::move(b)))) - : container_type(std::move(b)) - {} - - byte_container_with_subtype(const container_type& b, std::uint8_t subtype_) noexcept(noexcept(container_type(b))) - : container_type(b) - , m_subtype(subtype_) - , m_has_subtype(true) - {} - - byte_container_with_subtype(container_type&& b, std::uint8_t subtype_) noexcept(noexcept(container_type(std::move(b)))) - : container_type(std::move(b)) - , m_subtype(subtype_) - , m_has_subtype(true) - {} - - bool operator==(const byte_container_with_subtype& rhs) const - { - return std::tie(static_cast(*this), m_subtype, m_has_subtype) == - std::tie(static_cast(rhs), rhs.m_subtype, rhs.m_has_subtype); - } - - bool operator!=(const byte_container_with_subtype& rhs) const - { - return !(rhs == *this); - } - - /*! - @brief sets the binary subtype - - Sets the binary subtype of the value, also flags a binary JSON value as - having a subtype, which has implications for serialization. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @sa see @ref subtype() -- return the binary subtype - @sa see @ref clear_subtype() -- clears the binary subtype - @sa see @ref has_subtype() -- returns whether or not the binary value has a - subtype - - @since version 3.8.0 - */ - void set_subtype(std::uint8_t subtype_) noexcept - { - m_subtype = subtype_; - m_has_subtype = true; - } - - /*! 
- @brief return the binary subtype - - Returns the numerical subtype of the value if it has a subtype. If it does - not have a subtype, this function will return size_t(-1) as a sentinel - value. - - @return the numerical subtype of the binary value - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @sa see @ref set_subtype() -- sets the binary subtype - @sa see @ref clear_subtype() -- clears the binary subtype - @sa see @ref has_subtype() -- returns whether or not the binary value has a - subtype - - @since version 3.8.0 - */ - constexpr std::uint8_t subtype() const noexcept - { - return m_subtype; - } - - /*! - @brief return whether the value has a subtype - - @return whether the value has a subtype - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @sa see @ref subtype() -- return the binary subtype - @sa see @ref set_subtype() -- sets the binary subtype - @sa see @ref clear_subtype() -- clears the binary subtype - - @since version 3.8.0 - */ - constexpr bool has_subtype() const noexcept - { - return m_has_subtype; - } - - /*! - @brief clears the binary subtype - - Clears the binary subtype and flags the value as not having a subtype, which - has implications for serialization; for instance MessagePack will prefer the - bin family over the ext family. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. 
- - @sa see @ref subtype() -- return the binary subtype - @sa see @ref set_subtype() -- sets the binary subtype - @sa see @ref has_subtype() -- returns whether or not the binary value has a - subtype - - @since version 3.8.0 - */ - void clear_subtype() noexcept - { - m_subtype = 0; - m_has_subtype = false; - } - - private: - std::uint8_t m_subtype = 0; - bool m_has_subtype = false; -}; - -} // namespace nlohmann - -// #include - -// #include - -// #include - -// #include - - -#include // uint8_t -#include // size_t -#include // hash - -// #include - - -namespace nlohmann -{ -namespace detail -{ - -// boost::hash_combine -inline std::size_t combine(std::size_t seed, std::size_t h) noexcept -{ - seed ^= h + 0x9e3779b9 + (seed << 6U) + (seed >> 2U); - return seed; -} - -/*! -@brief hash a JSON value - -The hash function tries to rely on std::hash where possible. Furthermore, the -type of the JSON value is taken into account to have different hash values for -null, 0, 0U, and false, etc. - -@tparam BasicJsonType basic_json specialization -@param j JSON value to hash -@return hash value of j -*/ -template -std::size_t hash(const BasicJsonType& j) -{ - using string_t = typename BasicJsonType::string_t; - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - - const auto type = static_cast(j.type()); - switch (j.type()) - { - case BasicJsonType::value_t::null: - case BasicJsonType::value_t::discarded: - { - return combine(type, 0); - } - - case BasicJsonType::value_t::object: - { - auto seed = combine(type, j.size()); - for (const auto& element : j.items()) - { - const auto h = std::hash {}(element.key()); - seed = combine(seed, h); - seed = combine(seed, hash(element.value())); - } - return seed; - } - - case BasicJsonType::value_t::array: - { - auto seed = combine(type, j.size()); - for (const auto& element : j) - { - 
seed = combine(seed, hash(element)); - } - return seed; - } - - case BasicJsonType::value_t::string: - { - const auto h = std::hash {}(j.template get_ref()); - return combine(type, h); - } - - case BasicJsonType::value_t::boolean: - { - const auto h = std::hash {}(j.template get()); - return combine(type, h); - } - - case BasicJsonType::value_t::number_integer: - { - const auto h = std::hash {}(j.template get()); - return combine(type, h); - } - - case BasicJsonType::value_t::number_unsigned: - { - const auto h = std::hash {}(j.template get()); - return combine(type, h); - } - - case BasicJsonType::value_t::number_float: - { - const auto h = std::hash {}(j.template get()); - return combine(type, h); - } - - case BasicJsonType::value_t::binary: - { - auto seed = combine(type, j.get_binary().size()); - const auto h = std::hash {}(j.get_binary().has_subtype()); - seed = combine(seed, h); - seed = combine(seed, j.get_binary().subtype()); - for (const auto byte : j.get_binary()) - { - seed = combine(seed, std::hash {}(byte)); - } - return seed; - } - - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - return 0; // LCOV_EXCL_LINE - } -} - -} // namespace detail -} // namespace nlohmann - -// #include - - -#include // generate_n -#include // array -#include // ldexp -#include // size_t -#include // uint8_t, uint16_t, uint32_t, uint64_t -#include // snprintf -#include // memcpy -#include // back_inserter -#include // numeric_limits -#include // char_traits, string -#include // make_pair, move -#include // vector - -// #include - -// #include - - -#include // array -#include // size_t -#include //FILE * -#include // strlen -#include // istream -#include // begin, end, iterator_traits, random_access_iterator_tag, distance, next -#include // shared_ptr, make_shared, addressof -#include // accumulate -#include // string, char_traits -#include // enable_if, is_base_of, is_pointer, is_integral, 
remove_pointer -#include // pair, declval - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -/// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson, bson }; - -//////////////////// -// input adapters // -//////////////////// - -/*! -Input adapter for stdio file access. This adapter read only 1 byte and do not use any - buffer. This adapter is a very low level adapter. -*/ -class file_input_adapter -{ - public: - using char_type = char; - - JSON_HEDLEY_NON_NULL(2) - explicit file_input_adapter(std::FILE* f) noexcept - : m_file(f) - {} - - // make class move-only - file_input_adapter(const file_input_adapter&) = delete; - file_input_adapter(file_input_adapter&&) noexcept = default; - file_input_adapter& operator=(const file_input_adapter&) = delete; - file_input_adapter& operator=(file_input_adapter&&) = delete; - ~file_input_adapter() = default; - - std::char_traits::int_type get_character() noexcept - { - return std::fgetc(m_file); - } - - private: - /// the file pointer to read from - std::FILE* m_file; -}; - - -/*! -Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at -beginning of input. Does not support changing the underlying std::streambuf -in mid-input. Maintains underlying std::istream and std::streambuf to support -subsequent use of standard std::istream operations to process any input -characters following those used in parsing the JSON input. Clears the -std::istream flags; any input errors (e.g., EOF) will be detected by the first -subsequent call for input from the std::istream. 
-*/ -class input_stream_adapter -{ - public: - using char_type = char; - - ~input_stream_adapter() - { - // clear stream flags; we use underlying streambuf I/O, do not - // maintain ifstream flags, except eof - if (is != nullptr) - { - is->clear(is->rdstate() & std::ios::eofbit); - } - } - - explicit input_stream_adapter(std::istream& i) - : is(&i), sb(i.rdbuf()) - {} - - // delete because of pointer members - input_stream_adapter(const input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&&) = delete; - - input_stream_adapter(input_stream_adapter&& rhs) noexcept - : is(rhs.is), sb(rhs.sb) - { - rhs.is = nullptr; - rhs.sb = nullptr; - } - - // std::istream/std::streambuf use std::char_traits::to_int_type, to - // ensure that std::char_traits::eof() and the character 0xFF do not - // end up as the same value, eg. 0xFFFFFFFF. - std::char_traits::int_type get_character() - { - auto res = sb->sbumpc(); - // set eof manually, as we don't use the istream interface. - if (JSON_HEDLEY_UNLIKELY(res == std::char_traits::eof())) - { - is->clear(is->rdstate() | std::ios::eofbit); - } - return res; - } - - private: - /// the associated input stream - std::istream* is = nullptr; - std::streambuf* sb = nullptr; -}; - -// General-purpose iterator-based adapter. It might not be as fast as -// theoretically possible for some containers, but it is extremely versatile. 
-template -class iterator_input_adapter -{ - public: - using char_type = typename std::iterator_traits::value_type; - - iterator_input_adapter(IteratorType first, IteratorType last) - : current(std::move(first)), end(std::move(last)) - {} - - typename std::char_traits::int_type get_character() - { - if (JSON_HEDLEY_LIKELY(current != end)) - { - auto result = std::char_traits::to_int_type(*current); - std::advance(current, 1); - return result; - } - - return std::char_traits::eof(); - } - - private: - IteratorType current; - IteratorType end; - - template - friend struct wide_string_input_helper; - - bool empty() const - { - return current == end; - } -}; - - -template -struct wide_string_input_helper; - -template -struct wide_string_input_helper -{ - // UTF-32 - static void fill_buffer(BaseInputAdapter& input, - std::array::int_type, 4>& utf8_bytes, - size_t& utf8_bytes_index, - size_t& utf8_bytes_filled) - { - utf8_bytes_index = 0; - - if (JSON_HEDLEY_UNLIKELY(input.empty())) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const auto wc = input.get_character(); - - // UTF-32 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = static_cast::int_type>(wc); - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = static_cast::int_type>(0xC0u | ((static_cast(wc) >> 6u) & 0x1Fu)); - utf8_bytes[1] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = static_cast::int_type>(0xE0u | ((static_cast(wc) >> 12u) & 0x0Fu)); - utf8_bytes[1] = static_cast::int_type>(0x80u | ((static_cast(wc) >> 6u) & 0x3Fu)); - utf8_bytes[2] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = static_cast::int_type>(0xF0u | ((static_cast(wc) >> 18u) & 0x07u)); - utf8_bytes[1] = static_cast::int_type>(0x80u | ((static_cast(wc) >> 12u) & 
0x3Fu)); - utf8_bytes[2] = static_cast::int_type>(0x80u | ((static_cast(wc) >> 6u) & 0x3Fu)); - utf8_bytes[3] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); - utf8_bytes_filled = 4; - } - else - { - // unknown character - utf8_bytes[0] = static_cast::int_type>(wc); - utf8_bytes_filled = 1; - } - } - } -}; - -template -struct wide_string_input_helper -{ - // UTF-16 - static void fill_buffer(BaseInputAdapter& input, - std::array::int_type, 4>& utf8_bytes, - size_t& utf8_bytes_index, - size_t& utf8_bytes_filled) - { - utf8_bytes_index = 0; - - if (JSON_HEDLEY_UNLIKELY(input.empty())) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const auto wc = input.get_character(); - - // UTF-16 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = static_cast::int_type>(wc); - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = static_cast::int_type>(0xC0u | ((static_cast(wc) >> 6u))); - utf8_bytes[1] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); - utf8_bytes_filled = 2; - } - else if (0xD800 > wc || wc >= 0xE000) - { - utf8_bytes[0] = static_cast::int_type>(0xE0u | ((static_cast(wc) >> 12u))); - utf8_bytes[1] = static_cast::int_type>(0x80u | ((static_cast(wc) >> 6u) & 0x3Fu)); - utf8_bytes[2] = static_cast::int_type>(0x80u | (static_cast(wc) & 0x3Fu)); - utf8_bytes_filled = 3; - } - else - { - if (JSON_HEDLEY_UNLIKELY(!input.empty())) - { - const auto wc2 = static_cast(input.get_character()); - const auto charcode = 0x10000u + (((static_cast(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu)); - utf8_bytes[0] = static_cast::int_type>(0xF0u | (charcode >> 18u)); - utf8_bytes[1] = static_cast::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu)); - utf8_bytes[2] = static_cast::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu)); - utf8_bytes[3] = static_cast::int_type>(0x80u | (charcode & 0x3Fu)); - utf8_bytes_filled = 4; - } - else - { - utf8_bytes[0] = static_cast::int_type>(wc); 
- utf8_bytes_filled = 1; - } - } - } - } -}; - -// Wraps another input apdater to convert wide character types into individual bytes. -template -class wide_string_input_adapter -{ - public: - using char_type = char; - - wide_string_input_adapter(BaseInputAdapter base) - : base_adapter(base) {} - - typename std::char_traits::int_type get_character() noexcept - { - // check if buffer needs to be filled - if (utf8_bytes_index == utf8_bytes_filled) - { - fill_buffer(); - - JSON_ASSERT(utf8_bytes_filled > 0); - JSON_ASSERT(utf8_bytes_index == 0); - } - - // use buffer - JSON_ASSERT(utf8_bytes_filled > 0); - JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled); - return utf8_bytes[utf8_bytes_index++]; - } - - private: - BaseInputAdapter base_adapter; - - template - void fill_buffer() - { - wide_string_input_helper::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); - } - - /// a buffer for UTF-8 bytes - std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; - - /// index to the utf8_codes array for the next valid byte - std::size_t utf8_bytes_index = 0; - /// number of valid bytes in the utf8_codes array - std::size_t utf8_bytes_filled = 0; -}; - - -template -struct iterator_input_adapter_factory -{ - using iterator_type = IteratorType; - using char_type = typename std::iterator_traits::value_type; - using adapter_type = iterator_input_adapter; - - static adapter_type create(IteratorType first, IteratorType last) - { - return adapter_type(std::move(first), std::move(last)); - } -}; - -template -struct is_iterator_of_multibyte -{ - using value_type = typename std::iterator_traits::value_type; - enum - { - value = sizeof(value_type) > 1 - }; -}; - -template -struct iterator_input_adapter_factory::value>> -{ - using iterator_type = IteratorType; - using char_type = typename std::iterator_traits::value_type; - using base_adapter_type = iterator_input_adapter; - using adapter_type = wide_string_input_adapter; - - static adapter_type create(IteratorType 
first, IteratorType last) - { - return adapter_type(base_adapter_type(std::move(first), std::move(last))); - } -}; - -// General purpose iterator-based input -template -typename iterator_input_adapter_factory::adapter_type input_adapter(IteratorType first, IteratorType last) -{ - using factory_type = iterator_input_adapter_factory; - return factory_type::create(first, last); -} - -// Convenience shorthand from container to iterator -// Enables ADL on begin(container) and end(container) -// Encloses the using declarations in namespace for not to leak them to outside scope - -namespace container_input_adapter_factory_impl -{ - -using std::begin; -using std::end; - -template -struct container_input_adapter_factory {}; - -template -struct container_input_adapter_factory< ContainerType, - void_t()), end(std::declval()))>> - { - using adapter_type = decltype(input_adapter(begin(std::declval()), end(std::declval()))); - - static adapter_type create(const ContainerType& container) -{ - return input_adapter(begin(container), end(container)); -} - }; - -} // namespace container_input_adapter_factory_impl - -template -typename container_input_adapter_factory_impl::container_input_adapter_factory::adapter_type input_adapter(const ContainerType& container) -{ - return container_input_adapter_factory_impl::container_input_adapter_factory::create(container); -} - -// Special cases with fast paths -inline file_input_adapter input_adapter(std::FILE* file) -{ - return file_input_adapter(file); -} - -inline input_stream_adapter input_adapter(std::istream& stream) -{ - return input_stream_adapter(stream); -} - -inline input_stream_adapter input_adapter(std::istream&& stream) -{ - return input_stream_adapter(stream); -} - -using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval(), std::declval())); - -// Null-delimited strings, and the like. 
-template < typename CharT, - typename std::enable_if < - std::is_pointer::value&& - !std::is_array::value&& - std::is_integral::type>::value&& - sizeof(typename std::remove_pointer::type) == 1, - int >::type = 0 > -contiguous_bytes_input_adapter input_adapter(CharT b) -{ - auto length = std::strlen(reinterpret_cast(b)); - const auto* ptr = reinterpret_cast(b); - return input_adapter(ptr, ptr + length); -} - -template -auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N)) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) -{ - return input_adapter(array, array + N); -} - -// This class only handles inputs of input_buffer_adapter type. -// It's required so that expressions like {ptr, len} can be implicitely casted -// to the correct adapter. -class span_input_adapter -{ - public: - template < typename CharT, - typename std::enable_if < - std::is_pointer::value&& - std::is_integral::type>::value&& - sizeof(typename std::remove_pointer::type) == 1, - int >::type = 0 > - span_input_adapter(CharT b, std::size_t l) - : ia(reinterpret_cast(b), reinterpret_cast(b) + l) {} - - template::iterator_category, std::random_access_iterator_tag>::value, - int>::type = 0> - span_input_adapter(IteratorType first, IteratorType last) - : ia(input_adapter(first, last)) {} - - contiguous_bytes_input_adapter&& get() - { - return std::move(ia); // NOLINT(hicpp-move-const-arg,performance-move-const-arg) - } - - private: - contiguous_bytes_input_adapter ia; -}; -} // namespace detail -} // namespace nlohmann - -// #include - - -#include -#include // string -#include // move -#include // vector - -// #include - -// #include - - -namespace nlohmann -{ - -/*! -@brief SAX interface - -This class describes the SAX interface used by @ref nlohmann::json::sax_parse. -Each function is called in different situations while the input is parsed. The -boolean return value informs the parser whether to continue processing the -input. 
-*/ -template -struct json_sax -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - /*! - @brief a null value was read - @return whether parsing should proceed - */ - virtual bool null() = 0; - - /*! - @brief a boolean value was read - @param[in] val boolean value - @return whether parsing should proceed - */ - virtual bool boolean(bool val) = 0; - - /*! - @brief an integer number was read - @param[in] val integer value - @return whether parsing should proceed - */ - virtual bool number_integer(number_integer_t val) = 0; - - /*! - @brief an unsigned integer number was read - @param[in] val unsigned integer value - @return whether parsing should proceed - */ - virtual bool number_unsigned(number_unsigned_t val) = 0; - - /*! - @brief an floating-point number was read - @param[in] val floating-point value - @param[in] s raw token value - @return whether parsing should proceed - */ - virtual bool number_float(number_float_t val, const string_t& s) = 0; - - /*! - @brief a string was read - @param[in] val string value - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool string(string_t& val) = 0; - - /*! - @brief a binary string was read - @param[in] val binary value - @return whether parsing should proceed - @note It is safe to move the passed binary. - */ - virtual bool binary(binary_t& val) = 0; - - /*! - @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_object(std::size_t elements) = 0; - - /*! 
- @brief an object key was read - @param[in] val object key - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool key(string_t& val) = 0; - - /*! - @brief the end of an object was read - @return whether parsing should proceed - */ - virtual bool end_object() = 0; - - /*! - @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_array(std::size_t elements) = 0; - - /*! - @brief the end of an array was read - @return whether parsing should proceed - */ - virtual bool end_array() = 0; - - /*! - @brief a parse error occurred - @param[in] position the position in the input where the error occurs - @param[in] last_token the last read token - @param[in] ex an exception object describing the error - @return whether parsing should proceed (must return false) - */ - virtual bool parse_error(std::size_t position, - const std::string& last_token, - const detail::exception& ex) = 0; - - json_sax() = default; - json_sax(const json_sax&) = default; - json_sax(json_sax&&) noexcept = default; - json_sax& operator=(const json_sax&) = default; - json_sax& operator=(json_sax&&) noexcept = default; - virtual ~json_sax() = default; -}; - - -namespace detail -{ -/*! -@brief SAX implementation to create a JSON value from SAX events - -This class implements the @ref json_sax interface and processes the SAX events -to create a JSON value which makes it basically a DOM parser. The structure or -hierarchy of the JSON value is managed by the stack `ref_stack` which contains -a pointer to the respective array or object for each recursion depth. - -After successful parsing, the value that is passed by reference to the -constructor contains the parsed value. 
- -@tparam BasicJsonType the JSON type -*/ -template -class json_sax_dom_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - /*! - @param[in,out] r reference to a JSON value that is manipulated while - parsing - @param[in] allow_exceptions_ whether parse errors yield exceptions - */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) - {} - - // make class move-only - json_sax_dom_parser(const json_sax_dom_parser&) = delete; - json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; - json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_parser() = default; - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } - - bool start_object(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); - - if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) && len > ref_stack.back()->max_size())) - { - 
JSON_THROW(out_of_range::create(408, "excessive object size: " + std::to_string(len), *ref_stack.back())); - } - - return true; - } - - bool key(string_t& val) - { - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_value.object->operator[](val)); - return true; - } - - bool end_object() - { - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } - - bool start_array(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); - - if (JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, "excessive array size: " + std::to_string(len), *ref_stack.back())); - } - - return true; - } - - bool end_array() - { - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } - - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. 
- @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - */ - template - JSON_HEDLEY_RETURNS_NON_NULL - BasicJsonType* handle_value(Value&& v) - { - if (ref_stack.empty()) - { - root = BasicJsonType(std::forward(v)); - return &root; - } - - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_value.array->emplace_back(std::forward(v)); - return &(ref_stack.back()->m_value.array->back()); - } - - JSON_ASSERT(ref_stack.back()->is_object()); - JSON_ASSERT(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } - - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; -}; - -template -class json_sax_dom_callback_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using parser_callback_t = typename BasicJsonType::parser_callback_t; - using parse_event_t = typename BasicJsonType::parse_event_t; - - json_sax_dom_callback_parser(BasicJsonType& r, - const parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(cb), allow_exceptions(allow_exceptions_) - { - keep_stack.push_back(true); - } - - // make class move-only - json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // 
NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_callback_parser() = default; - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } - - bool start_object(std::size_t len) - { - // check callback for object start - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::object, true); - ref_stack.push_back(val.second); - - // check object limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, "excessive object size: " + std::to_string(len), *ref_stack.back())); - } - - return true; - } - - bool key(string_t& val) - { - BasicJsonType k = BasicJsonType(val); - - // check callback for key - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); - key_keep_stack.push_back(keep); - - // add discarded value at given key and store the reference for later - if (keep && ref_stack.back()) - { - object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded); - } - - return true; - } - - bool end_object() - { 
- if (ref_stack.back()) - { - if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) - { - // discard object - *ref_stack.back() = discarded; - } - else - { - ref_stack.back()->set_parents(); - } - } - - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) - { - // remove discarded value - for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) - { - if (it->is_discarded()) - { - ref_stack.back()->erase(it); - break; - } - } - } - - return true; - } - - bool start_array(std::size_t len) - { - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::array, true); - ref_stack.push_back(val.second); - - // check array limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != std::size_t(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, "excessive array size: " + std::to_string(len), *ref_stack.back())); - } - - return true; - } - - bool end_array() - { - bool keep = true; - - if (ref_stack.back()) - { - keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); - if (keep) - { - ref_stack.back()->set_parents(); - } - else - { - // discard array - *ref_stack.back() = discarded; - } - } - - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - // remove discarded value - if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) - { - ref_stack.back()->m_value.array->pop_back(); - } - - return true; - } - - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - 
JSON_THROW(ex); - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - /*! - @param[in] v value to add to the JSON value we build during parsing - @param[in] skip_callback whether we should skip calling the callback - function; this is required after start_array() and - start_object() SAX events, because otherwise we would call the - callback function with an empty array or object, respectively. - - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - - @return pair of boolean (whether value should be kept) and pointer (to the - passed value in the ref_stack hierarchy; nullptr if not kept) - */ - template - std::pair handle_value(Value&& v, const bool skip_callback = false) - { - JSON_ASSERT(!keep_stack.empty()); - - // do not handle this value if we know it would be added to a discarded - // container - if (!keep_stack.back()) - { - return {false, nullptr}; - } - - // create value - auto value = BasicJsonType(std::forward(v)); - - // check callback - const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); - - // do not handle this value if we just learnt it shall be discarded - if (!keep) - { - return {false, nullptr}; - } - - if (ref_stack.empty()) - { - root = std::move(value); - return {true, &root}; - } - - // skip this value if we already decided to skip the parent - // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) - if (!ref_stack.back()) - { - return {false, nullptr}; - } - - // we now only expect arrays and objects - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - - // array - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_value.array->emplace_back(std::move(value)); - return {true, &(ref_stack.back()->m_value.array->back())}; - } - - // object - 
JSON_ASSERT(ref_stack.back()->is_object()); - // check if we should store an element for the current key - JSON_ASSERT(!key_keep_stack.empty()); - const bool store_element = key_keep_stack.back(); - key_keep_stack.pop_back(); - - if (!store_element) - { - return {false, nullptr}; - } - - JSON_ASSERT(object_element); - *object_element = std::move(value); - return {true, object_element}; - } - - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// stack to manage which values to keep - std::vector keep_stack {}; - /// stack to manage which object keys to keep - std::vector key_keep_stack {}; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// callback function - const parser_callback_t callback = nullptr; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; - /// a discarded value for the callback - BasicJsonType discarded = BasicJsonType::value_t::discarded; -}; - -template -class json_sax_acceptor -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - bool null() - { - return true; - } - - bool boolean(bool /*unused*/) - { - return true; - } - - bool number_integer(number_integer_t /*unused*/) - { - return true; - } - - bool number_unsigned(number_unsigned_t /*unused*/) - { - return true; - } - - bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) - { - return true; - } - - bool string(string_t& /*unused*/) - { - return true; - } - - bool binary(binary_t& /*unused*/) - { - return true; - } - - bool start_object(std::size_t /*unused*/ = 
std::size_t(-1)) - { - return true; - } - - bool key(string_t& /*unused*/) - { - return true; - } - - bool end_object() - { - return true; - } - - bool start_array(std::size_t /*unused*/ = std::size_t(-1)) - { - return true; - } - - bool end_array() - { - return true; - } - - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) - { - return false; - } -}; -} // namespace detail - -} // namespace nlohmann - -// #include - - -#include // array -#include // localeconv -#include // size_t -#include // snprintf -#include // strtof, strtod, strtold, strtoll, strtoull -#include // initializer_list -#include // char_traits, string -#include // move -#include // vector - -// #include - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -/////////// -// lexer // -/////////// - -template -class lexer_base -{ - public: - /// token types for the parser - enum class token_type - { - uninitialized, ///< indicating the scanner is uninitialized - literal_true, ///< the `true` literal - literal_false, ///< the `false` literal - literal_null, ///< the `null` literal - value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value - value_integer, ///< a signed integer -- use get_number_integer() for actual value - value_float, ///< an floating point number -- use get_number_float() for actual value - begin_array, ///< the character for array begin `[` - begin_object, ///< the character for object begin `{` - end_array, ///< the character for array end `]` - end_object, ///< the character for object end `}` - name_separator, ///< the name separator `:` - value_separator, ///< the value separator `,` - parse_error, ///< indicating a parse error - end_of_input, ///< indicating the end of the input buffer - literal_or_value ///< a literal or the begin of a value (only for diagnostics) - }; - - /// return name of values 
of type token_type (only used for errors) - JSON_HEDLEY_RETURNS_NON_NULL - JSON_HEDLEY_CONST - static const char* token_type_name(const token_type t) noexcept - { - switch (t) - { - case token_type::uninitialized: - return ""; - case token_type::literal_true: - return "true literal"; - case token_type::literal_false: - return "false literal"; - case token_type::literal_null: - return "null literal"; - case token_type::value_string: - return "string literal"; - case token_type::value_unsigned: - case token_type::value_integer: - case token_type::value_float: - return "number literal"; - case token_type::begin_array: - return "'['"; - case token_type::begin_object: - return "'{'"; - case token_type::end_array: - return "']'"; - case token_type::end_object: - return "'}'"; - case token_type::name_separator: - return "':'"; - case token_type::value_separator: - return "','"; - case token_type::parse_error: - return ""; - case token_type::end_of_input: - return "end of input"; - case token_type::literal_or_value: - return "'[', '{', or a literal"; - // LCOV_EXCL_START - default: // catch non-enum values - return "unknown token"; - // LCOV_EXCL_STOP - } - } -}; -/*! -@brief lexical analysis - -This class organizes the lexical analysis during JSON deserialization. 
-*/ -template -class lexer : public lexer_base -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using char_type = typename InputAdapterType::char_type; - using char_int_type = typename std::char_traits::int_type; - - public: - using token_type = typename lexer_base::token_type; - - explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept - : ia(std::move(adapter)) - , ignore_comments(ignore_comments_) - , decimal_point_char(static_cast(get_decimal_point())) - {} - - // delete because of pointer members - lexer(const lexer&) = delete; - lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - lexer& operator=(lexer&) = delete; - lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~lexer() = default; - - private: - ///////////////////// - // locales - ///////////////////// - - /// return the locale-dependent decimal point - JSON_HEDLEY_PURE - static char get_decimal_point() noexcept - { - const auto* loc = localeconv(); - JSON_ASSERT(loc != nullptr); - return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); - } - - ///////////////////// - // scan functions - ///////////////////// - - /*! - @brief get codepoint from 4 hex characters following `\u` - - For input "\u c1 c2 c3 c4" the codepoint is: - (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 - = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) - - Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' - must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The - conversion is done by subtracting the offset (0x30, 0x37, and 0x57) - between the ASCII value of the character and the desired integer value. 
- - @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or - non-hex character) - */ - int get_codepoint() - { - // this function only makes sense after reading `\u` - JSON_ASSERT(current == 'u'); - int codepoint = 0; - - const auto factors = { 12u, 8u, 4u, 0u }; - for (const auto factor : factors) - { - get(); - - if (current >= '0' && current <= '9') - { - codepoint += static_cast((static_cast(current) - 0x30u) << factor); - } - else if (current >= 'A' && current <= 'F') - { - codepoint += static_cast((static_cast(current) - 0x37u) << factor); - } - else if (current >= 'a' && current <= 'f') - { - codepoint += static_cast((static_cast(current) - 0x57u) << factor); - } - else - { - return -1; - } - } - - JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF); - return codepoint; - } - - /*! - @brief check if the next byte(s) are inside a given range - - Adds the current byte and, for each passed range, reads a new byte and - checks if it is inside the range. If a violation was detected, set up an - error message and return false. Otherwise, return true. - - @param[in] ranges list of integers; interpreted as list of pairs of - inclusive lower and upper bound, respectively - - @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, - 1, 2, or 3 pairs. This precondition is enforced by an assertion. - - @return true if and only if no range violation was detected - */ - bool next_byte_in_range(std::initializer_list ranges) - { - JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6); - add(current); - - for (auto range = ranges.begin(); range != ranges.end(); ++range) - { - get(); - if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) - { - add(current); - } - else - { - error_message = "invalid string: ill-formed UTF-8 byte"; - return false; - } - } - - return true; - } - - /*! - @brief scan a string literal - - This function scans a string according to Sect. 7 of RFC 8259. 
While - scanning, bytes are escaped and copied into buffer token_buffer. Then the - function returns successfully, token_buffer is *not* null-terminated (as it - may contain \0 bytes), and token_buffer.size() is the number of bytes in the - string. - - @return token_type::value_string if string could be successfully scanned, - token_type::parse_error otherwise - - @note In case of errors, variable error_message contains a textual - description. - */ - token_type scan_string() - { - // reset token_buffer (ignore opening quote) - reset(); - - // we entered the function by reading an open quote - JSON_ASSERT(current == '\"'); - - while (true) - { - // get next character - switch (get()) - { - // end of file while parsing string - case std::char_traits::eof(): - { - error_message = "invalid string: missing closing quote"; - return token_type::parse_error; - } - - // closing quote - case '\"': - { - return token_type::value_string; - } - - // escapes - case '\\': - { - switch (get()) - { - // quotation mark - case '\"': - add('\"'); - break; - // reverse solidus - case '\\': - add('\\'); - break; - // solidus - case '/': - add('/'); - break; - // backspace - case 'b': - add('\b'); - break; - // form feed - case 'f': - add('\f'); - break; - // line feed - case 'n': - add('\n'); - break; - // carriage return - case 'r': - add('\r'); - break; - // tab - case 't': - add('\t'); - break; - - // unicode escapes - case 'u': - { - const int codepoint1 = get_codepoint(); - int codepoint = codepoint1; // start with codepoint1 - - if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } - - // check if code point is a high surrogate - if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) - { - // expect next \uxxxx entry - if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u')) - { - const int codepoint2 = get_codepoint(); - - if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) - { - 
error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } - - // check if codepoint2 is a low surrogate - if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) - { - // overwrite codepoint - codepoint = static_cast( - // high surrogate occupies the most significant 22 bits - (static_cast(codepoint1) << 10u) - // low surrogate occupies the least significant 15 bits - + static_cast(codepoint2) - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00u); - } - else - { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; - return token_type::parse_error; - } - } - - // result of the above calculation yields a proper codepoint - JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF); - - // translate codepoint into bytes - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - add(static_cast(codepoint)); - } - else if (codepoint <= 0x7FF) - { - // 2-byte characters: 110xxxxx 10xxxxxx - add(static_cast(0xC0u | (static_cast(codepoint) >> 6u))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } - else if (codepoint <= 0xFFFF) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - add(static_cast(0xE0u | (static_cast(codepoint) >> 12u))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } - else - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 
add(static_cast(0xF0u | (static_cast(codepoint) >> 18u))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 12u) & 0x3Fu))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } - - break; - } - - // other characters after escape - default: - error_message = "invalid string: forbidden character after backslash"; - return token_type::parse_error; - } - - break; - } - - // invalid control characters - case 0x00: - { - error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; - return token_type::parse_error; - } - - case 0x01: - { - error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; - return token_type::parse_error; - } - - case 0x02: - { - error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; - return token_type::parse_error; - } - - case 0x03: - { - error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; - return token_type::parse_error; - } - - case 0x04: - { - error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; - return token_type::parse_error; - } - - case 0x05: - { - error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; - return token_type::parse_error; - } - - case 0x06: - { - error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; - return token_type::parse_error; - } - - case 0x07: - { - error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; - return token_type::parse_error; - } - - case 0x08: - { - error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; - return token_type::parse_error; - } - - case 0x09: - { - error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; - return 
token_type::parse_error; - } - - case 0x0A: - { - error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; - return token_type::parse_error; - } - - case 0x0B: - { - error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; - return token_type::parse_error; - } - - case 0x0C: - { - error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; - return token_type::parse_error; - } - - case 0x0D: - { - error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; - return token_type::parse_error; - } - - case 0x0E: - { - error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; - return token_type::parse_error; - } - - case 0x0F: - { - error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; - return token_type::parse_error; - } - - case 0x10: - { - error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; - return token_type::parse_error; - } - - case 0x11: - { - error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; - return token_type::parse_error; - } - - case 0x12: - { - error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; - return token_type::parse_error; - } - - case 0x13: - { - error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; - return token_type::parse_error; - } - - case 0x14: - { - error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; - return token_type::parse_error; - } - - case 0x15: - { - error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; - return token_type::parse_error; - } - - case 0x16: - { - error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; - return 
token_type::parse_error; - } - - case 0x17: - { - error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; - return token_type::parse_error; - } - - case 0x18: - { - error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; - return token_type::parse_error; - } - - case 0x19: - { - error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; - return token_type::parse_error; - } - - case 0x1A: - { - error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; - return token_type::parse_error; - } - - case 0x1B: - { - error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; - return token_type::parse_error; - } - - case 0x1C: - { - error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; - return token_type::parse_error; - } - - case 0x1D: - { - error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; - return token_type::parse_error; - } - - case 0x1E: - { - error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; - return token_type::parse_error; - } - - case 0x1F: - { - error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; - return token_type::parse_error; - } - - // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) - case 0x20: - case 0x21: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - 
case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5D: - case 0x5E: - case 0x5F: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: - { - add(current); - break; - } - - // U+0080..U+07FF: bytes C2..DF 80..BF - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - { - if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF}))) - { - return token_type::parse_error; - } - break; - } - - // U+0800..U+0FFF: bytes E0 A0..BF 80..BF - case 0xE0: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF - // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xEE: - case 0xEF: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+D000..U+D7FF: bytes ED 80..9F 80..BF - case 0xED: - { - if 
(JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF - case 0xF0: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF - case 0xF1: - case 0xF2: - case 0xF3: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF - case 0xF4: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // remaining bytes (80..C1 and F5..FF) are ill-formed - default: - { - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - } - } - } - - /*! - * @brief scan a comment - * @return whether comment could be scanned successfully - */ - bool scan_comment() - { - switch (get()) - { - // single-line comments skip input until a newline or EOF is read - case '/': - { - while (true) - { - switch (get()) - { - case '\n': - case '\r': - case std::char_traits::eof(): - case '\0': - return true; - - default: - break; - } - } - } - - // multi-line comments skip input until */ is read - case '*': - { - while (true) - { - switch (get()) - { - case std::char_traits::eof(): - case '\0': - { - error_message = "invalid comment; missing closing '*/'"; - return false; - } - - case '*': - { - switch (get()) - { - case '/': - return true; - - default: - { - unget(); - continue; - } - } - } - - default: - continue; - } - } - } - - // unexpected character after reading '/' - default: - { - error_message = "invalid comment; expecting '/' or '*' after '/'"; - return false; - } - } - } - - JSON_HEDLEY_NON_NULL(2) - static void strtof(float& f, const char* str, char** endptr) 
noexcept - { - f = std::strtof(str, endptr); - } - - JSON_HEDLEY_NON_NULL(2) - static void strtof(double& f, const char* str, char** endptr) noexcept - { - f = std::strtod(str, endptr); - } - - JSON_HEDLEY_NON_NULL(2) - static void strtof(long double& f, const char* str, char** endptr) noexcept - { - f = std::strtold(str, endptr); - } - - /*! - @brief scan a number literal - - This function scans a string according to Sect. 6 of RFC 8259. - - The function is realized with a deterministic finite state machine derived - from the grammar described in RFC 8259. Starting in state "init", the - input is read and used to determined the next state. Only state "done" - accepts the number. State "error" is a trap state to model errors. In the - table below, "anything" means any character but the ones listed before. - - state | 0 | 1-9 | e E | + | - | . | anything - ---------|----------|----------|----------|---------|---------|----------|----------- - init | zero | any1 | [error] | [error] | minus | [error] | [error] - minus | zero | any1 | [error] | [error] | [error] | [error] | [error] - zero | done | done | exponent | done | done | decimal1 | done - any1 | any1 | any1 | exponent | done | done | decimal1 | done - decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] - decimal2 | decimal2 | decimal2 | exponent | done | done | done | done - exponent | any2 | any2 | [error] | sign | sign | [error] | [error] - sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] - any2 | any2 | any2 | done | done | done | done | done - - The state machine is realized with one label per state (prefixed with - "scan_number_") and `goto` statements between them. The state machine - contains cycles, but any cycle can be left when EOF is read. Therefore, - the function is guaranteed to terminate. - - During scanning, the read bytes are stored in token_buffer. 
This string is - then converted to a signed integer, an unsigned integer, or a - floating-point number. - - @return token_type::value_unsigned, token_type::value_integer, or - token_type::value_float if number could be successfully scanned, - token_type::parse_error otherwise - - @note The scanner is independent of the current locale. Internally, the - locale's decimal point is used instead of `.` to work with the - locale-dependent converters. - */ - token_type scan_number() // lgtm [cpp/use-of-goto] - { - // reset token_buffer to store the number's bytes - reset(); - - // the type of the parsed number; initially set to unsigned; will be - // changed if minus sign, decimal point or exponent is read - token_type number_type = token_type::value_unsigned; - - // state (init): we just found out we need to scan a number - switch (current) - { - case '-': - { - add(current); - goto scan_number_minus; - } - - case '0': - { - add(current); - goto scan_number_zero; - } - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - // all other characters are rejected outside scan_number() - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - -scan_number_minus: - // state: we just parsed a leading minus sign - number_type = token_type::value_integer; - switch (get()) - { - case '0': - { - add(current); - goto scan_number_zero; - } - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - default: - { - error_message = "invalid number; expected digit after '-'"; - return token_type::parse_error; - } - } - -scan_number_zero: - // state: we just parse a zero (maybe with a leading minus sign) - switch (get()) - { - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } - - case 
'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - goto scan_number_done; - } - -scan_number_any1: - // state: we just parsed a number 0-9 (maybe with a leading minus sign) - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } - - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - goto scan_number_done; - } - -scan_number_decimal1: - // state: we just parsed a decimal point - number_type = token_type::value_float; - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } - - default: - { - error_message = "invalid number; expected digit after '.'"; - return token_type::parse_error; - } - } - -scan_number_decimal2: - // we just parsed at least one number after a decimal point - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } - - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - goto scan_number_done; - } - -scan_number_exponent: - // we just parsed an exponent - number_type = token_type::value_float; - switch (get()) - { - case '+': - case '-': - { - add(current); - goto scan_number_sign; - } - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - { - error_message = - "invalid number; expected '+', '-', or digit after exponent"; - return token_type::parse_error; - } - } - -scan_number_sign: - // we just parsed an exponent sign - switch 
(get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - { - error_message = "invalid number; expected digit after exponent sign"; - return token_type::parse_error; - } - } - -scan_number_any2: - // we just parsed a number after the exponent or exponent sign - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - goto scan_number_done; - } - -scan_number_done: - // unget the character after the number (we only read it to know that - // we are done scanning a number) - unget(); - - char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - errno = 0; - - // try to parse integers first and fall back to floats - if (number_type == token_type::value_unsigned) - { - const auto x = std::strtoull(token_buffer.data(), &endptr, 10); - - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - - if (errno == 0) - { - value_unsigned = static_cast(x); - if (value_unsigned == x) - { - return token_type::value_unsigned; - } - } - } - else if (number_type == token_type::value_integer) - { - const auto x = std::strtoll(token_buffer.data(), &endptr, 10); - - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - - if (errno == 0) - { - value_integer = static_cast(x); - if (value_integer == x) - { - return token_type::value_integer; - } - } - } - - // this code is reached if we parse a floating-point number or if an - // integer conversion above failed - strtof(value_float, token_buffer.data(), &endptr); - - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - - return token_type::value_float; - } - - /*! 
- @param[in] literal_text the literal text to expect - @param[in] length the length of the passed literal text - @param[in] return_type the token type to return on success - */ - JSON_HEDLEY_NON_NULL(2) - token_type scan_literal(const char_type* literal_text, const std::size_t length, - token_type return_type) - { - JSON_ASSERT(std::char_traits::to_char_type(current) == literal_text[0]); - for (std::size_t i = 1; i < length; ++i) - { - if (JSON_HEDLEY_UNLIKELY(std::char_traits::to_char_type(get()) != literal_text[i])) - { - error_message = "invalid literal"; - return token_type::parse_error; - } - } - return return_type; - } - - ///////////////////// - // input management - ///////////////////// - - /// reset token_buffer; current character is beginning of token - void reset() noexcept - { - token_buffer.clear(); - token_string.clear(); - token_string.push_back(std::char_traits::to_char_type(current)); - } - - /* - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a - `std::char_traits::eof()` in that case. Stores the scanned characters - for use in error messages. - - @return character read from the input - */ - char_int_type get() - { - ++position.chars_read_total; - ++position.chars_read_current_line; - - if (next_unget) - { - // just reset the next_unget variable and work with current - next_unget = false; - } - else - { - current = ia.get_character(); - } - - if (JSON_HEDLEY_LIKELY(current != std::char_traits::eof())) - { - token_string.push_back(std::char_traits::to_char_type(current)); - } - - if (current == '\n') - { - ++position.lines_read; - position.chars_read_current_line = 0; - } - - return current; - } - - /*! - @brief unget current character (read it again on next get) - - We implement unget by setting variable next_unget to true. 
The input is not - changed - we just simulate ungetting by modifying chars_read_total, - chars_read_current_line, and token_string. The next call to get() will - behave as if the unget character is read again. - */ - void unget() - { - next_unget = true; - - --position.chars_read_total; - - // in case we "unget" a newline, we have to also decrement the lines_read - if (position.chars_read_current_line == 0) - { - if (position.lines_read > 0) - { - --position.lines_read; - } - } - else - { - --position.chars_read_current_line; - } - - if (JSON_HEDLEY_LIKELY(current != std::char_traits::eof())) - { - JSON_ASSERT(!token_string.empty()); - token_string.pop_back(); - } - } - - /// add a character to token_buffer - void add(char_int_type c) - { - token_buffer.push_back(static_cast(c)); - } - - public: - ///////////////////// - // value getters - ///////////////////// - - /// return integer value - constexpr number_integer_t get_number_integer() const noexcept - { - return value_integer; - } - - /// return unsigned integer value - constexpr number_unsigned_t get_number_unsigned() const noexcept - { - return value_unsigned; - } - - /// return floating-point value - constexpr number_float_t get_number_float() const noexcept - { - return value_float; - } - - /// return current string value (implicitly resets the token; useful only once) - string_t& get_string() - { - return token_buffer; - } - - ///////////////////// - // diagnostics - ///////////////////// - - /// return position of last read token - constexpr position_t get_position() const noexcept - { - return position; - } - - /// return the last read token (for errors only). Will never contain EOF - /// (an arbitrary value that is not a valid char value, often -1), because - /// 255 may legitimately occur. May contain NUL, which should be escaped. 
- std::string get_token_string() const - { - // escape control characters - std::string result; - for (const auto c : token_string) - { - if (static_cast(c) <= '\x1F') - { - // escape control characters - std::array cs{{}}; - (std::snprintf)(cs.data(), cs.size(), "", static_cast(c)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - result += cs.data(); - } - else - { - // add character as is - result.push_back(static_cast(c)); - } - } - - return result; - } - - /// return syntax error message - JSON_HEDLEY_RETURNS_NON_NULL - constexpr const char* get_error_message() const noexcept - { - return error_message; - } - - ///////////////////// - // actual scanner - ///////////////////// - - /*! - @brief skip the UTF-8 byte order mark - @return true iff there is no BOM or the correct BOM has been skipped - */ - bool skip_bom() - { - if (get() == 0xEF) - { - // check if we completely parse the BOM - return get() == 0xBB && get() == 0xBF; - } - - // the first character is not the beginning of the BOM; unget it to - // process is later - unget(); - return true; - } - - void skip_whitespace() - { - do - { - get(); - } - while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); - } - - token_type scan() - { - // initially, skip the BOM - if (position.chars_read_total == 0 && !skip_bom()) - { - error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; - return token_type::parse_error; - } - - // read next character and ignore whitespace - skip_whitespace(); - - // ignore comments - while (ignore_comments && current == '/') - { - if (!scan_comment()) - { - return token_type::parse_error; - } - - // skip following whitespace - skip_whitespace(); - } - - switch (current) - { - // structural characters - case '[': - return token_type::begin_array; - case ']': - return token_type::end_array; - case '{': - return token_type::begin_object; - case '}': - return token_type::end_object; - case ':': - return token_type::name_separator; - case ',': - 
return token_type::value_separator; - - // literals - case 't': - { - std::array true_literal = {{char_type('t'), char_type('r'), char_type('u'), char_type('e')}}; - return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true); - } - case 'f': - { - std::array false_literal = {{char_type('f'), char_type('a'), char_type('l'), char_type('s'), char_type('e')}}; - return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false); - } - case 'n': - { - std::array null_literal = {{char_type('n'), char_type('u'), char_type('l'), char_type('l')}}; - return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null); - } - - // string - case '\"': - return scan_string(); - - // number - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return scan_number(); - - // end of input (the null byte is needed when parsing from - // string literals) - case '\0': - case std::char_traits::eof(): - return token_type::end_of_input; - - // error - default: - error_message = "invalid literal"; - return token_type::parse_error; - } - } - - private: - /// input adapter - InputAdapterType ia; - - /// whether comments should be ignored (true) or signaled as errors (false) - const bool ignore_comments = false; - - /// the current character - char_int_type current = std::char_traits::eof(); - - /// whether the next get() call should just return current - bool next_unget = false; - - /// the start position of the current token - position_t position {}; - - /// raw input token string (for error messages) - std::vector token_string {}; - - /// buffer for variable-length tokens (numbers, strings) - string_t token_buffer {}; - - /// a description of occurred lexer errors - const char* error_message = ""; - - // number values - number_integer_t value_integer = 0; - number_unsigned_t value_unsigned = 0; - number_float_t value_float = 0; - - /// the 
decimal point - const char_int_type decimal_point_char = '.'; -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - - -#include // size_t -#include // declval -#include // string - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template -using null_function_t = decltype(std::declval().null()); - -template -using boolean_function_t = - decltype(std::declval().boolean(std::declval())); - -template -using number_integer_function_t = - decltype(std::declval().number_integer(std::declval())); - -template -using number_unsigned_function_t = - decltype(std::declval().number_unsigned(std::declval())); - -template -using number_float_function_t = decltype(std::declval().number_float( - std::declval(), std::declval())); - -template -using string_function_t = - decltype(std::declval().string(std::declval())); - -template -using binary_function_t = - decltype(std::declval().binary(std::declval())); - -template -using start_object_function_t = - decltype(std::declval().start_object(std::declval())); - -template -using key_function_t = - decltype(std::declval().key(std::declval())); - -template -using end_object_function_t = decltype(std::declval().end_object()); - -template -using start_array_function_t = - decltype(std::declval().start_array(std::declval())); - -template -using end_array_function_t = decltype(std::declval().end_array()); - -template -using parse_error_function_t = decltype(std::declval().parse_error( - std::declval(), std::declval(), - std::declval())); - -template -struct is_sax -{ - private: - static_assert(is_basic_json::value, - "BasicJsonType must be of type basic_json<...>"); - - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using 
exception_t = typename BasicJsonType::exception; - - public: - static constexpr bool value = - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value && - is_detected_exact::value; -}; - -template -struct is_sax_static_asserts -{ - private: - static_assert(is_basic_json::value, - "BasicJsonType must be of type basic_json<...>"); - - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using exception_t = typename BasicJsonType::exception; - - public: - static_assert(is_detected_exact::value, - "Missing/invalid function: bool null()"); - static_assert(is_detected_exact::value, - "Missing/invalid function: bool boolean(bool)"); - static_assert(is_detected_exact::value, - "Missing/invalid function: bool boolean(bool)"); - static_assert( - is_detected_exact::value, - "Missing/invalid function: bool number_integer(number_integer_t)"); - static_assert( - is_detected_exact::value, - "Missing/invalid function: bool number_unsigned(number_unsigned_t)"); - static_assert(is_detected_exact::value, - "Missing/invalid function: bool number_float(number_float_t, const string_t&)"); - static_assert( - is_detected_exact::value, - "Missing/invalid function: bool string(string_t&)"); - static_assert( - is_detected_exact::value, - "Missing/invalid function: bool binary(binary_t&)"); - static_assert(is_detected_exact::value, - "Missing/invalid function: bool start_object(std::size_t)"); - static_assert(is_detected_exact::value, - 
"Missing/invalid function: bool key(string_t&)"); - static_assert(is_detected_exact::value, - "Missing/invalid function: bool end_object()"); - static_assert(is_detected_exact::value, - "Missing/invalid function: bool start_array(std::size_t)"); - static_assert(is_detected_exact::value, - "Missing/invalid function: bool end_array()"); - static_assert( - is_detected_exact::value, - "Missing/invalid function: bool parse_error(std::size_t, const " - "std::string&, const exception&)"); -}; -} // namespace detail -} // namespace nlohmann - -// #include - - -namespace nlohmann -{ -namespace detail -{ - -/// how to treat CBOR tags -enum class cbor_tag_handler_t -{ - error, ///< throw a parse_error exception in case of a tag - ignore ///< ignore tags -}; - -/*! -@brief determine system byte order - -@return true if and only if system's byte order is little endian - -@note from https://stackoverflow.com/a/1001328/266378 -*/ -static inline bool little_endianess(int num = 1) noexcept -{ - return *reinterpret_cast(&num) == 1; -} - - -/////////////////// -// binary reader // -/////////////////// - -/*! -@brief deserialization of CBOR, MessagePack, and UBJSON values -*/ -template> -class binary_reader -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using json_sax_t = SAX; - using char_type = typename InputAdapterType::char_type; - using char_int_type = typename std::char_traits::int_type; - - public: - /*! 
- @brief create a binary reader - - @param[in] adapter input adapter to read from - */ - explicit binary_reader(InputAdapterType&& adapter) noexcept : ia(std::move(adapter)) - { - (void)detail::is_sax_static_asserts {}; - } - - // make class move-only - binary_reader(const binary_reader&) = delete; - binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - binary_reader& operator=(const binary_reader&) = delete; - binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~binary_reader() = default; - - /*! - @param[in] format the binary format to parse - @param[in] sax_ a SAX event processor - @param[in] strict whether to expect the input to be consumed completed - @param[in] tag_handler how to treat CBOR tags - - @return whether parsing was successful - */ - JSON_HEDLEY_NON_NULL(3) - bool sax_parse(const input_format_t format, - json_sax_t* sax_, - const bool strict = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - sax = sax_; - bool result = false; - - switch (format) - { - case input_format_t::bson: - result = parse_bson_internal(); - break; - - case input_format_t::cbor: - result = parse_cbor_internal(true, tag_handler); - break; - - case input_format_t::msgpack: - result = parse_msgpack_internal(); - break; - - case input_format_t::ubjson: - result = parse_ubjson_internal(); - break; - - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - - // strict mode: next byte must be EOF - if (result && strict) - { - if (format == input_format_t::ubjson) - { - get_ignore_noop(); - } - else - { - get(); - } - - if (JSON_HEDLEY_UNLIKELY(current != std::char_traits::eof())) - { - return sax->parse_error(chars_read, get_token_string(), - parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + 
get_token_string(), "value"), BasicJsonType())); - } - } - - return result; - } - - private: - ////////// - // BSON // - ////////// - - /*! - @brief Reads in a BSON-object and passes it to the SAX-parser. - @return whether a valid BSON-value was passed to the SAX parser - */ - bool parse_bson_internal() - { - std::int32_t document_size{}; - get_number(input_format_t::bson, document_size); - - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1)))) - { - return false; - } - - if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false))) - { - return false; - } - - return sax->end_object(); - } - - /*! - @brief Parses a C-style string from the BSON input. - @param[in,out] result A reference to the string variable where the read - string is to be stored. - @return `true` if the \x00-byte indicating the end of the string was - encountered before the EOF; false` indicates an unexpected EOF. - */ - bool get_bson_cstr(string_t& result) - { - auto out = std::back_inserter(result); - while (true) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring"))) - { - return false; - } - if (current == 0x00) - { - return true; - } - *out++ = static_cast(current); - } - } - - /*! - @brief Parses a zero-terminated string of length @a len from the BSON - input. - @param[in] len The length (including the zero-byte at the end) of the - string to be read. - @param[in,out] result A reference to the string variable where the read - string is to be stored. 
- @tparam NumberType The type of the length @a len - @pre len >= 1 - @return `true` if the string was successfully parsed - */ - template - bool get_bson_string(const NumberType len, string_t& result) - { - if (JSON_HEDLEY_UNLIKELY(len < 1)) - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"), BasicJsonType())); - } - - return get_string(input_format_t::bson, len - static_cast(1), result) && get() != std::char_traits::eof(); - } - - /*! - @brief Parses a byte array input of length @a len from the BSON input. - @param[in] len The length of the byte array to be read. - @param[in,out] result A reference to the binary variable where the read - array is to be stored. - @tparam NumberType The type of the length @a len - @pre len >= 0 - @return `true` if the byte array was successfully parsed - */ - template - bool get_bson_binary(const NumberType len, binary_t& result) - { - if (JSON_HEDLEY_UNLIKELY(len < 0)) - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary"), BasicJsonType())); - } - - // All BSON binary values have a subtype - std::uint8_t subtype{}; - get_number(input_format_t::bson, subtype); - result.set_subtype(subtype); - - return get_binary(input_format_t::bson, len, result); - } - - /*! - @brief Read a BSON document element of the given @a element_type. - @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html - @param[in] element_type_parse_position The position in the input stream, - where the `element_type` was read. - @warning Not all BSON element types are supported yet. 
An unsupported - @a element_type will give rise to a parse_error.114: - Unsupported BSON record type 0x... - @return whether a valid BSON-object/array was passed to the SAX parser - */ - bool parse_bson_element_internal(const char_int_type element_type, - const std::size_t element_type_parse_position) - { - switch (element_type) - { - case 0x01: // double - { - double number{}; - return get_number(input_format_t::bson, number) && sax->number_float(static_cast(number), ""); - } - - case 0x02: // string - { - std::int32_t len{}; - string_t value; - return get_number(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value); - } - - case 0x03: // object - { - return parse_bson_internal(); - } - - case 0x04: // array - { - return parse_bson_array(); - } - - case 0x05: // binary - { - std::int32_t len{}; - binary_t value; - return get_number(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value); - } - - case 0x08: // boolean - { - return sax->boolean(get() != 0); - } - - case 0x0A: // null - { - return sax->null(); - } - - case 0x10: // int32 - { - std::int32_t value{}; - return get_number(input_format_t::bson, value) && sax->number_integer(value); - } - - case 0x12: // int64 - { - std::int64_t value{}; - return get_number(input_format_t::bson, value) && sax->number_integer(value); - } - - default: // anything else not supported (yet) - { - std::array cr{{}}; - (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast(element_type)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data()), BasicJsonType())); - } - } - } - - /*! 
- @brief Read a BSON element list (as specified in the BSON-spec) - - The same binary layout is used for objects and arrays, hence it must be - indicated with the argument @a is_array which one is expected - (true --> array, false --> object). - - @param[in] is_array Determines if the element list being read is to be - treated as an object (@a is_array == false), or as an - array (@a is_array == true). - @return whether a valid BSON-object/array was passed to the SAX parser - */ - bool parse_bson_element_list(const bool is_array) - { - string_t key; - - while (auto element_type = get()) - { - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list"))) - { - return false; - } - - const std::size_t element_type_parse_position = chars_read; - if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key))) - { - return false; - } - - if (!is_array && !sax->key(key)) - { - return false; - } - - if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position))) - { - return false; - } - - // get_bson_cstr only appends - key.clear(); - } - - return true; - } - - /*! - @brief Reads an array from the BSON input and passes it to the SAX-parser. - @return whether a valid BSON-array was passed to the SAX parser - */ - bool parse_bson_array() - { - std::int32_t document_size{}; - get_number(input_format_t::bson, document_size); - - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1)))) - { - return false; - } - - if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true))) - { - return false; - } - - return sax->end_array(); - } - - ////////// - // CBOR // - ////////// - - /*! 
- @param[in] get_char whether a new character should be retrieved from the - input (true) or whether the last read character should - be considered instead (false) - @param[in] tag_handler how CBOR tags should be treated - - @return whether a valid CBOR value was passed to the SAX parser - */ - bool parse_cbor_internal(const bool get_char, - const cbor_tag_handler_t tag_handler) - { - switch (get_char ? get() : current) - { - // EOF - case std::char_traits::eof(): - return unexpect_eof(input_format_t::cbor, "value"); - - // Integer 0x00..0x17 (0..23) - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - case 0x0D: - case 0x0E: - case 0x0F: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - return sax->number_unsigned(static_cast(current)); - - case 0x18: // Unsigned integer (one-byte uint8_t follows) - { - std::uint8_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); - } - - case 0x19: // Unsigned integer (two-byte uint16_t follows) - { - std::uint16_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); - } - - case 0x1A: // Unsigned integer (four-byte uint32_t follows) - { - std::uint32_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); - } - - case 0x1B: // Unsigned integer (eight-byte uint64_t follows) - { - std::uint64_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); - } - - // Negative integer -1-0x00..-1-0x17 (-1..-24) - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 
0x37: - return sax->number_integer(static_cast(0x20 - 1 - current)); - - case 0x38: // Negative integer (one-byte uint8_t follows) - { - std::uint8_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); - } - - case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - std::uint16_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); - } - - case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) - { - std::uint32_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); - } - - case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) - { - std::uint64_t number{}; - return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - - static_cast(number)); - } - - // Binary data (0x00..0x17 bytes follow) - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: // Binary data (one-byte uint8_t for n follows) - case 0x59: // Binary data (two-byte uint16_t for n follow) - case 0x5A: // Binary data (four-byte uint32_t for n follow) - case 0x5B: // Binary data (eight-byte uint64_t for n follow) - case 0x5F: // Binary data (indefinite length) - { - binary_t b; - return get_cbor_binary(b) && sax->binary(b); - } - - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: // UTF-8 string 
(one-byte uint8_t for n follows) - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - case 0x7F: // UTF-8 string (indefinite length) - { - string_t s; - return get_cbor_string(s) && sax->string(s); - } - - // array (0x00..0x17 data items follow) - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8A: - case 0x8B: - case 0x8C: - case 0x8D: - case 0x8E: - case 0x8F: - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - return get_cbor_array(static_cast(static_cast(current) & 0x1Fu), tag_handler); - - case 0x98: // array (one-byte uint8_t for n follows) - { - std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); - } - - case 0x99: // array (two-byte uint16_t for n follow) - { - std::uint16_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); - } - - case 0x9A: // array (four-byte uint32_t for n follow) - { - std::uint32_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); - } - - case 0x9B: // array (eight-byte uint64_t for n follow) - { - std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); - } - - case 0x9F: // array (indefinite length) - return get_cbor_array(std::size_t(-1), tag_handler); - - // map (0x00..0x17 pairs of data items follow) - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - case 0xA4: - case 0xA5: - case 0xA6: - case 0xA7: - case 0xA8: - case 0xA9: - case 0xAA: - case 0xAB: - case 0xAC: - case 0xAD: - case 0xAE: - case 0xAF: - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - return 
get_cbor_object(static_cast(static_cast(current) & 0x1Fu), tag_handler); - - case 0xB8: // map (one-byte uint8_t for n follows) - { - std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); - } - - case 0xB9: // map (two-byte uint16_t for n follow) - { - std::uint16_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); - } - - case 0xBA: // map (four-byte uint32_t for n follow) - { - std::uint32_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); - } - - case 0xBB: // map (eight-byte uint64_t for n follow) - { - std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); - } - - case 0xBF: // map (indefinite length) - return get_cbor_object(std::size_t(-1), tag_handler); - - case 0xC6: // tagged item - case 0xC7: - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD8: // tagged item (1 bytes follow) - case 0xD9: // tagged item (2 bytes follow) - case 0xDA: // tagged item (4 bytes follow) - case 0xDB: // tagged item (8 bytes follow) - { - switch (tag_handler) - { - case cbor_tag_handler_t::error: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); - } - - case cbor_tag_handler_t::ignore: - { - switch (current) - { - case 0xD8: - { - std::uint8_t len{}; - get_number(input_format_t::cbor, len); - break; - } - case 0xD9: - { - std::uint16_t len{}; - get_number(input_format_t::cbor, len); - break; - } - case 0xDA: - { - std::uint32_t len{}; - get_number(input_format_t::cbor, len); - break; - } - case 0xDB: - { - std::uint64_t len{}; - 
get_number(input_format_t::cbor, len); - break; - } - default: - break; - } - return parse_cbor_internal(true, tag_handler); - } - - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - return false; // LCOV_EXCL_LINE - } - } - - case 0xF4: // false - return sax->boolean(false); - - case 0xF5: // true - return sax->boolean(true); - - case 0xF6: // null - return sax->null(); - - case 0xF9: // Half-Precision Float (two-byte IEEE 754) - { - const auto byte1_raw = get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) - { - return false; - } - const auto byte2_raw = get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) - { - return false; - } - - const auto byte1 = static_cast(byte1_raw); - const auto byte2 = static_cast(byte2_raw); - - // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added - // to IEEE 754 in 2008, today's programming platforms often - // still only have limited support for them. It is very - // easy to include at least decoding support for them even - // without such support. An example of a small decoder for - // half-precision floating-point numbers in the C language - // is shown in Fig. 3. - const auto half = static_cast((byte1 << 8u) + byte2); - const double val = [&half] - { - const int exp = (half >> 10u) & 0x1Fu; - const unsigned int mant = half & 0x3FFu; - JSON_ASSERT(0 <= exp&& exp <= 32); - JSON_ASSERT(mant <= 1024); - switch (exp) - { - case 0: - return std::ldexp(mant, -24); - case 31: - return (mant == 0) - ? std::numeric_limits::infinity() - : std::numeric_limits::quiet_NaN(); - default: - return std::ldexp(mant + 1024, exp - 25); - } - }(); - return sax->number_float((half & 0x8000u) != 0 - ? 
static_cast(-val) - : static_cast(val), ""); - } - - case 0xFA: // Single-Precision Float (four-byte IEEE 754) - { - float number{}; - return get_number(input_format_t::cbor, number) && sax->number_float(static_cast(number), ""); - } - - case 0xFB: // Double-Precision Float (eight-byte IEEE 754) - { - double number{}; - return get_number(input_format_t::cbor, number) && sax->number_float(static_cast(number), ""); - } - - default: // anything else (0xFF is handled inside the other types) - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); - } - } - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, static_cast(current) & 0x1Fu, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - std::uint16_t len{}; - return 
get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - std::uint32_t len{}; - return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (!get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"), BasicJsonType())); - } - } - } - - /*! - @brief reads a CBOR byte array - - This function first reads starting bytes to determine the expected - byte array length and then copies this number of bytes into the byte array. - Additionally, CBOR's byte arrays with indefinite lengths are supported. 
- - @param[out] result created byte array - - @return whether byte array creation completed - */ - bool get_cbor_binary(binary_t& result) - { - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary"))) - { - return false; - } - - switch (current) - { - // Binary data (0x00..0x17 bytes follow) - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - { - return get_binary(input_format_t::cbor, static_cast(current) & 0x1Fu, result); - } - - case 0x58: // Binary data (one-byte uint8_t for n follows) - { - std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && - get_binary(input_format_t::cbor, len, result); - } - - case 0x59: // Binary data (two-byte uint16_t for n follow) - { - std::uint16_t len{}; - return get_number(input_format_t::cbor, len) && - get_binary(input_format_t::cbor, len, result); - } - - case 0x5A: // Binary data (four-byte uint32_t for n follow) - { - std::uint32_t len{}; - return get_number(input_format_t::cbor, len) && - get_binary(input_format_t::cbor, len, result); - } - - case 0x5B: // Binary data (eight-byte uint64_t for n follow) - { - std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && - get_binary(input_format_t::cbor, len, result); - } - - case 0x5F: // Binary data (indefinite length) - { - while (get() != 0xFF) - { - binary_t chunk; - if (!get_cbor_binary(chunk)) - { - return false; - } - result.insert(result.end(), chunk.begin(), chunk.end()); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 
0x" + last_token, "binary"), BasicJsonType())); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @param[in] tag_handler how CBOR tags should be treated - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len, - const cbor_tag_handler_t tag_handler) - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @param[in] tag_handler how CBOR tags should be treated - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len, - const cbor_tag_handler_t tag_handler) - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) - { - return false; - } - - if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) - { - return false; - } - - if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler))) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - - ///////////// - // MsgPack // - ///////////// - - /*! 
- @return whether a valid MessagePack value was passed to the SAX parser - */ - bool parse_msgpack_internal() - { - switch (get()) - { - // EOF - case std::char_traits::eof(): - return unexpect_eof(input_format_t::msgpack, "value"); - - // positive fixint - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - case 0x0D: - case 0x0E: - case 0x0F: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1A: - case 0x1B: - case 0x1C: - case 0x1D: - case 0x1E: - case 0x1F: - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5C: - case 0x5D: - case 0x5E: - case 0x5F: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: - return sax->number_unsigned(static_cast(current)); - - // fixmap - case 0x80: 
- case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8A: - case 0x8B: - case 0x8C: - case 0x8D: - case 0x8E: - case 0x8F: - return get_msgpack_object(static_cast(static_cast(current) & 0x0Fu)); - - // fixarray - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - case 0x98: - case 0x99: - case 0x9A: - case 0x9B: - case 0x9C: - case 0x9D: - case 0x9E: - case 0x9F: - return get_msgpack_array(static_cast(static_cast(current) & 0x0Fu)); - - // fixstr - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - case 0xA4: - case 0xA5: - case 0xA6: - case 0xA7: - case 0xA8: - case 0xA9: - case 0xAA: - case 0xAB: - case 0xAC: - case 0xAD: - case 0xAE: - case 0xAF: - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - case 0xD9: // str 8 - case 0xDA: // str 16 - case 0xDB: // str 32 - { - string_t s; - return get_msgpack_string(s) && sax->string(s); - } - - case 0xC0: // nil - return sax->null(); - - case 0xC2: // false - return sax->boolean(false); - - case 0xC3: // true - return sax->boolean(true); - - case 0xC4: // bin 8 - case 0xC5: // bin 16 - case 0xC6: // bin 32 - case 0xC7: // ext 8 - case 0xC8: // ext 16 - case 0xC9: // ext 32 - case 0xD4: // fixext 1 - case 0xD5: // fixext 2 - case 0xD6: // fixext 4 - case 0xD7: // fixext 8 - case 0xD8: // fixext 16 - { - binary_t b; - return get_msgpack_binary(b) && sax->binary(b); - } - - case 0xCA: // float 32 - { - float number{}; - return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast(number), ""); - } - - case 0xCB: // float 64 - { - double number{}; - return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast(number), ""); - } - - case 0xCC: // uint 8 - { - std::uint8_t number{}; - return 
get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); - } - - case 0xCD: // uint 16 - { - std::uint16_t number{}; - return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); - } - - case 0xCE: // uint 32 - { - std::uint32_t number{}; - return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); - } - - case 0xCF: // uint 64 - { - std::uint64_t number{}; - return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); - } - - case 0xD0: // int 8 - { - std::int8_t number{}; - return get_number(input_format_t::msgpack, number) && sax->number_integer(number); - } - - case 0xD1: // int 16 - { - std::int16_t number{}; - return get_number(input_format_t::msgpack, number) && sax->number_integer(number); - } - - case 0xD2: // int 32 - { - std::int32_t number{}; - return get_number(input_format_t::msgpack, number) && sax->number_integer(number); - } - - case 0xD3: // int 64 - { - std::int64_t number{}; - return get_number(input_format_t::msgpack, number) && sax->number_integer(number); - } - - case 0xDC: // array 16 - { - std::uint16_t len{}; - return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast(len)); - } - - case 0xDD: // array 32 - { - std::uint32_t len{}; - return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast(len)); - } - - case 0xDE: // map 16 - { - std::uint16_t len{}; - return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast(len)); - } - - case 0xDF: // map 32 - { - std::uint32_t len{}; - return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast(len)); - } - - // negative fixint - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 
0xF6: - case 0xF7: - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - return sax->number_integer(static_cast(current)); - - default: // anything else - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); - } - } - } - - /*! - @brief reads a MessagePack string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_msgpack_string(string_t& result) - { - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string"))) - { - return false; - } - - switch (current) - { - // fixstr - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - case 0xA4: - case 0xA5: - case 0xA6: - case 0xA7: - case 0xA8: - case 0xA9: - case 0xAA: - case 0xAB: - case 0xAC: - case 0xAD: - case 0xAE: - case 0xAF: - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - { - return get_string(input_format_t::msgpack, static_cast(current) & 0x1Fu, result); - } - - case 0xD9: // str 8 - { - std::uint8_t len{}; - return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); - } - - case 0xDA: // str 16 - { - std::uint16_t len{}; - return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); - } - - case 0xDB: // str 32 - { - std::uint32_t len{}; - return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result); - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, 
last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"), BasicJsonType())); - } - } - } - - /*! - @brief reads a MessagePack byte array - - This function first reads starting bytes to determine the expected - byte array length and then copies this number of bytes into a byte array. - - @param[out] result created byte array - - @return whether byte array creation completed - */ - bool get_msgpack_binary(binary_t& result) - { - // helper function to set the subtype - auto assign_and_return_true = [&result](std::int8_t subtype) - { - result.set_subtype(static_cast(subtype)); - return true; - }; - - switch (current) - { - case 0xC4: // bin 8 - { - std::uint8_t len{}; - return get_number(input_format_t::msgpack, len) && - get_binary(input_format_t::msgpack, len, result); - } - - case 0xC5: // bin 16 - { - std::uint16_t len{}; - return get_number(input_format_t::msgpack, len) && - get_binary(input_format_t::msgpack, len, result); - } - - case 0xC6: // bin 32 - { - std::uint32_t len{}; - return get_number(input_format_t::msgpack, len) && - get_binary(input_format_t::msgpack, len, result); - } - - case 0xC7: // ext 8 - { - std::uint8_t len{}; - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, len) && - get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, len, result) && - assign_and_return_true(subtype); - } - - case 0xC8: // ext 16 - { - std::uint16_t len{}; - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, len) && - get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, len, result) && - assign_and_return_true(subtype); - } - - case 0xC9: // ext 32 - { - std::uint32_t len{}; - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, len) && - get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, len, 
result) && - assign_and_return_true(subtype); - } - - case 0xD4: // fixext 1 - { - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, 1, result) && - assign_and_return_true(subtype); - } - - case 0xD5: // fixext 2 - { - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, 2, result) && - assign_and_return_true(subtype); - } - - case 0xD6: // fixext 4 - { - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, 4, result) && - assign_and_return_true(subtype); - } - - case 0xD7: // fixext 8 - { - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, 8, result) && - assign_and_return_true(subtype); - } - - case 0xD8: // fixext 16 - { - std::int8_t subtype{}; - return get_number(input_format_t::msgpack, subtype) && - get_binary(input_format_t::msgpack, 16, result) && - assign_and_return_true(subtype); - } - - default: // LCOV_EXCL_LINE - return false; // LCOV_EXCL_LINE - } - } - - /*! - @param[in] len the length of the array - @return whether array creation completed - */ - bool get_msgpack_array(const std::size_t len) - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len))) - { - return false; - } - - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal())) - { - return false; - } - } - - return sax->end_array(); - } - - /*! 
- @param[in] len the length of the object - @return whether object creation completed - */ - bool get_msgpack_object(const std::size_t len) - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len))) - { - return false; - } - - string_t key; - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key))) - { - return false; - } - - if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal())) - { - return false; - } - key.clear(); - } - - return sax->end_object(); - } - - //////////// - // UBJSON // - //////////// - - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief reads a UBJSON string - - This function is either called after reading the 'S' byte explicitly - indicating a string, or in case of an object key where the 'S' byte can be - left out. - - @param[out] result created string - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether string creation completed - */ - bool get_ubjson_string(string_t& result, const bool get_char = true) - { - if (get_char) - { - get(); // TODO(niels): may we ignore N here? 
- } - - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value"))) - { - return false; - } - - switch (current) - { - case 'U': - { - std::uint8_t len{}; - return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); - } - - case 'i': - { - std::int8_t len{}; - return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); - } - - case 'I': - { - std::int16_t len{}; - return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); - } - - case 'l': - { - std::int32_t len{}; - return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); - } - - case 'L': - { - std::int64_t len{}; - return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result); - } - - default: - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"), BasicJsonType())); - } - } - - /*! 
- @param[out] result determined size - @return whether size determination completed - */ - bool get_ubjson_size_value(std::size_t& result) - { - switch (get_ignore_noop()) - { - case 'U': - { - std::uint8_t number{}; - if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast(number); - return true; - } - - case 'i': - { - std::int8_t number{}; - if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char - return true; - } - - case 'I': - { - std::int16_t number{}; - if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast(number); - return true; - } - - case 'l': - { - std::int32_t number{}; - if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast(number); - return true; - } - - case 'L': - { - std::int64_t number{}; - if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast(number); - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), BasicJsonType())); - } - } - } - - /*! - @brief determine the type and size for a container - - In the optimized UBJSON format, a type and a size can be provided to allow - for a more compact representation. 
- - @param[out] result pair of the size and the type - - @return whether pair creation completed - */ - bool get_ubjson_size_type(std::pair& result) - { - result.first = string_t::npos; // size - result.second = 0; // type - - get_ignore_noop(); - - if (current == '$') - { - result.second = get(); // must not ignore 'N', because 'N' maybe the type - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type"))) - { - return false; - } - - get_ignore_noop(); - if (JSON_HEDLEY_UNLIKELY(current != '#')) - { - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value"))) - { - return false; - } - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"), BasicJsonType())); - } - - return get_ubjson_size_value(result.first); - } - - if (current == '#') - { - return get_ubjson_size_value(result.first); - } - - return true; - } - - /*! 
- @param prefix the previously read or set type prefix - @return whether value creation completed - */ - bool get_ubjson_value(const char_int_type prefix) - { - switch (prefix) - { - case std::char_traits::eof(): // EOF - return unexpect_eof(input_format_t::ubjson, "value"); - - case 'T': // true - return sax->boolean(true); - case 'F': // false - return sax->boolean(false); - - case 'Z': // null - return sax->null(); - - case 'U': - { - std::uint8_t number{}; - return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number); - } - - case 'i': - { - std::int8_t number{}; - return get_number(input_format_t::ubjson, number) && sax->number_integer(number); - } - - case 'I': - { - std::int16_t number{}; - return get_number(input_format_t::ubjson, number) && sax->number_integer(number); - } - - case 'l': - { - std::int32_t number{}; - return get_number(input_format_t::ubjson, number) && sax->number_integer(number); - } - - case 'L': - { - std::int64_t number{}; - return get_number(input_format_t::ubjson, number) && sax->number_integer(number); - } - - case 'd': - { - float number{}; - return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast(number), ""); - } - - case 'D': - { - double number{}; - return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast(number), ""); - } - - case 'H': - { - return get_ubjson_high_precision_number(); - } - - case 'C': // char - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char"))) - { - return false; - } - if (JSON_HEDLEY_UNLIKELY(current > 127)) - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"), BasicJsonType())); - } - string_t s(1, static_cast(current)); - return sax->string(s); - } - - case 'S': // string - { - string_t s; - return 
get_ubjson_string(s) && sax->string(s); - } - - case '[': // array - return get_ubjson_array(); - - case '{': // object - return get_ubjson_object(); - - default: // anything else - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"), BasicJsonType())); - } - } - } - - /*! - @return whether array creation completed - */ - bool get_ubjson_array() - { - std::pair size_and_type; - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) - { - return false; - } - - if (size_and_type.first != string_t::npos) - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) - { - return false; - } - - if (size_and_type.second != 0) - { - if (size_and_type.second != 'N') - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) - { - return false; - } - } - } - } - else - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) - { - return false; - } - } - } - } - else - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1)))) - { - return false; - } - - while (current != ']') - { - if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false))) - { - return false; - } - get_ignore_noop(); - } - } - - return sax->end_array(); - } - - /*! 
- @return whether object creation completed - */ - bool get_ubjson_object() - { - std::pair size_and_type; - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) - { - return false; - } - - string_t key; - if (size_and_type.first != string_t::npos) - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) - { - return false; - } - - if (size_and_type.second != 0) - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) - { - return false; - } - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) - { - return false; - } - key.clear(); - } - } - else - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) - { - return false; - } - if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) - { - return false; - } - key.clear(); - } - } - } - else - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1)))) - { - return false; - } - - while (current != '}') - { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key))) - { - return false; - } - if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) - { - return false; - } - get_ignore_noop(); - key.clear(); - } - } - - return sax->end_object(); - } - - // Note, no reader for UBJSON binary types is implemented because they do - // not exist - - bool get_ubjson_high_precision_number() - { - // get size of following number string - std::size_t size{}; - auto res = get_ubjson_size_value(size); - if (JSON_HEDLEY_UNLIKELY(!res)) - { - return res; - } - - // get number string - std::vector number_vector; - for (std::size_t i = 0; i < size; ++i) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number"))) - { - return false; - } - number_vector.push_back(static_cast(current)); - } - - // parse number string - using ia_type = decltype(detail::input_adapter(number_vector)); - auto 
number_lexer = detail::lexer(detail::input_adapter(number_vector), false); - const auto result_number = number_lexer.scan(); - const auto number_string = number_lexer.get_token_string(); - const auto result_remainder = number_lexer.scan(); - - using token_type = typename detail::lexer_base::token_type; - - if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input)) - { - return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType())); - } - - switch (result_number) - { - case token_type::value_integer: - return sax->number_integer(number_lexer.get_number_integer()); - case token_type::value_unsigned: - return sax->number_unsigned(number_lexer.get_number_unsigned()); - case token_type::value_float: - return sax->number_float(number_lexer.get_number_float(), std::move(number_string)); - default: - return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType())); - } - } - - /////////////////////// - // Utility functions // - /////////////////////// - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - char_int_type get() - { - ++chars_read; - return current = ia.get_character(); - } - - /*! 
- @return character read from the input after ignoring all 'N' entries - */ - char_int_type get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec{}; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian != InputIsLittleEndian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. 
- */ - template - bool get_string(const input_format_t format, - const NumberType len, - string_t& result) - { - bool success = true; - for (NumberType i = 0; i < len; i++) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string"))) - { - success = false; - break; - } - result.push_back(static_cast(current)); - } - return success; - } - - /*! - @brief create a byte array by reading bytes from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of bytes to read - @param[out] result byte array created by reading @a len bytes - - @return whether byte array creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of memory. - */ - template - bool get_binary(const input_format_t format, - const NumberType len, - binary_t& result) - { - bool success = true; - for (NumberType i = 0; i < len; i++) - { - get(); - if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary"))) - { - success = false; - break; - } - result.push_back(static_cast(current)); - } - return success; - } - - /*! - @param[in] format the current format (for diagnostics) - @param[in] context further context information (for diagnostics) - @return whether the last read character is not EOF - */ - JSON_HEDLEY_NON_NULL(3) - bool unexpect_eof(const input_format_t format, const char* context) const - { - if (JSON_HEDLEY_UNLIKELY(current == std::char_traits::eof())) - { - return sax->parse_error(chars_read, "", - parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), BasicJsonType())); - } - return true; - } - - /*! 
- @return a string representation of the last read byte - */ - std::string get_token_string() const - { - std::array cr{{}}; - (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast(current)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - return std::string{cr.data()}; - } - - /*! - @param[in] format the current format - @param[in] detail a detailed error message - @param[in] context further context information - @return a message string to use in the parse_error exceptions - */ - std::string exception_message(const input_format_t format, - const std::string& detail, - const std::string& context) const - { - std::string error_msg = "syntax error while parsing "; - - switch (format) - { - case input_format_t::cbor: - error_msg += "CBOR"; - break; - - case input_format_t::msgpack: - error_msg += "MessagePack"; - break; - - case input_format_t::ubjson: - error_msg += "UBJSON"; - break; - - case input_format_t::bson: - error_msg += "BSON"; - break; - - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - - return error_msg + " " + context + ": " + detail; - } - - private: - /// input adapter - InputAdapterType ia; - - /// the current character - char_int_type current = std::char_traits::eof(); - - /// the number of characters read - std::size_t chars_read = 0; - - /// whether we can assume little endianess - const bool is_little_endian = little_endianess(); - - /// the SAX parser - json_sax_t* sax = nullptr; -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - -// #include - - -#include // isfinite -#include // uint8_t -#include // function -#include // string -#include // move -#include // vector - -// #include - -// #include - -// #include - -// #include - -// #include - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -//////////// -// parser // -//////////// - -enum class parse_event_t : uint8_t -{ - /// the 
parser read `{` and started to process a JSON object - object_start, - /// the parser read `}` and finished processing a JSON object - object_end, - /// the parser read `[` and started to process a JSON array - array_start, - /// the parser read `]` and finished processing a JSON array - array_end, - /// the parser read a key of a value in an object - key, - /// the parser finished reading a JSON value - value -}; - -template -using parser_callback_t = - std::function; - -/*! -@brief syntax analysis - -This class implements a recursive descent parser. -*/ -template -class parser -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using lexer_t = lexer; - using token_type = typename lexer_t::token_type; - - public: - /// a parser reading from an input adapter - explicit parser(InputAdapterType&& adapter, - const parser_callback_t cb = nullptr, - const bool allow_exceptions_ = true, - const bool skip_comments = false) - : callback(cb) - , m_lexer(std::move(adapter), skip_comments) - , allow_exceptions(allow_exceptions_) - { - // read first token - get_token(); - } - - /*! 
- @brief public parser interface - - @param[in] strict whether to expect the last token to be EOF - @param[in,out] result parsed JSON value - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - void parse(const bool strict, BasicJsonType& result) - { - if (callback) - { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); - sax_parse_internal(&sdp); - - // in strict mode, input must be completely read - if (strict && (get_token() != token_type::end_of_input)) - { - sdp.parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::end_of_input, "value"), BasicJsonType())); - } - - // in case of an error, return discarded value - if (sdp.is_errored()) - { - result = value_t::discarded; - return; - } - - // set top-level value to null if it was discarded by the callback - // function - if (result.is_discarded()) - { - result = nullptr; - } - } - else - { - json_sax_dom_parser sdp(result, allow_exceptions); - sax_parse_internal(&sdp); - - // in strict mode, input must be completely read - if (strict && (get_token() != token_type::end_of_input)) - { - sdp.parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), BasicJsonType())); - } - - // in case of an error, return discarded value - if (sdp.is_errored()) - { - result = value_t::discarded; - return; - } - } - - result.assert_invariant(); - } - - /*! 
- @brief public accept interface - - @param[in] strict whether to expect the last token to be EOF - @return whether the input is a proper JSON text - */ - bool accept(const bool strict = true) - { - json_sax_acceptor sax_acceptor; - return sax_parse(&sax_acceptor, strict); - } - - template - JSON_HEDLEY_NON_NULL(2) - bool sax_parse(SAX* sax, const bool strict = true) - { - (void)detail::is_sax_static_asserts {}; - const bool result = sax_parse_internal(sax); - - // strict mode: next byte must be EOF - if (result && strict && (get_token() != token_type::end_of_input)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), BasicJsonType())); - } - - return result; - } - - private: - template - JSON_HEDLEY_NON_NULL(2) - bool sax_parse_internal(SAX* sax) - { - // stack to remember the hierarchy of structured values we are parsing - // true = array; false = object - std::vector states; - // value to avoid a goto (see comment where set to true) - bool skip_to_state_evaluation = false; - - while (true) - { - if (!skip_to_state_evaluation) - { - // invariant: get_token() was called before each iteration - switch (last_token) - { - case token_type::begin_object: - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1)))) - { - return false; - } - - // closing } -> we are done - if (get_token() == token_type::end_object) - { - if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) - { - return false; - } - break; - } - - // parse key - if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), BasicJsonType())); - } - if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) - { - return false; - } - - // parse separator (:) - if 
(JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), BasicJsonType())); - } - - // remember we are now inside an object - states.push_back(false); - - // parse values - get_token(); - continue; - } - - case token_type::begin_array: - { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1)))) - { - return false; - } - - // closing ] -> we are done - if (get_token() == token_type::end_array) - { - if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) - { - return false; - } - break; - } - - // remember we are now inside an array - states.push_back(true); - - // parse values (no need to call get_token) - continue; - } - - case token_type::value_float: - { - const auto res = m_lexer.get_number_float(); - - if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'", BasicJsonType())); - } - - if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) - { - return false; - } - - break; - } - - case token_type::literal_false: - { - if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false))) - { - return false; - } - break; - } - - case token_type::literal_null: - { - if (JSON_HEDLEY_UNLIKELY(!sax->null())) - { - return false; - } - break; - } - - case token_type::literal_true: - { - if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true))) - { - return false; - } - break; - } - - case token_type::value_integer: - { - if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer()))) - { - return false; - } - break; - } - - case token_type::value_string: - { - if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string()))) - { - return false; - } - break; - } - - case token_type::value_unsigned: - { - if 
(JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned()))) - { - return false; - } - break; - } - - case token_type::parse_error: - { - // using "uninitialized" to avoid "expected" message - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), BasicJsonType())); - } - - default: // the last token was unexpected - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), BasicJsonType())); - } - } - } - else - { - skip_to_state_evaluation = false; - } - - // we reached this line after we successfully parsed a value - if (states.empty()) - { - // empty stack: we reached the end of the hierarchy: done - return true; - } - - if (states.back()) // array - { - // comma -> next value - if (get_token() == token_type::value_separator) - { - // parse a new value - get_token(); - continue; - } - - // closing ] - if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array)) - { - if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) - { - return false; - } - - // We are done with this array. Before we can parse a - // new value, we need to evaluate the new state first. - // By setting skip_to_state_evaluation to false, we - // are effectively jumping to the beginning of this if. 
- JSON_ASSERT(!states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), BasicJsonType())); - } - - // states.back() is false -> object - - // comma -> next value - if (get_token() == token_type::value_separator) - { - // parse key - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), BasicJsonType())); - } - - if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) - { - return false; - } - - // parse separator (:) - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), BasicJsonType())); - } - - // parse values - get_token(); - continue; - } - - // closing } - if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object)) - { - if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) - { - return false; - } - - // We are done with this object. Before we can parse a - // new value, we need to evaluate the new state first. - // By setting skip_to_state_evaluation to false, we - // are effectively jumping to the beginning of this if. 
- JSON_ASSERT(!states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), BasicJsonType())); - } - } - - /// get next token from lexer - token_type get_token() - { - return last_token = m_lexer.scan(); - } - - std::string exception_message(const token_type expected, const std::string& context) - { - std::string error_msg = "syntax error "; - - if (!context.empty()) - { - error_msg += "while parsing " + context + " "; - } - - error_msg += "- "; - - if (last_token == token_type::parse_error) - { - error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + - m_lexer.get_token_string() + "'"; - } - else - { - error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); - } - - if (expected != token_type::uninitialized) - { - error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); - } - - return error_msg; - } - - private: - /// callback function - const parser_callback_t callback = nullptr; - /// the type of the last read token - token_type last_token = token_type::uninitialized; - /// the lexer - lexer_t m_lexer; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; -}; - -} // namespace detail -} // namespace nlohmann - -// #include - - -// #include - - -#include // ptrdiff_t -#include // numeric_limits - -// #include - - -namespace nlohmann -{ -namespace detail -{ -/* -@brief an iterator for primitive JSON types - -This class models an iterator for primitive JSON types (boolean, number, -string). It's only purpose is to allow the iterator/const_iterator classes -to "iterate" over primitive values. Internally, the iterator is modeled by -a `difference_type` variable. Value begin_value (`0`) models the begin, -end_value (`1`) models past the end. 
-*/ -class primitive_iterator_t -{ - private: - using difference_type = std::ptrdiff_t; - static constexpr difference_type begin_value = 0; - static constexpr difference_type end_value = begin_value + 1; - - JSON_PRIVATE_UNLESS_TESTED: - /// iterator as signed integer type - difference_type m_it = (std::numeric_limits::min)(); - - public: - constexpr difference_type get_value() const noexcept - { - return m_it; - } - - /// set iterator to a defined beginning - void set_begin() noexcept - { - m_it = begin_value; - } - - /// set iterator to a defined past the end - void set_end() noexcept - { - m_it = end_value; - } - - /// return whether the iterator can be dereferenced - constexpr bool is_begin() const noexcept - { - return m_it == begin_value; - } - - /// return whether the iterator is at end - constexpr bool is_end() const noexcept - { - return m_it == end_value; - } - - friend constexpr bool operator==(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it == rhs.m_it; - } - - friend constexpr bool operator<(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it < rhs.m_it; - } - - primitive_iterator_t operator+(difference_type n) noexcept - { - auto result = *this; - result += n; - return result; - } - - friend constexpr difference_type operator-(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept - { - return lhs.m_it - rhs.m_it; - } - - primitive_iterator_t& operator++() noexcept - { - ++m_it; - return *this; - } - - primitive_iterator_t const operator++(int) noexcept // NOLINT(readability-const-return-type) - { - auto result = *this; - ++m_it; - return result; - } - - primitive_iterator_t& operator--() noexcept - { - --m_it; - return *this; - } - - primitive_iterator_t const operator--(int) noexcept // NOLINT(readability-const-return-type) - { - auto result = *this; - --m_it; - return result; - } - - primitive_iterator_t& operator+=(difference_type n) noexcept - { - m_it += n; - return *this; - 
} - - primitive_iterator_t& operator-=(difference_type n) noexcept - { - m_it -= n; - return *this; - } -}; -} // namespace detail -} // namespace nlohmann - - -namespace nlohmann -{ -namespace detail -{ -/*! -@brief an iterator value - -@note This structure could easily be a union, but MSVC currently does not allow -unions members with complex constructors, see https://github.com/nlohmann/json/pull/105. -*/ -template struct internal_iterator -{ - /// iterator for JSON objects - typename BasicJsonType::object_t::iterator object_iterator {}; - /// iterator for JSON arrays - typename BasicJsonType::array_t::iterator array_iterator {}; - /// generic iterator for all other types - primitive_iterator_t primitive_iterator {}; -}; -} // namespace detail -} // namespace nlohmann - -// #include - - -#include // iterator, random_access_iterator_tag, bidirectional_iterator_tag, advance, next -#include // conditional, is_const, remove_const - -// #include - -// #include - -// #include - -// #include - -// #include - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -// forward declare, to be able to friend it later on -template class iteration_proxy; -template class iteration_proxy_value; - -/*! -@brief a template for a bidirectional iterator for the @ref basic_json class -This class implements a both iterators (iterator and const_iterator) for the -@ref basic_json class. -@note An iterator is called *initialized* when a pointer to a JSON value has - been set (e.g., by a constructor or a copy assignment). If the iterator is - default-constructed, it is *uninitialized* and most methods are undefined. - **The library uses assertions to detect calls on uninitialized iterators.** -@requirement The class satisfies the following concept requirements: -- -[BidirectionalIterator](https://en.cppreference.com/w/cpp/named_req/BidirectionalIterator): - The iterator that can be moved can be moved in both directions (i.e. - incremented and decremented). 
-@since version 1.0.0, simplified in version 2.0.9, change to bidirectional - iterators in version 3.0.0 (see https://github.com/nlohmann/json/issues/593) -*/ -template -class iter_impl -{ - /// the iterator with BasicJsonType of different const-ness - using other_iter_impl = iter_impl::value, typename std::remove_const::type, const BasicJsonType>::type>; - /// allow basic_json to access private members - friend other_iter_impl; - friend BasicJsonType; - friend iteration_proxy; - friend iteration_proxy_value; - - using object_t = typename BasicJsonType::object_t; - using array_t = typename BasicJsonType::array_t; - // make sure BasicJsonType is basic_json or const basic_json - static_assert(is_basic_json::type>::value, - "iter_impl only accepts (const) basic_json"); - - public: - - /// The std::iterator class template (used as a base class to provide typedefs) is deprecated in C++17. - /// The C++ Standard has never required user-defined iterators to derive from std::iterator. - /// A user-defined iterator should provide publicly accessible typedefs named - /// iterator_category, value_type, difference_type, pointer, and reference. - /// Note that value_type is required to be non-const, even for constant iterators. 
- using iterator_category = std::bidirectional_iterator_tag; - - /// the type of the values when the iterator is dereferenced - using value_type = typename BasicJsonType::value_type; - /// a type to represent differences between iterators - using difference_type = typename BasicJsonType::difference_type; - /// defines a pointer to the type iterated over (value_type) - using pointer = typename std::conditional::value, - typename BasicJsonType::const_pointer, - typename BasicJsonType::pointer>::type; - /// defines a reference to the type iterated over (value_type) - using reference = - typename std::conditional::value, - typename BasicJsonType::const_reference, - typename BasicJsonType::reference>::type; - - iter_impl() = default; - ~iter_impl() = default; - iter_impl(iter_impl&&) noexcept = default; - iter_impl& operator=(iter_impl&&) noexcept = default; - - /*! - @brief constructor for a given JSON instance - @param[in] object pointer to a JSON object for this iterator - @pre object != nullptr - @post The iterator is initialized; i.e. `m_object != nullptr`. - */ - explicit iter_impl(pointer object) noexcept : m_object(object) - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - { - m_it.object_iterator = typename object_t::iterator(); - break; - } - - case value_t::array: - { - m_it.array_iterator = typename array_t::iterator(); - break; - } - - default: - { - m_it.primitive_iterator = primitive_iterator_t(); - break; - } - } - } - - /*! - @note The conventional copy constructor and copy assignment are implicitly - defined. Combined with the following converting constructor and - assignment, they support: (1) copy from iterator to iterator, (2) - copy from const iterator to const iterator, and (3) conversion from - iterator to const iterator. However conversion from const iterator - to iterator is not defined. - */ - - /*! 
- @brief const copy constructor - @param[in] other const iterator to copy from - @note This copy constructor had to be defined explicitly to circumvent a bug - occurring on msvc v19.0 compiler (VS 2015) debug build. For more - information refer to: https://github.com/nlohmann/json/issues/1608 - */ - iter_impl(const iter_impl& other) noexcept - : m_object(other.m_object), m_it(other.m_it) - {} - - /*! - @brief converting assignment - @param[in] other const iterator to copy from - @return const/non-const iterator - @note It is not checked whether @a other is initialized. - */ - iter_impl& operator=(const iter_impl& other) noexcept - { - if (&other != this) - { - m_object = other.m_object; - m_it = other.m_it; - } - return *this; - } - - /*! - @brief converting constructor - @param[in] other non-const iterator to copy from - @note It is not checked whether @a other is initialized. - */ - iter_impl(const iter_impl::type>& other) noexcept - : m_object(other.m_object), m_it(other.m_it) - {} - - /*! - @brief converting assignment - @param[in] other non-const iterator to copy from - @return const/non-const iterator - @note It is not checked whether @a other is initialized. - */ - iter_impl& operator=(const iter_impl::type>& other) noexcept // NOLINT(cert-oop54-cpp) - { - m_object = other.m_object; - m_it = other.m_it; - return *this; - } - - JSON_PRIVATE_UNLESS_TESTED: - /*! - @brief set the iterator to the first value - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - void set_begin() noexcept - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - { - m_it.object_iterator = m_object->m_value.object->begin(); - break; - } - - case value_t::array: - { - m_it.array_iterator = m_object->m_value.array->begin(); - break; - } - - case value_t::null: - { - // set to end so begin()==end() is true: null is empty - m_it.primitive_iterator.set_end(); - break; - } - - default: - { - m_it.primitive_iterator.set_begin(); - break; - } - } - } - - /*! - @brief set the iterator past the last value - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - void set_end() noexcept - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - { - m_it.object_iterator = m_object->m_value.object->end(); - break; - } - - case value_t::array: - { - m_it.array_iterator = m_object->m_value.array->end(); - break; - } - - default: - { - m_it.primitive_iterator.set_end(); - break; - } - } - } - - public: - /*! - @brief return a reference to the value pointed to by the iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - reference operator*() const - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - { - JSON_ASSERT(m_it.object_iterator != m_object->m_value.object->end()); - return m_it.object_iterator->second; - } - - case value_t::array: - { - JSON_ASSERT(m_it.array_iterator != m_object->m_value.array->end()); - return *m_it.array_iterator; - } - - case value_t::null: - JSON_THROW(invalid_iterator::create(214, "cannot get value", *m_object)); - - default: - { - if (JSON_HEDLEY_LIKELY(m_it.primitive_iterator.is_begin())) - { - return *m_object; - } - - JSON_THROW(invalid_iterator::create(214, "cannot get value", *m_object)); - } - } - } - - /*! - @brief dereference the iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - pointer operator->() const - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - { - JSON_ASSERT(m_it.object_iterator != m_object->m_value.object->end()); - return &(m_it.object_iterator->second); - } - - case value_t::array: - { - JSON_ASSERT(m_it.array_iterator != m_object->m_value.array->end()); - return &*m_it.array_iterator; - } - - default: - { - if (JSON_HEDLEY_LIKELY(m_it.primitive_iterator.is_begin())) - { - return m_object; - } - - JSON_THROW(invalid_iterator::create(214, "cannot get value", *m_object)); - } - } - } - - /*! - @brief post-increment (it++) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl const operator++(int) // NOLINT(readability-const-return-type) - { - auto result = *this; - ++(*this); - return result; - } - - /*! - @brief pre-increment (++it) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator++() - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - { - std::advance(m_it.object_iterator, 1); - break; - } - - case value_t::array: - { - std::advance(m_it.array_iterator, 1); - break; - } - - default: - { - ++m_it.primitive_iterator; - break; - } - } - - return *this; - } - - /*! - @brief post-decrement (it--) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl const operator--(int) // NOLINT(readability-const-return-type) - { - auto result = *this; - --(*this); - return result; - } - - /*! - @brief pre-decrement (--it) - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator--() - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - { - std::advance(m_it.object_iterator, -1); - break; - } - - case value_t::array: - { - std::advance(m_it.array_iterator, -1); - break; - } - - default: - { - --m_it.primitive_iterator; - break; - } - } - - return *this; - } - - /*! 
- @brief comparison: equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - template < typename IterImpl, detail::enable_if_t < (std::is_same::value || std::is_same::value), std::nullptr_t > = nullptr > - bool operator==(const IterImpl& other) const - { - // if objects are not the same, the comparison is undefined - if (JSON_HEDLEY_UNLIKELY(m_object != other.m_object)) - { - JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers", *m_object)); - } - - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - return (m_it.object_iterator == other.m_it.object_iterator); - - case value_t::array: - return (m_it.array_iterator == other.m_it.array_iterator); - - default: - return (m_it.primitive_iterator == other.m_it.primitive_iterator); - } - } - - /*! - @brief comparison: not equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - template < typename IterImpl, detail::enable_if_t < (std::is_same::value || std::is_same::value), std::nullptr_t > = nullptr > - bool operator!=(const IterImpl& other) const - { - return !operator==(other); - } - - /*! - @brief comparison: smaller - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator<(const iter_impl& other) const - { - // if objects are not the same, the comparison is undefined - if (JSON_HEDLEY_UNLIKELY(m_object != other.m_object)) - { - JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers", *m_object)); - } - - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - JSON_THROW(invalid_iterator::create(213, "cannot compare order of object iterators", *m_object)); - - case value_t::array: - return (m_it.array_iterator < other.m_it.array_iterator); - - default: - return (m_it.primitive_iterator < other.m_it.primitive_iterator); - } - } - - /*! 
- @brief comparison: less than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator<=(const iter_impl& other) const - { - return !other.operator < (*this); - } - - /*! - @brief comparison: greater than - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator>(const iter_impl& other) const - { - return !operator<=(other); - } - - /*! - @brief comparison: greater than or equal - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - bool operator>=(const iter_impl& other) const - { - return !operator<(other); - } - - /*! - @brief add to iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator+=(difference_type i) - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators", *m_object)); - - case value_t::array: - { - std::advance(m_it.array_iterator, i); - break; - } - - default: - { - m_it.primitive_iterator += i; - break; - } - } - - return *this; - } - - /*! - @brief subtract from iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl& operator-=(difference_type i) - { - return operator+=(-i); - } - - /*! - @brief add to iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator+(difference_type i) const - { - auto result = *this; - result += i; - return result; - } - - /*! - @brief addition of distance and iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - friend iter_impl operator+(difference_type i, const iter_impl& it) - { - auto result = it; - result += i; - return result; - } - - /*! - @brief subtract from iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - iter_impl operator-(difference_type i) const - { - auto result = *this; - result -= i; - return result; - } - - /*! 
- @brief return difference - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - difference_type operator-(const iter_impl& other) const - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators", *m_object)); - - case value_t::array: - return m_it.array_iterator - other.m_it.array_iterator; - - default: - return m_it.primitive_iterator - other.m_it.primitive_iterator; - } - } - - /*! - @brief access to successor - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - reference operator[](difference_type n) const - { - JSON_ASSERT(m_object != nullptr); - - switch (m_object->m_type) - { - case value_t::object: - JSON_THROW(invalid_iterator::create(208, "cannot use operator[] for object iterators", *m_object)); - - case value_t::array: - return *std::next(m_it.array_iterator, n); - - case value_t::null: - JSON_THROW(invalid_iterator::create(214, "cannot get value", *m_object)); - - default: - { - if (JSON_HEDLEY_LIKELY(m_it.primitive_iterator.get_value() == -n)) - { - return *m_object; - } - - JSON_THROW(invalid_iterator::create(214, "cannot get value", *m_object)); - } - } - } - - /*! - @brief return the key of an object iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. - */ - const typename object_t::key_type& key() const - { - JSON_ASSERT(m_object != nullptr); - - if (JSON_HEDLEY_LIKELY(m_object->is_object())) - { - return m_it.object_iterator->first; - } - - JSON_THROW(invalid_iterator::create(207, "cannot use key() for non-object iterators", *m_object)); - } - - /*! - @brief return the value of an iterator - @pre The iterator is initialized; i.e. `m_object != nullptr`. 
- */ - reference value() const - { - return operator*(); - } - - JSON_PRIVATE_UNLESS_TESTED: - /// associated JSON instance - pointer m_object = nullptr; - /// the actual iterator of the associated instance - internal_iterator::type> m_it {}; -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - - -#include // ptrdiff_t -#include // reverse_iterator -#include // declval - -namespace nlohmann -{ -namespace detail -{ -////////////////////// -// reverse_iterator // -////////////////////// - -/*! -@brief a template for a reverse iterator class - -@tparam Base the base iterator type to reverse. Valid types are @ref -iterator (to create @ref reverse_iterator) and @ref const_iterator (to -create @ref const_reverse_iterator). - -@requirement The class satisfies the following concept requirements: -- -[BidirectionalIterator](https://en.cppreference.com/w/cpp/named_req/BidirectionalIterator): - The iterator that can be moved can be moved in both directions (i.e. - incremented and decremented). -- [OutputIterator](https://en.cppreference.com/w/cpp/named_req/OutputIterator): - It is possible to write to the pointed-to element (only if @a Base is - @ref iterator). 
- -@since version 1.0.0 -*/ -template -class json_reverse_iterator : public std::reverse_iterator -{ - public: - using difference_type = std::ptrdiff_t; - /// shortcut to the reverse iterator adapter - using base_iterator = std::reverse_iterator; - /// the reference type for the pointed-to element - using reference = typename Base::reference; - - /// create reverse iterator from iterator - explicit json_reverse_iterator(const typename base_iterator::iterator_type& it) noexcept - : base_iterator(it) {} - - /// create reverse iterator from base class - explicit json_reverse_iterator(const base_iterator& it) noexcept : base_iterator(it) {} - - /// post-increment (it++) - json_reverse_iterator const operator++(int) // NOLINT(readability-const-return-type) - { - return static_cast(base_iterator::operator++(1)); - } - - /// pre-increment (++it) - json_reverse_iterator& operator++() - { - return static_cast(base_iterator::operator++()); - } - - /// post-decrement (it--) - json_reverse_iterator const operator--(int) // NOLINT(readability-const-return-type) - { - return static_cast(base_iterator::operator--(1)); - } - - /// pre-decrement (--it) - json_reverse_iterator& operator--() - { - return static_cast(base_iterator::operator--()); - } - - /// add to iterator - json_reverse_iterator& operator+=(difference_type i) - { - return static_cast(base_iterator::operator+=(i)); - } - - /// add to iterator - json_reverse_iterator operator+(difference_type i) const - { - return static_cast(base_iterator::operator+(i)); - } - - /// subtract from iterator - json_reverse_iterator operator-(difference_type i) const - { - return static_cast(base_iterator::operator-(i)); - } - - /// return difference - difference_type operator-(const json_reverse_iterator& other) const - { - return base_iterator(*this) - base_iterator(other); - } - - /// access to successor - reference operator[](difference_type n) const - { - return *(this->operator+(n)); - } - - /// return the key of an object iterator 
- auto key() const -> decltype(std::declval().key()) - { - auto it = --this->base(); - return it.key(); - } - - /// return the value of an iterator - reference value() const - { - auto it = --this->base(); - return it.operator * (); - } -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - - -#include // all_of -#include // isdigit -#include // max -#include // accumulate -#include // string -#include // move -#include // vector - -// #include - -// #include - -// #include - -// #include - - -namespace nlohmann -{ -template -class json_pointer -{ - // allow basic_json to access private members - NLOHMANN_BASIC_JSON_TPL_DECLARATION - friend class basic_json; - - public: - /*! - @brief create JSON pointer - - Create a JSON pointer according to the syntax described in - [Section 3 of RFC6901](https://tools.ietf.org/html/rfc6901#section-3). - - @param[in] s string representing the JSON pointer; if omitted, the empty - string is assumed which references the whole JSON value - - @throw parse_error.107 if the given JSON pointer @a s is nonempty and does - not begin with a slash (`/`); see example below - - @throw parse_error.108 if a tilde (`~`) in the given JSON pointer @a s is - not followed by `0` (representing `~`) or `1` (representing `/`); see - example below - - @liveexample{The example shows the construction several valid JSON pointers - as well as the exceptional behavior.,json_pointer} - - @since version 2.0.0 - */ - explicit json_pointer(const std::string& s = "") - : reference_tokens(split(s)) - {} - - /*! 
- @brief return a string representation of the JSON pointer - - @invariant For each JSON pointer `ptr`, it holds: - @code {.cpp} - ptr == json_pointer(ptr.to_string()); - @endcode - - @return a string representation of the JSON pointer - - @liveexample{The example shows the result of `to_string`.,json_pointer__to_string} - - @since version 2.0.0 - */ - std::string to_string() const - { - return std::accumulate(reference_tokens.begin(), reference_tokens.end(), - std::string{}, - [](const std::string & a, const std::string & b) - { - return a + "/" + detail::escape(b); - }); - } - - /// @copydoc to_string() - operator std::string() const - { - return to_string(); - } - - /*! - @brief append another JSON pointer at the end of this JSON pointer - - @param[in] ptr JSON pointer to append - @return JSON pointer with @a ptr appended - - @liveexample{The example shows the usage of `operator/=`.,json_pointer__operator_add} - - @complexity Linear in the length of @a ptr. - - @sa see @ref operator/=(std::string) to append a reference token - @sa see @ref operator/=(std::size_t) to append an array index - @sa see @ref operator/(const json_pointer&, const json_pointer&) for a binary operator - - @since version 3.6.0 - */ - json_pointer& operator/=(const json_pointer& ptr) - { - reference_tokens.insert(reference_tokens.end(), - ptr.reference_tokens.begin(), - ptr.reference_tokens.end()); - return *this; - } - - /*! - @brief append an unescaped reference token at the end of this JSON pointer - - @param[in] token reference token to append - @return JSON pointer with @a token appended without escaping @a token - - @liveexample{The example shows the usage of `operator/=`.,json_pointer__operator_add} - - @complexity Amortized constant. 
- - @sa see @ref operator/=(const json_pointer&) to append a JSON pointer - @sa see @ref operator/=(std::size_t) to append an array index - @sa see @ref operator/(const json_pointer&, std::size_t) for a binary operator - - @since version 3.6.0 - */ - json_pointer& operator/=(std::string token) - { - push_back(std::move(token)); - return *this; - } - - /*! - @brief append an array index at the end of this JSON pointer - - @param[in] array_idx array index to append - @return JSON pointer with @a array_idx appended - - @liveexample{The example shows the usage of `operator/=`.,json_pointer__operator_add} - - @complexity Amortized constant. - - @sa see @ref operator/=(const json_pointer&) to append a JSON pointer - @sa see @ref operator/=(std::string) to append a reference token - @sa see @ref operator/(const json_pointer&, std::string) for a binary operator - - @since version 3.6.0 - */ - json_pointer& operator/=(std::size_t array_idx) - { - return *this /= std::to_string(array_idx); - } - - /*! - @brief create a new JSON pointer by appending the right JSON pointer at the end of the left JSON pointer - - @param[in] lhs JSON pointer - @param[in] rhs JSON pointer - @return a new JSON pointer with @a rhs appended to @a lhs - - @liveexample{The example shows the usage of `operator/`.,json_pointer__operator_add_binary} - - @complexity Linear in the length of @a lhs and @a rhs. - - @sa see @ref operator/=(const json_pointer&) to append a JSON pointer - - @since version 3.6.0 - */ - friend json_pointer operator/(const json_pointer& lhs, - const json_pointer& rhs) - { - return json_pointer(lhs) /= rhs; - } - - /*! 
- @brief create a new JSON pointer by appending the unescaped token at the end of the JSON pointer - - @param[in] ptr JSON pointer - @param[in] token reference token - @return a new JSON pointer with unescaped @a token appended to @a ptr - - @liveexample{The example shows the usage of `operator/`.,json_pointer__operator_add_binary} - - @complexity Linear in the length of @a ptr. - - @sa see @ref operator/=(std::string) to append a reference token - - @since version 3.6.0 - */ - friend json_pointer operator/(const json_pointer& ptr, std::string token) // NOLINT(performance-unnecessary-value-param) - { - return json_pointer(ptr) /= std::move(token); - } - - /*! - @brief create a new JSON pointer by appending the array-index-token at the end of the JSON pointer - - @param[in] ptr JSON pointer - @param[in] array_idx array index - @return a new JSON pointer with @a array_idx appended to @a ptr - - @liveexample{The example shows the usage of `operator/`.,json_pointer__operator_add_binary} - - @complexity Linear in the length of @a ptr. - - @sa see @ref operator/=(std::size_t) to append an array index - - @since version 3.6.0 - */ - friend json_pointer operator/(const json_pointer& ptr, std::size_t array_idx) - { - return json_pointer(ptr) /= array_idx; - } - - /*! - @brief returns the parent of this JSON pointer - - @return parent of this JSON pointer; in case this JSON pointer is the root, - the root itself is returned - - @complexity Linear in the length of the JSON pointer. - - @liveexample{The example shows the result of `parent_pointer` for different - JSON Pointers.,json_pointer__parent_pointer} - - @since version 3.6.0 - */ - json_pointer parent_pointer() const - { - if (empty()) - { - return *this; - } - - json_pointer res = *this; - res.pop_back(); - return res; - } - - /*! - @brief remove last reference token - - @pre not `empty()` - - @liveexample{The example shows the usage of `pop_back`.,json_pointer__pop_back} - - @complexity Constant. 
- - @throw out_of_range.405 if JSON pointer has no parent - - @since version 3.6.0 - */ - void pop_back() - { - if (JSON_HEDLEY_UNLIKELY(empty())) - { - JSON_THROW(detail::out_of_range::create(405, "JSON pointer has no parent", BasicJsonType())); - } - - reference_tokens.pop_back(); - } - - /*! - @brief return last reference token - - @pre not `empty()` - @return last reference token - - @liveexample{The example shows the usage of `back`.,json_pointer__back} - - @complexity Constant. - - @throw out_of_range.405 if JSON pointer has no parent - - @since version 3.6.0 - */ - const std::string& back() const - { - if (JSON_HEDLEY_UNLIKELY(empty())) - { - JSON_THROW(detail::out_of_range::create(405, "JSON pointer has no parent", BasicJsonType())); - } - - return reference_tokens.back(); - } - - /*! - @brief append an unescaped token at the end of the reference pointer - - @param[in] token token to add - - @complexity Amortized constant. - - @liveexample{The example shows the result of `push_back` for different - JSON Pointers.,json_pointer__push_back} - - @since version 3.6.0 - */ - void push_back(const std::string& token) - { - reference_tokens.push_back(token); - } - - /// @copydoc push_back(const std::string&) - void push_back(std::string&& token) - { - reference_tokens.push_back(std::move(token)); - } - - /*! - @brief return whether pointer points to the root document - - @return true iff the JSON pointer points to the root document - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example shows the result of `empty` for different JSON - Pointers.,json_pointer__empty} - - @since version 3.6.0 - */ - bool empty() const noexcept - { - return reference_tokens.empty(); - } - - private: - /*! 
- @param[in] s reference token to be converted into an array index - - @return integer representation of @a s - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index begins not with a digit - @throw out_of_range.404 if string @a s could not be converted to an integer - @throw out_of_range.410 if an array index exceeds size_type - */ - static typename BasicJsonType::size_type array_index(const std::string& s) - { - using size_type = typename BasicJsonType::size_type; - - // error condition (cf. RFC 6901, Sect. 4) - if (JSON_HEDLEY_UNLIKELY(s.size() > 1 && s[0] == '0')) - { - JSON_THROW(detail::parse_error::create(106, 0, "array index '" + s + "' must not begin with '0'", BasicJsonType())); - } - - // error condition (cf. RFC 6901, Sect. 4) - if (JSON_HEDLEY_UNLIKELY(s.size() > 1 && !(s[0] >= '1' && s[0] <= '9'))) - { - JSON_THROW(detail::parse_error::create(109, 0, "array index '" + s + "' is not a number", BasicJsonType())); - } - - std::size_t processed_chars = 0; - unsigned long long res = 0; // NOLINT(runtime/int) - JSON_TRY - { - res = std::stoull(s, &processed_chars); - } - JSON_CATCH(std::out_of_range&) - { - JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + s + "'", BasicJsonType())); - } - - // check if the string was completely read - if (JSON_HEDLEY_UNLIKELY(processed_chars != s.size())) - { - JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + s + "'", BasicJsonType())); - } - - // only triggered on special platforms (like 32bit), see also - // https://github.com/nlohmann/json/pull/2203 - if (res >= static_cast((std::numeric_limits::max)())) // NOLINT(runtime/int) - { - JSON_THROW(detail::out_of_range::create(410, "array index " + s + " exceeds size_type", BasicJsonType())); // LCOV_EXCL_LINE - } - - return static_cast(res); - } - - JSON_PRIVATE_UNLESS_TESTED: - json_pointer top() const - { - if (JSON_HEDLEY_UNLIKELY(empty())) - { - 
JSON_THROW(detail::out_of_range::create(405, "JSON pointer has no parent", BasicJsonType())); - } - - json_pointer result = *this; - result.reference_tokens = {reference_tokens[0]}; - return result; - } - - private: - /*! - @brief create and return a reference to the pointed to value - - @complexity Linear in the number of reference tokens. - - @throw parse_error.109 if array index is not a number - @throw type_error.313 if value cannot be unflattened - */ - BasicJsonType& get_and_create(BasicJsonType& j) const - { - auto* result = &j; - - // in case no reference tokens exist, return a reference to the JSON value - // j which will be overwritten by a primitive value - for (const auto& reference_token : reference_tokens) - { - switch (result->type()) - { - case detail::value_t::null: - { - if (reference_token == "0") - { - // start a new array if reference token is 0 - result = &result->operator[](0); - } - else - { - // start a new object otherwise - result = &result->operator[](reference_token); - } - break; - } - - case detail::value_t::object: - { - // create an entry in the object - result = &result->operator[](reference_token); - break; - } - - case detail::value_t::array: - { - // create an entry in the array - result = &result->operator[](array_index(reference_token)); - break; - } - - /* - The following code is only reached if there exists a reference - token _and_ the current value is primitive. In this case, we have - an error situation, because primitive values may only occur as - single value; that is, with an empty list of reference tokens. - */ - default: - JSON_THROW(detail::type_error::create(313, "invalid value to unflatten", j)); - } - } - - return *result; - } - - /*! - @brief return a reference to the pointed to value - - @note This version does not throw if a value is not present, but tries to - create nested values instead. 
For instance, calling this function - with pointer `"/this/that"` on a null value is equivalent to calling - `operator[]("this").operator[]("that")` on that value, effectively - changing the null value to an object. - - @param[in] ptr a JSON value - - @return reference to the JSON value pointed to by the JSON pointer - - @complexity Linear in the length of the JSON pointer. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - BasicJsonType& get_unchecked(BasicJsonType* ptr) const - { - for (const auto& reference_token : reference_tokens) - { - // convert null values to arrays or objects before continuing - if (ptr->is_null()) - { - // check if reference token is a number - const bool nums = - std::all_of(reference_token.begin(), reference_token.end(), - [](const unsigned char x) - { - return std::isdigit(x); - }); - - // change value to array for numbers or "-" or to object otherwise - *ptr = (nums || reference_token == "-") - ? detail::value_t::array - : detail::value_t::object; - } - - switch (ptr->type()) - { - case detail::value_t::object: - { - // use unchecked object access - ptr = &ptr->operator[](reference_token); - break; - } - - case detail::value_t::array: - { - if (reference_token == "-") - { - // explicitly treat "-" as index beyond the end - ptr = &ptr->operator[](ptr->m_value.array->size()); - } - else - { - // convert array index to number; unchecked access - ptr = &ptr->operator[](array_index(reference_token)); - } - break; - } - - default: - JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'", *ptr)); - } - } - - return *ptr; - } - - /*! 
- @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - BasicJsonType& get_checked(BasicJsonType* ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->type()) - { - case detail::value_t::object: - { - // note: at performs range check - ptr = &ptr->at(reference_token); - break; - } - - case detail::value_t::array: - { - if (JSON_HEDLEY_UNLIKELY(reference_token == "-")) - { - // "-" always fails the range check - JSON_THROW(detail::out_of_range::create(402, - "array index '-' (" + std::to_string(ptr->m_value.array->size()) + - ") is out of range", *ptr)); - } - - // note: at performs range check - ptr = &ptr->at(array_index(reference_token)); - break; - } - - default: - JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'", *ptr)); - } - } - - return *ptr; - } - - /*! 
- @brief return a const reference to the pointed to value - - @param[in] ptr a JSON value - - @return const reference to the JSON value pointed to by the JSON - pointer - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - const BasicJsonType& get_unchecked(const BasicJsonType* ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->type()) - { - case detail::value_t::object: - { - // use unchecked object access - ptr = &ptr->operator[](reference_token); - break; - } - - case detail::value_t::array: - { - if (JSON_HEDLEY_UNLIKELY(reference_token == "-")) - { - // "-" cannot be used for const access - JSON_THROW(detail::out_of_range::create(402, "array index '-' (" + std::to_string(ptr->m_value.array->size()) + ") is out of range", *ptr)); - } - - // use unchecked array access - ptr = &ptr->operator[](array_index(reference_token)); - break; - } - - default: - JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'", *ptr)); - } - } - - return *ptr; - } - - /*! 
- @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - */ - const BasicJsonType& get_checked(const BasicJsonType* ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->type()) - { - case detail::value_t::object: - { - // note: at performs range check - ptr = &ptr->at(reference_token); - break; - } - - case detail::value_t::array: - { - if (JSON_HEDLEY_UNLIKELY(reference_token == "-")) - { - // "-" always fails the range check - JSON_THROW(detail::out_of_range::create(402, - "array index '-' (" + std::to_string(ptr->m_value.array->size()) + - ") is out of range", *ptr)); - } - - // note: at performs range check - ptr = &ptr->at(array_index(reference_token)); - break; - } - - default: - JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'", *ptr)); - } - } - - return *ptr; - } - - /*! 
- @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - */ - bool contains(const BasicJsonType* ptr) const - { - for (const auto& reference_token : reference_tokens) - { - switch (ptr->type()) - { - case detail::value_t::object: - { - if (!ptr->contains(reference_token)) - { - // we did not find the key in the object - return false; - } - - ptr = &ptr->operator[](reference_token); - break; - } - - case detail::value_t::array: - { - if (JSON_HEDLEY_UNLIKELY(reference_token == "-")) - { - // "-" always fails the range check - return false; - } - if (JSON_HEDLEY_UNLIKELY(reference_token.size() == 1 && !("0" <= reference_token && reference_token <= "9"))) - { - // invalid char - return false; - } - if (JSON_HEDLEY_UNLIKELY(reference_token.size() > 1)) - { - if (JSON_HEDLEY_UNLIKELY(!('1' <= reference_token[0] && reference_token[0] <= '9'))) - { - // first char should be between '1' and '9' - return false; - } - for (std::size_t i = 1; i < reference_token.size(); i++) - { - if (JSON_HEDLEY_UNLIKELY(!('0' <= reference_token[i] && reference_token[i] <= '9'))) - { - // other char should be between '0' and '9' - return false; - } - } - } - - const auto idx = array_index(reference_token); - if (idx >= ptr->size()) - { - // index out of range - return false; - } - - ptr = &ptr->operator[](idx); - break; - } - - default: - { - // we do not expect primitive values if there is still a - // reference token to process - return false; - } - } - } - - // no reference token left means we found a primitive value - return true; - } - - /*! - @brief split the string input to reference tokens - - @note This function is only called by the json_pointer constructor. - All exceptions below are documented there. 
- - @throw parse_error.107 if the pointer is not empty or begins with '/' - @throw parse_error.108 if character '~' is not followed by '0' or '1' - */ - static std::vector split(const std::string& reference_string) - { - std::vector result; - - // special case: empty reference string -> no reference tokens - if (reference_string.empty()) - { - return result; - } - - // check if nonempty reference string begins with slash - if (JSON_HEDLEY_UNLIKELY(reference_string[0] != '/')) - { - JSON_THROW(detail::parse_error::create(107, 1, "JSON pointer must be empty or begin with '/' - was: '" + reference_string + "'", BasicJsonType())); - } - - // extract the reference tokens: - // - slash: position of the last read slash (or end of string) - // - start: position after the previous slash - for ( - // search for the first slash after the first character - std::size_t slash = reference_string.find_first_of('/', 1), - // set the beginning of the first reference token - start = 1; - // we can stop if start == 0 (if slash == std::string::npos) - start != 0; - // set the beginning of the next reference token - // (will eventually be 0 if slash == std::string::npos) - start = (slash == std::string::npos) ? 0 : slash + 1, - // find next slash - slash = reference_string.find_first_of('/', start)) - { - // use the text between the beginning of the reference token - // (start) and the last slash (slash). 
- auto reference_token = reference_string.substr(start, slash - start); - - // check reference tokens are properly escaped - for (std::size_t pos = reference_token.find_first_of('~'); - pos != std::string::npos; - pos = reference_token.find_first_of('~', pos + 1)) - { - JSON_ASSERT(reference_token[pos] == '~'); - - // ~ must be followed by 0 or 1 - if (JSON_HEDLEY_UNLIKELY(pos == reference_token.size() - 1 || - (reference_token[pos + 1] != '0' && - reference_token[pos + 1] != '1'))) - { - JSON_THROW(detail::parse_error::create(108, 0, "escape character '~' must be followed with '0' or '1'", BasicJsonType())); - } - } - - // finally, store the reference token - detail::unescape(reference_token); - result.push_back(reference_token); - } - - return result; - } - - private: - /*! - @param[in] reference_string the reference string to the current value - @param[in] value the value to consider - @param[in,out] result the result object to insert values to - - @note Empty objects or arrays are flattened to `null`. 
- */ - static void flatten(const std::string& reference_string, - const BasicJsonType& value, - BasicJsonType& result) - { - switch (value.type()) - { - case detail::value_t::array: - { - if (value.m_value.array->empty()) - { - // flatten empty array as null - result[reference_string] = nullptr; - } - else - { - // iterate array and use index as reference string - for (std::size_t i = 0; i < value.m_value.array->size(); ++i) - { - flatten(reference_string + "/" + std::to_string(i), - value.m_value.array->operator[](i), result); - } - } - break; - } - - case detail::value_t::object: - { - if (value.m_value.object->empty()) - { - // flatten empty object as null - result[reference_string] = nullptr; - } - else - { - // iterate object and use keys as reference string - for (const auto& element : *value.m_value.object) - { - flatten(reference_string + "/" + detail::escape(element.first), element.second, result); - } - } - break; - } - - default: - { - // add primitive value with its reference string - result[reference_string] = value; - break; - } - } - } - - /*! 
- @param[in] value flattened JSON - - @return unflattened JSON - - @throw parse_error.109 if array index is not a number - @throw type_error.314 if value is not an object - @throw type_error.315 if object values are not primitive - @throw type_error.313 if value cannot be unflattened - */ - static BasicJsonType - unflatten(const BasicJsonType& value) - { - if (JSON_HEDLEY_UNLIKELY(!value.is_object())) - { - JSON_THROW(detail::type_error::create(314, "only objects can be unflattened", value)); - } - - BasicJsonType result; - - // iterate the JSON object values - for (const auto& element : *value.m_value.object) - { - if (JSON_HEDLEY_UNLIKELY(!element.second.is_primitive())) - { - JSON_THROW(detail::type_error::create(315, "values in object must be primitive", element.second)); - } - - // assign value to reference pointed to by JSON pointer; Note that if - // the JSON pointer is "" (i.e., points to the whole value), function - // get_and_create returns a reference to result itself. An assignment - // will then create a primitive value. - json_pointer(element.first).get_and_create(result) = element.second; - } - - return result; - } - - /*! - @brief compares two JSON pointers for equality - - @param[in] lhs JSON pointer to compare - @param[in] rhs JSON pointer to compare - @return whether @a lhs is equal to @a rhs - - @complexity Linear in the length of the JSON pointer - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - */ - friend bool operator==(json_pointer const& lhs, - json_pointer const& rhs) noexcept - { - return lhs.reference_tokens == rhs.reference_tokens; - } - - /*! - @brief compares two JSON pointers for inequality - - @param[in] lhs JSON pointer to compare - @param[in] rhs JSON pointer to compare - @return whether @a lhs is not equal @a rhs - - @complexity Linear in the length of the JSON pointer - - @exceptionsafety No-throw guarantee: this function never throws exceptions. 
- */ - friend bool operator!=(json_pointer const& lhs, - json_pointer const& rhs) noexcept - { - return !(lhs == rhs); - } - - /// the reference tokens - std::vector reference_tokens; -}; -} // namespace nlohmann - -// #include - - -#include -#include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template -class json_ref -{ - public: - using value_type = BasicJsonType; - - json_ref(value_type&& value) - : owned_value(std::move(value)) - {} - - json_ref(const value_type& value) - : value_ref(&value) - {} - - json_ref(std::initializer_list init) - : owned_value(init) - {} - - template < - class... Args, - enable_if_t::value, int> = 0 > - json_ref(Args && ... args) - : owned_value(std::forward(args)...) - {} - - // class should be movable only - json_ref(json_ref&&) noexcept = default; - json_ref(const json_ref&) = delete; - json_ref& operator=(const json_ref&) = delete; - json_ref& operator=(json_ref&&) = delete; - ~json_ref() = default; - - value_type moved_or_copied() const - { - if (value_ref == nullptr) - { - return std::move(owned_value); - } - return *value_ref; - } - - value_type const& operator*() const - { - return value_ref ? 
*value_ref : owned_value; - } - - value_type const* operator->() const - { - return &** this; - } - - private: - mutable value_type owned_value = nullptr; - value_type const* value_ref = nullptr; -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - -// #include - -// #include - -// #include - - -#include // reverse -#include // array -#include // isnan, isinf -#include // uint8_t, uint16_t, uint32_t, uint64_t -#include // memcpy -#include // numeric_limits -#include // string -#include // move - -// #include - -// #include - -// #include - - -#include // copy -#include // size_t -#include // streamsize -#include // back_inserter -#include // shared_ptr, make_shared -#include // basic_ostream -#include // basic_string -#include // vector -// #include - - -namespace nlohmann -{ -namespace detail -{ -/// abstract output adapter interface -template struct output_adapter_protocol -{ - virtual void write_character(CharType c) = 0; - virtual void write_characters(const CharType* s, std::size_t length) = 0; - virtual ~output_adapter_protocol() = default; - - output_adapter_protocol() = default; - output_adapter_protocol(const output_adapter_protocol&) = default; - output_adapter_protocol(output_adapter_protocol&&) noexcept = default; - output_adapter_protocol& operator=(const output_adapter_protocol&) = default; - output_adapter_protocol& operator=(output_adapter_protocol&&) noexcept = default; -}; - -/// a type to simplify interfaces -template -using output_adapter_t = std::shared_ptr>; - -/// output adapter for byte vectors -template -class output_vector_adapter : public output_adapter_protocol -{ - public: - explicit output_vector_adapter(std::vector& vec) noexcept - : v(vec) - {} - - void write_character(CharType c) override - { - v.push_back(c); - } - - JSON_HEDLEY_NON_NULL(2) - void write_characters(const CharType* s, std::size_t length) override - { - std::copy(s, s + length, std::back_inserter(v)); - } - - private: - std::vector& v; 
-}; - -/// output adapter for output streams -template -class output_stream_adapter : public output_adapter_protocol -{ - public: - explicit output_stream_adapter(std::basic_ostream& s) noexcept - : stream(s) - {} - - void write_character(CharType c) override - { - stream.put(c); - } - - JSON_HEDLEY_NON_NULL(2) - void write_characters(const CharType* s, std::size_t length) override - { - stream.write(s, static_cast(length)); - } - - private: - std::basic_ostream& stream; -}; - -/// output adapter for basic_string -template> -class output_string_adapter : public output_adapter_protocol -{ - public: - explicit output_string_adapter(StringType& s) noexcept - : str(s) - {} - - void write_character(CharType c) override - { - str.push_back(c); - } - - JSON_HEDLEY_NON_NULL(2) - void write_characters(const CharType* s, std::size_t length) override - { - str.append(s, length); - } - - private: - StringType& str; -}; - -template> -class output_adapter -{ - public: - output_adapter(std::vector& vec) - : oa(std::make_shared>(vec)) {} - - output_adapter(std::basic_ostream& s) - : oa(std::make_shared>(s)) {} - - output_adapter(StringType& s) - : oa(std::make_shared>(s)) {} - - operator output_adapter_t() - { - return oa; - } - - private: - output_adapter_t oa = nullptr; -}; -} // namespace detail -} // namespace nlohmann - - -namespace nlohmann -{ -namespace detail -{ -/////////////////// -// binary writer // -/////////////////// - -/*! -@brief serialization to CBOR and MessagePack values -*/ -template -class binary_writer -{ - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using number_float_t = typename BasicJsonType::number_float_t; - - public: - /*! - @brief create a binary writer - - @param[in] adapter output adapter to write to - */ - explicit binary_writer(output_adapter_t adapter) : oa(std::move(adapter)) - { - JSON_ASSERT(oa); - } - - /*! 
- @param[in] j JSON value to serialize - @pre j.type() == value_t::object - */ - void write_bson(const BasicJsonType& j) - { - switch (j.type()) - { - case value_t::object: - { - write_bson_object(*j.m_value.object); - break; - } - - default: - { - JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()), j));; - } - } - } - - /*! - @param[in] j JSON value to serialize - */ - void write_cbor(const BasicJsonType& j) - { - switch (j.type()) - { - case value_t::null: - { - oa->write_character(to_char_type(0xF6)); - break; - } - - case value_t::boolean: - { - oa->write_character(j.m_value.boolean - ? to_char_type(0xF5) - : to_char_type(0xF4)); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // CBOR does not differentiate between positive signed - // integers and unsigned integers. Therefore, we used the - // code from the value_t::number_unsigned case here. - if (j.m_value.number_integer <= 0x17) - { - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x18)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x19)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x1A)); - write_number(static_cast(j.m_value.number_integer)); - } - else - { - oa->write_character(to_char_type(0x1B)); - write_number(static_cast(j.m_value.number_integer)); - } - } - else - { - // The conversions below encode the sign in the first - // byte, and the value is converted to a positive number. 
- const auto positive_number = -1 - j.m_value.number_integer; - if (j.m_value.number_integer >= -24) - { - write_number(static_cast(0x20 + positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x38)); - write_number(static_cast(positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x39)); - write_number(static_cast(positive_number)); - } - else if (positive_number <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x3A)); - write_number(static_cast(positive_number)); - } - else - { - oa->write_character(to_char_type(0x3B)); - write_number(static_cast(positive_number)); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned <= 0x17) - { - write_number(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x18)); - write_number(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x19)); - write_number(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x1A)); - write_number(static_cast(j.m_value.number_unsigned)); - } - else - { - oa->write_character(to_char_type(0x1B)); - write_number(static_cast(j.m_value.number_unsigned)); - } - break; - } - - case value_t::number_float: - { - if (std::isnan(j.m_value.number_float)) - { - // NaN is 0xf97e00 in CBOR - oa->write_character(to_char_type(0xF9)); - oa->write_character(to_char_type(0x7E)); - oa->write_character(to_char_type(0x00)); - } - else if (std::isinf(j.m_value.number_float)) - { - // Infinity is 0xf97c00, -Infinity is 0xf9fc00 - oa->write_character(to_char_type(0xf9)); - oa->write_character(j.m_value.number_float > 0 ? 
to_char_type(0x7C) : to_char_type(0xFC)); - oa->write_character(to_char_type(0x00)); - } - else - { - write_compact_float(j.m_value.number_float, detail::input_format_t::cbor); - } - break; - } - - case value_t::string: - { - // step 1: write control byte and the string length - const auto N = j.m_value.string->size(); - if (N <= 0x17) - { - write_number(static_cast(0x60 + N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x78)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x79)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x7A)); - write_number(static_cast(N)); - } - // LCOV_EXCL_START - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x7B)); - write_number(static_cast(N)); - } - // LCOV_EXCL_STOP - - // step 2: write the string - oa->write_characters( - reinterpret_cast(j.m_value.string->c_str()), - j.m_value.string->size()); - break; - } - - case value_t::array: - { - // step 1: write control byte and the array size - const auto N = j.m_value.array->size(); - if (N <= 0x17) - { - write_number(static_cast(0x80 + N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x98)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x99)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x9A)); - write_number(static_cast(N)); - } - // LCOV_EXCL_START - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x9B)); - write_number(static_cast(N)); - } - // LCOV_EXCL_STOP - - // step 2: write each element - for (const auto& el : *j.m_value.array) - { - write_cbor(el); - } - break; - } - - case value_t::binary: - { - if 
(j.m_value.binary->has_subtype()) - { - write_number(static_cast(0xd8)); - write_number(j.m_value.binary->subtype()); - } - - // step 1: write control byte and the binary array size - const auto N = j.m_value.binary->size(); - if (N <= 0x17) - { - write_number(static_cast(0x40 + N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x58)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x59)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x5A)); - write_number(static_cast(N)); - } - // LCOV_EXCL_START - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0x5B)); - write_number(static_cast(N)); - } - // LCOV_EXCL_STOP - - // step 2: write each element - oa->write_characters( - reinterpret_cast(j.m_value.binary->data()), - N); - - break; - } - - case value_t::object: - { - // step 1: write control byte and the object size - const auto N = j.m_value.object->size(); - if (N <= 0x17) - { - write_number(static_cast(0xA0 + N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0xB8)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0xB9)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0xBA)); - write_number(static_cast(N)); - } - // LCOV_EXCL_START - else if (N <= (std::numeric_limits::max)()) - { - oa->write_character(to_char_type(0xBB)); - write_number(static_cast(N)); - } - // LCOV_EXCL_STOP - - // step 2: write each element - for (const auto& el : *j.m_value.object) - { - write_cbor(el.first); - write_cbor(el.second); - } - break; - } - - default: - break; - } - } - - /*! 
- @param[in] j JSON value to serialize - */ - void write_msgpack(const BasicJsonType& j) - { - switch (j.type()) - { - case value_t::null: // nil - { - oa->write_character(to_char_type(0xC0)); - break; - } - - case value_t::boolean: // true and false - { - oa->write_character(j.m_value.boolean - ? to_char_type(0xC3) - : to_char_type(0xC2)); - break; - } - - case value_t::number_integer: - { - if (j.m_value.number_integer >= 0) - { - // MessagePack does not differentiate between positive - // signed integers and unsigned integers. Therefore, we used - // the code from the value_t::number_unsigned case here. - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 8 - oa->write_character(to_char_type(0xCC)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - oa->write_character(to_char_type(0xCD)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - oa->write_character(to_char_type(0xCE)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - oa->write_character(to_char_type(0xCF)); - write_number(static_cast(j.m_value.number_integer)); - } - } - else - { - if (j.m_value.number_integer >= -32) - { - // negative fixnum - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() && - j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 8 - oa->write_character(to_char_type(0xD0)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() && - j.m_value.number_integer <= 
(std::numeric_limits::max)()) - { - // int 16 - oa->write_character(to_char_type(0xD1)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() && - j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 32 - oa->write_character(to_char_type(0xD2)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_integer >= (std::numeric_limits::min)() && - j.m_value.number_integer <= (std::numeric_limits::max)()) - { - // int 64 - oa->write_character(to_char_type(0xD3)); - write_number(static_cast(j.m_value.number_integer)); - } - } - break; - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned < 128) - { - // positive fixnum - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 8 - oa->write_character(to_char_type(0xCC)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 16 - oa->write_character(to_char_type(0xCD)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 32 - oa->write_character(to_char_type(0xCE)); - write_number(static_cast(j.m_value.number_integer)); - } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) - { - // uint 64 - oa->write_character(to_char_type(0xCF)); - write_number(static_cast(j.m_value.number_integer)); - } - break; - } - - case value_t::number_float: - { - write_compact_float(j.m_value.number_float, detail::input_format_t::msgpack); - break; - } - - case value_t::string: - { - // step 1: write control byte and the string length - const auto N = j.m_value.string->size(); - if (N <= 31) - { - // fixstr - write_number(static_cast(0xA0 | N)); - } - else if (N <= (std::numeric_limits::max)()) - { - // str 8 - 
oa->write_character(to_char_type(0xD9)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - // str 16 - oa->write_character(to_char_type(0xDA)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - // str 32 - oa->write_character(to_char_type(0xDB)); - write_number(static_cast(N)); - } - - // step 2: write the string - oa->write_characters( - reinterpret_cast(j.m_value.string->c_str()), - j.m_value.string->size()); - break; - } - - case value_t::array: - { - // step 1: write control byte and the array size - const auto N = j.m_value.array->size(); - if (N <= 15) - { - // fixarray - write_number(static_cast(0x90 | N)); - } - else if (N <= (std::numeric_limits::max)()) - { - // array 16 - oa->write_character(to_char_type(0xDC)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - // array 32 - oa->write_character(to_char_type(0xDD)); - write_number(static_cast(N)); - } - - // step 2: write each element - for (const auto& el : *j.m_value.array) - { - write_msgpack(el); - } - break; - } - - case value_t::binary: - { - // step 0: determine if the binary type has a set subtype to - // determine whether or not to use the ext or fixext types - const bool use_ext = j.m_value.binary->has_subtype(); - - // step 1: write control byte and the byte string length - const auto N = j.m_value.binary->size(); - if (N <= (std::numeric_limits::max)()) - { - std::uint8_t output_type{}; - bool fixed = true; - if (use_ext) - { - switch (N) - { - case 1: - output_type = 0xD4; // fixext 1 - break; - case 2: - output_type = 0xD5; // fixext 2 - break; - case 4: - output_type = 0xD6; // fixext 4 - break; - case 8: - output_type = 0xD7; // fixext 8 - break; - case 16: - output_type = 0xD8; // fixext 16 - break; - default: - output_type = 0xC7; // ext 8 - fixed = false; - break; - } - - } - else - { - output_type = 0xC4; // bin 8 - fixed = false; - } - - 
oa->write_character(to_char_type(output_type)); - if (!fixed) - { - write_number(static_cast(N)); - } - } - else if (N <= (std::numeric_limits::max)()) - { - std::uint8_t output_type = use_ext - ? 0xC8 // ext 16 - : 0xC5; // bin 16 - - oa->write_character(to_char_type(output_type)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - std::uint8_t output_type = use_ext - ? 0xC9 // ext 32 - : 0xC6; // bin 32 - - oa->write_character(to_char_type(output_type)); - write_number(static_cast(N)); - } - - // step 1.5: if this is an ext type, write the subtype - if (use_ext) - { - write_number(static_cast(j.m_value.binary->subtype())); - } - - // step 2: write the byte string - oa->write_characters( - reinterpret_cast(j.m_value.binary->data()), - N); - - break; - } - - case value_t::object: - { - // step 1: write control byte and the object size - const auto N = j.m_value.object->size(); - if (N <= 15) - { - // fixmap - write_number(static_cast(0x80 | (N & 0xF))); - } - else if (N <= (std::numeric_limits::max)()) - { - // map 16 - oa->write_character(to_char_type(0xDE)); - write_number(static_cast(N)); - } - else if (N <= (std::numeric_limits::max)()) - { - // map 32 - oa->write_character(to_char_type(0xDF)); - write_number(static_cast(N)); - } - - // step 2: write each element - for (const auto& el : *j.m_value.object) - { - write_msgpack(el.first); - write_msgpack(el.second); - } - break; - } - - default: - break; - } - } - - /*! 
- @param[in] j JSON value to serialize - @param[in] use_count whether to use '#' prefixes (optimized format) - @param[in] use_type whether to use '$' prefixes (optimized format) - @param[in] add_prefix whether prefixes need to be used for this value - */ - void write_ubjson(const BasicJsonType& j, const bool use_count, - const bool use_type, const bool add_prefix = true) - { - switch (j.type()) - { - case value_t::null: - { - if (add_prefix) - { - oa->write_character(to_char_type('Z')); - } - break; - } - - case value_t::boolean: - { - if (add_prefix) - { - oa->write_character(j.m_value.boolean - ? to_char_type('T') - : to_char_type('F')); - } - break; - } - - case value_t::number_integer: - { - write_number_with_ubjson_prefix(j.m_value.number_integer, add_prefix); - break; - } - - case value_t::number_unsigned: - { - write_number_with_ubjson_prefix(j.m_value.number_unsigned, add_prefix); - break; - } - - case value_t::number_float: - { - write_number_with_ubjson_prefix(j.m_value.number_float, add_prefix); - break; - } - - case value_t::string: - { - if (add_prefix) - { - oa->write_character(to_char_type('S')); - } - write_number_with_ubjson_prefix(j.m_value.string->size(), true); - oa->write_characters( - reinterpret_cast(j.m_value.string->c_str()), - j.m_value.string->size()); - break; - } - - case value_t::array: - { - if (add_prefix) - { - oa->write_character(to_char_type('[')); - } - - bool prefix_required = true; - if (use_type && !j.m_value.array->empty()) - { - JSON_ASSERT(use_count); - const CharType first_prefix = ubjson_prefix(j.front()); - const bool same_prefix = std::all_of(j.begin() + 1, j.end(), - [this, first_prefix](const BasicJsonType & v) - { - return ubjson_prefix(v) == first_prefix; - }); - - if (same_prefix) - { - prefix_required = false; - oa->write_character(to_char_type('$')); - oa->write_character(first_prefix); - } - } - - if (use_count) - { - oa->write_character(to_char_type('#')); - 
write_number_with_ubjson_prefix(j.m_value.array->size(), true); - } - - for (const auto& el : *j.m_value.array) - { - write_ubjson(el, use_count, use_type, prefix_required); - } - - if (!use_count) - { - oa->write_character(to_char_type(']')); - } - - break; - } - - case value_t::binary: - { - if (add_prefix) - { - oa->write_character(to_char_type('[')); - } - - if (use_type && !j.m_value.binary->empty()) - { - JSON_ASSERT(use_count); - oa->write_character(to_char_type('$')); - oa->write_character('U'); - } - - if (use_count) - { - oa->write_character(to_char_type('#')); - write_number_with_ubjson_prefix(j.m_value.binary->size(), true); - } - - if (use_type) - { - oa->write_characters( - reinterpret_cast(j.m_value.binary->data()), - j.m_value.binary->size()); - } - else - { - for (size_t i = 0; i < j.m_value.binary->size(); ++i) - { - oa->write_character(to_char_type('U')); - oa->write_character(j.m_value.binary->data()[i]); - } - } - - if (!use_count) - { - oa->write_character(to_char_type(']')); - } - - break; - } - - case value_t::object: - { - if (add_prefix) - { - oa->write_character(to_char_type('{')); - } - - bool prefix_required = true; - if (use_type && !j.m_value.object->empty()) - { - JSON_ASSERT(use_count); - const CharType first_prefix = ubjson_prefix(j.front()); - const bool same_prefix = std::all_of(j.begin(), j.end(), - [this, first_prefix](const BasicJsonType & v) - { - return ubjson_prefix(v) == first_prefix; - }); - - if (same_prefix) - { - prefix_required = false; - oa->write_character(to_char_type('$')); - oa->write_character(first_prefix); - } - } - - if (use_count) - { - oa->write_character(to_char_type('#')); - write_number_with_ubjson_prefix(j.m_value.object->size(), true); - } - - for (const auto& el : *j.m_value.object) - { - write_number_with_ubjson_prefix(el.first.size(), true); - oa->write_characters( - reinterpret_cast(el.first.c_str()), - el.first.size()); - write_ubjson(el.second, use_count, use_type, prefix_required); - } - - if 
(!use_count) - { - oa->write_character(to_char_type('}')); - } - - break; - } - - default: - break; - } - } - - private: - ////////// - // BSON // - ////////// - - /*! - @return The size of a BSON document entry header, including the id marker - and the entry name size (and its null-terminator). - */ - static std::size_t calc_bson_entry_header_size(const string_t& name, const BasicJsonType& j) - { - const auto it = name.find(static_cast(0)); - if (JSON_HEDLEY_UNLIKELY(it != BasicJsonType::string_t::npos)) - { - JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")", j)); - } - - return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; - } - - /*! - @brief Writes the given @a element_type and @a name to the output adapter - */ - void write_bson_entry_header(const string_t& name, - const std::uint8_t element_type) - { - oa->write_character(to_char_type(element_type)); // boolean - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - } - - /*! - @brief Writes a BSON element with key @a name and boolean value @a value - */ - void write_bson_boolean(const string_t& name, - const bool value) - { - write_bson_entry_header(name, 0x08); - oa->write_character(value ? to_char_type(0x01) : to_char_type(0x00)); - } - - /*! - @brief Writes a BSON element with key @a name and double value @a value - */ - void write_bson_double(const string_t& name, - const double value) - { - write_bson_entry_header(name, 0x01); - write_number(value); - } - - /*! - @return The size of the BSON-encoded string in @a value - */ - static std::size_t calc_bson_string_size(const string_t& value) - { - return sizeof(std::int32_t) + value.size() + 1ul; - } - - /*! 
- @brief Writes a BSON element with key @a name and string value @a value - */ - void write_bson_string(const string_t& name, - const string_t& value) - { - write_bson_entry_header(name, 0x02); - - write_number(static_cast(value.size() + 1ul)); - oa->write_characters( - reinterpret_cast(value.c_str()), - value.size() + 1); - } - - /*! - @brief Writes a BSON element with key @a name and null value - */ - void write_bson_null(const string_t& name) - { - write_bson_entry_header(name, 0x0A); - } - - /*! - @return The size of the BSON-encoded integer @a value - */ - static std::size_t calc_bson_integer_size(const std::int64_t value) - { - return (std::numeric_limits::min)() <= value && value <= (std::numeric_limits::max)() - ? sizeof(std::int32_t) - : sizeof(std::int64_t); - } - - /*! - @brief Writes a BSON element with key @a name and integer @a value - */ - void write_bson_integer(const string_t& name, - const std::int64_t value) - { - if ((std::numeric_limits::min)() <= value && value <= (std::numeric_limits::max)()) - { - write_bson_entry_header(name, 0x10); // int32 - write_number(static_cast(value)); - } - else - { - write_bson_entry_header(name, 0x12); // int64 - write_number(static_cast(value)); - } - } - - /*! - @return The size of the BSON-encoded unsigned integer in @a j - */ - static constexpr std::size_t calc_bson_unsigned_size(const std::uint64_t value) noexcept - { - return (value <= static_cast((std::numeric_limits::max)())) - ? sizeof(std::int32_t) - : sizeof(std::int64_t); - } - - /*! 
- @brief Writes a BSON element with key @a name and unsigned @a value - */ - void write_bson_unsigned(const string_t& name, - const BasicJsonType& j) - { - if (j.m_value.number_unsigned <= static_cast((std::numeric_limits::max)())) - { - write_bson_entry_header(name, 0x10 /* int32 */); - write_number(static_cast(j.m_value.number_unsigned)); - } - else if (j.m_value.number_unsigned <= static_cast((std::numeric_limits::max)())) - { - write_bson_entry_header(name, 0x12 /* int64 */); - write_number(static_cast(j.m_value.number_unsigned)); - } - else - { - JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(j.m_value.number_unsigned) + " cannot be represented by BSON as it does not fit int64", j)); - } - } - - /*! - @brief Writes a BSON element with key @a name and object @a value - */ - void write_bson_object_entry(const string_t& name, - const typename BasicJsonType::object_t& value) - { - write_bson_entry_header(name, 0x03); // object - write_bson_object(value); - } - - /*! - @return The size of the BSON-encoded array @a value - */ - static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) - { - std::size_t array_index = 0ul; - - const std::size_t embedded_document_size = std::accumulate(std::begin(value), std::end(value), std::size_t(0), [&array_index](std::size_t result, const typename BasicJsonType::array_t::value_type & el) - { - return result + calc_bson_element_size(std::to_string(array_index++), el); - }); - - return sizeof(std::int32_t) + embedded_document_size + 1ul; - } - - /*! - @return The size of the BSON-encoded binary array @a value - */ - static std::size_t calc_bson_binary_size(const typename BasicJsonType::binary_t& value) - { - return sizeof(std::int32_t) + value.size() + 1ul; - } - - /*! 
- @brief Writes a BSON element with key @a name and array @a value - */ - void write_bson_array(const string_t& name, - const typename BasicJsonType::array_t& value) - { - write_bson_entry_header(name, 0x04); // array - write_number(static_cast(calc_bson_array_size(value))); - - std::size_t array_index = 0ul; - - for (const auto& el : value) - { - write_bson_element(std::to_string(array_index++), el); - } - - oa->write_character(to_char_type(0x00)); - } - - /*! - @brief Writes a BSON element with key @a name and binary value @a value - */ - void write_bson_binary(const string_t& name, - const binary_t& value) - { - write_bson_entry_header(name, 0x05); - - write_number(static_cast(value.size())); - write_number(value.has_subtype() ? value.subtype() : std::uint8_t(0x00)); - - oa->write_characters(reinterpret_cast(value.data()), value.size()); - } - - /*! - @brief Calculates the size necessary to serialize the JSON value @a j with its @a name - @return The calculated size for the BSON document entry for @a j with the given @a name. 
- */ - static std::size_t calc_bson_element_size(const string_t& name, - const BasicJsonType& j) - { - const auto header_size = calc_bson_entry_header_size(name, j); - switch (j.type()) - { - case value_t::object: - return header_size + calc_bson_object_size(*j.m_value.object); - - case value_t::array: - return header_size + calc_bson_array_size(*j.m_value.array); - - case value_t::binary: - return header_size + calc_bson_binary_size(*j.m_value.binary); - - case value_t::boolean: - return header_size + 1ul; - - case value_t::number_float: - return header_size + 8ul; - - case value_t::number_integer: - return header_size + calc_bson_integer_size(j.m_value.number_integer); - - case value_t::number_unsigned: - return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); - - case value_t::string: - return header_size + calc_bson_string_size(*j.m_value.string); - - case value_t::null: - return header_size + 0ul; - - // LCOV_EXCL_START - default: - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) - return 0ul; - // LCOV_EXCL_STOP - } - } - - /*! - @brief Serializes the JSON value @a j to BSON and associates it with the - key @a name. 
- @param name The name to associate with the JSON entity @a j within the - current BSON document - */ - void write_bson_element(const string_t& name, - const BasicJsonType& j) - { - switch (j.type()) - { - case value_t::object: - return write_bson_object_entry(name, *j.m_value.object); - - case value_t::array: - return write_bson_array(name, *j.m_value.array); - - case value_t::binary: - return write_bson_binary(name, *j.m_value.binary); - - case value_t::boolean: - return write_bson_boolean(name, j.m_value.boolean); - - case value_t::number_float: - return write_bson_double(name, j.m_value.number_float); - - case value_t::number_integer: - return write_bson_integer(name, j.m_value.number_integer); - - case value_t::number_unsigned: - return write_bson_unsigned(name, j); - - case value_t::string: - return write_bson_string(name, *j.m_value.string); - - case value_t::null: - return write_bson_null(name); - - // LCOV_EXCL_START - default: - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) - return; - // LCOV_EXCL_STOP - } - } - - /*! - @brief Calculates the size of the BSON serialization of the given - JSON-object @a j. - @param[in] value JSON value to serialize - @pre value.type() == value_t::object - */ - static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) - { - std::size_t document_size = std::accumulate(value.begin(), value.end(), std::size_t(0), - [](size_t result, const typename BasicJsonType::object_t::value_type & el) - { - return result += calc_bson_element_size(el.first, el.second); - }); - - return sizeof(std::int32_t) + document_size + 1ul; - } - - /*! 
- @param[in] value JSON value to serialize - @pre value.type() == value_t::object - */ - void write_bson_object(const typename BasicJsonType::object_t& value) - { - write_number(static_cast(calc_bson_object_size(value))); - - for (const auto& el : value) - { - write_bson_element(el.first, el.second); - } - - oa->write_character(to_char_type(0x00)); - } - - ////////// - // CBOR // - ////////// - - static constexpr CharType get_cbor_float_prefix(float /*unused*/) - { - return to_char_type(0xFA); // Single-Precision Float - } - - static constexpr CharType get_cbor_float_prefix(double /*unused*/) - { - return to_char_type(0xFB); // Double-Precision Float - } - - ///////////// - // MsgPack // - ///////////// - - static constexpr CharType get_msgpack_float_prefix(float /*unused*/) - { - return to_char_type(0xCA); // float 32 - } - - static constexpr CharType get_msgpack_float_prefix(double /*unused*/) - { - return to_char_type(0xCB); // float 64 - } - - //////////// - // UBJSON // - //////////// - - // UBJSON: write number (floating point) - template::value, int>::type = 0> - void write_number_with_ubjson_prefix(const NumberType n, - const bool add_prefix) - { - if (add_prefix) - { - oa->write_character(get_ubjson_float_prefix(n)); - } - write_number(n); - } - - // UBJSON: write number (unsigned integer) - template::value, int>::type = 0> - void write_number_with_ubjson_prefix(const NumberType n, - const bool add_prefix) - { - if (n <= static_cast((std::numeric_limits::max)())) - { - if (add_prefix) - { - oa->write_character(to_char_type('i')); // int8 - } - write_number(static_cast(n)); - } - else if (n <= (std::numeric_limits::max)()) - { - if (add_prefix) - { - oa->write_character(to_char_type('U')); // uint8 - } - write_number(static_cast(n)); - } - else if (n <= static_cast((std::numeric_limits::max)())) - { - if (add_prefix) - { - oa->write_character(to_char_type('I')); // int16 - } - write_number(static_cast(n)); - } - else if (n <= 
static_cast((std::numeric_limits::max)())) - { - if (add_prefix) - { - oa->write_character(to_char_type('l')); // int32 - } - write_number(static_cast(n)); - } - else if (n <= static_cast((std::numeric_limits::max)())) - { - if (add_prefix) - { - oa->write_character(to_char_type('L')); // int64 - } - write_number(static_cast(n)); - } - else - { - if (add_prefix) - { - oa->write_character(to_char_type('H')); // high-precision number - } - - const auto number = BasicJsonType(n).dump(); - write_number_with_ubjson_prefix(number.size(), true); - for (std::size_t i = 0; i < number.size(); ++i) - { - oa->write_character(to_char_type(static_cast(number[i]))); - } - } - } - - // UBJSON: write number (signed integer) - template < typename NumberType, typename std::enable_if < - std::is_signed::value&& - !std::is_floating_point::value, int >::type = 0 > - void write_number_with_ubjson_prefix(const NumberType n, - const bool add_prefix) - { - if ((std::numeric_limits::min)() <= n && n <= (std::numeric_limits::max)()) - { - if (add_prefix) - { - oa->write_character(to_char_type('i')); // int8 - } - write_number(static_cast(n)); - } - else if (static_cast((std::numeric_limits::min)()) <= n && n <= static_cast((std::numeric_limits::max)())) - { - if (add_prefix) - { - oa->write_character(to_char_type('U')); // uint8 - } - write_number(static_cast(n)); - } - else if ((std::numeric_limits::min)() <= n && n <= (std::numeric_limits::max)()) - { - if (add_prefix) - { - oa->write_character(to_char_type('I')); // int16 - } - write_number(static_cast(n)); - } - else if ((std::numeric_limits::min)() <= n && n <= (std::numeric_limits::max)()) - { - if (add_prefix) - { - oa->write_character(to_char_type('l')); // int32 - } - write_number(static_cast(n)); - } - else if ((std::numeric_limits::min)() <= n && n <= (std::numeric_limits::max)()) - { - if (add_prefix) - { - oa->write_character(to_char_type('L')); // int64 - } - write_number(static_cast(n)); - } - // LCOV_EXCL_START - else - { - if 
(add_prefix) - { - oa->write_character(to_char_type('H')); // high-precision number - } - - const auto number = BasicJsonType(n).dump(); - write_number_with_ubjson_prefix(number.size(), true); - for (std::size_t i = 0; i < number.size(); ++i) - { - oa->write_character(to_char_type(static_cast(number[i]))); - } - } - // LCOV_EXCL_STOP - } - - /*! - @brief determine the type prefix of container values - */ - CharType ubjson_prefix(const BasicJsonType& j) const noexcept - { - switch (j.type()) - { - case value_t::null: - return 'Z'; - - case value_t::boolean: - return j.m_value.boolean ? 'T' : 'F'; - - case value_t::number_integer: - { - if ((std::numeric_limits::min)() <= j.m_value.number_integer && j.m_value.number_integer <= (std::numeric_limits::max)()) - { - return 'i'; - } - if ((std::numeric_limits::min)() <= j.m_value.number_integer && j.m_value.number_integer <= (std::numeric_limits::max)()) - { - return 'U'; - } - if ((std::numeric_limits::min)() <= j.m_value.number_integer && j.m_value.number_integer <= (std::numeric_limits::max)()) - { - return 'I'; - } - if ((std::numeric_limits::min)() <= j.m_value.number_integer && j.m_value.number_integer <= (std::numeric_limits::max)()) - { - return 'l'; - } - if ((std::numeric_limits::min)() <= j.m_value.number_integer && j.m_value.number_integer <= (std::numeric_limits::max)()) - { - return 'L'; - } - // anything else is treated as high-precision number - return 'H'; // LCOV_EXCL_LINE - } - - case value_t::number_unsigned: - { - if (j.m_value.number_unsigned <= static_cast((std::numeric_limits::max)())) - { - return 'i'; - } - if (j.m_value.number_unsigned <= static_cast((std::numeric_limits::max)())) - { - return 'U'; - } - if (j.m_value.number_unsigned <= static_cast((std::numeric_limits::max)())) - { - return 'I'; - } - if (j.m_value.number_unsigned <= static_cast((std::numeric_limits::max)())) - { - return 'l'; - } - if (j.m_value.number_unsigned <= static_cast((std::numeric_limits::max)())) - { - return 'L'; - 
} - // anything else is treated as high-precision number - return 'H'; // LCOV_EXCL_LINE - } - - case value_t::number_float: - return get_ubjson_float_prefix(j.m_value.number_float); - - case value_t::string: - return 'S'; - - case value_t::array: // fallthrough - case value_t::binary: - return '['; - - case value_t::object: - return '{'; - - default: // discarded values - return 'N'; - } - } - - static constexpr CharType get_ubjson_float_prefix(float /*unused*/) - { - return 'd'; // float 32 - } - - static constexpr CharType get_ubjson_float_prefix(double /*unused*/) - { - return 'D'; // float 64 - } - - /////////////////////// - // Utility functions // - /////////////////////// - - /* - @brief write a number to output input - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - @tparam OutputIsLittleEndian Set to true if output data is - required to be little endian - - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. - */ - template - void write_number(const NumberType n) - { - // step 1: write number to array of length NumberType - std::array vec{}; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (is_little_endian != OutputIsLittleEndian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - - oa->write_characters(vec.data(), sizeof(NumberType)); - } - - void write_compact_float(const number_float_t n, detail::input_format_t format) - { - if (static_cast(n) >= static_cast(std::numeric_limits::lowest()) && - static_cast(n) <= static_cast((std::numeric_limits::max)()) && - static_cast(static_cast(n)) == static_cast(n)) - { - oa->write_character(format == detail::input_format_t::cbor - ? 
get_cbor_float_prefix(static_cast(n)) - : get_msgpack_float_prefix(static_cast(n))); - write_number(static_cast(n)); - } - else - { - oa->write_character(format == detail::input_format_t::cbor - ? get_cbor_float_prefix(n) - : get_msgpack_float_prefix(n)); - write_number(n); - } - } - - public: - // The following to_char_type functions are implement the conversion - // between uint8_t and CharType. In case CharType is not unsigned, - // such a conversion is required to allow values greater than 128. - // See for a discussion. - template < typename C = CharType, - enable_if_t < std::is_signed::value && std::is_signed::value > * = nullptr > - static constexpr CharType to_char_type(std::uint8_t x) noexcept - { - return *reinterpret_cast(&x); - } - - template < typename C = CharType, - enable_if_t < std::is_signed::value && std::is_unsigned::value > * = nullptr > - static CharType to_char_type(std::uint8_t x) noexcept - { - static_assert(sizeof(std::uint8_t) == sizeof(CharType), "size of CharType must be equal to std::uint8_t"); - static_assert(std::is_trivial::value, "CharType must be trivial"); - CharType result; - std::memcpy(&result, &x, sizeof(x)); - return result; - } - - template::value>* = nullptr> - static constexpr CharType to_char_type(std::uint8_t x) noexcept - { - return x; - } - - template < typename InputCharType, typename C = CharType, - enable_if_t < - std::is_signed::value && - std::is_signed::value && - std::is_same::type>::value - > * = nullptr > - static constexpr CharType to_char_type(InputCharType x) noexcept - { - return x; - } - - private: - /// whether we can assume little endianess - const bool is_little_endian = little_endianess(); - - /// the output - output_adapter_t oa = nullptr; -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - - -#include // reverse, remove, fill, find, none_of -#include // array -#include // localeconv, lconv -#include // labs, isfinite, isnan, signbit -#include // size_t, ptrdiff_t 
-#include // uint8_t -#include // snprintf -#include // numeric_limits -#include // string, char_traits -#include // is_same -#include // move - -// #include - - -#include // array -#include // signbit, isfinite -#include // intN_t, uintN_t -#include // memcpy, memmove -#include // numeric_limits -#include // conditional - -// #include - - -namespace nlohmann -{ -namespace detail -{ - -/*! -@brief implements the Grisu2 algorithm for binary to decimal floating-point -conversion. - -This implementation is a slightly modified version of the reference -implementation which may be obtained from -http://florian.loitsch.com/publications (bench.tar.gz). - -The code is distributed under the MIT license, Copyright (c) 2009 Florian Loitsch. - -For a detailed description of the algorithm see: - -[1] Loitsch, "Printing Floating-Point Numbers Quickly and Accurately with - Integers", Proceedings of the ACM SIGPLAN 2010 Conference on Programming - Language Design and Implementation, PLDI 2010 -[2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and Accurately", - Proceedings of the ACM SIGPLAN 1996 Conference on Programming Language - Design and Implementation, PLDI 1996 -*/ -namespace dtoa_impl -{ - -template -Target reinterpret_bits(const Source source) -{ - static_assert(sizeof(Target) == sizeof(Source), "size mismatch"); - - Target target; - std::memcpy(&target, &source, sizeof(Source)); - return target; -} - -struct diyfp // f * 2^e -{ - static constexpr int kPrecision = 64; // = q - - std::uint64_t f = 0; - int e = 0; - - constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {} - - /*! - @brief returns x - y - @pre x.e == y.e and x.f >= y.f - */ - static diyfp sub(const diyfp& x, const diyfp& y) noexcept - { - JSON_ASSERT(x.e == y.e); - JSON_ASSERT(x.f >= y.f); - - return {x.f - y.f, x.e}; - } - - /*! - @brief returns x * y - @note The result is rounded. (Only the upper q bits are returned.) 
- */ - static diyfp mul(const diyfp& x, const diyfp& y) noexcept - { - static_assert(kPrecision == 64, "internal error"); - - // Computes: - // f = round((x.f * y.f) / 2^q) - // e = x.e + y.e + q - - // Emulate the 64-bit * 64-bit multiplication: - // - // p = u * v - // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi) - // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + 2^64 (u_hi v_hi ) - // = (p0 ) + 2^32 ((p1 ) + (p2 )) + 2^64 (p3 ) - // = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 ) - // = (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + p2_hi + p3) - // = (p0_lo ) + 2^32 (Q ) + 2^64 (H ) - // = (p0_lo ) + 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H ) - // - // (Since Q might be larger than 2^32 - 1) - // - // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H) - // - // (Q_hi + H does not overflow a 64-bit int) - // - // = p_lo + 2^64 p_hi - - const std::uint64_t u_lo = x.f & 0xFFFFFFFFu; - const std::uint64_t u_hi = x.f >> 32u; - const std::uint64_t v_lo = y.f & 0xFFFFFFFFu; - const std::uint64_t v_hi = y.f >> 32u; - - const std::uint64_t p0 = u_lo * v_lo; - const std::uint64_t p1 = u_lo * v_hi; - const std::uint64_t p2 = u_hi * v_lo; - const std::uint64_t p3 = u_hi * v_hi; - - const std::uint64_t p0_hi = p0 >> 32u; - const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu; - const std::uint64_t p1_hi = p1 >> 32u; - const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu; - const std::uint64_t p2_hi = p2 >> 32u; - - std::uint64_t Q = p0_hi + p1_lo + p2_lo; - - // The full product might now be computed as - // - // p_hi = p3 + p2_hi + p1_hi + (Q >> 32) - // p_lo = p0_lo + (Q << 32) - // - // But in this particular case here, the full p_lo is not required. - // Effectively we only need to add the highest bit in p_lo to p_hi (and - // Q_hi + 1 does not overflow). - - Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up - - const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u); - - return {h, x.e + y.e + 64}; - } - - /*! 
- @brief normalize x such that the significand is >= 2^(q-1) - @pre x.f != 0 - */ - static diyfp normalize(diyfp x) noexcept - { - JSON_ASSERT(x.f != 0); - - while ((x.f >> 63u) == 0) - { - x.f <<= 1u; - x.e--; - } - - return x; - } - - /*! - @brief normalize x such that the result has the exponent E - @pre e >= x.e and the upper e - x.e bits of x.f must be zero. - */ - static diyfp normalize_to(const diyfp& x, const int target_exponent) noexcept - { - const int delta = x.e - target_exponent; - - JSON_ASSERT(delta >= 0); - JSON_ASSERT(((x.f << delta) >> delta) == x.f); - - return {x.f << delta, target_exponent}; - } -}; - -struct boundaries -{ - diyfp w; - diyfp minus; - diyfp plus; -}; - -/*! -Compute the (normalized) diyfp representing the input number 'value' and its -boundaries. - -@pre value must be finite and positive -*/ -template -boundaries compute_boundaries(FloatType value) -{ - JSON_ASSERT(std::isfinite(value)); - JSON_ASSERT(value > 0); - - // Convert the IEEE representation into a diyfp. - // - // If v is denormal: - // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1)) - // If v is normalized: - // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1)) - - static_assert(std::numeric_limits::is_iec559, - "internal error: dtoa_short requires an IEEE-754 floating-point implementation"); - - constexpr int kPrecision = std::numeric_limits::digits; // = p (includes the hidden bit) - constexpr int kBias = std::numeric_limits::max_exponent - 1 + (kPrecision - 1); - constexpr int kMinExp = 1 - kBias; - constexpr std::uint64_t kHiddenBit = std::uint64_t{1} << (kPrecision - 1); // = 2^(p-1) - - using bits_type = typename std::conditional::type; - - const auto bits = static_cast(reinterpret_bits(value)); - const std::uint64_t E = bits >> (kPrecision - 1); - const std::uint64_t F = bits & (kHiddenBit - 1); - - const bool is_denormal = E == 0; - const diyfp v = is_denormal - ? 
diyfp(F, kMinExp) - : diyfp(F + kHiddenBit, static_cast(E) - kBias); - - // Compute the boundaries m- and m+ of the floating-point value - // v = f * 2^e. - // - // Determine v- and v+, the floating-point predecessor and successor if v, - // respectively. - // - // v- = v - 2^e if f != 2^(p-1) or e == e_min (A) - // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B) - // - // v+ = v + 2^e - // - // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_ - // between m- and m+ round to v, regardless of how the input rounding - // algorithm breaks ties. - // - // ---+-------------+-------------+-------------+-------------+--- (A) - // v- m- v m+ v+ - // - // -----------------+------+------+-------------+-------------+--- (B) - // v- m- v m+ v+ - - const bool lower_boundary_is_closer = F == 0 && E > 1; - const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); - const diyfp m_minus = lower_boundary_is_closer - ? diyfp(4 * v.f - 1, v.e - 2) // (B) - : diyfp(2 * v.f - 1, v.e - 1); // (A) - - // Determine the normalized w+ = m+. - const diyfp w_plus = diyfp::normalize(m_plus); - - // Determine w- = m- such that e_(w-) = e_(w+). - const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e); - - return {diyfp::normalize(v), w_minus, w_plus}; -} - -// Given normalized diyfp w, Grisu needs to find a (normalized) cached -// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies -// within a certain range [alpha, gamma] (Definition 3.2 from [1]) -// -// alpha <= e = e_c + e_w + q <= gamma -// -// or -// -// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q -// <= f_c * f_w * 2^gamma -// -// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies -// -// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma -// -// or -// -// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma) -// -// The choice of (alpha,gamma) determines the size of the table and the form of -// the digit generation procedure. 
Using (alpha,gamma)=(-60,-32) works out well -// in practice: -// -// The idea is to cut the number c * w = f * 2^e into two parts, which can be -// processed independently: An integral part p1, and a fractional part p2: -// -// f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e -// = (f div 2^-e) + (f mod 2^-e) * 2^e -// = p1 + p2 * 2^e -// -// The conversion of p1 into decimal form requires a series of divisions and -// modulos by (a power of) 10. These operations are faster for 32-bit than for -// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be -// achieved by choosing -// -// -e >= 32 or e <= -32 := gamma -// -// In order to convert the fractional part -// -// p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ... -// -// into decimal form, the fraction is repeatedly multiplied by 10 and the digits -// d[-i] are extracted in order: -// -// (10 * p2) div 2^-e = d[-1] -// (10 * p2) mod 2^-e = d[-2] / 10^1 + ... -// -// The multiplication by 10 must not overflow. It is sufficient to choose -// -// 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64. -// -// Since p2 = f mod 2^-e < 2^-e, -// -// -e <= 60 or e >= -60 := alpha - -constexpr int kAlpha = -60; -constexpr int kGamma = -32; - -struct cached_power // c = f * 2^e ~= 10^k -{ - std::uint64_t f; - int e; - int k; -}; - -/*! -For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached -power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c -satisfies (Definition 3.2 from [1]) - - alpha <= e_c + e + q <= gamma. -*/ -inline cached_power get_cached_power_for_binary_exponent(int e) -{ - // Now - // - // alpha <= e_c + e + q <= gamma (1) - // ==> f_c * 2^alpha <= c * 2^e * 2^q - // - // and since the c's are normalized, 2^(q-1) <= f_c, - // - // ==> 2^(q - 1 + alpha) <= c * 2^(e + q) - // ==> 2^(alpha - e - 1) <= c - // - // If c were an exact power of ten, i.e. 
c = 10^k, one may determine k as - // - // k = ceil( log_10( 2^(alpha - e - 1) ) ) - // = ceil( (alpha - e - 1) * log_10(2) ) - // - // From the paper: - // "In theory the result of the procedure could be wrong since c is rounded, - // and the computation itself is approximated [...]. In practice, however, - // this simple function is sufficient." - // - // For IEEE double precision floating-point numbers converted into - // normalized diyfp's w = f * 2^e, with q = 64, - // - // e >= -1022 (min IEEE exponent) - // -52 (p - 1) - // -52 (p - 1, possibly normalize denormal IEEE numbers) - // -11 (normalize the diyfp) - // = -1137 - // - // and - // - // e <= +1023 (max IEEE exponent) - // -52 (p - 1) - // -11 (normalize the diyfp) - // = 960 - // - // This binary exponent range [-1137,960] results in a decimal exponent - // range [-307,324]. One does not need to store a cached power for each - // k in this range. For each such k it suffices to find a cached power - // such that the exponent of the product lies in [alpha,gamma]. - // This implies that the difference of the decimal exponents of adjacent - // table entries must be less than or equal to - // - // floor( (gamma - alpha) * log_10(2) ) = 8. - // - // (A smaller distance gamma-alpha would require a larger table.) - - // NB: - // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34. 
- - constexpr int kCachedPowersMinDecExp = -300; - constexpr int kCachedPowersDecStep = 8; - - static constexpr std::array kCachedPowers = - { - { - { 0xAB70FE17C79AC6CA, -1060, -300 }, - { 0xFF77B1FCBEBCDC4F, -1034, -292 }, - { 0xBE5691EF416BD60C, -1007, -284 }, - { 0x8DD01FAD907FFC3C, -980, -276 }, - { 0xD3515C2831559A83, -954, -268 }, - { 0x9D71AC8FADA6C9B5, -927, -260 }, - { 0xEA9C227723EE8BCB, -901, -252 }, - { 0xAECC49914078536D, -874, -244 }, - { 0x823C12795DB6CE57, -847, -236 }, - { 0xC21094364DFB5637, -821, -228 }, - { 0x9096EA6F3848984F, -794, -220 }, - { 0xD77485CB25823AC7, -768, -212 }, - { 0xA086CFCD97BF97F4, -741, -204 }, - { 0xEF340A98172AACE5, -715, -196 }, - { 0xB23867FB2A35B28E, -688, -188 }, - { 0x84C8D4DFD2C63F3B, -661, -180 }, - { 0xC5DD44271AD3CDBA, -635, -172 }, - { 0x936B9FCEBB25C996, -608, -164 }, - { 0xDBAC6C247D62A584, -582, -156 }, - { 0xA3AB66580D5FDAF6, -555, -148 }, - { 0xF3E2F893DEC3F126, -529, -140 }, - { 0xB5B5ADA8AAFF80B8, -502, -132 }, - { 0x87625F056C7C4A8B, -475, -124 }, - { 0xC9BCFF6034C13053, -449, -116 }, - { 0x964E858C91BA2655, -422, -108 }, - { 0xDFF9772470297EBD, -396, -100 }, - { 0xA6DFBD9FB8E5B88F, -369, -92 }, - { 0xF8A95FCF88747D94, -343, -84 }, - { 0xB94470938FA89BCF, -316, -76 }, - { 0x8A08F0F8BF0F156B, -289, -68 }, - { 0xCDB02555653131B6, -263, -60 }, - { 0x993FE2C6D07B7FAC, -236, -52 }, - { 0xE45C10C42A2B3B06, -210, -44 }, - { 0xAA242499697392D3, -183, -36 }, - { 0xFD87B5F28300CA0E, -157, -28 }, - { 0xBCE5086492111AEB, -130, -20 }, - { 0x8CBCCC096F5088CC, -103, -12 }, - { 0xD1B71758E219652C, -77, -4 }, - { 0x9C40000000000000, -50, 4 }, - { 0xE8D4A51000000000, -24, 12 }, - { 0xAD78EBC5AC620000, 3, 20 }, - { 0x813F3978F8940984, 30, 28 }, - { 0xC097CE7BC90715B3, 56, 36 }, - { 0x8F7E32CE7BEA5C70, 83, 44 }, - { 0xD5D238A4ABE98068, 109, 52 }, - { 0x9F4F2726179A2245, 136, 60 }, - { 0xED63A231D4C4FB27, 162, 68 }, - { 0xB0DE65388CC8ADA8, 189, 76 }, - { 0x83C7088E1AAB65DB, 216, 84 }, - { 0xC45D1DF942711D9A, 242, 92 }, - { 
0x924D692CA61BE758, 269, 100 }, - { 0xDA01EE641A708DEA, 295, 108 }, - { 0xA26DA3999AEF774A, 322, 116 }, - { 0xF209787BB47D6B85, 348, 124 }, - { 0xB454E4A179DD1877, 375, 132 }, - { 0x865B86925B9BC5C2, 402, 140 }, - { 0xC83553C5C8965D3D, 428, 148 }, - { 0x952AB45CFA97A0B3, 455, 156 }, - { 0xDE469FBD99A05FE3, 481, 164 }, - { 0xA59BC234DB398C25, 508, 172 }, - { 0xF6C69A72A3989F5C, 534, 180 }, - { 0xB7DCBF5354E9BECE, 561, 188 }, - { 0x88FCF317F22241E2, 588, 196 }, - { 0xCC20CE9BD35C78A5, 614, 204 }, - { 0x98165AF37B2153DF, 641, 212 }, - { 0xE2A0B5DC971F303A, 667, 220 }, - { 0xA8D9D1535CE3B396, 694, 228 }, - { 0xFB9B7CD9A4A7443C, 720, 236 }, - { 0xBB764C4CA7A44410, 747, 244 }, - { 0x8BAB8EEFB6409C1A, 774, 252 }, - { 0xD01FEF10A657842C, 800, 260 }, - { 0x9B10A4E5E9913129, 827, 268 }, - { 0xE7109BFBA19C0C9D, 853, 276 }, - { 0xAC2820D9623BF429, 880, 284 }, - { 0x80444B5E7AA7CF85, 907, 292 }, - { 0xBF21E44003ACDD2D, 933, 300 }, - { 0x8E679C2F5E44FF8F, 960, 308 }, - { 0xD433179D9C8CB841, 986, 316 }, - { 0x9E19DB92B4E31BA9, 1013, 324 }, - } - }; - - // This computation gives exactly the same results for k as - // k = ceil((kAlpha - e - 1) * 0.30102999566398114) - // for |e| <= 1500, but doesn't require floating-point operations. - // NB: log_10(2) ~= 78913 / 2^18 - JSON_ASSERT(e >= -1500); - JSON_ASSERT(e <= 1500); - const int f = kAlpha - e - 1; - const int k = (f * 78913) / (1 << 18) + static_cast(f > 0); - - const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / kCachedPowersDecStep; - JSON_ASSERT(index >= 0); - JSON_ASSERT(static_cast(index) < kCachedPowers.size()); - - const cached_power cached = kCachedPowers[static_cast(index)]; - JSON_ASSERT(kAlpha <= cached.e + e + 64); - JSON_ASSERT(kGamma >= cached.e + e + 64); - - return cached; -} - -/*! -For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k. -For n == 0, returns 1 and sets pow10 := 1. 
-*/ -inline int find_largest_pow10(const std::uint32_t n, std::uint32_t& pow10) -{ - // LCOV_EXCL_START - if (n >= 1000000000) - { - pow10 = 1000000000; - return 10; - } - // LCOV_EXCL_STOP - if (n >= 100000000) - { - pow10 = 100000000; - return 9; - } - if (n >= 10000000) - { - pow10 = 10000000; - return 8; - } - if (n >= 1000000) - { - pow10 = 1000000; - return 7; - } - if (n >= 100000) - { - pow10 = 100000; - return 6; - } - if (n >= 10000) - { - pow10 = 10000; - return 5; - } - if (n >= 1000) - { - pow10 = 1000; - return 4; - } - if (n >= 100) - { - pow10 = 100; - return 3; - } - if (n >= 10) - { - pow10 = 10; - return 2; - } - - pow10 = 1; - return 1; -} - -inline void grisu2_round(char* buf, int len, std::uint64_t dist, std::uint64_t delta, - std::uint64_t rest, std::uint64_t ten_k) -{ - JSON_ASSERT(len >= 1); - JSON_ASSERT(dist <= delta); - JSON_ASSERT(rest <= delta); - JSON_ASSERT(ten_k > 0); - - // <--------------------------- delta ----> - // <---- dist ---------> - // --------------[------------------+-------------------]-------------- - // M- w M+ - // - // ten_k - // <------> - // <---- rest ----> - // --------------[------------------+----+--------------]-------------- - // w V - // = buf * 10^k - // - // ten_k represents a unit-in-the-last-place in the decimal representation - // stored in buf. - // Decrement buf by ten_k while this takes buf closer to w. - - // The tests are written in this order to avoid overflow in unsigned - // integer arithmetic. - - while (rest < dist - && delta - rest >= ten_k - && (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) - { - JSON_ASSERT(buf[len - 1] != '0'); - buf[len - 1]--; - rest += ten_k; - } -} - -/*! -Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+. -M- and M+ must be normalized and share the same exponent -60 <= e <= -32. 
-*/ -inline void grisu2_digit_gen(char* buffer, int& length, int& decimal_exponent, - diyfp M_minus, diyfp w, diyfp M_plus) -{ - static_assert(kAlpha >= -60, "internal error"); - static_assert(kGamma <= -32, "internal error"); - - // Generates the digits (and the exponent) of a decimal floating-point - // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's - // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= gamma. - // - // <--------------------------- delta ----> - // <---- dist ---------> - // --------------[------------------+-------------------]-------------- - // M- w M+ - // - // Grisu2 generates the digits of M+ from left to right and stops as soon as - // V is in [M-,M+]. - - JSON_ASSERT(M_plus.e >= kAlpha); - JSON_ASSERT(M_plus.e <= kGamma); - - std::uint64_t delta = diyfp::sub(M_plus, M_minus).f; // (significand of (M+ - M-), implicit exponent is e) - std::uint64_t dist = diyfp::sub(M_plus, w ).f; // (significand of (M+ - w ), implicit exponent is e) - - // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): - // - // M+ = f * 2^e - // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e - // = ((p1 ) * 2^-e + (p2 )) * 2^e - // = p1 + p2 * 2^e - - const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e); - - auto p1 = static_cast(M_plus.f >> -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) 
- std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e - - // 1) - // - // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0] - - JSON_ASSERT(p1 > 0); - - std::uint32_t pow10{}; - const int k = find_largest_pow10(p1, pow10); - - // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1) - // - // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1)) - // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1)) - // - // M+ = p1 + p2 * 2^e - // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e - // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e - // = d[k-1] * 10^(k-1) + ( rest) * 2^e - // - // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0) - // - // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0] - // - // but stop as soon as - // - // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e - - int n = k; - while (n > 0) - { - // Invariants: - // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k) - // pow10 = 10^(n-1) <= p1 < 10^n - // - const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1) - const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1) - // - // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e - // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e) - // - JSON_ASSERT(d <= 9); - buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d - // - // M+ = buffer * 10^(n-1) + (r + p2 * 2^e) - // - p1 = r; - n--; - // - // M+ = buffer * 10^n + (p1 + p2 * 2^e) - // pow10 = 10^n - // - - // Now check if enough digits have been generated. - // Compute - // - // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e - // - // Note: - // Since rest and delta share the same exponent e, it suffices to - // compare the significands. - const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2; - if (rest <= delta) - { - // V = buffer * 10^n, with M- <= V <= M+. - - decimal_exponent += n; - - // We may now just stop. But instead look if the buffer could be - // decremented to bring V closer to w. 
- // - // pow10 = 10^n is now 1 ulp in the decimal representation V. - // The rounding procedure works with diyfp's with an implicit - // exponent of e. - // - // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e - // - const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e; - grisu2_round(buffer, length, dist, delta, rest, ten_n); - - return; - } - - pow10 /= 10; - // - // pow10 = 10^(n-1) <= p1 < 10^n - // Invariants restored. - } - - // 2) - // - // The digits of the integral part have been generated: - // - // M+ = d[k-1]...d[1]d[0] + p2 * 2^e - // = buffer + p2 * 2^e - // - // Now generate the digits of the fractional part p2 * 2^e. - // - // Note: - // No decimal point is generated: the exponent is adjusted instead. - // - // p2 actually represents the fraction - // - // p2 * 2^e - // = p2 / 2^-e - // = d[-1] / 10^1 + d[-2] / 10^2 + ... - // - // Now generate the digits d[-m] of p1 from left to right (m = 1,2,...) - // - // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m - // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...) - // - // using - // - // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e) - // = ( d) * 2^-e + ( r) - // - // or - // 10^m * p2 * 2^e = d + r * 2^e - // - // i.e. - // - // M+ = buffer + p2 * 2^e - // = buffer + 10^-m * (d + r * 2^e) - // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e - // - // and stop as soon as 10^-m * r * 2^e <= delta * 2^e - - JSON_ASSERT(p2 > delta); - - int m = 0; - for (;;) - { - // Invariant: - // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) 
* 2^e - // = buffer * 10^-m + 10^-m * (p2 ) * 2^e - // = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e - // = buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + (10*p2 mod 2^-e)) * 2^e - // - JSON_ASSERT(p2 <= (std::numeric_limits::max)() / 10); - p2 *= 10; - const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e - const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e - // - // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e - // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e)) - // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e - // - JSON_ASSERT(d <= 9); - buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d - // - // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e - // - p2 = r; - m++; - // - // M+ = buffer * 10^-m + 10^-m * p2 * 2^e - // Invariant restored. - - // Check if enough digits have been generated. - // - // 10^-m * p2 * 2^e <= delta * 2^e - // p2 * 2^e <= 10^m * delta * 2^e - // p2 <= 10^m * delta - delta *= 10; - dist *= 10; - if (p2 <= delta) - { - break; - } - } - - // V = buffer * 10^-m, with M- <= V <= M+. - - decimal_exponent -= m; - - // 1 ulp in the decimal representation is now 10^-m. - // Since delta and dist are now scaled by 10^m, we need to do the - // same with ulp in order to keep the units in sync. - // - // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e - // - const std::uint64_t ten_m = one.f; - grisu2_round(buffer, length, dist, delta, p2, ten_m); - - // By construction this algorithm generates the shortest possible decimal - // number (Loitsch, Theorem 6.2) which rounds back to w. - // For an input number of precision p, at least - // - // N = 1 + ceil(p * log_10(2)) - // - // decimal digits are sufficient to identify all binary floating-point - // numbers (Matula, "In-and-Out conversions"). - // This implies that the algorithm does not produce more than N decimal - // digits. 
- // - // N = 17 for p = 53 (IEEE double precision) - // N = 9 for p = 24 (IEEE single precision) -} - -/*! -v = buf * 10^decimal_exponent -len is the length of the buffer (number of decimal digits) -The buffer must be large enough, i.e. >= max_digits10. -*/ -JSON_HEDLEY_NON_NULL(1) -inline void grisu2(char* buf, int& len, int& decimal_exponent, - diyfp m_minus, diyfp v, diyfp m_plus) -{ - JSON_ASSERT(m_plus.e == m_minus.e); - JSON_ASSERT(m_plus.e == v.e); - - // --------(-----------------------+-----------------------)-------- (A) - // m- v m+ - // - // --------------------(-----------+-----------------------)-------- (B) - // m- v m+ - // - // First scale v (and m- and m+) such that the exponent is in the range - // [alpha, gamma]. - - const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e); - - const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k - - // The exponent of the products is = v.e + c_minus_k.e + q and is in the range [alpha,gamma] - const diyfp w = diyfp::mul(v, c_minus_k); - const diyfp w_minus = diyfp::mul(m_minus, c_minus_k); - const diyfp w_plus = diyfp::mul(m_plus, c_minus_k); - - // ----(---+---)---------------(---+---)---------------(---+---)---- - // w- w w+ - // = c*m- = c*v = c*m+ - // - // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and - // w+ are now off by a small amount. - // In fact: - // - // w - v * 10^k < 1 ulp - // - // To account for this inaccuracy, add resp. subtract 1 ulp. - // - // --------+---[---------------(---+---)---------------]---+-------- - // w- M- w M+ w+ - // - // Now any number in [M-, M+] (bounds included) will round to w when input, - // regardless of how the input rounding algorithm breaks ties. - // - // And digit_gen generates the shortest possible such number in [M-, M+]. - // Note that this does not mean that Grisu2 always generates the shortest - // possible number in the interval (m-, m+). 
- const diyfp M_minus(w_minus.f + 1, w_minus.e); - const diyfp M_plus (w_plus.f - 1, w_plus.e ); - - decimal_exponent = -cached.k; // = -(-k) = k - - grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); -} - -/*! -v = buf * 10^decimal_exponent -len is the length of the buffer (number of decimal digits) -The buffer must be large enough, i.e. >= max_digits10. -*/ -template -JSON_HEDLEY_NON_NULL(1) -void grisu2(char* buf, int& len, int& decimal_exponent, FloatType value) -{ - static_assert(diyfp::kPrecision >= std::numeric_limits::digits + 3, - "internal error: not enough precision"); - - JSON_ASSERT(std::isfinite(value)); - JSON_ASSERT(value > 0); - - // If the neighbors (and boundaries) of 'value' are always computed for double-precision - // numbers, all float's can be recovered using strtod (and strtof). However, the resulting - // decimal representations are not exactly "short". - // - // The documentation for 'std::to_chars' (https://en.cppreference.com/w/cpp/utility/to_chars) - // says "value is converted to a string as if by std::sprintf in the default ("C") locale" - // and since sprintf promotes float's to double's, I think this is exactly what 'std::to_chars' - // does. - // On the other hand, the documentation for 'std::to_chars' requires that "parsing the - // representation using the corresponding std::from_chars function recovers value exactly". That - // indicates that single precision floating-point numbers should be recovered using - // 'std::strtof'. - // - // NB: If the neighbors are computed for single-precision numbers, there is a single float - // (7.0385307e-26f) which can't be recovered using strtod. The resulting double precision - // value is off by 1 ulp. -#if 0 - const boundaries w = compute_boundaries(static_cast(value)); -#else - const boundaries w = compute_boundaries(value); -#endif - - grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); -} - -/*! 
-@brief appends a decimal representation of e to buf -@return a pointer to the element following the exponent. -@pre -1000 < e < 1000 -*/ -JSON_HEDLEY_NON_NULL(1) -JSON_HEDLEY_RETURNS_NON_NULL -inline char* append_exponent(char* buf, int e) -{ - JSON_ASSERT(e > -1000); - JSON_ASSERT(e < 1000); - - if (e < 0) - { - e = -e; - *buf++ = '-'; - } - else - { - *buf++ = '+'; - } - - auto k = static_cast(e); - if (k < 10) - { - // Always print at least two digits in the exponent. - // This is for compatibility with printf("%g"). - *buf++ = '0'; - *buf++ = static_cast('0' + k); - } - else if (k < 100) - { - *buf++ = static_cast('0' + k / 10); - k %= 10; - *buf++ = static_cast('0' + k); - } - else - { - *buf++ = static_cast('0' + k / 100); - k %= 100; - *buf++ = static_cast('0' + k / 10); - k %= 10; - *buf++ = static_cast('0' + k); - } - - return buf; -} - -/*! -@brief prettify v = buf * 10^decimal_exponent - -If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point -notation. Otherwise it will be printed in exponential notation. - -@pre min_exp < 0 -@pre max_exp > 0 -*/ -JSON_HEDLEY_NON_NULL(1) -JSON_HEDLEY_RETURNS_NON_NULL -inline char* format_buffer(char* buf, int len, int decimal_exponent, - int min_exp, int max_exp) -{ - JSON_ASSERT(min_exp < 0); - JSON_ASSERT(max_exp > 0); - - const int k = len; - const int n = len + decimal_exponent; - - // v = buf * 10^(n-k) - // k is the length of the buffer (number of decimal digits) - // n is the position of the decimal point relative to the start of the buffer. 
- - if (k <= n && n <= max_exp) - { - // digits[000] - // len <= max_exp + 2 - - std::memset(buf + k, '0', static_cast(n) - static_cast(k)); - // Make it look like a floating-point number (#362, #378) - buf[n + 0] = '.'; - buf[n + 1] = '0'; - return buf + (static_cast(n) + 2); - } - - if (0 < n && n <= max_exp) - { - // dig.its - // len <= max_digits10 + 1 - - JSON_ASSERT(k > n); - - std::memmove(buf + (static_cast(n) + 1), buf + n, static_cast(k) - static_cast(n)); - buf[n] = '.'; - return buf + (static_cast(k) + 1U); - } - - if (min_exp < n && n <= 0) - { - // 0.[000]digits - // len <= 2 + (-min_exp - 1) + max_digits10 - - std::memmove(buf + (2 + static_cast(-n)), buf, static_cast(k)); - buf[0] = '0'; - buf[1] = '.'; - std::memset(buf + 2, '0', static_cast(-n)); - return buf + (2U + static_cast(-n) + static_cast(k)); - } - - if (k == 1) - { - // dE+123 - // len <= 1 + 5 - - buf += 1; - } - else - { - // d.igitsE+123 - // len <= max_digits10 + 1 + 5 - - std::memmove(buf + 2, buf + 1, static_cast(k) - 1); - buf[1] = '.'; - buf += 1 + static_cast(k); - } - - *buf++ = 'e'; - return append_exponent(buf, n - 1); -} - -} // namespace dtoa_impl - -/*! -@brief generates a decimal representation of the floating-point number value in [first, last). - -The format of the resulting decimal representation is similar to printf's %g -format. Returns an iterator pointing past-the-end of the decimal representation. - -@note The input number must be finite, i.e. NaN's and Inf's are not supported. -@note The buffer must be large enough. -@note The result is NOT null-terminated. -*/ -template -JSON_HEDLEY_NON_NULL(1, 2) -JSON_HEDLEY_RETURNS_NON_NULL -char* to_chars(char* first, const char* last, FloatType value) -{ - static_cast(last); // maybe unused - fix warning - JSON_ASSERT(std::isfinite(value)); - - // Use signbit(value) instead of (value < 0) since signbit works for -0. 
- if (std::signbit(value)) - { - value = -value; - *first++ = '-'; - } - - if (value == 0) // +-0 - { - *first++ = '0'; - // Make it look like a floating-point number (#362, #378) - *first++ = '.'; - *first++ = '0'; - return first; - } - - JSON_ASSERT(last - first >= std::numeric_limits::max_digits10); - - // Compute v = buffer * 10^decimal_exponent. - // The decimal digits are stored in the buffer, which needs to be interpreted - // as an unsigned decimal integer. - // len is the length of the buffer, i.e. the number of decimal digits. - int len = 0; - int decimal_exponent = 0; - dtoa_impl::grisu2(first, len, decimal_exponent, value); - - JSON_ASSERT(len <= std::numeric_limits::max_digits10); - - // Format the buffer like printf("%.*g", prec, value) - constexpr int kMinExp = -4; - // Use digits10 here to increase compatibility with version 2. - constexpr int kMaxExp = std::numeric_limits::digits10; - - JSON_ASSERT(last - first >= kMaxExp + 2); - JSON_ASSERT(last - first >= 2 + (-kMinExp - 1) + std::numeric_limits::max_digits10); - JSON_ASSERT(last - first >= std::numeric_limits::max_digits10 + 6); - - return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, kMaxExp); -} - -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - -// #include - -// #include - -// #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -/////////////////// -// serialization // -/////////////////// - -/// how to treat decoding errors -enum class error_handler_t -{ - strict, ///< throw a type_error exception in case of invalid UTF-8 - replace, ///< replace invalid UTF-8 sequences with U+FFFD - ignore ///< ignore invalid UTF-8 sequences -}; - -template -class serializer -{ - using string_t = typename BasicJsonType::string_t; - using number_float_t = typename BasicJsonType::number_float_t; - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using 
binary_char_t = typename BasicJsonType::binary_t::value_type; - static constexpr std::uint8_t UTF8_ACCEPT = 0; - static constexpr std::uint8_t UTF8_REJECT = 1; - - public: - /*! - @param[in] s output stream to serialize to - @param[in] ichar indentation character to use - @param[in] error_handler_ how to react on decoding errors - */ - serializer(output_adapter_t s, const char ichar, - error_handler_t error_handler_ = error_handler_t::strict) - : o(std::move(s)) - , loc(std::localeconv()) - , thousands_sep(loc->thousands_sep == nullptr ? '\0' : std::char_traits::to_char_type(* (loc->thousands_sep))) - , decimal_point(loc->decimal_point == nullptr ? '\0' : std::char_traits::to_char_type(* (loc->decimal_point))) - , indent_char(ichar) - , indent_string(512, indent_char) - , error_handler(error_handler_) - {} - - // delete because of pointer members - serializer(const serializer&) = delete; - serializer& operator=(const serializer&) = delete; - serializer(serializer&&) = delete; - serializer& operator=(serializer&&) = delete; - ~serializer() = default; - - /*! - @brief internal implementation of the serialization function - - This function is called by the public member function dump and organizes - the serialization internally. The indentation level is propagated as - additional parameter. In case of arrays and objects, the function is - called recursively. - - - strings and object keys are escaped using `escape_string()` - - integer numbers are converted implicitly via `operator<<` - - floating-point numbers are converted to a string using `"%g"` format - - binary values are serialized as objects containing the subtype and the - byte array - - @param[in] val value to serialize - @param[in] pretty_print whether the output shall be pretty-printed - @param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters - in the output are escaped with `\uXXXX` sequences, and the result consists - of ASCII characters only. 
- @param[in] indent_step the indent level - @param[in] current_indent the current indent level (only used internally) - */ - void dump(const BasicJsonType& val, - const bool pretty_print, - const bool ensure_ascii, - const unsigned int indent_step, - const unsigned int current_indent = 0) - { - switch (val.m_type) - { - case value_t::object: - { - if (val.m_value.object->empty()) - { - o->write_characters("{}", 2); - return; - } - - if (pretty_print) - { - o->write_characters("{\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent)) - { - indent_string.resize(indent_string.size() * 2, ' '); - } - - // first n-1 elements - auto i = val.m_value.object->cbegin(); - for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) - { - o->write_characters(indent_string.c_str(), new_indent); - o->write_character('\"'); - dump_escaped(i->first, ensure_ascii); - o->write_characters("\": ", 3); - dump(i->second, true, ensure_ascii, indent_step, new_indent); - o->write_characters(",\n", 2); - } - - // last element - JSON_ASSERT(i != val.m_value.object->cend()); - JSON_ASSERT(std::next(i) == val.m_value.object->cend()); - o->write_characters(indent_string.c_str(), new_indent); - o->write_character('\"'); - dump_escaped(i->first, ensure_ascii); - o->write_characters("\": ", 3); - dump(i->second, true, ensure_ascii, indent_step, new_indent); - - o->write_character('\n'); - o->write_characters(indent_string.c_str(), current_indent); - o->write_character('}'); - } - else - { - o->write_character('{'); - - // first n-1 elements - auto i = val.m_value.object->cbegin(); - for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) - { - o->write_character('\"'); - dump_escaped(i->first, ensure_ascii); - o->write_characters("\":", 2); - dump(i->second, false, ensure_ascii, indent_step, current_indent); - o->write_character(','); - 
} - - // last element - JSON_ASSERT(i != val.m_value.object->cend()); - JSON_ASSERT(std::next(i) == val.m_value.object->cend()); - o->write_character('\"'); - dump_escaped(i->first, ensure_ascii); - o->write_characters("\":", 2); - dump(i->second, false, ensure_ascii, indent_step, current_indent); - - o->write_character('}'); - } - - return; - } - - case value_t::array: - { - if (val.m_value.array->empty()) - { - o->write_characters("[]", 2); - return; - } - - if (pretty_print) - { - o->write_characters("[\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent)) - { - indent_string.resize(indent_string.size() * 2, ' '); - } - - // first n-1 elements - for (auto i = val.m_value.array->cbegin(); - i != val.m_value.array->cend() - 1; ++i) - { - o->write_characters(indent_string.c_str(), new_indent); - dump(*i, true, ensure_ascii, indent_step, new_indent); - o->write_characters(",\n", 2); - } - - // last element - JSON_ASSERT(!val.m_value.array->empty()); - o->write_characters(indent_string.c_str(), new_indent); - dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent); - - o->write_character('\n'); - o->write_characters(indent_string.c_str(), current_indent); - o->write_character(']'); - } - else - { - o->write_character('['); - - // first n-1 elements - for (auto i = val.m_value.array->cbegin(); - i != val.m_value.array->cend() - 1; ++i) - { - dump(*i, false, ensure_ascii, indent_step, current_indent); - o->write_character(','); - } - - // last element - JSON_ASSERT(!val.m_value.array->empty()); - dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent); - - o->write_character(']'); - } - - return; - } - - case value_t::string: - { - o->write_character('\"'); - dump_escaped(*val.m_value.string, ensure_ascii); - o->write_character('\"'); - return; - } - - case value_t::binary: - { - if 
(pretty_print) - { - o->write_characters("{\n", 2); - - // variable to hold indentation for recursive calls - const auto new_indent = current_indent + indent_step; - if (JSON_HEDLEY_UNLIKELY(indent_string.size() < new_indent)) - { - indent_string.resize(indent_string.size() * 2, ' '); - } - - o->write_characters(indent_string.c_str(), new_indent); - - o->write_characters("\"bytes\": [", 10); - - if (!val.m_value.binary->empty()) - { - for (auto i = val.m_value.binary->cbegin(); - i != val.m_value.binary->cend() - 1; ++i) - { - dump_integer(*i); - o->write_characters(", ", 2); - } - dump_integer(val.m_value.binary->back()); - } - - o->write_characters("],\n", 3); - o->write_characters(indent_string.c_str(), new_indent); - - o->write_characters("\"subtype\": ", 11); - if (val.m_value.binary->has_subtype()) - { - dump_integer(val.m_value.binary->subtype()); - } - else - { - o->write_characters("null", 4); - } - o->write_character('\n'); - o->write_characters(indent_string.c_str(), current_indent); - o->write_character('}'); - } - else - { - o->write_characters("{\"bytes\":[", 10); - - if (!val.m_value.binary->empty()) - { - for (auto i = val.m_value.binary->cbegin(); - i != val.m_value.binary->cend() - 1; ++i) - { - dump_integer(*i); - o->write_character(','); - } - dump_integer(val.m_value.binary->back()); - } - - o->write_characters("],\"subtype\":", 12); - if (val.m_value.binary->has_subtype()) - { - dump_integer(val.m_value.binary->subtype()); - o->write_character('}'); - } - else - { - o->write_characters("null}", 5); - } - } - return; - } - - case value_t::boolean: - { - if (val.m_value.boolean) - { - o->write_characters("true", 4); - } - else - { - o->write_characters("false", 5); - } - return; - } - - case value_t::number_integer: - { - dump_integer(val.m_value.number_integer); - return; - } - - case value_t::number_unsigned: - { - dump_integer(val.m_value.number_unsigned); - return; - } - - case value_t::number_float: - { - 
dump_float(val.m_value.number_float); - return; - } - - case value_t::discarded: - { - o->write_characters("", 11); - return; - } - - case value_t::null: - { - o->write_characters("null", 4); - return; - } - - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - } - - JSON_PRIVATE_UNLESS_TESTED: - /*! - @brief dump escaped string - - Escape a string by replacing certain special characters by a sequence of an - escape character (backslash) and another character and other control - characters by a sequence of "\u" followed by a four-digit hex - representation. The escaped string is written to output stream @a o. - - @param[in] s the string to escape - @param[in] ensure_ascii whether to escape non-ASCII characters with - \uXXXX sequences - - @complexity Linear in the length of string @a s. - */ - void dump_escaped(const string_t& s, const bool ensure_ascii) - { - std::uint32_t codepoint{}; - std::uint8_t state = UTF8_ACCEPT; - std::size_t bytes = 0; // number of bytes written to string_buffer - - // number of bytes written at the point of the last valid byte - std::size_t bytes_after_last_accept = 0; - std::size_t undumped_chars = 0; - - for (std::size_t i = 0; i < s.size(); ++i) - { - const auto byte = static_cast(s[i]); - - switch (decode(state, codepoint, byte)) - { - case UTF8_ACCEPT: // decode found a new code point - { - switch (codepoint) - { - case 0x08: // backspace - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 'b'; - break; - } - - case 0x09: // horizontal tab - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 't'; - break; - } - - case 0x0A: // newline - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 'n'; - break; - } - - case 0x0C: // formfeed - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 'f'; - break; - } - - case 0x0D: // carriage return - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 'r'; - 
break; - } - - case 0x22: // quotation mark - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = '\"'; - break; - } - - case 0x5C: // reverse solidus - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = '\\'; - break; - } - - default: - { - // escape control characters (0x00..0x1F) or, if - // ensure_ascii parameter is used, non-ASCII characters - if ((codepoint <= 0x1F) || (ensure_ascii && (codepoint >= 0x7F))) - { - if (codepoint <= 0xFFFF) - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - (std::snprintf)(string_buffer.data() + bytes, 7, "\\u%04x", - static_cast(codepoint)); - bytes += 6; - } - else - { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - (std::snprintf)(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x", - static_cast(0xD7C0u + (codepoint >> 10u)), - static_cast(0xDC00u + (codepoint & 0x3FFu))); - bytes += 12; - } - } - else - { - // copy byte to buffer (all previous bytes - // been copied have in default case above) - string_buffer[bytes++] = s[i]; - } - break; - } - } - - // write buffer and reset index; there must be 13 bytes - // left, as this is the maximal number of bytes to be - // written ("\uxxxx\uxxxx\0") for one code point - if (string_buffer.size() - bytes < 13) - { - o->write_characters(string_buffer.data(), bytes); - bytes = 0; - } - - // remember the byte position of this accept - bytes_after_last_accept = bytes; - undumped_chars = 0; - break; - } - - case UTF8_REJECT: // decode found invalid UTF-8 byte - { - switch (error_handler) - { - case error_handler_t::strict: - { - std::string sn(3, '\0'); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - (std::snprintf)(&sn[0], sn.size(), "%.2X", byte); - JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn, BasicJsonType())); - } - - case error_handler_t::ignore: - case error_handler_t::replace: - { - // in case we saw this character the first time, we - // 
would like to read it again, because the byte - // may be OK for itself, but just not OK for the - // previous sequence - if (undumped_chars > 0) - { - --i; - } - - // reset length buffer to the last accepted index; - // thus removing/ignoring the invalid characters - bytes = bytes_after_last_accept; - - if (error_handler == error_handler_t::replace) - { - // add a replacement character - if (ensure_ascii) - { - string_buffer[bytes++] = '\\'; - string_buffer[bytes++] = 'u'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'f'; - string_buffer[bytes++] = 'd'; - } - else - { - string_buffer[bytes++] = detail::binary_writer::to_char_type('\xEF'); - string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBF'); - string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBD'); - } - - // write buffer and reset index; there must be 13 bytes - // left, as this is the maximal number of bytes to be - // written ("\uxxxx\uxxxx\0") for one code point - if (string_buffer.size() - bytes < 13) - { - o->write_characters(string_buffer.data(), bytes); - bytes = 0; - } - - bytes_after_last_accept = bytes; - } - - undumped_chars = 0; - - // continue processing the string - state = UTF8_ACCEPT; - break; - } - - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - break; - } - - default: // decode found yet incomplete multi-byte code point - { - if (!ensure_ascii) - { - // code point will not be escaped - copy byte to buffer - string_buffer[bytes++] = s[i]; - } - ++undumped_chars; - break; - } - } - } - - // we finished processing the string - if (JSON_HEDLEY_LIKELY(state == UTF8_ACCEPT)) - { - // write buffer - if (bytes > 0) - { - o->write_characters(string_buffer.data(), bytes); - } - } - else - { - // we finish reading, but do not accept: string was incomplete - switch (error_handler) - { - case error_handler_t::strict: - { - std::string sn(3, 
'\0'); - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - (std::snprintf)(&sn[0], sn.size(), "%.2X", static_cast(s.back())); - JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn, BasicJsonType())); - } - - case error_handler_t::ignore: - { - // write all accepted bytes - o->write_characters(string_buffer.data(), bytes_after_last_accept); - break; - } - - case error_handler_t::replace: - { - // write all accepted bytes - o->write_characters(string_buffer.data(), bytes_after_last_accept); - // add a replacement character - if (ensure_ascii) - { - o->write_characters("\\ufffd", 6); - } - else - { - o->write_characters("\xEF\xBF\xBD", 3); - } - break; - } - - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - } - } - - private: - /*! - @brief count digits - - Count the number of decimal (base 10) digits for an input unsigned integer. - - @param[in] x unsigned integer number to count its digits - @return number of decimal digits - */ - inline unsigned int count_digits(number_unsigned_t x) noexcept - { - unsigned int n_digits = 1; - for (;;) - { - if (x < 10) - { - return n_digits; - } - if (x < 100) - { - return n_digits + 1; - } - if (x < 1000) - { - return n_digits + 2; - } - if (x < 10000) - { - return n_digits + 3; - } - x = x / 10000u; - n_digits += 4; - } - } - - /*! - @brief dump an integer - - Dump a given integer to output stream @a o. Works internally with - @a number_buffer. 
- - @param[in] x integer number (signed or unsigned) to dump - @tparam NumberType either @a number_integer_t or @a number_unsigned_t - */ - template < typename NumberType, detail::enable_if_t < - std::is_same::value || - std::is_same::value || - std::is_same::value, - int > = 0 > - void dump_integer(NumberType x) - { - static constexpr std::array, 100> digits_to_99 - { - { - {{'0', '0'}}, {{'0', '1'}}, {{'0', '2'}}, {{'0', '3'}}, {{'0', '4'}}, {{'0', '5'}}, {{'0', '6'}}, {{'0', '7'}}, {{'0', '8'}}, {{'0', '9'}}, - {{'1', '0'}}, {{'1', '1'}}, {{'1', '2'}}, {{'1', '3'}}, {{'1', '4'}}, {{'1', '5'}}, {{'1', '6'}}, {{'1', '7'}}, {{'1', '8'}}, {{'1', '9'}}, - {{'2', '0'}}, {{'2', '1'}}, {{'2', '2'}}, {{'2', '3'}}, {{'2', '4'}}, {{'2', '5'}}, {{'2', '6'}}, {{'2', '7'}}, {{'2', '8'}}, {{'2', '9'}}, - {{'3', '0'}}, {{'3', '1'}}, {{'3', '2'}}, {{'3', '3'}}, {{'3', '4'}}, {{'3', '5'}}, {{'3', '6'}}, {{'3', '7'}}, {{'3', '8'}}, {{'3', '9'}}, - {{'4', '0'}}, {{'4', '1'}}, {{'4', '2'}}, {{'4', '3'}}, {{'4', '4'}}, {{'4', '5'}}, {{'4', '6'}}, {{'4', '7'}}, {{'4', '8'}}, {{'4', '9'}}, - {{'5', '0'}}, {{'5', '1'}}, {{'5', '2'}}, {{'5', '3'}}, {{'5', '4'}}, {{'5', '5'}}, {{'5', '6'}}, {{'5', '7'}}, {{'5', '8'}}, {{'5', '9'}}, - {{'6', '0'}}, {{'6', '1'}}, {{'6', '2'}}, {{'6', '3'}}, {{'6', '4'}}, {{'6', '5'}}, {{'6', '6'}}, {{'6', '7'}}, {{'6', '8'}}, {{'6', '9'}}, - {{'7', '0'}}, {{'7', '1'}}, {{'7', '2'}}, {{'7', '3'}}, {{'7', '4'}}, {{'7', '5'}}, {{'7', '6'}}, {{'7', '7'}}, {{'7', '8'}}, {{'7', '9'}}, - {{'8', '0'}}, {{'8', '1'}}, {{'8', '2'}}, {{'8', '3'}}, {{'8', '4'}}, {{'8', '5'}}, {{'8', '6'}}, {{'8', '7'}}, {{'8', '8'}}, {{'8', '9'}}, - {{'9', '0'}}, {{'9', '1'}}, {{'9', '2'}}, {{'9', '3'}}, {{'9', '4'}}, {{'9', '5'}}, {{'9', '6'}}, {{'9', '7'}}, {{'9', '8'}}, {{'9', '9'}}, - } - }; - - // special case for "0" - if (x == 0) - { - o->write_character('0'); - return; - } - - // use a pointer to fill the buffer - auto buffer_ptr = number_buffer.begin(); // 
NOLINT(llvm-qualified-auto,readability-qualified-auto,cppcoreguidelines-pro-type-vararg,hicpp-vararg) - - const bool is_negative = std::is_same::value && !(x >= 0); // see issue #755 - number_unsigned_t abs_value; - - unsigned int n_chars{}; - - if (is_negative) - { - *buffer_ptr = '-'; - abs_value = remove_sign(static_cast(x)); - - // account one more byte for the minus sign - n_chars = 1 + count_digits(abs_value); - } - else - { - abs_value = static_cast(x); - n_chars = count_digits(abs_value); - } - - // spare 1 byte for '\0' - JSON_ASSERT(n_chars < number_buffer.size() - 1); - - // jump to the end to generate the string from backward - // so we later avoid reversing the result - buffer_ptr += n_chars; - - // Fast int2ascii implementation inspired by "Fastware" talk by Andrei Alexandrescu - // See: https://www.youtube.com/watch?v=o4-CwDo2zpg - while (abs_value >= 100) - { - const auto digits_index = static_cast((abs_value % 100)); - abs_value /= 100; - *(--buffer_ptr) = digits_to_99[digits_index][1]; - *(--buffer_ptr) = digits_to_99[digits_index][0]; - } - - if (abs_value >= 10) - { - const auto digits_index = static_cast(abs_value); - *(--buffer_ptr) = digits_to_99[digits_index][1]; - *(--buffer_ptr) = digits_to_99[digits_index][0]; - } - else - { - *(--buffer_ptr) = static_cast('0' + abs_value); - } - - o->write_characters(number_buffer.data(), n_chars); - } - - /*! - @brief dump a floating-point number - - Dump a given floating-point number to output stream @a o. Works internally - with @a number_buffer. - - @param[in] x floating-point number to dump - */ - void dump_float(number_float_t x) - { - // NaN / inf - if (!std::isfinite(x)) - { - o->write_characters("null", 4); - return; - } - - // If number_float_t is an IEEE-754 single or double precision number, - // use the Grisu2 algorithm to produce short numbers which are - // guaranteed to round-trip, using strtof and strtod, resp. - // - // NB: The test below works if == . 
- static constexpr bool is_ieee_single_or_double - = (std::numeric_limits::is_iec559 && std::numeric_limits::digits == 24 && std::numeric_limits::max_exponent == 128) || - (std::numeric_limits::is_iec559 && std::numeric_limits::digits == 53 && std::numeric_limits::max_exponent == 1024); - - dump_float(x, std::integral_constant()); - } - - void dump_float(number_float_t x, std::true_type /*is_ieee_single_or_double*/) - { - auto* begin = number_buffer.data(); - auto* end = ::nlohmann::detail::to_chars(begin, begin + number_buffer.size(), x); - - o->write_characters(begin, static_cast(end - begin)); - } - - void dump_float(number_float_t x, std::false_type /*is_ieee_single_or_double*/) - { - // get number of digits for a float -> text -> float round-trip - static constexpr auto d = std::numeric_limits::max_digits10; - - // the actual conversion - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - std::ptrdiff_t len = (std::snprintf)(number_buffer.data(), number_buffer.size(), "%.*g", d, x); - - // negative value indicates an error - JSON_ASSERT(len > 0); - // check if buffer was large enough - JSON_ASSERT(static_cast(len) < number_buffer.size()); - - // erase thousands separator - if (thousands_sep != '\0') - { - auto* const end = std::remove(number_buffer.begin(), - number_buffer.begin() + len, thousands_sep); - std::fill(end, number_buffer.end(), '\0'); - JSON_ASSERT((end - number_buffer.begin()) <= len); - len = (end - number_buffer.begin()); - } - - // convert decimal point to '.' - if (decimal_point != '\0' && decimal_point != '.') - { - auto* const dec_pos = std::find(number_buffer.begin(), number_buffer.end(), decimal_point); - if (dec_pos != number_buffer.end()) - { - *dec_pos = '.'; - } - } - - o->write_characters(number_buffer.data(), static_cast(len)); - - // determine if need to append ".0" - const bool value_is_int_like = - std::none_of(number_buffer.begin(), number_buffer.begin() + len + 1, - [](char c) - { - return c == '.' 
|| c == 'e'; - }); - - if (value_is_int_like) - { - o->write_characters(".0", 2); - } - } - - /*! - @brief check whether a string is UTF-8 encoded - - The function checks each byte of a string whether it is UTF-8 encoded. The - result of the check is stored in the @a state parameter. The function must - be called initially with state 0 (accept). State 1 means the string must - be rejected, because the current byte is not allowed. If the string is - completely processed, but the state is non-zero, the string ended - prematurely; that is, the last byte indicated more bytes should have - followed. - - @param[in,out] state the state of the decoding - @param[in,out] codep codepoint (valid only if resulting state is UTF8_ACCEPT) - @param[in] byte next byte to decode - @return new state - - @note The function has been edited: a std::array is used. - - @copyright Copyright (c) 2008-2009 Bjoern Hoehrmann - @sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - */ - static std::uint8_t decode(std::uint8_t& state, std::uint32_t& codep, const std::uint8_t byte) noexcept - { - static const std::array utf8d = - { - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9F - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // A0..BF - 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..DF - 0xA, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // E0..EF - 0xB, 0x6, 0x6, 0x6, 0x5, 0x8, 
0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // F0..FF - 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2 - 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4 - 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6 - 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // s7..s8 - } - }; - - JSON_ASSERT(byte < utf8d.size()); - const std::uint8_t type = utf8d[byte]; - - codep = (state != UTF8_ACCEPT) - ? (byte & 0x3fu) | (codep << 6u) - : (0xFFu >> type) & (byte); - - std::size_t index = 256u + static_cast(state) * 16u + static_cast(type); - JSON_ASSERT(index < 400); - state = utf8d[index]; - return state; - } - - /* - * Overload to make the compiler happy while it is instantiating - * dump_integer for number_unsigned_t. - * Must never be called. - */ - number_unsigned_t remove_sign(number_unsigned_t x) - { - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - return x; // LCOV_EXCL_LINE - } - - /* - * Helper function for dump_integer - * - * This function takes a negative signed integer and returns its absolute - * value as unsigned integer. The plus/minus shuffling is necessary as we can - * not directly remove the sign of an arbitrary signed integer as the - * absolute values of INT_MIN and INT_MAX are usually not the same. See - * #1708 for details. 
- */ - inline number_unsigned_t remove_sign(number_integer_t x) noexcept - { - JSON_ASSERT(x < 0 && x < (std::numeric_limits::max)()); // NOLINT(misc-redundant-expression) - return static_cast(-(x + 1)) + 1; - } - - private: - /// the output of the serializer - output_adapter_t o = nullptr; - - /// a (hopefully) large enough character buffer - std::array number_buffer{{}}; - - /// the locale - const std::lconv* loc = nullptr; - /// the locale's thousand separator character - const char thousands_sep = '\0'; - /// the locale's decimal point character - const char decimal_point = '\0'; - - /// string buffer - std::array string_buffer{{}}; - - /// the indentation character - const char indent_char; - /// the indentation string - string_t indent_string; - - /// error_handler how to react on decoding errors - const error_handler_t error_handler; -}; -} // namespace detail -} // namespace nlohmann - -// #include - -// #include - -// #include - - -#include // less -#include // initializer_list -#include // input_iterator_tag, iterator_traits -#include // allocator -#include // for out_of_range -#include // enable_if, is_convertible -#include // pair -#include // vector - -// #include - - -namespace nlohmann -{ - -/// ordered_map: a minimal map-like container that preserves insertion order -/// for use within nlohmann::basic_json -template , - class Allocator = std::allocator>> - struct ordered_map : std::vector, Allocator> -{ - using key_type = Key; - using mapped_type = T; - using Container = std::vector, Allocator>; - using typename Container::iterator; - using typename Container::const_iterator; - using typename Container::size_type; - using typename Container::value_type; - - // Explicit constructors instead of `using Container::Container` - // otherwise older compilers choke on it (GCC <= 5.5, xcode <= 9.4) - ordered_map(const Allocator& alloc = Allocator()) : Container{alloc} {} - template - ordered_map(It first, It last, const Allocator& alloc = Allocator()) - : 
Container{first, last, alloc} {} - ordered_map(std::initializer_list init, const Allocator& alloc = Allocator() ) - : Container{init, alloc} {} - - std::pair emplace(const key_type& key, T&& t) - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == key) - { - return {it, false}; - } - } - Container::emplace_back(key, t); - return {--this->end(), true}; - } - - T& operator[](const Key& key) - { - return emplace(key, T{}).first->second; - } - - const T& operator[](const Key& key) const - { - return at(key); - } - - T& at(const Key& key) - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == key) - { - return it->second; - } - } - - JSON_THROW(std::out_of_range("key not found")); - } - - const T& at(const Key& key) const - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == key) - { - return it->second; - } - } - - JSON_THROW(std::out_of_range("key not found")); - } - - size_type erase(const Key& key) - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == key) - { - // Since we cannot move const Keys, re-construct them in place - for (auto next = it; ++next != this->end(); ++it) - { - it->~value_type(); // Destroy but keep allocation - new (&*it) value_type{std::move(*next)}; - } - Container::pop_back(); - return 1; - } - } - return 0; - } - - iterator erase(iterator pos) - { - auto it = pos; - - // Since we cannot move const Keys, re-construct them in place - for (auto next = it; ++next != this->end(); ++it) - { - it->~value_type(); // Destroy but keep allocation - new (&*it) value_type{std::move(*next)}; - } - Container::pop_back(); - return pos; - } - - size_type count(const Key& key) const - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == key) - { - return 1; - } - } - return 0; - } - - iterator find(const Key& key) - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == key) - { - return 
it; - } - } - return Container::end(); - } - - const_iterator find(const Key& key) const - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == key) - { - return it; - } - } - return Container::end(); - } - - std::pair insert( value_type&& value ) - { - return emplace(value.first, std::move(value.second)); - } - - std::pair insert( const value_type& value ) - { - for (auto it = this->begin(); it != this->end(); ++it) - { - if (it->first == value.first) - { - return {it, false}; - } - } - Container::push_back(value); - return {--this->end(), true}; - } - - template - using require_input_iter = typename std::enable_if::iterator_category, - std::input_iterator_tag>::value>::type; - - template> - void insert(InputIt first, InputIt last) - { - for (auto it = first; it != last; ++it) - { - insert(*it); - } - } -}; - -} // namespace nlohmann - - -#if defined(JSON_HAS_CPP_17) - #include -#endif - -/*! -@brief namespace for Niels Lohmann -@see https://github.com/nlohmann -@since version 1.0.0 -*/ -namespace nlohmann -{ - -/*! 
-@brief a class to store JSON values - -@tparam ObjectType type for JSON objects (`std::map` by default; will be used -in @ref object_t) -@tparam ArrayType type for JSON arrays (`std::vector` by default; will be used -in @ref array_t) -@tparam StringType type for JSON strings and object keys (`std::string` by -default; will be used in @ref string_t) -@tparam BooleanType type for JSON booleans (`bool` by default; will be used -in @ref boolean_t) -@tparam NumberIntegerType type for JSON integer numbers (`int64_t` by -default; will be used in @ref number_integer_t) -@tparam NumberUnsignedType type for JSON unsigned integer numbers (@c -`uint64_t` by default; will be used in @ref number_unsigned_t) -@tparam NumberFloatType type for JSON floating-point numbers (`double` by -default; will be used in @ref number_float_t) -@tparam BinaryType type for packed binary data for compatibility with binary -serialization formats (`std::vector` by default; will be used in -@ref binary_t) -@tparam AllocatorType type of the allocator to use (`std::allocator` by -default) -@tparam JSONSerializer the serializer to resolve internal calls to `to_json()` -and `from_json()` (@ref adl_serializer by default) - -@requirement The class satisfies the following concept requirements: -- Basic - - [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible): - JSON values can be default constructed. The result will be a JSON null - value. - - [MoveConstructible](https://en.cppreference.com/w/cpp/named_req/MoveConstructible): - A JSON value can be constructed from an rvalue argument. - - [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible): - A JSON value can be copy-constructed from an lvalue expression. - - [MoveAssignable](https://en.cppreference.com/w/cpp/named_req/MoveAssignable): - A JSON value can be assigned from an rvalue argument. 
- - [CopyAssignable](https://en.cppreference.com/w/cpp/named_req/CopyAssignable): - A JSON value can be copy-assigned from an lvalue expression. - - [Destructible](https://en.cppreference.com/w/cpp/named_req/Destructible): - JSON values can be destructed. -- Layout - - [StandardLayoutType](https://en.cppreference.com/w/cpp/named_req/StandardLayoutType): - JSON values have - [standard layout](https://en.cppreference.com/w/cpp/language/data_members#Standard_layout): - All non-static data members are private and standard layout types, the - class has no virtual functions or (virtual) base classes. -- Library-wide - - [EqualityComparable](https://en.cppreference.com/w/cpp/named_req/EqualityComparable): - JSON values can be compared with `==`, see @ref - operator==(const_reference,const_reference). - - [LessThanComparable](https://en.cppreference.com/w/cpp/named_req/LessThanComparable): - JSON values can be compared with `<`, see @ref - operator<(const_reference,const_reference). - - [Swappable](https://en.cppreference.com/w/cpp/named_req/Swappable): - Any JSON lvalue or rvalue of can be swapped with any lvalue or rvalue of - other compatible types, using unqualified function call @ref swap(). - - [NullablePointer](https://en.cppreference.com/w/cpp/named_req/NullablePointer): - JSON values can be compared against `std::nullptr_t` objects which are used - to model the `null` value. -- Container - - [Container](https://en.cppreference.com/w/cpp/named_req/Container): - JSON values can be used like STL containers and provide iterator access. - - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer); - JSON values can be used like STL containers and provide reverse iterator - access. - -@invariant The member variables @a m_value and @a m_type have the following -relationship: -- If `m_type == value_t::object`, then `m_value.object != nullptr`. -- If `m_type == value_t::array`, then `m_value.array != nullptr`. 
-- If `m_type == value_t::string`, then `m_value.string != nullptr`. -The invariants are checked by member function assert_invariant(). - -@internal -@note ObjectType trick from https://stackoverflow.com/a/9860911 -@endinternal - -@see [RFC 8259: The JavaScript Object Notation (JSON) Data Interchange -Format](https://tools.ietf.org/html/rfc8259) - -@since version 1.0.0 - -@nosubgrouping -*/ -NLOHMANN_BASIC_JSON_TPL_DECLARATION -class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) -{ - private: - template friend struct detail::external_constructor; - friend ::nlohmann::json_pointer; - - template - friend class ::nlohmann::detail::parser; - friend ::nlohmann::detail::serializer; - template - friend class ::nlohmann::detail::iter_impl; - template - friend class ::nlohmann::detail::binary_writer; - template - friend class ::nlohmann::detail::binary_reader; - template - friend class ::nlohmann::detail::json_sax_dom_parser; - template - friend class ::nlohmann::detail::json_sax_dom_callback_parser; - friend class ::nlohmann::detail::exception; - - /// workaround type for MSVC - using basic_json_t = NLOHMANN_BASIC_JSON_TPL; - - JSON_PRIVATE_UNLESS_TESTED: - // convenience aliases for types residing in namespace detail; - using lexer = ::nlohmann::detail::lexer_base; - - template - static ::nlohmann::detail::parser parser( - InputAdapterType adapter, - detail::parser_callback_tcb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false - ) - { - return ::nlohmann::detail::parser(std::move(adapter), - std::move(cb), allow_exceptions, ignore_comments); - } - - private: - using primitive_iterator_t = ::nlohmann::detail::primitive_iterator_t; - template - using internal_iterator = ::nlohmann::detail::internal_iterator; - template - using iter_impl = ::nlohmann::detail::iter_impl; - template - using iteration_proxy = ::nlohmann::detail::iteration_proxy; - template using json_reverse_iterator = 
::nlohmann::detail::json_reverse_iterator; - - template - using output_adapter_t = ::nlohmann::detail::output_adapter_t; - - template - using binary_reader = ::nlohmann::detail::binary_reader; - template using binary_writer = ::nlohmann::detail::binary_writer; - - JSON_PRIVATE_UNLESS_TESTED: - using serializer = ::nlohmann::detail::serializer; - - public: - using value_t = detail::value_t; - /// JSON Pointer, see @ref nlohmann::json_pointer - using json_pointer = ::nlohmann::json_pointer; - template - using json_serializer = JSONSerializer; - /// how to treat decoding errors - using error_handler_t = detail::error_handler_t; - /// how to treat CBOR tags - using cbor_tag_handler_t = detail::cbor_tag_handler_t; - /// helper type for initializer lists of basic_json values - using initializer_list_t = std::initializer_list>; - - using input_format_t = detail::input_format_t; - /// SAX interface type, see @ref nlohmann::json_sax - using json_sax_t = json_sax; - - //////////////// - // exceptions // - //////////////// - - /// @name exceptions - /// Classes to implement user-defined exceptions. - /// @{ - - /// @copydoc detail::exception - using exception = detail::exception; - /// @copydoc detail::parse_error - using parse_error = detail::parse_error; - /// @copydoc detail::invalid_iterator - using invalid_iterator = detail::invalid_iterator; - /// @copydoc detail::type_error - using type_error = detail::type_error; - /// @copydoc detail::out_of_range - using out_of_range = detail::out_of_range; - /// @copydoc detail::other_error - using other_error = detail::other_error; - - /// @} - - - ///////////////////// - // container types // - ///////////////////// - - /// @name container types - /// The canonic container types to use @ref basic_json like any other STL - /// container. 
- /// @{ - - /// the type of elements in a basic_json container - using value_type = basic_json; - - /// the type of an element reference - using reference = value_type&; - /// the type of an element const reference - using const_reference = const value_type&; - - /// a type to represent differences between iterators - using difference_type = std::ptrdiff_t; - /// a type to represent container sizes - using size_type = std::size_t; - - /// the allocator type - using allocator_type = AllocatorType; - - /// the type of an element pointer - using pointer = typename std::allocator_traits::pointer; - /// the type of an element const pointer - using const_pointer = typename std::allocator_traits::const_pointer; - - /// an iterator for a basic_json container - using iterator = iter_impl; - /// a const iterator for a basic_json container - using const_iterator = iter_impl; - /// a reverse iterator for a basic_json container - using reverse_iterator = json_reverse_iterator; - /// a const reverse iterator for a basic_json container - using const_reverse_iterator = json_reverse_iterator; - - /// @} - - - /*! - @brief returns the allocator associated with the container - */ - static allocator_type get_allocator() - { - return allocator_type(); - } - - /*! - @brief returns version information on the library - - This function returns a JSON object with information about the library, - including the version number and information on the platform and compiler. - - @return JSON object holding version information - key | description - ----------- | --------------- - `compiler` | Information on the used compiler. It is an object with the following keys: `c++` (the used C++ standard), `family` (the compiler family; possible values are `clang`, `icc`, `gcc`, `ilecpp`, `msvc`, `pgcpp`, `sunpro`, and `unknown`), and `version` (the compiler version). - `copyright` | The copyright line for the library as string. - `name` | The name of the library as string. 
- `platform` | The used platform as string. Possible values are `win32`, `linux`, `apple`, `unix`, and `unknown`. - `url` | The URL of the project as string. - `version` | The version of the library. It is an object with the following keys: `major`, `minor`, and `patch` as defined by [Semantic Versioning](http://semver.org), and `string` (the version string). - - @liveexample{The following code shows an example output of the `meta()` - function.,meta} - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @complexity Constant. - - @since 2.1.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json meta() - { - basic_json result; - - result["copyright"] = "(C) 2013-2021 Niels Lohmann"; - result["name"] = "JSON for Modern C++"; - result["url"] = "https://github.com/nlohmann/json"; - result["version"]["string"] = - std::to_string(NLOHMANN_JSON_VERSION_MAJOR) + "." + - std::to_string(NLOHMANN_JSON_VERSION_MINOR) + "." + - std::to_string(NLOHMANN_JSON_VERSION_PATCH); - result["version"]["major"] = NLOHMANN_JSON_VERSION_MAJOR; - result["version"]["minor"] = NLOHMANN_JSON_VERSION_MINOR; - result["version"]["patch"] = NLOHMANN_JSON_VERSION_PATCH; - -#ifdef _WIN32 - result["platform"] = "win32"; -#elif defined __linux__ - result["platform"] = "linux"; -#elif defined __APPLE__ - result["platform"] = "apple"; -#elif defined __unix__ - result["platform"] = "unix"; -#else - result["platform"] = "unknown"; -#endif - -#if defined(__ICC) || defined(__INTEL_COMPILER) - result["compiler"] = {{"family", "icc"}, {"version", __INTEL_COMPILER}}; -#elif defined(__clang__) - result["compiler"] = {{"family", "clang"}, {"version", __clang_version__}}; -#elif defined(__GNUC__) || defined(__GNUG__) - result["compiler"] = {{"family", "gcc"}, {"version", std::to_string(__GNUC__) + "." + std::to_string(__GNUC_MINOR__) + "." 
+ std::to_string(__GNUC_PATCHLEVEL__)}}; -#elif defined(__HP_cc) || defined(__HP_aCC) - result["compiler"] = "hp" -#elif defined(__IBMCPP__) - result["compiler"] = {{"family", "ilecpp"}, {"version", __IBMCPP__}}; -#elif defined(_MSC_VER) - result["compiler"] = {{"family", "msvc"}, {"version", _MSC_VER}}; -#elif defined(__PGI) - result["compiler"] = {{"family", "pgcpp"}, {"version", __PGI}}; -#elif defined(__SUNPRO_CC) - result["compiler"] = {{"family", "sunpro"}, {"version", __SUNPRO_CC}}; -#else - result["compiler"] = {{"family", "unknown"}, {"version", "unknown"}}; -#endif - -#ifdef __cplusplus - result["compiler"]["c++"] = std::to_string(__cplusplus); -#else - result["compiler"]["c++"] = "unknown"; -#endif - return result; - } - - - /////////////////////////// - // JSON value data types // - /////////////////////////// - - /// @name JSON value data types - /// The data types to store a JSON value. These types are derived from - /// the template arguments passed to class @ref basic_json. - /// @{ - -#if defined(JSON_HAS_CPP_14) - // Use transparent comparator if possible, combined with perfect forwarding - // on find() and count() calls prevents unnecessary string construction. - using object_comparator_t = std::less<>; -#else - using object_comparator_t = std::less; -#endif - - /*! - @brief a type for an object - - [RFC 8259](https://tools.ietf.org/html/rfc8259) describes JSON objects as follows: - > An object is an unordered collection of zero or more name/value pairs, - > where a name is a string and a value is a string, number, boolean, null, - > object, or array. - - To store objects in C++, a type is defined by the template parameters - described below. - - @tparam ObjectType the container to store objects (e.g., `std::map` or - `std::unordered_map`) - @tparam StringType the type of the keys or names (e.g., `std::string`). - The comparison function `std::less` is used to order elements - inside the container. 
- @tparam AllocatorType the allocator to use for objects (e.g., - `std::allocator`) - - #### Default type - - With the default values for @a ObjectType (`std::map`), @a StringType - (`std::string`), and @a AllocatorType (`std::allocator`), the default - value for @a object_t is: - - @code {.cpp} - std::map< - std::string, // key_type - basic_json, // value_type - std::less, // key_compare - std::allocator> // allocator_type - > - @endcode - - #### Behavior - - The choice of @a object_t influences the behavior of the JSON class. With - the default type, objects have the following behavior: - - - When all names are unique, objects will be interoperable in the sense - that all software implementations receiving that object will agree on - the name-value mappings. - - When the names within an object are not unique, it is unspecified which - one of the values for a given key will be chosen. For instance, - `{"key": 2, "key": 1}` could be equal to either `{"key": 1}` or - `{"key": 2}`. - - Internally, name/value pairs are stored in lexicographical order of the - names. Objects will also be serialized (see @ref dump) in this order. - For instance, `{"b": 1, "a": 2}` and `{"a": 2, "b": 1}` will be stored - and serialized as `{"a": 2, "b": 1}`. - - When comparing objects, the order of the name/value pairs is irrelevant. - This makes objects interoperable in the sense that they will not be - affected by these differences. For instance, `{"b": 1, "a": 2}` and - `{"a": 2, "b": 1}` will be treated as equal. - - #### Limits - - [RFC 8259](https://tools.ietf.org/html/rfc8259) specifies: - > An implementation may set limits on the maximum depth of nesting. - - In this class, the object's limit of nesting is not explicitly constrained. - However, a maximum depth of nesting may be introduced by the compiler or - runtime environment. A theoretical limit can be queried by calling the - @ref max_size function of a JSON object. 
- - #### Storage - - Objects are stored as pointers in a @ref basic_json type. That is, for any - access to object values, a pointer of type `object_t*` must be - dereferenced. - - @sa see @ref array_t -- type for an array value - - @since version 1.0.0 - - @note The order name/value pairs are added to the object is *not* - preserved by the library. Therefore, iterating an object may return - name/value pairs in a different order than they were originally stored. In - fact, keys will be traversed in alphabetical order as `std::map` with - `std::less` is used by default. Please note this behavior conforms to [RFC - 8259](https://tools.ietf.org/html/rfc8259), because any order implements the - specified "unordered" nature of JSON objects. - */ - using object_t = ObjectType>>; - - /*! - @brief a type for an array - - [RFC 8259](https://tools.ietf.org/html/rfc8259) describes JSON arrays as follows: - > An array is an ordered sequence of zero or more values. - - To store objects in C++, a type is defined by the template parameters - explained below. - - @tparam ArrayType container type to store arrays (e.g., `std::vector` or - `std::list`) - @tparam AllocatorType allocator to use for arrays (e.g., `std::allocator`) - - #### Default type - - With the default values for @a ArrayType (`std::vector`) and @a - AllocatorType (`std::allocator`), the default value for @a array_t is: - - @code {.cpp} - std::vector< - basic_json, // value_type - std::allocator // allocator_type - > - @endcode - - #### Limits - - [RFC 8259](https://tools.ietf.org/html/rfc8259) specifies: - > An implementation may set limits on the maximum depth of nesting. - - In this class, the array's limit of nesting is not explicitly constrained. - However, a maximum depth of nesting may be introduced by the compiler or - runtime environment. A theoretical limit can be queried by calling the - @ref max_size function of a JSON array. - - #### Storage - - Arrays are stored as pointers in a @ref basic_json type. 
That is, for any - access to array values, a pointer of type `array_t*` must be dereferenced. - - @sa see @ref object_t -- type for an object value - - @since version 1.0.0 - */ - using array_t = ArrayType>; - - /*! - @brief a type for a string - - [RFC 8259](https://tools.ietf.org/html/rfc8259) describes JSON strings as follows: - > A string is a sequence of zero or more Unicode characters. - - To store objects in C++, a type is defined by the template parameter - described below. Unicode values are split by the JSON class into - byte-sized characters during deserialization. - - @tparam StringType the container to store strings (e.g., `std::string`). - Note this container is used for keys/names in objects, see @ref object_t. - - #### Default type - - With the default values for @a StringType (`std::string`), the default - value for @a string_t is: - - @code {.cpp} - std::string - @endcode - - #### Encoding - - Strings are stored in UTF-8 encoding. Therefore, functions like - `std::string::size()` or `std::string::length()` return the number of - bytes in the string rather than the number of characters or glyphs. - - #### String comparison - - [RFC 8259](https://tools.ietf.org/html/rfc8259) states: - > Software implementations are typically required to test names of object - > members for equality. Implementations that transform the textual - > representation into sequences of Unicode code units and then perform the - > comparison numerically, code unit by code unit, are interoperable in the - > sense that implementations will agree in all cases on equality or - > inequality of two strings. For example, implementations that compare - > strings with escaped characters unconverted may incorrectly find that - > `"a\\b"` and `"a\u005Cb"` are not equal. - - This implementation is interoperable as it does compare strings code unit - by code unit. - - #### Storage - - String values are stored as pointers in a @ref basic_json type. 
That is, - for any access to string values, a pointer of type `string_t*` must be - dereferenced. - - @since version 1.0.0 - */ - using string_t = StringType; - - /*! - @brief a type for a boolean - - [RFC 8259](https://tools.ietf.org/html/rfc8259) implicitly describes a boolean as a - type which differentiates the two literals `true` and `false`. - - To store objects in C++, a type is defined by the template parameter @a - BooleanType which chooses the type to use. - - #### Default type - - With the default values for @a BooleanType (`bool`), the default value for - @a boolean_t is: - - @code {.cpp} - bool - @endcode - - #### Storage - - Boolean values are stored directly inside a @ref basic_json type. - - @since version 1.0.0 - */ - using boolean_t = BooleanType; - - /*! - @brief a type for a number (integer) - - [RFC 8259](https://tools.ietf.org/html/rfc8259) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store integer numbers in C++, a type is defined by the template - parameter @a NumberIntegerType which chooses the type to use. 
- - #### Default type - - With the default values for @a NumberIntegerType (`int64_t`), the default - value for @a number_integer_t is: - - @code {.cpp} - int64_t - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in integer literals lead to an interpretation as octal - number. Internally, the value will be stored as decimal number. For - instance, the C++ integer literal `010` will be serialized to `8`. - During deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 8259](https://tools.ietf.org/html/rfc8259) specifies: - > An implementation may set limits on the range and precision of numbers. - - When the default type is used, the maximal integer number that can be - stored is `9223372036854775807` (INT64_MAX) and the minimal integer number - that can be stored is `-9223372036854775808` (INT64_MIN). Integer numbers - that are out of range will yield over/underflow when used in a - constructor. During deserialization, too large or small integer numbers - will be automatically be stored as @ref number_unsigned_t or @ref - number_float_t. - - [RFC 8259](https://tools.ietf.org/html/rfc8259) further states: - > Note that when such software is used, numbers that are integers and are - > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense - > that implementations will agree exactly on their numeric values. - - As this range is a subrange of the exactly supported range [INT64_MIN, - INT64_MAX], this class's integer type is interoperable. - - #### Storage - - Integer number values are stored directly inside a @ref basic_json type. - - @sa see @ref number_float_t -- type for number values (floating-point) - - @sa see @ref number_unsigned_t -- type for number values (unsigned integer) - - @since version 1.0.0 - */ - using number_integer_t = NumberIntegerType; - - /*! 
- @brief a type for a number (unsigned) - - [RFC 8259](https://tools.ietf.org/html/rfc8259) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. - However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store unsigned integer numbers in C++, a type is defined by the - template parameter @a NumberUnsignedType which chooses the type to use. - - #### Default type - - With the default values for @a NumberUnsignedType (`uint64_t`), the - default value for @a number_unsigned_t is: - - @code {.cpp} - uint64_t - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in integer literals lead to an interpretation as octal - number. Internally, the value will be stored as decimal number. For - instance, the C++ integer literal `010` will be serialized to `8`. - During deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 8259](https://tools.ietf.org/html/rfc8259) specifies: - > An implementation may set limits on the range and precision of numbers. 
- - When the default type is used, the maximal integer number that can be - stored is `18446744073709551615` (UINT64_MAX) and the minimal integer - number that can be stored is `0`. Integer numbers that are out of range - will yield over/underflow when used in a constructor. During - deserialization, too large or small integer numbers will be automatically - be stored as @ref number_integer_t or @ref number_float_t. - - [RFC 8259](https://tools.ietf.org/html/rfc8259) further states: - > Note that when such software is used, numbers that are integers and are - > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense - > that implementations will agree exactly on their numeric values. - - As this range is a subrange (when considered in conjunction with the - number_integer_t type) of the exactly supported range [0, UINT64_MAX], - this class's integer type is interoperable. - - #### Storage - - Integer number values are stored directly inside a @ref basic_json type. - - @sa see @ref number_float_t -- type for number values (floating-point) - @sa see @ref number_integer_t -- type for number values (integer) - - @since version 2.0.0 - */ - using number_unsigned_t = NumberUnsignedType; - - /*! - @brief a type for a number (floating-point) - - [RFC 8259](https://tools.ietf.org/html/rfc8259) describes numbers as follows: - > The representation of numbers is similar to that used in most - > programming languages. A number is represented in base 10 using decimal - > digits. It contains an integer component that may be prefixed with an - > optional minus sign, which may be followed by a fraction part and/or an - > exponent part. Leading zeros are not allowed. (...) Numeric values that - > cannot be represented in the grammar below (such as Infinity and NaN) - > are not permitted. - - This description includes both integer and floating-point numbers. 
- However, C++ allows more precise storage if it is known whether the number - is a signed integer, an unsigned integer or a floating-point number. - Therefore, three different types, @ref number_integer_t, @ref - number_unsigned_t and @ref number_float_t are used. - - To store floating-point numbers in C++, a type is defined by the template - parameter @a NumberFloatType which chooses the type to use. - - #### Default type - - With the default values for @a NumberFloatType (`double`), the default - value for @a number_float_t is: - - @code {.cpp} - double - @endcode - - #### Default behavior - - - The restrictions about leading zeros is not enforced in C++. Instead, - leading zeros in floating-point literals will be ignored. Internally, - the value will be stored as decimal number. For instance, the C++ - floating-point literal `01.2` will be serialized to `1.2`. During - deserialization, leading zeros yield an error. - - Not-a-number (NaN) values will be serialized to `null`. - - #### Limits - - [RFC 8259](https://tools.ietf.org/html/rfc8259) states: - > This specification allows implementations to set limits on the range and - > precision of numbers accepted. Since software that implements IEEE - > 754-2008 binary64 (double precision) numbers is generally available and - > widely used, good interoperability can be achieved by implementations - > that expect no more precision or range than these provide, in the sense - > that implementations will approximate JSON numbers within the expected - > precision. - - This implementation does exactly follow this approach, as it uses double - precision floating-point numbers. Note values smaller than - `-1.79769313486232e+308` and values greater than `1.79769313486232e+308` - will be stored as NaN internally and be serialized to `null`. - - #### Storage - - Floating-point number values are stored directly inside a @ref basic_json - type. 
- - @sa see @ref number_integer_t -- type for number values (integer) - - @sa see @ref number_unsigned_t -- type for number values (unsigned integer) - - @since version 1.0.0 - */ - using number_float_t = NumberFloatType; - - /*! - @brief a type for a packed binary type - - This type is a type designed to carry binary data that appears in various - serialized formats, such as CBOR's Major Type 2, MessagePack's bin, and - BSON's generic binary subtype. This type is NOT a part of standard JSON and - exists solely for compatibility with these binary types. As such, it is - simply defined as an ordered sequence of zero or more byte values. - - Additionally, as an implementation detail, the subtype of the binary data is - carried around as a `std::uint8_t`, which is compatible with both of the - binary data formats that use binary subtyping, (though the specific - numbering is incompatible with each other, and it is up to the user to - translate between them). - - [CBOR's RFC 7049](https://tools.ietf.org/html/rfc7049) describes this type - as: - > Major type 2: a byte string. The string's length in bytes is represented - > following the rules for positive integers (major type 0). - - [MessagePack's documentation on the bin type - family](https://github.com/msgpack/msgpack/blob/master/spec.md#bin-format-family) - describes this type as: - > Bin format family stores an byte array in 2, 3, or 5 bytes of extra bytes - > in addition to the size of the byte array. - - [BSON's specifications](http://bsonspec.org/spec.html) describe several - binary types; however, this type is intended to represent the generic binary - type which has the description: - > Generic binary subtype - This is the most commonly used binary subtype and - > should be the 'default' for drivers and tools. - - None of these impose any limitations on the internal representation other - than the basic unit of storage be some type of array whose parts are - decomposable into bytes. 
- - The default representation of this binary format is a - `std::vector`, which is a very common way to represent a byte - array in modern C++. - - #### Default type - - The default values for @a BinaryType is `std::vector` - - #### Storage - - Binary Arrays are stored as pointers in a @ref basic_json type. That is, - for any access to array values, a pointer of the type `binary_t*` must be - dereferenced. - - #### Notes on subtypes - - - CBOR - - Binary values are represented as byte strings. No subtypes are - supported and will be ignored when CBOR is written. - - MessagePack - - If a subtype is given and the binary array contains exactly 1, 2, 4, 8, - or 16 elements, the fixext family (fixext1, fixext2, fixext4, fixext8) - is used. For other sizes, the ext family (ext8, ext16, ext32) is used. - The subtype is then added as singed 8-bit integer. - - If no subtype is given, the bin family (bin8, bin16, bin32) is used. - - BSON - - If a subtype is given, it is used and added as unsigned 8-bit integer. - - If no subtype is given, the generic binary subtype 0x00 is used. - - @sa see @ref binary -- create a binary array - - @since version 3.8.0 - */ - using binary_t = nlohmann::byte_container_with_subtype; - /// @} - - private: - - /// helper for exception-safe object creation - template - JSON_HEDLEY_RETURNS_NON_NULL - static T* create(Args&& ... args) - { - AllocatorType alloc; - using AllocatorTraits = std::allocator_traits>; - - auto deleter = [&](T * obj) - { - AllocatorTraits::deallocate(alloc, obj, 1); - }; - std::unique_ptr obj(AllocatorTraits::allocate(alloc, 1), deleter); - AllocatorTraits::construct(alloc, obj.get(), std::forward(args)...); - JSON_ASSERT(obj != nullptr); - return obj.release(); - } - - //////////////////////// - // JSON value storage // - //////////////////////// - - JSON_PRIVATE_UNLESS_TESTED: - /*! - @brief a JSON value - - The actual storage for a JSON value of the @ref basic_json class. 
This - union combines the different storage types for the JSON value types - defined in @ref value_t. - - JSON type | value_t type | used type - --------- | --------------- | ------------------------ - object | object | pointer to @ref object_t - array | array | pointer to @ref array_t - string | string | pointer to @ref string_t - boolean | boolean | @ref boolean_t - number | number_integer | @ref number_integer_t - number | number_unsigned | @ref number_unsigned_t - number | number_float | @ref number_float_t - binary | binary | pointer to @ref binary_t - null | null | *no value is stored* - - @note Variable-length types (objects, arrays, and strings) are stored as - pointers. The size of the union should not exceed 64 bits if the default - value types are used. - - @since version 1.0.0 - */ - union json_value - { - /// object (stored with pointer to save storage) - object_t* object; - /// array (stored with pointer to save storage) - array_t* array; - /// string (stored with pointer to save storage) - string_t* string; - /// binary (stored with pointer to save storage) - binary_t* binary; - /// boolean - boolean_t boolean; - /// number (integer) - number_integer_t number_integer; - /// number (unsigned integer) - number_unsigned_t number_unsigned; - /// number (floating-point) - number_float_t number_float; - - /// default constructor (for null values) - json_value() = default; - /// constructor for booleans - json_value(boolean_t v) noexcept : boolean(v) {} - /// constructor for numbers (integer) - json_value(number_integer_t v) noexcept : number_integer(v) {} - /// constructor for numbers (unsigned) - json_value(number_unsigned_t v) noexcept : number_unsigned(v) {} - /// constructor for numbers (floating-point) - json_value(number_float_t v) noexcept : number_float(v) {} - /// constructor for empty values of a given type - json_value(value_t t) - { - switch (t) - { - case value_t::object: - { - object = create(); - break; - } - - case value_t::array: - { - 
array = create(); - break; - } - - case value_t::string: - { - string = create(""); - break; - } - - case value_t::binary: - { - binary = create(); - break; - } - - case value_t::boolean: - { - boolean = boolean_t(false); - break; - } - - case value_t::number_integer: - { - number_integer = number_integer_t(0); - break; - } - - case value_t::number_unsigned: - { - number_unsigned = number_unsigned_t(0); - break; - } - - case value_t::number_float: - { - number_float = number_float_t(0.0); - break; - } - - case value_t::null: - { - object = nullptr; // silence warning, see #821 - break; - } - - default: - { - object = nullptr; // silence warning, see #821 - if (JSON_HEDLEY_UNLIKELY(t == value_t::null)) - { - JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.9.1", basic_json())); // LCOV_EXCL_LINE - } - break; - } - } - } - - /// constructor for strings - json_value(const string_t& value) - { - string = create(value); - } - - /// constructor for rvalue strings - json_value(string_t&& value) - { - string = create(std::move(value)); - } - - /// constructor for objects - json_value(const object_t& value) - { - object = create(value); - } - - /// constructor for rvalue objects - json_value(object_t&& value) - { - object = create(std::move(value)); - } - - /// constructor for arrays - json_value(const array_t& value) - { - array = create(value); - } - - /// constructor for rvalue arrays - json_value(array_t&& value) - { - array = create(std::move(value)); - } - - /// constructor for binary arrays - json_value(const typename binary_t::container_type& value) - { - binary = create(value); - } - - /// constructor for rvalue binary arrays - json_value(typename binary_t::container_type&& value) - { - binary = create(std::move(value)); - } - - /// constructor for binary arrays (internal type) - json_value(const binary_t& value) - { - binary = create(value); - } - - /// constructor for rvalue binary arrays (internal type) - json_value(binary_t&& 
value) - { - binary = create(std::move(value)); - } - - void destroy(value_t t) noexcept - { - // flatten the current json_value to a heap-allocated stack - std::vector stack; - - // move the top-level items to stack - if (t == value_t::array) - { - stack.reserve(array->size()); - std::move(array->begin(), array->end(), std::back_inserter(stack)); - } - else if (t == value_t::object) - { - stack.reserve(object->size()); - for (auto&& it : *object) - { - stack.push_back(std::move(it.second)); - } - } - - while (!stack.empty()) - { - // move the last item to local variable to be processed - basic_json current_item(std::move(stack.back())); - stack.pop_back(); - - // if current_item is array/object, move - // its children to the stack to be processed later - if (current_item.is_array()) - { - std::move(current_item.m_value.array->begin(), current_item.m_value.array->end(), - std::back_inserter(stack)); - - current_item.m_value.array->clear(); - } - else if (current_item.is_object()) - { - for (auto&& it : *current_item.m_value.object) - { - stack.push_back(std::move(it.second)); - } - - current_item.m_value.object->clear(); - } - - // it's now safe that current_item get destructed - // since it doesn't have any children - } - - switch (t) - { - case value_t::object: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, object); - std::allocator_traits::deallocate(alloc, object, 1); - break; - } - - case value_t::array: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, array); - std::allocator_traits::deallocate(alloc, array, 1); - break; - } - - case value_t::string: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, string); - std::allocator_traits::deallocate(alloc, string, 1); - break; - } - - case value_t::binary: - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, binary); - std::allocator_traits::deallocate(alloc, binary, 1); - break; - } - - default: - { - break; - } - } - } - }; - - private: - /*! 
- @brief checks the class invariants - - This function asserts the class invariants. It needs to be called at the - end of every constructor to make sure that created objects respect the - invariant. Furthermore, it has to be called each time the type of a JSON - value is changed, because the invariant expresses a relationship between - @a m_type and @a m_value. - - Furthermore, the parent relation is checked for arrays and objects: If - @a check_parents true and the value is an array or object, then the - container's elements must have the current value as parent. - - @param[in] check_parents whether the parent relation should be checked. - The value is true by default and should only be set to false - during destruction of objects when the invariant does not - need to hold. - */ - void assert_invariant(bool check_parents = true) const noexcept - { - JSON_ASSERT(m_type != value_t::object || m_value.object != nullptr); - JSON_ASSERT(m_type != value_t::array || m_value.array != nullptr); - JSON_ASSERT(m_type != value_t::string || m_value.string != nullptr); - JSON_ASSERT(m_type != value_t::binary || m_value.binary != nullptr); - -#if JSON_DIAGNOSTICS - JSON_TRY - { - // cppcheck-suppress assertWithSideEffect - JSON_ASSERT(!check_parents || !is_structured() || std::all_of(begin(), end(), [this](const basic_json & j) - { - return j.m_parent == this; - })); - } - JSON_CATCH(...) 
{} // LCOV_EXCL_LINE -#endif - static_cast(check_parents); - } - - void set_parents() - { -#if JSON_DIAGNOSTICS - switch (m_type) - { - case value_t::array: - { - for (auto& element : *m_value.array) - { - element.m_parent = this; - } - break; - } - - case value_t::object: - { - for (auto& element : *m_value.object) - { - element.second.m_parent = this; - } - break; - } - - default: - break; - } -#endif - } - - iterator set_parents(iterator it, typename iterator::difference_type count) - { -#if JSON_DIAGNOSTICS - for (typename iterator::difference_type i = 0; i < count; ++i) - { - (it + i)->m_parent = this; - } -#else - static_cast(count); -#endif - return it; - } - - reference set_parent(reference j) - { -#if JSON_DIAGNOSTICS - j.m_parent = this; -#else - static_cast(j); -#endif - return j; - } - - public: - ////////////////////////// - // JSON parser callback // - ////////////////////////// - - /*! - @brief parser event types - - The parser callback distinguishes the following events: - - `object_start`: the parser read `{` and started to process a JSON object - - `key`: the parser read a key of a value in an object - - `object_end`: the parser read `}` and finished processing a JSON object - - `array_start`: the parser read `[` and started to process a JSON array - - `array_end`: the parser read `]` and finished processing a JSON array - - `value`: the parser finished reading a JSON value - - @image html callback_events.png "Example when certain parse events are triggered" - - @sa see @ref parser_callback_t for more information and examples - */ - using parse_event_t = detail::parse_event_t; - - /*! - @brief per-element parser callback type - - With a parser callback function, the result of parsing a JSON text can be - influenced. When passed to @ref parse, it is called on certain events - (passed as @ref parse_event_t via parameter @a event) with a set recursion - depth @a depth and context JSON value @a parsed. 
The return value of the - callback function is a boolean indicating whether the element that emitted - the callback shall be kept or not. - - We distinguish six scenarios (determined by the event type) in which the - callback function can be called. The following table describes the values - of the parameters @a depth, @a event, and @a parsed. - - parameter @a event | description | parameter @a depth | parameter @a parsed - ------------------ | ----------- | ------------------ | ------------------- - parse_event_t::object_start | the parser read `{` and started to process a JSON object | depth of the parent of the JSON object | a JSON value with type discarded - parse_event_t::key | the parser read a key of a value in an object | depth of the currently parsed JSON object | a JSON string containing the key - parse_event_t::object_end | the parser read `}` and finished processing a JSON object | depth of the parent of the JSON object | the parsed JSON object - parse_event_t::array_start | the parser read `[` and started to process a JSON array | depth of the parent of the JSON array | a JSON value with type discarded - parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array - parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value - - @image html callback_events.png "Example when certain parse events are triggered" - - Discarding a value (i.e., returning `false`) has different effects - depending on the context in which function was called: - - - Discarded values in structured types are skipped. That is, the parser - will behave as if the discarded value was never read. - - In case a value outside a structured type is skipped, it is replaced - with `null`. This case happens if the top-level element is skipped. 
- - @param[in] depth the depth of the recursion during parsing - - @param[in] event an event of type parse_event_t indicating the context in - the callback function has been called - - @param[in,out] parsed the current intermediate parse result; note that - writing to this value has no effect for parse_event_t::key events - - @return Whether the JSON value which called the function during parsing - should be kept (`true`) or not (`false`). In the latter case, it is either - skipped completely or replaced by an empty discarded object. - - @sa see @ref parse for examples - - @since version 1.0.0 - */ - using parser_callback_t = detail::parser_callback_t; - - ////////////////// - // constructors // - ////////////////// - - /// @name constructors and destructors - /// Constructors of class @ref basic_json, copy/move constructor, copy - /// assignment, static functions creating objects, and the destructor. - /// @{ - - /*! - @brief create an empty value with a given type - - Create an empty JSON value with a given type. The value will be default - initialized with an empty value which depends on the type: - - Value type | initial value - ----------- | ------------- - null | `null` - boolean | `false` - string | `""` - number | `0` - object | `{}` - array | `[]` - binary | empty array - - @param[in] v the type of the value to create - - @complexity Constant. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The following code shows the constructor for different @ref - value_t values,basic_json__value_t} - - @sa see @ref clear() -- restores the postcondition of this constructor - - @since version 1.0.0 - */ - basic_json(const value_t v) - : m_type(v), m_value(v) - { - assert_invariant(); - } - - /*! - @brief create a null object - - Create a `null` JSON value. It either takes a null pointer as parameter - (explicitly creating `null`) or no parameter (implicitly creating `null`). 
- The passed null pointer itself is not read -- it is only used to choose - the right constructor. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this constructor never throws - exceptions. - - @liveexample{The following code shows the constructor with and without a - null pointer parameter.,basic_json__nullptr_t} - - @since version 1.0.0 - */ - basic_json(std::nullptr_t = nullptr) noexcept - : basic_json(value_t::null) - { - assert_invariant(); - } - - /*! - @brief create a JSON value - - This is a "catch all" constructor for all compatible JSON types; that is, - types for which a `to_json()` method exists. The constructor forwards the - parameter @a val to that method (to `json_serializer::to_json` method - with `U = uncvref_t`, to be exact). - - Template type @a CompatibleType includes, but is not limited to, the - following types: - - **arrays**: @ref array_t and all kinds of compatible containers such as - `std::vector`, `std::deque`, `std::list`, `std::forward_list`, - `std::array`, `std::valarray`, `std::set`, `std::unordered_set`, - `std::multiset`, and `std::unordered_multiset` with a `value_type` from - which a @ref basic_json value can be constructed. - - **objects**: @ref object_t and all kinds of compatible associative - containers such as `std::map`, `std::unordered_map`, `std::multimap`, - and `std::unordered_multimap` with a `key_type` compatible to - @ref string_t and a `value_type` from which a @ref basic_json value can - be constructed. - - **strings**: @ref string_t, string literals, and all compatible string - containers can be used. - - **numbers**: @ref number_integer_t, @ref number_unsigned_t, - @ref number_float_t, and all convertible number types such as `int`, - `size_t`, `int64_t`, `float` or `double` can be used. - - **boolean**: @ref boolean_t / `bool` can be used. 
- - **binary**: @ref binary_t / `std::vector` may be used, - unfortunately because string literals cannot be distinguished from binary - character arrays by the C++ type system, all types compatible with `const - char*` will be directed to the string constructor instead. This is both - for backwards compatibility, and due to the fact that a binary type is not - a standard JSON type. - - See the examples below. - - @tparam CompatibleType a type such that: - - @a CompatibleType is not derived from `std::istream`, - - @a CompatibleType is not @ref basic_json (to avoid hijacking copy/move - constructors), - - @a CompatibleType is not a different @ref basic_json type (i.e. with different template arguments) - - @a CompatibleType is not a @ref basic_json nested type (e.g., - @ref json_pointer, @ref iterator, etc ...) - - `json_serializer` has a `to_json(basic_json_t&, CompatibleType&&)` method - - @tparam U = `uncvref_t` - - @param[in] val the value to be forwarded to the respective constructor - - @complexity Usually linear in the size of the passed @a val, also - depending on the implementation of the called `to_json()` - method. - - @exceptionsafety Depends on the called constructor. For types directly - supported by the library (i.e., all types for which no `to_json()` function - was provided), strong guarantee holds: if an exception is thrown, there are - no changes to any JSON value. 
- - @liveexample{The following code shows the constructor with several - compatible types.,basic_json__CompatibleType} - - @since version 2.1.0 - */ - template < typename CompatibleType, - typename U = detail::uncvref_t, - detail::enable_if_t < - !detail::is_basic_json::value && detail::is_compatible_type::value, int > = 0 > - basic_json(CompatibleType && val) noexcept(noexcept( // NOLINT(bugprone-forwarding-reference-overload,bugprone-exception-escape) - JSONSerializer::to_json(std::declval(), - std::forward(val)))) - { - JSONSerializer::to_json(*this, std::forward(val)); - set_parents(); - assert_invariant(); - } - - /*! - @brief create a JSON value from an existing one - - This is a constructor for existing @ref basic_json types. - It does not hijack copy/move constructors, since the parameter has different - template arguments than the current ones. - - The constructor tries to convert the internal @ref m_value of the parameter. - - @tparam BasicJsonType a type such that: - - @a BasicJsonType is a @ref basic_json type. - - @a BasicJsonType has different template arguments than @ref basic_json_t. - - @param[in] val the @ref basic_json value to be converted. - - @complexity Usually linear in the size of the passed @a val, also - depending on the implementation of the called `to_json()` - method. - - @exceptionsafety Depends on the called constructor. For types directly - supported by the library (i.e., all types for which no `to_json()` function - was provided), strong guarantee holds: if an exception is thrown, there are - no changes to any JSON value. 
- - @since version 3.2.0 - */ - template < typename BasicJsonType, - detail::enable_if_t < - detail::is_basic_json::value&& !std::is_same::value, int > = 0 > - basic_json(const BasicJsonType& val) - { - using other_boolean_t = typename BasicJsonType::boolean_t; - using other_number_float_t = typename BasicJsonType::number_float_t; - using other_number_integer_t = typename BasicJsonType::number_integer_t; - using other_number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using other_string_t = typename BasicJsonType::string_t; - using other_object_t = typename BasicJsonType::object_t; - using other_array_t = typename BasicJsonType::array_t; - using other_binary_t = typename BasicJsonType::binary_t; - - switch (val.type()) - { - case value_t::boolean: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::number_float: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::number_integer: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::number_unsigned: - JSONSerializer::to_json(*this, val.template get()); - break; - case value_t::string: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::object: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::array: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::binary: - JSONSerializer::to_json(*this, val.template get_ref()); - break; - case value_t::null: - *this = nullptr; - break; - case value_t::discarded: - m_type = value_t::discarded; - break; - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - set_parents(); - assert_invariant(); - } - - /*! - @brief create a container (array or object) from an initializer list - - Creates a JSON value of type array or object from the passed initializer - list @a init. 
In case @a type_deduction is `true` (default), the type of - the JSON value to be created is deducted from the initializer list @a init - according to the following rules: - - 1. If the list is empty, an empty JSON object value `{}` is created. - 2. If the list consists of pairs whose first element is a string, a JSON - object value is created where the first elements of the pairs are - treated as keys and the second elements are as values. - 3. In all other cases, an array is created. - - The rules aim to create the best fit between a C++ initializer list and - JSON values. The rationale is as follows: - - 1. The empty initializer list is written as `{}` which is exactly an empty - JSON object. - 2. C++ has no way of describing mapped types other than to list a list of - pairs. As JSON requires that keys must be of type string, rule 2 is the - weakest constraint one can pose on initializer lists to interpret them - as an object. - 3. In all other cases, the initializer list could not be interpreted as - JSON object type, so interpreting it as JSON array type is safe. - - With the rules described above, the following JSON values cannot be - expressed by an initializer list: - - - the empty array (`[]`): use @ref array(initializer_list_t) - with an empty initializer list in this case - - arrays whose elements satisfy rule 2: use @ref - array(initializer_list_t) with the same initializer list - in this case - - @note When used without parentheses around an empty initializer list, @ref - basic_json() is called instead of this function, yielding the JSON null - value. - - @param[in] init initializer list with JSON values - - @param[in] type_deduction internal parameter; when set to `true`, the type - of the JSON value is deducted from the initializer list @a init; when set - to `false`, the type provided via @a manual_type is forced. This mode is - used by the functions @ref array(initializer_list_t) and - @ref object(initializer_list_t). 
- - @param[in] manual_type internal parameter; when @a type_deduction is set - to `false`, the created JSON value will use the provided type (only @ref - value_t::array and @ref value_t::object are valid); when @a type_deduction - is set to `true`, this parameter has no effect - - @throw type_error.301 if @a type_deduction is `false`, @a manual_type is - `value_t::object`, but @a init contains an element which is not a pair - whose first element is a string. In this case, the constructor could not - create an object. If @a type_deduction would have be `true`, an array - would have been created. See @ref object(initializer_list_t) - for an example. - - @complexity Linear in the size of the initializer list @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The example below shows how JSON values are created from - initializer lists.,basic_json__list_init_t} - - @sa see @ref array(initializer_list_t) -- create a JSON array - value from an initializer list - @sa see @ref object(initializer_list_t) -- create a JSON object - value from an initializer list - - @since version 1.0.0 - */ - basic_json(initializer_list_t init, - bool type_deduction = true, - value_t manual_type = value_t::array) - { - // check if each element is an array with two elements whose first - // element is a string - bool is_an_object = std::all_of(init.begin(), init.end(), - [](const detail::json_ref& element_ref) - { - return element_ref->is_array() && element_ref->size() == 2 && (*element_ref)[0].is_string(); - }); - - // adjust type if type deduction is not wanted - if (!type_deduction) - { - // if array is wanted, do not create an object though possible - if (manual_type == value_t::array) - { - is_an_object = false; - } - - // if object is wanted but impossible, throw an exception - if (JSON_HEDLEY_UNLIKELY(manual_type == value_t::object && !is_an_object)) - { - JSON_THROW(type_error::create(301, "cannot create 
object from initializer list", basic_json())); - } - } - - if (is_an_object) - { - // the initializer list is a list of pairs -> create object - m_type = value_t::object; - m_value = value_t::object; - - for (auto& element_ref : init) - { - auto element = element_ref.moved_or_copied(); - m_value.object->emplace( - std::move(*((*element.m_value.array)[0].m_value.string)), - std::move((*element.m_value.array)[1])); - } - } - else - { - // the initializer list describes an array -> create array - m_type = value_t::array; - m_value.array = create(init.begin(), init.end()); - } - - set_parents(); - assert_invariant(); - } - - /*! - @brief explicitly create a binary array (without subtype) - - Creates a JSON binary array value from a given binary container. Binary - values are part of various binary formats, such as CBOR, MessagePack, and - BSON. This constructor is used to create a value for serialization to those - formats. - - @note Note, this function exists because of the difficulty in correctly - specifying the correct template overload in the standard value ctor, as both - JSON arrays and JSON binary arrays are backed with some form of a - `std::vector`. Because JSON binary arrays are a non-standard extension it - was decided that it would be best to prevent automatic initialization of a - binary array type, for backwards compatibility and so it does not happen on - accident. - - @param[in] init container containing bytes to use as binary type - - @return JSON binary array value - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @since version 3.8.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(const typename binary_t::container_type& init) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = init; - return res; - } - - /*! 
- @brief explicitly create a binary array (with subtype) - - Creates a JSON binary array value from a given binary container. Binary - values are part of various binary formats, such as CBOR, MessagePack, and - BSON. This constructor is used to create a value for serialization to those - formats. - - @note Note, this function exists because of the difficulty in correctly - specifying the correct template overload in the standard value ctor, as both - JSON arrays and JSON binary arrays are backed with some form of a - `std::vector`. Because JSON binary arrays are a non-standard extension it - was decided that it would be best to prevent automatic initialization of a - binary array type, for backwards compatibility and so it does not happen on - accident. - - @param[in] init container containing bytes to use as binary type - @param[in] subtype subtype to use in MessagePack and BSON - - @return JSON binary array value - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @since version 3.8.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(const typename binary_t::container_type& init, std::uint8_t subtype) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = binary_t(init, subtype); - return res; - } - - /// @copydoc binary(const typename binary_t::container_type&) - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(typename binary_t::container_type&& init) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = std::move(init); - return res; - } - - /// @copydoc binary(const typename binary_t::container_type&, std::uint8_t) - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json binary(typename binary_t::container_type&& init, std::uint8_t subtype) - { - auto res = basic_json(); - res.m_type = value_t::binary; - res.m_value = binary_t(std::move(init), subtype); - return res; - } - - /*! 
- @brief explicitly create an array from an initializer list - - Creates a JSON array value from a given initializer list. That is, given a - list of values `a, b, c`, creates the JSON value `[a, b, c]`. If the - initializer list is empty, the empty array `[]` is created. - - @note This function is only needed to express two edge cases that cannot - be realized with the initializer list constructor (@ref - basic_json(initializer_list_t, bool, value_t)). These cases - are: - 1. creating an array whose elements are all pairs whose first element is a - string -- in this case, the initializer list constructor would create an - object, taking the first elements as keys - 2. creating an empty array -- passing the empty initializer list to the - initializer list constructor yields an empty object - - @param[in] init initializer list with JSON values to create an array from - (optional) - - @return JSON array value - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The following code shows an example for the `array` - function.,array} - - @sa see @ref basic_json(initializer_list_t, bool, value_t) -- - create a JSON value from an initializer list - @sa see @ref object(initializer_list_t) -- create a JSON object - value from an initializer list - - @since version 1.0.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json array(initializer_list_t init = {}) - { - return basic_json(init, false, value_t::array); - } - - /*! - @brief explicitly create an object from an initializer list - - Creates a JSON object value from a given initializer list. The initializer - lists elements must be pairs, and their first elements must be strings. If - the initializer list is empty, the empty object `{}` is created. - - @note This function is only added for symmetry reasons. 
In contrast to the - related function @ref array(initializer_list_t), there are - no cases which can only be expressed by this function. That is, any - initializer list @a init can also be passed to the initializer list - constructor @ref basic_json(initializer_list_t, bool, value_t). - - @param[in] init initializer list to create an object from (optional) - - @return JSON object value - - @throw type_error.301 if @a init is not a list of pairs whose first - elements are strings. In this case, no object can be created. When such a - value is passed to @ref basic_json(initializer_list_t, bool, value_t), - an array would have been created from the passed initializer list @a init. - See example below. - - @complexity Linear in the size of @a init. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The following code shows an example for the `object` - function.,object} - - @sa see @ref basic_json(initializer_list_t, bool, value_t) -- - create a JSON value from an initializer list - @sa see @ref array(initializer_list_t) -- create a JSON array - value from an initializer list - - @since version 1.0.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json object(initializer_list_t init = {}) - { - return basic_json(init, false, value_t::object); - } - - /*! - @brief construct an array with count copies of given value - - Constructs a JSON array value by creating @a cnt copies of a passed value. - In case @a cnt is `0`, an empty array is created. - - @param[in] cnt the number of JSON copies of @a val to create - @param[in] val the JSON value to copy - - @post `std::distance(begin(),end()) == cnt` holds. - - @complexity Linear in @a cnt. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. 
- - @liveexample{The following code shows examples for the @ref - basic_json(size_type\, const basic_json&) - constructor.,basic_json__size_type_basic_json} - - @since version 1.0.0 - */ - basic_json(size_type cnt, const basic_json& val) - : m_type(value_t::array) - { - m_value.array = create(cnt, val); - set_parents(); - assert_invariant(); - } - - /*! - @brief construct a JSON container given an iterator range - - Constructs the JSON value with the contents of the range `[first, last)`. - The semantics depends on the different types a JSON value can have: - - In case of a null type, invalid_iterator.206 is thrown. - - In case of other primitive types (number, boolean, or string), @a first - must be `begin()` and @a last must be `end()`. In this case, the value is - copied. Otherwise, invalid_iterator.204 is thrown. - - In case of structured types (array, object), the constructor behaves as - similar versions for `std::vector` or `std::map`; that is, a JSON array - or object is constructed from the values in the range. - - @tparam InputIT an input iterator type (@ref iterator or @ref - const_iterator) - - @param[in] first begin of the range to copy from (included) - @param[in] last end of the range to copy from (excluded) - - @pre Iterators @a first and @a last must be initialized. **This - precondition is enforced with an assertion (see warning).** If - assertions are switched off, a violation of this precondition yields - undefined behavior. - - @pre Range `[first, last)` is valid. Usually, this precondition cannot be - checked efficiently. Only certain edge cases are detected; see the - description of the exceptions below. A violation of this precondition - yields undefined behavior. - - @warning A precondition is enforced with a runtime assertion that will - result in calling `std::abort` if this precondition is not met. - Assertions can be disabled by defining `NDEBUG` at compile time. 
- See https://en.cppreference.com/w/cpp/error/assert for more - information. - - @throw invalid_iterator.201 if iterators @a first and @a last are not - compatible (i.e., do not belong to the same JSON value). In this case, - the range `[first, last)` is undefined. - @throw invalid_iterator.204 if iterators @a first and @a last belong to a - primitive type (number, boolean, or string), but @a first does not point - to the first element any more. In this case, the range `[first, last)` is - undefined. See example code below. - @throw invalid_iterator.206 if iterators @a first and @a last belong to a - null value. In this case, the range `[first, last)` is undefined. - - @complexity Linear in distance between @a first and @a last. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @liveexample{The example below shows several ways to create JSON values by - specifying a subrange with iterators.,basic_json__InputIt_InputIt} - - @since version 1.0.0 - */ - template < class InputIT, typename std::enable_if < - std::is_same::value || - std::is_same::value, int >::type = 0 > - basic_json(InputIT first, InputIT last) - { - JSON_ASSERT(first.m_object != nullptr); - JSON_ASSERT(last.m_object != nullptr); - - // make sure iterator fits the current value - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(201, "iterators are not compatible", basic_json())); - } - - // copy type from first iterator - m_type = first.m_object->m_type; - - // check if iterator range is complete for primitive values - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - { - if (JSON_HEDLEY_UNLIKELY(!first.m_it.primitive_iterator.is_begin() - || !last.m_it.primitive_iterator.is_end())) - { - JSON_THROW(invalid_iterator::create(204, "iterators out of range", *first.m_object)); - } 
- break; - } - - default: - break; - } - - switch (m_type) - { - case value_t::number_integer: - { - m_value.number_integer = first.m_object->m_value.number_integer; - break; - } - - case value_t::number_unsigned: - { - m_value.number_unsigned = first.m_object->m_value.number_unsigned; - break; - } - - case value_t::number_float: - { - m_value.number_float = first.m_object->m_value.number_float; - break; - } - - case value_t::boolean: - { - m_value.boolean = first.m_object->m_value.boolean; - break; - } - - case value_t::string: - { - m_value = *first.m_object->m_value.string; - break; - } - - case value_t::object: - { - m_value.object = create(first.m_it.object_iterator, - last.m_it.object_iterator); - break; - } - - case value_t::array: - { - m_value.array = create(first.m_it.array_iterator, - last.m_it.array_iterator); - break; - } - - case value_t::binary: - { - m_value = *first.m_object->m_value.binary; - break; - } - - default: - JSON_THROW(invalid_iterator::create(206, "cannot construct with iterators from " + std::string(first.m_object->type_name()), *first.m_object)); - } - - set_parents(); - assert_invariant(); - } - - - /////////////////////////////////////// - // other constructors and destructor // - /////////////////////////////////////// - - template, - std::is_same>::value, int> = 0 > - basic_json(const JsonRef& ref) : basic_json(ref.moved_or_copied()) {} - - /*! - @brief copy constructor - - Creates a copy of a given JSON value. - - @param[in] other the JSON value to copy - - @post `*this == other` - - @complexity Linear in the size of @a other. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes to any JSON value. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is linear. - - As postcondition, it holds: `other == basic_json(other)`. 
- - @liveexample{The following code shows an example for the copy - constructor.,basic_json__basic_json} - - @since version 1.0.0 - */ - basic_json(const basic_json& other) - : m_type(other.m_type) - { - // check of passed value is valid - other.assert_invariant(); - - switch (m_type) - { - case value_t::object: - { - m_value = *other.m_value.object; - break; - } - - case value_t::array: - { - m_value = *other.m_value.array; - break; - } - - case value_t::string: - { - m_value = *other.m_value.string; - break; - } - - case value_t::boolean: - { - m_value = other.m_value.boolean; - break; - } - - case value_t::number_integer: - { - m_value = other.m_value.number_integer; - break; - } - - case value_t::number_unsigned: - { - m_value = other.m_value.number_unsigned; - break; - } - - case value_t::number_float: - { - m_value = other.m_value.number_float; - break; - } - - case value_t::binary: - { - m_value = *other.m_value.binary; - break; - } - - default: - break; - } - - set_parents(); - assert_invariant(); - } - - /*! - @brief move constructor - - Move constructor. Constructs a JSON value with the contents of the given - value @a other using move semantics. It "steals" the resources from @a - other and leaves it as JSON null value. - - @param[in,out] other value to move to this object - - @post `*this` has the same value as @a other before the call. - @post @a other is a JSON null value. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this constructor never throws - exceptions. - - @requirement This function helps `basic_json` satisfying the - [MoveConstructible](https://en.cppreference.com/w/cpp/named_req/MoveConstructible) - requirements. 
- - @liveexample{The code below shows the move constructor explicitly called - via std::move.,basic_json__moveconstructor} - - @since version 1.0.0 - */ - basic_json(basic_json&& other) noexcept - : m_type(std::move(other.m_type)), - m_value(std::move(other.m_value)) - { - // check that passed value is valid - other.assert_invariant(false); - - // invalidate payload - other.m_type = value_t::null; - other.m_value = {}; - - set_parents(); - assert_invariant(); - } - - /*! - @brief copy assignment - - Copy assignment operator. Copies a JSON value via the "copy and swap" - strategy: It is expressed in terms of the copy constructor, destructor, - and the `swap()` member function. - - @param[in] other value to copy from - - @complexity Linear. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is linear. - - @liveexample{The code below shows and example for the copy assignment. It - creates a copy of value `a` which is then swapped with `b`. Finally\, the - copy of `a` (which is the null value after the swap) is - destroyed.,basic_json__copyassignment} - - @since version 1.0.0 - */ - basic_json& operator=(basic_json other) noexcept ( - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value&& - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value - ) - { - // check that passed value is valid - other.assert_invariant(); - - using std::swap; - swap(m_type, other.m_type); - swap(m_value, other.m_value); - - set_parents(); - assert_invariant(); - return *this; - } - - /*! - @brief destructor - - Destroys the JSON value and frees all allocated memory. - - @complexity Linear. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is linear. 
- - All stored elements are destroyed and all memory is freed. - - @since version 1.0.0 - */ - ~basic_json() noexcept - { - assert_invariant(false); - m_value.destroy(m_type); - } - - /// @} - - public: - /////////////////////// - // object inspection // - /////////////////////// - - /// @name object inspection - /// Functions to inspect the type of a JSON value. - /// @{ - - /*! - @brief serialization - - Serialization function for JSON values. The function tries to mimic - Python's `json.dumps()` function, and currently supports its @a indent - and @a ensure_ascii parameters. - - @param[in] indent If indent is nonnegative, then array elements and object - members will be pretty-printed with that indent level. An indent level of - `0` will only insert newlines. `-1` (the default) selects the most compact - representation. - @param[in] indent_char The character to use for indentation if @a indent is - greater than `0`. The default is ` ` (space). - @param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters - in the output are escaped with `\uXXXX` sequences, and the result consists - of ASCII characters only. - @param[in] error_handler how to react on decoding errors; there are three - possible values: `strict` (throws and exception in case a decoding error - occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD), - and `ignore` (ignore invalid UTF-8 sequences during serialization; all - bytes are copied to the output unchanged). - - @return string containing the serialization of the JSON value - - @throw type_error.316 if a string stored inside the JSON value is not - UTF-8 encoded and @a error_handler is set to strict - - @note Binary values are serialized as object containing two keys: - - "bytes": an array of bytes as integers - - "subtype": the subtype as integer or "null" if the binary has no subtype - - @complexity Linear. 
- - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @liveexample{The following example shows the effect of different @a indent\, - @a indent_char\, and @a ensure_ascii parameters to the result of the - serialization.,dump} - - @see https://docs.python.org/2/library/json.html#json.dump - - @since version 1.0.0; indentation character @a indent_char, option - @a ensure_ascii and exceptions added in version 3.0.0; error - handlers added in version 3.4.0; serialization of binary values added - in version 3.8.0. - */ - string_t dump(const int indent = -1, - const char indent_char = ' ', - const bool ensure_ascii = false, - const error_handler_t error_handler = error_handler_t::strict) const - { - string_t result; - serializer s(detail::output_adapter(result), indent_char, error_handler); - - if (indent >= 0) - { - s.dump(*this, true, ensure_ascii, static_cast(indent)); - } - else - { - s.dump(*this, false, ensure_ascii, 0); - } - - return result; - } - - /*! - @brief return the type of the JSON value (explicit) - - Return the type of the JSON value as a value from the @ref value_t - enumeration. - - @return the type of the JSON value - Value type | return value - ------------------------- | ------------------------- - null | value_t::null - boolean | value_t::boolean - string | value_t::string - number (integer) | value_t::number_integer - number (unsigned integer) | value_t::number_unsigned - number (floating-point) | value_t::number_float - object | value_t::object - array | value_t::array - binary | value_t::binary - discarded | value_t::discarded - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. 
- - @liveexample{The following code exemplifies `type()` for all JSON - types.,type} - - @sa see @ref operator value_t() -- return the type of the JSON value (implicit) - @sa see @ref type_name() -- return the type as string - - @since version 1.0.0 - */ - constexpr value_t type() const noexcept - { - return m_type; - } - - /*! - @brief return whether type is primitive - - This function returns true if and only if the JSON type is primitive - (string, number, boolean, or null). - - @return `true` if type is primitive (string, number, boolean, or null), - `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_primitive()` for all JSON - types.,is_primitive} - - @sa see @ref is_structured() -- returns whether JSON value is structured - @sa see @ref is_null() -- returns whether JSON value is `null` - @sa see @ref is_string() -- returns whether JSON value is a string - @sa see @ref is_boolean() -- returns whether JSON value is a boolean - @sa see @ref is_number() -- returns whether JSON value is a number - @sa see @ref is_binary() -- returns whether JSON value is a binary array - - @since version 1.0.0 - */ - constexpr bool is_primitive() const noexcept - { - return is_null() || is_string() || is_boolean() || is_number() || is_binary(); - } - - /*! - @brief return whether type is structured - - This function returns true if and only if the JSON type is structured - (array or object). - - @return `true` if type is structured (array or object), `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. 
- - @liveexample{The following code exemplifies `is_structured()` for all JSON - types.,is_structured} - - @sa see @ref is_primitive() -- returns whether value is primitive - @sa see @ref is_array() -- returns whether value is an array - @sa see @ref is_object() -- returns whether value is an object - - @since version 1.0.0 - */ - constexpr bool is_structured() const noexcept - { - return is_array() || is_object(); - } - - /*! - @brief return whether value is null - - This function returns true if and only if the JSON value is null. - - @return `true` if type is null, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_null()` for all JSON - types.,is_null} - - @since version 1.0.0 - */ - constexpr bool is_null() const noexcept - { - return m_type == value_t::null; - } - - /*! - @brief return whether value is a boolean - - This function returns true if and only if the JSON value is a boolean. - - @return `true` if type is boolean, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_boolean()` for all JSON - types.,is_boolean} - - @since version 1.0.0 - */ - constexpr bool is_boolean() const noexcept - { - return m_type == value_t::boolean; - } - - /*! - @brief return whether value is a number - - This function returns true if and only if the JSON value is a number. This - includes both integer (signed and unsigned) and floating-point values. - - @return `true` if type is number (regardless whether integer, unsigned - integer or floating-type), `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. 
- - @liveexample{The following code exemplifies `is_number()` for all JSON - types.,is_number} - - @sa see @ref is_number_integer() -- check if value is an integer or unsigned - integer number - @sa see @ref is_number_unsigned() -- check if value is an unsigned integer - number - @sa see @ref is_number_float() -- check if value is a floating-point number - - @since version 1.0.0 - */ - constexpr bool is_number() const noexcept - { - return is_number_integer() || is_number_float(); - } - - /*! - @brief return whether value is an integer number - - This function returns true if and only if the JSON value is a signed or - unsigned integer number. This excludes floating-point values. - - @return `true` if type is an integer or unsigned integer number, `false` - otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_integer()` for all - JSON types.,is_number_integer} - - @sa see @ref is_number() -- check if value is a number - @sa see @ref is_number_unsigned() -- check if value is an unsigned integer - number - @sa see @ref is_number_float() -- check if value is a floating-point number - - @since version 1.0.0 - */ - constexpr bool is_number_integer() const noexcept - { - return m_type == value_t::number_integer || m_type == value_t::number_unsigned; - } - - /*! - @brief return whether value is an unsigned integer number - - This function returns true if and only if the JSON value is an unsigned - integer number. This excludes floating-point and signed integer values. - - @return `true` if type is an unsigned integer number, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. 
- - @liveexample{The following code exemplifies `is_number_unsigned()` for all - JSON types.,is_number_unsigned} - - @sa see @ref is_number() -- check if value is a number - @sa see @ref is_number_integer() -- check if value is an integer or unsigned - integer number - @sa see @ref is_number_float() -- check if value is a floating-point number - - @since version 2.0.0 - */ - constexpr bool is_number_unsigned() const noexcept - { - return m_type == value_t::number_unsigned; - } - - /*! - @brief return whether value is a floating-point number - - This function returns true if and only if the JSON value is a - floating-point number. This excludes signed and unsigned integer values. - - @return `true` if type is a floating-point number, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_number_float()` for all - JSON types.,is_number_float} - - @sa see @ref is_number() -- check if value is number - @sa see @ref is_number_integer() -- check if value is an integer number - @sa see @ref is_number_unsigned() -- check if value is an unsigned integer - number - - @since version 1.0.0 - */ - constexpr bool is_number_float() const noexcept - { - return m_type == value_t::number_float; - } - - /*! - @brief return whether value is an object - - This function returns true if and only if the JSON value is an object. - - @return `true` if type is object, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_object()` for all JSON - types.,is_object} - - @since version 1.0.0 - */ - constexpr bool is_object() const noexcept - { - return m_type == value_t::object; - } - - /*! - @brief return whether value is an array - - This function returns true if and only if the JSON value is an array. 
- - @return `true` if type is array, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_array()` for all JSON - types.,is_array} - - @since version 1.0.0 - */ - constexpr bool is_array() const noexcept - { - return m_type == value_t::array; - } - - /*! - @brief return whether value is a string - - This function returns true if and only if the JSON value is a string. - - @return `true` if type is string, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_string()` for all JSON - types.,is_string} - - @since version 1.0.0 - */ - constexpr bool is_string() const noexcept - { - return m_type == value_t::string; - } - - /*! - @brief return whether value is a binary array - - This function returns true if and only if the JSON value is a binary array. - - @return `true` if type is binary array, `false` otherwise. - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_binary()` for all JSON - types.,is_binary} - - @since version 3.8.0 - */ - constexpr bool is_binary() const noexcept - { - return m_type == value_t::binary; - } - - /*! - @brief return whether value is discarded - - This function returns true if and only if the JSON value was discarded - during parsing with a callback function (see @ref parser_callback_t). - - @note This function will always be `false` for JSON values after parsing. - That is, discarded values can only occur during parsing, but will be - removed when inside a structured value or replaced by null in other cases. - - @return `true` if type is discarded, `false` otherwise. - - @complexity Constant. 
- - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies `is_discarded()` for all JSON - types.,is_discarded} - - @since version 1.0.0 - */ - constexpr bool is_discarded() const noexcept - { - return m_type == value_t::discarded; - } - - /*! - @brief return the type of the JSON value (implicit) - - Implicitly return the type of the JSON value as a value from the @ref - value_t enumeration. - - @return the type of the JSON value - - @complexity Constant. - - @exceptionsafety No-throw guarantee: this member function never throws - exceptions. - - @liveexample{The following code exemplifies the @ref value_t operator for - all JSON types.,operator__value_t} - - @sa see @ref type() -- return the type of the JSON value (explicit) - @sa see @ref type_name() -- return the type as string - - @since version 1.0.0 - */ - constexpr operator value_t() const noexcept - { - return m_type; - } - - /// @} - - private: - ////////////////// - // value access // - ////////////////// - - /// get a boolean (explicit) - boolean_t get_impl(boolean_t* /*unused*/) const - { - if (JSON_HEDLEY_LIKELY(is_boolean())) - { - return m_value.boolean; - } - - JSON_THROW(type_error::create(302, "type must be boolean, but is " + std::string(type_name()), *this)); - } - - /// get a pointer to the value (object) - object_t* get_impl_ptr(object_t* /*unused*/) noexcept - { - return is_object() ? m_value.object : nullptr; - } - - /// get a pointer to the value (object) - constexpr const object_t* get_impl_ptr(const object_t* /*unused*/) const noexcept - { - return is_object() ? m_value.object : nullptr; - } - - /// get a pointer to the value (array) - array_t* get_impl_ptr(array_t* /*unused*/) noexcept - { - return is_array() ? m_value.array : nullptr; - } - - /// get a pointer to the value (array) - constexpr const array_t* get_impl_ptr(const array_t* /*unused*/) const noexcept - { - return is_array() ? 
m_value.array : nullptr; - } - - /// get a pointer to the value (string) - string_t* get_impl_ptr(string_t* /*unused*/) noexcept - { - return is_string() ? m_value.string : nullptr; - } - - /// get a pointer to the value (string) - constexpr const string_t* get_impl_ptr(const string_t* /*unused*/) const noexcept - { - return is_string() ? m_value.string : nullptr; - } - - /// get a pointer to the value (boolean) - boolean_t* get_impl_ptr(boolean_t* /*unused*/) noexcept - { - return is_boolean() ? &m_value.boolean : nullptr; - } - - /// get a pointer to the value (boolean) - constexpr const boolean_t* get_impl_ptr(const boolean_t* /*unused*/) const noexcept - { - return is_boolean() ? &m_value.boolean : nullptr; - } - - /// get a pointer to the value (integer number) - number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept - { - return is_number_integer() ? &m_value.number_integer : nullptr; - } - - /// get a pointer to the value (integer number) - constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /*unused*/) const noexcept - { - return is_number_integer() ? &m_value.number_integer : nullptr; - } - - /// get a pointer to the value (unsigned number) - number_unsigned_t* get_impl_ptr(number_unsigned_t* /*unused*/) noexcept - { - return is_number_unsigned() ? &m_value.number_unsigned : nullptr; - } - - /// get a pointer to the value (unsigned number) - constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t* /*unused*/) const noexcept - { - return is_number_unsigned() ? &m_value.number_unsigned : nullptr; - } - - /// get a pointer to the value (floating-point number) - number_float_t* get_impl_ptr(number_float_t* /*unused*/) noexcept - { - return is_number_float() ? &m_value.number_float : nullptr; - } - - /// get a pointer to the value (floating-point number) - constexpr const number_float_t* get_impl_ptr(const number_float_t* /*unused*/) const noexcept - { - return is_number_float() ? 
&m_value.number_float : nullptr; - } - - /// get a pointer to the value (binary) - binary_t* get_impl_ptr(binary_t* /*unused*/) noexcept - { - return is_binary() ? m_value.binary : nullptr; - } - - /// get a pointer to the value (binary) - constexpr const binary_t* get_impl_ptr(const binary_t* /*unused*/) const noexcept - { - return is_binary() ? m_value.binary : nullptr; - } - - /*! - @brief helper function to implement get_ref() - - This function helps to implement get_ref() without code duplication for - const and non-const overloads - - @tparam ThisType will be deduced as `basic_json` or `const basic_json` - - @throw type_error.303 if ReferenceType does not match underlying value - type of the current JSON - */ - template - static ReferenceType get_ref_impl(ThisType& obj) - { - // delegate the call to get_ptr<>() - auto* ptr = obj.template get_ptr::type>(); - - if (JSON_HEDLEY_LIKELY(ptr != nullptr)) - { - return *ptr; - } - - JSON_THROW(type_error::create(303, "incompatible ReferenceType for get_ref, actual type is " + std::string(obj.type_name()), obj)); - } - - public: - /// @name value access - /// Direct access to the stored value of a JSON value. - /// @{ - - /*! - @brief get a pointer value (implicit) - - Implicit pointer access to the internally stored JSON value. No copies are - made. - - @warning Writing data to the pointee of the result yields an undefined - state. - - @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref - object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, - @ref number_unsigned_t, or @ref number_float_t. Enforced by a static - assertion. - - @return pointer to the internally stored JSON value if the requested - pointer type @a PointerType fits to the JSON value; `nullptr` otherwise - - @complexity Constant. - - @liveexample{The example below shows how pointers to internal values of a - JSON value can be requested. 
Note that no type conversions are made and a - `nullptr` is returned if the value and the requested pointer type does not - match.,get_ptr} - - @since version 1.0.0 - */ - template::value, int>::type = 0> - auto get_ptr() noexcept -> decltype(std::declval().get_impl_ptr(std::declval())) - { - // delegate the call to get_impl_ptr<>() - return get_impl_ptr(static_cast(nullptr)); - } - - /*! - @brief get a pointer value (implicit) - @copydoc get_ptr() - */ - template < typename PointerType, typename std::enable_if < - std::is_pointer::value&& - std::is_const::type>::value, int >::type = 0 > - constexpr auto get_ptr() const noexcept -> decltype(std::declval().get_impl_ptr(std::declval())) - { - // delegate the call to get_impl_ptr<>() const - return get_impl_ptr(static_cast(nullptr)); - } - - private: - /*! - @brief get a value (explicit) - - Explicit type conversion between the JSON value and a compatible value - which is [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible) - and [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). - The value is converted by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - ValueType ret; - JSONSerializer::from_json(*this, ret); - return ret; - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json, - - @ref json_serializer has a `from_json()` method of the form - `void from_json(const basic_json&, ValueType&)`, and - - @ref json_serializer does not have a `from_json()` method of - the form `ValueType from_json(const basic_json&)` - - @tparam ValueType the returned value type - - @return copy of the JSON value, converted to @a ValueType - - @throw what @ref json_serializer `from_json()` method throws - - @liveexample{The example below shows several conversions from JSON values - to other types. 
There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,get__ValueType_const} - - @since version 2.1.0 - */ - template < typename ValueType, - detail::enable_if_t < - detail::is_default_constructible::value&& - detail::has_from_json::value, - int > = 0 > - ValueType get_impl(detail::priority_tag<0> /*unused*/) const noexcept(noexcept( - JSONSerializer::from_json(std::declval(), std::declval()))) - { - ValueType ret{}; - JSONSerializer::from_json(*this, ret); - return ret; - } - - /*! - @brief get a value (explicit); special case - - Explicit type conversion between the JSON value and a compatible value - which is **not** [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible) - and **not** [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). - The value is converted by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - return JSONSerializer::from_json(*this); - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json and - - @ref json_serializer has a `from_json()` method of the form - `ValueType from_json(const basic_json&)` - - @note If @ref json_serializer has both overloads of - `from_json()`, this one is chosen. - - @tparam ValueType the returned value type - - @return copy of the JSON value, converted to @a ValueType - - @throw what @ref json_serializer `from_json()` method throws - - @since version 2.1.0 - */ - template < typename ValueType, - detail::enable_if_t < - detail::has_non_default_from_json::value, - int > = 0 > - ValueType get_impl(detail::priority_tag<1> /*unused*/) const noexcept(noexcept( - JSONSerializer::from_json(std::declval()))) - { - return JSONSerializer::from_json(*this); - } - - /*! 
- @brief get special-case overload - - This overloads converts the current @ref basic_json in a different - @ref basic_json type - - @tparam BasicJsonType == @ref basic_json - - @return a copy of *this, converted into @a BasicJsonType - - @complexity Depending on the implementation of the called `from_json()` - method. - - @since version 3.2.0 - */ - template < typename BasicJsonType, - detail::enable_if_t < - detail::is_basic_json::value, - int > = 0 > - BasicJsonType get_impl(detail::priority_tag<2> /*unused*/) const - { - return *this; - } - - /*! - @brief get special-case overload - - This overloads avoids a lot of template boilerplate, it can be seen as the - identity method - - @tparam BasicJsonType == @ref basic_json - - @return a copy of *this - - @complexity Constant. - - @since version 2.1.0 - */ - template::value, - int> = 0> - basic_json get_impl(detail::priority_tag<3> /*unused*/) const - { - return *this; - } - - /*! - @brief get a pointer value (explicit) - @copydoc get() - */ - template::value, - int> = 0> - constexpr auto get_impl(detail::priority_tag<4> /*unused*/) const noexcept - -> decltype(std::declval().template get_ptr()) - { - // delegate the call to get_ptr - return get_ptr(); - } - - public: - /*! - @brief get a (pointer) value (explicit) - - Performs explicit type conversion between the JSON value and a compatible value if required. - - - If the requested type is a pointer to the internally stored JSON value that pointer is returned. - No copies are made. - - - If the requested type is the current @ref basic_json, or a different @ref basic_json convertible - from the current @ref basic_json. - - - Otherwise the value is converted by calling the @ref json_serializer `from_json()` - method. 
- - @tparam ValueTypeCV the provided value type - @tparam ValueType the returned value type - - @return copy of the JSON value, converted to @tparam ValueType if necessary - - @throw what @ref json_serializer `from_json()` method throws if conversion is required - - @since version 2.1.0 - */ - template < typename ValueTypeCV, typename ValueType = detail::uncvref_t> -#if defined(JSON_HAS_CPP_14) - constexpr -#endif - auto get() const noexcept( - noexcept(std::declval().template get_impl(detail::priority_tag<4> {}))) - -> decltype(std::declval().template get_impl(detail::priority_tag<4> {})) - { - // we cannot static_assert on ValueTypeCV being non-const, because - // there is support for get(), which is why we - // still need the uncvref - static_assert(!std::is_reference::value, - "get() cannot be used with reference types, you might want to use get_ref()"); - return get_impl(detail::priority_tag<4> {}); - } - - /*! - @brief get a pointer value (explicit) - - Explicit pointer access to the internally stored JSON value. No copies are - made. - - @warning The pointer becomes invalid if the underlying JSON object - changes. - - @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref - object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, - @ref number_unsigned_t, or @ref number_float_t. - - @return pointer to the internally stored JSON value if the requested - pointer type @a PointerType fits to the JSON value; `nullptr` otherwise - - @complexity Constant. - - @liveexample{The example below shows how pointers to internal values of a - JSON value can be requested. 
Note that no type conversions are made and a - `nullptr` is returned if the value and the requested pointer type does not - match.,get__PointerType} - - @sa see @ref get_ptr() for explicit pointer-member access - - @since version 1.0.0 - */ - template::value, int>::type = 0> - auto get() noexcept -> decltype(std::declval().template get_ptr()) - { - // delegate the call to get_ptr - return get_ptr(); - } - - /*! - @brief get a value (explicit) - - Explicit type conversion between the JSON value and a compatible value. - The value is filled into the input parameter by calling the @ref json_serializer - `from_json()` method. - - The function is equivalent to executing - @code {.cpp} - ValueType v; - JSONSerializer::from_json(*this, v); - @endcode - - This overloads is chosen if: - - @a ValueType is not @ref basic_json, - - @ref json_serializer has a `from_json()` method of the form - `void from_json(const basic_json&, ValueType&)`, and - - @tparam ValueType the input parameter type. - - @return the input parameter, allowing chaining calls. - - @throw what @ref json_serializer `from_json()` method throws - - @liveexample{The example below shows several conversions from JSON values - to other types. 
There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,get_to} - - @since version 3.3.0 - */ - template < typename ValueType, - detail::enable_if_t < - !detail::is_basic_json::value&& - detail::has_from_json::value, - int > = 0 > - ValueType & get_to(ValueType& v) const noexcept(noexcept( - JSONSerializer::from_json(std::declval(), v))) - { - JSONSerializer::from_json(*this, v); - return v; - } - - // specialization to allow to call get_to with a basic_json value - // see https://github.com/nlohmann/json/issues/2175 - template::value, - int> = 0> - ValueType & get_to(ValueType& v) const - { - v = *this; - return v; - } - - template < - typename T, std::size_t N, - typename Array = T (&)[N], // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) - detail::enable_if_t < - detail::has_from_json::value, int > = 0 > - Array get_to(T (&v)[N]) const // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) - noexcept(noexcept(JSONSerializer::from_json( - std::declval(), v))) - { - JSONSerializer::from_json(*this, v); - return v; - } - - /*! - @brief get a reference value (implicit) - - Implicit reference access to the internally stored JSON value. No copies - are made. - - @warning Writing data to the referee of the result yields an undefined - state. - - @tparam ReferenceType reference type; must be a reference to @ref array_t, - @ref object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or - @ref number_float_t. Enforced by static assertion. 
- - @return reference to the internally stored JSON value if the requested - reference type @a ReferenceType fits to the JSON value; throws - type_error.303 otherwise - - @throw type_error.303 in case passed type @a ReferenceType is incompatible - with the stored JSON value; see example below - - @complexity Constant. - - @liveexample{The example shows several calls to `get_ref()`.,get_ref} - - @since version 1.1.0 - */ - template::value, int>::type = 0> - ReferenceType get_ref() - { - // delegate call to get_ref_impl - return get_ref_impl(*this); - } - - /*! - @brief get a reference value (implicit) - @copydoc get_ref() - */ - template < typename ReferenceType, typename std::enable_if < - std::is_reference::value&& - std::is_const::type>::value, int >::type = 0 > - ReferenceType get_ref() const - { - // delegate call to get_ref_impl - return get_ref_impl(*this); - } - - /*! - @brief get a value (implicit) - - Implicit type conversion between the JSON value and a compatible value. - The call is realized by calling @ref get() const. - - @tparam ValueType non-pointer type compatible to the JSON value, for - instance `int` for JSON integer numbers, `bool` for JSON booleans, or - `std::vector` types for JSON arrays. The character type of @ref string_t - as well as an initializer list of this type is excluded to avoid - ambiguities as these types implicitly convert to `std::string`. - - @return copy of the JSON value, converted to type @a ValueType - - @throw type_error.302 in case passed type @a ValueType is incompatible - to the JSON value type (e.g., the JSON value is of type boolean, but a - string is requested); see example below - - @complexity Linear in the size of the JSON value. - - @liveexample{The example below shows several conversions from JSON values - to other types. 
There a few things to note: (1) Floating-point numbers can - be converted to integers\, (2) A JSON array can be converted to a standard - `std::vector`\, (3) A JSON object can be converted to C++ - associative containers such as `std::unordered_map`.,operator__ValueType} - - @since version 1.0.0 - */ - template < typename ValueType, typename std::enable_if < - !std::is_pointer::value&& - !std::is_same>::value&& - !std::is_same::value&& - !detail::is_basic_json::value - && !std::is_same>::value -#if defined(JSON_HAS_CPP_17) && (defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER <= 1914)) - && !std::is_same::value -#endif - && detail::is_detected::value - , int >::type = 0 > - JSON_EXPLICIT operator ValueType() const - { - // delegate the call to get<>() const - return get(); - } - - /*! - @return reference to the binary value - - @throw type_error.302 if the value is not binary - - @sa see @ref is_binary() to check if the value is binary - - @since version 3.8.0 - */ - binary_t& get_binary() - { - if (!is_binary()) - { - JSON_THROW(type_error::create(302, "type must be binary, but is " + std::string(type_name()), *this)); - } - - return *get_ptr(); - } - - /// @copydoc get_binary() - const binary_t& get_binary() const - { - if (!is_binary()) - { - JSON_THROW(type_error::create(302, "type must be binary, but is " + std::string(type_name()), *this)); - } - - return *get_ptr(); - } - - /// @} - - - //////////////////// - // element access // - //////////////////// - - /// @name element access - /// Access to the JSON value. - /// @{ - - /*! - @brief access specified array element with bounds checking - - Returns a reference to the element at specified location @a idx, with - bounds checking. - - @param[in] idx index of the element to access - - @return reference to the element at index @a idx - - @throw type_error.304 if the JSON value is not an array; in this case, - calling `at` with an index makes no sense. See example below. 
- @throw out_of_range.401 if the index @a idx is out of range of the array; - that is, `idx >= size()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 1.0.0 - - @liveexample{The example below shows how array elements can be read and - written using `at()`. It also demonstrates the different exceptions that - can be thrown.,at__size_type} - */ - reference at(size_type idx) - { - // at only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - JSON_TRY - { - return set_parent(m_value.array->at(idx)); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range", *this)); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()), *this)); - } - } - - /*! - @brief access specified array element with bounds checking - - Returns a const reference to the element at specified location @a idx, - with bounds checking. - - @param[in] idx index of the element to access - - @return const reference to the element at index @a idx - - @throw type_error.304 if the JSON value is not an array; in this case, - calling `at` with an index makes no sense. See example below. - @throw out_of_range.401 if the index @a idx is out of range of the array; - that is, `idx >= size()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 1.0.0 - - @liveexample{The example below shows how array elements can be read using - `at()`. 
It also demonstrates the different exceptions that can be thrown., - at__size_type_const} - */ - const_reference at(size_type idx) const - { - // at only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - JSON_TRY - { - return m_value.array->at(idx); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range", *this)); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()), *this)); - } - } - - /*! - @brief access specified object element with bounds checking - - Returns a reference to the element at with specified key @a key, with - bounds checking. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.304 if the JSON value is not an object; in this case, - calling `at` with a key makes no sense. See example below. - @throw out_of_range.403 if the key @a key is is not stored in the object; - that is, `find(key) == end()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Logarithmic in the size of the container. - - @sa see @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - @sa see @ref value() for access by value with a default value - - @since version 1.0.0 - - @liveexample{The example below shows how object elements can be read and - written using `at()`. 
It also demonstrates the different exceptions that - can be thrown.,at__object_t_key_type} - */ - reference at(const typename object_t::key_type& key) - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_TRY - { - return set_parent(m_value.object->at(key)); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(403, "key '" + key + "' not found", *this)); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()), *this)); - } - } - - /*! - @brief access specified object element with bounds checking - - Returns a const reference to the element at with specified key @a key, - with bounds checking. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @throw type_error.304 if the JSON value is not an object; in this case, - calling `at` with a key makes no sense. See example below. - @throw out_of_range.403 if the key @a key is is not stored in the object; - that is, `find(key) == end()`. See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Logarithmic in the size of the container. - - @sa see @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - @sa see @ref value() for access by value with a default value - - @since version 1.0.0 - - @liveexample{The example below shows how object elements can be read using - `at()`. 
It also demonstrates the different exceptions that can be thrown., - at__object_t_key_type_const} - */ - const_reference at(const typename object_t::key_type& key) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_TRY - { - return m_value.object->at(key); - } - JSON_CATCH (std::out_of_range&) - { - // create better exception explanation - JSON_THROW(out_of_range::create(403, "key '" + key + "' not found", *this)); - } - } - else - { - JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()), *this)); - } - } - - /*! - @brief access specified array element - - Returns a reference to the element at specified location @a idx. - - @note If @a idx is beyond the range of the array (i.e., `idx >= size()`), - then the array is silently filled up with `null` values to make `idx` a - valid reference to the last stored element. - - @param[in] idx index of the element to access - - @return reference to the element at index @a idx - - @throw type_error.305 if the JSON value is not an array or null; in that - cases, using the [] operator with an index makes no sense. - - @complexity Constant if @a idx is in the range of the array. Otherwise - linear in `idx - size()`. - - @liveexample{The example below shows how array elements can be read and - written using `[]` operator. 
Note the addition of `null` - values.,operatorarray__size_type} - - @since version 1.0.0 - */ - reference operator[](size_type idx) - { - // implicitly convert null value to an empty array - if (is_null()) - { - m_type = value_t::array; - m_value.array = create(); - assert_invariant(); - } - - // operator[] only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - // fill up array with null values if given idx is outside range - if (idx >= m_value.array->size()) - { -#if JSON_DIAGNOSTICS - // remember array size before resizing - const auto previous_size = m_value.array->size(); -#endif - m_value.array->resize(idx + 1); - -#if JSON_DIAGNOSTICS - // set parent for values added above - set_parents(begin() + static_cast(previous_size), static_cast(idx + 1 - previous_size)); -#endif - } - - return m_value.array->operator[](idx); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a numeric argument with " + std::string(type_name()), *this)); - } - - /*! - @brief access specified array element - - Returns a const reference to the element at specified location @a idx. - - @param[in] idx index of the element to access - - @return const reference to the element at index @a idx - - @throw type_error.305 if the JSON value is not an array; in that case, - using the [] operator with an index makes no sense. - - @complexity Constant. - - @liveexample{The example below shows how array elements can be read using - the `[]` operator.,operatorarray__size_type_const} - - @since version 1.0.0 - */ - const_reference operator[](size_type idx) const - { - // const operator[] only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - return m_value.array->operator[](idx); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a numeric argument with " + std::string(type_name()), *this)); - } - - /*! - @brief access specified object element - - Returns a reference to the element at with specified key @a key. 
- - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa see @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa see @ref value() for access by value with a default value - - @since version 1.0.0 - */ - reference operator[](const typename object_t::key_type& key) - { - // implicitly convert null value to an empty object - if (is_null()) - { - m_type = value_t::object; - m_value.object = create(); - assert_invariant(); - } - - // operator[] only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - return set_parent(m_value.object->operator[](key)); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()), *this)); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. - - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @pre The element with key @a key must exist. **This precondition is - enforced with an assertion.** - - @throw type_error.305 if the JSON value is not an object; in that case, - using the [] operator with a key makes no sense. 
- - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa see @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa see @ref value() for access by value with a default value - - @since version 1.0.0 - */ - const_reference operator[](const typename object_t::key_type& key) const - { - // const operator[] only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_ASSERT(m_value.object->find(key) != m_value.object->end()); - return m_value.object->find(key)->second; - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()), *this)); - } - - /*! - @brief access specified object element - - Returns a reference to the element at with specified key @a key. - - @note If @a key is not found in the object, then it is silently added to - the object and filled with a `null` value to make `key` a valid reference. - In case the value was `null` before, it is converted to an object. - - @param[in] key key of the element to access - - @return reference to the element at key @a key - - @throw type_error.305 if the JSON value is not an object or null; in that - cases, using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. 
- - @liveexample{The example below shows how object elements can be read and - written using the `[]` operator.,operatorarray__key_type} - - @sa see @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa see @ref value() for access by value with a default value - - @since version 1.1.0 - */ - template - JSON_HEDLEY_NON_NULL(2) - reference operator[](T* key) - { - // implicitly convert null to object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - return set_parent(m_value.object->operator[](key)); - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()), *this)); - } - - /*! - @brief read-only access specified object element - - Returns a const reference to the element at with specified key @a key. No - bounds checking is performed. - - @warning If the element with key @a key does not exist, the behavior is - undefined. - - @param[in] key key of the element to access - - @return const reference to the element at key @a key - - @pre The element with key @a key must exist. **This precondition is - enforced with an assertion.** - - @throw type_error.305 if the JSON value is not an object; in that case, - using the [] operator with a key makes no sense. - - @complexity Logarithmic in the size of the container. 
- - @liveexample{The example below shows how object elements can be read using - the `[]` operator.,operatorarray__key_type_const} - - @sa see @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa see @ref value() for access by value with a default value - - @since version 1.1.0 - */ - template - JSON_HEDLEY_NON_NULL(2) - const_reference operator[](T* key) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - JSON_ASSERT(m_value.object->find(key) != m_value.object->end()); - return m_value.object->find(key)->second; - } - - JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()), *this)); - } - - /*! - @brief access specified object element with default value - - Returns either a copy of an object's element at the specified key @a key - or a given default value if no element with key @a key exists. - - The function is basically equivalent to executing - @code {.cpp} - try { - return at(key); - } catch(out_of_range) { - return default_value; - } - @endcode - - @note Unlike @ref at(const typename object_t::key_type&), this function - does not throw if the given key @a key was not found. - - @note Unlike @ref operator[](const typename object_t::key_type& key), this - function does not implicitly add an element to the position defined by @a - key. This function is furthermore also applicable to const objects. - - @param[in] key key of the element to access - @param[in] default_value the value to return if @a key is not found - - @tparam ValueType type compatible to JSON values, for instance `int` for - JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for - JSON arrays. Note the type of the expected value at @a key and the default - value @a default_value must be compatible. 
- - @return copy of the element at key @a key or @a default_value if @a key - is not found - - @throw type_error.302 if @a default_value does not match the type of the - value at @a key - @throw type_error.306 if the JSON value is not an object; in that case, - using `value()` with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be queried - with a default value.,basic_json__value} - - @sa see @ref at(const typename object_t::key_type&) for access by reference - with range checking - @sa see @ref operator[](const typename object_t::key_type&) for unchecked - access by reference - - @since version 1.0.0 - */ - // using std::is_convertible in a std::enable_if will fail when using explicit conversions - template < class ValueType, typename std::enable_if < - detail::is_getable::value - && !std::is_same::value, int >::type = 0 > - ValueType value(const typename object_t::key_type& key, const ValueType& default_value) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - // if key is found, return value and given default value otherwise - const auto it = find(key); - if (it != end()) - { - return it->template get(); - } - - return default_value; - } - - JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()), *this)); - } - - /*! - @brief overload for a default value of type const char* - @copydoc basic_json::value(const typename object_t::key_type&, const ValueType&) const - */ - string_t value(const typename object_t::key_type& key, const char* default_value) const - { - return value(key, string_t(default_value)); - } - - /*! - @brief access specified object element via JSON Pointer with default value - - Returns either a copy of an object's element at the specified key @a key - or a given default value if no element with key @a key exists. 
- - The function is basically equivalent to executing - @code {.cpp} - try { - return at(ptr); - } catch(out_of_range) { - return default_value; - } - @endcode - - @note Unlike @ref at(const json_pointer&), this function does not throw - if the given key @a key was not found. - - @param[in] ptr a JSON pointer to the element to access - @param[in] default_value the value to return if @a ptr found no value - - @tparam ValueType type compatible to JSON values, for instance `int` for - JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for - JSON arrays. Note the type of the expected value at @a key and the default - value @a default_value must be compatible. - - @return copy of the element at key @a key or @a default_value if @a key - is not found - - @throw type_error.302 if @a default_value does not match the type of the - value at @a ptr - @throw type_error.306 if the JSON value is not an object; in that case, - using `value()` with a key makes no sense. - - @complexity Logarithmic in the size of the container. - - @liveexample{The example below shows how object elements can be queried - with a default value.,basic_json__value_ptr} - - @sa see @ref operator[](const json_pointer&) for unchecked access by reference - - @since version 2.0.2 - */ - template::value, int>::type = 0> - ValueType value(const json_pointer& ptr, const ValueType& default_value) const - { - // at only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - // if pointer resolves a value, return it or use default value - JSON_TRY - { - return ptr.get_checked(this).template get(); - } - JSON_INTERNAL_CATCH (out_of_range&) - { - return default_value; - } - } - - JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()), *this)); - } - - /*! 
- @brief overload for a default value of type const char* - @copydoc basic_json::value(const json_pointer&, ValueType) const - */ - JSON_HEDLEY_NON_NULL(3) - string_t value(const json_pointer& ptr, const char* default_value) const - { - return value(ptr, string_t(default_value)); - } - - /*! - @brief access the first element - - Returns a reference to the first element in the container. For a JSON - container `c`, the expression `c.front()` is equivalent to `*c.begin()`. - - @return In case of a structured type (array or object), a reference to the - first element is returned. In case of number, string, boolean, or binary - values, a reference to the value is returned. - - @complexity Constant. - - @pre The JSON value must not be `null` (would throw `std::out_of_range`) - or an empty array or object (undefined behavior, **guarded by - assertions**). - @post The JSON value remains unchanged. - - @throw invalid_iterator.214 when called on `null` value - - @liveexample{The following code shows an example for `front()`.,front} - - @sa see @ref back() -- access the last element - - @since version 1.0.0 - */ - reference front() - { - return *begin(); - } - - /*! - @copydoc basic_json::front() - */ - const_reference front() const - { - return *cbegin(); - } - - /*! - @brief access the last element - - Returns a reference to the last element in the container. For a JSON - container `c`, the expression `c.back()` is equivalent to - @code {.cpp} - auto tmp = c.end(); - --tmp; - return *tmp; - @endcode - - @return In case of a structured type (array or object), a reference to the - last element is returned. In case of number, string, boolean, or binary - values, a reference to the value is returned. - - @complexity Constant. - - @pre The JSON value must not be `null` (would throw `std::out_of_range`) - or an empty array or object (undefined behavior, **guarded by - assertions**). - @post The JSON value remains unchanged. 
- - @throw invalid_iterator.214 when called on a `null` value. See example - below. - - @liveexample{The following code shows an example for `back()`.,back} - - @sa see @ref front() -- access the first element - - @since version 1.0.0 - */ - reference back() - { - auto tmp = end(); - --tmp; - return *tmp; - } - - /*! - @copydoc basic_json::back() - */ - const_reference back() const - { - auto tmp = cend(); - --tmp; - return *tmp; - } - - /*! - @brief remove element given an iterator - - Removes the element specified by iterator @a pos. The iterator @a pos must - be valid and dereferenceable. Thus the `end()` iterator (which is valid, - but is not dereferenceable) cannot be used as a value for @a pos. - - If called on a primitive type other than `null`, the resulting JSON value - will be `null`. - - @param[in] pos iterator to the element to remove - @return Iterator following the last removed element. If the iterator @a - pos refers to the last element, the `end()` iterator is returned. - - @tparam IteratorType an @ref iterator or @ref const_iterator - - @post Invalidates iterators and references at or after the point of the - erase, including the `end()` iterator. 
- - @throw type_error.307 if called on a `null` value; example: `"cannot use - erase() with null"` - @throw invalid_iterator.202 if called on an iterator which does not belong - to the current JSON value; example: `"iterator does not fit current - value"` - @throw invalid_iterator.205 if called on a primitive type with invalid - iterator (i.e., any iterator which is not `begin()`); example: `"iterator - out of range"` - - @complexity The complexity depends on the type: - - objects: amortized constant - - arrays: linear in distance between @a pos and the end of the container - - strings and binary: linear in the length of the member - - other types: constant - - @liveexample{The example shows the result of `erase()` for different JSON - types.,erase__IteratorType} - - @sa see @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa see @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - @sa see @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - template < class IteratorType, typename std::enable_if < - std::is_same::value || - std::is_same::value, int >::type - = 0 > - IteratorType erase(IteratorType pos) - { - // make sure iterator fits the current value - if (JSON_HEDLEY_UNLIKELY(this != pos.m_object)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value", *this)); - } - - IteratorType result = end(); - - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - case value_t::binary: - { - if (JSON_HEDLEY_UNLIKELY(!pos.m_it.primitive_iterator.is_begin())) - { - JSON_THROW(invalid_iterator::create(205, "iterator out of range", *this)); - } - - if (is_string()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.string); - 
std::allocator_traits::deallocate(alloc, m_value.string, 1); - m_value.string = nullptr; - } - else if (is_binary()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.binary); - std::allocator_traits::deallocate(alloc, m_value.binary, 1); - m_value.binary = nullptr; - } - - m_type = value_t::null; - assert_invariant(); - break; - } - - case value_t::object: - { - result.m_it.object_iterator = m_value.object->erase(pos.m_it.object_iterator); - break; - } - - case value_t::array: - { - result.m_it.array_iterator = m_value.array->erase(pos.m_it.array_iterator); - break; - } - - default: - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()), *this)); - } - - return result; - } - - /*! - @brief remove elements given an iterator range - - Removes the element specified by the range `[first; last)`. The iterator - @a first does not need to be dereferenceable if `first == last`: erasing - an empty range is a no-op. - - If called on a primitive type other than `null`, the resulting JSON value - will be `null`. - - @param[in] first iterator to the beginning of the range to remove - @param[in] last iterator past the end of the range to remove - @return Iterator following the last removed element. If the iterator @a - second refers to the last element, the `end()` iterator is returned. - - @tparam IteratorType an @ref iterator or @ref const_iterator - - @post Invalidates iterators and references at or after the point of the - erase, including the `end()` iterator. 
- - @throw type_error.307 if called on a `null` value; example: `"cannot use - erase() with null"` - @throw invalid_iterator.203 if called on iterators which does not belong - to the current JSON value; example: `"iterators do not fit current value"` - @throw invalid_iterator.204 if called on a primitive type with invalid - iterators (i.e., if `first != begin()` and `last != end()`); example: - `"iterators out of range"` - - @complexity The complexity depends on the type: - - objects: `log(size()) + std::distance(first, last)` - - arrays: linear in the distance between @a first and @a last, plus linear - in the distance between @a last and end of the container - - strings and binary: linear in the length of the member - - other types: constant - - @liveexample{The example shows the result of `erase()` for different JSON - types.,erase__IteratorType_IteratorType} - - @sa see @ref erase(IteratorType) -- removes the element at a given position - @sa see @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - @sa see @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - template < class IteratorType, typename std::enable_if < - std::is_same::value || - std::is_same::value, int >::type - = 0 > - IteratorType erase(IteratorType first, IteratorType last) - { - // make sure iterator fits the current value - if (JSON_HEDLEY_UNLIKELY(this != first.m_object || this != last.m_object)) - { - JSON_THROW(invalid_iterator::create(203, "iterators do not fit current value", *this)); - } - - IteratorType result = end(); - - switch (m_type) - { - case value_t::boolean: - case value_t::number_float: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::string: - case value_t::binary: - { - if (JSON_HEDLEY_LIKELY(!first.m_it.primitive_iterator.is_begin() - || !last.m_it.primitive_iterator.is_end())) - { - JSON_THROW(invalid_iterator::create(204, 
"iterators out of range", *this)); - } - - if (is_string()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.string); - std::allocator_traits::deallocate(alloc, m_value.string, 1); - m_value.string = nullptr; - } - else if (is_binary()) - { - AllocatorType alloc; - std::allocator_traits::destroy(alloc, m_value.binary); - std::allocator_traits::deallocate(alloc, m_value.binary, 1); - m_value.binary = nullptr; - } - - m_type = value_t::null; - assert_invariant(); - break; - } - - case value_t::object: - { - result.m_it.object_iterator = m_value.object->erase(first.m_it.object_iterator, - last.m_it.object_iterator); - break; - } - - case value_t::array: - { - result.m_it.array_iterator = m_value.array->erase(first.m_it.array_iterator, - last.m_it.array_iterator); - break; - } - - default: - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()), *this)); - } - - return result; - } - - /*! - @brief remove element from a JSON object given a key - - Removes elements from a JSON object with the key value @a key. - - @param[in] key value of the elements to remove - - @return Number of elements removed. If @a ObjectType is the default - `std::map` type, the return value will always be `0` (@a key was not - found) or `1` (@a key was found). - - @post References and iterators to the erased elements are invalidated. - Other references and iterators are not affected. 
- - @throw type_error.307 when called on a type other than JSON object; - example: `"cannot use erase() with null"` - - @complexity `log(size()) + count(key)` - - @liveexample{The example shows the effect of `erase()`.,erase__key_type} - - @sa see @ref erase(IteratorType) -- removes the element at a given position - @sa see @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa see @ref erase(const size_type) -- removes the element from an array at - the given index - - @since version 1.0.0 - */ - size_type erase(const typename object_t::key_type& key) - { - // this erase only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - return m_value.object->erase(key); - } - - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()), *this)); - } - - /*! - @brief remove element from a JSON array given an index - - Removes element from a JSON array at the index @a idx. - - @param[in] idx index of the element to remove - - @throw type_error.307 when called on a type other than JSON object; - example: `"cannot use erase() with null"` - @throw out_of_range.401 when `idx >= size()`; example: `"array index 17 - is out of range"` - - @complexity Linear in distance between @a idx and the end of the container. 
- - @liveexample{The example shows the effect of `erase()`.,erase__size_type} - - @sa see @ref erase(IteratorType) -- removes the element at a given position - @sa see @ref erase(IteratorType, IteratorType) -- removes the elements in - the given range - @sa see @ref erase(const typename object_t::key_type&) -- removes the element - from an object at the given key - - @since version 1.0.0 - */ - void erase(const size_type idx) - { - // this erase only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - if (JSON_HEDLEY_UNLIKELY(idx >= size())) - { - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range", *this)); - } - - m_value.array->erase(m_value.array->begin() + static_cast(idx)); - } - else - { - JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()), *this)); - } - } - - /// @} - - - //////////// - // lookup // - //////////// - - /// @name lookup - /// @{ - - /*! - @brief find an element in a JSON object - - Finds an element in a JSON object with key equivalent to @a key. If the - element is not found or the JSON value is not an object, end() is - returned. - - @note This method always returns @ref end() when executed on a JSON type - that is not an object. - - @param[in] key key value of the element to search for. - - @return Iterator to an element with key equivalent to @a key. If no such - element is found or the JSON value is not an object, past-the-end (see - @ref end()) iterator is returned. - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The example shows how `find()` is used.,find__key_type} - - @sa see @ref contains(KeyT&&) const -- checks whether a key exists - - @since version 1.0.0 - */ - template - iterator find(KeyT&& key) - { - auto result = end(); - - if (is_object()) - { - result.m_it.object_iterator = m_value.object->find(std::forward(key)); - } - - return result; - } - - /*! 
- @brief find an element in a JSON object - @copydoc find(KeyT&&) - */ - template - const_iterator find(KeyT&& key) const - { - auto result = cend(); - - if (is_object()) - { - result.m_it.object_iterator = m_value.object->find(std::forward(key)); - } - - return result; - } - - /*! - @brief returns the number of occurrences of a key in a JSON object - - Returns the number of elements with key @a key. If ObjectType is the - default `std::map` type, the return value will always be `0` (@a key was - not found) or `1` (@a key was found). - - @note This method always returns `0` when executed on a JSON type that is - not an object. - - @param[in] key key value of the element to count - - @return Number of elements with key @a key. If the JSON value is not an - object, the return value will be `0`. - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The example shows how `count()` is used.,count} - - @since version 1.0.0 - */ - template - size_type count(KeyT&& key) const - { - // return 0 for all nonobject types - return is_object() ? m_value.object->count(std::forward(key)) : 0; - } - - /*! - @brief check the existence of an element in a JSON object - - Check whether an element exists in a JSON object with key equivalent to - @a key. If the element is not found or the JSON value is not an object, - false is returned. - - @note This method always returns false when executed on a JSON type - that is not an object. - - @param[in] key key value to check its existence. - - @return true if an element with specified @a key exists. If no such - element with such key is found or the JSON value is not an object, - false is returned. - - @complexity Logarithmic in the size of the JSON object. 
- - @liveexample{The following code shows an example for `contains()`.,contains} - - @sa see @ref find(KeyT&&) -- returns an iterator to an object element - @sa see @ref contains(const json_pointer&) const -- checks the existence for a JSON pointer - - @since version 3.6.0 - */ - template < typename KeyT, typename std::enable_if < - !std::is_same::type, json_pointer>::value, int >::type = 0 > - bool contains(KeyT && key) const - { - return is_object() && m_value.object->find(std::forward(key)) != m_value.object->end(); - } - - /*! - @brief check the existence of an element in a JSON object given a JSON pointer - - Check whether the given JSON pointer @a ptr can be resolved in the current - JSON value. - - @note This method can be executed on any JSON value type. - - @param[in] ptr JSON pointer to check its existence. - - @return true if the JSON pointer can be resolved to a stored value, false - otherwise. - - @post If `j.contains(ptr)` returns true, it is safe to call `j[ptr]`. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - - @complexity Logarithmic in the size of the JSON object. - - @liveexample{The following code shows an example for `contains()`.,contains_json_pointer} - - @sa see @ref contains(KeyT &&) const -- checks the existence of a key - - @since version 3.7.0 - */ - bool contains(const json_pointer& ptr) const - { - return ptr.contains(this); - } - - /// @} - - - /////////////// - // iterators // - /////////////// - - /// @name iterators - /// @{ - - /*! - @brief returns an iterator to the first element - - Returns an iterator to the first element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return iterator to the first element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. 
- - @liveexample{The following code shows an example for `begin()`.,begin} - - @sa see @ref cbegin() -- returns a const iterator to the beginning - @sa see @ref end() -- returns an iterator to the end - @sa see @ref cend() -- returns a const iterator to the end - - @since version 1.0.0 - */ - iterator begin() noexcept - { - iterator result(this); - result.set_begin(); - return result; - } - - /*! - @copydoc basic_json::cbegin() - */ - const_iterator begin() const noexcept - { - return cbegin(); - } - - /*! - @brief returns a const iterator to the first element - - Returns a const iterator to the first element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return const iterator to the first element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).begin()`. - - @liveexample{The following code shows an example for `cbegin()`.,cbegin} - - @sa see @ref begin() -- returns an iterator to the beginning - @sa see @ref end() -- returns an iterator to the end - @sa see @ref cend() -- returns a const iterator to the end - - @since version 1.0.0 - */ - const_iterator cbegin() const noexcept - { - const_iterator result(this); - result.set_begin(); - return result; - } - - /*! - @brief returns an iterator to one past the last element - - Returns an iterator to one past the last element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return iterator one past the last element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. 
- - @liveexample{The following code shows an example for `end()`.,end} - - @sa see @ref cend() -- returns a const iterator to the end - @sa see @ref begin() -- returns an iterator to the beginning - @sa see @ref cbegin() -- returns a const iterator to the beginning - - @since version 1.0.0 - */ - iterator end() noexcept - { - iterator result(this); - result.set_end(); - return result; - } - - /*! - @copydoc basic_json::cend() - */ - const_iterator end() const noexcept - { - return cend(); - } - - /*! - @brief returns a const iterator to one past the last element - - Returns a const iterator to one past the last element. - - @image html range-begin-end.svg "Illustration from cppreference.com" - - @return const iterator one past the last element - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).end()`. - - @liveexample{The following code shows an example for `cend()`.,cend} - - @sa see @ref end() -- returns an iterator to the end - @sa see @ref begin() -- returns an iterator to the beginning - @sa see @ref cbegin() -- returns a const iterator to the beginning - - @since version 1.0.0 - */ - const_iterator cend() const noexcept - { - const_iterator result(this); - result.set_end(); - return result; - } - - /*! - @brief returns an iterator to the reverse-beginning - - Returns an iterator to the reverse-beginning; that is, the last element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `reverse_iterator(end())`. 
- - @liveexample{The following code shows an example for `rbegin()`.,rbegin} - - @sa see @ref crbegin() -- returns a const reverse iterator to the beginning - @sa see @ref rend() -- returns a reverse iterator to the end - @sa see @ref crend() -- returns a const reverse iterator to the end - - @since version 1.0.0 - */ - reverse_iterator rbegin() noexcept - { - return reverse_iterator(end()); - } - - /*! - @copydoc basic_json::crbegin() - */ - const_reverse_iterator rbegin() const noexcept - { - return crbegin(); - } - - /*! - @brief returns an iterator to the reverse-end - - Returns an iterator to the reverse-end; that is, one before the first - element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `reverse_iterator(begin())`. - - @liveexample{The following code shows an example for `rend()`.,rend} - - @sa see @ref crend() -- returns a const reverse iterator to the end - @sa see @ref rbegin() -- returns a reverse iterator to the beginning - @sa see @ref crbegin() -- returns a const reverse iterator to the beginning - - @since version 1.0.0 - */ - reverse_iterator rend() noexcept - { - return reverse_iterator(begin()); - } - - /*! - @copydoc basic_json::crend() - */ - const_reverse_iterator rend() const noexcept - { - return crend(); - } - - /*! - @brief returns a const reverse iterator to the last element - - Returns a const iterator to the reverse-beginning; that is, the last - element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. 
- - Has the semantics of `const_cast(*this).rbegin()`. - - @liveexample{The following code shows an example for `crbegin()`.,crbegin} - - @sa see @ref rbegin() -- returns a reverse iterator to the beginning - @sa see @ref rend() -- returns a reverse iterator to the end - @sa see @ref crend() -- returns a const reverse iterator to the end - - @since version 1.0.0 - */ - const_reverse_iterator crbegin() const noexcept - { - return const_reverse_iterator(cend()); - } - - /*! - @brief returns a const reverse iterator to one before the first - - Returns a const reverse iterator to the reverse-end; that is, one before - the first element. - - @image html range-rbegin-rend.svg "Illustration from cppreference.com" - - @complexity Constant. - - @requirement This function helps `basic_json` satisfying the - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) - requirements: - - The complexity is constant. - - Has the semantics of `const_cast(*this).rend()`. - - @liveexample{The following code shows an example for `crend()`.,crend} - - @sa see @ref rend() -- returns a reverse iterator to the end - @sa see @ref rbegin() -- returns a reverse iterator to the beginning - @sa see @ref crbegin() -- returns a const reverse iterator to the beginning - - @since version 1.0.0 - */ - const_reverse_iterator crend() const noexcept - { - return const_reverse_iterator(cbegin()); - } - - public: - /*! - @brief wrapper to access iterator member functions in range-based for - - This function allows to access @ref iterator::key() and @ref - iterator::value() during range-based for loops. In these loops, a - reference to the JSON values is returned, so there is no access to the - underlying iterator. 
- - For loop without iterator_wrapper: - - @code{cpp} - for (auto it = j_object.begin(); it != j_object.end(); ++it) - { - std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; - } - @endcode - - Range-based for loop without iterator proxy: - - @code{cpp} - for (auto it : j_object) - { - // "it" is of type json::reference and has no key() member - std::cout << "value: " << it << '\n'; - } - @endcode - - Range-based for loop with iterator proxy: - - @code{cpp} - for (auto it : json::iterator_wrapper(j_object)) - { - std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; - } - @endcode - - @note When iterating over an array, `key()` will return the index of the - element as string (see example). - - @param[in] ref reference to a JSON value - @return iteration proxy object wrapping @a ref with an interface to use in - range-based for loops - - @liveexample{The following code shows how the wrapper is used,iterator_wrapper} - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @note The name of this function is not yet final and may change in the - future. - - @deprecated This stream operator is deprecated and will be removed in - future 4.0.0 of the library. Please use @ref items() instead; - that is, replace `json::iterator_wrapper(j)` with `j.items()`. - */ - JSON_HEDLEY_DEPRECATED_FOR(3.1.0, items()) - static iteration_proxy iterator_wrapper(reference ref) noexcept - { - return ref.items(); - } - - /*! - @copydoc iterator_wrapper(reference) - */ - JSON_HEDLEY_DEPRECATED_FOR(3.1.0, items()) - static iteration_proxy iterator_wrapper(const_reference ref) noexcept - { - return ref.items(); - } - - /*! - @brief helper to access iterator member functions in range-based for - - This function allows to access @ref iterator::key() and @ref - iterator::value() during range-based for loops. 
In these loops, a - reference to the JSON values is returned, so there is no access to the - underlying iterator. - - For loop without `items()` function: - - @code{cpp} - for (auto it = j_object.begin(); it != j_object.end(); ++it) - { - std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; - } - @endcode - - Range-based for loop without `items()` function: - - @code{cpp} - for (auto it : j_object) - { - // "it" is of type json::reference and has no key() member - std::cout << "value: " << it << '\n'; - } - @endcode - - Range-based for loop with `items()` function: - - @code{cpp} - for (auto& el : j_object.items()) - { - std::cout << "key: " << el.key() << ", value:" << el.value() << '\n'; - } - @endcode - - The `items()` function also allows to use - [structured bindings](https://en.cppreference.com/w/cpp/language/structured_binding) - (C++17): - - @code{cpp} - for (auto& [key, val] : j_object.items()) - { - std::cout << "key: " << key << ", value:" << val << '\n'; - } - @endcode - - @note When iterating over an array, `key()` will return the index of the - element as string (see example). For primitive types (e.g., numbers), - `key()` returns an empty string. - - @warning Using `items()` on temporary objects is dangerous. Make sure the - object's lifetime exeeds the iteration. See - for more - information. - - @return iteration proxy object wrapping @a ref with an interface to use in - range-based for loops - - @liveexample{The following code shows how the function is used.,items} - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 3.1.0, structured bindings support since 3.5.0. - */ - iteration_proxy items() noexcept - { - return iteration_proxy(*this); - } - - /*! 
- @copydoc items() - */ - iteration_proxy items() const noexcept - { - return iteration_proxy(*this); - } - - /// @} - - - ////////////// - // capacity // - ////////////// - - /// @name capacity - /// @{ - - /*! - @brief checks whether the container is empty. - - Checks if a JSON value has no elements (i.e. whether its @ref size is `0`). - - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `true` - boolean | `false` - string | `false` - number | `false` - binary | `false` - object | result of function `object_t::empty()` - array | result of function `array_t::empty()` - - @liveexample{The following code uses `empty()` to check if a JSON - object contains any elements.,empty} - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their `empty()` functions have constant - complexity. - - @iterators No changes. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @note This function does not return whether a string stored as JSON value - is empty - it returns whether the JSON container itself is empty which is - false in the case of a string. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `begin() == end()`. - - @sa see @ref size() -- returns the number of elements - - @since version 1.0.0 - */ - bool empty() const noexcept - { - switch (m_type) - { - case value_t::null: - { - // null values are empty - return true; - } - - case value_t::array: - { - // delegate call to array_t::empty() - return m_value.array->empty(); - } - - case value_t::object: - { - // delegate call to object_t::empty() - return m_value.object->empty(); - } - - default: - { - // all other types are nonempty - return false; - } - } - } - - /*! 
- @brief returns the number of elements - - Returns the number of elements in a JSON value. - - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `0` - boolean | `1` - string | `1` - number | `1` - binary | `1` - object | result of function object_t::size() - array | result of function array_t::size() - - @liveexample{The following code calls `size()` on the different value - types.,size} - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their size() functions have constant - complexity. - - @iterators No changes. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @note This function does not return the length of a string stored as JSON - value - it returns the number of elements in the JSON value which is 1 in - the case of a string. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of `std::distance(begin(), end())`. - - @sa see @ref empty() -- checks whether the container is empty - @sa see @ref max_size() -- returns the maximal number of elements - - @since version 1.0.0 - */ - size_type size() const noexcept - { - switch (m_type) - { - case value_t::null: - { - // null values are empty - return 0; - } - - case value_t::array: - { - // delegate call to array_t::size() - return m_value.array->size(); - } - - case value_t::object: - { - // delegate call to object_t::size() - return m_value.object->size(); - } - - default: - { - // all other types have size 1 - return 1; - } - } - } - - /*! - @brief returns the maximum possible number of elements - - Returns the maximum number of elements a JSON value is able to hold due to - system or library implementation limitations, i.e. 
`std::distance(begin(), - end())` for the JSON value. - - @return The return value depends on the different types and is - defined as follows: - Value type | return value - ----------- | ------------- - null | `0` (same as `size()`) - boolean | `1` (same as `size()`) - string | `1` (same as `size()`) - number | `1` (same as `size()`) - binary | `1` (same as `size()`) - object | result of function `object_t::max_size()` - array | result of function `array_t::max_size()` - - @liveexample{The following code calls `max_size()` on the different value - types. Note the output is implementation specific.,max_size} - - @complexity Constant, as long as @ref array_t and @ref object_t satisfy - the Container concept; that is, their `max_size()` functions have constant - complexity. - - @iterators No changes. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @requirement This function helps `basic_json` satisfying the - [Container](https://en.cppreference.com/w/cpp/named_req/Container) - requirements: - - The complexity is constant. - - Has the semantics of returning `b.size()` where `b` is the largest - possible JSON value. - - @sa see @ref size() -- returns the number of elements - - @since version 1.0.0 - */ - size_type max_size() const noexcept - { - switch (m_type) - { - case value_t::array: - { - // delegate call to array_t::max_size() - return m_value.array->max_size(); - } - - case value_t::object: - { - // delegate call to object_t::max_size() - return m_value.object->max_size(); - } - - default: - { - // all other types have max_size() == size() - return size(); - } - } - } - - /// @} - - - /////////////// - // modifiers // - /////////////// - - /// @name modifiers - /// @{ - - /*! 
- @brief clears the contents - - Clears the content of a JSON value and resets it to the default value as - if @ref basic_json(value_t) would have been called with the current value - type from @ref type(): - - Value type | initial value - ----------- | ------------- - null | `null` - boolean | `false` - string | `""` - number | `0` - binary | An empty byte vector - object | `{}` - array | `[]` - - @post Has the same effect as calling - @code {.cpp} - *this = basic_json(type()); - @endcode - - @liveexample{The example below shows the effect of `clear()` to different - JSON types.,clear} - - @complexity Linear in the size of the JSON value. - - @iterators All iterators, pointers and references related to this container - are invalidated. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @sa see @ref basic_json(value_t) -- constructor that creates an object with the - same value than calling `clear()` - - @since version 1.0.0 - */ - void clear() noexcept - { - switch (m_type) - { - case value_t::number_integer: - { - m_value.number_integer = 0; - break; - } - - case value_t::number_unsigned: - { - m_value.number_unsigned = 0; - break; - } - - case value_t::number_float: - { - m_value.number_float = 0.0; - break; - } - - case value_t::boolean: - { - m_value.boolean = false; - break; - } - - case value_t::string: - { - m_value.string->clear(); - break; - } - - case value_t::binary: - { - m_value.binary->clear(); - break; - } - - case value_t::array: - { - m_value.array->clear(); - break; - } - - case value_t::object: - { - m_value.object->clear(); - break; - } - - default: - break; - } - } - - /*! - @brief add an object to an array - - Appends the given element @a val to the end of the JSON value. If the - function is called on a JSON null value, an empty array is created before - appending @a val. 
- - @param[in] val the value to add to the JSON array - - @throw type_error.308 when called on a type other than JSON array or - null; example: `"cannot use push_back() with number"` - - @complexity Amortized constant. - - @liveexample{The example shows how `push_back()` and `+=` can be used to - add elements to a JSON array. Note how the `null` value was silently - converted to a JSON array.,push_back} - - @since version 1.0.0 - */ - void push_back(basic_json&& val) - { - // push_back only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_array()))) - { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()), *this)); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array (move semantics) - m_value.array->push_back(std::move(val)); - set_parent(m_value.array->back()); - // if val is moved from, basic_json move constructor marks it null so we do not call the destructor - } - - /*! - @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - reference operator+=(basic_json&& val) - { - push_back(std::move(val)); - return *this; - } - - /*! - @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - void push_back(const basic_json& val) - { - // push_back only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_array()))) - { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()), *this)); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array - m_value.array->push_back(val); - set_parent(m_value.array->back()); - } - - /*! 
- @brief add an object to an array - @copydoc push_back(basic_json&&) - */ - reference operator+=(const basic_json& val) - { - push_back(val); - return *this; - } - - /*! - @brief add an object to an object - - Inserts the given element @a val to the JSON object. If the function is - called on a JSON null value, an empty object is created before inserting - @a val. - - @param[in] val the value to add to the JSON object - - @throw type_error.308 when called on a type other than JSON object or - null; example: `"cannot use push_back() with number"` - - @complexity Logarithmic in the size of the container, O(log(`size()`)). - - @liveexample{The example shows how `push_back()` and `+=` can be used to - add elements to a JSON object. Note how the `null` value was silently - converted to a JSON object.,push_back__object_t__value} - - @since version 1.0.0 - */ - void push_back(const typename object_t::value_type& val) - { - // push_back only works for null objects or objects - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_object()))) - { - JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()), *this)); - } - - // transform null object into an object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // add element to object - auto res = m_value.object->insert(val); - set_parent(res.first->second); - } - - /*! - @brief add an object to an object - @copydoc push_back(const typename object_t::value_type&) - */ - reference operator+=(const typename object_t::value_type& val) - { - push_back(val); - return *this; - } - - /*! - @brief add an object to an object - - This function allows to use `push_back` with an initializer list. In case - - 1. the current value is an object, - 2. the initializer list @a init contains only two elements, and - 3. 
the first element of @a init is a string, - - @a init is converted into an object element and added using - @ref push_back(const typename object_t::value_type&). Otherwise, @a init - is converted to a JSON value and added using @ref push_back(basic_json&&). - - @param[in] init an initializer list - - @complexity Linear in the size of the initializer list @a init. - - @note This function is required to resolve an ambiguous overload error, - because pairs like `{"key", "value"}` can be both interpreted as - `object_t::value_type` or `std::initializer_list`, see - https://github.com/nlohmann/json/issues/235 for more information. - - @liveexample{The example shows how initializer lists are treated as - objects when possible.,push_back__initializer_list} - */ - void push_back(initializer_list_t init) - { - if (is_object() && init.size() == 2 && (*init.begin())->is_string()) - { - basic_json&& key = init.begin()->moved_or_copied(); - push_back(typename object_t::value_type( - std::move(key.get_ref()), (init.begin() + 1)->moved_or_copied())); - } - else - { - push_back(basic_json(init)); - } - } - - /*! - @brief add an object to an object - @copydoc push_back(initializer_list_t) - */ - reference operator+=(initializer_list_t init) - { - push_back(init); - return *this; - } - - /*! - @brief add an object to an array - - Creates a JSON value from the passed parameters @a args to the end of the - JSON value. If the function is called on a JSON null value, an empty array - is created before appending the value created from @a args. - - @param[in] args arguments to forward to a constructor of @ref basic_json - @tparam Args compatible types to create a @ref basic_json object - - @return reference to the inserted element - - @throw type_error.311 when called on a type other than JSON array or - null; example: `"cannot use emplace_back() with number"` - - @complexity Amortized constant. 
- - @liveexample{The example shows how `push_back()` can be used to add - elements to a JSON array. Note how the `null` value was silently converted - to a JSON array.,emplace_back} - - @since version 2.0.8, returns reference since 3.7.0 - */ - template - reference emplace_back(Args&& ... args) - { - // emplace_back only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_array()))) - { - JSON_THROW(type_error::create(311, "cannot use emplace_back() with " + std::string(type_name()), *this)); - } - - // transform null object into an array - if (is_null()) - { - m_type = value_t::array; - m_value = value_t::array; - assert_invariant(); - } - - // add element to array (perfect forwarding) -#ifdef JSON_HAS_CPP_17 - return set_parent(m_value.array->emplace_back(std::forward(args)...)); -#else - m_value.array->emplace_back(std::forward(args)...); - return set_parent(m_value.array->back()); -#endif - } - - /*! - @brief add an object to an object if key does not exist - - Inserts a new element into a JSON object constructed in-place with the - given @a args if there is no element with the key in the container. If the - function is called on a JSON null value, an empty object is created before - appending the value created from @a args. - - @param[in] args arguments to forward to a constructor of @ref basic_json - @tparam Args compatible types to create a @ref basic_json object - - @return a pair consisting of an iterator to the inserted element, or the - already-existing element if no insertion happened, and a bool - denoting whether the insertion took place. - - @throw type_error.311 when called on a type other than JSON object or - null; example: `"cannot use emplace() with number"` - - @complexity Logarithmic in the size of the container, O(log(`size()`)). - - @liveexample{The example shows how `emplace()` can be used to add elements - to a JSON object. Note how the `null` value was silently converted to a - JSON object. 
Further note how no value is added if there was already one - value stored with the same key.,emplace} - - @since version 2.0.8 - */ - template - std::pair emplace(Args&& ... args) - { - // emplace only works for null objects or arrays - if (JSON_HEDLEY_UNLIKELY(!(is_null() || is_object()))) - { - JSON_THROW(type_error::create(311, "cannot use emplace() with " + std::string(type_name()), *this)); - } - - // transform null object into an object - if (is_null()) - { - m_type = value_t::object; - m_value = value_t::object; - assert_invariant(); - } - - // add element to array (perfect forwarding) - auto res = m_value.object->emplace(std::forward(args)...); - set_parent(res.first->second); - - // create result iterator and set iterator to the result of emplace - auto it = begin(); - it.m_it.object_iterator = res.first; - - // return pair of iterator and boolean - return {it, res.second}; - } - - /// Helper for insertion of an iterator - /// @note: This uses std::distance to support GCC 4.8, - /// see https://github.com/nlohmann/json/pull/1257 - template - iterator insert_iterator(const_iterator pos, Args&& ... args) - { - iterator result(this); - JSON_ASSERT(m_value.array != nullptr); - - auto insert_pos = std::distance(m_value.array->begin(), pos.m_it.array_iterator); - m_value.array->insert(pos.m_it.array_iterator, std::forward(args)...); - result.m_it.array_iterator = m_value.array->begin() + insert_pos; - - // This could have been written as: - // result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, cnt, val); - // but the return value of insert is missing in GCC 4.8, so it is written this way instead. - - return result; - } - - /*! - @brief inserts element - - Inserts element @a val before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] val element to insert - @return iterator pointing to the inserted @a val. 
- - @throw type_error.309 if called on JSON values other than arrays; - example: `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @complexity Constant plus linear in the distance between @a pos and end of - the container. - - @liveexample{The example shows how `insert()` is used.,insert} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, const basic_json& val) - { - // insert only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value", *this)); - } - - // insert to array and return iterator - return set_parents(insert_iterator(pos, val), static_cast(1)); - } - - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()), *this)); - } - - /*! - @brief inserts element - @copydoc insert(const_iterator, const basic_json&) - */ - iterator insert(const_iterator pos, basic_json&& val) - { - return insert(pos, val); - } - - /*! - @brief inserts elements - - Inserts @a cnt copies of @a val before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] cnt number of copies of @a val to insert - @param[in] val element to insert - @return iterator pointing to the first element inserted, or @a pos if - `cnt==0` - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @complexity Linear in @a cnt plus linear in the distance between @a pos - and end of the container. 
- - @liveexample{The example shows how `insert()` is used.,insert__count} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, size_type cnt, const basic_json& val) - { - // insert only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value", *this)); - } - - // insert to array and return iterator - return set_parents(insert_iterator(pos, cnt, val), static_cast(cnt)); - } - - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()), *this)); - } - - /*! - @brief inserts elements - - Inserts elements from range `[first, last)` before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] first begin of the range of elements to insert - @param[in] last end of the range of elements to insert - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - @throw invalid_iterator.210 if @a first and @a last do not belong to the - same JSON value; example: `"iterators do not fit"` - @throw invalid_iterator.211 if @a first or @a last are iterators into - container for which insert is called; example: `"passed iterators may not - belong to container"` - - @return iterator pointing to the first element inserted, or @a pos if - `first==last` - - @complexity Linear in `std::distance(first, last)` plus linear in the - distance between @a pos and end of the container. 
- - @liveexample{The example shows how `insert()` is used.,insert__range} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, const_iterator first, const_iterator last) - { - // insert only works for arrays - if (JSON_HEDLEY_UNLIKELY(!is_array())) - { - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()), *this)); - } - - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value", *this)); - } - - // check if range iterators belong to the same JSON object - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(210, "iterators do not fit", *this)); - } - - if (JSON_HEDLEY_UNLIKELY(first.m_object == this)) - { - JSON_THROW(invalid_iterator::create(211, "passed iterators may not belong to container", *this)); - } - - // insert to array and return iterator - return set_parents(insert_iterator(pos, first.m_it.array_iterator, last.m_it.array_iterator), std::distance(first, last)); - } - - /*! - @brief inserts elements - - Inserts elements from initializer list @a ilist before iterator @a pos. - - @param[in] pos iterator before which the content will be inserted; may be - the end() iterator - @param[in] ilist initializer list to insert the values from - - @throw type_error.309 if called on JSON values other than arrays; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if @a pos is not an iterator of *this; - example: `"iterator does not fit current value"` - - @return iterator pointing to the first element inserted, or @a pos if - `ilist` is empty - - @complexity Linear in `ilist.size()` plus linear in the distance between - @a pos and end of the container. 
- - @liveexample{The example shows how `insert()` is used.,insert__ilist} - - @since version 1.0.0 - */ - iterator insert(const_iterator pos, initializer_list_t ilist) - { - // insert only works for arrays - if (JSON_HEDLEY_UNLIKELY(!is_array())) - { - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()), *this)); - } - - // check if iterator pos fits to this JSON value - if (JSON_HEDLEY_UNLIKELY(pos.m_object != this)) - { - JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value", *this)); - } - - // insert to array and return iterator - return set_parents(insert_iterator(pos, ilist.begin(), ilist.end()), static_cast(ilist.size())); - } - - /*! - @brief inserts elements - - Inserts elements from range `[first, last)`. - - @param[in] first begin of the range of elements to insert - @param[in] last end of the range of elements to insert - - @throw type_error.309 if called on JSON values other than objects; example: - `"cannot use insert() with string"` - @throw invalid_iterator.202 if iterator @a first or @a last does does not - point to an object; example: `"iterators first and last must point to - objects"` - @throw invalid_iterator.210 if @a first and @a last do not belong to the - same JSON value; example: `"iterators do not fit"` - - @complexity Logarithmic: `O(N*log(size() + N))`, where `N` is the number - of elements to insert. 
- - @liveexample{The example shows how `insert()` is used.,insert__range_object} - - @since version 3.0.0 - */ - void insert(const_iterator first, const_iterator last) - { - // insert only works for objects - if (JSON_HEDLEY_UNLIKELY(!is_object())) - { - JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()), *this)); - } - - // check if range iterators belong to the same JSON object - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(210, "iterators do not fit", *this)); - } - - // passed iterators must belong to objects - if (JSON_HEDLEY_UNLIKELY(!first.m_object->is_object())) - { - JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects", *this)); - } - - m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); - } - - /*! - @brief updates a JSON object from another object, overwriting existing keys - - Inserts all values from JSON object @a j and overwrites existing keys. - - @param[in] j JSON object to read values from - - @throw type_error.312 if called on JSON values other than objects; example: - `"cannot use update() with string"` - - @complexity O(N*log(size() + N)), where N is the number of elements to - insert. 
- - @liveexample{The example shows how `update()` is used.,update} - - @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update - - @since version 3.0.0 - */ - void update(const_reference j) - { - // implicitly convert null value to an empty object - if (is_null()) - { - m_type = value_t::object; - m_value.object = create(); - assert_invariant(); - } - - if (JSON_HEDLEY_UNLIKELY(!is_object())) - { - JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()), *this)); - } - if (JSON_HEDLEY_UNLIKELY(!j.is_object())) - { - JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(j.type_name()), *this)); - } - - for (auto it = j.cbegin(); it != j.cend(); ++it) - { - m_value.object->operator[](it.key()) = it.value(); - } - } - - /*! - @brief updates a JSON object from another object, overwriting existing keys - - Inserts all values from from range `[first, last)` and overwrites existing - keys. - - @param[in] first begin of the range of elements to insert - @param[in] last end of the range of elements to insert - - @throw type_error.312 if called on JSON values other than objects; example: - `"cannot use update() with string"` - @throw invalid_iterator.202 if iterator @a first or @a last does does not - point to an object; example: `"iterators first and last must point to - objects"` - @throw invalid_iterator.210 if @a first and @a last do not belong to the - same JSON value; example: `"iterators do not fit"` - - @complexity O(N*log(size() + N)), where N is the number of elements to - insert. 
- - @liveexample{The example shows how `update()` is used__range.,update} - - @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update - - @since version 3.0.0 - */ - void update(const_iterator first, const_iterator last) - { - // implicitly convert null value to an empty object - if (is_null()) - { - m_type = value_t::object; - m_value.object = create(); - assert_invariant(); - } - - if (JSON_HEDLEY_UNLIKELY(!is_object())) - { - JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()), *this)); - } - - // check if range iterators belong to the same JSON object - if (JSON_HEDLEY_UNLIKELY(first.m_object != last.m_object)) - { - JSON_THROW(invalid_iterator::create(210, "iterators do not fit", *this)); - } - - // passed iterators must belong to objects - if (JSON_HEDLEY_UNLIKELY(!first.m_object->is_object() - || !last.m_object->is_object())) - { - JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects", *this)); - } - - for (auto it = first; it != last; ++it) - { - m_value.object->operator[](it.key()) = it.value(); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of the JSON value with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other JSON value to exchange the contents with - - @complexity Constant. - - @liveexample{The example below shows how JSON values can be swapped with - `swap()`.,swap__reference} - - @since version 1.0.0 - */ - void swap(reference other) noexcept ( - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value&& - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value - ) - { - std::swap(m_type, other.m_type); - std::swap(m_value, other.m_value); - - set_parents(); - other.set_parents(); - assert_invariant(); - } - - /*! 
- @brief exchanges the values - - Exchanges the contents of the JSON value from @a left with those of @a right. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. implemented as a friend function callable via ADL. - - @param[in,out] left JSON value to exchange the contents with - @param[in,out] right JSON value to exchange the contents with - - @complexity Constant. - - @liveexample{The example below shows how JSON values can be swapped with - `swap()`.,swap__reference} - - @since version 1.0.0 - */ - friend void swap(reference left, reference right) noexcept ( - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value&& - std::is_nothrow_move_constructible::value&& - std::is_nothrow_move_assignable::value - ) - { - left.swap(right); - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON array with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other array to exchange the contents with - - @throw type_error.310 when JSON value is not an array; example: `"cannot - use swap() with string"` - - @complexity Constant. - - @liveexample{The example below shows how arrays can be swapped with - `swap()`.,swap__array_t} - - @since version 1.0.0 - */ - void swap(array_t& other) // NOLINT(bugprone-exception-escape) - { - // swap only works for arrays - if (JSON_HEDLEY_LIKELY(is_array())) - { - std::swap(*(m_value.array), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()), *this)); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON object with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. 
All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other object to exchange the contents with - - @throw type_error.310 when JSON value is not an object; example: - `"cannot use swap() with string"` - - @complexity Constant. - - @liveexample{The example below shows how objects can be swapped with - `swap()`.,swap__object_t} - - @since version 1.0.0 - */ - void swap(object_t& other) // NOLINT(bugprone-exception-escape) - { - // swap only works for objects - if (JSON_HEDLEY_LIKELY(is_object())) - { - std::swap(*(m_value.object), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()), *this)); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON string with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. - - @param[in,out] other string to exchange the contents with - - @throw type_error.310 when JSON value is not a string; example: `"cannot - use swap() with boolean"` - - @complexity Constant. - - @liveexample{The example below shows how strings can be swapped with - `swap()`.,swap__string_t} - - @since version 1.0.0 - */ - void swap(string_t& other) // NOLINT(bugprone-exception-escape) - { - // swap only works for strings - if (JSON_HEDLEY_LIKELY(is_string())) - { - std::swap(*(m_value.string), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()), *this)); - } - } - - /*! - @brief exchanges the values - - Exchanges the contents of a JSON string with those of @a other. Does not - invoke any move, copy, or swap operations on individual elements. All - iterators and references remain valid. The past-the-end iterator is - invalidated. 
- - @param[in,out] other binary to exchange the contents with - - @throw type_error.310 when JSON value is not a string; example: `"cannot - use swap() with boolean"` - - @complexity Constant. - - @liveexample{The example below shows how strings can be swapped with - `swap()`.,swap__binary_t} - - @since version 3.8.0 - */ - void swap(binary_t& other) // NOLINT(bugprone-exception-escape) - { - // swap only works for strings - if (JSON_HEDLEY_LIKELY(is_binary())) - { - std::swap(*(m_value.binary), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()), *this)); - } - } - - /// @copydoc swap(binary_t&) - void swap(typename binary_t::container_type& other) // NOLINT(bugprone-exception-escape) - { - // swap only works for strings - if (JSON_HEDLEY_LIKELY(is_binary())) - { - std::swap(*(m_value.binary), other); - } - else - { - JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()), *this)); - } - } - - /// @} - - public: - ////////////////////////////////////////// - // lexicographical comparison operators // - ////////////////////////////////////////// - - /// @name lexicographical comparison operators - /// @{ - - /*! - @brief comparison: equal - - Compares two JSON values for equality according to the following rules: - - Two JSON values are equal if (1) they are from the same type and (2) - their stored values are the same according to their respective - `operator==`. - - Integer and floating-point numbers are automatically converted before - comparison. Note that two NaN values are always treated as unequal. - - Two JSON null values are equal. - - @note Floating-point inside JSON values numbers are compared with - `json::number_float_t::operator==` which is `double::operator==` by - default. 
To compare floating-point while respecting an epsilon, an alternative - [comparison function](https://github.com/mariokonrad/marnav/blob/master/include/marnav/math/floatingpoint.hpp#L34-#L39) - could be used, for instance - @code {.cpp} - template::value, T>::type> - inline bool is_same(T a, T b, T epsilon = std::numeric_limits::epsilon()) noexcept - { - return std::abs(a - b) <= epsilon; - } - @endcode - Or you can self-defined operator equal function like this: - @code {.cpp} - bool my_equal(const_reference lhs, const_reference rhs) { - const auto lhs_type lhs.type(); - const auto rhs_type rhs.type(); - if (lhs_type == rhs_type) { - switch(lhs_type) - // self_defined case - case value_t::number_float: - return std::abs(lhs - rhs) <= std::numeric_limits::epsilon(); - // other cases remain the same with the original - ... - } - ... - } - @endcode - - @note NaN values never compare equal to themselves or to other NaN values. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether the values @a lhs and @a rhs are equal - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @complexity Linear. 
- - @liveexample{The example demonstrates comparing several JSON - types.,operator__equal} - - @since version 1.0.0 - */ - friend bool operator==(const_reference lhs, const_reference rhs) noexcept - { - const auto lhs_type = lhs.type(); - const auto rhs_type = rhs.type(); - - if (lhs_type == rhs_type) - { - switch (lhs_type) - { - case value_t::array: - return *lhs.m_value.array == *rhs.m_value.array; - - case value_t::object: - return *lhs.m_value.object == *rhs.m_value.object; - - case value_t::null: - return true; - - case value_t::string: - return *lhs.m_value.string == *rhs.m_value.string; - - case value_t::boolean: - return lhs.m_value.boolean == rhs.m_value.boolean; - - case value_t::number_integer: - return lhs.m_value.number_integer == rhs.m_value.number_integer; - - case value_t::number_unsigned: - return lhs.m_value.number_unsigned == rhs.m_value.number_unsigned; - - case value_t::number_float: - return lhs.m_value.number_float == rhs.m_value.number_float; - - case value_t::binary: - return *lhs.m_value.binary == *rhs.m_value.binary; - - default: - return false; - } - } - else if (lhs_type == value_t::number_integer && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_integer) == rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_integer) - { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_integer); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_float == static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_integer) - { - return static_cast(lhs.m_value.number_unsigned) == rhs.m_value.number_integer; - } - else if (lhs_type == 
value_t::number_integer && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_integer == static_cast(rhs.m_value.number_unsigned); - } - - return false; - } - - /*! - @brief comparison: equal - @copydoc operator==(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator==(const_reference lhs, ScalarType rhs) noexcept - { - return lhs == basic_json(rhs); - } - - /*! - @brief comparison: equal - @copydoc operator==(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator==(ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) == rhs; - } - - /*! - @brief comparison: not equal - - Compares two JSON values for inequality by calculating `not (lhs == rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether the values @a lhs and @a rhs are not equal - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__notequal} - - @since version 1.0.0 - */ - friend bool operator!=(const_reference lhs, const_reference rhs) noexcept - { - return !(lhs == rhs); - } - - /*! - @brief comparison: not equal - @copydoc operator!=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator!=(const_reference lhs, ScalarType rhs) noexcept - { - return lhs != basic_json(rhs); - } - - /*! - @brief comparison: not equal - @copydoc operator!=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator!=(ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) != rhs; - } - - /*! 
- @brief comparison: less than - - Compares whether one JSON value @a lhs is less than another JSON value @a - rhs according to the following rules: - - If @a lhs and @a rhs have the same type, the values are compared using - the default `<` operator. - - Integer and floating-point numbers are automatically converted before - comparison - - In case @a lhs and @a rhs have different types, the values are ignored - and the order of the types is considered, see - @ref operator<(const value_t, const value_t). - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is less than @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__less} - - @since version 1.0.0 - */ - friend bool operator<(const_reference lhs, const_reference rhs) noexcept - { - const auto lhs_type = lhs.type(); - const auto rhs_type = rhs.type(); - - if (lhs_type == rhs_type) - { - switch (lhs_type) - { - case value_t::array: - // note parentheses are necessary, see - // https://github.com/nlohmann/json/issues/1530 - return (*lhs.m_value.array) < (*rhs.m_value.array); - - case value_t::object: - return (*lhs.m_value.object) < (*rhs.m_value.object); - - case value_t::null: - return false; - - case value_t::string: - return (*lhs.m_value.string) < (*rhs.m_value.string); - - case value_t::boolean: - return (lhs.m_value.boolean) < (rhs.m_value.boolean); - - case value_t::number_integer: - return (lhs.m_value.number_integer) < (rhs.m_value.number_integer); - - case value_t::number_unsigned: - return (lhs.m_value.number_unsigned) < (rhs.m_value.number_unsigned); - - case value_t::number_float: - return (lhs.m_value.number_float) < (rhs.m_value.number_float); - - case value_t::binary: - return (*lhs.m_value.binary) < (*rhs.m_value.binary); - - default: - return false; - } - } - else if (lhs_type == 
value_t::number_integer && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_integer) < rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_integer) - { - return lhs.m_value.number_float < static_cast(rhs.m_value.number_integer); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_float) - { - return static_cast(lhs.m_value.number_unsigned) < rhs.m_value.number_float; - } - else if (lhs_type == value_t::number_float && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_float < static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_integer && rhs_type == value_t::number_unsigned) - { - return lhs.m_value.number_integer < static_cast(rhs.m_value.number_unsigned); - } - else if (lhs_type == value_t::number_unsigned && rhs_type == value_t::number_integer) - { - return static_cast(lhs.m_value.number_unsigned) < rhs.m_value.number_integer; - } - - // We only reach this line if we cannot compare values. In that case, - // we compare types. Note we have to call the operator explicitly, - // because MSVC has problems otherwise. - return operator<(lhs_type, rhs_type); - } - - /*! - @brief comparison: less than - @copydoc operator<(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<(const_reference lhs, ScalarType rhs) noexcept - { - return lhs < basic_json(rhs); - } - - /*! - @brief comparison: less than - @copydoc operator<(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<(ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) < rhs; - } - - /*! - @brief comparison: less than or equal - - Compares whether one JSON value @a lhs is less than or equal to another - JSON value by calculating `not (rhs < lhs)`. 
- - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is less than or equal to @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__greater} - - @since version 1.0.0 - */ - friend bool operator<=(const_reference lhs, const_reference rhs) noexcept - { - return !(rhs < lhs); - } - - /*! - @brief comparison: less than or equal - @copydoc operator<=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<=(const_reference lhs, ScalarType rhs) noexcept - { - return lhs <= basic_json(rhs); - } - - /*! - @brief comparison: less than or equal - @copydoc operator<=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator<=(ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) <= rhs; - } - - /*! - @brief comparison: greater than - - Compares whether one JSON value @a lhs is greater than another - JSON value by calculating `not (lhs <= rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is greater than to @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__lessequal} - - @since version 1.0.0 - */ - friend bool operator>(const_reference lhs, const_reference rhs) noexcept - { - return !(lhs <= rhs); - } - - /*! - @brief comparison: greater than - @copydoc operator>(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>(const_reference lhs, ScalarType rhs) noexcept - { - return lhs > basic_json(rhs); - } - - /*! 
- @brief comparison: greater than - @copydoc operator>(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>(ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) > rhs; - } - - /*! - @brief comparison: greater than or equal - - Compares whether one JSON value @a lhs is greater than or equal to another - JSON value by calculating `not (lhs < rhs)`. - - @param[in] lhs first JSON value to consider - @param[in] rhs second JSON value to consider - @return whether @a lhs is greater than or equal to @a rhs - - @complexity Linear. - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @liveexample{The example demonstrates comparing several JSON - types.,operator__greaterequal} - - @since version 1.0.0 - */ - friend bool operator>=(const_reference lhs, const_reference rhs) noexcept - { - return !(lhs < rhs); - } - - /*! - @brief comparison: greater than or equal - @copydoc operator>=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>=(const_reference lhs, ScalarType rhs) noexcept - { - return lhs >= basic_json(rhs); - } - - /*! - @brief comparison: greater than or equal - @copydoc operator>=(const_reference, const_reference) - */ - template::value, int>::type = 0> - friend bool operator>=(ScalarType lhs, const_reference rhs) noexcept - { - return basic_json(lhs) >= rhs; - } - - /// @} - - /////////////////// - // serialization // - /////////////////// - - /// @name serialization - /// @{ - - /*! - @brief serialize to stream - - Serialize the given JSON value @a j to the output stream @a o. The JSON - value will be serialized using the @ref dump member function. - - - The indentation of the output can be controlled with the member variable - `width` of the output stream @a o. For instance, using the manipulator - `std::setw(4)` on @a o sets the indentation level to `4` and the - serialization result is the same as calling `dump(4)`. 
- - - The indentation character can be controlled with the member variable - `fill` of the output stream @a o. For instance, the manipulator - `std::setfill('\\t')` sets indentation to use a tab character rather than - the default space character. - - @param[in,out] o stream to serialize to - @param[in] j JSON value to serialize - - @return the stream @a o - - @throw type_error.316 if a string stored inside the JSON value is not - UTF-8 encoded - - @complexity Linear. - - @liveexample{The example below shows the serialization with different - parameters to `width` to adjust the indentation level.,operator_serialize} - - @since version 1.0.0; indentation character added in version 3.0.0 - */ - friend std::ostream& operator<<(std::ostream& o, const basic_json& j) - { - // read width member and use it as indentation parameter if nonzero - const bool pretty_print = o.width() > 0; - const auto indentation = pretty_print ? o.width() : 0; - - // reset width to 0 for subsequent calls to this stream - o.width(0); - - // do the actual serialization - serializer s(detail::output_adapter(o), o.fill()); - s.dump(j, pretty_print, false, static_cast(indentation)); - return o; - } - - /*! - @brief serialize to stream - @deprecated This stream operator is deprecated and will be removed in - future 4.0.0 of the library. Please use - @ref operator<<(std::ostream&, const basic_json&) - instead; that is, replace calls like `j >> o;` with `o << j;`. - @since version 1.0.0; deprecated since version 3.0.0 - */ - JSON_HEDLEY_DEPRECATED_FOR(3.0.0, operator<<(std::ostream&, const basic_json&)) - friend std::ostream& operator>>(const basic_json& j, std::ostream& o) - { - return o << j; - } - - /// @} - - - ///////////////////// - // deserialization // - ///////////////////// - - /// @name deserialization - /// @{ - - /*! 
- @brief deserialize from a compatible input - - @tparam InputType A compatible input, for instance - - an std::istream object - - a FILE pointer - - a C-style array of characters - - a pointer to a null-terminated string of single byte characters - - an object obj for which begin(obj) and end(obj) produces a valid pair of - iterators. - - @param[in] i input to read from - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.101 if a parse error occurs; example: `""unexpected end - of input; expected string literal""` - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. The complexity can be higher if the parser callback function - @a cb or reading from the input @a i has a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. 
- - @liveexample{The example below demonstrates the `parse()` function reading - from an array.,parse__array__parser_callback_t} - - @liveexample{The example below demonstrates the `parse()` function with - and without callback function.,parse__string__parser_callback_t} - - @liveexample{The example below demonstrates the `parse()` function with - and without callback function.,parse__istream__parser_callback_t} - - @liveexample{The example below demonstrates the `parse()` function reading - from a contiguous container.,parse__contiguouscontainer__parser_callback_t} - - @since version 2.0.3 (contiguous containers); version 3.9.0 allowed to - ignore comments. - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json parse(InputType&& i, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false) - { - basic_json result; - parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions, ignore_comments).parse(true, result); - return result; - } - - /*! - @brief deserialize from a pair of character iterators - - The value_type of the iterator must be a integral type with size of 1, 2 or - 4 bytes, which will be interpreted respectively as UTF-8, UTF-16 and UTF-32. - - @param[in] first iterator to start of character range - @param[in] last iterator to end of character range - @param[in] cb a parser callback function of type @ref parser_callback_t - which is used to control the deserialization by filtering unwanted values - (optional) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. 
- - @throw parse_error.101 if a parse error occurs; example: `""unexpected end - of input; expected string literal""` - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json parse(IteratorType first, - IteratorType last, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false) - { - basic_json result; - parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions, ignore_comments).parse(true, result); - return result; - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, parse(ptr, ptr + len)) - static basic_json parse(detail::span_input_adapter&& i, - const parser_callback_t cb = nullptr, - const bool allow_exceptions = true, - const bool ignore_comments = false) - { - basic_json result; - parser(i.get(), cb, allow_exceptions, ignore_comments).parse(true, result); - return result; - } - - /*! - @brief check if the input is valid JSON - - Unlike the @ref parse(InputType&&, const parser_callback_t,const bool) - function, this function neither throws an exception in case of invalid JSON - input (i.e., a parse error) nor creates diagnostic information. - - @tparam InputType A compatible input, for instance - - an std::istream object - - a FILE pointer - - a C-style array of characters - - a pointer to a null-terminated string of single byte characters - - an object obj for which begin(obj) and end(obj) produces a valid pair of - iterators. - - @param[in] i input to read from - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default) - - @return Whether the input read from @a i is valid JSON. - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. - - @note A UTF-8 byte order mark is silently ignored. 
- - @liveexample{The example below demonstrates the `accept()` function reading - from a string.,accept__string} - */ - template - static bool accept(InputType&& i, - const bool ignore_comments = false) - { - return parser(detail::input_adapter(std::forward(i)), nullptr, false, ignore_comments).accept(true); - } - - template - static bool accept(IteratorType first, IteratorType last, - const bool ignore_comments = false) - { - return parser(detail::input_adapter(std::move(first), std::move(last)), nullptr, false, ignore_comments).accept(true); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, accept(ptr, ptr + len)) - static bool accept(detail::span_input_adapter&& i, - const bool ignore_comments = false) - { - return parser(i.get(), nullptr, false, ignore_comments).accept(true); - } - - /*! - @brief generate SAX events - - The SAX event lister must follow the interface of @ref json_sax. - - This function reads from a compatible input. Examples are: - - an std::istream object - - a FILE pointer - - a C-style array of characters - - a pointer to a null-terminated string of single byte characters - - an object obj for which begin(obj) and end(obj) produces a valid pair of - iterators. - - @param[in] i input to read from - @param[in,out] sax SAX event listener - @param[in] format the format to parse (JSON, CBOR, MessagePack, or UBJSON) - @param[in] strict whether the input has to be consumed completely - @param[in] ignore_comments whether comments should be ignored and treated - like whitespace (true) or yield a parse error (true); (optional, false by - default); only applies to the JSON file format. - - @return return value of the last processed SAX event - - @throw parse_error.101 if a parse error occurs; example: `""unexpected end - of input; expected string literal""` - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. 
The parser is a predictive - LL(1) parser. The complexity can be higher if the SAX consumer @a sax has - a super-linear complexity. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below demonstrates the `sax_parse()` function - reading from string and processing the events with a user-defined SAX - event consumer.,sax_parse} - - @since version 3.2.0 - */ - template - JSON_HEDLEY_NON_NULL(2) - static bool sax_parse(InputType&& i, SAX* sax, - input_format_t format = input_format_t::json, - const bool strict = true, - const bool ignore_comments = false) - { - auto ia = detail::input_adapter(std::forward(i)); - return format == input_format_t::json - ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) - : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); - } - - template - JSON_HEDLEY_NON_NULL(3) - static bool sax_parse(IteratorType first, IteratorType last, SAX* sax, - input_format_t format = input_format_t::json, - const bool strict = true, - const bool ignore_comments = false) - { - auto ia = detail::input_adapter(std::move(first), std::move(last)); - return format == input_format_t::json - ? parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) - : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); - } - - template - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, sax_parse(ptr, ptr + len, ...)) - JSON_HEDLEY_NON_NULL(2) - static bool sax_parse(detail::span_input_adapter&& i, SAX* sax, - input_format_t format = input_format_t::json, - const bool strict = true, - const bool ignore_comments = false) - { - auto ia = i.get(); - return format == input_format_t::json - // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - ? 
parser(std::move(ia), nullptr, true, ignore_comments).sax_parse(sax, strict) - // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - : detail::binary_reader(std::move(ia)).sax_parse(format, sax, strict); - } - - /*! - @brief deserialize from stream - @deprecated This stream operator is deprecated and will be removed in - version 4.0.0 of the library. Please use - @ref operator>>(std::istream&, basic_json&) - instead; that is, replace calls like `j << i;` with `i >> j;`. - @since version 1.0.0; deprecated since version 3.0.0 - */ - JSON_HEDLEY_DEPRECATED_FOR(3.0.0, operator>>(std::istream&, basic_json&)) - friend std::istream& operator<<(basic_json& j, std::istream& i) - { - return operator>>(i, j); - } - - /*! - @brief deserialize from stream - - Deserializes an input stream to a JSON value. - - @param[in,out] i input stream to read a serialized JSON value from - @param[in,out] j JSON value to write the deserialized input to - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - - @complexity Linear in the length of the input. The parser is a predictive - LL(1) parser. - - @note A UTF-8 byte order mark is silently ignored. - - @liveexample{The example below shows how a JSON value is constructed by - reading a serialization from a stream.,operator_deserialize} - - @sa parse(std::istream&, const parser_callback_t) for a variant with a - parser callback function to filter values while parsing - - @since version 1.0.0 - */ - friend std::istream& operator>>(std::istream& i, basic_json& j) - { - parser(detail::input_adapter(i)).parse(false, j); - return i; - } - - /// @} - - /////////////////////////// - // convenience functions // - /////////////////////////// - - /*! - @brief return the type as string - - Returns the type name as string to be used in error messages - usually to - indicate that a function was called on a wrong JSON type. 
- - @return a string representation of a the @a m_type member: - Value type | return value - ----------- | ------------- - null | `"null"` - boolean | `"boolean"` - string | `"string"` - number | `"number"` (for all number types) - object | `"object"` - array | `"array"` - binary | `"binary"` - discarded | `"discarded"` - - @exceptionsafety No-throw guarantee: this function never throws exceptions. - - @complexity Constant. - - @liveexample{The following code exemplifies `type_name()` for all JSON - types.,type_name} - - @sa see @ref type() -- return the type of the JSON value - @sa see @ref operator value_t() -- return the type of the JSON value (implicit) - - @since version 1.0.0, public since 2.1.0, `const char*` and `noexcept` - since 3.0.0 - */ - JSON_HEDLEY_RETURNS_NON_NULL - const char* type_name() const noexcept - { - { - switch (m_type) - { - case value_t::null: - return "null"; - case value_t::object: - return "object"; - case value_t::array: - return "array"; - case value_t::string: - return "string"; - case value_t::boolean: - return "boolean"; - case value_t::binary: - return "binary"; - case value_t::discarded: - return "discarded"; - default: - return "number"; - } - } - } - - - JSON_PRIVATE_UNLESS_TESTED: - ////////////////////// - // member variables // - ////////////////////// - - /// the type of the current element - value_t m_type = value_t::null; - - /// the value of the current element - json_value m_value = {}; - -#if JSON_DIAGNOSTICS - /// a pointer to a parent value (for debugging purposes) - basic_json* m_parent = nullptr; -#endif - - ////////////////////////////////////////// - // binary serialization/deserialization // - ////////////////////////////////////////// - - /// @name binary serialization/deserialization support - /// @{ - - public: - /*! - @brief create a CBOR serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the CBOR (Concise - Binary Object Representation) serialization format. 
CBOR is a binary - serialization format which aims to be more compact than JSON itself, yet - more efficient to parse. - - The library uses the following mapping from JSON values types to - CBOR types according to the CBOR specification (RFC 7049): - - JSON value type | value/range | CBOR type | first byte - --------------- | ------------------------------------------ | ---------------------------------- | --------------- - null | `null` | Null | 0xF6 - boolean | `true` | True | 0xF5 - boolean | `false` | False | 0xF4 - number_integer | -9223372036854775808..-2147483649 | Negative integer (8 bytes follow) | 0x3B - number_integer | -2147483648..-32769 | Negative integer (4 bytes follow) | 0x3A - number_integer | -32768..-129 | Negative integer (2 bytes follow) | 0x39 - number_integer | -128..-25 | Negative integer (1 byte follow) | 0x38 - number_integer | -24..-1 | Negative integer | 0x20..0x37 - number_integer | 0..23 | Integer | 0x00..0x17 - number_integer | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_integer | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_integer | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1A - number_integer | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1B - number_unsigned | 0..23 | Integer | 0x00..0x17 - number_unsigned | 24..255 | Unsigned integer (1 byte follow) | 0x18 - number_unsigned | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 - number_unsigned | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1A - number_unsigned | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1B - number_float | *any value representable by a float* | Single-Precision Float | 0xFA - number_float | *any value NOT representable by a float* | Double-Precision Float | 0xFB - string | *length*: 0..23 | UTF-8 string | 0x60..0x77 - string | *length*: 23..255 | UTF-8 string (1 byte follow) | 0x78 - string | *length*: 256..65535 | UTF-8 string (2 bytes 
follow) | 0x79 - string | *length*: 65536..4294967295 | UTF-8 string (4 bytes follow) | 0x7A - string | *length*: 4294967296..18446744073709551615 | UTF-8 string (8 bytes follow) | 0x7B - array | *size*: 0..23 | array | 0x80..0x97 - array | *size*: 23..255 | array (1 byte follow) | 0x98 - array | *size*: 256..65535 | array (2 bytes follow) | 0x99 - array | *size*: 65536..4294967295 | array (4 bytes follow) | 0x9A - array | *size*: 4294967296..18446744073709551615 | array (8 bytes follow) | 0x9B - object | *size*: 0..23 | map | 0xA0..0xB7 - object | *size*: 23..255 | map (1 byte follow) | 0xB8 - object | *size*: 256..65535 | map (2 bytes follow) | 0xB9 - object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xBA - object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xBB - binary | *size*: 0..23 | byte string | 0x40..0x57 - binary | *size*: 23..255 | byte string (1 byte follow) | 0x58 - binary | *size*: 256..65535 | byte string (2 bytes follow) | 0x59 - binary | *size*: 65536..4294967295 | byte string (4 bytes follow) | 0x5A - binary | *size*: 4294967296..18446744073709551615 | byte string (8 bytes follow) | 0x5B - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a CBOR value. - - @note If NaN or Infinity are stored inside a JSON number, they are - serialized properly. This behavior differs from the @ref dump() - function which serializes NaN or Infinity to `null`. 
- - @note The following CBOR types are not used in the conversion: - - UTF-8 strings terminated by "break" (0x7F) - - arrays terminated by "break" (0x9F) - - maps terminated by "break" (0xBF) - - byte strings terminated by "break" (0x5F) - - date/time (0xC0..0xC1) - - bignum (0xC2..0xC3) - - decimal fraction (0xC4) - - bigfloat (0xC5) - - expected conversions (0xD5..0xD7) - - simple values (0xE0..0xF3, 0xF8) - - undefined (0xF7) - - half-precision floats (0xF9) - - break (0xFF) - - @param[in] j JSON value to serialize - @return CBOR serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in CBOR format.,to_cbor} - - @sa http://cbor.io - @sa see @ref from_cbor(InputType&&, const bool, const bool, const cbor_tag_handler_t) for the - analogous deserialization - @sa see @ref to_msgpack(const basic_json&) for the related MessagePack format - @sa see @ref to_ubjson(const basic_json&, const bool, const bool) for the - related UBJSON format - - @since version 2.0.9; compact representation of floating-point numbers - since version 3.8.0 - */ - static std::vector to_cbor(const basic_json& j) - { - std::vector result; - to_cbor(j, result); - return result; - } - - static void to_cbor(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_cbor(j); - } - - static void to_cbor(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_cbor(j); - } - - /*! - @brief create a MessagePack serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the MessagePack - serialization format. MessagePack is a binary serialization format which - aims to be more compact than JSON itself, yet more efficient to parse. 
- - The library uses the following mapping from JSON values types to - MessagePack types according to the MessagePack specification: - - JSON value type | value/range | MessagePack type | first byte - --------------- | --------------------------------- | ---------------- | ---------- - null | `null` | nil | 0xC0 - boolean | `true` | true | 0xC3 - boolean | `false` | false | 0xC2 - number_integer | -9223372036854775808..-2147483649 | int64 | 0xD3 - number_integer | -2147483648..-32769 | int32 | 0xD2 - number_integer | -32768..-129 | int16 | 0xD1 - number_integer | -128..-33 | int8 | 0xD0 - number_integer | -32..-1 | negative fixint | 0xE0..0xFF - number_integer | 0..127 | positive fixint | 0x00..0x7F - number_integer | 128..255 | uint 8 | 0xCC - number_integer | 256..65535 | uint 16 | 0xCD - number_integer | 65536..4294967295 | uint 32 | 0xCE - number_integer | 4294967296..18446744073709551615 | uint 64 | 0xCF - number_unsigned | 0..127 | positive fixint | 0x00..0x7F - number_unsigned | 128..255 | uint 8 | 0xCC - number_unsigned | 256..65535 | uint 16 | 0xCD - number_unsigned | 65536..4294967295 | uint 32 | 0xCE - number_unsigned | 4294967296..18446744073709551615 | uint 64 | 0xCF - number_float | *any value representable by a float* | float 32 | 0xCA - number_float | *any value NOT representable by a float* | float 64 | 0xCB - string | *length*: 0..31 | fixstr | 0xA0..0xBF - string | *length*: 32..255 | str 8 | 0xD9 - string | *length*: 256..65535 | str 16 | 0xDA - string | *length*: 65536..4294967295 | str 32 | 0xDB - array | *size*: 0..15 | fixarray | 0x90..0x9F - array | *size*: 16..65535 | array 16 | 0xDC - array | *size*: 65536..4294967295 | array 32 | 0xDD - object | *size*: 0..15 | fix map | 0x80..0x8F - object | *size*: 16..65535 | map 16 | 0xDE - object | *size*: 65536..4294967295 | map 32 | 0xDF - binary | *size*: 0..255 | bin 8 | 0xC4 - binary | *size*: 256..65535 | bin 16 | 0xC5 - binary | *size*: 65536..4294967295 | bin 32 | 0xC6 - - @note The mapping 
is **complete** in the sense that any JSON value type - can be converted to a MessagePack value. - - @note The following values can **not** be converted to a MessagePack value: - - strings with more than 4294967295 bytes - - byte strings with more than 4294967295 bytes - - arrays with more than 4294967295 elements - - objects with more than 4294967295 elements - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. - - @note If NaN or Infinity are stored inside a JSON number, they are - serialized properly. This behavior differs from the @ref dump() - function which serializes NaN or Infinity to `null`. - - @param[in] j JSON value to serialize - @return MessagePack serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. - - @liveexample{The example shows the serialization of a JSON value to a byte - vector in MessagePack format.,to_msgpack} - - @sa http://msgpack.org - @sa see @ref from_msgpack for the analogous deserialization - @sa see @ref to_cbor(const basic_json& for the related CBOR format - @sa see @ref to_ubjson(const basic_json&, const bool, const bool) for the - related UBJSON format - - @since version 2.0.9 - */ - static std::vector to_msgpack(const basic_json& j) - { - std::vector result; - to_msgpack(j, result); - return result; - } - - static void to_msgpack(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_msgpack(j); - } - - static void to_msgpack(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_msgpack(j); - } - - /*! - @brief create a UBJSON serialization of a given JSON value - - Serializes a given JSON value @a j to a byte vector using the UBJSON - (Universal Binary JSON) serialization format. UBJSON aims to be more compact - than JSON itself, yet more efficient to parse. 
- - The library uses the following mapping from JSON values types to - UBJSON types according to the UBJSON specification: - - JSON value type | value/range | UBJSON type | marker - --------------- | --------------------------------- | ----------- | ------ - null | `null` | null | `Z` - boolean | `true` | true | `T` - boolean | `false` | false | `F` - number_integer | -9223372036854775808..-2147483649 | int64 | `L` - number_integer | -2147483648..-32769 | int32 | `l` - number_integer | -32768..-129 | int16 | `I` - number_integer | -128..127 | int8 | `i` - number_integer | 128..255 | uint8 | `U` - number_integer | 256..32767 | int16 | `I` - number_integer | 32768..2147483647 | int32 | `l` - number_integer | 2147483648..9223372036854775807 | int64 | `L` - number_unsigned | 0..127 | int8 | `i` - number_unsigned | 128..255 | uint8 | `U` - number_unsigned | 256..32767 | int16 | `I` - number_unsigned | 32768..2147483647 | int32 | `l` - number_unsigned | 2147483648..9223372036854775807 | int64 | `L` - number_unsigned | 2147483649..18446744073709551615 | high-precision | `H` - number_float | *any value* | float64 | `D` - string | *with shortest length indicator* | string | `S` - array | *see notes on optimized format* | array | `[` - object | *see notes on optimized format* | map | `{` - - @note The mapping is **complete** in the sense that any JSON value type - can be converted to a UBJSON value. - - @note The following values can **not** be converted to a UBJSON value: - - strings with more than 9223372036854775807 bytes (theoretical) - - @note The following markers are not used in the conversion: - - `Z`: no-op values are not created. - - `C`: single-byte strings are serialized with `S` markers. - - @note Any UBJSON output created @ref to_ubjson can be successfully parsed - by @ref from_ubjson. - - @note If NaN or Infinity are stored inside a JSON number, they are - serialized properly. 
This behavior differs from the @ref dump() - function which serializes NaN or Infinity to `null`. - - @note The optimized formats for containers are supported: Parameter - @a use_size adds size information to the beginning of a container and - removes the closing marker. Parameter @a use_type further checks - whether all elements of a container have the same type and adds the - type marker to the beginning of the container. The @a use_type - parameter must only be used together with @a use_size = true. Note - that @a use_size = true alone may result in larger representations - - the benefit of this parameter is that the receiving side is - immediately informed on the number of elements of the container. - - @note If the JSON data contains the binary type, the value stored is a list - of integers, as suggested by the UBJSON documentation. In particular, - this means that serialization and the deserialization of a JSON - containing binary values into UBJSON and back will result in a - different JSON object. - - @param[in] j JSON value to serialize - @param[in] use_size whether to add size annotations to container types - @param[in] use_type whether to add type annotations to container types - (must be combined with @a use_size = true) - @return UBJSON serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. 
- - @liveexample{The example shows the serialization of a JSON value to a byte - vector in UBJSON format.,to_ubjson} - - @sa http://ubjson.org - @sa see @ref from_ubjson(InputType&&, const bool, const bool) for the - analogous deserialization - @sa see @ref to_cbor(const basic_json& for the related CBOR format - @sa see @ref to_msgpack(const basic_json&) for the related MessagePack format - - @since version 3.1.0 - */ - static std::vector to_ubjson(const basic_json& j, - const bool use_size = false, - const bool use_type = false) - { - std::vector result; - to_ubjson(j, result, use_size, use_type); - return result; - } - - static void to_ubjson(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false) - { - binary_writer(o).write_ubjson(j, use_size, use_type); - } - - static void to_ubjson(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false) - { - binary_writer(o).write_ubjson(j, use_size, use_type); - } - - - /*! - @brief Serializes the given JSON object `j` to BSON and returns a vector - containing the corresponding BSON-representation. - - BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are - stored as a single entity (a so-called document). 
- - The library uses the following mapping from JSON values types to BSON types: - - JSON value type | value/range | BSON type | marker - --------------- | --------------------------------- | ----------- | ------ - null | `null` | null | 0x0A - boolean | `true`, `false` | boolean | 0x08 - number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 - number_integer | -2147483648..2147483647 | int32 | 0x10 - number_integer | 2147483648..9223372036854775807 | int64 | 0x12 - number_unsigned | 0..2147483647 | int32 | 0x10 - number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 - number_unsigned | 9223372036854775808..18446744073709551615| -- | -- - number_float | *any value* | double | 0x01 - string | *any value* | string | 0x02 - array | *any value* | document | 0x04 - object | *any value* | document | 0x03 - binary | *any value* | binary | 0x05 - - @warning The mapping is **incomplete**, since only JSON-objects (and things - contained therein) can be serialized to BSON. - Also, integers larger than 9223372036854775807 cannot be serialized to BSON, - and the keys may not contain U+0000, since they are serialized a - zero-terminated c-strings. - - @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` - @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) - @throw type_error.317 if `!j.is_object()` - - @pre The input `j` is required to be an object: `j.is_object() == true`. - - @note Any BSON output created via @ref to_bson can be successfully parsed - by @ref from_bson. - - @param[in] j JSON value to serialize - @return BSON serialization as byte vector - - @complexity Linear in the size of the JSON value @a j. 
- - @liveexample{The example shows the serialization of a JSON value to a byte - vector in BSON format.,to_bson} - - @sa http://bsonspec.org/spec.html - @sa see @ref from_bson(detail::input_adapter&&, const bool strict) for the - analogous deserialization - @sa see @ref to_ubjson(const basic_json&, const bool, const bool) for the - related UBJSON format - @sa see @ref to_cbor(const basic_json&) for the related CBOR format - @sa see @ref to_msgpack(const basic_json&) for the related MessagePack format - */ - static std::vector to_bson(const basic_json& j) - { - std::vector result; - to_bson(j, result); - return result; - } - - /*! - @brief Serializes the given JSON object `j` to BSON and forwards the - corresponding BSON-representation to the given output_adapter `o`. - @param j The JSON object to convert to BSON. - @param o The output adapter that receives the binary BSON representation. - @pre The input `j` shall be an object: `j.is_object() == true` - @sa see @ref to_bson(const basic_json&) - */ - static void to_bson(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_bson(j); - } - - /*! - @copydoc to_bson(const basic_json&, detail::output_adapter) - */ - static void to_bson(const basic_json& j, detail::output_adapter o) - { - binary_writer(o).write_bson(j); - } - - - /*! - @brief create a JSON value from an input in CBOR format - - Deserializes a given input @a i to a JSON value using the CBOR (Concise - Binary Object Representation) serialization format. 
- - The library maps CBOR types to JSON value types as follows: - - CBOR type | JSON value type | first byte - ---------------------- | --------------- | ---------- - Integer | number_unsigned | 0x00..0x17 - Unsigned integer | number_unsigned | 0x18 - Unsigned integer | number_unsigned | 0x19 - Unsigned integer | number_unsigned | 0x1A - Unsigned integer | number_unsigned | 0x1B - Negative integer | number_integer | 0x20..0x37 - Negative integer | number_integer | 0x38 - Negative integer | number_integer | 0x39 - Negative integer | number_integer | 0x3A - Negative integer | number_integer | 0x3B - Byte string | binary | 0x40..0x57 - Byte string | binary | 0x58 - Byte string | binary | 0x59 - Byte string | binary | 0x5A - Byte string | binary | 0x5B - UTF-8 string | string | 0x60..0x77 - UTF-8 string | string | 0x78 - UTF-8 string | string | 0x79 - UTF-8 string | string | 0x7A - UTF-8 string | string | 0x7B - UTF-8 string | string | 0x7F - array | array | 0x80..0x97 - array | array | 0x98 - array | array | 0x99 - array | array | 0x9A - array | array | 0x9B - array | array | 0x9F - map | object | 0xA0..0xB7 - map | object | 0xB8 - map | object | 0xB9 - map | object | 0xBA - map | object | 0xBB - map | object | 0xBF - False | `false` | 0xF4 - True | `true` | 0xF5 - Null | `null` | 0xF6 - Half-Precision Float | number_float | 0xF9 - Single-Precision Float | number_float | 0xFA - Double-Precision Float | number_float | 0xFB - - @warning The mapping is **incomplete** in the sense that not all CBOR - types can be converted to a JSON value. The following CBOR types - are not supported and will yield parse errors (parse_error.112): - - date/time (0xC0..0xC1) - - bignum (0xC2..0xC3) - - decimal fraction (0xC4) - - bigfloat (0xC5) - - expected conversions (0xD5..0xD7) - - simple values (0xE0..0xF3, 0xF8) - - undefined (0xF7) - - @warning CBOR allows map keys of any type, whereas JSON only allows - strings as keys in object values. 
Therefore, CBOR maps with keys - other than UTF-8 strings are rejected (parse_error.113). - - @note Any CBOR output created @ref to_cbor can be successfully parsed by - @ref from_cbor. - - @param[in] i an input in CBOR format convertible to an input adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - @param[in] tag_handler how to treat CBOR tags (optional, error by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.110 if the given input ends prematurely or the end of - file was not reached when @a strict was set to true - @throw parse_error.112 if unsupported features from CBOR were - used in the given input @a v or if the input is not valid CBOR - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the input @a i. - - @liveexample{The example shows the deserialization of a byte vector in CBOR - format to a JSON value.,from_cbor} - - @sa http://cbor.io - @sa see @ref to_cbor(const basic_json&) for the analogous serialization - @sa see @ref from_msgpack(InputType&&, const bool, const bool) for the - related MessagePack format - @sa see @ref from_ubjson(InputType&&, const bool, const bool) for the - related UBJSON format - - @since version 2.0.9; parameter @a start_index since 2.1.1; changed to - consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0; added @a allow_exceptions parameter - since 3.2.0; added @a tag_handler parameter since 3.9.0. 
- */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_cbor(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @copydoc from_cbor(InputType&&, const bool, const bool, const cbor_tag_handler_t) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_cbor(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); - return res ? 
result : basic_json(value_t::discarded); - } - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_cbor(ptr, ptr + len)) - static basic_json from_cbor(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - return from_cbor(ptr, ptr + len, strict, allow_exceptions, tag_handler); - } - - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_cbor(ptr, ptr + len)) - static basic_json from_cbor(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true, - const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @brief create a JSON value from an input in MessagePack format - - Deserializes a given input @a i to a JSON value using the MessagePack - serialization format. 
- - The library maps MessagePack types to JSON value types as follows: - - MessagePack type | JSON value type | first byte - ---------------- | --------------- | ---------- - positive fixint | number_unsigned | 0x00..0x7F - fixmap | object | 0x80..0x8F - fixarray | array | 0x90..0x9F - fixstr | string | 0xA0..0xBF - nil | `null` | 0xC0 - false | `false` | 0xC2 - true | `true` | 0xC3 - float 32 | number_float | 0xCA - float 64 | number_float | 0xCB - uint 8 | number_unsigned | 0xCC - uint 16 | number_unsigned | 0xCD - uint 32 | number_unsigned | 0xCE - uint 64 | number_unsigned | 0xCF - int 8 | number_integer | 0xD0 - int 16 | number_integer | 0xD1 - int 32 | number_integer | 0xD2 - int 64 | number_integer | 0xD3 - str 8 | string | 0xD9 - str 16 | string | 0xDA - str 32 | string | 0xDB - array 16 | array | 0xDC - array 32 | array | 0xDD - map 16 | object | 0xDE - map 32 | object | 0xDF - bin 8 | binary | 0xC4 - bin 16 | binary | 0xC5 - bin 32 | binary | 0xC6 - ext 8 | binary | 0xC7 - ext 16 | binary | 0xC8 - ext 32 | binary | 0xC9 - fixext 1 | binary | 0xD4 - fixext 2 | binary | 0xD5 - fixext 4 | binary | 0xD6 - fixext 8 | binary | 0xD7 - fixext 16 | binary | 0xD8 - negative fixint | number_integer | 0xE0-0xFF - - @note Any MessagePack output created @ref to_msgpack can be successfully - parsed by @ref from_msgpack. - - @param[in] i an input in MessagePack format convertible to an input - adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. 
- - @throw parse_error.110 if the given input ends prematurely or the end of - file was not reached when @a strict was set to true - @throw parse_error.112 if unsupported features from MessagePack were - used in the given input @a i or if the input is not valid MessagePack - @throw parse_error.113 if a string was expected as map key, but not found - - @complexity Linear in the size of the input @a i. - - @liveexample{The example shows the deserialization of a byte vector in - MessagePack format to a JSON value.,from_msgpack} - - @sa http://msgpack.org - @sa see @ref to_msgpack(const basic_json&) for the analogous serialization - @sa see @ref from_cbor(InputType&&, const bool, const bool, const cbor_tag_handler_t) for the - related CBOR format - @sa see @ref from_ubjson(InputType&&, const bool, const bool) for - the related UBJSON format - @sa see @ref from_bson(InputType&&, const bool, const bool) for - the related BSON format - - @since version 2.0.9; parameter @a start_index since 2.1.1; changed to - consume input adapters, removed start_index parameter, and added - @a strict parameter since 3.0.0; added @a allow_exceptions parameter - since 3.2.0 - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_msgpack(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - /*! 
- @copydoc from_msgpack(InputType&&, const bool, const bool) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_msgpack(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_msgpack(ptr, ptr + len)) - static basic_json from_msgpack(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true) - { - return from_msgpack(ptr, ptr + len, strict, allow_exceptions); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_msgpack(ptr, ptr + len)) - static basic_json from_msgpack(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::msgpack, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - - /*! - @brief create a JSON value from an input in UBJSON format - - Deserializes a given input @a i to a JSON value using the UBJSON (Universal - Binary JSON) serialization format. 
- - The library maps UBJSON types to JSON value types as follows: - - UBJSON type | JSON value type | marker - ----------- | --------------------------------------- | ------ - no-op | *no value, next value is read* | `N` - null | `null` | `Z` - false | `false` | `F` - true | `true` | `T` - float32 | number_float | `d` - float64 | number_float | `D` - uint8 | number_unsigned | `U` - int8 | number_integer | `i` - int16 | number_integer | `I` - int32 | number_integer | `l` - int64 | number_integer | `L` - high-precision number | number_integer, number_unsigned, or number_float - depends on number string | 'H' - string | string | `S` - char | string | `C` - array | array (optimized values are supported) | `[` - object | object (optimized values are supported) | `{` - - @note The mapping is **complete** in the sense that any UBJSON value can - be converted to a JSON value. - - @param[in] i an input in UBJSON format convertible to an input adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.110 if the given input ends prematurely or the end of - file was not reached when @a strict was set to true - @throw parse_error.112 if a parse error occurs - @throw parse_error.113 if a string could not be parsed successfully - - @complexity Linear in the size of the input @a i. 
- - @liveexample{The example shows the deserialization of a byte vector in - UBJSON format to a JSON value.,from_ubjson} - - @sa http://ubjson.org - @sa see @ref to_ubjson(const basic_json&, const bool, const bool) for the - analogous serialization - @sa see @ref from_cbor(InputType&&, const bool, const bool, const cbor_tag_handler_t) for the - related CBOR format - @sa see @ref from_msgpack(InputType&&, const bool, const bool) for - the related MessagePack format - @sa see @ref from_bson(InputType&&, const bool, const bool) for - the related BSON format - - @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_ubjson(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @copydoc from_ubjson(InputType&&, const bool, const bool) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_ubjson(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); - return res ? 
result : basic_json(value_t::discarded); - } - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_ubjson(ptr, ptr + len)) - static basic_json from_ubjson(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true) - { - return from_ubjson(ptr, ptr + len, strict, allow_exceptions); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_ubjson(ptr, ptr + len)) - static basic_json from_ubjson(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::ubjson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - - /*! - @brief Create a JSON value from an input in BSON format - - Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) - serialization format. - - The library maps BSON record types to JSON value types as follows: - - BSON type | BSON marker byte | JSON value type - --------------- | ---------------- | --------------------------- - double | 0x01 | number_float - string | 0x02 | string - document | 0x03 | object - array | 0x04 | array - binary | 0x05 | binary - undefined | 0x06 | still unsupported - ObjectId | 0x07 | still unsupported - boolean | 0x08 | boolean - UTC Date-Time | 0x09 | still unsupported - null | 0x0A | null - Regular Expr. 
| 0x0B | still unsupported - DB Pointer | 0x0C | still unsupported - JavaScript Code | 0x0D | still unsupported - Symbol | 0x0E | still unsupported - JavaScript Code | 0x0F | still unsupported - int32 | 0x10 | number_integer - Timestamp | 0x11 | still unsupported - 128-bit decimal float | 0x13 | still unsupported - Max Key | 0x7F | still unsupported - Min Key | 0xFF | still unsupported - - @warning The mapping is **incomplete**. The unsupported mappings - are indicated in the table above. - - @param[in] i an input in BSON format convertible to an input adapter - @param[in] strict whether to expect the input to be consumed until EOF - (true by default) - @param[in] allow_exceptions whether to throw exceptions in case of a - parse error (optional, true by default) - - @return deserialized JSON value; in case of a parse error and - @a allow_exceptions set to `false`, the return value will be - value_t::discarded. - - @throw parse_error.114 if an unsupported BSON record type is encountered - - @complexity Linear in the size of the input @a i. 
- - @liveexample{The example shows the deserialization of a byte vector in - BSON format to a JSON value.,from_bson} - - @sa http://bsonspec.org/spec.html - @sa see @ref to_bson(const basic_json&) for the analogous serialization - @sa see @ref from_cbor(InputType&&, const bool, const bool, const cbor_tag_handler_t) for the - related CBOR format - @sa see @ref from_msgpack(InputType&&, const bool, const bool) for - the related MessagePack format - @sa see @ref from_ubjson(InputType&&, const bool, const bool) for the - related UBJSON format - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_bson(InputType&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::forward(i)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - - /*! - @copydoc from_bson(InputType&&, const bool, const bool) - */ - template - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json from_bson(IteratorType first, IteratorType last, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = detail::input_adapter(std::move(first), std::move(last)); - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); - return res ? 
result : basic_json(value_t::discarded); - } - - template - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_bson(ptr, ptr + len)) - static basic_json from_bson(const T* ptr, std::size_t len, - const bool strict = true, - const bool allow_exceptions = true) - { - return from_bson(ptr, ptr + len, strict, allow_exceptions); - } - - JSON_HEDLEY_WARN_UNUSED_RESULT - JSON_HEDLEY_DEPRECATED_FOR(3.8.0, from_bson(ptr, ptr + len)) - static basic_json from_bson(detail::span_input_adapter&& i, - const bool strict = true, - const bool allow_exceptions = true) - { - basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); - auto ia = i.get(); - // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia)).sax_parse(input_format_t::bson, &sdp, strict); - return res ? result : basic_json(value_t::discarded); - } - /// @} - - ////////////////////////// - // JSON Pointer support // - ////////////////////////// - - /// @name JSON Pointer functions - /// @{ - - /*! - @brief access specified element via JSON Pointer - - Uses a JSON pointer to retrieve a reference to the respective JSON value. - No bound checking is performed. Similar to @ref operator[](const typename - object_t::key_type&), `null` values are created in arrays and objects if - necessary. - - In particular: - - If the JSON pointer points to an object key that does not exist, it - is created an filled with a `null` value before a reference to it - is returned. - - If the JSON pointer points to an array index that does not exist, it - is created an filled with a `null` value before a reference to it - is returned. All indices between the current maximum and the given - index are also filled with `null`. - - The special value `-` is treated as a synonym for the index past the - end. - - @param[in] ptr a JSON pointer - - @return reference to the element pointed to by @a ptr - - @complexity Constant. 
- - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.404 if the JSON pointer can not be resolved - - @liveexample{The behavior is shown in the example.,operatorjson_pointer} - - @since version 2.0.0 - */ - reference operator[](const json_pointer& ptr) - { - return ptr.get_unchecked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Uses a JSON pointer to retrieve a reference to the respective JSON value. - No bound checking is performed. The function does not change the JSON - value; no `null` values are created. In particular, the special value - `-` yields an exception. - - @param[in] ptr JSON pointer to the desired element - - @return const reference to the element pointed to by @a ptr - - @complexity Constant. - - @throw parse_error.106 if an array index begins with '0' - @throw parse_error.109 if an array index was not a number - @throw out_of_range.402 if the array index '-' is used - @throw out_of_range.404 if the JSON pointer can not be resolved - - @liveexample{The behavior is shown in the example.,operatorjson_pointer_const} - - @since version 2.0.0 - */ - const_reference operator[](const json_pointer& ptr) const - { - return ptr.get_unchecked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Returns a reference to the element at with specified JSON pointer @a ptr, - with bounds checking. - - @param[in] ptr JSON pointer to the desired element - - @return reference to the element pointed to by @a ptr - - @throw parse_error.106 if an array index in the passed JSON pointer @a ptr - begins with '0'. See example below. - - @throw parse_error.109 if an array index in the passed JSON pointer @a ptr - is not a number. See example below. - - @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr - is out of range. See example below. 
- - @throw out_of_range.402 if the array index '-' is used in the passed JSON - pointer @a ptr. As `at` provides checked access (and no elements are - implicitly inserted), the index '-' is always invalid. See example below. - - @throw out_of_range.403 if the JSON pointer describes a key of an object - which cannot be found. See example below. - - @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. - See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. - - @since version 2.0.0 - - @liveexample{The behavior is shown in the example.,at_json_pointer} - */ - reference at(const json_pointer& ptr) - { - return ptr.get_checked(this); - } - - /*! - @brief access specified element via JSON Pointer - - Returns a const reference to the element at with specified JSON pointer @a - ptr, with bounds checking. - - @param[in] ptr JSON pointer to the desired element - - @return reference to the element pointed to by @a ptr - - @throw parse_error.106 if an array index in the passed JSON pointer @a ptr - begins with '0'. See example below. - - @throw parse_error.109 if an array index in the passed JSON pointer @a ptr - is not a number. See example below. - - @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr - is out of range. See example below. - - @throw out_of_range.402 if the array index '-' is used in the passed JSON - pointer @a ptr. As `at` provides checked access (and no elements are - implicitly inserted), the index '-' is always invalid. See example below. - - @throw out_of_range.403 if the JSON pointer describes a key of an object - which cannot be found. See example below. - - @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. - See example below. - - @exceptionsafety Strong guarantee: if an exception is thrown, there are no - changes in the JSON value. - - @complexity Constant. 
- - @since version 2.0.0 - - @liveexample{The behavior is shown in the example.,at_json_pointer_const} - */ - const_reference at(const json_pointer& ptr) const - { - return ptr.get_checked(this); - } - - /*! - @brief return flattened JSON value - - The function creates a JSON object whose keys are JSON pointers (see [RFC - 6901](https://tools.ietf.org/html/rfc6901)) and whose values are all - primitive. The original JSON value can be restored using the @ref - unflatten() function. - - @return an object that maps JSON pointers to primitive values - - @note Empty objects and arrays are flattened to `null` and will not be - reconstructed correctly by the @ref unflatten() function. - - @complexity Linear in the size the JSON value. - - @liveexample{The following code shows how a JSON object is flattened to an - object whose keys consist of JSON pointers.,flatten} - - @sa see @ref unflatten() for the reverse function - - @since version 2.0.0 - */ - basic_json flatten() const - { - basic_json result(value_t::object); - json_pointer::flatten("", *this, result); - return result; - } - - /*! - @brief unflatten a previously flattened JSON value - - The function restores the arbitrary nesting of a JSON value that has been - flattened before using the @ref flatten() function. The JSON value must - meet certain constraints: - 1. The value must be an object. - 2. The keys must be JSON pointers (see - [RFC 6901](https://tools.ietf.org/html/rfc6901)) - 3. The mapped values must be primitive JSON types. - - @return the original JSON from a flattened version - - @note Empty objects and arrays are flattened by @ref flatten() to `null` - values and can not unflattened to their original type. Apart from - this example, for a JSON value `j`, the following is always true: - `j == j.flatten().unflatten()`. - - @complexity Linear in the size the JSON value. 
- - @throw type_error.314 if value is not an object - @throw type_error.315 if object values are not primitive - - @liveexample{The following code shows how a flattened JSON object is - unflattened into the original nested JSON object.,unflatten} - - @sa see @ref flatten() for the reverse function - - @since version 2.0.0 - */ - basic_json unflatten() const - { - return json_pointer::unflatten(*this); - } - - /// @} - - ////////////////////////// - // JSON Patch functions // - ////////////////////////// - - /// @name JSON Patch functions - /// @{ - - /*! - @brief applies a JSON patch - - [JSON Patch](http://jsonpatch.com) defines a JSON document structure for - expressing a sequence of operations to apply to a JSON) document. With - this function, a JSON Patch is applied to the current JSON value by - executing all operations from the patch. - - @param[in] json_patch JSON patch document - @return patched document - - @note The application of a patch is atomic: Either all operations succeed - and the patched document is returned or an exception is thrown. In - any case, the original value is not changed: the patch is applied - to a copy of the value. - - @throw parse_error.104 if the JSON patch does not consist of an array of - objects - - @throw parse_error.105 if the JSON patch is malformed (e.g., mandatory - attributes are missing); example: `"operation add must have member path"` - - @throw out_of_range.401 if an array index is out of range. - - @throw out_of_range.403 if a JSON pointer inside the patch could not be - resolved successfully in the current JSON value; example: `"key baz not - found"` - - @throw out_of_range.405 if JSON pointer has no parent ("add", "remove", - "move") - - @throw other_error.501 if "test" operation was unsuccessful - - @complexity Linear in the size of the JSON value and the length of the - JSON patch. As usually only a fraction of the JSON value is affected by - the patch, the complexity can usually be neglected. 
- - @liveexample{The following code shows how a JSON patch is applied to a - value.,patch} - - @sa see @ref diff -- create a JSON patch by comparing two JSON values - - @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) - @sa [RFC 6901 (JSON Pointer)](https://tools.ietf.org/html/rfc6901) - - @since version 2.0.0 - */ - basic_json patch(const basic_json& json_patch) const - { - // make a working copy to apply the patch to - basic_json result = *this; - - // the valid JSON Patch operations - enum class patch_operations {add, remove, replace, move, copy, test, invalid}; - - const auto get_op = [](const std::string & op) - { - if (op == "add") - { - return patch_operations::add; - } - if (op == "remove") - { - return patch_operations::remove; - } - if (op == "replace") - { - return patch_operations::replace; - } - if (op == "move") - { - return patch_operations::move; - } - if (op == "copy") - { - return patch_operations::copy; - } - if (op == "test") - { - return patch_operations::test; - } - - return patch_operations::invalid; - }; - - // wrapper for "add" operation; add value at ptr - const auto operation_add = [&result](json_pointer & ptr, basic_json val) - { - // adding to the root of the target document means replacing it - if (ptr.empty()) - { - result = val; - return; - } - - // make sure the top element of the pointer exists - json_pointer top_pointer = ptr.top(); - if (top_pointer != ptr) - { - result.at(top_pointer); - } - - // get reference to parent of JSON pointer ptr - const auto last_path = ptr.back(); - ptr.pop_back(); - basic_json& parent = result[ptr]; - - switch (parent.m_type) - { - case value_t::null: - case value_t::object: - { - // use operator[] to add value - parent[last_path] = val; - break; - } - - case value_t::array: - { - if (last_path == "-") - { - // special case: append to back - parent.push_back(val); - } - else - { - const auto idx = json_pointer::array_index(last_path); - if (JSON_HEDLEY_UNLIKELY(idx > parent.size())) 
- { - // avoid undefined behavior - JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range", parent)); - } - - // default case: insert add offset - parent.insert(parent.begin() + static_cast(idx), val); - } - break; - } - - // if there exists a parent it cannot be primitive - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } - }; - - // wrapper for "remove" operation; remove value at ptr - const auto operation_remove = [this, &result](json_pointer & ptr) - { - // get reference to parent of JSON pointer ptr - const auto last_path = ptr.back(); - ptr.pop_back(); - basic_json& parent = result.at(ptr); - - // remove child - if (parent.is_object()) - { - // perform range check - auto it = parent.find(last_path); - if (JSON_HEDLEY_LIKELY(it != parent.end())) - { - parent.erase(it); - } - else - { - JSON_THROW(out_of_range::create(403, "key '" + last_path + "' not found", *this)); - } - } - else if (parent.is_array()) - { - // note erase performs range check - parent.erase(json_pointer::array_index(last_path)); - } - }; - - // type check: top level value must be an array - if (JSON_HEDLEY_UNLIKELY(!json_patch.is_array())) - { - JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects", json_patch)); - } - - // iterate and apply the operations - for (const auto& val : json_patch) - { - // wrapper to get a value for an operation - const auto get_value = [&val](const std::string & op, - const std::string & member, - bool string_type) -> basic_json & - { - // find value - auto it = val.m_value.object->find(member); - - // context-sensitive error message - const auto error_msg = (op == "op") ? 
"operation" : "operation '" + op + "'"; - - // check if desired value is present - if (JSON_HEDLEY_UNLIKELY(it == val.m_value.object->end())) - { - // NOLINTNEXTLINE(performance-inefficient-string-concatenation) - JSON_THROW(parse_error::create(105, 0, error_msg + " must have member '" + member + "'", val)); - } - - // check if result is of type string - if (JSON_HEDLEY_UNLIKELY(string_type && !it->second.is_string())) - { - // NOLINTNEXTLINE(performance-inefficient-string-concatenation) - JSON_THROW(parse_error::create(105, 0, error_msg + " must have string member '" + member + "'", val)); - } - - // no error: return value - return it->second; - }; - - // type check: every element of the array must be an object - if (JSON_HEDLEY_UNLIKELY(!val.is_object())) - { - JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects", val)); - } - - // collect mandatory members - const auto op = get_value("op", "op", true).template get(); - const auto path = get_value(op, "path", true).template get(); - json_pointer ptr(path); - - switch (get_op(op)) - { - case patch_operations::add: - { - operation_add(ptr, get_value("add", "value", false)); - break; - } - - case patch_operations::remove: - { - operation_remove(ptr); - break; - } - - case patch_operations::replace: - { - // the "path" location must exist - use at() - result.at(ptr) = get_value("replace", "value", false); - break; - } - - case patch_operations::move: - { - const auto from_path = get_value("move", "from", true).template get(); - json_pointer from_ptr(from_path); - - // the "from" location must exist - use at() - basic_json v = result.at(from_ptr); - - // The move operation is functionally identical to a - // "remove" operation on the "from" location, followed - // immediately by an "add" operation at the target - // location with the value that was just removed. 
- operation_remove(from_ptr); - operation_add(ptr, v); - break; - } - - case patch_operations::copy: - { - const auto from_path = get_value("copy", "from", true).template get(); - const json_pointer from_ptr(from_path); - - // the "from" location must exist - use at() - basic_json v = result.at(from_ptr); - - // The copy is functionally identical to an "add" - // operation at the target location using the value - // specified in the "from" member. - operation_add(ptr, v); - break; - } - - case patch_operations::test: - { - bool success = false; - JSON_TRY - { - // check if "value" matches the one at "path" - // the "path" location must exist - use at() - success = (result.at(ptr) == get_value("test", "value", false)); - } - JSON_INTERNAL_CATCH (out_of_range&) - { - // ignore out of range errors: success remains false - } - - // throw an exception if test fails - if (JSON_HEDLEY_UNLIKELY(!success)) - { - JSON_THROW(other_error::create(501, "unsuccessful: " + val.dump(), val)); - } - - break; - } - - default: - { - // op must be "add", "remove", "replace", "move", "copy", or - // "test" - JSON_THROW(parse_error::create(105, 0, "operation value '" + op + "' is invalid", val)); - } - } - } - - return result; - } - - /*! - @brief creates a diff as a JSON patch - - Creates a [JSON Patch](http://jsonpatch.com) so that value @a source can - be changed into the value @a target by calling @ref patch function. - - @invariant For two JSON values @a source and @a target, the following code - yields always `true`: - @code {.cpp} - source.patch(diff(source, target)) == target; - @endcode - - @note Currently, only `remove`, `add`, and `replace` operations are - generated. - - @param[in] source JSON value to compare from - @param[in] target JSON value to compare against - @param[in] path helper value to create JSON pointers - - @return a JSON patch to convert the @a source to @a target - - @complexity Linear in the lengths of @a source and @a target. 
- - @liveexample{The following code shows how a JSON patch is created as a - diff for two JSON values.,diff} - - @sa see @ref patch -- apply a JSON patch - @sa see @ref merge_patch -- apply a JSON Merge Patch - - @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) - - @since version 2.0.0 - */ - JSON_HEDLEY_WARN_UNUSED_RESULT - static basic_json diff(const basic_json& source, const basic_json& target, - const std::string& path = "") - { - // the patch - basic_json result(value_t::array); - - // if the values are the same, return empty patch - if (source == target) - { - return result; - } - - if (source.type() != target.type()) - { - // different types: replace value - result.push_back( - { - {"op", "replace"}, {"path", path}, {"value", target} - }); - return result; - } - - switch (source.type()) - { - case value_t::array: - { - // first pass: traverse common elements - std::size_t i = 0; - while (i < source.size() && i < target.size()) - { - // recursive call to compare array values at index i - auto temp_diff = diff(source[i], target[i], path + "/" + std::to_string(i)); - result.insert(result.end(), temp_diff.begin(), temp_diff.end()); - ++i; - } - - // i now reached the end of at least one array - // in a second pass, traverse the remaining elements - - // remove my remaining elements - const auto end_index = static_cast(result.size()); - while (i < source.size()) - { - // add operations in reverse order to avoid invalid - // indices - result.insert(result.begin() + end_index, object( - { - {"op", "remove"}, - {"path", path + "/" + std::to_string(i)} - })); - ++i; - } - - // add other remaining elements - while (i < target.size()) - { - result.push_back( - { - {"op", "add"}, - {"path", path + "/-"}, - {"value", target[i]} - }); - ++i; - } - - break; - } - - case value_t::object: - { - // first pass: traverse this object's elements - for (auto it = source.cbegin(); it != source.cend(); ++it) - { - // escape the key name to be used in a JSON patch - 
const auto path_key = path + "/" + detail::escape(it.key()); - - if (target.find(it.key()) != target.end()) - { - // recursive call to compare object values at key it - auto temp_diff = diff(it.value(), target[it.key()], path_key); - result.insert(result.end(), temp_diff.begin(), temp_diff.end()); - } - else - { - // found a key that is not in o -> remove it - result.push_back(object( - { - {"op", "remove"}, {"path", path_key} - })); - } - } - - // second pass: traverse other object's elements - for (auto it = target.cbegin(); it != target.cend(); ++it) - { - if (source.find(it.key()) == source.end()) - { - // found a key that is not in this -> add it - const auto path_key = path + "/" + detail::escape(it.key()); - result.push_back( - { - {"op", "add"}, {"path", path_key}, - {"value", it.value()} - }); - } - } - - break; - } - - default: - { - // both primitive type: replace value - result.push_back( - { - {"op", "replace"}, {"path", path}, {"value", target} - }); - break; - } - } - - return result; - } - - /// @} - - //////////////////////////////// - // JSON Merge Patch functions // - //////////////////////////////// - - /// @name JSON Merge Patch functions - /// @{ - - /*! - @brief applies a JSON Merge Patch - - The merge patch format is primarily intended for use with the HTTP PATCH - method as a means of describing a set of modifications to a target - resource's content. This function applies a merge patch to the current - JSON value. 
- - The function implements the following algorithm from Section 2 of - [RFC 7396 (JSON Merge Patch)](https://tools.ietf.org/html/rfc7396): - - ``` - define MergePatch(Target, Patch): - if Patch is an Object: - if Target is not an Object: - Target = {} // Ignore the contents and set it to an empty Object - for each Name/Value pair in Patch: - if Value is null: - if Name exists in Target: - remove the Name/Value pair from Target - else: - Target[Name] = MergePatch(Target[Name], Value) - return Target - else: - return Patch - ``` - - Thereby, `Target` is the current object; that is, the patch is applied to - the current value. - - @param[in] apply_patch the patch to apply - - @complexity Linear in the lengths of @a patch. - - @liveexample{The following code shows how a JSON Merge Patch is applied to - a JSON document.,merge_patch} - - @sa see @ref patch -- apply a JSON patch - @sa [RFC 7396 (JSON Merge Patch)](https://tools.ietf.org/html/rfc7396) - - @since version 3.0.0 - */ - void merge_patch(const basic_json& apply_patch) - { - if (apply_patch.is_object()) - { - if (!is_object()) - { - *this = object(); - } - for (auto it = apply_patch.begin(); it != apply_patch.end(); ++it) - { - if (it.value().is_null()) - { - erase(it.key()); - } - else - { - operator[](it.key()).merge_patch(it.value()); - } - } - } - else - { - *this = apply_patch; - } - } - - /// @} -}; - -/*! -@brief user-defined to_string function for JSON values - -This function implements a user-defined to_string for JSON objects. - -@param[in] j a JSON object -@return a std::string object -*/ - -NLOHMANN_BASIC_JSON_TPL_DECLARATION -std::string to_string(const NLOHMANN_BASIC_JSON_TPL& j) -{ - return j.dump(); -} -} // namespace nlohmann - -/////////////////////// -// nonmember support // -/////////////////////// - -// specialization of std::swap, and std::hash -namespace std -{ - -/// hash value for JSON objects -template<> -struct hash -{ - /*! 
- @brief return a hash value for a JSON object - - @since version 1.0.0 - */ - std::size_t operator()(const nlohmann::json& j) const - { - return nlohmann::detail::hash(j); - } -}; - -/// specialization for std::less -/// @note: do not remove the space after '<', -/// see https://github.com/nlohmann/json/pull/679 -template<> -struct less<::nlohmann::detail::value_t> -{ - /*! - @brief compare two value_t enum values - @since version 3.0.0 - */ - bool operator()(nlohmann::detail::value_t lhs, - nlohmann::detail::value_t rhs) const noexcept - { - return nlohmann::detail::operator<(lhs, rhs); - } -}; - -// C++20 prohibit function specialization in the std namespace. -#ifndef JSON_HAS_CPP_20 - -/*! -@brief exchanges the values of two JSON objects - -@since version 1.0.0 -*/ -template<> -inline void swap(nlohmann::json& j1, nlohmann::json& j2) noexcept( // NOLINT(readability-inconsistent-declaration-parameter-name) - is_nothrow_move_constructible::value&& // NOLINT(misc-redundant-expression) - is_nothrow_move_assignable::value - ) -{ - j1.swap(j2); -} - -#endif - -} // namespace std - -/*! -@brief user-defined string literal for JSON values - -This operator implements a user-defined string literal for JSON objects. It -can be used by adding `"_json"` to a string literal and returns a JSON object -if no parse error occurred. - -@param[in] s a string representation of a JSON object -@param[in] n the length of string @a s -@return a JSON object - -@since version 1.0.0 -*/ -JSON_HEDLEY_NON_NULL(1) -inline nlohmann::json operator "" _json(const char* s, std::size_t n) -{ - return nlohmann::json::parse(s, s + n); -} - -/*! -@brief user-defined string literal for JSON pointer - -This operator implements a user-defined string literal for JSON Pointers. It -can be used by adding `"_json_pointer"` to a string literal and returns a JSON pointer -object if no parse error occurred. 
- -@param[in] s a string representation of a JSON Pointer -@param[in] n the length of string @a s -@return a JSON pointer object - -@since version 2.0.0 -*/ -JSON_HEDLEY_NON_NULL(1) -inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std::size_t n) -{ - return nlohmann::json::json_pointer(std::string(s, n)); -} - -// #include - - -// restore GCC/clang diagnostic settings -#if defined(__clang__) - #pragma GCC diagnostic pop -#endif - -// clean up -#undef JSON_ASSERT -#undef JSON_INTERNAL_CATCH -#undef JSON_CATCH -#undef JSON_THROW -#undef JSON_TRY -#undef JSON_PRIVATE_UNLESS_TESTED -#undef JSON_HAS_CPP_11 -#undef JSON_HAS_CPP_14 -#undef JSON_HAS_CPP_17 -#undef JSON_HAS_CPP_20 -#undef NLOHMANN_BASIC_JSON_TPL_DECLARATION -#undef NLOHMANN_BASIC_JSON_TPL -#undef JSON_EXPLICIT - -// #include - - -#undef JSON_HEDLEY_ALWAYS_INLINE -#undef JSON_HEDLEY_ARM_VERSION -#undef JSON_HEDLEY_ARM_VERSION_CHECK -#undef JSON_HEDLEY_ARRAY_PARAM -#undef JSON_HEDLEY_ASSUME -#undef JSON_HEDLEY_BEGIN_C_DECLS -#undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE -#undef JSON_HEDLEY_CLANG_HAS_BUILTIN -#undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#undef JSON_HEDLEY_CLANG_HAS_EXTENSION -#undef JSON_HEDLEY_CLANG_HAS_FEATURE -#undef JSON_HEDLEY_CLANG_HAS_WARNING -#undef JSON_HEDLEY_COMPCERT_VERSION -#undef JSON_HEDLEY_COMPCERT_VERSION_CHECK -#undef JSON_HEDLEY_CONCAT -#undef JSON_HEDLEY_CONCAT3 -#undef JSON_HEDLEY_CONCAT3_EX -#undef JSON_HEDLEY_CONCAT_EX -#undef JSON_HEDLEY_CONST -#undef JSON_HEDLEY_CONSTEXPR -#undef JSON_HEDLEY_CONST_CAST -#undef JSON_HEDLEY_CPP_CAST -#undef JSON_HEDLEY_CRAY_VERSION -#undef JSON_HEDLEY_CRAY_VERSION_CHECK -#undef JSON_HEDLEY_C_DECL -#undef JSON_HEDLEY_DEPRECATED -#undef JSON_HEDLEY_DEPRECATED_FOR -#undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#undef 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#undef JSON_HEDLEY_DIAGNOSTIC_POP -#undef JSON_HEDLEY_DIAGNOSTIC_PUSH -#undef JSON_HEDLEY_DMC_VERSION -#undef JSON_HEDLEY_DMC_VERSION_CHECK -#undef JSON_HEDLEY_EMPTY_BASES -#undef JSON_HEDLEY_EMSCRIPTEN_VERSION -#undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK -#undef JSON_HEDLEY_END_C_DECLS -#undef JSON_HEDLEY_FLAGS -#undef JSON_HEDLEY_FLAGS_CAST -#undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE -#undef JSON_HEDLEY_GCC_HAS_BUILTIN -#undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#undef JSON_HEDLEY_GCC_HAS_EXTENSION -#undef JSON_HEDLEY_GCC_HAS_FEATURE -#undef JSON_HEDLEY_GCC_HAS_WARNING -#undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#undef JSON_HEDLEY_GCC_VERSION -#undef JSON_HEDLEY_GCC_VERSION_CHECK -#undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE -#undef JSON_HEDLEY_GNUC_HAS_BUILTIN -#undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#undef JSON_HEDLEY_GNUC_HAS_EXTENSION -#undef JSON_HEDLEY_GNUC_HAS_FEATURE -#undef JSON_HEDLEY_GNUC_HAS_WARNING -#undef JSON_HEDLEY_GNUC_VERSION -#undef JSON_HEDLEY_GNUC_VERSION_CHECK -#undef JSON_HEDLEY_HAS_ATTRIBUTE -#undef JSON_HEDLEY_HAS_BUILTIN -#undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE -#undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS -#undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#undef JSON_HEDLEY_HAS_EXTENSION -#undef JSON_HEDLEY_HAS_FEATURE -#undef JSON_HEDLEY_HAS_WARNING -#undef JSON_HEDLEY_IAR_VERSION -#undef JSON_HEDLEY_IAR_VERSION_CHECK -#undef JSON_HEDLEY_IBM_VERSION -#undef JSON_HEDLEY_IBM_VERSION_CHECK -#undef JSON_HEDLEY_IMPORT -#undef JSON_HEDLEY_INLINE -#undef JSON_HEDLEY_INTEL_CL_VERSION -#undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK -#undef JSON_HEDLEY_INTEL_VERSION -#undef JSON_HEDLEY_INTEL_VERSION_CHECK -#undef JSON_HEDLEY_IS_CONSTANT -#undef JSON_HEDLEY_IS_CONSTEXPR_ -#undef JSON_HEDLEY_LIKELY -#undef 
JSON_HEDLEY_MALLOC -#undef JSON_HEDLEY_MCST_LCC_VERSION -#undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK -#undef JSON_HEDLEY_MESSAGE -#undef JSON_HEDLEY_MSVC_VERSION -#undef JSON_HEDLEY_MSVC_VERSION_CHECK -#undef JSON_HEDLEY_NEVER_INLINE -#undef JSON_HEDLEY_NON_NULL -#undef JSON_HEDLEY_NO_ESCAPE -#undef JSON_HEDLEY_NO_RETURN -#undef JSON_HEDLEY_NO_THROW -#undef JSON_HEDLEY_NULL -#undef JSON_HEDLEY_PELLES_VERSION -#undef JSON_HEDLEY_PELLES_VERSION_CHECK -#undef JSON_HEDLEY_PGI_VERSION -#undef JSON_HEDLEY_PGI_VERSION_CHECK -#undef JSON_HEDLEY_PREDICT -#undef JSON_HEDLEY_PRINTF_FORMAT -#undef JSON_HEDLEY_PRIVATE -#undef JSON_HEDLEY_PUBLIC -#undef JSON_HEDLEY_PURE -#undef JSON_HEDLEY_REINTERPRET_CAST -#undef JSON_HEDLEY_REQUIRE -#undef JSON_HEDLEY_REQUIRE_CONSTEXPR -#undef JSON_HEDLEY_REQUIRE_MSG -#undef JSON_HEDLEY_RESTRICT -#undef JSON_HEDLEY_RETURNS_NON_NULL -#undef JSON_HEDLEY_SENTINEL -#undef JSON_HEDLEY_STATIC_ASSERT -#undef JSON_HEDLEY_STATIC_CAST -#undef JSON_HEDLEY_STRINGIFY -#undef JSON_HEDLEY_STRINGIFY_EX -#undef JSON_HEDLEY_SUNPRO_VERSION -#undef JSON_HEDLEY_SUNPRO_VERSION_CHECK -#undef JSON_HEDLEY_TINYC_VERSION -#undef JSON_HEDLEY_TINYC_VERSION_CHECK -#undef JSON_HEDLEY_TI_ARMCL_VERSION -#undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK -#undef JSON_HEDLEY_TI_CL2000_VERSION -#undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK -#undef JSON_HEDLEY_TI_CL430_VERSION -#undef JSON_HEDLEY_TI_CL430_VERSION_CHECK -#undef JSON_HEDLEY_TI_CL6X_VERSION -#undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK -#undef JSON_HEDLEY_TI_CL7X_VERSION -#undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK -#undef JSON_HEDLEY_TI_CLPRU_VERSION -#undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK -#undef JSON_HEDLEY_TI_VERSION -#undef JSON_HEDLEY_TI_VERSION_CHECK -#undef JSON_HEDLEY_UNAVAILABLE -#undef JSON_HEDLEY_UNLIKELY -#undef JSON_HEDLEY_UNPREDICTABLE -#undef JSON_HEDLEY_UNREACHABLE -#undef JSON_HEDLEY_UNREACHABLE_RETURN -#undef JSON_HEDLEY_VERSION -#undef JSON_HEDLEY_VERSION_DECODE_MAJOR -#undef JSON_HEDLEY_VERSION_DECODE_MINOR 
-#undef JSON_HEDLEY_VERSION_DECODE_REVISION -#undef JSON_HEDLEY_VERSION_ENCODE -#undef JSON_HEDLEY_WARNING -#undef JSON_HEDLEY_WARN_UNUSED_RESULT -#undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG -#undef JSON_HEDLEY_FALL_THROUGH - - - -#endif // INCLUDE_NLOHMANN_JSON_HPP_ diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/onnx_leaky_relu_custom_plugin.cc b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/onnx_leaky_relu_custom_plugin.cc deleted file mode 100644 index 7c17f742f..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/framework/onnx_plugin/onnx_leaky_relu_custom_plugin.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ - -#include "graph/operator.h" -#include "register/register.h" -#include "json.hpp" - -using namespace ge; -using json = nlohmann::json; - -namespace domi { -namespace { -const int kTypeFloat = 1; -} -Status ParseOnnxParamsLeakyReluCustom(const ge::Operator& op_src, ge::Operator& op_dest) { - // trans op_src to op_dest - // if op_src get required attr failed, need to return Failed - // if op_src get optional attr failed, need to return Failed or set a default value - float negative_slope = 0.01f; - string negative_slope_str; - AscendString attrs_string; - if (ge::GRAPH_SUCCESS == op_src.GetAttr("attribute", attrs_string)) { - json attrs = json::parse(attrs_string.GetString()); - for (json attr : attrs["attribute"]) { - if (attr["name"] == "alpha" && attr["type"] == kTypeFloat) { - negative_slope_str = attr["f"]; // float type in json has accuracy loss, so we use string type to store it - negative_slope = atof(negative_slope_str.c_str()); - } - } - } - - op_dest.SetAttr("negative_slope", negative_slope); - return SUCCESS; -} - -REGISTER_CUSTOM_OP("LeakyReluCustom") - .FrameworkType(ONNX) - .OriginOpType({ge::AscendString("ai.onnx::8::LeakyReluCustom"), - ge::AscendString("ai.onnx::9::LeakyReluCustom"), - ge::AscendString("ai.onnx::10::LeakyReluCustom"), - ge::AscendString("ai.onnx::11::LeakyReluCustom"), - ge::AscendString("ai.onnx::12::LeakyReluCustom"), - ge::AscendString("ai.onnx::13::LeakyReluCustom")}) - .ParseParamsByOperatorFn(ParseOnnxParamsLeakyReluCustom) - .ImplyType(ImplyType::TVM); -} // namespace domi diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/fusion_off.cfg b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/fusion_off.cfg deleted file mode 100644 index 2472195ec..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/fusion_off.cfg +++ /dev/null @@ -1,14 +0,0 @@ -{ - "Switch": { - "GraphFusion": { - "BatchNormPreprocessFusionPass": "off", - "ConvBatchnormFusionPass": "off", - "BatchNormBnInferFusionPass": "off", - "HostBNFusionPass": "off" - }, - "UBFusion": { - "ALL": "off" - } - } -} - diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/leaky_relu_custom.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/leaky_relu_custom.json deleted file mode 100644 index f582c9b0e..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/leaky_relu_custom.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "op": "LeakyReluCustom", - "language": "cpp", - "input_desc": [ - { - "name": "x", - "param_type": "required", - "format": [ - "ND" - ], - "type": [ - "float" - ] - } - ], - "output_desc": [ - { - "name": "y", - "param_type": "required", - "format": [ - "ND" - ], - "type": [ - "float" - ] - } - ], - "attr": [ - { - "name": "negative_slope", - "param_type": "optional", - "type": "float", - "default_value": "0.0" - } - ] - } -] \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/CMakeLists.txt deleted file mode 100755 index 0552a6727..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/CMakeLists.txt +++ /dev/null @@ -1,65 +0,0 @@ - -aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ops_srcs) - -opbuild(OPS_SRC ${ops_srcs} - OUT_DIR ${ASCEND_AUTOGEN_PATH} -) - -add_library(cust_op_proto SHARED ${ops_srcs} ${ASCEND_AUTOGEN_PATH}/op_proto.cc) -target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) 
-target_compile_options(cust_op_proto PRIVATE - -fvisibility=hidden -) -target_link_libraries(cust_op_proto PRIVATE - intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive -) -set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME - cust_opsproto_rt2.0 -) - -add_library(cust_optiling SHARED ${ops_srcs}) -target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) -target_compile_options(cust_optiling PRIVATE - -fvisibility=hidden -) -target_link_libraries(cust_optiling PRIVATE - intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive -) -set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME - cust_opmaster_rt2.0 -) - -file(GLOB aclnn_src ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) -file(GLOB aclnn_inc ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) -add_library(cust_opapi SHARED ${aclnn_src}) -target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) - -add_custom_target(optiling_compat ALL - COMMAND ln -sf lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ - ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so -) - -install(TARGETS cust_op_proto - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) -install(FILES ${ASCEND_AUTOGEN_PATH}/op_proto.h - DESTINATION packages/vendors/${vendor_name}/op_proto/inc) -install(TARGETS cust_optiling - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) -install(TARGETS cust_opapi - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_api/lib) -install(FILES ${aclnn_inc} - DESTINATION packages/vendors/${vendor_name}/op_api/include) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom.cpp 
b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom.cpp deleted file mode 100644 index 6a5448446..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom.cpp +++ /dev/null @@ -1,71 +0,0 @@ - -#include "leaky_relu_custom_tiling.h" -#include "register/op_def_registry.h" - - -namespace optiling { -const uint32_t BLOCK_DIM = 8; -const uint32_t TILE_NUM = 16; - -static ge::graphStatus TilingFunc(gert::TilingContext* context) -{ - TilingData tiling; - uint32_t totalLength = context->GetInputTensor(0)->GetShapeSize(); - const gert::RuntimeAttrs* attrs = context->GetAttrs(); - const float* negativeSlope = attrs->GetAttrPointer(0); - - context->SetBlockDim(BLOCK_DIM); - tiling.set_totalLength(totalLength); - tiling.set_tileNum(TILE_NUM); - tiling.set_negativeSlope(*negativeSlope); - - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - size_t *currentWorkspace = context->GetWorkspaceSizes(1); - currentWorkspace[0] = 0; - return ge::GRAPH_SUCCESS; -} -} - - -namespace ge { -static ge::graphStatus InferShape(gert::InferShapeContext* context) -{ - const gert::Shape* x1_shape = context->GetInputShape(0); - gert::Shape* y_shape = context->GetOutputShape(0); - *y_shape = *x1_shape; - return GRAPH_SUCCESS; -} -} - - -namespace ops { -class LeakyReluCustom : public OpDef { -public: - LeakyReluCustom(const char* name) : OpDef(name) - { - this->Input("x") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("y") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Attr("negative_slope").AttrType(OPTIONAL).Float(0.0); - - this->SetInferShape(ge::InferShape); - - this->AICore() - 
.SetTiling(optiling::TilingFunc); - this->AICore().AddConfig("ascend910"); - this->AICore().AddConfig("ascend910b"); - this->AICore().AddConfig("ascend310p"); - - } -}; - -OP_ADD(LeakyReluCustom); -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h deleted file mode 100644 index 8f3c82933..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef LEAKYRELU_CUSTOM_TILING_H -#define LEAKYRELU_CUSTOM_TILING_H -#include "register/tilingdata_base.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(TilingData) - TILING_DATA_FIELD_DEF(uint32_t, totalLength); - TILING_DATA_FIELD_DEF(uint32_t, tileNum); - TILING_DATA_FIELD_DEF(float, negativeSlope); -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(LeakyReluCustom, TilingData) -} -#endif // LEAKYRELU_CUSTOM_TILING_H \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/CMakeLists.txt b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/CMakeLists.txt deleted file mode 100755 index 6983d1536..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/CMakeLists.txt +++ /dev/null @@ -1,56 +0,0 @@ -foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) - - # generate aic-${compute_unit}-ops-info.json - add_ops_info_target(TARGET ops_info_gen_${compute_unit} - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core/${compute_unit}/aic-${compute_unit}-ops-info.json - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit} - ) - - 
# generate ascendc impl py once - if (NOT TARGET ascendc_impl_gen) - add_ops_impl_target(TARGET ascendc_impl_gen - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl - ) - endif() - - # dynamic shape binary compile - if (${ENABLE_BINARY_PACKAGE}) - add_bin_compile_target(TARGET ascendc_bin_${compute_unit} - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} - ADP_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/dynamic - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel - COMPUTE_UNIT ${compute_unit} - ) - add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) - endif() - -endforeach() - -# generate npu_supported_ops.json -add_npu_support_target(TARGET npu_supported_ops - OPS_INFO_DIR ${ASCEND_AUTOGEN_PATH} - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core - INSTALL_DIR packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE} -) - -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() - -# install kernel file -if (${ENABLE_SOURCE_PACKAGE}) - file(GLOB KERNEL_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ${CMAKE_CURRENT_SOURCE_DIR}/*.py - ) - install(FILES ${KERNEL_FILES} - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic - ) -endif() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/leaky_relu_custom.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/leaky_relu_custom.cpp deleted file mode 100644 index d191cc280..000000000 --- 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/onnx_inference/LeakyReluCustom/op_kernel/leaky_relu_custom.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "kernel_operator.h" -using namespace AscendC; - -constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue - -class KernelLeakyRelu { -public: - __aicore__ inline KernelLeakyRelu() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, uint32_t totalLength, uint32_t tileNum, float negativeSlope) - { - ASSERT(GetBlockNum() != 0 && "block dim can not be zero!"); - this->blockLength = totalLength / GetBlockNum(); - this->tileNum = tileNum; - this->negativeSlope = static_cast(negativeSlope); - ASSERT(tileNum != 0 && "tile num can not be zero!"); - this->tileLength = this->blockLength / tileNum / BUFFER_NUM; - - // get start index for current core, core parallel - xGm.SetGlobalBuffer((__gm__ float*)x + this->blockLength * GetBlockIdx(), this->blockLength); - yGm.SetGlobalBuffer((__gm__ float*)y + this->blockLength * GetBlockIdx(), this->blockLength); - // pipe alloc memory to queue, the unit is Bytes - pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(float)); - pipe.InitBuffer(outQueueY, BUFFER_NUM, this->tileLength * sizeof(float)); - pipe.InitBuffer(tmpBuffer1, this->tileLength * sizeof(float)); - pipe.InitBuffer(tmpBuffer2, this->tileLength * sizeof(float)); - } - __aicore__ inline void Process() - { - // loop count need to be doubled, due to double buffer - int32_t loopCount = this->tileNum * BUFFER_NUM; - // tiling strategy, pipeline parallel - for (int32_t i = 0; i < loopCount; i++) { - CopyIn(i); - Compute(i); - CopyOut(i); - } - } - -private: - __aicore__ inline void CopyIn(int32_t progress) - { - // alloc tensor from queue memory - LocalTensor xLocal = inQueueX.AllocTensor(); - // copy progress_th tile from global tensor to local tensor - DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); - // enque input tensors to VECIN queue - inQueueX.EnQue(xLocal); - } - 
__aicore__ inline void Compute(int32_t progress) - { - // deque input tensors from VECIN queue - LocalTensor xLocal = inQueueX.DeQue(); - LocalTensor yLocal = outQueueY.AllocTensor(); - LocalTensor tmpTensor1 = tmpBuffer1.Get(); - LocalTensor tmpTensor2 = tmpBuffer2.Get(); - float inputVal = 0.0; - Maxs(tmpTensor1, xLocal, inputVal, this->tileLength); - Mins(tmpTensor2, xLocal, inputVal, this->tileLength); - Muls(tmpTensor2, tmpTensor2, this->negativeSlope, this->tileLength); - Add(yLocal, tmpTensor1, tmpTensor2, this->tileLength); - // enque the output tensor to VECOUT queue - outQueueY.EnQue(yLocal); - // free input tensors for reuse - inQueueX.FreeTensor(xLocal); - } - __aicore__ inline void CopyOut(int32_t progress) - { - // deque output tensor from VECOUT queue - LocalTensor yLocal = outQueueY.DeQue(); - // copy progress_th tile from local tensor to global tensor - DataCopy(yGm[progress * this->tileLength], yLocal, this->tileLength); - // free output tensor for reuse - outQueueY.FreeTensor(yLocal); - } - -private: - TPipe pipe; - TBuf tmpBuffer1, tmpBuffer2; - // create queues for input, in this case depth is equal to buffer num - TQue inQueueX; - // create queue for output, in this case depth is equal to buffer num - TQue outQueueY; - GlobalTensor xGm, yGm; - uint32_t blockLength; - uint32_t tileNum; - uint32_t tileLength; - float negativeSlope; -}; - -extern "C" __global__ __aicore__ void leaky_relu_custom(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) { - GET_TILING_DATA(tiling_data, tiling); - KernelLeakyRelu op; - op.Init(x, y, tiling_data.totalLength, tiling_data.tileNum, tiling_data.negativeSlope); - op.Process(); -} - -#ifndef __CCE_KT_TEST__ -// call of kernel function -void leaky_relu_custom_do(uint32_t blockDim, void* l2ctrl, void* stream, uint8_t* x, uint8_t* y, - uint8_t* workspace, uint8_t* tiling) -{ - leaky_relu_custom<<>>(x, y, workspace, tiling); -} -#endif diff --git 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/.gitignore b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/.gitignore deleted file mode 100644 index fd37d8d0b..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -/custom_op -build -build_out -/*/run/out/* -!/*/run/out/test_data -fusion_result.json - -# pytorch used -libaclopnn.so -pytorch-v1.8.1* -pytorch-v1.11.0* -pytorch-v2.0.0* -cache -prof_total -*.zip \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/op_dev b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/op_dev deleted file mode 120000 index d561644c8..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/op_dev +++ /dev/null @@ -1 +0,0 @@ -../acl_invocation/op_dev \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/AddCustomKernelNpu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/AddCustomKernelNpu.cpp deleted file mode 100644 index 7d7b2b8f6..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/AddCustomKernelNpu.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include - -#include "torch_npu/csrc/framework/utils/OpAdapter.h" -#include "torch_npu/csrc/aten/NPUNativeFunctions.h" -#include "torch_npu/csrc/aten/ops/op_api/op_api_common.h" - -namespace at_npu { -namespace native { -using torch::autograd::Function; -using torch::autograd::AutogradContext; - -at::Tensor NPUNativeFunctions::npu_add_custom(const at::Tensor& x, const at::Tensor& y) { - at::Tensor result = OpPreparation::ApplyTensor(x); // 创建输出内存 - - // calculate the output result of the NPU - EXEC_NPU_CMD(aclnnAddCustom, x, y, result); - return result; -} -} // 
namespace native -} // namespace at_npu diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/npu_native_functions.yaml b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/npu_native_functions.yaml deleted file mode 100644 index 1ad90dbcf..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/pytorch_patch/npu_native_functions.yaml +++ /dev/null @@ -1 +0,0 @@ -- func: npu_add_custom(Tensor x, Tensor y) -> Tensor \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/readme.md b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/readme.md deleted file mode 100644 index 104a863ef..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/readme.md +++ /dev/null @@ -1,5 +0,0 @@ -# acl samples -bash run.sh ${is_dynamic}(0/1) ${replay_mode}(/batch/iterator) - -# run static op (depend on chip version) -bash run.sh 0 diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/run.sh b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/run.sh deleted file mode 100644 index e2da06738..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/run.sh +++ /dev/null @@ -1,197 +0,0 @@ -#!/bin/bash -export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH - -clear;clear -# 清除之前遗留的文件 -rm -rf kernel_meta_temp* cache prof_total -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -); cd $CURRENT_DIR - -# 导出环境变量 -IS_DYNAMIC=$1 -REPLAY_MODE=$2 -PYTORCH_VERSION=1.11.0 -PTA_DIR=pytorch-v${PYTORCH_VERSION} - -if [ ! 
$ASCEND_HOME_DIR ]; then - export ASCEND_HOME_DIR=/usr/local/Ascend/latest -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - -PYTHON_VERSION=`python3 -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1"."$2}'` -export HI_PYTHON=python${PYTHON_VERSION} -export PYTHONPATH=$ASCEND_HOME_DIR/python/site-packages:$PYTHONPATH -export PATH=$ASCEND_HOME_DIR/python/site-packages/bin:$PATH - -# 检查当前昇腾芯片的类型 -function check_soc_version() { - SOC_VERSION_CONCAT=`python3 -c ''' -import ctypes, os -def get_soc_version(): - max_len = 256 - rtsdll = ctypes.CDLL(f"libruntime.so") - c_char_t = ctypes.create_string_buffer(b"\xff" * max_len, max_len) - rtsdll.rtGetSocVersion.restype = ctypes.c_uint64 - rt_error = rtsdll.rtGetSocVersion(c_char_t, ctypes.c_uint32(max_len)) - if rt_error: - print("rt_error:", rt_error) - return "" - soc_full_name = c_char_t.value.decode("utf-8") - find_str = "Short_SoC_version=" - ascend_home_dir = os.environ.get("ASCEND_HOME_DIR") - with open(f"{ascend_home_dir}/compiler/data/platform_config/{soc_full_name}.ini", "r") as f: - for line in f: - if find_str in line: - start_index = line.find(find_str) - result = line[start_index + len(find_str):].strip() - return "{},{}".format(soc_full_name, result.lower()) - return "" -print(get_soc_version()) - '''` - if [[ ${SOC_VERSION_CONCAT}"x" = "x" ]]; then - echo "ERROR: SOC_VERSION_CONCAT is invalid!" - return 1 - fi - SOC_FULL_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 1` - SOC_SHORT_VERSION=`echo $SOC_VERSION_CONCAT | cut -d ',' -f 2` -} - -function main() { - if [[ ${IS_DYNAMIC}"x" = "x" ]]; then - echo "ERROR: IS_DYNAMIC is invalid!" - return 1 - fi - - if [[ ${REPLAY_MODE}"x" = "x" || ${REPLAY_MODE} = "batch" || ${REPLAY_MODE} = "iterator" ]]; then - echo "INFO: REPLAY_MODE valid : ${REPLAY_MODE}" - else - echo "ERROR: REPLAY_MODE is invalid!" 
- return 1 - fi - - # 清除遗留生成文件和日志文件 - rm -rf $HOME/ascend/log/* - rm -rf $ASCEND_OPP_PATH/vendors/* - rm -rf custom_op - - # 生成自定义算子工程样例 - JSON_NAME=add_custom - CAMEL_JSON_NAME=`echo $JSON_NAME | sed -r 's/(^|-|_)(\w)/\U\2/g'` - msopgen gen -i op_dev/${JSON_NAME}.json -f tf -c ai_core-${SOC_SHORT_VERSION} -lan cpp -out ./custom_op - if [ $? -ne 0 ]; then - echo "ERROR: msopgen custom op sample failed!" - return 1 - fi - echo "INFO: msopgen custom op sample success!" - - cp -rf op_dev/* custom_op - if [ $? -ne 0 ]; then - echo "ERROR: copy custom op files failed!" - return 1 - fi - if [[ $IS_DYNAMIC != 1 ]]; then - if [[ $REPLAY_MODE = "batch" ]]; then - sed -i "s/set(BATCH_MODE_REPLAY_LIST/set(BATCH_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(BATCH_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - elif [[ $REPLAY_MODE = "iterator" ]]; then - sed -i "s/set(ITERATOR_MODE_REPLAY_LIST/set(ITERATOR_MODE_REPLAY_LIST ${CAMEL_JSON_NAME}/g" `grep "set(ITERATOR_MODE_REPLAY_LIST" -rl custom_op/op_kernel/CMakeLists.txt` - fi - fi - sed -i "s#/usr/local/Ascend/latest#$ASCEND_HOME_DIR#g" `grep "/usr/local/Ascend/latest" -rl custom_op/CMakePresets.json` - - # 构建自定义算子包并安装 - bash custom_op/run.sh - if [ $? -ne 0 ]; then - echo "ERROR: build and install custom op run package failed!" - return 1 - fi - echo "INFO: build and install custom op run package success!" - - # PTA源码仓,可以自行放置zip包 - if [ ! -f "v${PYTORCH_VERSION}.zip" ]; then - wget https://gitee.com/ascend/pytorch/repository/archive/v${PYTORCH_VERSION}.zip --no-check-certificate - fi - rm -rf ${PTA_DIR}; unzip -o -q v${PYTORCH_VERSION}.zip - - # PTA自定义算子注册 - FUNCTION_REGISTE_FIELD=`cat pytorch_patch/npu_native_functions.yaml` - FUNCTION_REGISTE_FILE="${PTA_DIR}/torch_npu/csrc/aten/npu_native_functions.yaml" - if ! 
grep -q "\ $FUNCTION_REGISTE_FIELD" $FUNCTION_REGISTE_FILE; then - sed -i "/custom:/a \ $FUNCTION_REGISTE_FIELD" $FUNCTION_REGISTE_FILE - fi - # PTA自定义算子适配文件 - cp -rf pytorch_patch/*.cpp ${PTA_DIR}/torch_npu/csrc/aten/ops/op_api - - # 编译PTA插件并安装 - (cd ${PTA_DIR}; bash ci/build.sh --python=${PYTHON_VERSION}; pip3 install dist/*.whl --force-reinstall) - - # 执行测试文件 - export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH - python3 test_ops_custom.py - if [ $? -ne 0 ]; then - echo "ERROR: run custom op failed!" - return 1 - fi - - # 解析dump文件为numpy文件 - files=$(ls ./prof_total) - cd $CURRENT_DIR/prof_total/$files - msprof --export=on --output=$CURRENT_DIR/prof_total/$files - if [[ $? -eq 0 ]];then - echo "INFO: parse success" - else - echo "ERROR: pasrse failed" - return 1 - fi - - # 校验summary文件夹 - summary_list=( - acl_0_1.csv - acl_statistic_0_1.csv - ge_op_execute_0_1.csv - op_statistic_0_1.csv - op_summary_0_1.csv - prof_rule_0.json - runtime_api_0_1.csv - task_time_0_1.csv - ) - if [ $(ls ./device_*/summary/ | wc -l) -eq ${#summary_list[@]} ];then - for summary in ${summary_list[@]}; do - if [ ! -f $(pwd)/device_0/summary/$summary ];then - echo "ERROR: summary files not exist" - return 1 - fi - done - echo "INFO: All summary result exist" - else - echo "ERROR: check summary result fail" - return 1 - fi - - # 校验timeline文件夹 - timeline_list=( - acl_0_1.json - ge_op_execute_0_1.json - msprof_0_1.json - runtime_api_0_1.json - task_time_0_1.json - thread_group_0_1.json - ) - if [ $(ls ./device_*/timeline/ | wc -l) -eq ${#timeline_list[@]} ];then - for timeline in ${timeline_list[@]}; do - if [ ! 
-f $(pwd)/device_0/timeline/$timeline ];then - echo "ERROR: timeline files not exist" - return 1 - fi - done - echo "INFO: timeline files exist" - else - echo "ERROR: timeline files not exist" - return 1 - fi - echo "INFO: Ascend C Add Custom SUCCESS" -} - -check_soc_version -main diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/test_ops_custom.py b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/test_ops_custom.py deleted file mode 100644 index 5f7a07ca4..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/pytorch_invocation/test_ops_custom.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/python3 -# -*- coding:utf-8 -*- -# Copyright 2023 Huawei Technologies Co., Ltd -import torch -import torch_npu -from torch_npu.testing.testcase import TestCase, run_tests -torch.npu.config.allow_internal_format = False - - -class TestCustomAdd(TestCase): - def test_add_custom(self): - length = [8, 2048] - x = torch.rand(length, device='cpu', dtype=torch.float16) - y = torch.rand(length, device='cpu', dtype=torch.float16) - print(x, '\n', y) - - prof_path = "./prof_total" - with torch.npu.profile(prof_path) as prof: - torch.npu.synchronize() - output = torch_npu.npu_add_custom(x.npu(), y.npu()).cpu() - torch.npu.synchronize() - - print(output) - self.assertRtolEqual(output, x + y) - - -if __name__ == "__main__": - run_tests() diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md deleted file mode 100644 index fa6bded3b..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md +++ /dev/null @@ -1,30 +0,0 @@ -AscendC 自定义算子入TensorFlow网络示例教程: -以Yolov3 TensorFlow离线推理为例 -推理平台:Ascend310P3 - -一、自定义算子准备 -1.先构建AscendC-LeakyRelu算子工程 -/usr/local/python3.7/bin/msopgen gen -i leakyrelu.json -f tf -c 
ai_core-ascend310p -lan cpp -out ./custom_op -2.将目录下的op_host和op_kernel实现同步至生成的custom_op工程目录下,可以替换之前msopgen生成的默认文件 -3.确认CMakePresets.json中 "ASCEND_CANN_PACKAGE_PATH" 为CANN软件包安装路径,执行 ./build.sh编译出自定义算子包 -4.安装在custom_op/build_out/目录下生成的自定义算子run包 - -二、离线推理验证流程 -1.先下载yolov3 tensorflow离线pb模型: -https://gitee.com/link?target=https%3A%2F%2Fobs-9be7.obs.cn-east-2.myhuaweicloud.com%2F003_Atc_Models%2Fmodelzoo%2Fyolov3_tf.pb - -2.Pb模型转换为om模型 -For Ascend310P3: -atc --model=./yolov3_tf.pb --framework=3 --output=./YOLOv3_TF --input_shape="input:4,416,416,3" --soc_version=Ascend310P3 --fusion_switch_file=fusion_off.cfg -其中 --fusion_switch_file为关闭算子融合配置,此处若不关闭融合,LeakyRelu算子会进行融合,因此会无法单独编译LeakyRelu算子进行验证 - -若出现: -start compile Ascend C operator LeakyRelu. kernel name is leaky_relu -compile Ascend C operator: LeakyRelu success! -打印,表明进入了AscendC算子编译 - -出现ATC run success, welcome to the next use 表明离线om模型转换成功 - -3.执行离线推理 -可使用https://gitee.com/ascend/tools/tree/master/msame 该工具 - diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg deleted file mode 100644 index 2472195ec..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg +++ /dev/null @@ -1,14 +0,0 @@ -{ - "Switch": { - "GraphFusion": { - "BatchNormPreprocessFusionPass": "off", - "ConvBatchnormFusionPass": "off", - "BatchNormBnInferFusionPass": "off", - "HostBNFusionPass": "off" - }, - "UBFusion": { - "ALL": "off" - } - } -} - diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json deleted file mode 100644 index e38679d0f..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json 
+++ /dev/null @@ -1,29 +0,0 @@ -[ - { - "op": "LeakyRelu", - "input_desc": [ - { - "name": "x", - "param_type": "required", - "format": [ - "ND" - ], - "type": [ - "fp16" - ] - } - ], - "output_desc": [ - { - "name": "y", - "param_type": "required", - "format": [ - "ND" - ], - "type": [ - "fp16" - ] - } - ] - } -] diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp deleted file mode 100644 index 7e40df176..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "leaky_relu_tiling.h" -#include "register/op_def_registry.h" -const uint32_t BLOCK_DIM = 8; - -namespace optiling { -static ge::graphStatus TilingFunc(gert::TilingContext* context) -{ - TilingData tiling; - const gert::StorageShape* x1_shape = context->GetInputShape(0); - int32_t data_sz = 1; - for (int i = 0; i < x1_shape->GetStorageShape().GetDimNum(); i++) - data_sz *= x1_shape->GetStorageShape().GetDim(i); - tiling.set_size(data_sz); - context->SetBlockDim(BLOCK_DIM); - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - return ge::GRAPH_SUCCESS; -} -} - - -namespace ge { -static ge::graphStatus InferShape(gert::InferShapeContext* context) -{ - const gert::Shape* x1_shape = context->GetInputShape(0); - gert::Shape* y_shape = context->GetOutputShape(0); - *y_shape = *x1_shape; - return GRAPH_SUCCESS; -} -} - - -namespace ops { -class LeakyRelu : public OpDef { -public: - explicit LeakyRelu(const char* name) : OpDef(name) - { - this->Input("x") - .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - this->Output("y") - 
.ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); - - this->SetInferShape(ge::InferShape); - - this->AICore().SetTiling(optiling::TilingFunc); - this->AICore().AddConfig("ascend310p"); - - this->Attr("negative_slope").Float(0.01f); - } -}; - -OP_ADD(LeakyRelu); -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h deleted file mode 100644 index f880ff3fd..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h +++ /dev/null @@ -1,10 +0,0 @@ - -#include "register/tilingdata_base.h" - -namespace optiling { -BEGIN_TILING_DATA_DEF(TilingData) - TILING_DATA_FIELD_DEF(uint32_t, size); -END_TILING_DATA_DEF; - -REGISTER_TILING_DATA_CLASS(LeakyRelu, TilingData) -} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h deleted file mode 100644 index fa414c5b3..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. 
- */ -#ifndef KERNEL_LEAKY_RELU_H -#define KERNEL_LEAKY_RELU_H -#include "op_frame/elemwise_frame.h" -#include "kernel_leaky_relu_tiling.h" - -namespace leaky_relu_ascendc { -template class KernelLeakyRelu : public AscendC::ElemwiseOpBase { -public: - using DType = T; - __aicore__ KernelLeakyRelu() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y); - -public: - __aicore__ inline void MyCopyIn(int32_t progress, AscendC::LocalTensor& inBuf); - __aicore__ inline void MyCompute(int32_t progress, AscendC::LocalTensor& inBuf, AscendC::LocalTensor& outBuf); - __aicore__ inline void MyCopyOut(int32_t progress, AscendC::LocalTensor& outBuf); - -public: - LeakyReluParam param; - -private: - AscendC::GlobalTensor xGm; - AscendC::GlobalTensor yGm; -}; - -template __aicore__ inline void KernelLeakyRelu::Init(GM_ADDR x, GM_ADDR y) -{ - ElemwiseOpBase::Init(param.loopSize, param.dataLen, 0, param.dataLen); - xGm.SetGlobalBuffer((__gm__ T*)(x) + block_idx * param.blockFactor); - yGm.SetGlobalBuffer((__gm__ T*)(y) + block_idx * param.blockFactor); -} - -template -__aicore__ inline void KernelLeakyRelu::MyCopyIn(int32_t progress, AscendC::LocalTensor& x_buf) -{ - auto tailFlag = 0; - if (param.loopSize == progress + 1) { - tailFlag = 1; - } - x_buf.SetUserTag(tailFlag); - AscendC::DataCopy(x_buf, xGm[progress * param.ubFactor], param.dmaParam[tailFlag]); -} - -template -__aicore__ inline void KernelLeakyRelu::MyCompute(int32_t progress, AscendC::LocalTensor& x_buf, - AscendC::LocalTensor& y_buf) -{ - auto x_tag = x_buf.GetUserTag(); - y_buf.SetUserTag(x_tag); - AscendC::LeakyRelu(y_buf, x_buf, param.negativeSlope, param.itemSize[x_tag]); -} - -template -__aicore__ inline void KernelLeakyRelu::MyCopyOut(int32_t progress, AscendC::LocalTensor& y_buf) -{ - AscendC::DataCopy(yGm[progress * param.ubFactor], y_buf, param.dmaParam[y_buf.GetUserTag()]); -} -} // namespace leaky_relu -#endif // KERNEL_LEAKY_RELU_H \ No newline at end of file diff --git 
a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h deleted file mode 100644 index 6deeccc87..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. - */ -#ifndef KERNEL_LEAKY_RELU_TILING_H -#define KERNEL_LEAKY_RELU_TILING_H -#include "kernel_operator.h" - -struct LeakyReluParam { - uint32_t blockFactor; - uint32_t ubFactor; - int32_t dataLen; - half negativeSlope; - AscendC::DataCopyParams dmaParam[2] { {}, {} }; - uint32_t itemSize[2]; - int32_t loopSize; -}; - -template -__aicore__ inline void InitTilingParam(int32_t totalSize, LeakyReluParam& param, half slope = static_cast(0.1)) -{ - int32_t splitSize = totalSize / block_num; - int64_t blockFactor = splitSize; - - const auto vec_len = AscendC::DEFAULT_BLOCK_SIZE / sizeof(half); - - int64_t ubFactor = blockFactor; - int64_t blockNum = (splitSize + blockFactor - 1) / blockFactor; - - int64_t ub_for_num = (ubFactor + limit - 1) / limit; - int64_t adjust_factor = (ubFactor + ub_for_num - 1) / ub_for_num; - int64_t align_factor = (adjust_factor + vec_len - 1) / vec_len; - - ubFactor = align_factor * vec_len; - if (ubFactor > limit) { - ubFactor = (adjust_factor / vec_len) * vec_len; - } - param.negativeSlope = slope; - param.blockFactor = blockFactor; - param.ubFactor = ubFactor; - param.loopSize = (blockFactor + ubFactor - 1) / ubFactor; - param.dataLen = limit * sizeof(half); - - param.itemSize[0] = ubFactor; - param.itemSize[1] = splitSize % ubFactor; - param.dmaParam[0].blockLen = (ubFactor * sizeof(half) + AscendC::DEFAULT_C0_SIZE - 1) / AscendC::DEFAULT_C0_SIZE; - param.dmaParam[1].blockLen = - (param.itemSize[1] 
* sizeof(half) + AscendC::DEFAULT_C0_SIZE - 1) / AscendC::DEFAULT_C0_SIZE; - - if (param.itemSize[1] == 0) { - param.itemSize[1] = ubFactor; - param.dmaParam[1].blockLen = param.dmaParam[0].blockLen; - } -}; -#endif // KERNEL_LEAKY_RELU_TILING_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp deleted file mode 100644 index df4fbdc23..000000000 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "kernel_operator.h" -#include "kernel_leaky_relu.h" -#define UB_LIMIT ((AscendC::TOTAL_UB_SIZE) / 4 / sizeof(half)) - -extern "C" __global__ __aicore__ void leaky_relu(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) -{ - GET_TILING_DATA(tiling_data, tiling); - // kernel impl - AscendC::ElemwiseFrame> op; - InitTilingParam(tiling_data.size, op.param); - op.Init(x, y); - op.Process(); -} -- Gitee From c9b4b4c129ea4bcf4437fde62949feae4a8c4708 Mon Sep 17 00:00:00 2001 From: zhanghao0689 Date: Mon, 15 Sep 2025 07:11:56 +0000 Subject: [PATCH 72/97] !2755 add static tensor program samples Merge pull request !2755 from zhanghao0689/master --- .../KernelLaunch/CMakeLists.txt | 49 +++++ .../KernelLaunch/README.md | 96 +++++++++ .../KernelLaunch/add_custom_tiling.h | 17 ++ .../KernelLaunch/add_custom_v1.cpp | 88 ++++++++ .../KernelLaunch/add_custom_v2.cpp | 145 +++++++++++++ .../KernelLaunch/add_custom_v3.cpp | 90 ++++++++ .../KernelLaunch/add_custom_v4.cpp | 99 +++++++++ .../KernelLaunch/cmake/cpu_lib.cmake | 9 + .../KernelLaunch/cmake/npu_lib.cmake | 11 + .../KernelLaunch/data_utils.h | 203 ++++++++++++++++++ .../KernelLaunch/main.cpp | 148 +++++++++++++ .../KernelLaunch/run.sh | 121 +++++++++++ .../KernelLaunch/scripts/gen_data.py | 25 +++ 
.../KernelLaunch/scripts/verify_result.py | 53 +++++ .../README.md | 69 ++++++ operator/ascendc/0_introduction/README.md | 1 + 16 files changed, 1224 insertions(+) create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/README.md create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v1.cpp create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v2.cpp create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v4.cpp create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/cpu_lib.cmake create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/npu_lib.cmake create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/data_utils.h create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/main.cpp create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/run.sh create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/gen_data.py create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/verify_result.py create mode 100644 operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/README.md diff --git 
a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/CMakeLists.txt b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/CMakeLists.txt new file mode 100644 index 000000000..e31108ae1 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +# ${KERNEL_FILES} are used to compile library, push files written by ascendc in ${KERNEL_FILES}. 
+# ref to cmake/npu.cmake ascendc_library, cmake/cpu.cmake add_library +file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_v1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_v2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_v3.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_v4.cpp +) + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $,$>:host_intf_pub>> + $:ascendcl>> + ascendc_kernels_${RUN_MODE} +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/README.md b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/README.md new file mode 100644 index 000000000..bf4099816 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/README.md @@ -0,0 +1,96 @@ +## 目录结构介绍 + +``` +├── KernelLaunch +│ ├── cmake // 编译工程文件 +│ ├── scripts +│ │ ├── gen_data.py // 输入数据和真值数据生成脚本 +│ │ └── verify_result.py // 验证输出数据和真值数据是否一致的验证脚本 +│ ├── add_custom_tiling.h // tiling结构体 +│ ├── add_custom_v1.cpp // 算子kernel实现1:未优化前实现 +│ ├── add_custom_v2.cpp // 算子kernel实现2:基于实现1,实现double buffer +│ ├── add_custom_v3.cpp // 算子kernel实现3:优化double buffer实现,简化判断逻辑,并使用LocalMemAllocator简化代码 +│ ├── add_custom_v4.cpp // 算子kernel实现4:基于add_custom_v3,修改地址分配逻辑,消除bank冲突 +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── main.cpp // 主函数,调用算子的应用程序,含CPU域及NPU域调用 
+│ └── run.sh // 编译运行算子的脚本 +``` + +## 代码实现介绍 + +本样例中实现的是固定shape为72*4096的Add算子。 + +- kernel实现 + + Add算子的数学表达式为: + + ``` + z = x + y + ``` + + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用计算接口完成两个输入参数相加,得到最终结果,再搬出到外部存储上。 + + Add算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor xGm和yGm搬运到Local Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal执行加法操作,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor zGm中。 + + 实现1:请参考[add_custom_v1.cpp](./add_custom_v1.cpp),使用静态Tensor编程方法,进行add算子的编程。 + + 实现2:请参考[add_custom_v2.cpp](./add_custom_v2.cpp),优化性能,使用double buffer进行流水排布。 + + 实现3:请参考[add_custom_v3.cpp](./add_custom_v3.cpp),优化add_custom_v2中反向同步,替换为MTE2等待MTE3执行结束。减少分支判断的同时,算子性能因为double buffer的原因不受影响。另外使用LocalMemAllocator进行线性内存分配,Bank冲突不敏感场景可以使用这种方式简化分配。 + + 实现4:请参考[add_custom_v4.cpp](./add_custom_v4.cpp),基于add_custom_v3的实现,优化地址分配消除Bank冲突。 + +- 调用实现 + + 1. CPU侧运行验证主要通过ICPU_RUN_KF CPU调测宏等CPU调测库提供的接口来完成; + 2. NPU侧运行验证主要通过使用ACLRT_LAUNCH_KERNEL内核调用宏来完成。 + + 应用程序通过ASCENDC_CPU_DEBUG 宏区分代码逻辑运行于CPU侧还是NPU侧。 + +## 运行样例算子 + +- 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch + ``` +- 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` +- 样例执行 + + ```bash + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + + - RUN_MODE:编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu /sim / npu] + - SOC_VERSION:昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - 
Atlas A2训练系列产品/Atlas 800I A2推理产品 + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + + ```bash + bash run.sh -r cpu -v Ascendxxxyy + ``` + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/09/06 | 新增本readme | diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h new file mode 100644 index 000000000..278a6e336 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h @@ -0,0 +1,17 @@ +/** + * @file add_custom_tiling.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADD_CUSTOM_TILING_H +#define ADD_CUSTOM_TILING_H +#include + +struct AddCustomTilingData { + uint32_t singleCoreLength; +}; +#endif \ No newline at end of file diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v1.cpp b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v1.cpp new file mode 100644 index 000000000..831ae4c3c --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v1.cpp @@ -0,0 +1,88 @@ +/** + * @file add_custom_v1.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "add_custom_tiling.h" +#include "kernel_operator.h" + +using AscendC::TPosition; +namespace { +constexpr uint32_t TILE_LENGTH = 4096; +} + +class KernelAddV1 { +public: + __aicore__ inline KernelAddV1() = default; + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t singleCoreLength) + { + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + yGm.SetGlobalBuffer((__gm__ float *)y + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + loopCount = singleCoreLength / TILE_LENGTH; + } + __aicore__ inline void Process() + { + AscendC::LocalTensor xLocal(AscendC::TPosition::VECCALC, xAddr, TILE_LENGTH); + AscendC::LocalTensor yLocal(AscendC::TPosition::VECCALC, yAddr, TILE_LENGTH); + AscendC::LocalTensor zLocal(AscendC::TPosition::VECCALC, zAddr, TILE_LENGTH); + + // one buffer + for (uint32_t i = 0; i < loopCount; i++) { + // dependency of PIPE_V & PIPE_MTE2 caused by xLocal/yLocal between 2 sequential loops + if (i != 0) { + AscendC::WaitFlag(EVENT_ID0); + } + AscendC::DataCopy(xLocal, xGm[i * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocal, yGm[i * TILE_LENGTH], TILE_LENGTH); + // dependency of PIPE_MTE2 & PIPE_V caused by xLocal/yLocal in one single loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + if (i != 0) { + // dependency of PIPE_MTE3 & PIPE_V caused by zLocal between 2 sequential loops + AscendC::WaitFlag(EVENT_ID0); + } + AscendC::Add(zLocal, xLocal, yLocal, TILE_LENGTH); + if (i != (loopCount - 1)) { + // dependency of PIPE_V & PIPE_MTE2 caused by xLocal/yLocal between 2 sequential loops + AscendC::SetFlag(EVENT_ID0); + } + // dependency of PIPE_V & PIPE_MTE3 caused by zLocal in one single loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + AscendC::DataCopy(zGm[i * TILE_LENGTH], zLocal, TILE_LENGTH); + if (i != 
(loopCount - 1)) { + // dependency of PIPE_MTE3 & PIPE_V caused by zLocal between 2 sequential loops + AscendC::SetFlag(EVENT_ID0); + } + } + } + +private: + static constexpr uint32_t xAddr = 0; + static constexpr uint32_t yAddr = TILE_LENGTH * sizeof(float); + static constexpr uint32_t zAddr = TILE_LENGTH * sizeof(float) * 2; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t loopCount; +}; + +extern "C" __global__ __aicore__ void add_custom_v1(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR tiling) +{ + AscendC::InitSocState(); + KernelAddV1 op; + op.Init(x, y, z, ((__gm__ AddCustomTilingData *)tiling)->singleCoreLength); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do_v1(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *tiling) +{ + add_custom_v1<<>>(x, y, z, tiling); +} +#endif diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v2.cpp b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v2.cpp new file mode 100644 index 000000000..2c4525d55 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v2.cpp @@ -0,0 +1,145 @@ +/** + * @file add_custom_v2.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "add_custom_tiling.h" +#include "kernel_operator.h" + +using AscendC::TPosition; +namespace { +constexpr uint32_t TILE_LENGTH = 4096; +} + +class KernelAddV2 { +public: + __aicore__ inline KernelAddV2() = default; + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t singleCoreLength) + { + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + yGm.SetGlobalBuffer((__gm__ float *)y + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + loopCount = singleCoreLength / TILE_LENGTH; + } + __aicore__ inline void Process() + { + // ping + AscendC::LocalTensor xLocalPing(AscendC::TPosition::VECCALC, xAddrPing, TILE_LENGTH); + AscendC::LocalTensor yLocalPing(AscendC::TPosition::VECCALC, yAddrPing, TILE_LENGTH); + AscendC::LocalTensor zLocalPing(AscendC::TPosition::VECCALC, zAddrPing, TILE_LENGTH); + // pong + AscendC::LocalTensor xLocalPong(AscendC::TPosition::VECCALC, xAddrPong, TILE_LENGTH); + AscendC::LocalTensor yLocalPong(AscendC::TPosition::VECCALC, yAddrPong, TILE_LENGTH); + AscendC::LocalTensor zLocalPong(AscendC::TPosition::VECCALC, zAddrPong, TILE_LENGTH); + + // double buffer + for (uint32_t i = 0; i < loopCount / 2; i++) { + // ping part + // dependency of PIPE_V & PIPE_MTE2 caused by xLocalPing/yLocalPing between 2 sequential loops + if (i != 0) { + AscendC::WaitFlag(EVENT_ID0); + } + AscendC::DataCopy(xLocalPing, xGm[2 * i * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocalPing, yGm[2 * i * TILE_LENGTH], TILE_LENGTH); + // dependency of PIPE_MTE2 & PIPE_V caused by xLocalPing/yLocalPing in one single loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + if (i != 0) { + // dependency of PIPE_MTE3 & PIPE_V caused by zLocalPing between 2 sequential loops + AscendC::WaitFlag(EVENT_ID0); + } + AscendC::Add(zLocalPing, xLocalPing, yLocalPing, 
TILE_LENGTH); + if (i != (loopCount / 2 - 1)) { + // dependency of PIPE_V & PIPE_MTE2 caused by xLocalPing/yLocalPing between 2 sequential loops + AscendC::SetFlag(EVENT_ID0); + } + // dependency of PIPE_V & PIPE_MTE3 caused by zLocalPing in one single loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + AscendC::DataCopy(zGm[2 * i * TILE_LENGTH], zLocalPing, TILE_LENGTH); + if (i != (loopCount / 2 - 1)) { + // dependency of PIPE_MTE3 & PIPE_V caused by zLocalPing between 2 sequential loops + AscendC::SetFlag(EVENT_ID0); + } + + // pong part + // dependency of PIPE_V & PIPE_MTE2 caused by xLocalPong/yLocalPong between 2 sequential loops + if (i != 0) { + AscendC::WaitFlag(EVENT_ID1); + } + AscendC::DataCopy(xLocalPong, xGm[(2 * i + 1) * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocalPong, yGm[(2 * i + 1) * TILE_LENGTH], TILE_LENGTH); + // dependency of PIPE_MTE2 & PIPE_V caused by xLocalPong/yLocalPong in one single loop + AscendC::SetFlag(EVENT_ID1); + AscendC::WaitFlag(EVENT_ID1); + if (i != 0) { + // dependency of PIPE_MTE3 & PIPE_V caused by zLocalPong between 2 sequential loops + AscendC::WaitFlag(EVENT_ID1); + } + AscendC::Add(zLocalPong, xLocalPong, yLocalPong, TILE_LENGTH); + if (i != (loopCount / 2 - 1)) { + // dependency of PIPE_V & PIPE_MTE2 caused by xLocalPong/yLocalPong between 2 sequential loops + AscendC::SetFlag(EVENT_ID1); + } + // dependency of PIPE_V & PIPE_MTE3 caused by zLocalPong in one single loop + AscendC::SetFlag(EVENT_ID1); + AscendC::WaitFlag(EVENT_ID1); + AscendC::DataCopy(zGm[(2 * i + 1) * TILE_LENGTH], zLocalPong, TILE_LENGTH); + if (i != (loopCount / 2 - 1)) { + // dependency of PIPE_MTE3 & PIPE_V caused by zLocalPong between 2 sequential loops + AscendC::SetFlag(EVENT_ID1); + } + } + + // tail block + if (loopCount % 2 != 0) { + // dependency of PIPE_V & PIPE_MTE2 caused by xLocalPing/yLocalPing with the previous for loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + 
AscendC::DataCopy(xLocalPing, xGm[(loopCount - 1) * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocalPing, yGm[(loopCount - 1) * TILE_LENGTH], TILE_LENGTH); + // dependency of PIPE_MTE2 & PIPE_V caused by xLocalPing/yLocalPing in one loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + // dependency of PIPE_MTE3 & PIPE_V caused by zLocalPing with the previous for loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + AscendC::Add(zLocalPing, xLocalPing, yLocalPing, TILE_LENGTH); + // dependency of PIPE_V & PIPE_MTE3 caused by zLocalPing in one loop + AscendC::SetFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID0); + AscendC::DataCopy(zGm[(loopCount - 1) * TILE_LENGTH], zLocalPing, TILE_LENGTH); + } + } + +private: + static constexpr uint32_t xAddrPing = 0; + static constexpr uint32_t yAddrPing = TILE_LENGTH * sizeof(float); + static constexpr uint32_t zAddrPing = TILE_LENGTH * sizeof(float) * 2; + static constexpr uint32_t xAddrPong = TILE_LENGTH * sizeof(float) * 3; + static constexpr uint32_t yAddrPong = TILE_LENGTH * sizeof(float) * 4; + static constexpr uint32_t zAddrPong = TILE_LENGTH * sizeof(float) * 5; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t loopCount; +}; + +extern "C" __global__ __aicore__ void add_custom_v2(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR tiling) +{ + AscendC::InitSocState(); + KernelAddV2 op; + op.Init(x, y, z, ((__gm__ AddCustomTilingData *)tiling)->singleCoreLength); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do_v2(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *tiling) +{ + add_custom_v2<<>>(x, y, z, tiling); +} +#endif diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp new file mode 100644 index 000000000..d424b54f1 --- /dev/null 
+++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp @@ -0,0 +1,90 @@ +/** + * @file add_custom_v3.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "add_custom_tiling.h" +#include "kernel_operator.h" + +using AscendC::TPosition; +namespace { +constexpr uint32_t TILE_LENGTH = 4096; +} + +class KernelAddV3 { +public: + __aicore__ inline KernelAddV3() = default; + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t singleCoreLength) + { + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + yGm.SetGlobalBuffer((__gm__ float *)y + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + loopCount = singleCoreLength / TILE_LENGTH; + } + + __aicore__ inline void Process() + { + // use local memory allocator to simplify memor allocation + AscendC::LocalMemAllocator ubAllocator; + // ping + AscendC::LocalTensor xLocalPing = ubAllocator.Alloc(); + AscendC::LocalTensor yLocalPing = ubAllocator.Alloc(); + AscendC::LocalTensor zLocalPing = ubAllocator.Alloc(); + // pong + AscendC::LocalTensor xLocalPong = ubAllocator.Alloc(); + AscendC::LocalTensor yLocalPong = ubAllocator.Alloc(); + AscendC::LocalTensor zLocalPong = ubAllocator.Alloc(); + + // double buffer + AscendC::SetFlag(EVENT_ID0); + AscendC::SetFlag(EVENT_ID1); + for (uint32_t i = 0; i < loopCount; i++) { + int32_t eventID = (i % 2 == 0 ? EVENT_ID0 : EVENT_ID1); + AscendC::LocalTensor &xLocal = (i % 2 == 0 ? xLocalPing : xLocalPong); + AscendC::LocalTensor &yLocal = (i % 2 == 0 ? 
yLocalPing : yLocalPong); + AscendC::LocalTensor &zLocal = (i % 2 == 0 ? zLocalPing : zLocalPong); + // dependency of PIPE_MTE3 & PIPE_MTE2 caused by xLocal/yLocal between 2 sequential loops + AscendC::WaitFlag(eventID); + AscendC::DataCopy(xLocal, xGm[i * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocal, yGm[i * TILE_LENGTH], TILE_LENGTH); + + // dependency of PIPE_MTE2 & PIPE_V caused by xLocal/yLocal in one single loop + AscendC::SetFlag(eventID); + AscendC::WaitFlag(eventID); + AscendC::Add(zLocal, xLocal, yLocal, TILE_LENGTH); + // dependency of PIPE_V & PIPE_MTE3 caused by zLocal in one single loop + AscendC::SetFlag(eventID); + AscendC::WaitFlag(eventID); + AscendC::DataCopy(zGm[i * TILE_LENGTH], zLocal, TILE_LENGTH); + // dependency of PIPE_MTE3 & PIPE_MTE2 caused by zLocal between 2 sequential loops + AscendC::SetFlag(eventID); + } + AscendC::WaitFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID1); + } + +private: + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t loopCount; +}; + +extern "C" __global__ __aicore__ void add_custom_v3(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR tiling) +{ + AscendC::InitSocState(); + KernelAddV3 op; + op.Init(x, y, z, ((__gm__ AddCustomTilingData *)tiling)->singleCoreLength); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do_v3(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *tiling) +{ + add_custom_v3<<>>(x, y, z, tiling); +} +#endif diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v4.cpp b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v4.cpp new file mode 100644 index 000000000..ad45b3b44 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v4.cpp @@ -0,0 +1,99 @@ +/** + * @file add_custom_v4.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. 
All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "add_custom_tiling.h" +#include "kernel_operator.h" + +using AscendC::TPosition; +namespace { +constexpr uint32_t TILE_LENGTH = 4096; +constexpr uint32_t DST_START_ADDRESS = 0x20000; +} + +class KernelAddV4 { +public: + __aicore__ inline KernelAddV4() = default; + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t singleCoreLength) + { + xGm.SetGlobalBuffer((__gm__ float *)x + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + yGm.SetGlobalBuffer((__gm__ float *)y + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + zGm.SetGlobalBuffer((__gm__ float *)z + AscendC::GetBlockIdx() * singleCoreLength, singleCoreLength); + loopCount = singleCoreLength / TILE_LENGTH; + } + + __aicore__ inline void Process() + { + // ping + AscendC::LocalTensor xLocalPing(AscendC::TPosition::VECCALC, xAddrPing, TILE_LENGTH); + AscendC::LocalTensor yLocalPing(AscendC::TPosition::VECCALC, yAddrPing, TILE_LENGTH); + AscendC::LocalTensor zLocalPing(AscendC::TPosition::VECCALC, zAddrPing, TILE_LENGTH); + // pong + AscendC::LocalTensor xLocalPong(AscendC::TPosition::VECCALC, xAddrPong, TILE_LENGTH); + AscendC::LocalTensor yLocalPong(AscendC::TPosition::VECCALC, yAddrPong, TILE_LENGTH); + AscendC::LocalTensor zLocalPong(AscendC::TPosition::VECCALC, zAddrPong, TILE_LENGTH); + + // double buffer + AscendC::SetFlag(EVENT_ID0); + AscendC::SetFlag(EVENT_ID1); + for (uint32_t i = 0; i < loopCount; i++) { + int32_t eventID = (i % 2 == 0 ? EVENT_ID0 : EVENT_ID1); + AscendC::LocalTensor &xLocal = (i % 2 == 0 ? xLocalPing : xLocalPong); + AscendC::LocalTensor &yLocal = (i % 2 == 0 ? yLocalPing : yLocalPong); + AscendC::LocalTensor &zLocal = (i % 2 == 0 ? 
zLocalPing : zLocalPong); + // dependency of PIPE_MTE3 & PIPE_MTE2 caused by xLocal/yLocal between 2 sequential loops + AscendC::WaitFlag(eventID); + AscendC::DataCopy(xLocal, xGm[i * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocal, yGm[i * TILE_LENGTH], TILE_LENGTH); + + // dependency of PIPE_MTE2 & PIPE_V caused by xLocal/yLocal in one single loop + AscendC::SetFlag(eventID); + AscendC::WaitFlag(eventID); + AscendC::Add(zLocal, xLocal, yLocal, TILE_LENGTH); + // dependency of PIPE_V & PIPE_MTE3 caused by zLocal in one single loop + AscendC::SetFlag(eventID); + AscendC::WaitFlag(eventID); + AscendC::DataCopy(zGm[i * TILE_LENGTH], zLocal, TILE_LENGTH); + // dependency of PIPE_MTE3 & PIPE_MTE2 caused by zLocal between 2 sequential loops + AscendC::SetFlag(eventID); + } + AscendC::WaitFlag(EVENT_ID0); + AscendC::WaitFlag(EVENT_ID1); + } + +private: + // according to bank conflict rule: + // rr conflict happened when 2 read requests are in the same bank group + // rw conflict happened when read and write requests are in the same bank + // so we adjust the address to avoid bank conflicts + static constexpr uint32_t xAddrPing = 0x0; + static constexpr uint32_t yAddrPing = TILE_LENGTH * sizeof(float) + 256; + static constexpr uint32_t zAddrPing = DST_START_ADDRESS; + static constexpr uint32_t xAddrPong = TILE_LENGTH * sizeof(float) * 2 + 256; + static constexpr uint32_t yAddrPong = TILE_LENGTH * sizeof(float) * 3 + 512; + static constexpr uint32_t zAddrPong = DST_START_ADDRESS + TILE_LENGTH * sizeof(float); + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t loopCount; +}; + +extern "C" __global__ __aicore__ void add_custom_v4(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR tiling) +{ + AscendC::InitSocState(); + KernelAddV4 op; + op.Init(x, y, z, ((__gm__ AddCustomTilingData *)tiling)->singleCoreLength); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do_v4(uint32_t blockDim, void *stream, uint8_t *x, 
uint8_t *y, uint8_t *z, uint8_t *tiling) +{ + add_custom_v4<<>>(x, y, z, tiling); +} +#endif diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/cpu_lib.cmake b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/cpu_lib.cmake new file mode 100644 index 000000000..5362c8b5a --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/cpu_lib.cmake @@ -0,0 +1,9 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels_${RUN_MODE} PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels_${RUN_MODE} DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/npu_lib.cmake new file mode 100644 index 000000000..f92b095d1 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/cmake/npu_lib.cmake @@ -0,0 +1,11 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library 
+ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/data_utils.h b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/data_utils.h new file mode 100644 index 000000000..9d3445780 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/data_utils.h @@ -0,0 +1,203 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) 
fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + 
break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/main.cpp b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/main.cpp new file mode 100644 index 000000000..caf5653e8 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/main.cpp @@ -0,0 +1,148 @@ +/** + * @file main.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "add_custom_tiling.h" +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +extern void add_custom_do_v1(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *tiling); +extern void add_custom_do_v2(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *tiling); +extern void add_custom_do_v3(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *tiling); +extern void add_custom_do_v4(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *tiling); +using KernelEntry = void (*)(uint32_t, void *, uint8_t *, uint8_t *, uint8_t *, uint8_t *); +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void add_custom_v1(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR tiling); +extern "C" __global__ __aicore__ void add_custom_v2(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR tiling); +extern "C" __global__ __aicore__ void add_custom_v3(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR tiling); +extern "C" __global__ __aicore__ void add_custom_v4(GM_ADDR x, GM_ADDR y, GM_ADDR z, 
GM_ADDR tiling); +using KernelEntry = void (*)(GM_ADDR, GM_ADDR, GM_ADDR, GM_ADDR); + +#endif + +struct ArgInfo { + std::string fileName; + size_t length; +}; + +#ifndef ASCENDC_CPU_DEBUG + +void KernelCall(KernelEntry kernelEntry, uint32_t blockDim, void *stream, std::vector &inputsInfo, + std::vector &outputsInfo, uint8_t *tiling) +{ + std::vector inputHost(inputsInfo.size()); + std::vector inputDevice(inputsInfo.size()); + std::vector outputHost(outputsInfo.size()); + std::vector outputDevice(outputsInfo.size()); + uint8_t *tilingDevice; + + CHECK_ACL(aclrtMalloc((void **)(&tilingDevice), sizeof(AddCustomTilingData), ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMemcpy( + tilingDevice, sizeof(AddCustomTilingData), tiling, sizeof(AddCustomTilingData), ACL_MEMCPY_HOST_TO_DEVICE)); + + for (uint32_t i = 0; i < inputsInfo.size(); i++) { + CHECK_ACL(aclrtMallocHost((void **)(&inputHost[i]), inputsInfo[i].length)); + CHECK_ACL(aclrtMalloc((void **)(&inputDevice[i]), inputsInfo[i].length, ACL_MEM_MALLOC_HUGE_FIRST)); + ReadFile(inputsInfo[i].fileName, inputsInfo[i].length, inputHost[i], inputsInfo[i].length); + CHECK_ACL(aclrtMemcpy( + inputDevice[i], inputsInfo[i].length, inputHost[i], inputsInfo[i].length, ACL_MEMCPY_HOST_TO_DEVICE)); + } + + for (uint32_t i = 0; i < outputsInfo.size(); i++) { + CHECK_ACL(aclrtMallocHost((void **)(&outputHost[i]), outputsInfo[i].length)); + CHECK_ACL(aclrtMalloc((void **)(&outputDevice[i]), outputsInfo[i].length, ACL_MEM_MALLOC_HUGE_FIRST)); + } + + kernelEntry(blockDim, stream, inputDevice[0], inputDevice[1], outputDevice[0], tilingDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtFree(tilingDevice)); + for (uint32_t i = 0; i < outputsInfo.size(); i++) { + CHECK_ACL(aclrtMemcpy( + outputHost[i], outputsInfo[i].length, outputDevice[i], outputsInfo[i].length, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile(outputsInfo[i].fileName, outputHost[i], outputsInfo[i].length); + CHECK_ACL(aclrtFree(outputDevice[i])); + 
CHECK_ACL(aclrtFreeHost(outputHost[i])); + } + + for (uint32_t i = 0; i < inputsInfo.size(); i++) { + CHECK_ACL(aclrtFree(inputDevice[i])); + CHECK_ACL(aclrtFreeHost(inputHost[i])); + } +} + +#else + +#define KernelCall(kernelEntry, blockDim, inputsInfo, outputsInfo, tiling) \ + do { \ + std::vector input(inputsInfo.size()); \ + std::vector output(outputsInfo.size()); \ + \ + for (uint32_t i = 0; i < inputsInfo.size(); i++) { \ + input[i] = (uint8_t *)AscendC::GmAlloc(inputsInfo[i].length); \ + ReadFile(inputsInfo[i].fileName, inputsInfo[i].length, input[i], inputsInfo[i].length); \ + } \ + \ + for (uint32_t i = 0; i < outputsInfo.size(); i++) { \ + output[i] = (uint8_t *)AscendC::GmAlloc(outputsInfo[i].length); \ + } \ + \ + AscendC::SetKernelMode(KernelMode::AIV_MODE); \ + ICPU_RUN_KF(kernelEntry, blockDim, input[0], input[1], output[0], tiling); \ + for (uint32_t i = 0; i < inputsInfo.size(); i++) { \ + AscendC::GmFree((void *)input[i]); \ + } \ + \ + for (uint32_t i = 0; i < outputsInfo.size(); i++) { \ + WriteFile(outputsInfo[i].fileName, output[i], outputsInfo[i].length); \ + AscendC::GmFree((void *)output[i]); \ + } \ + } while (0) + +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 8; + // set data length, in this case we use 8 cores and length of each core is 4096 * 9 + uint32_t dataLen = 4096 * 9 * blockDim; + size_t inputByteSize = dataLen * sizeof(float); + size_t outputByteSize = dataLen * sizeof(float); + AddCustomTilingData tiling; + tiling.singleCoreLength = dataLen / blockDim; + + std::vector inputsInfo = {{"./input/input_x.bin", inputByteSize}, {"./input/input_y.bin", inputByteSize}}; + std::vector outputsV1Info = {{"./output/output_z_v1.bin", outputByteSize}}; + std::vector outputsV2Info = {{"./output/output_z_v2.bin", outputByteSize}}; + std::vector outputsV3Info = {{"./output/output_z_v3.bin", outputByteSize}}; + std::vector outputsV4Info = {{"./output/output_z_v4.bin", outputByteSize}}; + +#ifndef ASCENDC_CPU_DEBUG + 
CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + KernelCall(add_custom_do_v1, blockDim, stream, inputsInfo, outputsV1Info, (uint8_t *)&tiling); + KernelCall(add_custom_do_v2, blockDim, stream, inputsInfo, outputsV2Info, (uint8_t *)&tiling); + KernelCall(add_custom_do_v3, blockDim, stream, inputsInfo, outputsV3Info, (uint8_t *)&tiling); + KernelCall(add_custom_do_v4, blockDim, stream, inputsInfo, outputsV4Info, (uint8_t *)&tiling); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#else + KernelCall(add_custom_v1, blockDim, inputsInfo, outputsV1Info, (uint8_t *)&tiling); + KernelCall(add_custom_v2, blockDim, inputsInfo, outputsV2Info, (uint8_t *)&tiling); + KernelCall(add_custom_v3, blockDim, inputsInfo, outputsV3Info, (uint8_t *)&tiling); + KernelCall(add_custom_v4, blockDim, inputsInfo, outputsV4Info, (uint8_t *)&tiling); +#endif + return 0; +} diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/run.sh b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/run.sh new file mode 100644 index 000000000..6c691801e --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/run.sh @@ -0,0 +1,121 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +BUILD_TYPE="Debug" +INSTALL_PREFIX="${CURRENT_DIR}/out" + +SHORT=r:,v:,i:,b:,p:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" + +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | 
--install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu sim npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + exit -1 +fi + +VERSION_LIST="Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then + echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" + exit -1 +fi + +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "sim" ]; then + # in case of running op in simulator, use stub .so instead + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +elif [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +fi + +set -e +rm -rf build out +mkdir -p build +cmake -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 scripts/gen_data.py +( + export 
LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --launch-count=4 --output=./prof ./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --launch-count=4 --output=./prof ./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi + else + ./ascendc_kernels_bbit + fi +) +# tidy folder by delete log files +if [ "${RUN_MODE}" = "sim" ]; then + rm -f *.log *.dump *.vcd *.toml *_log +fi +md5sum output/*.bin +python3 scripts/verify_result.py output/output_z_v1.bin output/golden.bin +python3 scripts/verify_result.py output/output_z_v2.bin output/golden.bin +python3 scripts/verify_result.py output/output_z_v3.bin output/golden.bin +python3 scripts/verify_result.py output/output_z_v4.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/gen_data.py b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/gen_data.py new file mode 100644 index 000000000..b8f7ccb5b --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/gen_data.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8 * 9, 4096]).astype(np.float32) + input_y = np.random.uniform(1, 100, [8 * 9, 4096]).astype(np.float32) + golden = (input_x + input_y).astype(np.float32) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/verify_result.py b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/verify_result.py new file mode 100644 index 000000000..a5019f30f --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float32 +relative_tol = 1e-4 +absolute_tol = 1e-5 +error_tol = 1e-4 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float32).reshape(-1) + golden = np.fromfile(golden, dtype=np.float32).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/README.md b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/README.md new file mode 100644 index 000000000..b6a82a0b1 --- /dev/null +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/README.md @@ -0,0 +1,69 @@ +## 概述 + +本样例介绍基于静态Tensor方式编程的场景下Add算子的实现方法,并提供核函数直调方法。 + +## 目录结构介绍 + +``` +├── 23_static_tensor_programming_kernellaunch // 使用核函数直调的方式调用Add自定义算子 +│ └── KernelLaunch // Kernel Launch方式调用核函数样例 +``` + +## 算子描述 + +算子实现的是固定shape为72×4096的Add算子。 + +Add的计算公式为: + +```python +z = x + y +``` + +- x:输入,形状为\[72, 4096],数据类型为float; +- 
y:输入,形状为\[72, 4096],数据类型为float; +- z:输出,形状为\[72, 4096],数据类型为float; + +## 算子规格描述 + + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x72 * 4096floatND
y72 * 4096floatND
算子输出y72 * 4096floatND
核函数名add_custom_v1 / add_custom_v2 / add_custom_v3 / add_custom_v4
+ +## 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 编译运行样例算子 + +针对自定义算子工程,编译运行包含如下步骤: + +- 编译自定义算子工程; +- 调用执行自定义算子; + +详细操作如下所示。 + +### 1. 获取源码包 + +编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)获取源码包。 + +### 2. 编译运行样例工程 + +- [KernelLaunch样例运行](./KernelLaunch/README.md) + +## 更新说明 + + +| 时间 | 更新事项 | +| ---------- | ---------------- | +| 2025/09/06 | 新增直调方式样例 | diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 2f95f076d..44a722d77 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -37,6 +37,7 @@ | [20_mmad_kernellaunch](./20_mmad_kernellaunch) | 基于Ascend C基础API的Matmul自定义Cube算子及KernelLaunch调用样例 | Atlas 推理系列产品AI Core
Atlas A2训练系列产品/Atlas 800I A2推理产品 | | [21_vectoradd_kernellaunch](./21_vectoradd_kernellaunch) | 基于Ascend C的Add多场景自定义Vector算子的KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [22_baremix_kernellaunch](./22_baremix_kernellaunch) | 通过更底层的编码方式,实现MatmulLeayrelu融合算子的样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [23_static_tensor_programming_kernellaunch](./23_static_tensor_programming_kernellaunch) | 通过静态Tensor编程方式,实现Add算子的样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 ## 获取样例代码 -- Gitee From d170b6fd195454f89da52074a060a7eb2a7aae14 Mon Sep 17 00:00:00 2001 From: Li-Jian Date: Mon, 15 Sep 2025 07:13:17 +0000 Subject: [PATCH 73/97] !2757 fix precision problem on david Merge pull request !2757 from Li-Jian/master --- .../MatmulInvocationNeo/main.cpp | 4 ++-- .../MatmulInvocationNeo/matmul_custom.cpp | 3 +++ .../MatmulInvocationNeo/matmul_custom_tiling.cpp | 15 +++++++++------ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/main.cpp b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/main.cpp index 17518d258..1fb5a80d8 100644 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/main.cpp +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/main.cpp @@ -34,9 +34,9 @@ int32_t main(int32_t argc, char *argv[]) uint8_t *tilingBuf = (uint8_t *)malloc(tilingFileSize); GenerateTiling(socVersion, tilingBuf); #ifdef CUSTOM_ASCEND310P - uint32_t blockDim = 2; + uint32_t blockDim = reinterpret_cast(tilingBuf)->usedCoreNum; #else - uint32_t blockDim = 22; + uint32_t blockDim = (reinterpret_cast(tilingBuf)->usedCoreNum + 1) / 2; #endif #ifdef ASCENDC_CPU_DEBUG diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom.cpp b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom.cpp index 19292d6c4..f3a763b8d 100644 --- 
a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom.cpp +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom.cpp @@ -120,6 +120,9 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD MatmulType, MatmulType> mm; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), mm, &tiling); // Initialize the matmul object. + if (GetBlockIdx() >= tiling.usedCoreNum) { + return; + } #ifdef CUSTOM_ASCEND310P // Set temp UB space when on ASCEND310P. AscendC::TBuf<> tmpMMFormatUb; diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom_tiling.cpp b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom_tiling.cpp index d3f0dc2db..f86f9b3c0 100644 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom_tiling.cpp +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/matmul_custom_tiling.cpp @@ -25,9 +25,9 @@ using namespace std; */ void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) { - int M = 512; - int N = 1024; - int K = 512; + constexpr int32_t M = 512; + constexpr int32_t N = 1024; + constexpr int32_t K = 512; TPosition leftPosition = TPosition::GM; CubeFormat leftFormat = CubeFormat::ND; @@ -45,8 +45,8 @@ void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) bool isBias = false; - int32_t singleCoreM = 512; - int32_t singleCoreN = 512; + constexpr int32_t SINGLECORE_M = 512; + constexpr int32_t SINGLECORE_N = 512; optiling::TCubeTiling tilingData; auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); @@ -60,7 +60,10 @@ void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) tilingApi.SetOrgShape(M, N, K); tilingApi.SetShape(M, N, K); if (ascendcPlatform->GetSocVersion() == platform_ascendc::SocVersion::ASCEND310P) { - tilingApi.SetSingleShape(singleCoreM, 
singleCoreN, -1); // Set the fixed singleCoreM=512, singleCoreN=512. + tilingApi.SetSingleShape(SINGLECORE_M, SINGLECORE_N, -1); // Set the fixed singleCoreM=512, singleCoreN=512. + int32_t mBlockNum = M / SINGLECORE_M; + int32_t nBlockNum = N / SINGLECORE_N; + tilingApi.SetDim(mBlockNum * nBlockNum); } tilingApi.SetBias(isBias); tilingApi.SetBufferSpace(-1, -1, -1); -- Gitee From 7a634125028d7f7ed9e60970b480a67ed25419d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E9=99=86?= Date: Mon, 15 Sep 2025 11:47:35 +0000 Subject: [PATCH 74/97] =?UTF-8?q?!2762=20uniquecust=20modify=20infershape?= =?UTF-8?q?=20Merge=20pull=20request=20!2762=20from=20=E5=88=98=E9=99=86/m?= =?UTF-8?q?aster?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../4_op_dev/1_custom_op/op_proto/unique_cust.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cplusplus/level1_single_api/4_op_dev/1_custom_op/op_proto/unique_cust.h b/cplusplus/level1_single_api/4_op_dev/1_custom_op/op_proto/unique_cust.h index 6d18e6739..5eee4e1d2 100644 --- a/cplusplus/level1_single_api/4_op_dev/1_custom_op/op_proto/unique_cust.h +++ b/cplusplus/level1_single_api/4_op_dev/1_custom_op/op_proto/unique_cust.h @@ -19,7 +19,20 @@ #include "graph/operator_reg.h" namespace ge { +/** +*@brief Finds unique elements in a 1D tensor. \n +*@par Inputs: +*x: 1D tensor. Support all types mentioned in TensorType. +*Input "x" is a k-dimensional tensor. \n + +*@par Attributes: +*out_idx: An optional DType from: "int32, int64". Defaults to "int32". \n + +*@par Outputs: +*@li y: "x" in the unique output "y". +*@li idx: A tensor the same size as "x". The index of each value of "x". 
\n +*/ REG_OP(UniqueCust) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_DOUBLE})) -- Gitee From 6bf58b5ca3bc8d072ed04eb83853125561eefa33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AE=81?= Date: Mon, 15 Sep 2025 11:59:49 +0000 Subject: [PATCH 75/97] =?UTF-8?q?!2765=20sync=20pydflow=20code=20Merge=20p?= =?UTF-8?q?ull=20request=20!2765=20from=20=E6=9D=8E=E5=AE=81/dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inference/dataflow/py_dflow/CMakeLists.txt | 2 + inference/dataflow/py_dflow/build.sh | 21 ++++- .../py_dflow/cmake/intf_pub_linux.cmake | 7 +- .../py_dflow/cmake/modules/Finddflow.cmake | 85 +++++++++++++++++++ .../py_dflow/cmake/modules/Findmetadef.cmake | 23 +---- .../py_dflow/python/dataflow/dataflow.py | 9 +- .../python/dataflow/tools/tpl/tpl_cmake.py | 2 +- .../dataflow/py_dflow/wrapper/CMakeLists.txt | 15 ++-- .../wrapper/flow_func_wrapper/CMakeLists.txt | 8 +- 9 files changed, 134 insertions(+), 38 deletions(-) create mode 100644 inference/dataflow/py_dflow/cmake/modules/Finddflow.cmake diff --git a/inference/dataflow/py_dflow/CMakeLists.txt b/inference/dataflow/py_dflow/CMakeLists.txt index 5323a373d..69cf67fa3 100644 --- a/inference/dataflow/py_dflow/CMakeLists.txt +++ b/inference/dataflow/py_dflow/CMakeLists.txt @@ -39,12 +39,14 @@ if (BUILD_OPEN_PROJECT) include(cmake/test_funcs.cmake) include(cmake/intf_pub_linux.cmake) include(cmake/modules/Findair.cmake) + include(cmake/modules/Finddflow.cmake) include(cmake/modules/Findmetadef.cmake) include(cmake/modules/Findparser.cmake) # 自研软件包 find_package(slog MODULE REQUIRED) find_package(air MODULE REQUIRED) + find_package(dflow MODULE REQUIRED) find_package(metadef MODULE REQUIRED) find_package(parser MODULE REQUIRED) find_package(udf MODULE REQUIRED) diff --git a/inference/dataflow/py_dflow/build.sh b/inference/dataflow/py_dflow/build.sh index b236b2872..87b29aecc 100755 --- 
a/inference/dataflow/py_dflow/build.sh +++ b/inference/dataflow/py_dflow/build.sh @@ -23,12 +23,15 @@ BUILD_RELATIVE_PATH="build" # print usage message usage() { echo "Usage:" - echo " sh build.sh [-h | --help] [-v | --verbose] [-j] [--ascend_install_path=] [--output_path=] [--python_path=]" + echo " sh build.sh [-h | --help] [-v | --verbose] [-j] [--build_type=]" + echo " [--ascend_install_path=] [--output_path=] [--python_path=]" echo "" echo "Options:" echo " -h, --help Print usage" echo " -v, --verbose Display build command" echo " -j Set the number of threads used for building DFlow, default is 8" + echo " --build_type=" + echo " Set build type, default Release" echo " --ascend_install_path=" echo " Set ascend package install path, default /usr/local/Ascend/ascend-toolkit/latest" echo " --output_path=" @@ -38,6 +41,17 @@ usage() { echo "" } +# check value of build_type option +# usage: check_build_type build_type +check_build_type() { + arg_value="$1" + if [ "X$arg_value" != "XRelease" ] && [ "X$arg_value" != "XDebug" ]; then + echo "Invalid value $arg_value for option --$2" + usage + exit 1 + fi +} + # parse and set options checkopts() { VERBOSE="" @@ -67,6 +81,11 @@ checkopts() { VERBOSE="VERBOSE=1" shift ;; + --build_type) + check_build_type "$2" build_type + CMAKE_BUILD_TYPE="$2" + shift 2 + ;; --ascend_install_path) ASCEND_INSTALL_PATH="$(realpath $2)" shift 2 diff --git a/inference/dataflow/py_dflow/cmake/intf_pub_linux.cmake b/inference/dataflow/py_dflow/cmake/intf_pub_linux.cmake index 4c9b502e9..6561a3ac6 100644 --- a/inference/dataflow/py_dflow/cmake/intf_pub_linux.cmake +++ b/inference/dataflow/py_dflow/cmake/intf_pub_linux.cmake @@ -9,13 +9,12 @@ target_compile_options(intf_pub INTERFACE -Wall -fPIC $,-fstack-protector-all,-fstack-protector-strong> - $<$:-std=c++11> + $<$:-std=c++17> ) target_compile_definitions(intf_pub INTERFACE - _GLIBCXX_USE_CXX11_ABI=0 + _GLIBCXX_USE_CXX11_ABI=0 $<$:CFG_BUILD_NDEBUG> - $<$:CFG_BUILD_DEBUG> - WIN64=1 + 
$<$:CFG_BUILD_DEBUG> LINUX=0 LOG_CPP ) diff --git a/inference/dataflow/py_dflow/cmake/modules/Finddflow.cmake b/inference/dataflow/py_dflow/cmake/modules/Finddflow.cmake new file mode 100644 index 000000000..aa51a2793 --- /dev/null +++ b/inference/dataflow/py_dflow/cmake/modules/Finddflow.cmake @@ -0,0 +1,85 @@ +if (dflow_FOUND) + message(STATUS "Package dflow has been found.") + return() +endif() + +set(_cmake_targets_defined "") +set(_cmake_targets_not_defined "") +set(_cmake_expected_targets "") +foreach(_cmake_expected_target IN ITEMS flow_graph dflow_headers) + list(APPEND _cmake_expected_targets "${_cmake_expected_target}") + if(TARGET "${_cmake_expected_target}") + list(APPEND _cmake_targets_defined "${_cmake_expected_target}") + else() + list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}") + endif() +endforeach() +unset(_cmake_expected_target) + +if(_cmake_targets_defined STREQUAL _cmake_expected_targets) + unset(_cmake_targets_defined) + unset(_cmake_targets_not_defined) + unset(_cmake_expected_targets) + unset(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() + +if(NOT _cmake_targets_defined STREQUAL "") + string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}") + string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n") +endif() +unset(_cmake_targets_defined) +unset(_cmake_targets_not_defined) +unset(_cmake_expected_targets) + +find_path(_INCLUDE_DIR + NAMES flow_graph/data_flow.h + NO_CMAKE_SYSTEM_PATH + NO_CMAKE_FIND_ROOT_PATH) + +find_library(flow_graph_SHARED_LIBRARY + NAMES libflow_graph.so + PATH_SUFFIXES lib64 + NO_CMAKE_SYSTEM_PATH + NO_CMAKE_FIND_ROOT_PATH) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(dflow + FOUND_VAR + 
dflow_FOUND + REQUIRED_VARS + _INCLUDE_DIR + flow_graph_SHARED_LIBRARY +) + +if(dflow_FOUND) + set(dflow_INCLUDE_DIR "${_INCLUDE_DIR}") + include(CMakePrintHelpers) + message(STATUS "Variables in dflow module:") + cmake_print_variables(dflow_INCLUDE_DIR) + cmake_print_variables(flow_graph_SHARED_LIBRARY) + + add_library(flow_graph SHARED IMPORTED) + set_target_properties(flow_graph PROPERTIES + INTERFACE_LINK_LIBRARIES "dflow_headers" + IMPORTED_LOCATION "${flow_graph_SHARED_LIBRARY}" + ) + + add_library(dflow_headers INTERFACE IMPORTED) + set_target_properties(dflow_headers PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${dflow_INCLUDE_DIR}" + ) + + include(CMakePrintHelpers) + cmake_print_properties(TARGETS flow_graph + PROPERTIES INTERFACE_LINK_LIBRARIES IMPORTED_LOCATION + ) + cmake_print_properties(TARGETS dflow_headers + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + ) +endif() + +# Cleanup temporary variables. +set(_INCLUDE_DIR) diff --git a/inference/dataflow/py_dflow/cmake/modules/Findmetadef.cmake b/inference/dataflow/py_dflow/cmake/modules/Findmetadef.cmake index 750835bc7..67f9a3dfd 100644 --- a/inference/dataflow/py_dflow/cmake/modules/Findmetadef.cmake +++ b/inference/dataflow/py_dflow/cmake/modules/Findmetadef.cmake @@ -1,12 +1,12 @@ if (metadef_FOUND) - message(STATUS "Package air has been found.") + message(STATUS "Package metadef has been found.") return() endif() set(_cmake_targets_defined "") set(_cmake_targets_not_defined "") set(_cmake_expected_targets "") -foreach(_cmake_expected_target IN ITEMS flow_graph metadef_headers) +foreach(_cmake_expected_target IN ITEMS metadef_headers) list(APPEND _cmake_expected_targets "${_cmake_expected_target}") if(TARGET "${_cmake_expected_target}") list(APPEND _cmake_targets_defined "${_cmake_expected_target}") @@ -35,13 +35,7 @@ unset(_cmake_targets_not_defined) unset(_cmake_expected_targets) find_path(_INCLUDE_DIR - NAMES flow_graph/data_flow.h - NO_CMAKE_SYSTEM_PATH - NO_CMAKE_FIND_ROOT_PATH) - 
-find_library(flow_graph_SHARED_LIBRARY - NAMES libflow_graph.so - PATH_SUFFIXES lib64 + NAMES graph/types.h NO_CMAKE_SYSTEM_PATH NO_CMAKE_FIND_ROOT_PATH) @@ -51,7 +45,6 @@ find_package_handle_standard_args(metadef metadef_FOUND REQUIRED_VARS _INCLUDE_DIR - flow_graph_SHARED_LIBRARY ) if(metadef_FOUND) @@ -59,13 +52,6 @@ if(metadef_FOUND) include(CMakePrintHelpers) message(STATUS "Variables in metadef module:") cmake_print_variables(metadef_INCLUDE_DIR) - cmake_print_variables(flow_graph_SHARED_LIBRARY) - - add_library(flow_graph SHARED IMPORTED) - set_target_properties(flow_graph PROPERTIES - INTERFACE_LINK_LIBRARIES "metadef_headers" - IMPORTED_LOCATION "${flow_graph_SHARED_LIBRARY}" - ) add_library(metadef_headers INTERFACE IMPORTED) set_target_properties(metadef_headers PROPERTIES @@ -73,9 +59,6 @@ if(metadef_FOUND) ) include(CMakePrintHelpers) - cmake_print_properties(TARGETS flow_graph - PROPERTIES INTERFACE_LINK_LIBRARIES IMPORTED_LOCATION - ) cmake_print_properties(TARGETS metadef_headers PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ) diff --git a/inference/dataflow/py_dflow/python/dataflow/dataflow.py b/inference/dataflow/py_dflow/python/dataflow/dataflow.py index 73d70805a..8b4fb177f 100644 --- a/inference/dataflow/py_dflow/python/dataflow/dataflow.py +++ b/inference/dataflow/py_dflow/python/dataflow/dataflow.py @@ -1286,15 +1286,16 @@ class FlowGraph(object): "for details about the error information, see the ascend log.", dwrapper.INNER_ERROR, ) + flow_info.start_time = ret[2].start_time + flow_info.end_time = ret[2].end_time + flow_info.flow_flags = ret[2].flow_flags + flow_info.transaction_id = ret[2].transaction_id if ret[0].ret_code != 0 and ret[0].ret_code != dwrapper.SUBHEALTHY: log.error("failed to fetch data, error msg = %s", ret[0].error_msg) + return (outputs, flow_info, ret[0].ret_code) for output in ret[1]: outputs.append(Tensor(output)) - flow_info.start_time = ret[2].start_time - flow_info.end_time = ret[2].end_time - flow_info.flow_flags = 
ret[2].flow_flags - flow_info.transaction_id = ret[2].transaction_id return (outputs, flow_info, ret[0].ret_code) def feed( diff --git a/inference/dataflow/py_dflow/python/dataflow/tools/tpl/tpl_cmake.py b/inference/dataflow/py_dflow/python/dataflow/tools/tpl/tpl_cmake.py index 7c4beeab7..e1f727e19 100644 --- a/inference/dataflow/py_dflow/python/dataflow/tools/tpl/tpl_cmake.py +++ b/inference/dataflow/py_dflow/python/dataflow/tools/tpl/tpl_cmake.py @@ -93,7 +93,7 @@ add_library(${UDF_TARGET_LIB} SHARED target_compile_options(${UDF_TARGET_LIB} PRIVATE -O2 - -std=c++11 + -std=c++17 -ftrapv -fstack-protector-all -fPIC diff --git a/inference/dataflow/py_dflow/wrapper/CMakeLists.txt b/inference/dataflow/py_dflow/wrapper/CMakeLists.txt index 87e5b99e4..920557436 100644 --- a/inference/dataflow/py_dflow/wrapper/CMakeLists.txt +++ b/inference/dataflow/py_dflow/wrapper/CMakeLists.txt @@ -29,14 +29,17 @@ set_target_properties(dflow_wrapper PREFIX "" ) +target_compile_definitions(dflow_wrapper PRIVATE + PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF +) + target_compile_options(dflow_wrapper PRIVATE -O2 - -std=c++11 -Xlinker -export-dynamic ) target_link_options(dflow_wrapper PRIVATE - -s + $<$:-s> ) project(data_wrapper) @@ -47,6 +50,10 @@ target_include_directories(data_wrapper PRIVATE ${pybind11_INCLUDE_DIR} ) +target_compile_definitions(data_wrapper PRIVATE + PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF +) + target_link_libraries(data_wrapper PRIVATE $ $ @@ -60,13 +67,11 @@ set_target_properties(data_wrapper target_compile_options(data_wrapper PRIVATE -O2 - -std=c++11 - -s -Xlinker -export-dynamic ) target_link_options(data_wrapper PRIVATE - -s + $<$:-s> ) add_subdirectory(flow_func_wrapper) diff --git a/inference/dataflow/py_dflow/wrapper/flow_func_wrapper/CMakeLists.txt b/inference/dataflow/py_dflow/wrapper/flow_func_wrapper/CMakeLists.txt index c4b3e35cd..64800eba6 100644 --- a/inference/dataflow/py_dflow/wrapper/flow_func_wrapper/CMakeLists.txt +++ 
b/inference/dataflow/py_dflow/wrapper/flow_func_wrapper/CMakeLists.txt @@ -23,13 +23,15 @@ set_target_properties(flowfunc_wrapper PREFIX "" ) +target_compile_definitions(flowfunc_wrapper PRIVATE + PYBIND11_NO_ASSERT_GIL_HELD_INCREF_DECREF +) + target_compile_options(flowfunc_wrapper PRIVATE -O2 - -std=c++11 - -s -Xlinker -export-dynamic ) target_link_options(flowfunc_wrapper PRIVATE - -s + $<$:-s> ) \ No newline at end of file -- Gitee From 408eccf6434fe07c606b47646bd5db489b2fc40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E6=99=BA=E6=85=A7?= Date: Tue, 16 Sep 2025 06:58:26 +0000 Subject: [PATCH 76/97] =?UTF-8?q?!2759=20add=20xPyD=20sample=20Merge=20pul?= =?UTF-8?q?l=20request=20!2759=20from=20=E8=B5=B5=E6=99=BA=E6=85=A7/zzh=5F?= =?UTF-8?q?add=5Fllm=5Fdatadist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../10_llm_data_dist/README.md | 10 ++ .../pull_blocks_xpyd_sample.py | 150 ++++++++++++++++++ .../push_blocks_sample.py | 7 +- 3 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/pull_blocks_xpyd_sample.py diff --git a/python/level1_single_api/10_llm_data_dist/README.md b/python/level1_single_api/10_llm_data_dist/README.md index 6dc0a99d7..8b1a3f6b7 100644 --- a/python/level1_single_api/10_llm_data_dist/README.md +++ b/python/level1_single_api/10_llm_data_dist/README.md @@ -122,5 +122,15 @@ # Decoder主机: GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python switch_role_sample.py --device_id 1 --role d --local_host_ip 10.170.10.1 --remote_host_ip 10.170.10.0 ``` + - pull_blocks_xpyd_sample.py:此样例程序支持xPyD测试场景,使用单侧建链方式,每个进程申请内存并注册blocks, 每个decoder和所有的prompt发起建链, 并pull blocks到本地,local_ip_port指定本地host ip和端口, + 分别在Prompt主机与Decoder主机,执行样例程序: + ``` + # 任意个Prompt主机: + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python pull_blocks_xpyd_sample.py --device_id 0 --role p --local_ip_port 10.170.10.0:26000 + 
GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python pull_blocks_xpyd_sample.py --device_id 1 --role p --local_ip_port 10.170.10.0:26001 + # 任意个Decoder主机: + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python pull_blocks_xpyd_sample.py --device_id 2 --role d --local_ip_port 10.170.10.0:26002 --remote_ip_port '10.170.10.0:26000;10.170.10.0:26001' + GLOO_SOCKET_IFNAME=enp67s0f5 HCCL_INTRA_ROCE_ENABLE=1 python pull_blocks_xpyd_sample.py --device_id 3 --role d --local_ip_port 10.170.10.0:26003 --remote_ip_port '10.170.10.0:26000;10.170.10.0:26001' + ``` **注**:**GLOO_SOCKET_IFNAME**为本地网卡名,可通过ifconfig查询;**HCCL_INTRA_ROCE_ENABLE=1**代表使用roce方式进行通信; diff --git a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/pull_blocks_xpyd_sample.py b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/pull_blocks_xpyd_sample.py new file mode 100644 index 000000000..aeaf76871 --- /dev/null +++ b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/pull_blocks_xpyd_sample.py @@ -0,0 +1,150 @@ +""" +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +import argparse +import time +import logging +from llm_datadist import LLMDataDist, LLMRole, LLMConfig, CacheDesc, DataType, BlocksCacheKey, \ + Placement, LLMClusterInfo, LLMStatusCode +import torch +import torch_npu +import torchair +import socket +import struct + +logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) + +NUM_TENSORS = 1 +BLOCKS_NUM = 3 +KV_SHAPE = 10 + +def ip_port_to_int(ip_port): + ip, port_str = ip_port.split(':') + port = int(port_str) + if not (0 <= port <= 65535): + raise ValueError("端口号必须在0-65535之间") + # 将IP转换为4字节二进制 + ip_bytes = socket.inet_aton(ip) + + # 将4字节IP转换为32位整数 + ip_int = struct.unpack('!I', ip_bytes)[0] + + # 组合IP整数(32位)和端口(16位)为一个48位整数 + result = (ip_int << 16) | port + return result + + +def init_llm_datadist(args) -> LLMDataDist: + datadist = LLMDataDist(role, ip_port_to_int(args.local_ip_port)) + llm_config = LLMConfig() + llm_config.device_id = args.device_id + llm_config.local_comm_res = "" + if args.role == 'p': + llm_config.listen_ip_info = args.local_ip_port + llm_options = llm_config.generate_options() + datadist.init(llm_options) + logging.info(f"init {role} success, cluster_id={ip_port_to_int(args.local_ip_port)}") + return datadist + + +def run_prompt_sample(datadist, args): + # 1. 
注册内存 + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + tensor = torch.full((BLOCKS_NUM, KV_SHAPE), ip_port_to_int(args.local_ip_port), dtype=torch.float).npu() + + addr = int(tensor.data_ptr()) + cache = cache_manager.register_blocks_cache(cache_desc, [addr], + BlocksCacheKey(ip_port_to_int(args.local_ip_port), 0)) + logging.info('register_blocks_cache success') + logging.info(f'before decoder pull, tensor={tensor}') + + time.sleep(30) + cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + logging.info('[finalize] success') + + +def run_decoder_sample(datadist, args): + # 1. 注册内存 + cache_manager = datadist.cache_manager + cache_desc = CacheDesc(num_tensors=NUM_TENSORS, shape=[BLOCKS_NUM, KV_SHAPE], data_type=DataType.DT_FLOAT, + placement=Placement.DEVICE) + remote_list = args.remote_ip_port.split(';') + + tensor = torch.full((BLOCKS_NUM, KV_SHAPE), 0, dtype=torch.float).npu() + addr = int(tensor.data_ptr()) + cache = cache_manager.register_blocks_cache(cache_desc, [addr], + BlocksCacheKey(ip_port_to_int(args.local_ip_port), 0)) + logging.info('register_blocks_cache success') + + time.sleep(5) # register end + + # 2. 向所有prompt建链 + cluster_list = [] + for remote in remote_list: + cluster = LLMClusterInfo() + cluster.remote_cluster_id = ip_port_to_int(remote) + cluster.append_local_ip_info(args.local_ip_port.split(':')[0], 0) + cluster.append_remote_ip_info(remote.split(':')[0], int(remote.split(':')[1])) + cluster_list.append(cluster) + ret, _ = datadist.link_clusters(cluster_list, 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("link failed") + + # 3. 
向prompt pull blocks + for i, remote in enumerate(remote_list): + cache_manager.pull_blocks(BlocksCacheKey(ip_port_to_int(remote), 0), + cache, src_blocks=[0, 1], dst_blocks=[0, 2]) + logging.info(f'after decoder pull from {ip_port_to_int(remote)}, tensor={tensor}') + + # 4. 断链 + ret, _ = datadist.unlink_clusters(cluster_list, 5000) + if ret != LLMStatusCode.LLM_SUCCESS: + raise Exception("unlink failed") + + cache_manager.unregister_cache(cache.cache_id) + datadist.finalize() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--device_id", type=int, default=0, help='device id') + parser.add_argument("--role", type=str, default=1, help='role type, support p/d') + parser.add_argument("--local_ip_port", type=str, help='local ip port, eg:10.10.10.1:26000') + parser.add_argument("--remote_ip_port", type=str, + help='remote host ip list, eg:10.10.10.2:26000;10.10.10.3:26000') + args = parser.parse_args() + if args.role not in ['p', 'd']: + raise RuntimeError("Not supported cluster id") + if args.device_id not in [0, 1, 2, 3, 4, 5, 6, 7]: + raise RuntimeError("Not supported device id") + if args.local_ip_port is None: + raise RuntimeError("local_ip_port is not set") + if args.role == 'd': + if args.remote_ip_port is None: + raise RuntimeError("remote_ip_port is not set") + logging.info(f'Sample start, device_id = {args.device_id}, role = {args.role}') + + torch.npu.set_device(args.device_id) + role = LLMRole.PROMPT if args.role == 'p' else LLMRole.DECODER + datadist = init_llm_datadist(args) + if role == LLMRole.PROMPT: + run_prompt_sample(datadist, args) + else: + run_decoder_sample(datadist, args) + logging.info('Sample end') diff --git a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py index d4c971629..074c62533 100644 --- 
a/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py +++ b/python/level1_single_api/10_llm_data_dist/cache_manager_api_samples/push_blocks_sample.py @@ -73,6 +73,9 @@ def run_prompt_sample(datadist): # 2. 等decoder pull blocks dist.barrier() # decoder push blocks end + logging.info(f'after decoder push, {tensor=}') + logging.info(f'after decoder push, {tensor2=}') + # 3. 解链 cluster = LLMClusterInfo() cluster.remote_cluster_id = DECODER_CLUSTER_ID @@ -108,9 +111,7 @@ def run_decoder_sample(datadist, local_host_ip, remote_host_ip): raise Exception("link failed") # 3. 向prompt push blocks - cache_manager.pull_blocks(BlocksCacheKey(PROMPT_CLUSTER_ID, 0), cache, src_blocks=[0, 1], dst_blocks=[0, 2]) - logging.info(f'after decoder pull, {tensor=}') - logging.info(f'after decoder pull, {tensor2=}') + cache_manager.push_blocks(BlocksCacheKey(PROMPT_CLUSTER_ID, 0), cache, src_blocks=[0, 1], dst_blocks=[0, 2]) dist.barrier() # push_blocks end -- Gitee From e2cab24744795a89d969bccd709ab11405b23a18 Mon Sep 17 00:00:00 2001 From: renjie Date: Tue, 16 Sep 2025 13:28:24 +0000 Subject: [PATCH 77/97] !2761 simple add hello world samples Merge pull request !2761 from renjie/master --- .../23_simple_add/CMakeLists.txt | 15 ++ .../0_introduction/23_simple_add/README.md | 86 +++++++++ .../23_simple_add/add_custom.cpp | 180 ++++++++++++++++++ .../24_simple_hello_world/CMakeLists.txt | 15 ++ .../24_simple_hello_world/README.md | 54 ++++++ .../24_simple_hello_world/hello_world.cpp | 35 ++++ 6 files changed, 385 insertions(+) create mode 100644 operator/ascendc/0_introduction/23_simple_add/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/23_simple_add/README.md create mode 100644 operator/ascendc/0_introduction/23_simple_add/add_custom.cpp create mode 100644 operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/24_simple_hello_world/README.md create mode 100644 
operator/ascendc/0_introduction/24_simple_hello_world/hello_world.cpp diff --git a/operator/ascendc/0_introduction/23_simple_add/CMakeLists.txt b/operator/ascendc/0_introduction/23_simple_add/CMakeLists.txt new file mode 100644 index 000000000..b3e88f157 --- /dev/null +++ b/operator/ascendc/0_introduction/23_simple_add/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.16) + +set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") + +find_package(ASC REQUIRED) + +project(kernel_samples LANGUAGES ASC CXX) + +set_source_files_properties( + add_custom.cpp PROPERTIES LANGUAGE ASC +) + +add_executable(demo + add_custom.cpp +) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/23_simple_add/README.md b/operator/ascendc/0_introduction/23_simple_add/README.md new file mode 100644 index 000000000..4b3dce29f --- /dev/null +++ b/operator/ascendc/0_introduction/23_simple_add/README.md @@ -0,0 +1,86 @@ +## 简化Add算子直调样例 +本样例以Add算子为示例,展示了一种更为简单的算子编译流程,支持main函数和Kernel函数在同一个cpp文件中实现。 +> ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 +## 目录结构介绍 +``` +├── 23_simple_add +│ ├── CMakeLists.txt // 编译工程文件 +│ └── add_custom.cpp // 算子实现及测试 +``` + +## 算子描述 +Add算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: +``` +z = x + y +``` +## 算子规格描述 + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x8 * 2048floatND
y8 * 2048floatND
算子输出z8 * 2048floatND
核函数名add_custom
+ +## 代码实现介绍 +- kernel实现 + Add算子的数学表达式为: + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用计算接口完成两个输入参数相加,得到最终结果,再搬出到外部存储上。 + + Add算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor xGm和yGm搬运到Local Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal执行加法操作,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor zGm中。 +- tiling实现 + TilingData参数设计,TilingData参数本质上是和并行数据切分相关的参数,本示例算子使用了2个tiling参数:totalLength、tileNum。totalLength是指需要计算的数据量大小,tileNum是指每个核上总计算数据分块个数。比如,totalLength这个参数传递到kernel侧后,可以通过除以参与计算的核数,得到每个核上的计算量,这样就完成了多核数据的切分。 + +- 调用实现 + 使用内核调用符<<<>>>调用核函数。 + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/23_simple_add/ + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 配置按安装径后,执行以下命令统一配置环境变量。 + ```bash + # 配置CANN环境变量 + source ${ASCEND_INSTALL_PATH}/bin/setenv.bash + # 添加AscendC CMake Module搜索路径至环境变量 + export CMAKE_PREFIX_PATH=${ASCEND_INSTALL_PATH}/compiler/tikcpp/ascendc_kernel_cmake:$CMAKE_PREFIX_PATH + ``` + + - 样例执行 + ```bash + mkdir -p build && cd build; # 创建并进入build目录 + cmake ..;make -j; # 编译工程 + ./demo # 执行样例 + ``` + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/09/15 | 新增本readme | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/23_simple_add/add_custom.cpp b/operator/ascendc/0_introduction/23_simple_add/add_custom.cpp new file mode 100644 index 000000000..d2b5cb112 --- /dev/null +++ 
b/operator/ascendc/0_introduction/23_simple_add/add_custom.cpp @@ -0,0 +1,180 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include +#include +#include +#include +#include "acl/acl.h" +#include "kernel_operator.h" + +constexpr uint32_t BUFFER_NUM = 2; // tensor num for each queue + +struct AddCustomTilingData +{ + uint32_t totalLength; + uint32_t tileNum; +}; + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) + { + this->blockLength = totalLength / AscendC::GetBlockNum(); + this->tileNum = tileNum; + this->tileLength = this->blockLength / tileNum / BUFFER_NUM; + xGm.SetGlobalBuffer((__gm__ float *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + yGm.SetGlobalBuffer((__gm__ float *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + zGm.SetGlobalBuffer((__gm__ float *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(float)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(float)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(float)); + } + __aicore__ inline void Process() + { + int32_t loopCount = this->tileNum * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); + AscendC::DataCopy(yLocal, yGm[progress * 
this->tileLength], this->tileLength); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, this->tileLength); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t blockLength; + uint32_t tileNum; + uint32_t tileLength; +}; + +__global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, AddCustomTilingData tiling) +{ + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIV_ONLY); + KernelAdd op; + op.Init(x, y, z, tiling.totalLength, tiling.tileNum); + op.Process(); +} + +std::vector kernel_add(std::vector &x, std::vector &y) +{ + constexpr uint32_t blockDim = 8; + uint32_t totalLength = x.size(); + size_t totalByteSize = totalLength * sizeof(float); + int32_t deviceId = 0; + aclrtStream stream = nullptr; + AddCustomTilingData tiling = {/*totalLength:*/totalLength, /*tileNum:*/8}; + uint8_t *xHost = reinterpret_cast(x.data()); + uint8_t *yHost = reinterpret_cast(y.data()); + uint8_t *zHost = nullptr; + uint8_t *xDevice = nullptr; + uint8_t *yDevice = nullptr; + uint8_t *zDevice = nullptr; + + aclInit(nullptr); + aclrtSetDevice(deviceId); + aclrtCreateStream(&stream); + + aclrtMallocHost((void **)(&zHost), totalByteSize); + aclrtMalloc((void **)&xDevice, totalByteSize, ACL_MEM_MALLOC_HUGE_FIRST); + aclrtMalloc((void **)&yDevice, totalByteSize, ACL_MEM_MALLOC_HUGE_FIRST); 
+ aclrtMalloc((void **)&zDevice, totalByteSize, ACL_MEM_MALLOC_HUGE_FIRST); + + aclrtMemcpy(xDevice, totalByteSize, xHost, totalByteSize, ACL_MEMCPY_HOST_TO_DEVICE); + aclrtMemcpy(yDevice, totalByteSize, yHost, totalByteSize, ACL_MEMCPY_HOST_TO_DEVICE); + + add_custom<<>>(xDevice, yDevice, zDevice, tiling); + aclrtSynchronizeStream(stream); + + aclrtMemcpy(zHost, totalByteSize, zDevice, totalByteSize, ACL_MEMCPY_DEVICE_TO_HOST); + std::vector z((float *)zHost, (float *)(zHost + totalLength)); + + aclrtFree(xDevice); + aclrtFree(yDevice); + aclrtFree(zDevice); + aclrtFreeHost(zHost); + + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + + return z; +} + +uint32_t VerifyResult(std::vector &output, std::vector &golden) +{ + auto printTensor = [](std::vector &tensor, const char *name) { + constexpr size_t maxPrintSize = 20; + std::cout << name << ": "; + std::copy(tensor.begin(), tensor.begin() + std::min(tensor.size(), maxPrintSize), + std::ostream_iterator(std::cout, " ")); + if (tensor.size() > maxPrintSize) { + std::cout << "..."; + } + std::cout << std::endl; + }; + printTensor(output, "Output"); + printTensor(golden, "Golden"); + if (std::equal(output.begin(), output.end(), golden.begin())) { + std::cout << "[Success] Case accuracy is verification passed." << std::endl; + return 0; + } else { + std::cout << "[Failed] Case accuracy is verification failed!" 
<< std::endl; + return 1; + } + return 0; +} + +int32_t main(int32_t argc, char *argv[]) +{ + constexpr uint32_t totalLength = 8 * 2048; + constexpr float valueX = 1.2f; + constexpr float valueY = 2.3f; + std::vector x(totalLength, valueX); + std::vector y(totalLength, valueY); + + std::vector output = kernel_add(x, y); + + std::vector golden(totalLength, valueX + valueY); + return VerifyResult(output, golden); +} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt b/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt new file mode 100644 index 000000000..590f26516 --- /dev/null +++ b/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.16) + +set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") + +find_package(ASC REQUIRED) + +project(kernel_samples LANGUAGES ASC CXX) + +set_source_files_properties( + hello_world.cpp PROPERTIES LANGUAGE ASC +) + +add_executable(demo + hello_world.cpp +) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/README.md b/operator/ascendc/0_introduction/24_simple_hello_world/README.md new file mode 100644 index 000000000..53ab1d8cc --- /dev/null +++ b/operator/ascendc/0_introduction/24_simple_hello_world/README.md @@ -0,0 +1,54 @@ +## 简化HelloWorld算子直调样例 +本样例通过使用<<<>>>内核调用符来完成算子核函数在NPU侧运行验证的基础流程,核函数内通过printf打印输出结果。 +> ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 +## 目录结构介绍 +``` +├── 24_simple_helloworld +│ ├── CMakeLists.txt // 编译工程文件 +│ └── hello_world.cpp // 算子实现及测试 +``` + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/24_simple_helloworld/ + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export 
ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 配置按安装径后,执行以下命令统一配置环境变量。 + ```bash + # 配置CANN环境变量 + source ${ASCEND_INSTALL_PATH}/bin/setenv.bash + # 添加AscendC CMake Module搜索路径至环境变量 + export CMAKE_PREFIX_PATH=${ASCEND_INSTALL_PATH}/compiler/tikcpp/ascendc_kernel_cmake:$CMAKE_PREFIX_PATH + ``` + - 样例执行 + ```bash + mkdir -p build && cd build; # 创建并进入build目录 + cmake ..;make -j; # 编译工程 + ./demo # 执行样例 + ``` + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/09/15 | 新增本readme | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/hello_world.cpp b/operator/ascendc/0_introduction/24_simple_hello_world/hello_world.cpp new file mode 100644 index 000000000..cb28dec3d --- /dev/null +++ b/operator/ascendc/0_introduction/24_simple_hello_world/hello_world.cpp @@ -0,0 +1,35 @@ +/** + * @file hello_world.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "kernel_operator.h" +#include "acl/acl.h" + +__global__ __aicore__ void hello_world() +{ + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIC_ONLY); + AscendC::printf("Hello World!!!\n"); +} + +int32_t main(int argc, char const *argv[]) +{ + aclInit(nullptr); + int32_t deviceId = 0; + aclrtSetDevice(deviceId); + aclrtStream stream = nullptr; + aclrtCreateStream(&stream); + + constexpr uint32_t blockDim = 1; + hello_world<<>>(); + aclrtSynchronizeStream(stream); + + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file -- Gitee From 7a7d686259ef4fd27dcf874e4db8da0d2cd86ee3 Mon Sep 17 00:00:00 2001 From: renjie Date: Wed, 17 Sep 2025 00:59:06 +0000 Subject: [PATCH 78/97] !2766 change case idx Merge pull request !2766 from renjie/master --- .../{23_simple_add => 25_simple_add}/CMakeLists.txt | 0 .../0_introduction/{23_simple_add => 25_simple_add}/README.md | 4 ++-- .../{23_simple_add => 25_simple_add}/add_custom.cpp | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename operator/ascendc/0_introduction/{23_simple_add => 25_simple_add}/CMakeLists.txt (100%) rename operator/ascendc/0_introduction/{23_simple_add => 25_simple_add}/README.md (99%) rename operator/ascendc/0_introduction/{23_simple_add => 25_simple_add}/add_custom.cpp (100%) diff --git a/operator/ascendc/0_introduction/23_simple_add/CMakeLists.txt b/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt similarity index 100% rename from operator/ascendc/0_introduction/23_simple_add/CMakeLists.txt rename to operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt diff --git a/operator/ascendc/0_introduction/23_simple_add/README.md b/operator/ascendc/0_introduction/25_simple_add/README.md similarity index 99% rename from operator/ascendc/0_introduction/23_simple_add/README.md rename to operator/ascendc/0_introduction/25_simple_add/README.md index 4b3dce29f..ffa237fc2 100644 --- 
a/operator/ascendc/0_introduction/23_simple_add/README.md +++ b/operator/ascendc/0_introduction/25_simple_add/README.md @@ -3,7 +3,7 @@ > ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 ## 目录结构介绍 ``` -├── 23_simple_add +├── 25_simple_add │ ├── CMakeLists.txt // 编译工程文件 │ └── add_custom.cpp // 算子实现及测试 ``` @@ -48,7 +48,7 @@ z = x + y - 打开样例目录 以命令行方式下载样例代码,master分支为例。 ```bash - cd ${git_clone_path}/samples/operator/ascendc/0_introduction/23_simple_add/ + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/25_simple_add/ ``` - 配置环境变量 diff --git a/operator/ascendc/0_introduction/23_simple_add/add_custom.cpp b/operator/ascendc/0_introduction/25_simple_add/add_custom.cpp similarity index 100% rename from operator/ascendc/0_introduction/23_simple_add/add_custom.cpp rename to operator/ascendc/0_introduction/25_simple_add/add_custom.cpp -- Gitee From a104fad229ee46a836a038ae5b354af87e3e7891 Mon Sep 17 00:00:00 2001 From: SeaElm Date: Wed, 17 Sep 2025 12:27:02 +0000 Subject: [PATCH 79/97] !2760 add AddCustomTiny sample for frameworklaunch Merge pull request !2760 from SeaElm/seaelm --- .../AddCustomTiny/CMakeLists.txt | 45 ++++++++++ .../AddCustomTiny/README.md | 86 +++++++++++++++++++ .../AddCustomTiny/add_custom_host.cpp | 56 ++++++++++++ .../AddCustomTiny/add_custom_kernel.cpp | 86 +++++++++++++++++++ .../AddCustomTiny/add_custom_tiling.h | 19 ++++ .../1_add_frameworklaunch/README.md | 2 + 6 files changed, 294 insertions(+) create mode 100644 operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/README.md create mode 100644 operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_host.cpp create mode 100644 operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_kernel.cpp create mode 100644 operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_tiling.h diff --git 
a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt new file mode 100644 index 000000000..38e13a85e --- /dev/null +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt @@ -0,0 +1,45 @@ +cmake_minimum_required(VERSION 3.16.0) +project(opp) + +set(ASCEND_COMPUTE_UNIT ascend910b) +find_package(ASC REQUIRED) + +npu_op_package(${vendor_name} + TYPE RUN +) + +file(GLOB host_ops_srcs ${CMAKE_CURRENT_SOURCE_DIR}/add_custom_host.cpp) +npu_op_code_gen( + SRC ${host_ops_srcs} + PACKAGE ${vendor_name} + OUT_DIR ${ASCEND_AUTOGEN_PATH} + OPTIONS + OPS_PRODUCT_NAME ${ASCEND_COMPUTE_UNIT} +) + +file(GLOB autogen_aclnn_srcs ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) +set_source_files_properties(${autogen_aclnn_srcs} PROPERTIES GENERATED TRUE) +npu_op_library(cust_opapi ACLNN + ${autogen_aclnn_srcs} +) + +npu_op_library(cust_optiling TILING + ${host_ops_srcs} +) + +npu_op_kernel_library(ascendc_kernels + SRC_BASE ${CMAKE_SOURCE_DIR}/ + TILING_LIBRARY cust_optiling +) + +npu_op_kernel_sources(ascendc_kernels + OP_NAME AddCustom + KERNEL_FILE add_custom_kernel.cpp +) + +npu_op_package_add(${vendor_name} + LIBRARY + cust_opapi + cust_optiling + ascendc_kernels +) diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/README.md b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/README.md new file mode 100644 index 000000000..d0e4472c1 --- /dev/null +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/README.md @@ -0,0 +1,86 @@ +## 简化Add算子直调样例 +本样例以Add算子为示例,展示了简单、灵活的算子编译流程。 +**注意:本样例仅支持CANN8.3及以上版本。** +## 目录结构介绍 +``` +├── AddCustomTiny +│ ├── add_cutsom_host.cpp // host侧编译源码文件 +│ ├── add_custom_tiling.h // host侧编译tiling头文件 +│ ├── add_custom_kernel.cpp // kernel侧编译源码文件 +│ ├── CMakeLists.txt // 编译工程文件 +│ └── readme.md // 算子实现及测试 +``` + +## 算子描述 +Add算子实现了两个数据相加,返回相加结果的功能。对应的数学表达式为: 
+``` +z = x + y +``` +## 算子规格描述 + + + + + + + + + + + +
算子类型(OpType)Add
算子输入nameshapedata typeformat
x8 * 2048floatND
y8 * 2048floatND
算子输出z8 * 2048floatND
核函数名add_custom
+ +## 代码实现介绍 +- kernel实现 + Add算子的数学表达式为: + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用计算接口完成两个输入参数相加,得到最终结果,再搬出到外部存储上。 + + Add算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor xGm和yGm搬运到Local Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal执行加法操作,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor zGm中。 +- tiling实现 + TilingData参数设计,TilingData参数本质上是和并行数据切分相关的参数,本示例算子使用了2个tiling参数:totalLength、tileNum。totalLength是指需要计算的数据量大小,tileNum是指每个核上总计算数据分块个数。比如,totalLength这个参数传递到kernel侧后,可以通过除以参与计算的核数,得到每个核上的计算量,这样就完成了多核数据的切分。 + + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + + +## 编译样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/ + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 配置按安装径后,执行以下命令统一配置环境变量。 + ```bash + # 配置CANN环境变量 + source ${ASCEND_INSTALL_PATH}/bin/setenv.bash + # 添加AscendC CMake Module搜索路径至环境变量 + export CMAKE_PREFIX_PATH=${ASCEND_INSTALL_PATH}/compiler/tikcpp/ascendc_kernel_cmake:$CMAKE_PREFIX_PATH + ``` + + - 样例执行 + ```bash + rm -rf build && mkdir build && cd build + cmake .. 
&& make -j binary package + ``` + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/09/16 | 新增readme | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_host.cpp b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_host.cpp new file mode 100644 index 000000000..1cc87f4fb --- /dev/null +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_host.cpp @@ -0,0 +1,56 @@ +/** + * @file add_custom_host.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "add_custom_tiling.h" +#include "register/op_def_registry.h" +#include "tiling/tiling_api.h" + +namespace optiling { +const uint32_t BLOCK_DIM = 8; +const uint32_t TILE_NUM = 8; +static ge::graphStatus TilingFunc(gert::TilingContext *context) +{ + AddCustomTilingData *tiling = context->GetTilingData(); + uint32_t totalLength = context->GetInputShape(0)->GetOriginShape().GetShapeSize(); + context->SetBlockDim(BLOCK_DIM); + tiling->totalLength = totalLength; + tiling->tileNum = TILE_NUM; + return ge::GRAPH_SUCCESS; +} +} // namespace optiling + + +namespace ops { +class AddCustom : public OpDef { +public: + explicit AddCustom(const char *name) : OpDef(name) + { + this->Input("x") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}); + this->Input("y") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}); + this->Output("z") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}); + + this->AICore() + .SetTiling(optiling::TilingFunc) + .AddConfig("ascend910") + .AddConfig("ascend310p") + .AddConfig("ascend310b") + .AddConfig("ascend910b"); + } +}; +OP_ADD(AddCustom); +} // 
namespace ops diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_kernel.cpp b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_kernel.cpp new file mode 100644 index 000000000..22a9876fe --- /dev/null +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_kernel.cpp @@ -0,0 +1,86 @@ +/** + * @file add_custom_kernel.cpp + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "kernel_operator.h" +#include "add_custom_tiling.h" +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) + { + this->blockLength = totalLength / AscendC::GetBlockNum(); + this->tileNum = tileNum; + this->tileLength = this->blockLength / tileNum / BUFFER_NUM; + + xGm.SetGlobalBuffer((__gm__ DTYPE_X *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(DTYPE_X)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Y)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Z)); + } + __aicore__ inline void Process() + { + int32_t loopCount = this->tileNum * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = 
inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); + AscendC::DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, this->tileLength); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t blockLength; + uint32_t tileNum; + uint32_t tileLength; +}; + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) +{ + REGISTER_TILING_DEFAULT(AddCustomTilingData); + GET_TILING_DATA(tilingData, tiling); + KernelAdd op; + op.Init(x, y, z, tilingData.totalLength, tilingData.tileNum); + op.Process(); +} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_tiling.h b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_tiling.h new file mode 100644 index 000000000..d80ecbee4 --- /dev/null +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/add_custom_tiling.h @@ -0,0 +1,19 @@ +/** + * @file add_custom_tiling.h + * + * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef ADD_CUSTOM_TILING_H +#define ADD_CUSTOM_TILING_H +#include + +struct AddCustomTilingData { + uint32_t totalLength; + uint32_t tileNum; +}; + +#endif // ADD_CUSTOM_TILING_H diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/README.md b/operator/ascendc/0_introduction/1_add_frameworklaunch/README.md index de724d24f..5f1223f77 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/README.md +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/README.md @@ -9,6 +9,7 @@ │ ├── AclOfflineModel // 通过aclopExecuteV2调用的方式调用AddCustom算子 │ ├── AclOnlineModel // 通过aclopCompile调用的方式调用AddCustom算子 │ ├── AddCustom // AddCustom算子工程 +│ ├── AddCustomTiny // AddCustom自定义算子工程极简样例 │ ├── PytorchInvocation // 通过pytorch调用的方式调用AddCustom算子 │ ├── TensorflowInvocation // 通过tensorflow调用的方式调用AddCustom算子 │ ├── CppExtensionInvocation // 通过CppExtension调用的方式调用AddCustom算子 @@ -150,3 +151,4 @@ CANN软件包中提供了工程创建工具msOpGen,AddCustom算子工程可通 | 2024/11/11 | 样例目录调整 | | 2024/11/18 | 算子工程改写为由msOpGen生成 | | 2025/01/17 | 新增CppExtensionInvocation样例 | +| 2025/9/17 | 新增AddCustomTiny极简工程样例 | \ No newline at end of file -- Gitee From c36a0b3636e99063c26aa670534a56d3c77b2c91 Mon Sep 17 00:00:00 2001 From: Chen Ning Date: Thu, 18 Sep 2025 11:35:45 +0000 Subject: [PATCH 80/97] !2774 add namespace AscendC::tiling for sample * add namespace AscendC::tiling for sample --- .../MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h index 8f32f3418..4ea1394d9 100644 --- 
a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h @@ -15,7 +15,7 @@ struct MatmulCustomTilingData { uint64_t localMemSize; - TCubeTiling cubeTilingData; + AscendC::tiling::TCubeTiling cubeTilingData; }; #endif // MATMUL_TILING_H \ No newline at end of file -- Gitee From a2dc87c0fb5ea7f4ee383cdf63bb406a8c982e1b Mon Sep 17 00:00:00 2001 From: Yangzw Date: Thu, 18 Sep 2025 11:50:02 +0000 Subject: [PATCH 81/97] !2771 fix dump name in readme.md Merge pull request !2771 from Yangzw/master --- .../3_ir/1_fuse_tile_concat_pass/readme.md | 16 ++++++++++++++-- .../3_ir/2_fuse_matmul_add_pass/readme.md | 16 ++++++++++++++-- .../3_ir/3_modify_subgraph_pass/readme.md | 10 ++++++++-- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/cplusplus/level1_single_api/3_ir/1_fuse_tile_concat_pass/readme.md b/cplusplus/level1_single_api/3_ir/1_fuse_tile_concat_pass/readme.md index 37647c871..0d69d830d 100644 --- a/cplusplus/level1_single_api/3_ir/1_fuse_tile_concat_pass/readme.md +++ b/cplusplus/level1_single_api/3_ir/1_fuse_tile_concat_pass/readme.md @@ -77,7 +77,13 @@ - 检查执行结果: - - 自定义pass生效时对比npu编译过程中间dump图**ge_onnx_xxxxxxxx_graph_0_RunCustomPassBegin.pbtxt**("xxxxxxxx"为8位阿拉伯数字,下同)和**ge_onnx_xxxxxxxx_graph_0_RunCustomPassEnd.pbtxt**发现模型已按照预期被优化。dump图的获取方法请单击[Link](https://hiascend.com/document/redirect/CannCommercialEnvvar)>编译相关>图编译>DUMP_GE_GRAPH获取。 + - 自定义Pass生效时,对比NPU编译过程中间dump图,发现模型已按照预期被优化,dump图的获取方法请单击[Link](https://hiascend.com/document/redirect/CannCommercialEnvvar)>编译相关>图编译>DUMP_GE_GRAPH获取: + - 针对8.3.RC1之前的版本,dump图名字为: + - ge_onnx_xxxxxxxx_RunCustomPassBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassEnd.pbtxt:融合后的图 + - 8.3.RC1及后续版本,dump图名字为: + - ge_onnx_xxxxxxxx_PreRunBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassBeforeInfershape.pbtxt:融合后的图 - 日志中出现如下打印: @@ -108,7 +114,13 
@@ - 自定义pass生效前后运行结果相同。 - - 自定义pass生效时对比npu编译过程中间dump图**ge_onnx_xxxxxxxx_graph_1_RunCustomPassBegin.pbtxt**和**ge_onnx_xxxxxxxx_graph_1_RunCustomPassEnd.pbtxt**发现模型已按照预期被优化。 + - 自定义Pass生效时,对比NPU编译过程中间dump图,发现模型已按照预期被优化: + - 针对8.3.RC1之前的版本,dump图名字为: + - ge_onnx_xxxxxxxx_RunCustomPassBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassEnd.pbtxt:融合后的图 + - 8.3.RC1及后续版本,dump图名字为: + - ge_onnx_xxxxxxxx_PreRunBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassBeforeInfershape.pbtxt:融合后的图 - 日志中出现如下打印: diff --git a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass/readme.md b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass/readme.md index e993a43c8..a4986c4c7 100644 --- a/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass/readme.md +++ b/cplusplus/level1_single_api/3_ir/2_fuse_matmul_add_pass/readme.md @@ -78,7 +78,13 @@ - 检查执行结果: - - 自定义pass生效时对比npu编译过程中间dump图**ge_onnx_xxxxxxxx_graph_0_RunCustomPassBegin.pbtxt**("xxxxxxxx"为8位阿拉伯数字,下同)和**ge_onnx_xxxxxxxx_graph_0_RunCustomPassEnd.pbtxt**发现模型已按照预期被优化。dump图的获取方法请单击[Link](https://hiascend.com/document/redirect/CannCommercialEnvvar)>编译相关>图编译>DUMP_GE_GRAPH获取。 + - 自定义Pass生效时,对比NPU编译过程中间dump图,发现模型已按照预期被优化,dump图的获取方法请单击[Link](https://hiascend.com/document/redirect/CannCommercialEnvvar)>编译相关>图编译>DUMP_GE_GRAPH获取: + - 针对8.3.RC1之前的版本,dump图名字为: + - ge_onnx_xxxxxxxx_RunCustomPassBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassEnd.pbtxt:融合后的图 + - 8.3.RC1及后续版本,dump图名字为: + - ge_onnx_xxxxxxxx_PreRunBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassBeforeInfershape.pbtxt:融合后的图 - 日志中出现如下打印: @@ -107,7 +113,13 @@ - 自定义pass生效前后运行结果相同。 - - 自定义pass生效时对比npu编译过程中间dump图**ge_onnx_xxxxxxxx_graph_1_RunCustomPassBegin.pbtxt**和**ge_onnx_xxxxxxxx_graph_1_RunCustomPassEnd.pbtxt**发现模型已按照预期被优化。 + - 自定义Pass生效时,对比NPU编译过程中间dump图,发现模型已按照预期被优化: + - 针对8.3.RC1之前的版本,dump图名字为: + - ge_onnx_xxxxxxxx_RunCustomPassBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassEnd.pbtxt:融合后的图 + - 8.3.RC1及后续版本,dump图名字为: + - 
ge_onnx_xxxxxxxx_PreRunBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassBeforeInfershape.pbtxt:融合后的图 - 日志中出现如下打印: diff --git a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md index 66986b664..acd7ff8ac 100644 --- a/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md +++ b/cplusplus/level1_single_api/3_ir/3_modify_subgraph_pass/readme.md @@ -22,7 +22,7 @@ - 编译器:g++ - 芯片:all - python及依赖的库:python3.7.5、tensorflow1.15.0 -- 已完成昇腾AI软件栈在开发环境上的部署(本示例代码基于CANN 8.2.RC1版本) +- 已完成昇腾AI软件栈在开发环境上的部署 ## 程序编译 @@ -88,7 +88,13 @@ - 两次自定义pass生效前后运行结果按照预期不相同,且结果都正确。 - - 两次自定义pass生效时对比npu编译过程中间dump图**ge_onnx_xxxxxxxx_graph_1_RunCustomPassBegin.pbtxt**("xxxxxxxx"为8位阿拉伯数字,下同)和**ge_onnx_xxxxxxxx_graph_1_RunCustomPassEnd.pbtxt**发现模型已按照预期被优化。dump图的获取方法请单击[Link](https://hiascend.com/document/redirect/CannCommercialEnvvar)>编译相关>图编译>DUMP_GE_GRAPH获取。 + - 两次自定义Pass生效时,对比NPU编译过程中间dump图,发现模型已按照预期被优化,dump图的获取方法请单击[Link](https://hiascend.com/document/redirect/CannCommercialEnvvar)>编译相关>图编译>DUMP_GE_GRAPH获取: + - 针对8.3.RC1之前的版本,dump图名字为: + - ge_onnx_xxxxxxxx_RunCustomPassBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassEnd.pbtxt:融合后的图 + - 8.3.RC1及后续版本,dump图名字为: + - ge_onnx_xxxxxxxx_PreRunBegin.pbtxt:融合前的图 + - ge_onnx_xxxxxxxx_RunCustomPassBeforeInfershape.pbtxt:融合后的图 - 使用**libmodify_subgraph_pass_01.so**时,日志中出现如下打印: -- Gitee From de813043470dd53569eae7a111db195137a8e790 Mon Sep 17 00:00:00 2001 From: zhanghao0689 Date: Mon, 22 Sep 2025 11:40:21 +0000 Subject: [PATCH 82/97] !2775 remove pos Merge pull request !2775 from zhanghao0689/master --- .../KernelLaunch/add_custom_v3.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp index d424b54f1..b3b0511de 100644 --- 
a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_v3.cpp @@ -31,13 +31,13 @@ public: // use local memory allocator to simplify memor allocation AscendC::LocalMemAllocator ubAllocator; // ping - AscendC::LocalTensor xLocalPing = ubAllocator.Alloc(); - AscendC::LocalTensor yLocalPing = ubAllocator.Alloc(); - AscendC::LocalTensor zLocalPing = ubAllocator.Alloc(); + AscendC::LocalTensor xLocalPing = ubAllocator.Alloc(); + AscendC::LocalTensor yLocalPing = ubAllocator.Alloc(); + AscendC::LocalTensor zLocalPing = ubAllocator.Alloc(); // pong - AscendC::LocalTensor xLocalPong = ubAllocator.Alloc(); - AscendC::LocalTensor yLocalPong = ubAllocator.Alloc(); - AscendC::LocalTensor zLocalPong = ubAllocator.Alloc(); + AscendC::LocalTensor xLocalPong = ubAllocator.Alloc(); + AscendC::LocalTensor yLocalPong = ubAllocator.Alloc(); + AscendC::LocalTensor zLocalPong = ubAllocator.Alloc(); // double buffer AscendC::SetFlag(EVENT_ID0); -- Gitee From ea24ca1b8cf42b92870f4a2233b241aa3aa604ba Mon Sep 17 00:00:00 2001 From: renjie Date: Tue, 23 Sep 2025 01:07:11 +0000 Subject: [PATCH 83/97] !2776 add simple pybind sample && matmul leakyrelu sample Merge pull request !2776 from renjie/master --- .../24_simple_hello_world/CMakeLists.txt | 6 +- .../24_simple_hello_world/README.md | 7 +- .../{hello_world.cpp => hello_world.asc} | 2 +- .../25_simple_add/CMakeLists.txt | 6 +- .../0_introduction/25_simple_add/README.md | 7 +- .../{add_custom.cpp => add_custom.asc} | 2 +- .../26_simple_matmulleakyrelu/CMakeLists.txt | 18 + .../26_simple_matmulleakyrelu/README.md | 100 +++++ .../26_simple_matmulleakyrelu/data_utils.h | 96 +++++ .../matmul_leakyrelu.asc | 350 ++++++++++++++++++ .../26_simple_matmulleakyrelu/run.sh | 6 + .../scripts/gen_data.py | 35 ++ .../scripts/verify_result.py | 55 +++ .../CMakeLists.txt | 63 ++++ 
.../27_simple_add_cpp_extensions/README.md | 102 +++++ .../add_custom.asc | 111 ++++++ .../add_custom_test.py | 38 ++ operator/ascendc/0_introduction/README.md | 7 +- 18 files changed, 989 insertions(+), 22 deletions(-) rename operator/ascendc/0_introduction/24_simple_hello_world/{hello_world.cpp => hello_world.asc} (97%) rename operator/ascendc/0_introduction/25_simple_add/{add_custom.cpp => add_custom.asc} (99%) create mode 100644 operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md create mode 100644 operator/ascendc/0_introduction/26_simple_matmulleakyrelu/data_utils.h create mode 100644 operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc create mode 100755 operator/ascendc/0_introduction/26_simple_matmulleakyrelu/run.sh create mode 100644 operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/gen_data.py create mode 100644 operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/verify_result.py create mode 100644 operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt create mode 100644 operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md create mode 100644 operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc create mode 100644 operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom_test.py diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt b/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt index 590f26516..559929405 100644 --- a/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt +++ b/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt @@ -6,10 +6,6 @@ find_package(ASC REQUIRED) project(kernel_samples LANGUAGES ASC CXX) -set_source_files_properties( - hello_world.cpp PROPERTIES LANGUAGE ASC -) - add_executable(demo - hello_world.cpp + 
hello_world.asc ) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/README.md b/operator/ascendc/0_introduction/24_simple_hello_world/README.md index 53ab1d8cc..ddcaf96e9 100644 --- a/operator/ascendc/0_introduction/24_simple_hello_world/README.md +++ b/operator/ascendc/0_introduction/24_simple_hello_world/README.md @@ -5,19 +5,18 @@ ``` ├── 24_simple_helloworld │ ├── CMakeLists.txt // 编译工程文件 -│ └── hello_world.cpp // 算子实现及测试 +│ └── hello_world.asc // AscendC算子实现 & 调用样例 ``` ## 支持的产品型号 本样例支持如下产品型号: -- Atlas A2训练系列产品/Atlas 800I A2推理产品 - +- Atlas A2 训练系列产品/Atlas 800I A2 推理产品 ## 运行样例算子 - 打开样例目录 以命令行方式下载样例代码,master分支为例。 ```bash - cd ${git_clone_path}/samples/operator/ascendc/0_introduction/24_simple_helloworld/ + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/24_simple_helloworld ``` - 配置环境变量 diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/hello_world.cpp b/operator/ascendc/0_introduction/24_simple_hello_world/hello_world.asc similarity index 97% rename from operator/ascendc/0_introduction/24_simple_hello_world/hello_world.cpp rename to operator/ascendc/0_introduction/24_simple_hello_world/hello_world.asc index cb28dec3d..e67f32663 100644 --- a/operator/ascendc/0_introduction/24_simple_hello_world/hello_world.cpp +++ b/operator/ascendc/0_introduction/24_simple_hello_world/hello_world.asc @@ -1,5 +1,5 @@ /** - * @file hello_world.cpp + * @file hello_world.asc * * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
* diff --git a/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt b/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt index b3e88f157..08689321b 100644 --- a/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt +++ b/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt @@ -6,10 +6,6 @@ find_package(ASC REQUIRED) project(kernel_samples LANGUAGES ASC CXX) -set_source_files_properties( - add_custom.cpp PROPERTIES LANGUAGE ASC -) - add_executable(demo - add_custom.cpp + add_custom.asc ) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/25_simple_add/README.md b/operator/ascendc/0_introduction/25_simple_add/README.md index ffa237fc2..470c3edef 100644 --- a/operator/ascendc/0_introduction/25_simple_add/README.md +++ b/operator/ascendc/0_introduction/25_simple_add/README.md @@ -5,7 +5,7 @@ ``` ├── 25_simple_add │ ├── CMakeLists.txt // 编译工程文件 -│ └── add_custom.cpp // 算子实现及测试 +│ └── add_custom.asc // AscendC算子实现 & 调用样例 ``` ## 算子描述 @@ -41,14 +41,13 @@ z = x + y ## 支持的产品型号 本样例支持如下产品型号: -- Atlas A2训练系列产品/Atlas 800I A2推理产品 - +- Atlas A2 训练系列产品/Atlas 800I A2 推理产品 ## 运行样例算子 - 打开样例目录 以命令行方式下载样例代码,master分支为例。 ```bash - cd ${git_clone_path}/samples/operator/ascendc/0_introduction/25_simple_add/ + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/25_simple_add ``` - 配置环境变量 diff --git a/operator/ascendc/0_introduction/25_simple_add/add_custom.cpp b/operator/ascendc/0_introduction/25_simple_add/add_custom.asc similarity index 99% rename from operator/ascendc/0_introduction/25_simple_add/add_custom.cpp rename to operator/ascendc/0_introduction/25_simple_add/add_custom.asc index d2b5cb112..544e134f3 100644 --- a/operator/ascendc/0_introduction/25_simple_add/add_custom.cpp +++ b/operator/ascendc/0_introduction/25_simple_add/add_custom.asc @@ -1,5 +1,5 @@ /** - * @file add_custom.cpp + * @file add_custom.asc * * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
* diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt new file mode 100644 index 000000000..2958d3a02 --- /dev/null +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.16) + +set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") + +find_package(ASC REQUIRED) + +project(kernel_samples LANGUAGES ASC CXX) + +add_executable(demo + matmul_leakyrelu.asc +) + +target_link_libraries(demo PRIVATE + tiling_api + register + platform + m +) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md new file mode 100644 index 000000000..da5b62293 --- /dev/null +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md @@ -0,0 +1,100 @@ +## 简化MatmulLeakyRelu算子直调样例 +本样例以MatmulLeakyRelu算子为示例,展示了一种更为简单的算子编译流程,支持main函数和Kernel函数在同一个cpp文件中实现。 +> ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 + +## 目录结构介绍 +``` +├── 26_simple_matmulleakyrelu +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── data_utils.h // 数据读入写出函数 +│ ├── matmul_leakyrelu.asc // AscendC算子实现 & 调用样例 +│ ├── run.sh // 运行脚本 +│ └── scripts +│ ├── gen_data.py // 输入数据和真值数据生成脚本文件 +│ └── verify_result.py // 真值对比文件 +``` + +## 算子描述 +算子使用了MatmulLeakyRelu高阶API,实现了快速的MatmulLeakyRelu矩阵乘法的运算操作。 + +MatmulLeakyRelu的计算公式为: + +``` +C = A * B + Bias +C = C > 0 ? C : C * 0.001 +``` + +- A、B为源操作数,A为左矩阵,形状为\[M, K];B为右矩阵,形状为\[K, N]。 +- C为目的操作数,存放矩阵乘结果的矩阵,形状为\[M, N]。 +- Bias为矩阵乘偏置,形状为\[N]。对A*B结果矩阵的每一行都采用该Bias进行偏置。 + +## 算子规格描述 + + + + + + + + + + + + +
算子类型(OpType)MatmulLeakyRelu
算子输入nameshapedata typeformat
a1024 * 256float16ND
b256 * 640float16ND
bias640floatND
算子输出c1024 * 640floatND
核函数名matmul_leakyrelu_custom
+ +## 代码实现介绍 +本样例中实现的是[m, n, k]固定为[1024, 640, 256]的MatmulLeakyRelu算子。 +- kernel实现 + MatmulLeakyRelu算子的数学表达式为: + ``` + C = A * B + Bias + C = C > 0 ? C : C * 0.001 + ``` + 其中A的形状为[1024, 256],B的形状为[256, 640],C的形状为[1024, 640],Bias的形状为[640]。具体请参考[matmul_leakyrelu.cpp](./matmul_leakyrelu.cpp)。 + +- 调用实现 + 使用内核调用符<<<>>>调用核函数。 + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2 训练系列产品/Atlas 800I A2 推理产品 + +## 运行样例算子 + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/26_simple_matmulleakyrelu + ``` + - 配置环境变量 + + 请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 配置按安装径后,执行以下命令统一配置环境变量。 + ```bash + # 配置CANN环境变量 + source ${ASCEND_INSTALL_PATH}/bin/setenv.bash + # 添加AscendC CMake Module搜索路径至环境变量 + export CMAKE_PREFIX_PATH=${ASCEND_INSTALL_PATH}/compiler/tikcpp/ascendc_kernel_cmake:$CMAKE_PREFIX_PATH + ``` + - 样例执行 + ```bash + bash run.sh # 编译并执行样例 + ``` + + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/09/22 | 新增本readme | \ No newline at end of file diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/data_utils.h b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/data_utils.h new file mode 100644 index 000000000..582fbf68f --- /dev/null +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/data_utils.h @@ -0,0 +1,96 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) + +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. 
buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} +#endif // DATA_UTILS_H diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc new file mode 100644 index 000000000..430d62328 --- /dev/null +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc @@ -0,0 +1,350 @@ +/** + * @file matmul_leakyrelu_custom.asc + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "data_utils.h" +#include "kernel_tiling/kernel_tiling.h" +#include "tiling/platform/platform_ascendc.h" +#include "acl/acl.h" +#include "tiling/tiling_api.h" +#include "kernel_operator.h" +#include "lib/matmul_intf.h" + +using namespace matmul; + +__aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) +{ + return (a + b - 1) / b; +} + +/** + * @brief Copy tiling data to TCubeTiling ptr from tiling gm addr. + * @param tiling: TCubeTiling ptr which needs to copy tiling data. + * @param tilingGM: tiling gm addr. 
+ * @retval None + */ +__aicore__ inline void CopyTiling(TCubeTiling *tiling, GM_ADDR tilingGM) +{ + uint32_t *ptr = reinterpret_cast(tiling); + auto tiling32 = reinterpret_cast<__gm__ uint32_t *>(tilingGM); + + for (uint32_t i = 0; i < sizeof(TCubeTiling) / sizeof(uint32_t); i++, ptr++) { + *ptr = *(tiling32 + i); + } + return; +} + +template class MatmulLeakyKernel { +public: + __aicore__ inline MatmulLeakyKernel(){}; + __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, + const TCubeTiling &tiling, AscendC::TPipe *pipe); + __aicore__ inline void Process(AscendC::TPipe *pipe); + + __aicore__ inline void MatmulCompute(); + __aicore__ inline void LeakyReluCompute(); + __aicore__ inline void CopyOut(uint32_t count); + __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, + int32_t &offsetC, int32_t &offsetBias); + + Matmul, MatmulType, + MatmulType, MatmulType> + matmulObj; + + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::LocalTensor reluOutLocal; + TCubeTiling tiling; + AscendC::TQue reluOutQueue_; +}; + +/** + * @brief Set matmulLeaky input and output gm addr of current core. + * @param a: A matrix gm addr. + * @param b: B matrix gm addr. + * @param bias: Bias gm addr. + * @param c: C matrix gm addr. + * @param workspace: Temporary gm space addr required by matmul calc. + * @param tiling: matmul tiling data. + * @param pipe: Global memory and sync management TPipe object. 
+ * @retval None + */ +template +__aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, + GM_ADDR c, GM_ADDR workspace, + const TCubeTiling &tiling, AscendC::TPipe *pipe) +{ + this->tiling = tiling; + aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ aType *>(a), tiling.M * tiling.Ka); + bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ bType *>(b), tiling.Kb * tiling.N); + cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ cType *>(c), tiling.M * tiling.N); + biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType *>(bias), tiling.N); + + int32_t offsetA, offsetB, offsetC, offsetBias; + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); // Calculate the gm offset based on the blockidx. + aGlobal = aGlobal[offsetA]; + bGlobal = bGlobal[offsetB]; + cGlobal = cGlobal[offsetC]; + biasGlobal = biasGlobal[offsetBias]; + pipe->InitBuffer(reluOutQueue_, 1, tiling.baseM * tiling.baseN * sizeof(cType)); // Init output buffer. +} + +/** + * @brief Main process of matmul calculation + * @param pipe: Global memory and sync management TPipe object. + * @retval None + */ +template +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) +{ + uint32_t computeRound = 0; + + matmulObj.SetTensorA(aGlobal); + matmulObj.SetTensorB(bGlobal); + matmulObj.SetBias(biasGlobal); + while (matmulObj.template Iterate()) { // Once Iterate, compute baseM * baseN, sync is set true here. + MatmulCompute(); // Get matmul compute result. + LeakyReluCompute(); // Compute leakyRelu. + CopyOut(computeRound); // Copy leakyRelu out result to GM. 
+ computeRound++; + } + matmulObj.End(); +} + +template +__aicore__ inline void MatmulLeakyKernel::MatmulCompute() +{ + reluOutLocal = reluOutQueue_.AllocTensor(); + matmulObj.template GetTensorC(reluOutLocal, false, true); +} + +template +__aicore__ inline void MatmulLeakyKernel::LeakyReluCompute() +{ + LeakyRelu(reluOutLocal, reluOutLocal, (cType)0.001, tiling.baseM * tiling.baseN); + reluOutQueue_.EnQue(reluOutLocal); +} + +/** + * @brief Copy leakyRelu out result to GM. + * @param count: Iterate count(once Iterate, compute baseM * baseN). + * @retval None + */ +template +__aicore__ inline void MatmulLeakyKernel::CopyOut(uint32_t count) +{ + reluOutQueue_.DeQue(); + const uint32_t roundM = tiling.singleCoreM / tiling.baseM; + const uint32_t roundN = tiling.singleCoreN / tiling.baseN; + uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); + AscendC::DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / AscendC::DEFAULT_C0_SIZE), 0, + (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; + DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); + reluOutQueue_.FreeTensor(reluOutLocal); +} + +/** + * @brief Calculate the gm offset based on the blockidx. + * @param blockIdx: Current Core blockidx. + * @param tiling: Matmul tiling data. + * @param offsetA: Gm offset of A matrix. + * @param offsetB: Gm offset of B matrix. + * @param offsetC: Gm offset of C matrix. + * @param offsetBias: Gm offset of Bias matrix. 
+ * @retval None + */ +template +__aicore__ inline void +MatmulLeakyKernel::CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, + int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, + int32_t &offsetBias) +{ + auto mSingleBlocks = Ceiling(tiling.M, tiling.singleCoreM); + auto mCoreIndx = blockIdx % mSingleBlocks; + auto nCoreIndx = blockIdx / mSingleBlocks; + + offsetA = mCoreIndx * tiling.Ka * tiling.singleCoreM; + offsetB = nCoreIndx * tiling.singleCoreN; + offsetC = mCoreIndx * tiling.N * tiling.singleCoreM + nCoreIndx * tiling.singleCoreN; + offsetBias = nCoreIndx * tiling.singleCoreN; +} + +/** + * @brief matmul_leakyrelu kernel function entry + * @param a: A matrix gm addr. + * @param b: B matrix gm addr. + * @param bias: Bias gm addr. + * @param c: Out gm addr. + * @param workspace: Temporary gm space addr required by matmul calc. + * @param tilingGm: Tiling data addr. + * @retval None + */ +__global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, + GM_ADDR workspace, GM_ADDR tilingGm) +{ + AscendC::TPipe pipe; + TCubeTiling tiling; + CopyTiling(&tiling, tilingGm); + + MatmulLeakyKernel matmulLeakyKernel; + matmulLeakyKernel.Init(a, b, bias, c, workspace, tiling, &pipe); + REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &matmulLeakyKernel.tiling); // Initialize the matmul object. + matmulLeakyKernel.Process(&pipe); +} + +/** + * @brief Generate matmul tiling. + * @param socVersion: Platform socversion. + * @param tilingBuf data buffer. 
+ */ +void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) +{ + using TPosition = matmul_tiling::TPosition; + using CubeFormat = matmul_tiling::CubeFormat; + using DataType = matmul_tiling::DataType; + using namespace std; + int M = 1024; + int N = 640; + int K = 256; + + TPosition leftPosition = TPosition::GM; + CubeFormat leftFormat = CubeFormat::ND; + DataType leftDtype = DataType::DT_FLOAT16; + bool isTransA = false; + + TPosition rightPosition = TPosition::GM; + CubeFormat rightFormat = CubeFormat::ND; + DataType rightDtype = DataType::DT_FLOAT16; + bool isTransB = false; + + TPosition resultPosition = TPosition::GM; + CubeFormat resultFormat = CubeFormat::ND; + DataType resultDtype = DataType::DT_FLOAT; + + TPosition biasPosition = TPosition::GM; + CubeFormat biasFormat = CubeFormat::ND; + DataType biasDtype = DataType::DT_FLOAT; + bool isBias = true; + + int usedCoreNum = 2; + int baseM = 256; + int baseN = 128; + + optiling::TCubeTiling tilingData; + auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); + matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); + + tilingApi.SetDim(usedCoreNum); // Set the number of cores that participate in multi-core computaion is 2. + tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); + tilingApi.SetBType(rightPosition, rightFormat, rightDtype, isTransB); + tilingApi.SetCType(resultPosition, resultFormat, resultDtype); + tilingApi.SetBiasType(biasPosition, biasFormat, biasDtype); + + tilingApi.SetOrgShape(M, N, K); + tilingApi.SetShape(M, N, K); + tilingApi.SetBias(isBias); + tilingApi.SetTraverse(matmul_tiling::MatrixTraverse::FIRSTM); // Set the matmul travse is FIRSTM. + tilingApi.SetFixSplit(baseM, baseN, -1); // Set the fixed baseM=128, baseN=256. + tilingApi.SetBufferSpace(-1, -1, -1); + + int64_t res = tilingApi.GetTiling(tilingData); // Get matmul tiling data. + tilingData.set_stepM(1); // Set the matmul tiling stepM=1. 
+ tilingData.set_stepN(1); // Set the matmul tiling stepN=1. + if (res == -1) { + std::cout << "gen tiling failed" << std::endl; + } + uint32_t tcubeTilingSize = tilingData.GetDataSize(); + tilingData.SaveToBuffer(tilingBuf, tcubeTilingSize); + return; +} + +int32_t main(int32_t argc, char *argv[]) +{ + const char *socVersion = "Ascend910B1"; + auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); + size_t aFileSize = 262144 * sizeof(int16_t); + size_t bFileSize = 163840 * sizeof(int16_t); + size_t cFileSize = 655360 * sizeof(float); + size_t biasFileSize = 640 * sizeof(float); + size_t tilingFileSize = sizeof(TCubeTiling); + size_t userWorkspaceSize = 0; + size_t systemWorkspaceSize = static_cast(ascendcPlatform->GetLibApiWorkSpaceSize()); + size_t workspaceSize = userWorkspaceSize + systemWorkspaceSize; + uint8_t *tilingBuf = (uint8_t *)malloc(tilingFileSize); + GenerateTiling(socVersion, tilingBuf); + uint32_t blockDim = 1; + + aclInit(nullptr); + int32_t deviceId = 0; + aclrtSetDevice(deviceId); + aclrtStream stream = nullptr; + aclrtCreateStream(&stream); + + uint8_t *inputAHost; + uint8_t *inputADevice; + aclrtMallocHost((void **)(&inputAHost), aFileSize); + aclrtMalloc((void **)&inputADevice, aFileSize, ACL_MEM_MALLOC_HUGE_FIRST); + ReadFile("./input/x1_gm.bin", aFileSize, inputAHost, aFileSize); + aclrtMemcpy(inputADevice, aFileSize, inputAHost, aFileSize, ACL_MEMCPY_HOST_TO_DEVICE); + + uint8_t *inputBHost; + uint8_t *inputBDevice; + aclrtMallocHost((void **)(&inputBHost), bFileSize); + aclrtMalloc((void **)&inputBDevice, bFileSize, ACL_MEM_MALLOC_HUGE_FIRST); + ReadFile("./input/x2_gm.bin", bFileSize, inputBHost, bFileSize); + aclrtMemcpy(inputBDevice, bFileSize, inputBHost, bFileSize, ACL_MEMCPY_HOST_TO_DEVICE); + + uint8_t *outputCHost; + uint8_t *outputCDevice; + aclrtMallocHost((void **)(&outputCHost), cFileSize); + aclrtMalloc((void **)&outputCDevice, cFileSize, ACL_MEM_MALLOC_HUGE_FIRST); + + uint8_t 
*inputBiasHost; + uint8_t *inputBiasDevice; + aclrtMallocHost((void **)(&inputBiasHost), biasFileSize); + aclrtMalloc((void **)&inputBiasDevice, biasFileSize, ACL_MEM_MALLOC_HUGE_FIRST); + ReadFile("./input/bias.bin", biasFileSize, inputBiasHost, biasFileSize); + aclrtMemcpy(inputBiasDevice, biasFileSize, inputBiasHost, biasFileSize, ACL_MEMCPY_HOST_TO_DEVICE); + + uint8_t *tilingHost; + uint8_t *tilingDevice; + aclrtMallocHost((void **)(&tilingHost), tilingFileSize); + aclrtMalloc((void **)&tilingDevice, tilingFileSize, ACL_MEM_MALLOC_HUGE_FIRST); + aclrtMemcpy(tilingHost, tilingFileSize, tilingBuf, tilingFileSize, ACL_MEMCPY_HOST_TO_HOST); + aclrtMemcpy(tilingDevice, tilingFileSize, tilingHost, tilingFileSize, ACL_MEMCPY_HOST_TO_DEVICE); + + uint8_t *workspaceDevice; + aclrtMalloc((void **)&workspaceDevice, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + + matmul_leakyrelu_custom<<>>(inputADevice, inputBDevice, inputBiasDevice, outputCDevice, + workspaceDevice, tilingDevice); + + aclrtSynchronizeStream(stream); + + aclrtFree(inputADevice); + aclrtFreeHost(inputAHost); + aclrtFree(inputBDevice); + aclrtFreeHost(inputBHost); + aclrtMemcpy(outputCHost, cFileSize, outputCDevice, cFileSize, ACL_MEMCPY_DEVICE_TO_HOST); + WriteFile("./output/output.bin", outputCHost, cFileSize); + aclrtFree(outputCDevice); + aclrtFreeHost(outputCHost); + aclrtFree(inputBiasDevice); + aclrtFreeHost(inputBiasHost); + aclrtFree(tilingDevice); + aclrtFreeHost(tilingHost); + aclrtFree(workspaceDevice); + + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + free(tilingBuf); + return 0; +} \ No newline at end of file diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/run.sh b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/run.sh new file mode 100755 index 000000000..fe03bb21f --- /dev/null +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash +rm -rf build; mkdir -p build; cd build +cmake ..; 
make -j +python3 ../scripts/gen_data.py +./demo +python3 ../scripts/verify_result.py output/output.bin output/golden.bin diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/gen_data.py b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/gen_data.py new file mode 100644 index 000000000..e03d4359c --- /dev/null +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/gen_data.py @@ -0,0 +1,35 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import numpy as np +import os + + +def gen_golden_data(): + M = 1024 + N = 640 + K = 256 + + input_a = np.random.randint(1, 10, [M, K]).astype(np.float16) + input_b = np.random.randint(1, 10, [K, N]).astype(np.float16) + input_bias = np.random.randint(1, 10, [N]).astype(np.float32) + alpha = 0.001 + golden = (np.matmul(input_a.astype(np.float32), input_b.astype(np.float32)) + input_bias).astype(np.float32) + golden = np.where(golden >= 0, golden, golden * alpha) + os.system("mkdir -p input") + os.system("mkdir -p output") + input_a.tofile("./input/x1_gm.bin") + input_b.tofile("./input/x2_gm.bin") + input_bias.tofile("./input/bias.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data() diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/verify_result.py b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/verify_result.py new file mode 100644 index 000000000..7a7e27ffa --- /dev/null +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/scripts/verify_result.py @@ -0,0 +1,55 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 
2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import sys +import numpy as np + +# for float32 +relative_tol = 1e-6 +absolute_tol = 1e-9 +error_tol = 1e-4 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float32).reshape(-1) + golden = np.fromfile(golden, dtype=np.float32).reshape(-1) + print("golden : ", golden) + print("output : ", output) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt new file mode 100644 index 000000000..96c61cc34 --- /dev/null +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt @@ -0,0 +1,63 @@ +cmake_minimum_required(VERSION 3.16) + 
+set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") + +find_package(ASC REQUIRED) + +execute_process(COMMAND python3 -c "import os; import torch; print(os.path.dirname(torch.__file__))" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE TORCH_PATH +) +message("TORCH_PATH is ${TORCH_PATH}") + +execute_process(COMMAND python3 -c "import os; import torch_npu; print(os.path.dirname(torch_npu.__file__))" + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE TORCH_NPU_PATH +) +message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}") + +execute_process(COMMAND python3 -m pybind11 --includes + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE PYBIND11_INC +) +string(REPLACE " " ";" PYBIND11_INC ${PYBIND11_INC}) + +execute_process(COMMAND python3-config --extension-suffix + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE PYBIND11_SUFFIX +) + +project(kernel_samples LANGUAGES ASC CXX) + +add_library(pybind11_lib SHARED + add_custom.asc +) + +target_link_libraries(pybind11_lib PRIVATE + torch_npu +) + +target_link_directories(pybind11_lib PRIVATE + ${TORCH_PATH}/lib + ${TORCH_NPU_PATH}/lib +) + +target_include_directories(pybind11_lib PRIVATE + ${TORCH_NPU_PATH}/include + ${TORCH_PATH}/include + ${TORCH_PATH}/include/torch/csrc/api/include +) + +target_compile_definitions(pybind11_lib PRIVATE + _GLIBCXX_USE_CXX11_ABI=0 +) + +target_compile_options(pybind11_lib PRIVATE + ${PYBIND11_INC} + -fPIC +) + +set_target_properties(pybind11_lib PROPERTIES + OUTPUT_NAME add_custom${PYBIND11_SUFFIX} + PREFIX "" SUFFIX "" +) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md new file mode 100644 index 000000000..3769caeb1 --- /dev/null +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md @@ -0,0 +1,102 @@ +## 简化Pybind算子直调样例 +本样例使用pybind方式调用核函数,以带有Tiling的Add算子为示例,展示了一种更为简单的算子编译流程,支持main函数和Kernel函数在同一个cpp文件中实现。 +> ⚠️ **注意** 
该样例将在未来的`CANN 8.3`开始支持。 + +## 目录结构介绍 +``` +├── 27_simple_add_cpp_extensions +│ ├── CMakeLists.txt // 编译工程文件 +│ ├── add_custom_test.py // python调用脚本 +│ ├── add_custom.asc // AscendC算子实现 & Pybind封装 +│ └── run.sh // 编译运行算子的脚本 +``` +## 代码实现介绍 +- kernel实现 + Add算子的数学表达式为: + ``` + z = x + y + ``` + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用计算接口完成两个输入参数相加,得到最终结果,再搬出到外部存储上。 + + Add算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor xGm和yGm搬运到Local Memory,分别存储在xLocal、yLocal,Compute任务负责对xLocal、yLocal执行加法操作,计算结果存储在zLocal中,CopyOut任务负责将输出数据从zLocal搬运至Global Memory上的输出Tensor zGm中。具体请参考[add_custom.asc](./add_custom.asc)。 + +- 调用实现 + 通过PyTorch框架进行模型的训练、推理时,会调用到很多算子进行计算,调用方式也和kernel编译流程相关。对于自定义算子工程,需要使用PyTorch Ascend Adapter中的OP-Plugin算子插件对功能进行扩展,让torch可以直接调用自定义算子包中的算子;对于简化KernelLaunch开放式算子编程的方式,也可以使用pytorch调用,此样例演示的就是这种算子调用方式。 + + pybind11.cpp文件是一个C++的代码示例,使用了pybind11库来将C++代码封装成Python模块。该代码实现中定义了一个名为m的pybind11模块,其中包含一个名为run_add_custom的函数。该函数与my_add::run_add_custom函数相同,用于将C++函数转成Python函数。在函数实现中,通过c10_npu::getCurrentNPUStream() 的函数获取当前NPU上的流,通过内核调用符<<<>>>调用自定义的Kernel函数add_custom,在NPU上执行算子。 + + 在add_custom_test.py调用脚本中,通过导入自定义模块add_custom,调用自定义模块add_custom中的run_add_custom函数,在NPU上执行x和y的加法操作,并将结果保存在变量z中。 + +## 支持的产品型号 +本样例支持如下产品型号: +- Atlas A2 训练系列产品/Atlas 800I A2 推理产品 + +## 运行样例算子 + - 安装pytorch (这里使用2.1.0版本为例) + + **aarch64:** + + ```bash + pip3 install torch==2.1.0 + ``` + + **x86:** + + ```bash + pip3 install torch==2.1.0+cpu --index-url https://download.pytorch.org/whl/cpu + ``` + + - 安装torch-npu (以Pytorch2.1.0、python3.9、CANN版本8.0.RC1.alpha002为例) + + ```bash + git clone https://gitee.com/ascend/pytorch.git -b v6.0.rc1.alpha002-pytorch2.1.0 + cd pytorch/ + bash ci/build.sh --python=3.9 + pip3 install dist/*.whl + ``` + + 安装pybind11 + ```bash + pip3 install pybind11 + ``` + + - 打开样例目录 + 以命令行方式下载样例代码,master分支为例。 + ```bash + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/27_simple_add_cpp_extensions + ``` + + - 配置环境变量 + + 
请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。 + - 默认路径,root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + ``` + - 默认路径,非root用户安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + ``` + - 指定路径install_path,安装CANN软件包 + ```bash + export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest + ``` + 配置按安装径后,执行以下命令统一配置环境变量。 + ```bash + # 配置CANN环境变量 + source ${ASCEND_INSTALL_PATH}/bin/setenv.bash + # 添加AscendC CMake Module搜索路径至环境变量 + export CMAKE_PREFIX_PATH=${ASCEND_INSTALL_PATH}/compiler/tikcpp/ascendc_kernel_cmake:$CMAKE_PREFIX_PATH + ``` + + - 样例执行 + ```bash + rm -rf build; mkdir -p build; cd build # 创建并进入build目录 + cmake ..; make -j # 编译算子so + python3 ../add_custom_test.py # 执行样例 + ``` + +## 更新说明 +| 时间 | 更新事项 | +| ---------- | ------------ | +| 2025/09/22 | 新增本readme | diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc new file mode 100644 index 000000000..d4a076832 --- /dev/null +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc @@ -0,0 +1,111 @@ +/** + * @file add_custom.asc + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include +#include + +#include "torch_npu/csrc/core/npu/NPUStream.h" +#include "kernel_operator.h" + +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength) + { + this->blockLength = totalLength / AscendC::GetBlockNum(); + this->tileNum = 8; + this->tileLength = this->blockLength / this->tileNum / BUFFER_NUM; + xGm.SetGlobalBuffer((__gm__ half *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + yGm.SetGlobalBuffer((__gm__ half *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + zGm.SetGlobalBuffer((__gm__ half *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(half)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(half)); + } + __aicore__ inline void Process() + { + int32_t loopCount = this->tileNum * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); + AscendC::DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, this->tileLength); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t 
progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t blockLength; + uint32_t tileNum; + uint32_t tileLength; +}; + +__global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength) +{ + KernelAdd op; + op.Init(x, y, z, totalLength); + op.Process(); +} + +namespace my_add { +at::Tensor run_add_custom(const at::Tensor &x, const at::Tensor &y) +{ + auto aclStream = c10_npu::getCurrentNPUStream().stream(false); + at::Tensor z = at::empty_like(x); + uint32_t blockDim = 8; + uint32_t totalLength = 1; + for (uint32_t size : x.sizes()) { + totalLength *= size; + } + auto xGm = static_cast(const_cast(x.storage().data())); + auto yGm = static_cast(const_cast(y.storage().data())); + auto zGm = static_cast(const_cast(z.storage().data())); + add_custom<<>>(xGm, yGm, zGm, totalLength); + return z; +} +} // namespace my_add + +PYBIND11_MODULE(add_custom, m) +{ + m.doc() = "add_custom pybind11 interfaces"; // optional module docstring + m.def("run_add_custom", &my_add::run_add_custom, ""); +} diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom_test.py b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom_test.py new file mode 100644 index 000000000..b5d63dee7 --- /dev/null +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom_test.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import torch +import torch_npu +from torch_npu.testing.testcase import TestCase, run_tests +import sys, os + +sys.path.append(os.getcwd()) +import add_custom + +torch.npu.config.allow_internal_format = False + + +class TestCustomAdd(TestCase): + + def test_add_custom_ops(self): + length = [8, 2048] + x = torch.rand(length, device='cpu', dtype=torch.float16) + y = torch.rand(length, device='cpu', dtype=torch.float16) + + x_npu = x.npu() + y_npu = y.npu() + output = add_custom.run_add_custom(x_npu, y_npu) + cpuout = torch.add(x, y) + + self.assertRtolEqual(output, cpuout) + + +if __name__ == "__main__": + run_tests() diff --git a/operator/ascendc/0_introduction/README.md b/operator/ascendc/0_introduction/README.md index 44a722d77..0b9383e71 100644 --- a/operator/ascendc/0_introduction/README.md +++ b/operator/ascendc/0_introduction/README.md @@ -38,7 +38,10 @@ | [21_vectoradd_kernellaunch](./21_vectoradd_kernellaunch) | 基于Ascend C的Add多场景自定义Vector算子的KernelLaunch调用样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [22_baremix_kernellaunch](./22_baremix_kernellaunch) | 通过更底层的编码方式,实现MatmulLeayrelu融合算子的样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 | [23_static_tensor_programming_kernellaunch](./23_static_tensor_programming_kernellaunch) | 通过静态Tensor编程方式,实现Add算子的样例 | Atlas A2训练系列产品/Atlas 800I A2推理产品 - +| [24_simple_hello_world](./24_simple_hello_world) | Ascend C异构混合编程样例, 实现Hello World算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [25_simple_add](./25_simple_add) | Ascend C异构混合编程样例, 实现Add自定义Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [26_simple_matmulleakyrelu](./26_simple_matmulleakyrelu) | Ascend C异构混合编程样例, 实现MatmulLeakyRelu自定义Cube+Vector算子及调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I A2推理产品 +| [27_simple_add_cpp_extensions](./27_simple_add_cpp_extensions) | Ascend C异构混合编程样例, 实现Add自定义Vector算子动态库及pybind调用, 支持host/device代码混合编程 | Atlas A2训练系列产品/Atlas 800I 
A2推理产品 ## 获取样例代码 可以使用以下两种方式下载,请选择其中一种进行源码准备。 @@ -72,4 +75,4 @@ | 2025/01/06 | 新增21_vectoradd_kernellaunch样例 | | 2025/07/22 | 新增8_library_frameworklaunch样例 | | 2025/7/28 | 新增22_baremix_kernellaunch | - +| 2025/9/22 | 新增Ascend C异构混合编程样例24-27 | \ No newline at end of file -- Gitee From b808c8314a85f442debe9929ea63f452d4f35b0c Mon Sep 17 00:00:00 2001 From: SeaElm Date: Tue, 23 Sep 2025 07:43:54 +0000 Subject: [PATCH 84/97] !2777 fix header mirco define Merge pull request !2777 from SeaElm/master --- .../op_kernel/matmul_custom_tiling.h | 6 +++--- .../op_host/matmul_custom_tiling.h | 4 ++++ .../op_host/matmul_leakyrelu_custom_tiling.h | 6 +++--- .../ReduceCustom/op_host/reduce_custom_tiling.h | 8 ++++---- .../AbsGatherMaskKernelInvocation/data_utils.h | 2 +- .../AbsPadKernelInvocation/data_utils.h | 2 +- .../ReduceMinKernelInvocation/data_utils.h | 2 +- .../op_kernel/whole_reduce_sum_custom_tiling.h | 6 +++--- .../whole_reduce_sum_custom_tiling.h | 6 +++--- .../CppExtensionInvocation/csrc/function.h | 6 +++--- .../VectorAddMultiCoreWithTiling/add_custom_tiling.h | 2 +- .../add_custom_tiling.h | 2 +- .../KernelLaunch/add_custom_tiling.h | 2 +- .../AddKernelInvocationTilingNeo/add_custom_tiling.h | 2 +- .../op_kernel/tiling_key_add_custom.h | 8 ++++---- .../BroadcastCustom/op_host/broadcast_custom_tiling.h | 2 +- .../MatmulCustom/op_host/matmul_custom_tiling.h | 3 +++ .../LeakyReluCustom/op_host/leaky_relu_custom_tiling.h | 6 +++--- .../MatmulCustom/op_host/matmul_custom_tiling.h | 3 +++ .../MatmulCustom/op_host/matmul_custom_tiling.h | 3 +++ .../DumpTensorCube/MmadCustom/op_kernel/mmad_custom.h | 6 +++--- .../MmadCustom/op_kernel_cube_only/mmad_custom.h | 6 +++--- .../CubeGroupCustom/op_host/cube_group_custom_tiling.h | 10 ++++++---- .../op_host/matmul_api_constant_custom_tiling.h | 8 +++++--- operator/ascendc/2_features/2_tbufpool/data_utils.h | 10 +++++----- .../2_tbufpool/op_host/tbufpool_custom_tiling.h | 6 +++--- 
.../2_features/2_tbufpool/op_kernel/tbufpool_custom.h | 7 +++---- .../all_gather_matmul_demo_def.h | 2 +- .../op_kernel/matmul_reduce_scatter_custom_block.h | 2 +- .../matmul_reduce_scatter_demo_def.h | 2 +- .../op_kernel/matmul_all_reduce_custom_common.h | 6 +++--- .../matmul_all_reduce_demo_def.h | 6 +++--- .../KernelLaunch/quant_group_matmul_custom_tiling.h | 6 +++--- .../CppExtensions/setup/csrc/function.h | 6 +++--- .../AddKernelInvocationTilingNeo/add_custom_tiling.h | 2 +- .../op_host/matmul_custom_tiling.h | 6 +++--- .../op_host/matmul_custom_tiling.h | 3 +++ .../op_host/matmul_leakyrelu_custom_tiling.h | 6 +++--- 38 files changed, 100 insertions(+), 81 deletions(-) diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h index 4ea1394d9..024eb214e 100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomMultiCore/op_kernel/matmul_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef MATMUL_TILING_H -#define MATMUL_TILING_H +#ifndef MATMUL_CUSTOM_TILING_H +#define MATMUL_CUSTOM_TILING_H #include #include "kernel_tiling/kernel_tiling.h" @@ -18,4 +18,4 @@ struct MatmulCustomTilingData { AscendC::tiling::TCubeTiling cubeTilingData; }; -#endif // MATMUL_TILING_H \ No newline at end of file +#endif // MATMUL_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h index 5020453c8..38d92816b 100644 --- a/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h +++ b/operator/ascendc/0_introduction/10_matmul_frameworklaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h @@ -7,6 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ +#ifndef MATMUL_CUSTOM_TILING_H +#define MATMUL_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -18,3 +20,5 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) } // namespace optiling + +#endif // MATMUL_CUSTOM_TILING_H diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h index f3ddd66e9..20d3e8e2f 100644 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef MATMUL_LEAKYRELU_TILING_H -#define MATMUL_LEAKYRELU_TILING_H +#ifndef MATMUL_LEAKYRELU_CUSTOM_TILING_H +#define MATMUL_LEAKYRELU_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -22,4 +22,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulLeakyreluCustom, MatmulLeakyreluCustomTilingData) } // namespace optiling -#endif \ No newline at end of file +#endif // MATMUL_LEAKYRELU_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom_tiling.h b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom_tiling.h index 7f8570e5c..85fb604f0 100644 --- a/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom_tiling.h +++ b/operator/ascendc/0_introduction/14_reduce_frameworklaunch/ReduceCustom/op_host/reduce_custom_tiling.h @@ -1,5 +1,5 @@ /** - * @file add_custom_tiling.h + * @file reduce_custom_tiling.h * * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. * @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef ADD_CUSTOM_TILING_H -#define ADD_CUSTOM_TILING_H +#ifndef REDUCE_CUSTOM_TILING_H +#define REDUCE_CUSTOM_TILING_H #include "register/tilingdata_base.h" namespace optiling { @@ -19,4 +19,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(ReduceCustom, TilingData) } // namespace optiling -#endif // ADD_CUSTOM_TILING_H +#endif // REDUCE_CUSTOM_TILING_H diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/data_utils.h b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/data_utils.h index 7cdefd866..d6d99aebf 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/data_utils.h +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsGatherMaskKernelInvocation/data_utils.h @@ -1,5 +1,5 @@ /** - * @file data_utils.cpp + * @file data_utils.h * * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. * diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/data_utils.h b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/data_utils.h index 7cdefd866..d6d99aebf 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/data_utils.h +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/data_utils.h @@ -1,5 +1,5 @@ /** - * @file data_utils.cpp + * @file data_utils.h * * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. 
* diff --git a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/data_utils.h b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/data_utils.h index 7cdefd866..d6d99aebf 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/data_utils.h +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/data_utils.h @@ -1,5 +1,5 @@ /** - * @file data_utils.cpp + * @file data_utils.h * * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. * diff --git a/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom/op_kernel/whole_reduce_sum_custom_tiling.h b/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom/op_kernel/whole_reduce_sum_custom_tiling.h index 964a2a7b5..b74e5080c 100644 --- a/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom/op_kernel/whole_reduce_sum_custom_tiling.h +++ b/operator/ascendc/0_introduction/18_unaligned_wholereduces_frameworklaunch/WholeReduceSumCustom/op_kernel/whole_reduce_sum_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef WHOELE_REDUCE_SUM_CUSTOM_TILING_H -#define WHOELE_REDUCE_SUM_CUSTOM_TILING_H +#ifndef WHOLE_REDUCE_SUM_CUSTOM_TILING_H +#define WHOLE_REDUCE_SUM_CUSTOM_TILING_H #include struct WholeReduceSumCustomTilingData { @@ -16,4 +16,4 @@ struct WholeReduceSumCustomTilingData { uint32_t rows; uint32_t cols; }; -#endif // WHOELE_REDUCE_SUM_CUSTOM_TILING_H \ No newline at end of file +#endif // WHOLE_REDUCE_SUM_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/whole_reduce_sum_custom_tiling.h b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/whole_reduce_sum_custom_tiling.h index 5d7bf7c55..f96b24d36 100644 --- a/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/whole_reduce_sum_custom_tiling.h +++ b/operator/ascendc/0_introduction/19_unaligned_wholereduces_kernellaunch/WholeReduceSumKernelInvocation/whole_reduce_sum_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef WHOELE_REDUCE_SUM_CUSTOM_TILING_H -#define WHOELE_REDUCE_SUM_CUSTOM_TILING_H +#ifndef WHOLE_REDUCE_SUM_CUSTOM_TILING_H +#define WHOLE_REDUCE_SUM_CUSTOM_TILING_H #include struct WholeReduceSumCustomTilingData { @@ -16,4 +16,4 @@ struct WholeReduceSumCustomTilingData { uint32_t rows; uint32_t cols; }; -#endif \ No newline at end of file +#endif // WHOLE_REDUCE_SUM_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/csrc/function.h b/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/csrc/function.h index 43117bba0..bd3acec12 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/csrc/function.h +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/CppExtensionInvocation/csrc/function.h @@ -7,11 +7,11 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef FUNCTION_H_ -#define FUNCTION_H_ +#ifndef FUNCTION_H +#define FUNCTION_H #include at::Tensor add_custom_autograd(const at::Tensor& self, const at::Tensor& other); -#endif // FUNCTION_H_ +#endif // FUNCTION_H diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.h b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.h index 6293b7713..7b82c3377 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.h +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTiling/add_custom_tiling.h @@ -34,4 +34,4 @@ struct AddCustomTilingData { uint32_t isEvenCore; }; -#endif \ No newline at end of file +#endif // ADD_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom_tiling.h 
b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom_tiling.h index 8c31f6f4a..cf6cbed24 100644 --- a/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom_tiling.h +++ b/operator/ascendc/0_introduction/21_vectoradd_kernellaunch/VectorAddMultiCoreWithTilingBroadcast/add_custom_tiling.h @@ -36,4 +36,4 @@ struct AddCustomTilingData { uint32_t tailTileLength; uint32_t tailLastTileLength; }; -#endif \ No newline at end of file +#endif // ADD_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h index 278a6e336..c5d4c20f3 100644 --- a/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h +++ b/operator/ascendc/0_introduction/23_static_tensor_programming_kernellaunch/KernelLaunch/add_custom_tiling.h @@ -14,4 +14,4 @@ struct AddCustomTilingData { uint32_t singleCoreLength; }; -#endif \ No newline at end of file +#endif // ADD_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h index 7b400df63..42109f93d 100644 --- a/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h +++ b/operator/ascendc/0_introduction/3_add_kernellaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h @@ -15,4 +15,4 @@ struct AddCustomTilingData { uint32_t totalLength; uint32_t tileNum; }; -#endif \ No newline at end of file +#endif // ADD_CUSTOM_TILING_H \ No newline at end of file diff --git 
a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h index 1cc3d7700..eae217444 100644 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h @@ -1,5 +1,5 @@ /** - * @file tiling_key_add_custom.cpp + * @file tiling_key_add_custom.h * * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. * @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef TILING_KEY_ADD_H -#define TILING_KEY_ADD_H +#ifndef TILING_KEY_ADD_CUSTOM_H +#define TILING_KEY_ADD_CUSTOM_H #include "ascendc/host_api/tiling/template_argument.h" #define ADD_TPL_FP16 10 @@ -66,4 +66,4 @@ ASCENDC_TPL_SEL( ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1) ) ); -#endif \ No newline at end of file +#endif // TILING_KEY_ADD_CUSTOM_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom/op_host/broadcast_custom_tiling.h b/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom/op_host/broadcast_custom_tiling.h index 868175874..587e7dd90 100644 --- a/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom/op_host/broadcast_custom_tiling.h +++ b/operator/ascendc/0_introduction/7_broadcast_frameworklaunch/BroadcastCustom/op_host/broadcast_custom_tiling.h @@ -25,4 +25,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(BroadcastCustom, BroadcastTilingData) } // namespace optiling -#endif \ No newline at end of file +#endif // BROADCAST_CUSTOM_TILING_H \ No newline at end of file diff --git 
a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h index 135fc5b54..eaa785944 100644 --- a/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h +++ b/operator/ascendc/0_introduction/8_library_frameworklaunch/static_library/MatmulCustom/op_host/matmul_custom_tiling.h @@ -7,6 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ +#ifndef MATMUL_CUSTOM_TILING_H +#define MATMUL_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -18,3 +20,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) } // namespace optiling +#endif // MATMUL_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h index 599026df6..7707aaef9 100644 --- a/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h +++ b/operator/ascendc/0_introduction/9_leakyrelu_frameworklaunch/LeakyReluCustom/op_host/leaky_relu_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef LEAKYRELU_CUSTOM_TILING_H -#define LEAKYRELU_CUSTOM_TILING_H +#ifndef LEAKY_RELU_CUSTOM_TILING_H +#define LEAKY_RELU_CUSTOM_TILING_H #include "register/tilingdata_base.h" namespace optiling { @@ -20,4 +20,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(LeakyReluCustom, TilingData) } // namespace optiling -#endif // LEAKYRELU_CUSTOM_TILING_H \ No newline at end of file +#endif // LEAKY_RELU_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h index d664c3cbe..884c442f9 100644 --- a/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h +++ b/operator/ascendc/1_utilities/0_printf/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h @@ -7,6 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ +#ifndef MATMUL_CUSTOM_TILING_H +#define MATMUL_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -17,3 +19,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) } // namespace optiling +#endif // MATMUL_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h index d664c3cbe..884c442f9 100644 --- a/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h +++ b/operator/ascendc/1_utilities/3_assert/FrameworkLaunch/MatmulCustom/op_host/matmul_custom_tiling.h @@ -7,6 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ +#ifndef MATMUL_CUSTOM_TILING_H +#define MATMUL_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -17,3 +19,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) } // namespace optiling +#endif // MATMUL_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel/mmad_custom.h b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel/mmad_custom.h index 324d595f3..1ed198587 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel/mmad_custom.h +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel/mmad_custom.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef MMAD_CUSTOM_OP_KERNEL_MMAD_CUSTOM_H -#define MMAD_CUSTOM_OP_KERNEL_MMAD_CUSTOM_H +#ifndef MMAD_CUSTOM_H +#define MMAD_CUSTOM_H #include "kernel_operator.h" constexpr int32_t OFFSET_LENGTH = 32; // offset length for DumpAccChkPoint @@ -184,4 +184,4 @@ private: uint16_t aSize, bSize, cSize, mBlocks, nBlocks, kBlocks; }; -#endif // MMAD_CUSTOM_OP_KERNEL_MMAD_CUSTOM_H +#endif // MMAD_CUSTOM_H diff --git a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel_cube_only/mmad_custom.h b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel_cube_only/mmad_custom.h index ec3896b33..8969a06ef 100644 --- a/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel_cube_only/mmad_custom.h +++ b/operator/ascendc/1_utilities/7_dumptensor/FrameworkLaunch/DumpTensorCube/MmadCustom/op_kernel_cube_only/mmad_custom.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or 
FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef MMAD_CUSTOM_OP_KERNEL_CUBE_ONLY_MMAD_CUSTOM_H -#define MMAD_CUSTOM_OP_KERNEL_CUBE_ONLY_MMAD_CUSTOM_H +#ifndef MMAD_CUSTOM_H +#define MMAD_CUSTOM_H #include "kernel_operator.h" // half type, cube block: [16, 16] @@ -177,4 +177,4 @@ private: uint16_t aSize, bSize, cSize; }; -#endif // MMAD_CUSTOM_OP_KERNEL_CUBE_ONLY_MMAD_CUSTOM_H +#endif // MMAD_CUSTOM_H diff --git a/operator/ascendc/2_features/12_cube_group/CubeGroupCustom/op_host/cube_group_custom_tiling.h b/operator/ascendc/2_features/12_cube_group/CubeGroupCustom/op_host/cube_group_custom_tiling.h index 8a7e36e8a..b09f90486 100644 --- a/operator/ascendc/2_features/12_cube_group/CubeGroupCustom/op_host/cube_group_custom_tiling.h +++ b/operator/ascendc/2_features/12_cube_group/CubeGroupCustom/op_host/cube_group_custom_tiling.h @@ -1,12 +1,14 @@ -/* - * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. +/** + * @file cube_group_custom_tiling.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ #ifndef CUBE_GROUP_CUSTOM_TILING_H -#define CUBE_GREOUP_CUSTOM_TILING_H +#define CUBE_GROUP_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -17,4 +19,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(CubeGroupCustom, CubeGroupCustomTilingData) } -#endif // namespace optiling +#endif // CUBE_GROUP_CUSTOM_TILING_H diff --git a/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom/op_host/matmul_api_constant_custom_tiling.h b/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom/op_host/matmul_api_constant_custom_tiling.h index 8d8382186..bd6a6dd69 100644 --- a/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom/op_host/matmul_api_constant_custom_tiling.h +++ b/operator/ascendc/2_features/14_matmul_api_constant/MatmulApiConstantCustom/op_host/matmul_api_constant_custom_tiling.h @@ -1,5 +1,7 @@ -/* - * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. +/** + * @file matmul_api_constant_custom_tiling.h + * + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,4 +19,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulApiConstantCustom, MatmulApiConstantCustomTilingData) } -#endif // namespace optiling +#endif // MATMUL_API_CONSTANT_CUSTOM_TILING_H diff --git a/operator/ascendc/2_features/2_tbufpool/data_utils.h b/operator/ascendc/2_features/2_tbufpool/data_utils.h index 05590dd72..caa807b9e 100644 --- a/operator/ascendc/2_features/2_tbufpool/data_utils.h +++ b/operator/ascendc/2_features/2_tbufpool/data_utils.h @@ -21,7 +21,7 @@ #include #ifndef ASCENDC_CPU_DEBUG #include "acl/acl.h" -#endif +#endif // ASCENDC_CPU_DEBUG typedef enum { DT_UNDEFINED = -1, @@ -55,7 +55,7 @@ typedef enum { std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ } \ } while (0); -#endif +#endif // ASCENDC_CPU_DEBUG /** * @brief Read data from file @@ -155,7 +155,7 @@ void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow } } } -#endif +#endif // ASCENDC_CPU_DEBUG void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow=16) { @@ -196,7 +196,7 @@ void PrintData(const void *data, size_t count, printDataType dataType, size_t el case HALF: DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); break; -#endif +#endif // ASCENDC_CPU_DEBUG case FLOAT: DoPrintData(reinterpret_cast(data), count, elementsPerRow); break; @@ -208,4 +208,4 @@ void PrintData(const void *data, size_t count, printDataType dataType, size_t el } std::cout << std::endl; } -#endif // EXAMPLES_COMMON_DATA_UTILS_H +#endif // DATA_UTILS_H diff --git a/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h b/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h index 63c60d78c..015d193d1 100644 --- a/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h +++ 
b/operator/ascendc/2_features/2_tbufpool/op_host/tbufpool_custom_tiling.h @@ -8,11 +8,11 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef EXAMPLES_ACTIVATION_TBUFPOOL_CUSTOM_TILING_H -#define EXAMPLES_ACTIVATION_TBUFPOOL_CUSTOM_TILING_H +#ifndef TBUFPOOL_CUSTOM_TILING_H +#define TBUFPOOL_CUSTOM_TILING_H #include struct TbufPoolTilingData { uint32_t totalLength; }; -#endif +#endif // TBUFPOOL_CUSTOM_TILING_H diff --git a/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h b/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h index 9c3559512..066bbc25e 100644 --- a/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h +++ b/operator/ascendc/2_features/2_tbufpool/op_kernel/tbufpool_custom.h @@ -8,8 +8,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef EXAMPLES_ACTIVATION_INITBUFPOOL_CUSTOM_H -#define EXAMPLES_ACTIVATION_INITBUFPOOL_CUSTOM_H +#ifndef TBUFPOOL_CUSTOM_H +#define TBUFPOOL_CUSTOM_H #include "../op_host/tbufpool_custom_tiling.h" #include "kernel_operator.h" @@ -124,5 +124,4 @@ class TbufPoolImpl { }; }// namespace MyCustomKernel -#endif - \ No newline at end of file +#endif // TBUFPOOL_CUSTOM_H \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h index 1377915f5..7edd8afc9 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_demo_def.h @@ -16,4 +16,4 @@ constexpr uint32_t RANK_M = 512; constexpr uint32_t RANK_K = 5120; constexpr uint32_t RANK_N = 640; -#endif \ No newline at end of file +#endif // ALL_GATHER_MATMUL_DEMO_DEF_H \ No newline at end of file diff --git 
a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_block.h b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_block.h index 5fb880fb5..56322b612 100644 --- a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_block.h +++ b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/MatmulReduceScatterCustom/op_kernel/matmul_reduce_scatter_custom_block.h @@ -1,5 +1,5 @@ /** - * @file matmul_reduce_scatter_custom_block.cpp + * @file matmul_reduce_scatter_custom_block.h * * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. * diff --git a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/matmul_reduce_scatter_demo_def.h b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/matmul_reduce_scatter_demo_def.h index 12453ccf0..82febcc32 100644 --- a/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/matmul_reduce_scatter_demo_def.h +++ b/operator/ascendc/4_best_practices/22_matmul_reduce_scatter_custom/matmul_reduce_scatter_demo_def.h @@ -20,4 +20,4 @@ constexpr bool IS_TRANS_B = false; constexpr int64_t COMM_TURN = 0; constexpr char REDUCE_OP[] = "sum"; -#endif +#endif // MATMUL_REDUCE_SCATTER_DEMO_DEF_H diff --git a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h index 4dbf9e704..b0ade35fd 100644 --- a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h +++ 
b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/MatmulAllReduceCustom/op_kernel/matmul_all_reduce_custom_common.h @@ -8,8 +8,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef MC2_ALLREDUCE_COMM_H -#define MC2_ALLREDUCE_COMM_H +#ifndef MC2_ALL_REDUCE_COMM_H +#define MC2_ALL_REDUCE_COMM_H #if defined ASCENDC_CPU_DEBUG #define SET_G_CORE_TYPE_IS_AIV thread_local int g_coreType = 2 @@ -114,4 +114,4 @@ __aicore__ __inline__ GM_ADDR GetTailC(GM_ADDR cGM, TCubeTiling& tiling, uint32_ } } -#endif // MC2_ALLREDUCE_COMM_H +#endif // MC2_ALL_REDUCE_COMM_H diff --git a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/matmul_all_reduce_demo_def.h b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/matmul_all_reduce_demo_def.h index 6fbbf969c..b012baed4 100644 --- a/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/matmul_all_reduce_demo_def.h +++ b/operator/ascendc/4_best_practices/23_matmul_all_reduce_custom/matmul_all_reduce_demo_def.h @@ -8,8 +8,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef MATMUL_REDUCE_SCATTER_DEMO_DEF_H -#define MATMUL_REDUCE_SCATTER_DEMO_DEF_H +#ifndef MATMUL_ALL_REDUCE_DEMO_DEF_H +#define MATMUL_ALL_REDUCE_DEMO_DEF_H constexpr uint32_t RANK_DIM = 8; constexpr uint32_t RANK_M = 16384; @@ -20,4 +20,4 @@ constexpr bool IS_TRANS_B = false; constexpr int64_t COMM_TURN = 0; constexpr char REDUCE_OP[] = "sum"; -#endif +#endif // MATMUL_ALL_REDUCE_DEMO_DEF_H diff --git a/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/quant_group_matmul_custom_tiling.h b/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/quant_group_matmul_custom_tiling.h index e698b6653..bf541254e 100644 --- a/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/quant_group_matmul_custom_tiling.h +++ b/operator/ascendc/4_best_practices/6_group_matmul/KernelLaunch/quant_group_matmul_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef QUANT_GROUP_MATMUL_TILING_H -#define QUANT_GROUP_MATMUL_TILING_H +#ifndef QUANT_GROUP_MATMUL_CUSTOM_TILING_H +#define QUANT_GROUP_MATMUL_CUSTOM_TILING_H #include "kernel_tiling/kernel_tiling.h" @@ -26,4 +26,4 @@ struct QuantGroupMatmulCustomTilingData TCubeTiling mmTilingData; }; -#endif \ No newline at end of file +#endif // QUANT_GROUP_MATMUL_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/function.h b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/function.h index f728d30a1..97767b35d 100644 --- a/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/function.h +++ b/operator/ascendc/tutorials/AddCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/function.h @@ -7,12 +7,12 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef FUNCTION_H_ -#define FUNCTION_H_ +#ifndef FUNCTION_H +#define FUNCTION_H #include at::Tensor my_op_impl_autograd(const at::Tensor &self, const at::Tensor &other); at::Tensor my_op_impl_autograd1(const at::Tensor &self, const at::Tensor &other); -#endif // FUNCTION_H_ +#endif // FUNCTION_H diff --git a/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h b/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h index 7b400df63..42109f93d 100644 --- a/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h +++ b/operator/ascendc/tutorials/AddCustomSample/KernelLaunch/AddKernelInvocationTilingNeo/add_custom_tiling.h @@ -15,4 +15,4 @@ struct AddCustomTilingData { uint32_t totalLength; uint32_t tileNum; }; -#endif \ No newline at end of file +#endif // ADD_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom_tiling.h b/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom_tiling.h index fd898cba9..01c1a3c4a 100644 --- a/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom_tiling.h +++ b/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef MATMUL_TILING_H -#define MATMUL_TILING_H +#ifndef MATMUL_CUSTOM_TILING_H +#define MATMUL_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -22,4 +22,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) } // namespace optiling -#endif \ No newline at end of file +#endif // MATMUL_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h b/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h index 5020453c8..500743d0d 100644 --- a/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h +++ b/operator/ascendc/tutorials/MatmulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_host/matmul_custom_tiling.h @@ -7,6 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ +#ifndef MATMUL_CUSTOM_TILING_H +#define MATMUL_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -18,3 +20,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulCustomTilingData) } // namespace optiling +#endif // MATMUL_CUSTOM_TILING_H \ No newline at end of file diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h index f3ddd66e9..6b4044241 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom_tiling.h @@ -7,8 +7,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef MATMUL_LEAKYRELU_TILING_H -#define MATMUL_LEAKYRELU_TILING_H +#ifndef MATMUL_LEAKYRELU_CUSTOM_TILING_H +#define MATMUL_LEAKYRELU_CUSTOM_TILING_H #include "register/tilingdata_base.h" #include "tiling/tiling_api.h" @@ -22,4 +22,4 @@ END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(MatmulLeakyreluCustom, MatmulLeakyreluCustomTilingData) } // namespace optiling -#endif \ No newline at end of file +#endif // MATMUL_LEAKYRELU_CUSTOM_TILING_H \ No newline at end of file -- Gitee From 81800e934052f7ef5e7f0813f7d1e5c4e37a462a Mon Sep 17 00:00:00 2001 From: renjie Date: Tue, 23 Sep 2025 11:22:25 +0000 Subject: [PATCH 85/97] !2779 simplify simple matmul leakyrelu case Merge pull request !2779 from renjie/master --- .../24_simple_hello_world/README.md | 2 +- .../0_introduction/25_simple_add/README.md | 2 +- .../26_simple_matmulleakyrelu/README.md | 4 +- .../matmul_leakyrelu.asc | 66 +++++-------------- .../27_simple_add_cpp_extensions/README.md | 2 +- 5 files changed, 20 insertions(+), 56 deletions(-) diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/README.md b/operator/ascendc/0_introduction/24_simple_hello_world/README.md index ddcaf96e9..483b3d8c3 100644 --- a/operator/ascendc/0_introduction/24_simple_hello_world/README.md +++ b/operator/ascendc/0_introduction/24_simple_hello_world/README.md @@ -33,7 +33,7 @@ ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash diff --git a/operator/ascendc/0_introduction/25_simple_add/README.md b/operator/ascendc/0_introduction/25_simple_add/README.md index 470c3edef..439264e6a 100644 --- a/operator/ascendc/0_introduction/25_simple_add/README.md +++ b/operator/ascendc/0_introduction/25_simple_add/README.md @@ -64,7 +64,7 @@ z = x + y ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 
配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md index da5b62293..6c2eea0b4 100644 --- a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md @@ -51,7 +51,7 @@ C = C > 0 ? C : C * 0.001 C = A * B + Bias C = C > 0 ? C : C * 0.001 ``` - 其中A的形状为[1024, 256],B的形状为[256, 640],C的形状为[1024, 640],Bias的形状为[640]。具体请参考[matmul_leakyrelu.cpp](./matmul_leakyrelu.cpp)。 + 其中A的形状为[1024, 256],B的形状为[256, 640],C的形状为[1024, 640],Bias的形状为[640]。具体请参考[matmul_leakyrelu.asc](./matmul_leakyrelu.asc)。 - 调用实现 使用内核调用符<<<>>>调用核函数。 @@ -81,7 +81,7 @@ C = C > 0 ? C : C * 0.001 ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc index 430d62328..7c057d79a 100644 --- a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc @@ -10,35 +10,16 @@ #include "data_utils.h" #include "kernel_tiling/kernel_tiling.h" #include "tiling/platform/platform_ascendc.h" -#include "acl/acl.h" #include "tiling/tiling_api.h" +#include "acl/acl.h" #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; } -/** - * @brief Copy tiling data to TCubeTiling ptr from tiling gm addr. - * @param tiling: TCubeTiling ptr which needs to copy tiling data. - * @param tilingGM: tiling gm addr. 
- * @retval None - */ -__aicore__ inline void CopyTiling(TCubeTiling *tiling, GM_ADDR tilingGM) -{ - uint32_t *ptr = reinterpret_cast(tiling); - auto tiling32 = reinterpret_cast<__gm__ uint32_t *>(tilingGM); - - for (uint32_t i = 0; i < sizeof(TCubeTiling) / sizeof(uint32_t); i++, ptr++) { - *ptr = *(tiling32 + i); - } - return; -} - template class MatmulLeakyKernel { public: __aicore__ inline MatmulLeakyKernel(){}; @@ -52,8 +33,10 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; AscendC::GlobalTensor aGlobal; @@ -183,16 +166,13 @@ MatmulLeakyKernel::CalcOffset(int32_t blockIdx, c * @param bias: Bias gm addr. * @param c: Out gm addr. * @param workspace: Temporary gm space addr required by matmul calc. - * @param tilingGm: Tiling data addr. + * @param tiling: Tiling data. * @retval None */ __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, - GM_ADDR workspace, GM_ADDR tilingGm) + GM_ADDR workspace, AscendC::tiling::TCubeTiling tiling) { AscendC::TPipe pipe; - TCubeTiling tiling; - CopyTiling(&tiling, tilingGm); - MatmulLeakyKernel matmulLeakyKernel; matmulLeakyKernel.Init(a, b, bias, c, workspace, tiling, &pipe); REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &matmulLeakyKernel.tiling); // Initialize the matmul object. @@ -201,15 +181,13 @@ __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR /** * @brief Generate matmul tiling. - * @param socVersion: Platform socversion. - * @param tilingBuf data buffer. + * @param ascendcPlatform: platform info. 
*/ -void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) +AscendC::tiling::TCubeTiling GenerateTiling(platform_ascendc::PlatformAscendC* ascendcPlatform) { using TPosition = matmul_tiling::TPosition; using CubeFormat = matmul_tiling::CubeFormat; using DataType = matmul_tiling::DataType; - using namespace std; int M = 1024; int N = 640; int K = 256; @@ -237,8 +215,6 @@ void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) int baseM = 256; int baseN = 128; - optiling::TCubeTiling tilingData; - auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); // Set the number of cores that participate in multi-core computaion is 2. @@ -254,15 +230,14 @@ void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) tilingApi.SetFixSplit(baseM, baseN, -1); // Set the fixed baseM=128, baseN=256. tilingApi.SetBufferSpace(-1, -1, -1); + AscendC::tiling::TCubeTiling tilingData; int64_t res = tilingApi.GetTiling(tilingData); // Get matmul tiling data. - tilingData.set_stepM(1); // Set the matmul tiling stepM=1. - tilingData.set_stepN(1); // Set the matmul tiling stepN=1. if (res == -1) { std::cout << "gen tiling failed" << std::endl; } - uint32_t tcubeTilingSize = tilingData.GetDataSize(); - tilingData.SaveToBuffer(tilingBuf, tcubeTilingSize); - return; + tilingData.stepM = 1; // Set the matmul tiling stepM=1. + tilingData.stepN = 1; // Set the matmul tiling stepN=1. 
+ return tilingData; } int32_t main(int32_t argc, char *argv[]) @@ -277,8 +252,7 @@ int32_t main(int32_t argc, char *argv[]) size_t userWorkspaceSize = 0; size_t systemWorkspaceSize = static_cast(ascendcPlatform->GetLibApiWorkSpaceSize()); size_t workspaceSize = userWorkspaceSize + systemWorkspaceSize; - uint8_t *tilingBuf = (uint8_t *)malloc(tilingFileSize); - GenerateTiling(socVersion, tilingBuf); + auto tiling = GenerateTiling(ascendcPlatform); uint32_t blockDim = 1; aclInit(nullptr); @@ -313,18 +287,11 @@ int32_t main(int32_t argc, char *argv[]) ReadFile("./input/bias.bin", biasFileSize, inputBiasHost, biasFileSize); aclrtMemcpy(inputBiasDevice, biasFileSize, inputBiasHost, biasFileSize, ACL_MEMCPY_HOST_TO_DEVICE); - uint8_t *tilingHost; - uint8_t *tilingDevice; - aclrtMallocHost((void **)(&tilingHost), tilingFileSize); - aclrtMalloc((void **)&tilingDevice, tilingFileSize, ACL_MEM_MALLOC_HUGE_FIRST); - aclrtMemcpy(tilingHost, tilingFileSize, tilingBuf, tilingFileSize, ACL_MEMCPY_HOST_TO_HOST); - aclrtMemcpy(tilingDevice, tilingFileSize, tilingHost, tilingFileSize, ACL_MEMCPY_HOST_TO_DEVICE); - uint8_t *workspaceDevice; aclrtMalloc((void **)&workspaceDevice, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); matmul_leakyrelu_custom<<>>(inputADevice, inputBDevice, inputBiasDevice, outputCDevice, - workspaceDevice, tilingDevice); + workspaceDevice, tiling); aclrtSynchronizeStream(stream); @@ -338,13 +305,10 @@ int32_t main(int32_t argc, char *argv[]) aclrtFreeHost(outputCHost); aclrtFree(inputBiasDevice); aclrtFreeHost(inputBiasHost); - aclrtFree(tilingDevice); - aclrtFreeHost(tilingHost); aclrtFree(workspaceDevice); aclrtDestroyStream(stream); aclrtResetDevice(deviceId); aclFinalize(); - free(tilingBuf); return 0; } \ No newline at end of file diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md index 3769caeb1..c04da6102 100644 --- 
a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md @@ -81,7 +81,7 @@ ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash -- Gitee From 416b88c832c4057526d927af118886eb4da8f452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=9D=A8?= Date: Thu, 25 Sep 2025 09:38:52 +0000 Subject: [PATCH 86/97] =?UTF-8?q?!2770=20allgathermm=20optimize=20Merge=20?= =?UTF-8?q?pull=20request=20!2770=20from=20=E6=9D=8E=E6=9D=A8/allgathermm?= =?UTF-8?q?=5Foptv2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AclNNInvocation/README.md | 1 - .../AclNNInvocation/src/main.cpp | 6 - .../AclNNInvocation/src/op_runner.cpp | 3 +- .../op_host/all_gather_matmul_custom.cpp | 5 - .../op_kernel/all_gather_matmul_custom.cpp | 123 ++++++++----- .../op_kernel/gather_mm.h | 99 ----------- .../op_kernel/mc2_matmul_block.h | 167 ------------------ .../op_kernel/mc2_matmul_compute.h | 98 ---------- .../21_all_gather_matmul_custom/README.md | 2 +- .../all_gather_matmul_custom.json | 12 +- 10 files changed, 78 insertions(+), 438 deletions(-) delete mode 100644 operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h delete mode 100644 operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h delete mode 100644 operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md index 40cfc9d50..4d5b71060 100644 --- 
a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/README.md @@ -28,7 +28,6 @@ aclnnStatus aclnnAllGatherMatmulCustomGetWorkspaceSize( const aclTensor *a, const aclTensor *b, - const aclTensor *biasOptional, char *group, const aclTensor *cOut, const aclTensor *gatherOutOut, diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp index 86ff36642..bc9eac908 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/main.cpp @@ -24,18 +24,15 @@ bool g_isDevice = false; namespace { -constexpr int32_t INPUT_BUFFER_BIAS = 2; OperatorDesc CreateOpDesc() { // define operator std::vector shapeA { RANK_M, RANK_K }; std::vector shapeB { RANK_K, RANK_N }; - std::vector shapeBias {}; std::vector shapeC { RANK_M * RANK_DIM, RANK_N }; std::vector shapeGatherOut { RANK_M * RANK_DIM, RANK_K }; aclDataType dataTypeA = ACL_FLOAT16; aclDataType dataTypeB = ACL_FLOAT16; - aclDataType dataTypeBias = ACL_FLOAT16; aclDataType dataTypeC = ACL_FLOAT16; aclDataType dataTypeGatherOut = ACL_FLOAT16; aclFormat format = ACL_FORMAT_ND; @@ -43,7 +40,6 @@ OperatorDesc CreateOpDesc() OperatorDesc opDesc; opDesc.AddInputTensorDesc(dataTypeA, shapeA.size(), shapeA.data(), format); opDesc.AddInputTensorDesc(dataTypeB, shapeB.size(), shapeB.data(), format); - opDesc.AddInputTensorDesc(dataTypeBias, shapeBias.size(), shapeBias.data(), format); opDesc.AddOutputTensorDesc(dataTypeC, shapeC.size(), shapeC.data(), format); opDesc.AddOutputTensorDesc(dataTypeGatherOut, shapeGatherOut.size(), shapeGatherOut.data(), format); return opDesc; @@ -56,8 +52,6 @@ bool SetInputData(OpRunner &runner, uint32_t rankId) 
runner.GetInputBuffer(0), runner.GetInputSize(0)); // Read input_a file ReadFile("../input/input_b_" + std::to_string(rankId) + ".bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); // Read input_b file - ReadFile("../input/input_bias_" + std::to_string(rankId) + ".bin", fileSize, - runner.GetInputBuffer(INPUT_BUFFER_BIAS), runner.GetInputSize(INPUT_BUFFER_BIAS)); INFO_LOG("Set input success"); return true; } diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp index 5aa62934f..c47e2ddee 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AclNNInvocation/src/op_runner.cpp @@ -298,10 +298,9 @@ bool OpRunner::RunOp(std::string group, aclrtStream stream) INFO_LOG("Copy input[%zu] success", i); } - aclTensor *bias = nullptr; size_t workspaceSize = 0; aclOpExecutor *handle = nullptr; - auto ret = aclnnAllGatherMatmulCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], bias, (char*)group.c_str(), + auto ret = aclnnAllGatherMatmulCustomGetWorkspaceSize(inputTensor_[0], inputTensor_[1], (char*)group.c_str(), outputTensor_[0], outputTensor_[1], &workspaceSize, &handle); if (ret != ACL_SUCCESS) { ERROR_LOG("Get Operator Workspace failed. 
error code is %d", static_cast(ret)); diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp index 9916b7b9d..fedb14ba8 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_host/all_gather_matmul_custom.cpp @@ -131,11 +131,6 @@ public: .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}) .IgnoreContiguous(); - this->Input("bias") - .ParamType(OPTIONAL) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); this->Output("c") .ParamType(REQUIRED) diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp index bcdae45b8..ad77b44a8 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/all_gather_matmul_custom.cpp @@ -7,76 +7,103 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ - #include "kernel_operator.h" -#include "kernel_operator_intf.h" #include "lib/matmul_intf.h" -#include "gather_mm.h" #include "all_gather_matmul_custom_tiling.h" - using namespace AscendC; +using MATMUL_TYPE = MatmulType; -extern "C" __global__ __aicore__ void all_gather_matmul_custom(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR biasGM, GM_ADDR cGM, - GM_ADDR gatherOutGM, GM_ADDR workspaceGM, GM_ADDR tilingGM) +__aicore__ inline void MatmulKernel(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR cGM, TCubeTiling &tiling, + MatmulImpl &mm) { - if ASCEND_IS_AIV { + if (GetBlockIdx() >= tiling.usedCoreNum) { return; } - REGISTER_TILING_DEFAULT(AllGatherMatmulCustomTilingData); - auto tiling = (__gm__ AllGatherMatmulCustomTilingData*)tilingGM; - __gm__ void *mc2InitTiling = (__gm__ void *)(&(tiling->mc2InitTiling)); - __gm__ void *mc2CcTiling = (__gm__ void *)(&(tiling->mc2CcTiling)); + GlobalTensor aGlobal, bGlobal, cGlobal; + aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ half *>(aGM), tiling.M * tiling.Ka); + bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ half *>(bGM), tiling.Ka * tiling.N); + cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ half *>(cGM), tiling.M * tiling.N); - GET_TILING_DATA(tilingData, tilingGM); - auto &&cfg = tilingData.cfg; - auto &&localTiling = tilingData.localTiling; - auto &&tileTiling = tilingData.tileTiling; - auto &&tailTiling = tilingData.tailTiling; - const auto tileNum = cfg.tileNum; - const auto tailNum = cfg.tailNum; + int mSingleBlocks = (tiling.M + tiling.singleCoreM - 1) / tiling.singleCoreM; + int mCoreIndex = GetBlockIdx() % mSingleBlocks; + int nCoreIndex = GetBlockIdx() / mSingleBlocks; + int offsetA = mCoreIndex * tiling.Ka * tiling.singleCoreM; + int offsetB = nCoreIndex * tiling.singleCoreN; + int offsetC = mCoreIndex * tiling.N * tiling.singleCoreM + nCoreIndex * tiling.singleCoreN; + int tailM = Std::min(tiling.M - mCoreIndex * tiling.singleCoreM, tiling.singleCoreM); + int tailN = Std::min(tiling.N - nCoreIndex * tiling.singleCoreN, 
tiling.singleCoreN); - const auto aTileCnt = tileTiling.M * tileTiling.Ka; - const auto aTileOffset = tileTiling.M * tileTiling.Ka * sizeof(A_DTYPE); - const auto cTileOffset = tileTiling.M * tileTiling.N * sizeof(C_DTYPE); - const auto aTailCnt = tailTiling.M * tailTiling.Ka; - const auto aRankCnt = cfg.rankM * cfg.rankK; + mm.SetOrgShape(tiling.M, tiling.N, tiling.Ka, tiling.Kb); + mm.SetTensorA(aGlobal[offsetA]); + mm.SetTensorB(bGlobal[offsetB]); + mm.SetTail(tailM, tailN); + mm.IterateAll(cGlobal[offsetC]); +} +extern "C" __global__ __aicore__ void all_gather_matmul_custom(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR cGM, + GM_ADDR gatherOutGM, GM_ADDR workspaceGM, + GM_ADDR tilingGM) +{ + if ASCEND_IS_AIV { + return; + } + REGISTER_TILING_DEFAULT(AllGatherMatmulCustomTilingData); + GET_TILING_DATA(tilingData, tilingGM); TPipe pipe; + auto &&localTiling = tilingData.localTiling; + auto &&tileTiling = tilingData.tileTiling; + auto &&tailTiling = tilingData.tailTiling; + const auto tileNum = tilingData.cfg.tileNum; + const auto tailNum = tilingData.cfg.tailNum; + const auto aTileEleCnt = tileTiling.M * tileTiling.Ka; + const auto aTileSize = tileTiling.M * tileTiling.Ka * sizeof(half); + const auto cTileSize = tileTiling.M * tileTiling.N * sizeof(half); + const auto aTailEleCnt = tailTiling.M * tailTiling.Ka; + const auto aRankEleCnt = localTiling.M * localTiling.Ka; + const auto aRankSize = localTiling.M * localTiling.Ka * sizeof(half); + const auto cRankSize = localTiling.M * localTiling.N * sizeof(half); + Hccl hccl; GM_ADDR contextGM = GetHcclContext(); - hccl.Init(contextGM, mc2InitTiling); - hccl.SetCcTiling(mc2CcTiling); - - // 下发allgather任务 - // 首块 - auto handleId = hccl.AllGather(aGM, gatherOutGM, aTileCnt, HcclDataType::HCCL_DATA_TYPE_FP16, aRankCnt, tileNum); - // 尾块 - auto tailHandleId = hccl.AllGather(aGM + tileNum * aTileOffset, gatherOutGM + tileNum * aTileOffset, aTailCnt, - HcclDataType::HCCL_DATA_TYPE_FP16, aRankCnt, tailNum); - - using A_TYPE = 
MatmulType; - using B_TYPE = MatmulType; - using C_TYPE = MatmulType; + hccl.InitV2(contextGM, &tilingData); + hccl.SetCcTilingV2(offsetof(AllGatherMatmulCustomTilingData, mc2CcTiling)); + auto handleId = + hccl.AllGather(aGM, gatherOutGM, aTileEleCnt, HcclDataType::HCCL_DATA_TYPE_FP16, aRankEleCnt, tileNum); + auto tailHandleId = hccl.AllGather(aGM + tileNum * aTileSize, gatherOutGM + tileNum * aTileSize, aTailEleCnt, + HcclDataType::HCCL_DATA_TYPE_FP16, aRankEleCnt, tailNum); - // 本卡数据计算 - MatmulKernelLocal(aGM, bGM, cGM, cfg, localTiling, hccl); + MatmulImpl mm; + mm.SetSubBlockIdx(0); + mm.Init(&localTiling); + MatmulKernel(aGM, bGM, cGM + hccl.GetRankId() * cRankSize, localTiling, mm); - // tile首块计算 - auto aAddr = gatherOutGM; // gatherOut 作为 mm A矩阵地址 + auto aAddr = gatherOutGM; auto cAddr = cGM; - if (tileNum > 0) { - MatmulKernel(aAddr, bGM, cAddr, cfg, tileTiling, hccl, handleId, - tileNum); + mm.Init(&tileTiling); + for (uint32_t i = 0; i < tileNum; i++) { + hccl.Wait(handleId); + for (uint32_t rankId = 0; rankId < hccl.GetRankDim(); rankId++) { + if (rankId == hccl.GetRankId()) + continue; + MatmulKernel(aAddr + rankId * aRankSize, bGM, cAddr + rankId * cRankSize, tileTiling, mm); + } + aAddr += aTileSize; + cAddr += cTileSize; } - // tail尾块计算 - aAddr = gatherOutGM + tileNum * aTileOffset; - cAddr = cGM + tileNum * cTileOffset; + aAddr = gatherOutGM + tileNum * aTileSize; + cAddr = cGM + tileNum * cTileSize; if (tailNum > 0) { - MatmulKernel(aAddr, bGM, cAddr, cfg, tailTiling, hccl, tailHandleId, - tailNum); + mm.Init(&tailTiling); + hccl.Wait(tailHandleId); + for (uint32_t rankId = 0; rankId < hccl.GetRankDim(); rankId++) { + if (rankId == hccl.GetRankId()) + continue; + MatmulKernel(aAddr + rankId * aRankSize, bGM, cAddr + rankId * cRankSize, tailTiling, mm); + } } + mm.End(); hccl.Finalize(); } \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h 
b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h deleted file mode 100644 index b363d8ce2..000000000 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/gather_mm.h +++ /dev/null @@ -1,99 +0,0 @@ -/** - * @file gather_mm.h - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ - -#ifndef MC2_GATHER_MM_H -#define MC2_GATHER_MM_H - -#if defined ASCENDC_CPU_DEBUG -#define SET_G_CORE_TYPE_IS_AIV thread_local int g_coreType = 2 -#define SET_G_CORE_TYPE_IS_AIC thread_local int g_coreType = 1 -#define DTYPE_A half -#define DTYPE_C half -#else -#define SET_G_CORE_TYPE_IS_AIV -#define SET_G_CORE_TYPE_IS_AIC -#endif - -#include "kernel_operator_intf.h" -#include "mc2_matmul_compute.h" -#include "all_gather_matmul_custom_tiling.h" - -namespace AscendC { -using A_DTYPE = DTYPE_A; -using B_DTYPE = DTYPE_B; -using C_DTYPE = DTYPE_C; - -template -__aicore__ inline void MatmulKernelLocal(GM_ADDR aGM, GM_ADDR bGM, GM_ADDR cGM, AllGatherMatmulTiling &cfg, - TCubeTiling &tiling, Hccl &hccl) -{ - if ASCEND_IS_AIC { - if (GetBlockIdx() >= tiling.usedCoreNum) { - return; - } - using C_T = typename C_TYPE::T; - const auto aRankDataCnt = cfg.rankM * cfg.rankK; - const auto cRankDataCnt = cfg.rankM * cfg.rankN; - - MatmulCompute mmLocal; - mmLocal.Init(cfg, tiling); - mmLocal.UpdateWeight(bGM); - mmLocal.UpdateAddress(aGM, aRankDataCnt, cGM + hccl.GetRankId() * cRankDataCnt * sizeof(C_T), cRankDataCnt); - mmLocal.Process(); - mmLocal.End(); - } -} - -template -__aicore__ inline void MatmulKernel(GM_ADDR aAddr, GM_ADDR bGM, GM_ADDR cAddr, AllGatherMatmulTiling &cfg, - TCubeTiling &tiling, Hccl &hccl, HcclHandle &handleId, uint32_t tileCnt) -{ - if ASCEND_IS_AIC { - 
if (GetBlockIdx() >= tiling.usedCoreNum) { - for (uint32_t i = 0; i < tileCnt; i++) { - CrossCoreSetFlag<0x0, PIPE_FIX>(0x8); - CrossCoreWaitFlag(0x8); - } - return; - } - using A_T = typename A_TYPE::T; - using C_T = typename C_TYPE::T; - const auto aDataCnt = tiling.M * tiling.Ka; - const auto aOffset = aDataCnt * sizeof(A_T); - const auto cDataCnt = tiling.M * tiling.N; - const auto cOffset = cDataCnt * sizeof(C_T); - const auto aRankOffset = cfg.rankM * cfg.rankK * sizeof(A_T); - const auto cRankOffset = cfg.rankM * cfg.rankN * sizeof(C_T); - - MatmulCompute mm; - mm.Init(cfg, tiling); - mm.UpdateWeight(bGM); - for (uint32_t i = 0; i < tileCnt; i++) { - // wait current handle allgather - hccl.Wait(handleId); - CrossCoreSetFlag<0x0, PIPE_FIX>(0x8); - CrossCoreWaitFlag(0x8); - // calculate all ranks data - for (uint32_t rankId = 0; rankId < hccl.GetRankDim(); rankId++) { - // skip local rank - if (rankId == hccl.GetRankId()) { - continue; - } - mm.UpdateAddress(aAddr + rankId * aRankOffset, aDataCnt, cAddr + rankId * cRankOffset, cDataCnt); - mm.Process(); - } - aAddr += aOffset; - cAddr += cOffset; - } - mm.End(); - } -} -} -#endif // MC2_GATHER_MM_H diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h deleted file mode 100644 index 00d4322b5..000000000 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_block.h +++ /dev/null @@ -1,167 +0,0 @@ -/** - * @file mc2_matmul_block.h - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ - -#ifndef MC2_MATMUL_BLOCK_H -#define MC2_MATMUL_BLOCK_H - -namespace AscendC { - -constexpr uint32_t C0_SIZE = 16; - -struct BaseBlockOffset { - uint64_t offsetA; - uint64_t offsetB; - uint64_t offsetC; -}; - -struct BaseBlockArguments -{ - bool isRowOrder; - uint32_t singleCoreM; - uint32_t singleCoreN; - uint32_t mBlockCnt; // M方向的基本块个数 - uint32_t nBlockCnt; // N方向的基本块个数 - uint32_t nBaseTail; // N方向的尾块大小 - uint32_t mBaseTail; // M方向的尾块大小 - uint32_t totalBlockCnt; // C矩阵的全部基本块个数 - uint32_t blockCnt; // 单核需要计算的基本块个数 - uint32_t blockStartIdx; // 当前核需计算的基本块的起始位置索引 - uint32_t blockCurrIdx; // 当前核需计算的基本块的当前位置索引 - uint32_t preCoreNum; // 满核分配后剩余基本块数/需要预分配1个block的核数 - uint32_t preCoreStartIdx; // 多分配一个基本块的核起始位置 - uint64_t mBlockOffset; - uint64_t nBlockOffset; - uint64_t mCWorkOffset; -}; - -class MatmulBaseBlock { -public: - __aicore__ inline MatmulBaseBlock() {} - __aicore__ inline void Init(TCubeTiling& tiling); - __aicore__ inline void InitBlockWithoutIndex(); - __aicore__ inline void UpdateBlockIndex(uint32_t currPos); - __aicore__ inline void UpdateBlockParams(int32_t mTileIndex=0, int32_t nTileIndex=0); - __aicore__ inline void CalcGMOffset(); - __aicore__ inline void GetBlockStartIdx(uint32_t startIdx, uint32_t endIdx); - -public: - BaseBlockOffset offset_; - BaseBlockArguments args_; - TCubeTiling tiling_; -}; - -__aicore__ inline void MatmulBaseBlock::Init(TCubeTiling& tiling) -{ - tiling_ = tiling; - args_.preCoreStartIdx = 0; - args_.mBlockCnt = DivCeil(tiling.M, tiling.baseM); //M方向分Base块个数 - args_.nBlockCnt = DivCeil(tiling.N, tiling.baseN); //N方向分Base块个数 - args_.nBaseTail = tiling.N - (args_.nBlockCnt - 1) * tiling.baseN; - args_.mBaseTail = tiling.M - (args_.mBlockCnt - 1) * tiling.baseM; - args_.totalBlockCnt = args_.mBlockCnt * args_.nBlockCnt; - args_.isRowOrder = true; - if (tiling_.N > 5 * tiling_.M) { // 5: ratio of rowOrder - args_.isRowOrder = false; - } -} - -__aicore__ inline void MatmulBaseBlock::InitBlockWithoutIndex() -{ - 
args_.totalBlockCnt = args_.mBlockCnt * args_.nBlockCnt; - args_.blockCnt = args_.totalBlockCnt / tiling_.usedCoreNum; - args_.preCoreNum = args_.totalBlockCnt % tiling_.usedCoreNum; - - // 多分配1个基本块的核索引, 从上一次结束位置开始 - auto startIdx = args_.preCoreStartIdx; - auto endIdx = (startIdx + args_.preCoreNum) % tiling_.usedCoreNum; - args_.preCoreStartIdx = endIdx; - GetBlockStartIdx(startIdx, endIdx); -} - -__aicore__ inline void MatmulBaseBlock::GetBlockStartIdx(uint32_t startIdx, uint32_t endIdx) -{ - if (startIdx > endIdx) { - if (block_idx < endIdx) { - args_.blockCnt += 1; - args_.blockStartIdx = block_idx * args_.blockCnt; - } else if (block_idx >= startIdx) { - args_.blockCnt += 1; - args_.blockStartIdx = block_idx * args_.blockCnt - (tiling_.usedCoreNum - args_.preCoreNum); - } else { - args_.blockStartIdx = block_idx * args_.blockCnt + endIdx; - } - } else { - if (block_idx < startIdx) { - args_.blockStartIdx = block_idx * args_.blockCnt; - } else if (block_idx >= endIdx) { - args_.blockStartIdx = block_idx * args_.blockCnt + args_.preCoreNum; - } else { - args_.blockCnt += 1; - args_.blockStartIdx = block_idx * args_.blockCnt - startIdx; - } - } - - if (!args_.isRowOrder) { - auto blockStart = args_.blockStartIdx; - args_.blockStartIdx = blockStart / args_.mBlockCnt + blockStart % args_.mBlockCnt * args_.nBlockCnt; - } -} - -__aicore__ inline void MatmulBaseBlock::UpdateBlockIndex(uint32_t currPos) -{ - // 按行取,计算第i个基本块的index - if (args_.isRowOrder) { - args_.blockCurrIdx = args_.blockStartIdx + currPos % args_.blockCnt; - return; - } - - args_.blockCurrIdx = args_.blockStartIdx + (currPos % args_.blockCnt) * args_.nBlockCnt; - // 按列取,如果block超行,需计算下一列的位置 - if (args_.blockCurrIdx >= args_.totalBlockCnt) { - args_.blockCurrIdx = args_.blockCurrIdx % args_.totalBlockCnt + args_.blockCurrIdx / args_.totalBlockCnt; - } - return; -} - -__aicore__ inline void MatmulBaseBlock::UpdateBlockParams(int32_t mTileIndex, int32_t nTileIndex) -{ - (void)mTileIndex; - 
(void)nTileIndex; - if (args_.blockCurrIdx == (args_.totalBlockCnt - 1)) { - // 当前矩阵最后一块 - args_.singleCoreM = args_.mBaseTail; - args_.singleCoreN = args_.nBaseTail; - } else if (args_.blockCurrIdx >= (args_.mBlockCnt - 1) * args_.nBlockCnt) { - // 当前矩阵最后一行 - args_.singleCoreM = args_.mBaseTail; - args_.singleCoreN = tiling_.baseN; - } else if ((args_.blockCurrIdx + 1) % args_.nBlockCnt == 0) { - // 当前矩阵最后一列 - args_.singleCoreM = tiling_.baseM; - args_.singleCoreN = args_.nBaseTail; - } else { - args_.singleCoreM = tiling_.baseM; - args_.singleCoreN = tiling_.baseN; - } - - // 更新基本块的地址偏移 - args_.mBlockOffset = args_.blockCurrIdx / args_.nBlockCnt * tiling_.baseM; // 基本块所在的行偏移 - args_.nBlockOffset = args_.blockCurrIdx % args_.nBlockCnt * tiling_.baseN; // 基本块所在的列偏移 - args_.mCWorkOffset = args_.mBlockOffset; -} - -__aicore__ inline void MatmulBaseBlock::CalcGMOffset() -{ - offset_.offsetA = args_.mBlockOffset * tiling_.Ka; - offset_.offsetB = args_.nBlockOffset; - offset_.offsetC = args_.nBlockOffset + args_.mCWorkOffset * tiling_.N; -} -} // namespace ASCENDC -#endif // MC2_MATMUL_BLOCK_H diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h deleted file mode 100644 index 0bac09100..000000000 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/AllGatherMatmulCustom/op_kernel/mc2_matmul_compute.h +++ /dev/null @@ -1,98 +0,0 @@ -/** - * @file mc2_matmul_compute.h - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ - -#ifndef MC2_MATMUL_COMPUTE_H -#define MC2_MATMUL_COMPUTE_H -#include "mc2_matmul_block.h" -#include "all_gather_matmul_custom_tiling.h" - -namespace AscendC { -using namespace matmul; - -template -class MatmulCompute { - using A_T = typename A_TYPE::T; - using B_T = typename B_TYPE::T; - using C_T = typename C_TYPE::T; - -public: - __aicore__ inline MatmulCompute() {} - __aicore__ inline void Init(AllGatherMatmulTiling& cfg, TCubeTiling& tiling); - __aicore__ inline void UpdateWeight(GM_ADDR bGM); - __aicore__ inline void UpdateAddress(GM_ADDR aGM, uint32_t aSize, GM_ADDR cGM, uint32_t cSize); - __aicore__ inline void Process(); - __aicore__ inline void End(); - -private: - MatmulImpl mm_; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - MatmulBaseBlock block_; - TCubeTiling tiling_; - AllGatherMatmulTiling cfg_; -}; - -template -__aicore__ inline void MatmulCompute::UpdateWeight(GM_ADDR bGM) -{ - // MC2的计算流中默认B矩阵不变,GM地址无需偏移 - bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ B_T *>(bGM), tiling_.Kb * tiling_.N); -} - -template -__aicore__ inline void MatmulCompute::UpdateAddress( - GM_ADDR aGM, uint32_t aSize, GM_ADDR cGM, uint32_t cSize) -{ - aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ A_T *>(aGM), aSize); - cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ C_T *>(cGM), cSize); -} - -template -__aicore__ inline void MatmulCompute::Init(AllGatherMatmulTiling& cfg, TCubeTiling& tiling) -{ - mm_.SetSubBlockIdx(0); - mm_.Init(&tiling, GetTPipePtr()); - tiling_ = tiling; - cfg_ = cfg; - block_.Init(tiling); -} - -template -__aicore__ inline void MatmulCompute::Process() -{ - // 每次block循环开始前需要计算初始blockIndex - block_.InitBlockWithoutIndex(); - for (uint32_t i = 0; i < block_.args_.blockCnt; i++) { - // calculate blockCurrIndex - block_.UpdateBlockIndex(i); - if (block_.args_.blockCurrIdx < block_.args_.totalBlockCnt) { - block_.UpdateBlockParams(); - block_.CalcGMOffset(); - mm_.SetSingleShape(block_.args_.singleCoreM, 
block_.args_.singleCoreN, tiling_.singleCoreK); - mm_.SetTensorA(aGlobal[block_.offset_.offsetA]); - mm_.SetTensorB(bGlobal[block_.offset_.offsetB]); - mm_.Iterate(); - mm_.GetTensorC(cGlobal[block_.offset_.offsetC]); - // 增加M等FIX同步 - event_t eventIDFixToM = static_cast(GetTPipePtr()->FetchEventID(HardEvent::FIX_M)); - SetFlag(eventIDFixToM); - WaitFlag(eventIDFixToM); - } - } -} - -template -__aicore__ inline void MatmulCompute::End() -{ - mm_.End(); -} -} -#endif // MC2_MATMUL_COMPUTE_H diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md index 03c7430e5..0ba75d35a 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/README.md @@ -32,7 +32,7 @@ $$ 算子输入nameshapedata typeformat a512 * 5120float16ND b5120 * 640float16ND -bias/// + 算子输出c4096 * 640float16ND diff --git a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json index 6ab16e763..96aa36210 100644 --- a/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json +++ b/operator/ascendc/4_best_practices/21_all_gather_matmul_custom/all_gather_matmul_custom.json @@ -13,7 +13,7 @@ ] }, { - "name": "x2", + "name": "b", "param_type": "required", "format": [ "ND" @@ -21,16 +21,6 @@ "type": [ "float16" ] - }, - { - "name": "bias", - "param_type": "optional", - "format": [ - "ND" - ], - "type": [ - "float16" - ] } ], "output_desc":[ -- Gitee From 04f2289454efc33e0b3db90b5196d1c0c01c41fc Mon Sep 17 00:00:00 2001 From: renjie Date: Fri, 26 Sep 2025 09:32:09 +0000 Subject: [PATCH 87/97] !2781 update npu arch set Merge pull request !2781 from renjie/master --- .../0_introduction/24_simple_hello_world/CMakeLists.txt | 6 ++++-- 
.../ascendc/0_introduction/25_simple_add/CMakeLists.txt | 6 ++++-- .../0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt | 6 ++++-- .../26_simple_matmulleakyrelu/matmul_leakyrelu.asc | 1 + .../27_simple_add_cpp_extensions/CMakeLists.txt | 3 +-- .../27_simple_add_cpp_extensions/add_custom.asc | 1 + 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt b/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt index 559929405..22fb96a8e 100644 --- a/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt +++ b/operator/ascendc/0_introduction/24_simple_hello_world/CMakeLists.txt @@ -1,11 +1,13 @@ cmake_minimum_required(VERSION 3.16) -set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") - find_package(ASC REQUIRED) project(kernel_samples LANGUAGES ASC CXX) add_executable(demo hello_world.asc +) + +target_compile_options(demo PRIVATE + $<$:--npu-arch=dav-2201> ) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt b/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt index 08689321b..75dac07dc 100644 --- a/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt +++ b/operator/ascendc/0_introduction/25_simple_add/CMakeLists.txt @@ -1,11 +1,13 @@ cmake_minimum_required(VERSION 3.16) -set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") - find_package(ASC REQUIRED) project(kernel_samples LANGUAGES ASC CXX) add_executable(demo add_custom.asc +) + +target_compile_options(demo PRIVATE + $<$:--npu-arch=dav-2201> ) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt index 2958d3a02..05004ccca 100644 --- a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/CMakeLists.txt @@ -1,7 
+1,5 @@ cmake_minimum_required(VERSION 3.16) -set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") - find_package(ASC REQUIRED) project(kernel_samples LANGUAGES ASC CXX) @@ -15,4 +13,8 @@ target_link_libraries(demo PRIVATE register platform m +) + +target_compile_options(demo PRIVATE + $<$:--npu-arch=dav-2201> ) \ No newline at end of file diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc index 7c057d79a..7cdcc247f 100644 --- a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc @@ -172,6 +172,7 @@ MatmulLeakyKernel::CalcOffset(int32_t blockIdx, c __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, AscendC::tiling::TCubeTiling tiling) { + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_AIC_1_2); AscendC::TPipe pipe; MatmulLeakyKernel matmulLeakyKernel; matmulLeakyKernel.Init(a, b, bias, c, workspace, tiling, &pipe); diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt index 96c61cc34..fe5b95bde 100644 --- a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.16) -set(SOC_VERSION "Ascend910B1" CACHE STRING "soc version") - find_package(ASC REQUIRED) execute_process(COMMAND python3 -c "import os; import torch; print(os.path.dirname(torch.__file__))" @@ -54,6 +52,7 @@ target_compile_definitions(pybind11_lib PRIVATE target_compile_options(pybind11_lib PRIVATE ${PYBIND11_INC} + $<$:--npu-arch=dav-2201> -fPIC ) diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc 
b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc index d4a076832..ccd87474d 100644 --- a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/add_custom.asc @@ -81,6 +81,7 @@ private: __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength) { + KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_AIV_ONLY); KernelAdd op; op.Init(x, y, z, totalLength); op.Process(); -- Gitee From b65a7fa7593f3edd17c9e061b1e1e92566a7c377 Mon Sep 17 00:00:00 2001 From: hujiawenKaven Date: Mon, 29 Sep 2025 08:41:16 +0000 Subject: [PATCH 88/97] !2782 fix definition bug in lower version cmake Merge pull request !2782 from hujiawenKaven/master --- .../MatmulInvocationNeo/cmake/npu_lib.cmake | 4 ++-- .../CppExtensions/CMakeLists.txt | 4 ++-- .../MatmulLeakyReluInvocation/cmake/npu_lib.cmake | 4 ++-- .../MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake | 4 ++-- .../BareMixInvocation/cmake/npu_lib.cmake | 4 ++-- .../KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake | 4 ++-- .../KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake | 4 ++-- .../MatmulABshareInvocation/cmake/npu_lib.cmake | 4 ++-- .../KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake | 4 ++-- .../KernelLaunch/CppExtensions/CMakeLists.txt | 4 ++-- .../MatmulLeakyReluInvocation/cmake/npu_lib.cmake | 4 ++-- .../MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake | 4 ++-- 12 files changed, 24 insertions(+), 24 deletions(-) diff --git a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/cmake/npu_lib.cmake +++ b/operator/ascendc/0_introduction/11_matmul_kernellaunch/MatmulInvocationNeo/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ 
ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/CMakeLists.txt b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/CMakeLists.txt index ffc6da8d3..4ce075ac2 100644 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/CMakeLists.txt +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/CppExtensions/CMakeLists.txt @@ -30,8 +30,8 @@ ascendc_library(kernels STATIC ) ascendc_compile_definitions(kernels PRIVATE - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING $<$>:CUSTOM_ASCEND310P> ) diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake +++ b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake +++ 
b/operator/ascendc/0_introduction/13_matmulleakyrelu_kernellaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake +++ b/operator/ascendc/0_introduction/22_baremix_kernellaunch/BareMixInvocation/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake b/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake +++ b/operator/ascendc/1_utilities/0_printf/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake b/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- 
a/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake +++ b/operator/ascendc/1_utilities/3_assert/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/cmake/npu_lib.cmake b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/cmake/npu_lib.cmake +++ b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake b/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake +++ b/operator/ascendc/tutorials/MatmulCustomSample/KernelLaunch/MatmulInvocationNeo/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/CppExtensions/CMakeLists.txt 
b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/CppExtensions/CMakeLists.txt index ffc6da8d3..4ce075ac2 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/CppExtensions/CMakeLists.txt +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/CppExtensions/CMakeLists.txt @@ -30,8 +30,8 @@ ascendc_library(kernels STATIC ) ascendc_compile_definitions(kernels PRIVATE - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING $<$>:CUSTOM_ASCEND310P> ) diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocation/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - -DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) diff --git a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake index b3c8ff7ae..d9a9fb2ea 100644 --- a/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake +++ b/operator/ascendc/tutorials/MatmulLeakyReluCustomSample/KernelLaunch/MatmulLeakyReluInvocationAsync/cmake/npu_lib.cmake @@ -12,6 +12,6 @@ ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE $<$>:CUSTOM_ASCEND310P> - 
-DHAVE_WORKSPACE - -DHAVE_TILING + HAVE_WORKSPACE + HAVE_TILING ) -- Gitee From 96cee207d72a34df29154b36a45f9a552060b36c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B5=87=E9=94=B4?= Date: Tue, 14 Oct 2025 07:01:37 +0000 Subject: [PATCH 89/97] =?UTF-8?q?!2785=20update=20cplusplus/level1=5Fsingl?= =?UTF-8?q?e=5Fapi/3=5Fir/IRBuild/readme.md.=20Merge=20pull=20request=20!2?= =?UTF-8?q?785=20from=20=E5=B5=87=E9=94=B4/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cplusplus/level1_single_api/3_ir/IRBuild/readme.md | 7 ++----- cplusplus/level1_single_api/8_graphrun/graph_run/readme.md | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cplusplus/level1_single_api/3_ir/IRBuild/readme.md b/cplusplus/level1_single_api/3_ir/IRBuild/readme.md index 3674b0b9f..3cc68b66a 100644 --- a/cplusplus/level1_single_api/3_ir/IRBuild/readme.md +++ b/cplusplus/level1_single_api/3_ir/IRBuild/readme.md @@ -97,11 +97,8 @@ **./ir_build ${soc_version} gen** - ${soc_version}:昇腾AI处理器的版本,可以从${ASCEND_PATH}/atc/data/platform_config查看ini文件名,文件名即为对应版本,如果仍然无法确定具体使用的版本号,可以通过如下方法查询: - 1. 单击如下手册中的链接并进入该手册,[CANN Ascend-DMI工具用户指南](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910?category=operation-maintenance)。 - 2. 完成“使用工具>使用前准备“,然后进入“使用工具>设备实时状态查询“章节。 - 3. 使用相关命令查看芯片的详细信息,例如使用**ascend-dmi -i -dt**命令查看芯片的详细信息,返回信息中“Chip Name“对应取值,去除空格后,即为具体使用的${soc_version}。 - + ${soc_version}:昇腾AI处理器的版本,可以从[ATC离线模型编译工具](https://hiascend.com/document/redirect/CannCommercialAtc)中的“参数说明”部分的“--soc_version”查询。 + 编译成功提示: ``` diff --git a/cplusplus/level1_single_api/8_graphrun/graph_run/readme.md b/cplusplus/level1_single_api/8_graphrun/graph_run/readme.md index e1b8c4b4c..41d05703c 100644 --- a/cplusplus/level1_single_api/8_graphrun/graph_run/readme.md +++ b/cplusplus/level1_single_api/8_graphrun/graph_run/readme.md @@ -47,7 +47,7 @@ 1. 执行编译脚本。 - a. 修改Makefile文件的安装包路径。 + a. 
修改Makefile文件中ASCEND_PATH变量取值:修改为CANN软件包的实际安装路径。 b. 分别执行**make clean**和**make graph_run**进行编译。 -- Gitee From 9d1dc760752f09b68caed7b83235b473a871bf7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E6=99=BA=E6=85=A7?= Date: Fri, 24 Oct 2025 01:44:15 +0000 Subject: [PATCH 90/97] =?UTF-8?q?!2787=20=E4=BF=AE=E6=94=B9llm=20datadist?= =?UTF-8?q?=E6=A0=B7=E4=BE=8Breadme=E7=9A=84=E5=8F=AF=E8=AF=BB=E6=80=A7=20?= =?UTF-8?q?Merge=20pull=20request=20!2787=20from=20=E8=B5=B5=E6=99=BA?= =?UTF-8?q?=E6=85=A7/zzh=5Fdev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cplusplus/level1_single_api/11_llm_data_dist/readme.md | 4 +++- cplusplus/level1_single_api/12_adxl/readme.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cplusplus/level1_single_api/11_llm_data_dist/readme.md b/cplusplus/level1_single_api/11_llm_data_dist/readme.md index 24ae39e0f..92dcc5a7b 100644 --- a/cplusplus/level1_single_api/11_llm_data_dist/readme.md +++ b/cplusplus/level1_single_api/11_llm_data_dist/readme.md @@ -1,5 +1,6 @@ ## 目录 +- [目录](#目录) - [样例介绍](#样例介绍) - [目录结构](#目录结构) - [环境要求](#环境要求) @@ -14,6 +15,7 @@ ## 目录结构 +相对当前目录,结构如下: ``` ├── prompt_sample.cpp // sample1的prompt样例main函数 ├── decoder_sample.cpp // sample1的decoder样例main函数 @@ -39,7 +41,7 @@ 1. 修改CMakeLists.txt文件中的安装包路径 -2. 执行如下命令进行编译。 +2. 在当前目录下执行如下命令进行编译。 依次执行: diff --git a/cplusplus/level1_single_api/12_adxl/readme.md b/cplusplus/level1_single_api/12_adxl/readme.md index 8da00a041..05246949c 100644 --- a/cplusplus/level1_single_api/12_adxl/readme.md +++ b/cplusplus/level1_single_api/12_adxl/readme.md @@ -1,5 +1,6 @@ ## 目录 +- [目录](#目录) - [样例介绍](#样例介绍) - [目录结构](#目录结构) - [环境要求](#环境要求) @@ -14,6 +15,7 @@ ## 目录结构 +相对当前目录,结构如下: ``` ├── adxl_engine_sample.cpp // adxl_engine的sample1样例 ├── adxl_engine_sample2.cpp // adxl_engine的sample2样例 @@ -33,7 +35,7 @@ 1. 修改CMakeLists.txt文件中的安装包路径 -2. 执行如下命令进行编译。 +2. 
在当前目录执行如下命令进行编译。 依次执行: -- Gitee From adb2d71065026f90a7b664ef27f74ba406706edc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E9=91=AB?= Date: Wed, 5 Nov 2025 01:56:04 +0000 Subject: [PATCH 91/97] !2789 fix scalar type half * fix scalar type half --- .../AbsDuplicateKernelInvocation/abs_duplicate.cpp | 2 +- .../ReduceMinKernelInvocation/reduce_min.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp index 90f90adfe..131640e43 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp @@ -59,7 +59,7 @@ private: uint64_t mask0 = (1ul << 16) - (1ul << BLOCK_ELEMENT_NUM); uint64_t mask[2] = {mask0, 0}; for (int32_t i = 0; i < BLOCK_GROUP_NUM; i++) { - AscendC::Duplicate(inputLocal[i * BLOCKLEN_CEIL], 0, mask, 1, 1, 1); // clear dummy data on inputLocal + AscendC::Duplicate(inputLocal[i * BLOCKLEN_CEIL], static_cast(0), mask, 1, 1, 1); // clear dummy data on inputLocal } AscendC::Abs(outputLocal, inputLocal, BLOCKLEN_CEIL * BLOCK_GROUP_NUM); outQueue.EnQue(outputLocal); diff --git a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp index 6f9cd6e1f..19c9425d0 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp @@ -59,8 +59,8 @@ private: AscendC::LocalTensor outputLocal = outQueue.AllocTensor(); 
AscendC::LocalTensor workLocal = workLocalTbuf.Get(); AscendC::LocalTensor inputLocal = inQueue.DeQue(); - AscendC::Duplicate(outputLocal, 0, BLOCK_GROUP_NUM * BLOCKLEN_CEIL); - AscendC::Duplicate(workLocal, 0, BLOCKLEN_CEIL); + AscendC::Duplicate(outputLocal, static_cast(0), BLOCK_GROUP_NUM * BLOCKLEN_CEIL); + AscendC::Duplicate(workLocal, static_cast(0), BLOCKLEN_CEIL); uint64_t Mask0 = ((uint64_t)1 << BLOCK_ELEMENT_NUM) - 1; // mask mode controls only the first 4 elements do ReduceMin calculation -- Gitee From adb50a46f383a6943513eb262d995446592aa7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=97=AD?= Date: Thu, 13 Nov 2025 06:53:06 +0000 Subject: [PATCH 92/97] =?UTF-8?q?!2790=20high=20api=20rename=20Merge=20pul?= =?UTF-8?q?l=20request=20!2790=20from=20=E7=8E=8B=E6=97=AD/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AbsDuplicateKernelInvocation/abs_duplicate.cpp | 2 +- .../AbsPadKernelInvocation/abs_pad.cpp | 2 +- .../ReduceMinKernelInvocation/reduce_min.cpp | 2 +- .../BallQuery/op_kernel/ball_query_norm_fp16.h | 8 ++++---- .../BallQuery/op_kernel/ball_query_norm_fp32_perf.h | 8 ++++---- .../BallQuery/op_kernel/ball_query_stack.h | 8 ++++---- .../op_kernel/flash_attention_score_common.h | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp index 131640e43..c54c398ff 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsDuplicateKernelInvocation/abs_duplicate.cpp @@ -23,7 +23,7 @@ public: srcGlobal.SetGlobalBuffer((__gm__ half *)(inputGM) + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); 
dstGlobal.SetGlobalBuffer((__gm__ half *)(outputGM) + BLOCK_LENGTH * AscendC::GetBlockIdx(), blockLength); syncGlobal.SetGlobalBuffer((__gm__ int32_t *)(syncGM), USE_CORE_NUM * DEFAULT_SYNCALL_NEED_SIZE); - AscendC::InitGlobalMemory(dstGlobal, blockLength, 0); + AscendC::Fill(dstGlobal, blockLength, 0); pipe.InitBuffer(inQueue, BUFFER_NUM, BLOCK_GROUP_NUM * BLOCKLEN_CEIL * sizeof(half)); pipe.InitBuffer(outQueue, BUFFER_NUM, BLOCK_GROUP_NUM * BLOCKLEN_CEIL * sizeof(half)); diff --git a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/abs_pad.cpp b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/abs_pad.cpp index 70c5c5113..bb4d29781 100644 --- a/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/abs_pad.cpp +++ b/operator/ascendc/0_introduction/16_unaligned_abs_kernellaunch/AbsPadKernelInvocation/abs_pad.cpp @@ -31,7 +31,7 @@ public: dstGlobal.SetGlobalBuffer((__gm__ half *)(outputGM) + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); pipe.InitBuffer(inQueue, BUFFER_NUM, BLOCK_GROUP_NUM * BLOCKLEN_CEIL * sizeof(half)); pipe.InitBuffer(outQueue, BUFFER_NUM, BLOCK_GROUP_NUM * BLOCKLEN_CEIL * sizeof(half)); - AscendC::InitGlobalMemory(dstGlobal, BLOCK_LENGTH, half(0.0)); + AscendC::Fill(dstGlobal, BLOCK_LENGTH, half(0.0)); } __aicore__ inline void Process() { diff --git a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp index 19c9425d0..b58b05f35 100644 --- a/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp +++ b/operator/ascendc/0_introduction/17_unaligned_reducemin_kernellaunch/ReduceMinKernelInvocation/reduce_min.cpp @@ -23,7 +23,7 @@ public: dstGlobal.SetGlobalBuffer((__gm__ half *)(outputGM) + BLOCK_LENGTH * 
AscendC::GetBlockIdx(), blockLength); syncGlobal.SetGlobalBuffer((__gm__ int32_t *)(syncGM), USE_CORE_NUM * DEFAULT_SYNCALL_NEED_SIZE); // clear dstGm before doing calculations - AscendC::InitGlobalMemory(dstGlobal, blockLength, 0); + AscendC::Fill(dstGlobal, blockLength, 0); pipe.InitBuffer(inQueue, BUFFER_NUM, BLOCK_GROUP_NUM * BLOCKLEN_CEIL * sizeof(half)); pipe.InitBuffer(outQueue, BUFFER_NUM, BLOCK_GROUP_NUM * BLOCKLEN_CEIL * sizeof(half)); diff --git a/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp16.h b/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp16.h index 108e986e0..1b6050ed7 100644 --- a/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp16.h +++ b/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp16.h @@ -164,10 +164,10 @@ private: idxTmp1 = idxSeqN[align32N]; idxTmp2 = idxTmp1[align32N]; - ArithProgression(idxSeq0, 0, int32_t(dim3 * sizeof(T)), maxNM); - ArithProgression(idxSeq1, int32_t(sizeof(T)), int32_t(dim3 * sizeof(T)), maxNM); - ArithProgression(idxSeq2, int32_t(sizeof(T) + sizeof(T)), int32_t(dim3 * sizeof(T)), maxNM); - ArithProgression(idxSeqN, 0, 1, N); + Arange(idxSeq0, 0, int32_t(dim3 * sizeof(T)), maxNM); + Arange(idxSeq1, int32_t(sizeof(T)), int32_t(dim3 * sizeof(T)), maxNM); + Arange(idxSeq2, int32_t(sizeof(T) + sizeof(T)), int32_t(dim3 * sizeof(T)), maxNM); + Arange(idxSeqN, 0, 1, N); } __aicore__ inline void MaskInit(){ diff --git a/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp32_perf.h b/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp32_perf.h index a397fa9f1..77fd39869 100644 --- a/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp32_perf.h +++ b/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_norm_fp32_perf.h @@ -165,10 +165,10 @@ 
private: idxTmp1 = idxSeqN[alignFP32N]; idxTmp2 = idxTmp1[alignFP32N]; - ArithProgression(idxSeq0, 0, dim3 * sizeof(half), N); - ArithProgression(idxSeq1, sizeof(half), dim3 * sizeof(half), N); - ArithProgression(idxSeq2, sizeof(half) + sizeof(half), dim3 * sizeof(half), N); - ArithProgression(idxSeqN, 0, 1, N); + Arange(idxSeq0, 0, dim3 * sizeof(half), N); + Arange(idxSeq1, sizeof(half), dim3 * sizeof(half), N); + Arange(idxSeq2, sizeof(half) + sizeof(half), dim3 * sizeof(half), N); + Arange(idxSeqN, 0, 1, N); } __aicore__ inline void MaskInit(){ diff --git a/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_stack.h b/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_stack.h index 2cf49d817..a4ed6eaa1 100644 --- a/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_stack.h +++ b/operator_contrib/BallQuerySample/FrameworkLaunch/BallQuery/op_kernel/ball_query_stack.h @@ -128,10 +128,10 @@ private: LocalTensor idxSeqN = idxSeq2[maxNM]; LocalTensor idxTmp1 = idxSeqN[align32N]; - ArithProgression(idxSeq0, 0, int32_t(sizeof(T) + sizeof(T) + sizeof(T)), maxNM); - ArithProgression(idxSeq1, int32_t(sizeof(T)), int32_t(sizeof(T) + sizeof(T) + sizeof(T)), maxNM); - ArithProgression(idxSeq2, int32_t(sizeof(T) + sizeof(T)), int32_t(sizeof(T) + sizeof(T) + sizeof(T)), maxNM); - ArithProgression(idxSeqN, 0, 1, N); + Arange(idxSeq0, 0, int32_t(sizeof(T) + sizeof(T) + sizeof(T)), maxNM); + Arange(idxSeq1, int32_t(sizeof(T)), int32_t(sizeof(T) + sizeof(T) + sizeof(T)), maxNM); + Arange(idxSeq2, int32_t(sizeof(T) + sizeof(T)), int32_t(sizeof(T) + sizeof(T) + sizeof(T)), maxNM); + Arange(idxSeqN, 0, 1, N); Gather(xyz_x, xyzLocal, idxSeq0.ReinterpretCast(), 0, N); Gather(xyz_y, xyzLocal, idxSeq1.ReinterpretCast(), 0, N); diff --git a/operator_contrib/FlashAttentionScoreSample/FrameworkLaunch/FlashAttentionScore/op_kernel/flash_attention_score_common.h 
b/operator_contrib/FlashAttentionScoreSample/FrameworkLaunch/FlashAttentionScore/op_kernel/flash_attention_score_common.h index aa5a6b0aa..a323274e2 100644 --- a/operator_contrib/FlashAttentionScoreSample/FrameworkLaunch/FlashAttentionScore/op_kernel/flash_attention_score_common.h +++ b/operator_contrib/FlashAttentionScoreSample/FrameworkLaunch/FlashAttentionScore/op_kernel/flash_attention_score_common.h @@ -37,7 +37,7 @@ using AscendC::Div; using AscendC::Duplicate; using AscendC::GetBlockIdx; using AscendC::RoundMode; -using AscendC::SelectWithBytesMask; +using AscendC::Select; using AscendC::SelectWithBytesMaskShapeInfo; using AscendC::SoftmaxFlashV2; using AscendC::SoftMaxShapeInfo; -- Gitee From a80815bba00dd98bd861a8b7e52e2e7404cce04c Mon Sep 17 00:00:00 2001 From: renjie Date: Mon, 17 Nov 2025 13:18:01 +0000 Subject: [PATCH 93/97] !2794 remove version warning * remove version warning --- operator/ascendc/0_introduction/24_simple_hello_world/README.md | 2 +- operator/ascendc/0_introduction/25_simple_add/README.md | 2 +- .../ascendc/0_introduction/26_simple_matmulleakyrelu/README.md | 1 - .../0_introduction/27_simple_add_cpp_extensions/README.md | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/README.md b/operator/ascendc/0_introduction/24_simple_hello_world/README.md index 483b3d8c3..29c769892 100644 --- a/operator/ascendc/0_introduction/24_simple_hello_world/README.md +++ b/operator/ascendc/0_introduction/24_simple_hello_world/README.md @@ -1,6 +1,6 @@ ## 简化HelloWorld算子直调样例 本样例通过使用<<<>>>内核调用符来完成算子核函数在NPU侧运行验证的基础流程,核函数内通过printf打印输出结果。 -> ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 + ## 目录结构介绍 ``` ├── 24_simple_helloworld diff --git a/operator/ascendc/0_introduction/25_simple_add/README.md b/operator/ascendc/0_introduction/25_simple_add/README.md index 439264e6a..96107f878 100644 --- a/operator/ascendc/0_introduction/25_simple_add/README.md +++ 
b/operator/ascendc/0_introduction/25_simple_add/README.md @@ -1,6 +1,6 @@ ## 简化Add算子直调样例 本样例以Add算子为示例,展示了一种更为简单的算子编译流程,支持main函数和Kernel函数在同一个cpp文件中实现。 -> ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 + ## 目录结构介绍 ``` ├── 25_simple_add diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md index 6c2eea0b4..9c3909b87 100644 --- a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md @@ -1,6 +1,5 @@ ## 简化MatmulLeakyRelu算子直调样例 本样例以MatmulLeakyRelu算子为示例,展示了一种更为简单的算子编译流程,支持main函数和Kernel函数在同一个cpp文件中实现。 -> ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 ## 目录结构介绍 ``` diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md index c04da6102..ec7d5b618 100644 --- a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md @@ -1,6 +1,5 @@ ## 简化Pybind算子直调样例 本样例使用pybind方式调用核函数,以带有Tiling的Add算子为示例,展示了一种更为简单的算子编译流程,支持main函数和Kernel函数在同一个cpp文件中实现。 -> ⚠️ **注意** 该样例将在未来的`CANN 8.3`开始支持。 ## 目录结构介绍 ``` -- Gitee From 7ec7706f8d5129a85180b0510a9f6210954e6aa3 Mon Sep 17 00:00:00 2001 From: SeaElm Date: Tue, 18 Nov 2025 13:06:24 +0000 Subject: [PATCH 94/97] =?UTF-8?q?!2796=20update=20operator/ascendc/0=5Fint?= =?UTF-8?q?roduction/1=5Fadd=5Fframeworklaunch/AddCustom=E2=80=A6=20*=20up?= =?UTF-8?q?date=20operator/ascendc/0=5Fintroduction/1=5Fadd=5Fframeworklau?= =?UTF-8?q?nch/AddCustom=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt 
b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt index 38e13a85e..dda6f26c1 100644 --- a/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt +++ b/operator/ascendc/0_introduction/1_add_frameworklaunch/AddCustomTiny/CMakeLists.txt @@ -33,7 +33,7 @@ npu_op_kernel_library(ascendc_kernels ) npu_op_kernel_sources(ascendc_kernels - OP_NAME AddCustom + OP_TYPE AddCustom KERNEL_FILE add_custom_kernel.cpp ) -- Gitee From 62c7760a23233669ff18f7aa807333eda3e5a465 Mon Sep 17 00:00:00 2001 From: ruoshuisixue Date: Fri, 21 Nov 2025 10:49:56 +0000 Subject: [PATCH 95/97] =?UTF-8?q?!2795=20=E6=A8=A1=E6=9D=BF=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E7=B1=BB=E7=AE=97=E5=AD=90=E6=96=B0=E5=A2=9E=E8=87=AA?= =?UTF-8?q?=E5=AE=9A=E4=B9=89=E7=BB=93=E6=9E=84=E4=BD=93=20Merge=20pull=20?= =?UTF-8?q?request=20!2795=20from=20ruoshuisixue/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AddTemplateCustom/op_host/add_custom.cpp | 28 +++++++++++-------- .../op_kernel/add_custom.cpp | 16 +++++++++-- .../add_custom_tiling.h | 19 +++++++++---- .../op_kernel/tiling_key_add_custom.h | 11 +++++--- .../6_addtemplate_frameworklaunch/README.md | 9 +++--- .../6_addtemplate_frameworklaunch/install.sh | 2 ++ 6 files changed, 58 insertions(+), 27 deletions(-) rename operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/{op_host => op_kernel}/add_custom_tiling.h (69%) diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom.cpp index faf223f02..fdd5050ee 100644 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom.cpp +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom.cpp @@ -19,23 +19,23 @@ static 
ge::graphStatus TilingFunc(gert::TilingContext *context) { TilingData tiling; uint32_t totalLength = context->GetInputShape(0)->GetOriginShape().GetShapeSize(); - ge::DataType dtype_x = context->GetInputDesc(0)->GetDataType(); - ge::DataType dtype_y = context->GetInputDesc(1)->GetDataType(); - ge::DataType dtype_z = context->GetOutputDesc(0)->GetDataType(); + ge::DataType dataTypeX = context->GetInputDesc(0)->GetDataType(); + ge::DataType dataTypeY = context->GetInputDesc(1)->GetDataType(); + ge::DataType dataTypeZ = context->GetOutputDesc(0)->GetDataType(); uint32_t D_T_X = ADD_TPL_FP32, D_T_Y=ADD_TPL_FP32, D_T_Z=ADD_TPL_FP32, TILE_NUM=1, IS_SPLIT=0; - if(dtype_x == ge::DataType::DT_FLOAT){ + if(dataTypeX == ge::DataType::DT_FLOAT){ D_T_X = ADD_TPL_FP32; - }else if(dtype_x == ge::DataType::DT_FLOAT16){ + }else if(dataTypeX == ge::DataType::DT_FLOAT16){ D_T_X = ADD_TPL_FP16; } - if(dtype_y == ge::DataType::DT_FLOAT){ + if(dataTypeY == ge::DataType::DT_FLOAT){ D_T_Y = ADD_TPL_FP32; - }else if(dtype_y == ge::DataType::DT_FLOAT16){ + }else if(dataTypeY == ge::DataType::DT_FLOAT16){ D_T_Y = ADD_TPL_FP16; } - if(dtype_z == ge::DataType::DT_FLOAT){ + if(dataTypeZ == ge::DataType::DT_FLOAT){ D_T_Z = ADD_TPL_FP32; - }else if(dtype_z == ge::DataType::DT_FLOAT16){ + }else if(dataTypeZ == ge::DataType::DT_FLOAT16){ D_T_Z = ADD_TPL_FP16; } if(totalLength< MIN_LENGTH_FOR_SPLIT){ @@ -45,10 +45,14 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) IS_SPLIT = 1; TILE_NUM = DEFAULT_TILE_NUM; } + if(D_T_X == ADD_TPL_FP32 && D_T_Y == ADD_TPL_FP32 && D_T_Z == ADD_TPL_FP32){ + TilingDataFp *tiling = context->GetTilingData(); + tiling->totalLength = totalLength; + }else if(D_T_X == ADD_TPL_FP16 && D_T_Y == ADD_TPL_FP16 && D_T_Z == ADD_TPL_FP16){ + TilingDataFp16 *tiling = context->GetTilingData(); + tiling->totalLength = totalLength; + } context->SetBlockDim(BLOCK_DIM); - tiling.set_totalLength(totalLength); - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), 
context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); const uint64_t tilingKey = GET_TPL_TILING_KEY(D_T_X, D_T_Y, D_T_Z, TILE_NUM, IS_SPLIT); // 模板参数tilingkey配置 context->SetTilingKey(tilingKey); size_t *currentWorkspace = context->GetWorkspaceSizes(1); diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp index 36bffd663..f17a6f678 100644 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp @@ -9,6 +9,7 @@ */ #include "kernel_operator.h" #include "tiling_key_add_custom.h" +#include "add_custom_tiling.h" constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue template @@ -19,7 +20,8 @@ public: { this->blockLength = totalLength / AscendC::GetBlockNum(); this->tileNum = tileNum; - if(tileNum == 1){ + uint32_t tile_type = 1; // tile type + if(tileNum == tile_type){ this->tileLength = totalLength; }else{ this->tileLength = this->blockLength / tileNum / BUFFER_NUM; @@ -90,12 +92,22 @@ private: template __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) { - GET_TILING_DATA(tiling_data, tiling); + //注册默认tiling结构体 + REGISTER_TILING_DEFAULT(optiling::TilingData); + //注册数据类型为FP32的tilingData结构体,此处必须和模板参数中定义保持一致,否则会有oom问题 + REGISTER_TILING_FOR_TILINGKEY( + "D_T_X == ADD_TPL_FP32 && D_T_Y == ADD_TPL_FP32 && D_T_Z == ADD_TPL_FP32", optiling::TilingDataFp); + //注册数据类型为FP16的tilingData结构体,此处必须和模板参数中定义保持一致,否则会有oom问题 + REGISTER_TILING_FOR_TILINGKEY( + "D_T_X == ADD_TPL_FP16 && D_T_Y == ADD_TPL_FP16 && D_T_Z == ADD_TPL_FP16", optiling::TilingDataFp16); + if(D_T_X == ADD_TPL_FP32 && D_T_Y == ADD_TPL_FP32 && D_T_Z == ADD_TPL_FP32){ + 
GET_TILING_DATA_WITH_STRUCT(optiling::TilingDataFp, tiling_data, tiling); KernelAdd op; op.Init(x, y, z, tiling_data.totalLength, TILE_NUM); op.Process1(); }else if(D_T_X == ADD_TPL_FP16 && D_T_Y == ADD_TPL_FP16 && D_T_Z == ADD_TPL_FP16){ + GET_TILING_DATA_WITH_STRUCT(optiling::TilingDataFp16, tiling_data, tiling); KernelAdd op; if(IS_SPLIT == 0){ op.Init(x, y, z, tiling_data.totalLength, TILE_NUM); diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom_tiling.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom_tiling.h similarity index 69% rename from operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom_tiling.h rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom_tiling.h index 7e9e79d1d..5e53d9d4f 100644 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom_tiling.h +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom_tiling.h @@ -9,13 +9,22 @@ */ #ifndef ADD_CUSTOM_TILING_H #define ADD_CUSTOM_TILING_H -#include "register/tilingdata_base.h" +#include namespace optiling { -BEGIN_TILING_DATA_DEF(TilingData) -TILING_DATA_FIELD_DEF(uint32_t, totalLength); -END_TILING_DATA_DEF; +class TilingData{ +public: + uint32_t totalLength; +}; -REGISTER_TILING_DATA_CLASS(AddCustom, TilingData) +class TilingDataFp{ +public: + uint32_t totalLength; +}; + +class TilingDataFp16{ +public: + uint32_t totalLength; +}; } // namespace optiling #endif // ADD_CUSTOM_TILING_H diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h index eae217444..3838e5052 100644 --- 
a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h @@ -14,8 +14,8 @@ #define ADD_TPL_FP16 10 #define ADD_TPL_FP32 20 -#define ADD_TPL_ND 15 -#define ADD_TPL_NZ 25 +#define ADD_TPL_ND 2 //数据格式定义 +#define ADD_TPL_NZ 29 //数据格式定义 /** ASCENDC_TPL_ARGS_DECL(args0, ...):算子的模板参数定义, args0表示算子唯一标识, 建议与opType保持一致,后续为若干个DTYPE、FORMAT、UINT、BOOL的模板参数定义 ASCENDC_TPL_DTYPE_DECL(args0, ...): DTYPE类型的模板参数定义,args0表示参数名,后续若干个参数为穷举的DTYPE枚举值 @@ -49,6 +49,7 @@ ASCENDC_TPL_SEL(...):算子的模板参数整体组合,可设置多个模板参 ASCENDC_TPL_FORMAT_SEL(args0, ...): FORMAT类型的模板参数组合,args0表示参数名,后续若干个参数为对应的ASCENDC_TPL_FORMAT_DECL定义的参数范围子集 ASCENDC_TPL_UINT_SEL(args0, args1, args2, ...): UINT类型的模板参数定义,args0表示参数名,args1是参数的表示类型,支持的表示类型为ASCENDC_TPL_UI_RANGE,ASCENDC_TPL_UI_LIST,ASCENDC_TPL_UI_MIX,后续的数值定义参考ASCENDC_TPL_UINT_DECL的规则 ASCENDC_TPL_BOOL_SEL(args0, ...): bool类型的模板参数定义,args0表示参数名,后续若干个参数为对应的ASCENDC_TPL_BOOL_DECL定义的参数范围子集 + ASCENDC_TPL_TILING_STRUCT_SEL(args0): 此模板参数组合对应的自定义tiling结构体,此处需要和kernel侧的判断逻辑保持一致,否则会有oom问题,args0表示tiling结构体名 */ ASCENDC_TPL_SEL( ASCENDC_TPL_ARGS_SEL( @@ -56,14 +57,16 @@ ASCENDC_TPL_SEL( ASCENDC_TPL_DTYPE_SEL(D_T_Y, ADD_TPL_FP16), ASCENDC_TPL_DTYPE_SEL(D_T_Z, ADD_TPL_FP16), ASCENDC_TPL_UINT_SEL(TILE_NUM, ASCENDC_TPL_UI_LIST, 1, 8), - ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1) + ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1), + ASCENDC_TPL_TILING_STRUCT_SEL(optiling::TilingDataFp16) ), ASCENDC_TPL_ARGS_SEL( ASCENDC_TPL_DTYPE_SEL(D_T_X, ADD_TPL_FP32), ASCENDC_TPL_DTYPE_SEL(D_T_Y, ADD_TPL_FP32), ASCENDC_TPL_DTYPE_SEL(D_T_Z, ADD_TPL_FP32), ASCENDC_TPL_UINT_SEL(TILE_NUM, ASCENDC_TPL_UI_LIST, 1, 8), - ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1) + ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1), + ASCENDC_TPL_TILING_STRUCT_SEL(optiling::TilingDataFp) ) ); #endif // TILING_KEY_ADD_CUSTOM_H \ No newline at end of file diff --git 
a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md index 975426ef1..bd153e5dc 100644 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md @@ -139,7 +139,8 @@ CANN软件包中提供了工程创建工具msOpGen,AddTemplateCustom算子工 ### 4. 调用执行算子工程 - [aclnn调用AddTemplateCustom算子工程](./AclNNInvocation/README.md) ## 更新说明 -| 时间 | 更新事项 | -| ---------- |----------| -| 2024/10/25 | 新增模板参数算子样例 | -| 2024/11/18 | 样例目录调整 | +| 时间 | 更新事项 | +|------------|--------------| +| 2024/10/25 | 新增模板参数算子样例 | +| 2024/11/18 | 样例目录调整 | +| 2025/11/7 | 新增自定义结构体调用示例 | diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh index 4b74830f0..b64510b32 100755 --- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh @@ -49,6 +49,8 @@ OP_NAME=AddTemplateCustom rm -rf CustomOp # Generate the op framework msopgen gen -i $OP_NAME.json -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp +# Delete gen tiling.h +rm -rf CustomOp/op_host/add_custom_tiling.h # Copy op implementation files to CustomOp cp -rf $OP_NAME/* CustomOp # Build CustomOp project -- Gitee From 342850b59ec5e850987ae44061c7197db9099d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BE=90=E6=A2=A6=E7=85=8A?= Date: Tue, 25 Nov 2025 03:50:41 +0000 Subject: [PATCH 96/97] =?UTF-8?q?!2798=20=E5=AE=8C=E5=96=84=20dataflow=20r?= =?UTF-8?q?eadme=20=E4=B8=AD=E5=AE=89=E8=A3=85=E5=8D=B8=E8=BD=BD=E6=B5=81?= =?UTF-8?q?=E7=A8=8B=20Merge=20pull=20request=20!2798=20from=20=E5=BE=90?= =?UTF-8?q?=E6=A2=A6=E7=85=8A/readme=5Finstall?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inference/dataflow/py_dflow/README.md | 21 ++++++++++++++------- 1 
file changed, 14 insertions(+), 7 deletions(-) diff --git a/inference/dataflow/py_dflow/README.md b/inference/dataflow/py_dflow/README.md index 59a708e71..cb61b56c0 100644 --- a/inference/dataflow/py_dflow/README.md +++ b/inference/dataflow/py_dflow/README.md @@ -48,10 +48,17 @@ py_dflow ## 安装 可执行如下命令安装编译生成的`dataflow`软件包。 - -```shell -pip install dataflow-0.0.1-py3-none-any.whl -``` -当提示`Successfully installed dataflow-0.0.1`时,表示安装成功。 -**请注意:如果存在多个python版本,请使用编译指定python对应的pip进行安装, 如果环境有之前安装的dataflow版本,需要使用`pip uninstall dataflow`命令卸载后再安装。** - +- 卸载环境之前安装的`dataflow`版本 + ```shell + pip uninstall dataflow + ``` +- 强制安装编译生成的`dataflow`软件包到指定路径 + ```shell + pip install dataflow-0.0.1-py3-none-any.whl --force-reinstall -t ${HOME}/Ascend/ascend-toolkit/${cann_version}/python/site-packages + ``` + - ${HOME}:表示CANN软件包安装目录 + - ${cann_version}:表示CANN包版本号 + +- 当提示`Successfully installed dataflow-0.0.1`时,表示安装成功。 + +**请注意:如果存在多个python版本,请使用编译指定python对应的pip进行安装** -- Gitee From f5dd3187a602e7b50280731d54ee934e433b00fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BE=90=E6=A2=A6=E7=85=8A?= Date: Tue, 25 Nov 2025 03:51:23 +0000 Subject: [PATCH 97/97] =?UTF-8?q?!2793=20=E4=BF=AE=E6=94=B9=20test=5Fperf.?= =?UTF-8?q?py=20=E9=80=82=E9=85=8Dnumpy2.=C3=97=20Merge=20pull=20request?= =?UTF-8?q?=20!2793=20from=20=E5=BE=90=E6=A2=A6=E7=85=8A/test=5Fperf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inference/dataflow/python/test_perf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inference/dataflow/python/test_perf.py b/inference/dataflow/python/test_perf.py index c5247c5e6..ab76b3f5c 100644 --- a/inference/dataflow/python/test_perf.py +++ b/inference/dataflow/python/test_perf.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" +import numpy as np import dataflow as df import time @@ -43,7 +44,7 @@ flow_node0_out = flow_node0(data0, data1) dag = df.FlowGraph([flow_node0_out]) # feed -feed_data = df.Tensor([4, 7, 5], tensor_desc=df.TensorDesc(df.DT_INT32, [3])) +feed_data = df.Tensor(np.array([4, 7, 5], dtype=np.int32), tensor_desc=df.TensorDesc(df.DT_INT32, [3])) flow_info = df.FlowInfo() @@ -56,7 +57,7 @@ for i in range(10): dag.feed_data({data0:feed_data, data1:feed_data}, flow_info) e = time.time() -print(f"TEST-TIME: fetch cost {(e -s * 1000000)} us") +print(f"TEST-TIME: fetch cost {((e -s) * 1000000)} us") print("TEST SUCCESS") # 释放dataflow资源 -- Gitee