diff --git a/akg-mlir/python/akg_mlir/exec_tools/py_benchmark.py b/akg-mlir/python/akg_mlir/exec_tools/py_benchmark.py index 265c5da34039c737b57ac1b6c4a699e57fb8175d..6a9034bd818708773e2a615394ecaced12d87b57 100644 --- a/akg-mlir/python/akg_mlir/exec_tools/py_benchmark.py +++ b/akg-mlir/python/akg_mlir/exec_tools/py_benchmark.py @@ -184,20 +184,22 @@ def _transform_data_to_ctypes_ascend(data, data_shape = np.array(device_shape[data_idx]) data_bytes = d.nbytes is_numpy_bf16 = False + is_numpy_output = False if isinstance(d, int): data_ctypes.append(ctypes.c_int(d)) elif isinstance(d, np.ndarray): + if data_idx in output_idx_set: + is_numpy_output = True if d.dtype.name == "bfloat16": d = d.astype(np.float32) data[data_idx] = d is_numpy_bf16 = True ascend_tensor_obj = akgAscendLaunch.AscendTensorObjStructPyTorch() - is_output = data_idx in output_idx_set ascend_tensor_obj.tensor_info = d ascend_tensor_obj.shape_info = data_shape ascend_tensor_obj.nbytes = data_bytes - ascend_tensor_obj.is_output = is_output + ascend_tensor_obj.is_output = is_numpy_output ascend_tensor_obj.is_bf16 = is_numpy_bf16 data_ctypes.append(ascend_tensor_obj) diff --git a/akg-mlir/python/kernel.py b/akg-mlir/python/kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..80a6fe7df08adbbfdab4b48dd694a5fdf5972cb3 --- /dev/null +++ b/akg-mlir/python/kernel.py @@ -0,0 +1,158 @@ +# Copyright 2023-2025 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Module for akg support ascend_npu_ir test """ +import os +import re +import ctypes +import subprocess +import numpy as np + +from akg import akgAscendLaunch +from akg.message import get_npucompiler_path +from akg.utils.dynamic_utils import get_device_shape + +def _transform_data_to_ctypes_ascend(data, + kernel_name, + output_indexes, + is_dyn_shape=False, + backend="ascend", + is_profile_params=False, + ): + """ transform tensor input data to ctypes for ascend """ + data_ctypes = [] + if len(data) == 0: + # dynamic shape info cannot generate inputs while compilation + return data_ctypes + + device_shape, _, _ = get_device_shape( + data, kernel_name, is_dyn_shape and not is_profile_params) + + output_idx_set = [] + for output_idx in output_indexes: + if output_idx >= 0: + output_idx_set.append(output_idx) + else: + output_idx_set.append(output_idx + len(data)) + output_idx_set = set(output_idx_set) + for data_idx, d in enumerate(data): + data_shape = np.array(device_shape[data_idx]) + data_bytes = d.nbytes + is_numpy_bf16 = False + is_numpy_output = False + if isinstance(d, int): + data_ctypes.append(ctypes.c_int(d)) + elif isinstance(d, np.ndarray): + if data_idx in output_idx_set: + is_numpy_output = True + if d.dtype.name == "bfloat16": + d = d.astype(np.float32) + data[data_idx] = d + is_numpy_bf16 = True + + ascend_tensor_obj = akgAscendLaunch.AscendTensorObjStructPyTorch() + ascend_tensor_obj.tensor_info = d + ascend_tensor_obj.shape_info = data_shape + ascend_tensor_obj.nbytes = data_bytes + ascend_tensor_obj.is_output = is_numpy_output + ascend_tensor_obj.is_bf16 = is_numpy_bf16 + data_ctypes.append(ascend_tensor_obj) + + return data_ctypes + +class Kernel: + """ Kernel for support ascend_npu_ir """ + def __init__(self, kernel_meta=None): + self.kernel_name = kernel_meta.get('kernel_name') + self.dynamic = kernel_meta.get('dynamic') + self.device_id = kernel_meta.get('device_index') + self.base_dir = os.path.dirname(os.path.abspath(__file__)) + self.output_so_dir = os.path.join(self.base_dir, "data/") + backend = kernel_meta.get('backend') + self.backend = backend if backend is not None else "ascend" + num_outputs = kernel_meta.get('num_outputs') + self.output_indexes = self._get_output_index(num_outputs) + + def _get_output_index(self, num_outputs: int): + return [-i for i in range(1, num_outputs + 1)] + + def compile(self, input_mlir: str): + """ Compile .mlir file to .so file. 
""" + mlir_file_name = f"{self.kernel_name}_out.mlir" + mlir_file_path = os.path.join(self.base_dir, mlir_file_name) + os.makedirs(self.output_so_dir, exist_ok=True) + output_so_path = os.path.join(self.output_so_dir, f"{self.kernel_name}.so") + if not self.dynamic: + pattern = r'(\{[^{}]*\{[^{}]*)<[^<>]*>' + replacement = r'\1' + input_mlir = re.sub(pattern, replacement, input_mlir, count=1) + + try: + with open(mlir_file_path, "w", encoding="utf-8") as f: + f.write(input_mlir) + + bishengir_compile_path = get_npucompiler_path() + compile_cmd = [ + bishengir_compile_path, + mlir_file_path, + "-enable-hfusion-compile=true", + "-enable-hivm-compile=true", + "-enable-bin-relocation=false", + "-block-dim=40", + "-enable-auto-multi-buffer=true", + "-o", + output_so_path, + ] + print(f"exec command: {compile_cmd}") + result = subprocess.run( + compile_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True + ) + print(f"compile finish, lib.so save to {os.path.abspath(output_so_path)}") + return (result.stdout, result.stderr) + except Exception as compile_err: + raise Exception(f"compile MLIR failed, error message: {str(compile_err)}") from compile_err + finally: + if os.path.exists(mlir_file_path): + os.remove(mlir_file_path) + + def run(self, *args, **kwargs): + """ launch .so file by akg_ascend_backend """ + so_file_path = os.path.join(self.output_so_dir, f"lib{self.kernel_name}.so") + if not os.path.exists(so_file_path): + raise FileNotFoundError(f"can not find lib{self.kernel_name}.so in path: {so_file_path}") + n = len(args) + try: + input_for_mod_ctypes = _transform_data_to_ctypes_ascend( + args[:n-1], + self.kernel_name, + self.output_indexes, + self.dynamic, + self.backend + ) + + akgAscendLaunch.akg_ascend_run( + self.output_so_dir, + self.kernel_name, + self.device_id, + self.dynamic, + *input_for_mod_ctypes + ) + print(f"success launch kernel: {self.kernel_name}") + return None + except Exception as running_err: + raise Exception(f"exec {self.kernel_name}.so error, error msg: {str(running_err)}") from running_err + \ No newline at end of file