diff --git a/oec-ascend/MANIFEST.in b/oec-ascend/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..065c17cfbd2f1845660f46effbf6bcf5bdc38a86 --- /dev/null +++ b/oec-ascend/MANIFEST.in @@ -0,0 +1,6 @@ +# MANIFEST.in +recursive-include oec/test_cases * +recursive-include oec/data * +recursive-include oec/common * +global-exclude __pycache__ +global-exclude *.pyc \ No newline at end of file diff --git a/oec-ascend/README.md b/oec-ascend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b5863b532035befadd9eddcc5e59f32d1c5c1268 --- /dev/null +++ b/oec-ascend/README.md @@ -0,0 +1,55 @@ +# oec-ascend + +## 介绍 + +昇腾提供了OS兼容性验证工具(oec-ascend),包含一套基础测试用例,用于检查操作系统和昇腾软件之间的兼容性。 + +### 功能 + +当前支持以下五个维度的功能验证 + +- 运行环境 +- 应用开发 +- 算子 +- 模型开发 +- 集成测试 +## 支持的产品 +1. A2: Atlas A2训练系列产品,Atlas 800I A2推理产品 +2. A3: Atlas A3训练系列产品,Atlas A3 推理系列产品 +3. A300: 安装有 Atlas 300I Pro、Atlas 300V Pro、Atlas 300I Duo的推理服务器 + +## 安装前准备 +1. 请准备一台安装有昇腾NPU的环境。**建议运行内存大于96GB,剩余硬盘空间大于100GB** +2. 请参考 [昇腾社区文档](https://www.hiascend.com/document),安装昇腾驱动,固件软件。 +3. 请参考 [昇腾社区文档](https://www.hiascend.com/document),安装toolkit,kernels,nnal。 +4. 
安装 cmake, g++。其中cmake建议版本大于 3.16,g++需要与环境上的gcc版本配套。 +### 安装ais_bench_net_test工具 +ais_bench_net_test工具用于测试和验证HCCL相关功能 +请参考[ais_bench_net_test工具安装与卸载文档](https://gitee.com/ascend/tools/tree/develop/ais-bench_workload/tool/net_test#%E5%B7%A5%E5%85%B7%E5%AE%89%E8%A3%85%E4%B8%8E%E5%8D%B8%E8%BD%BD)安装ais_bench_net_test + +### 创建并上传CANN软件到测试目录 +选取一个剩余空间充足的硬盘(建议剩余硬盘空间大于100GB)继续以下操作 +```bash +mkdir -p cann_test +cd cann_test +``` +上传toolkit,kernels,nnal的**run格式安装包**到cann_test路径下,并给软件安装包**添加执行权限**,请勿修改软件包名称。 +## 安装运行 oec-ascend工具 +### 通过whl包安装 +**注意**:请按照CANN软件安装指南中的说明配置当前环境安装的CANN的环境变量,如果不配置环境变量,默认使用/usr/local/Ascend目录下的CANN软件包 +```bash +wget https://ascend-cann-open.obs.cn-north-4.myhuaweicloud.com/cann-os/oec_ascend_compatibility-1.0.0-py3-none-any.whl +pip3 install oec_ascend_compatibility-1.0.0-py3-none-any.whl +oec-ascend --product A2 --target cann +``` +**参数说明** + +>--product,-p 被测试的环境产品形态, 请根据您的产品形态输入对应的值,当前支持A2/A3/A5/A300,其中A5为预留参数,当前A5测试范围和A2相同。 + +>--target, -t 被测试环境中需要测试功能组件,当前支持 all/cann。all测试所有功能组件,cann仅测试CANN组件,当前这两个选项效果完全相同,后续会添加hdk等组件扩展工具能力。 + +## 查看运行报告 +工具会在运行目录下生成"output/<时间戳>/<target>/"目录存放测试报告,日志和临时文件,请下载该路径下的report.xlsx 文件到本地使用表格软件打开查看,时间戳为兼容性验证工具启动时的时间,可以在工具运行的打屏信息中找到报告的生成路径。 + +其中:**sheet1为整体功能模块通过率,sheet2为各个测试用例运行情况和测试内容存放路径** + diff --git a/oec-ascend/oec/BaseTest.py b/oec-ascend/oec/BaseTest.py new file mode 100644 index 0000000000000000000000000000000000000000..b549be01117066605993dc4504c41915e85c8a09 --- /dev/null +++ b/oec-ascend/oec/BaseTest.py @@ -0,0 +1,264 @@ +# encoding: utf-8 +import os +import threading +import inspect +import time +from datetime import datetime +from oec.Utils import elapsed_time_str +from typing import Tuple # 兼容python3.7 +from logging import getLogger +from oec.TestInterface import TestInterface +from oec.TestContext import TestContext +from oec.BaseTypes import State + +logger = getLogger("oec-ascend") + + +Context:TestContext = TestContext() + +def reset_context(): + global Context + Context = TestContext() + return Context 
# ---- oec/BaseTest.py --------------------------------------------------------
class BaseTest(TestInterface):
    """Base class holding the state machine and bookkeeping of one test.

    An instance registers itself with the module-level ``Context`` on
    construction and remembers the file/line that created it so error
    messages can point back to the declaration. Subclasses implement
    ``execute_command`` (and ``set_reason``/``get_reason`` if they can fail).
    """

    def __init__(
        self,
        offering: str,
        group: Tuple[str, str],
        name: str,
        optional: bool = True,
        auxiliary: bool = False,
        cached: bool = True,
        log_dir: str = "",
        tags=None,
        products=None,
    ):
        """Create and register a test.

        Args:
            offering: stored as-is and exposed through ``offering``.
            group: two-element key used to group tests in ``group_dict``.
            name: unique, non-empty test name.
            optional: when True a failure does not stop the run.
            auxiliary: when True a FAIL is downgraded to WARNING.
            cached: when True a finished test is not executed again.
            log_dir: log directory; falls back to the context default.
            tags: extra tags; the tag ``'all'`` is always present.
            products: products this test applies to; empty means all.

        Raises:
            ValueError: if ``name`` is empty.
        """
        self._context: TestContext = Context
        self._name: str = name
        self._group = group
        self._tags = set(['all'])
        self._optional: bool = optional
        self._state: State = State.NOT_RUNNING
        self._auxiliary: bool = auxiliary
        self._cached: bool = cached
        self._log_dir_path = log_dir if log_dir else self._context.get_log_dir()
        self._lock = threading.Lock()
        self._filename = None
        self._lineno = None
        self._start_time = datetime.now()
        self._end_time = self._start_time
        self._update_count = 0
        # None instead of a shared ``[]`` default: every instance gets its own list.
        self._products = [] if products is None else products
        self._offering = offering
        self.context.group_dict.setdefault(group, False)
        # The first frame that is not an __init__ is the place the test was declared.
        for stack in inspect.stack()[1:]:
            if stack.function != "__init__":
                self._filename = stack.filename
                self._lineno = stack.lineno
                break

        for tag in tags or ():
            self.tags.add(tag)

        if not name:
            raise ValueError(self.message_with_path("name can not be empty."))
        self.context.add_test(self)

    @property
    def offering(self):
        return self._offering

    @property
    def products(self):
        return self._products

    @property
    def tags(self):
        return self._tags

    @property
    def group(self):
        return self._group

    @property
    def context(self):
        return self._context

    def message_with_path(self, message):
        """Append the declaring file and line to ``message``."""
        return f"{message} File {self.get_origin_path()}:{self.get_origin_lineno()}"

    @property
    def state(self):
        return self._state

    def can_cached(self):
        return self._cached

    def is_finished(self):
        return self.state not in [State.NOT_RUNNING, State.RUNNING]

    def can_continue(self):
        """Return True when the run may proceed past this test."""
        if self.is_passed() or self.state == State.UNSUPPORTED:
            return True

        if self.is_failed() and self.is_optional():
            return True

        return False

    def is_failed(self):
        return self.state in [State.FAIL, State.TIMEOUT]

    def is_passed(self):
        return self.state in [State.PASS, State.NOTHING_TO_DO, State.WARNING]

    def is_supported(self):
        return self.state not in [State.UNSUPPORTED]

    def set_reason(self, str):
        raise NotImplementedError()

    def get_reason(self):
        raise NotImplementedError()

    def get_log_dir_path(self):
        if self._log_dir_path is None:
            raise RuntimeError("log dir path is not set")
        return self._log_dir_path

    def set_log_dir_path(self, path):
        if not isinstance(path, str):
            raise TypeError("The path must be a str")
        self._log_dir_path = os.path.abspath(path)

    def get_origin_lineno(self):
        return self._lineno

    def get_origin_path(self):
        return self._filename

    def update_elapsed_time(self):
        """Advance the end timestamp while the test has not finished."""
        if self.state in [State.RUNNING, State.NOT_RUNNING]:
            self._end_time = datetime.now()

    @property
    def elapsed_time(self):
        return self._end_time - self._start_time

    @property
    def elapsed_time_str(self):
        return elapsed_time_str(self.elapsed_time)

    def update_console_message(self):
        """Publish a coloured one-line status for this test to the context."""
        self.update_elapsed_time()
        message = f"{self.name} {self.elapsed_time_str}"
        anime = "⠋⠙⠸⠴⠦⠇"
        if self.is_failed():
            message = f"\033[31m✕ {message} - {self.get_reason()}\033[0m"
        elif self.state == State.WARNING:
            message = f"\033[33m! {message} - {self.get_reason()}\033[0m"
        elif self.is_passed():
            message = f"\033[32m✓ {message}\033[0m"
        elif self.state == State.UNSUPPORTED:
            message = f"\033[33m↷ {message} - {self.get_reason()}\033[0m"
        else:
            # Still running: rotate through the spinner frames.
            charactor = anime[self._update_count % len(anime)]
            message = f"{charactor} {message}\033[0m"

        self.context.set_message(self.name, message)
        self._update_count += 1

    def del_console_message(self):
        self.context.del_message(self.name)

    def run(self):
        """Execute the test once, serialised by the per-test lock.

        A finished, cacheable test is not executed again. Any exception from
        ``execute_command`` marks the test FAIL with the exception text.
        """
        # ``with`` guarantees the lock is released on the cached early return
        # and when an exception escapes; the manual acquire/release did not.
        with self._lock:
            if self.is_finished() and self.can_cached():
                logger.debug(
                    f"The test {self.name} has been completed, using cached results"
                )
                return
            self.set_state(State.NOT_RUNNING)
            self._start_time = datetime.now()
            self.update_elapsed_time()
            if self.products and self.context.procut not in self.products:
                self.set_state(State.UNSUPPORTED)
                self.set_reason(f"The product {self.context.procut} is not in {self.products}")
            else:
                try:
                    self.execute_command()
                except Exception as e:
                    self.set_state(State.FAIL)
                    self.set_reason(f"{e}")
            if self.is_failed():
                logger.debug(
                    f"{self.name} is {self.state.value}, reason: {self.get_reason()}"
                )
            # update_elapsed_time() ignores finished tests, so record the
            # final timestamp explicitly.
            self._end_time = datetime.now()

    def execute_command(self):
        raise NotImplementedError()

    def count(self):
        return 1

    def set_name(self, name):
        if not isinstance(name, str):
            raise TypeError("name must be a string")
        self._name = name

    @property
    def name(self):
        return self._name

    def is_optional(self):
        return self._optional

    def set_optional(self, optional: bool):
        if not isinstance(optional, bool):
            raise TypeError("optional must be bool type")
        # Store the requested value (it used to be hard-coded to True).
        self._optional = optional

    @property
    def auxiliary(self):
        return self._auxiliary

    def set_state_if_not_finished(self, state: State):
        if not self.is_finished():
            self.set_state(state)

    def set_state(self, state: State):
        """Switch to ``state`` and keep the context's per-state counters in sync."""
        if not isinstance(state, State):
            raise TypeError("state must be of type State")
        if self.auxiliary and state == State.FAIL:
            state = State.WARNING

        self.context.distribution[self.state] -= self.count()
        self._state = state
        self.context.distribution[state] += self.count()

    def get_test_content(self):
        return (
            self.get_relative_log_file_path()
            if self.is_finished()
            else "No information due to the previous error."
        )

    @property
    def logger(self):
        return logger

    def __str__(self):
        return self.name

    def __repr__(self):
        return str(self)


# ---- oec/BaseTestCase.py ----------------------------------------------------
class TestCase(BaseTest):
    """A test that runs an external command and judges its log and exit code."""

    def __init__(
        self,
        cmd: List[str] = (),
        include: List[str] = None,
        exclude: List[str] = (),
        count=1,
        origin_file: str = "",
        with_case_info: bool = True,
        cwd=None,
        timeout=None,
        *args,
        **kwargs,
    ):
        """Create a command-based test.

        Args:
            cmd: argument vector to execute; ``None`` means nothing to do.
            include: regex pattern(s) that must all occur in the log.
            exclude: regex pattern(s) that must not occur in the log.
            count: how many cases this test counts for in the statistics.
            origin_file: written into the log header.
            with_case_info: write a header describing the run into the log.
            cwd: working directory; defaults to the declaring file's directory.
            timeout: seconds to wait for the command, ``None`` for no limit.
        """
        super(TestCase, self).__init__(*args, **kwargs)
        self._count = count
        # Immutable defaults avoid one list being shared by all instances;
        # convert to fresh lists so the getters keep returning lists.
        self._cmd = list(cmd) if isinstance(cmd, tuple) else cmd
        self.origin_file = origin_file
        self.with_case_info = with_case_info
        self._include = include
        self._exclude = list(exclude) if isinstance(exclude, tuple) else exclude
        self.__reason = None
        self._log = ""
        self._return_code = 0
        self._cwd = cwd
        self._timeout = timeout
        if isinstance(self._include, str):
            self._include = [self._include]
        if isinstance(self._exclude, str):
            self._exclude = [self._exclude]

        logger.debug(f"test case{self.group[0]}.{self.group[1]}.{self.name} ")

    @property
    def cwd(self):
        return self._cwd

    def set_reason(self, reason: str):
        if not isinstance(reason, str):
            raise TypeError(f"reason must be a string")
        self.__reason = reason

    def get_reason(self):
        return self.__reason

    def get_include(self):
        return self._include

    def get_exclude(self):
        return self._exclude

    def get_relative_log_file_path(self):
        return os.path.relpath(self.get_log_file_path(), self.context.work_path)

    def get_log_file_path(self):
        return os.path.join(self.get_log_dir_path(), f"{self.name}.log")

    def get_test_content(self):
        return (
            self.get_relative_log_file_path()
            if self.is_finished()
            else "No information due to the previous error."
        )

    def execute_command_with_cmd(self, cmd):
        """Run ``cmd``, capture its output in the log file and evaluate it.

        Returns:
            ``(log, return_code)`` after the command ran, or ``None`` when the
            test was not in NOT_RUNNING state or ``cmd`` is ``None``.
        """
        if self.state != State.NOT_RUNNING:
            return
        if cmd is None:
            self.set_state(State.NOTHING_TO_DO)
            return
        self.set_state(State.RUNNING)

        env = self.context.env.copy()
        env["OEC_OUTPUT_PATH"] = f"{self.context.output_dir}/tmp/{self.name}"
        env["OEC_DATA_PATH"] = self.context.data_path
        env["OEC_WORKDIR"] = self.context.work_path
        env["OEC_PRODUCT"] = self.context.procut
        # Directory the command really runs in (the header used to print "cd None").
        run_dir = os.path.dirname(self.get_origin_path()) if self.cwd is None else self.cwd
        printable_cmd = cmd if isinstance(cmd, str) else " ".join(cmd)

        # errors="replace": tool output is not guaranteed to be valid text in
        # the locale encoding and must not break reading the log back.
        with open(self.get_log_file_path(), "w+", errors="replace") as f:
            if self.with_case_info:
                f.writelines([
                    "**********************************************\n",
                    self.name + "\n",
                    self.origin_file + "\n",
                    f"cmd = {cmd}\n\n",
                    f"export OEC_OUTPUT_PATH={env['OEC_OUTPUT_PATH']}\n",
                    f"export OEC_DATA_PATH={env['OEC_DATA_PATH']}\n",
                    f"export OEC_WORKDIR={env['OEC_WORKDIR']}\n",
                    f"export OEC_PRODUCT={env['OEC_PRODUCT']}\n",
                    f"cd {run_dir}\n",
                    printable_cmd + "\n",
                    "**********************************************\n",
                ])
                # Flush so the header precedes whatever the child writes.
                f.flush()
            process = subprocess.Popen(
                cmd,
                env=env,
                cwd=run_dir,
                stdout=f,
                stderr=subprocess.STDOUT,
                text=True,
            )
            try:
                process.wait(self._timeout)
            except subprocess.TimeoutExpired:
                self.set_state(State.TIMEOUT)
                self.set_reason(f"timeout after {self._timeout}s")
                # Do not leave the child running in the background.
                process.kill()
                process.wait()
            f.seek(0)
            log = f.read()

        return_code = process.returncode
        self._return_code = return_code
        self._log = log

        self.check_result(log, return_code)
        return log, return_code

    def execute_command(self):
        self.execute_command_with_cmd(self.get_cmd())

    def get_cmd(self):
        return self._cmd

    def count(self):
        return self._count

    def get_doc(self):
        pass

    @property
    def log(self):
        return self._log

    @property
    def return_code(self):
        return self._return_code

    def check_result(self, log: str, return_code):
        """Derive the final state from the include/exclude patterns and exit code.

        Does nothing if the test already reached a final state (e.g. TIMEOUT).
        """
        logger.debug(
            f'\n>> {self.name}{"(optional)" if self.is_optional() else ""} -> return {return_code} :\n File "{self.get_origin_path()}" :\n{log}'
        )
        if self.is_finished():
            return

        if self.get_include() is not None:
            for pattern in self.get_include():
                if re.search(pattern, log) is None:
                    self.set_state(State.FAIL)
                    self.set_reason(
                        f"'{pattern}' was not found in the output of {self.name}, {self.get_log_file_path()}"
                    )
                    return

        if self.get_exclude() is not None:
            for pattern in self.get_exclude():
                result = re.search(pattern, log)
                if result is not None:
                    self.set_state(State.FAIL)
                    start = result.span()[0]
                    # 1-based line and column of the offending match.
                    lineno = log.count("\n", 0, start) + 1
                    position = start - log.rfind("\n", 0, start)
                    self.set_reason(
                        f"Find '{pattern}' in the output of {self.name}, {self.get_log_file_path()}:{lineno}:{position}"
                    )
                    return

        if return_code != 0:
            log_path = Utils.get_file_path(self.get_log_file_path())
            code_map = {
                124: (State.TIMEOUT, f"code: {return_code}, timeout. Log: {log_path}"),
                127: (State.FAIL, f"code: {return_code}, command not found. Log: {log_path}"),
                191: (State.WARNING, f"code: {return_code}, warning. Log: {log_path}"),
                192: (State.UNSUPPORTED, f"code: {return_code}, unsupported."),
            }
            failed = (State.FAIL, f"code: {return_code}, failed. Log: {log_path}")
            state, reason = code_map.get(return_code, failed)
            self.set_state(state)
            self.set_reason(reason)
            return

        self.set_state(State.PASS)


# ---- oec/BaseTypes.py -------------------------------------------------------
@unique
class State(Enum):
    """Lifecycle and result states of a test."""

    NOT_RUNNING = "not running"
    RUNNING = "running"

    NOTHING_TO_DO = "nothing to do"
    PASS = "passed"
    WARNING = "warning"
    UNSUPPORTED = "unsupported"

    TIMEOUT = "timeout"
    FAIL = "failed"
self._product="" + self.group_dict = {} + for state in State: + self._states_distribution.setdefault(state, 0) + + self.infomation.setdefault("NPU", "unknow") + self.set_message("distribution", "") + self.set_message("rate", "") + + + def set_env(self,env): + self._env = env + + def set_message(self, key, message:str): + if key not in self._console_output: + self._console_position.append(key) + self._console_output[key] = message + + def del_message(self, key): + if key not in self._console_output: + return + del self._console_output[key] + self._console_position.remove(key) + + @property + def procut(self): + return self._product + + def set_product(self, product): + self._product = product + + @property + def target(self): + return self._target + + def set_target(self, target): + self._target = target + + @property + def env(self): + return self._env + + def set_output(self,output:str, timestamp): + output_path = os.path.join(output, timestamp, self.target) + log_dir = os.path.join(output_path, "logs") + make_log_dir(log_dir) + self._output_dir = output_path + self._defaut_log_dir = log_dir + + def set_work_path(self,work_path:str): + self._work_path = work_path + + @property + def work_path(self): + return self._work_path + + def set_cann_path(self,cann_path:str): + self._cann_path = cann_path + + @property + def cann_path(self): + return self._cann_path + + def set_data_path(self,data_path:str): + self._data_path = data_path + + @property + def data_path(self): + return self._data_path + + def update_state(self): + success = self.distribution[State.PASS] + self.distribution[State.NOTHING_TO_DO] + + total = len(self.get_used_tests()) + finished = total - self.distribution[State.NOT_RUNNING] - self.distribution[State.RUNNING] + ran = finished - self.distribution[State.WARNING] - self.distribution[State.UNSUPPORTED] + if total == 0: + return "wait for start" + + self.set_message("distribution", + f"total {total}, running {self.distribution[State.RUNNING]}, not 
running {self.distribution[State.NOT_RUNNING]}, " + f"passed {success}, warning {self.distribution[State.WARNING]}, failed {self.distribution[State.FAIL]}, " + f"timeout {self.distribution[State.TIMEOUT]}, unsupported {self.distribution[State.UNSUPPORTED]}.") + self.set_message("rate", + f"Completion rate {round(finished/total*100,2)}%, pass rate { 0 if ran==0 else round(success/ran*100,2)}% - {elapsed_time_str(datetime.now() - self._start_time)}") + + for test in self._running_tests: + test.update_console_message() + + + def get_state_distribution_str(self): + self.update_state() + all = [self._console_output[k] for k in self._console_position] + return '\n'.join(all) + + + @property + def distribution(self): + return self._states_distribution + + @property + def infomation(self): + return self._infomation + + @property + def output_dir(self): + return self._output_dir + + def get_output_dir(self): + return self.output_dir + + def get_log_dir(self): + return self._defaut_log_dir + + def set_log_dir(self, path: str): + self._defaut_log_dir = path + + def add_test(self, test: TestInterface): + if test.name in self._all_tests: + t2 = self._all_tests[test.name] + raise RuntimeError( + f'"{test.name}" in {test.get_origin_path()}:{test.get_origin_lineno()}' + f" has been used in {t2.get_origin_path()}:{t2.get_origin_lineno()}" + ) + for tag in test.tags: + self._tags.setdefault(tag,[]) + self._tags[tag].append(test) + + self._all_tests[test.name] = test + + @property + def test_order(self): + return self._test_order + + def set_test_order(self, path): + if not os.path.exists(path): + logger.fatal(f"Can not find the path: {path}") + exit(6500) + tests = self.get_tests() + path = os.path.join(path, "test_sequence.py") + + test_sequence = self.group_dict + targets = None + logger.debug("test_sequence is:") + logger.debug(test_sequence) + tmp_dict = {} + for group in test_sequence: + tmp_dict.setdefault(group, []) + used_test = {} + order_list = [] + for name, test in 
tests.items(): + + if test.group in tmp_dict: + tmp_dict[test.group].append(test) + used_test[test.name] = test + + for group, t in tmp_dict.items(): + if not t: + continue + if test_sequence[group]: + order_list.append(t) + else: + for test in t: + order_list.append([test]) + logger.debug(f"test sequence detials:") + logger.debug(order_list) + self._test_order = order_list + self._used_tests = used_test + + def clear_unimportented_messages(self, items, seconds): + time.sleep(seconds) + for test in items: + if test.is_failed() or test.state == State.WARNING: + continue + test.del_console_message() + + def run_tests(self): + self.distribution[State.NOT_RUNNING] = len(self.get_used_tests()) + order_list = self.test_order + self._start_time = datetime.now() + final_thread = None + for items in order_list: + threads = [] + self._running_tests = items + for test in items: + t = threading.Thread(target=test.run, name=test.name) + t.start() + threads.append(t) + for t in threads: + t.join() + self.update_state() + sleep_seconds = 5 if items is not order_list[-1] else 1.5 + final_thread = threading.Thread(target=self.clear_unimportented_messages, args=(items, sleep_seconds)) + final_thread.start() + self._running_tests = [] + + for test in items: + if not test.can_continue(): + final_thread.join() + return State.FAIL + if final_thread: + final_thread.join() + return State.PASS + + def get_used_tests(self): + return self._used_tests + + def get_tests(self): + return self._all_tests diff --git a/oec-ascend/oec/TestInterface.py b/oec-ascend/oec/TestInterface.py new file mode 100644 index 0000000000000000000000000000000000000000..d470625d3a6c8d0057c2b1797524640827c83c26 --- /dev/null +++ b/oec-ascend/oec/TestInterface.py @@ -0,0 +1,38 @@ +# encoding: utf-8 +from oec.BaseTypes import State + + +class TestInterface: + @property + def name(self) -> str: + raise NotImplementedError() + + @property + def tags(self) -> set: + raise NotImplementedError() + + @property + def 
group(self): + raise NotImplementedError() + + def get_origin_path(self) -> str: + raise NotImplementedError() + + def get_origin_lineno(self) -> int: + raise NotImplementedError() + + def set_log_dir_path(self, path: str): + raise NotImplementedError() + + def get_log_dir_path(self) -> str: + raise NotImplementedError() + + @property + def state(self) -> State: + raise NotImplementedError() + + def can_continue(self) -> bool: + raise NotImplementedError() + + def run(self): + raise NotImplementedError() diff --git a/oec-ascend/oec/TestReport.py b/oec-ascend/oec/TestReport.py new file mode 100644 index 0000000000000000000000000000000000000000..6743d2cc2ba5bb07ba1e0fdebd85119bf1e576b8 --- /dev/null +++ b/oec-ascend/oec/TestReport.py @@ -0,0 +1,115 @@ +# encoding: utf-8 + +import pandas as pd +import os +import openpyxl +from oec.TestContext import TestContext +from oec.BaseTestCase import TestCase +from logging import getLogger + +logger = getLogger("oec-ascend") + + +# path = "resource/report0.xlsx" +def gen_report(path: str, context: TestContext): + log_dir = context.get_output_dir() + src_path = os.path.join(path, "base_report.xlsx") + path = os.path.join(log_dir, "report.xlsx") + + # copy(src_path,path) + df = pd.read_excel(src_path, header=None) + + excel = openpyxl.load_workbook(src_path) + sheet_name = excel.sheetnames[0] + sheet = excel[sheet_name] + for item in sheet.merged_cells: + top_col, top_row, bottom_col, bottom_row = item.bounds + df.iloc[top_row - 1 : bottom_row, top_col - 1 : bottom_col] = ( + item.start_cell.value + ) + + # 环境信息 + dfe = df.iloc[1:7, 3:] + dfe.set_index([dfe.columns[0]], inplace=True) + dfe.columns = pd.Index(["value"]) + info = context.infomation + + logger.debug(info) + for key in dfe.index: + dfe.loc[key, "value"] = info.get(key, "") + logger.debug(dfe) + # 测试结果 + dft = df.iloc[8:] + dft.columns = dft.iloc[0] + dft = dft.iloc[1:] + + # 初始化字典 + class info: + def __init__(self): + self.passed = 0 + self.tests:list[TestCase] = [] 
+ + def add(self, test: TestCase): + self.tests.append(test) + self.passed += test.count() if test.is_passed() else 0 + + @property + def total(self): + return sum([test.count() if test.is_supported() else 0 for test in self.tests]) + + @property + def failed(self): + return sum([test.count() if test.is_failed() else 0 for test in self.tests]) + + dic = {} + for i in range(len(dft)): + dic.setdefault((dft.iat[i, 0], dft.iat[i, 1]), info()) + + # 设置索引 + dft.set_index([dft.columns[0], dft.columns[1]], inplace=True) + dft.sort_index() + # 统计测试用例信息 + for _, test in context.get_used_tests().items(): + dic.setdefault(test.group, info()) + inf = dic[test.group] + inf.add(test) + + details = pd.DataFrame( + columns=["兼容性测试", "检测项", "用例编号", "测试内容", "结论"] + ) + # 写入表格 + for k in dic: + inf = dic[k] + if inf.total != 0: + dft.loc[k, "测试结果"] = f"{round(inf.passed/inf.total*100,2)}%" + dft.loc[k, "结论"] = "PASS" if inf.failed == 0 else "FAILED" + for test in inf.tests: + g1, g2 = k + details.loc[len(details)] = [ + g1, + g2, + test.name, + test.get_test_content(), + test.state.value, + ] + + # 保存excel + logger.debug(details) + logger.debug(df) + for i in range(1, 7): + sheet.cell(i + 1, 5, df.iat[i, 4]) + + for i in range(9, len(df)): + for j in range(3, len(df.columns)): + sheet.cell(i + 1, j + 1, df.iat[i, j]) + sheet2 = excel[excel.sheetnames[1]] + + for i in range(len(details)): + for j in range(len(details.columns)): + sheet2.cell(i + 2, j + 1, details.iat[i, j]) + excel.save(path) + + +if __name__ == "__main__": + context = TestContext(".") + gen_report("resource", context) diff --git a/oec-ascend/oec/Utils.py b/oec-ascend/oec/Utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cdfb20d1c81e3dedab3693307d399bc093cd9ece --- /dev/null +++ b/oec-ascend/oec/Utils.py @@ -0,0 +1,101 @@ +import os +from datetime import timedelta +import shutil +import psutil + + +def merge_env_variables(env_output, var_list): + """ + 解析env命令输出,提取指定环境变量并与当前环境合并 + + 参数: + 
def elapsed_time_str(delta: timedelta):
    """Format ``delta`` compactly, e.g. ``1h0m5.0s`` or ``12.3s``.

    Leading zero units are dropped; seconds are always shown with one
    decimal. The value is rounded to tenths of a second *before* it is split
    into units, so 59.96 s renders as ``1m0.0s`` instead of ``60.0s``.
    """
    whole_seconds = delta.days * 86400 + delta.seconds
    tenths = whole_seconds * 10 + (delta.microseconds + 50000) // 100000
    days, tenths = divmod(tenths, 864000)
    hours, tenths = divmod(tenths, 36000)
    minutes, tenths = divmod(tenths, 600)
    parts = [(days, 'd'), (hours, 'h'), (minutes, 'm'), (tenths / 10, 's')]
    # Start at the first non-zero unit; the seconds field is always printed.
    first = next(
        (i for i, (value, _) in enumerate(parts[:-1]) if value > 0),
        len(parts) - 1,
    )
    return "".join(f"{value}{unit}" for value, unit in parts[first:])


def get_file_path(path: str):
    """Return ``path`` or its form relative to the cwd, whichever is shorter."""
    relpath = os.path.relpath(path, os.getcwd())
    return path if len(path) < len(relpath) else relpath


def check_disk_space(v):
    """Check the free space of the disk holding the current directory.

    Returns False when less than ``v`` GB are free or the disk cannot be
    queried, True otherwise.
    """
    try:
        disk_usage = shutil.disk_usage(os.getcwd())
    except OSError:
        return False
    # Bytes -> GB.
    return disk_usage.free / (1024 ** 3) >= v


def check_memory(v):
    """Check the available system memory.

    Returns False when less than ``v`` GB are available or the memory
    information cannot be read, True otherwise.
    """
    try:
        memory = psutil.virtual_memory()
    except Exception:
        # Best effort: any failure to query memory counts as "not enough".
        return False
    # Bytes -> GB.
    return memory.available / (1024 ** 3) >= v
class SetEnvTestCase(TestCase):
    """Run the configured command and adopt the CANN variables it prints.

    The command's log is parsed as ``env`` output; the variables listed below
    are merged over the current process environment and installed as the
    environment for subsequent tests.
    """

    def execute_command(self):
        super().execute_command()
        if not self.is_passed():
            return
        cann_envname = [
            'ASCEND_TOOLKIT_HOME',
            'ASCEND_HOME_PATH',
            'ASCEND_AICPU_PATH',
            'ASCEND_OPP_PATH',
            'TOOLCHAIN_HOME',
            'LD_LIBRARY_PATH',
            'PYTHONPATH',
            'PATH',
        ]
        env = merge_env_variables(self.log, cann_envname)
        self.context.set_env(env)
        self.logger.debug(self.context.env)
        self.set_state(State.PASS)


class ResetEnvTestCase(BaseTest):
    """Restore the context environment to a copy of the process environment."""

    def execute_command(self):
        # TestContext.env is a read-only property; assigning to it raised
        # AttributeError, so go through the setter instead.
        self.context.set_env(os.environ.copy())
        self.set_state(State.PASS)
class HDKInfomationCase(TestCase):
    """Extract driver version, NPU model and NPU count from the command log."""

    def check_result(self, log, return_code):
        super(HDKInfomationCase, self).check_result(log, return_code)
        if self.is_failed():
            return
        info = self.context.infomation
        rst = re.search(r"Version:\s+(\S+)\s", log)
        if rst:
            info['Ascend HDK Version'] = rst.group(1)
        # Table rows of the form "| <index> <name> |".
        matches = re.findall(r'\|\s+\d+\s+(\S+)\s+\|', log)
        # Table cells holding an id such as "0000:C1:00.0"; the dot is now
        # escaped so it only matches a literal '.'.
        matches2 = re.findall(r'\|\s+\w{4}:\w{2}:\w{2}\.\w\s+\|', log)
        info.setdefault('NPU', "unknow")
        info.setdefault('Count', 0)
        if matches:
            info['NPU'] = matches[0]
        if matches2:
            info['Count'] = len(matches2)

        if info['Count'] > 1:
            info["昇腾硬件"] = f"{info['NPU']} × {info['Count'] }"
        else:
            info["昇腾硬件"] = f"{info['NPU']}"

        self.logger.debug(
            f"HDK NPU:{info['NPU']}, Count:{info['Count']}")


class CANNNPUInfomationCase(TestCase):
    """Read the NPU name and device count from a two-line command log."""

    def check_result(self, log, return_code):
        super(CANNNPUInfomationCase, self).check_result(log, return_code)
        if self.is_failed():
            return
        # Expected output: first line the NPU name, second line the count.
        lines = log.strip().splitlines()
        if len(lines) != 2 or not lines[1].strip().isdigit():
            self.set_state(State.FAIL)
            self.set_reason(
                f"unexpected NPU information output of {self.name}: {log!r}"
            )
            return
        npu, count = lines[0].strip(), lines[1].strip()
        self.logger.debug(f"NPU:{npu}, Count:{count}")
        info = self.context.infomation
        info['NPU'] = npu
        info['Count'] = int(count)
        if info['Count'] > 1:
            info["昇腾硬件"] = f'{npu} × {count}'
        else:
            info["昇腾硬件"] = f'{npu}'
        self.set_state(State.PASS)


class CANNVersionInfomationCase(TestCase):
    """Store the command output as the CANN version."""

    def check_result(self, log, return_code):
        super(CANNVersionInfomationCase, self).check_result(log, return_code)
        if self.is_failed():
            return
        if log == "":
            self.set_state(State.FAIL)
            self.set_reason(f"{self.name} produced no CANN version output")
            return

        self.logger.debug(f"CANN Version = {log}")
        self.context.infomation['CANN Version'] = log
        self.set_state(State.PASS)
+import random
+import sys
+import time
+import threading
+from datetime import datetime
+from importlib import import_module
+from logging import getLogger
+import shutil
+from oec.BaseTestCase import TestCase
+from oec.TestContext import TestContext
+import oec.BaseTest as BaseTestModule
+
+from oec.common.EnvTestCase import SetEnvTestCase
+from oec.TestReport import gen_report
+import oec.common.env_test as env
+from oec.Utils import check_disk_space, check_memory
+logger = getLogger("oec-ascend")
+
+
+def init_logger(level=logging.INFO):
+    """Send records below ERROR to stdout at ``level`` and ERROR and above to stderr."""
+    class ErrorFilter(logging.Filter):
+        def filter(self, record):
+            return record.levelno < logging.ERROR
+
+    logger.setLevel(logging.DEBUG)
+    stdout = logging.StreamHandler(sys.stdout)
+    stdout.setLevel(level)
+    stdout.addFilter(ErrorFilter())
+    logger.addHandler(stdout)
+    stderr = logging.StreamHandler(sys.stderr)
+    stderr.setFormatter(
+        logging.Formatter("[%(levelname)s][%(pathname)s:%(lineno)d] %(message)s")
+    )
+    stderr.setLevel(logging.ERROR)
+    logger.addHandler(stderr)
+
+def get_targets(resource_root):
+    """Return the immediate sub-directory names of ``resource_root`` ([] if it cannot be walked)."""
+    return next(os.walk(resource_root,topdown=True), (None, [], None))[1]
+
+def argparse_handler(targets):
+    """Parse the command line; ``targets`` is the list of values accepted by --target."""
+    parser = argparse.ArgumentParser(
+        prog="oec-ascend",
+        description="Ascend Operating System Compatibility Verification Tool",
+    )
+    parser.add_argument(
+        "-p",
+        "--product",
+        required=True,
+        choices=['A2', 'A3', 'A5', 'A300'],
+    )
+
+    parser.add_argument(
+        "-t",
+        "--target",
+        required=True,
+        choices=targets,
+        help="offering of testcase.",
+    )
+
+    return parser.parse_args()
+
+def read_dirname_map(path:str):
+    """Parse a file of two whitespace-separated columns per line into a dict; exits on error."""
+    if not os.path.exists(path):
+        logger.fatal(f"{path} was not found")
+        exit(500)
+    dirname_map = {}
+    with open(path, encoding="utf-8") as f:
+        for idx, line in enumerate(f, 1):
+            strs = line.split()
+            if len(strs) != 2:
+                logger.fatal(f"Syntax error in file {path}, line {idx}")
+                exit(510)
+            dirname_map[strs[0]] = strs[1]
+    return dirname_map
+
+def find_ascend_test_in_dir(path: str):
+    """Create a TestCase for every TEST.sh / TEST_*.sh script found under ``path``.
+
+    Only directories three levels below ``path`` are scanned; map.config names the two group levels.
+    """
+    logger.info(f"test case director is '{path}' loading...")
+    sys.path.append(path)
+    level = len(path.split(os.path.sep))
+    offering = os.path.basename(path)
+    dirname_map = read_dirname_map(f"{path}/map.config")
+    for prefix,dirs,files in os.walk(path,topdown=True):
+        dirs.sort()
+        logger.debug(prefix)
+        parents = prefix.split(os.path.sep)
+        if len(parents) - level != 3:
+            continue
+        dirs.clear()
+        level1_group,level2_group,testcase_name = parents[-3],parents[-2],parents[-1]
+        group1_name = dirname_map.get(level1_group)
+        group2_name = dirname_map.get(f"{level1_group}/{level2_group}")
+        if group1_name is None or group2_name is None:
+            logger.error(f"{level1_group} -> {group1_name}, {level1_group}/{level2_group} -> {group2_name}")
+            continue
+        test_files = []
+        for name in sorted(files):
+            if not name.endswith(".sh"):
+                continue
+            if name == "TEST.sh":
+                test_files.append(name)
+            if name.startswith("TEST_"):
+                test_files.append(name)
+        if not test_files:
+            logger.error(f"Test Cases was not found in the director {prefix}")
+            continue
+        for name in test_files:
+            postfix = name[len("TEST_"):-len(".sh")] if name.startswith("TEST_") else ""
+            TestCase(
+                offering=offering,
+                group=(group1_name,group2_name),
+                name = f"{testcase_name}{'_' if postfix else ''}{postfix}",
+                cmd=["bash", name],
+                origin_file=f"{prefix}/{name}",
+                cwd=prefix,
+                timeout=3600 # default timeout: 1 hour
+            )
+
+
+def get_absolute_out_path(output):
+    """Return ``output`` as an absolute path."""
+    return os.path.abspath(output)
+
+
+class HideCursor:
+    """Hide the terminal cursor on hide() and show it again when the object is deleted."""
+    def __init__(self):
+        self.state = False
+
+    def hide(self):
+        self.state = True
+        print("\033[?25l",end="",flush=True)
+
+    def __del__(self):
+        if not self.state:
+            return
+        print("\033[?25h",end="",flush=True)
+
+hider = HideCursor()
+def print_state(context: TestContext):
+    """Redraw the state distribution in place every 0.125 s until ``context.finished`` is set."""
+    hider.hide() # hide the cursor
+    last_lines_len = 0
+    def update_state():
+        nonlocal last_lines_len
+        state = context.get_state_distribution_str()
+        lines = state.split('\n')
+        lines_len = 0
+        terminal_colums = shutil.get_terminal_size().columns  # falls back to 80 when stdout is not a terminal
+        logger.info(f"\033[{last_lines_len + 1}A")
+        for v in lines:
+            for l in range(0, len(v), terminal_colums):
+                logger.info(f"{v[l:l + terminal_colums]}\033[K")
+                lines_len += 1
+        for _ in range(lines_len, last_lines_len):
+            logger.info(f"\033[K")
+        delta_lines = last_lines_len -lines_len
+        if delta_lines > 0:
+            logger.info(f"\033[{delta_lines + 1}A")
+        last_lines_len = lines_len
+
+    while not context.finished:
+        update_state()
+        time.sleep(0.125)
+    update_state()
+
+def enable_ansi_windows():
+    """Enable ANSI escape sequence support on Windows."""
+    if sys.platform == "win32":
+        import ctypes
+        kernel32 = ctypes.windll.kernel32
+        kernel32.SetConsoleMode(kernel32.GetStdHandle(-11), 7) # enable VT100 mode
+
+def init_env_test_case(offering):
+    """Instantiate the five built-in environment-information cases for ``offering``."""
+    env.OSInfomationCase(
+        offering=offering,
+        group=("运行环境","环境信息"),
+        name='READ_OS_INFOMATION',
+    )
+
+    env.HDKInfomationCase(
+        offering=offering,
+        group=("运行环境","环境信息"),
+        name='READ_DRIVER_INFOMATION',
+        cmd = ['npu-smi', 'info'],
+        cwd = f"{os.path.dirname(__file__)}/common",
+        with_case_info=False
+    )
+
+    SetEnvTestCase(
+        offering=offering,
+        group=("运行环境","CANN信息"),
+        name="READ_CANN_SET_ENV",
+        cmd=['bash', '-c',f"source {BaseTestModule.Context.cann_path}/ascend-toolkit/set_env.sh && env"],
+        exclude=None,
+        cwd = f"{os.path.dirname(__file__)}/common",
+        with_case_info=False
+    )
+
+    env.CANNVersionInfomationCase(
+        offering=offering,
+        group=("运行环境","CANN信息"),
+        name='READ_CANN_VERSION_INFOMATION',
+        cmd = ['python3', 'get_cann_version.py'],
+        cwd=f"{os.path.dirname(__file__)}/common",
+        with_case_info=False
+    )
+
+    env.CANNNPUInfomationCase(
+        offering=offering,
+        group=("运行环境","CANN信息"),
+        name='READ_CANN_NPU_INFOMATION',
+        cmd = ['python3', 'get_npu_info.py'],
+        cwd = f"{os.path.dirname(__file__)}/common",
+        with_case_info=False
+    )
+
+
+def get_confirmation(prompt):
+    """
+    Show ``prompt`` and ask on stdin whether to continue.
+    - Returns (None) for y/yes
+    - Exits the process with status 550 for n/no
+    - Prompts for re-entry for other inputs
+    """
+    logger.warning(f"{prompt} Do you want to continue? [yes/no]")
+    while True:
+        response = input().strip().lower()
+        if response in ('y', 'yes'):
+            return
+        elif response in ('n', 'no'):
+            exit(550)
+        else:
+            logger.warning("\033[33mInvalid input. Do you want to continue? [yes/no]\033[0m")
+
+
+def run_target_test(resource_root, cmd_args, target, verbose, timestamp):
+    """Check resources, set up a fresh context, run every case of ``target`` and write the report."""
+    # Check the remaining system resources against the thresholds below
+    disk_space = 100#GB
+    memory_space = 96#GB
+    if not check_disk_space(disk_space):
+        get_confirmation(f"The available disk space of the current directory is less than {disk_space}GB, which may cause exceptions.")
+    if not check_memory(memory_space):
+        get_confirmation(f"The currently available running memory is less than {memory_space}GB, which may cause exceptions.")
+    # Reset the context
+    Context = BaseTestModule.reset_context()
+
+    product = cmd_args.product
+    output_dir = "./output"
+    data_dir = os.path.dirname(__file__) + "/data"
+    cann_dir = "/usr/local/Ascend"
+    work_dir = os.path.realpath("./")
+
+    # If ASCEND_HOME_PATH is set, take the install root two levels above it
+    ascend_home_path = os.environ.get('ASCEND_HOME_PATH')
+    if ascend_home_path is not None:
+        cann_dir = os.path.realpath(f"{ascend_home_path}/../..")
+    logger.info(f"Ascend install path is {cann_dir}")
+
+    output = os.path.abspath(output_dir)
+    data_path = os.path.realpath(data_dir)
+    if not os.path.exists(data_path):
+        logger.fatal(f"{data_path} is not existing, please create it first!")
+        exit(1000)
+    cann_path = os.path.realpath(cann_dir)
+    if not os.path.exists(cann_path):
+        logger.fatal(f"{cann_path} is not existing, please install CANN first!")
+        exit(2000)
+    Context.set_product(product)
+    Context.set_target(target)
+    Context.set_data_path(data_path)
+    Context.set_cann_path(cann_path)
+    Context.set_output(output, timestamp)
+    Context.set_work_path(work_dir)
+    resource = f"{resource_root}/{target}"
+    resource = os.path.realpath(resource)
+
+    init_env_test_case(target)
+    find_ascend_test_in_dir(resource)
+
+    Context.set_test_order(resource)
+    logger.info(
+        f"Find {len(Context.get_tests())} test cases, using {len(Context.get_used_tests())} test cases."
+    )
+    state_monitor = threading.Thread(
+        name="state_monitor", target=print_state, args=[Context]
+    )
+    if not verbose:
+        Context.finished = False
+        enable_ansi_windows()
+        state_monitor.start()
+    try:
+        Context.run_tests()
+    finally:
+        # Always stop the monitor: its non-daemon thread would otherwise keep the process alive
+        if not verbose:
+            Context.finished = True
+            state_monitor.join()
+    logger.info(f"Clean up tmp.")
+    shutil.rmtree(f"{Context.output_dir}/tmp", ignore_errors=True)
+    logger.info(f"Complete!")
+    gen_report(resource, Context)
+    logger.info(f"Generate an execution report with the path {Context.get_output_dir()}")
+
+def main():
+    """Set up logging, parse the arguments and run every selected target in turn."""
+    verbose = False
+    init_logger(logging.DEBUG if verbose else logging.INFO)
+
+    # Collect the supported targets
+    resource_root = os.path.realpath(os.path.dirname(__file__) + "/test_cases")
+    targets = get_targets(resource_root)
+    # Parse the command line
+    cmd_args = argparse_handler(['all'] + targets)
+
+    if cmd_args.target != "all":
+        targets = [cmd_args.target]
+    timestamp = f'{datetime.now().strftime("%Y%m%d-%H-%M-%S")}-{random.randint(100,999)}'
+    # Run the tests
+    for i, target in enumerate(targets):
+        logger.info(f"Targets: {targets} Target: {target} ({i+1}/{len(targets)})")
+        run_target_test(resource_root, cmd_args, target, verbose, timestamp)
+        logger.info("")
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        del hider # restore the cursor
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_DEVICE.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_DEVICE.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b65041869e66351791e5dd92da01c53d17949a0e
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_DEVICE.sh
@@ -0,0 +1 @@
+python3 ./test_acl_device.py
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_EVENT.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_EVENT.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e9331de883f4ced45bf728a30d9575cd93924f23
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_EVENT.sh
@@ -0,0 +1 @@
+python3 ./test_acl_event.py
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_LOG.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_LOG.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e53ec83ea90f5d56687ca03e1522f26aecba9345
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_LOG.sh
@@ -0,0 +1 @@
+python3 ./test_acl_log.py
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_OP.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_OP.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e880be745333dcc0300c0af630b330ca37445671
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_OP.sh
@@ -0,0 +1,2 @@
+mkdir -p "$OEC_OUTPUT_PATH"
+python3 ./test_acl_op.py "$OEC_DATA_PATH" "$OEC_OUTPUT_PATH"
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_THREAD.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_THREAD.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c7d2a6526cca22aa72d82f25e5943a7d3efc5226
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/TEST_THREAD.sh
@@ -0,0 +1 @@
+python3 ./test_acl_thread.py
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_device.py b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_device.py
new file mode 100644
index 0000000000000000000000000000000000000000..5707db6f48c700a2dccc503f2528e2ccf3463501
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_device.py
@@ -0,0 +1,135 @@
+# encoding: utf-8
+# Copyright (C) Huawei Technologies Co., Ltd. 2022-2023
+import unittest
+import logging
+
+
+import utils as util
+import acl
+
+ACL_DEVICE = 0
+ACL_HOST = 1
+ACL_RT_OVERFLOW_MODE_SATURATION = 0
+ACL_RT_OVERFLOW_MODE_INFNAN = 1
+ACL_RT_OVERFLOW_MODEL_UNDEF=2
+
+
+class TestDevice(unittest.TestCase):
+
+    @classmethod
+    def tearDownClass(cls):
+        #after all test
+        pass
+
+    @classmethod
+    def setUpClass(cls):
+        # before all test
+        pass
+
+    def setUp(self):
+        # before one test
+        pass
+
+    def tearDown(self):
+        # after one test
+        pass
+
+    def test_device_001_normal(self):
+        """
+        test case for setting and resetting device
+        1. set device 0
+        2. get and check device id
+        3. reset device 0
+        """
+        ret = acl.rt.set_device(0)
+        self.assertEqual(ret, 0)
+        d, ret = acl.rt.get_device()
+        self.assertEqual(d, 0)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.reset_device(0)
+        self.assertEqual(ret, 0)
+
+
+    def test_device_007_get_device_utilization_rate(self):
+        """
+        get the utilization of the device's cube, aicpu and vector core units
+        1. get the number of NPUs in the environment
+        2. get the utilization of each NPU
+        3. check that the result contains every required field
+        """
+        n, ret =acl.rt.get_device_count()
+        self.assertEqual(ret, 0)
+        tmp = {
+            'cube_utilization': 0,
+            'vector_utilization': 0,
+            'aicpu_utilization': 0,
+            'memory_utilization': 0,
+            'utilization_extend': 0
+        }
+        for i in range(n):
+            rst, ret = acl.rt.get_device_utilization_rate(i)
+            self.assertEqual(ret, 0)
+            for key in tmp:
+                self.assertIn(key, rst)
+
+    def test_device_009_query_device_status(self):
+        """
+        test device status
+        1. get device count
+        2. query status for each device
+        3. check status is ok for each device
+        """
+        n, ret = acl.rt.get_device_count()
+        self.assertEqual(ret, 0)
+        for i in range(n):
+            status, ret = acl.rt.query_device_status(i)
+            self.assertEqual(ret, 0)
+            self.assertEqual(status, 0)
+
+    def test_device_010_peek_at_last_error(self):
+        """
+        test device peek at last error
+        1. make a mistake to raise error
+        2. peek last error
+        3. check error is not cleared
+        """
+        ret = acl.rt.set_device(-1)
+        self.assertNotEqual(ret, 0)
+
+        # the error code can be read
+        ret = acl.rt.peek_at_last_error(0)
+        self.assertNotEqual(ret, 0)
+
+        # the error code has not been cleared by the first peek
+        ret = acl.rt.peek_at_last_error(0)
+        self.assertNotEqual(ret, 0)
+
+    def test_device_011_synchronize_device_with_timeout(self):
+        """
+        test synchronize device with timeout
+        """
+        ret = acl.rt.set_device(ACL_DEVICE)
+        self.assertEqual(ret, 0)
+
+        ret = acl.rt.synchronize_device_with_timeout(5)
+        self.assertEqual(ret, 0)
+
+    def test_device_017_reset_device_force(self):
+        """
+        test reset device force
+        """
+        ret = acl.rt.set_device(ACL_DEVICE)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.reset_device(ACL_DEVICE)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.set_device(ACL_DEVICE)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.reset_device_force(ACL_DEVICE)
+        self.assertEqual(ret, 0)
+
+if __name__ == "__main__":
+    suite = util.switch_cases(TestDevice, "all")
+    result = unittest.TextTestRunner(verbosity=2).run(suite)
+    if result.wasSuccessful():
+        exit(0)
+    exit(1)
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_event.py b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_event.py
new file mode 100644
index 0000000000000000000000000000000000000000..a51561ad9ae33f09f81554d8da1fe21d3ef6021e
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_event.py
@@ -0,0 +1,332 @@
+# encoding: utf-8
+import unittest
+import time
+import threading
+import acl
+import utils as util
+
+
+
+ACL_EVENT_TIME_LINE = 0x0000008
+ACL_EVENT_RECORD_STATUS_NOT_READY = 0
+ACL_EVENT_RECORD_STATUS_COMPLETE = 1
+
+g_callbackRunFlag = True
+
+
+def launch_callback_fun_1(args_list):
+    for i in range(3):
+        print("lanuch_callback_fun 1")
+        print(args_list)
+
+
+def launch_callback_fun_2(args_list):
+    for i in range(3):
+        print("lanuch_callback_fun 2")
+        print(args_list)
+
+
+def callback_thr_func(args_list):
+    print("[callbacl_thr_func] args = ", args_list[0], args_list[1])
+    timeout = args_list[1]
+
+    print("[callback_thr_func] g_callbackRunFlag = ", g_callbackRunFlag, timeout)
+
+    while g_callbackRunFlag is True:
+        print("[callback_thr_func] g_callbackRunFlag = ", g_callbackRunFlag)
+        ret = acl.rt.process_report(timeout)
+        print("[INFO] process_report ret = ", ret)
+
+    print("[INFO] end")
+
+
+class TestEvent(unittest.TestCase):
+
+    def setUp(self):
+
+        pass
+
+    def tearDown(self):
+
+        pass
+
+    @classmethod
+    def tearDownClass(cls):
+        ret = acl.finalize()
+        assert ret == 0
+
+    @classmethod
+    def setUpClass(cls):
+        ret = acl.init()
+        assert ret == 0
+
+    def test_event_001_normal(self):
+        """
+        test case for creating and destroying event
+        :return:
+        """
+        ret = acl.rt.set_device(0)
+        self.assertEqual(ret, 0)
+        et, ret = acl.rt.create_event()
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_event(et)
+        self.assertEqual(ret, 0)
+
+    def test_event_006_callback(self):
+        """
+        test case for launching a callback function to do something
+        1.init resource : set_device create_stream
+        2.start a task by starting a thread
+          the thread triggers callback processing by calling process_report
+        3.register the thread to handle the callback function
+        4.launch a callback function
+        5.unregister the thread
+        6.free resources
+        :return:
+        """
+        ret = acl.rt.set_device(0)
+        self.assertEqual(ret, 0)
+        stream, ret = acl.rt.create_stream()
+        self.assertEqual(ret, 0)
+
+        timeout = 1000
+        global g_callbackRunFlag
+        g_callbackRunFlag = True
+        args_list = [g_callbackRunFlag, timeout]
+        thr_id, ret = acl.util.start_thread(callback_thr_func,args_list)
+        self.assertEqual(ret,0)
+
+        ret = acl.rt.subscribe_report(thr_id, stream)
+        self.assertEqual(ret, 0)
+
+        ret = acl.rt.launch_callback(launch_callback_fun_1, ["zzq", "qzz"], 1, stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.launch_callback(launch_callback_fun_2, ["zzq", "qzz"], 1, stream)
+        self.assertEqual(ret, 0)
+
+        ret = acl.rt.synchronize_stream(stream)
+        self.assertEqual(ret, 0)
+
+        ret = acl.rt.subscribe_report(thr_id, 0)
+        self.assertEqual(ret, 0)
+
+        ret = acl.rt.launch_callback(launch_callback_fun_1, ["zzq", "qzz"], 1, 0)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.launch_callback(launch_callback_fun_2, ["zzq", "qzz"], 1, 0)
+        self.assertEqual(ret, 0)
+
+        ret = acl.rt.synchronize_stream(0)
+        self.assertEqual(ret, 0)
+
+        g_callbackRunFlag = False
+
+        ret =acl.rt.unsubscribe_report(thr_id, stream)
+        self.assertEqual(ret, 0)
+
+        ret =acl.rt.unsubscribe_report(thr_id, 0)
+        self.assertEqual(ret, 0)
+
+        ret =acl.util.stop_thread(thr_id)
+        self.assertEqual(ret, 0)
+
+        ret = acl.rt.destroy_stream(stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.reset_device(0)
+        self.assertEqual(ret, 0)
+
+    def test_event_0008_multi_streams(self):
+        """
+        test case for acl synchronization waiting interface with multi-streams
+        1. create context, stream1, stream2, event1, event2
+        2. record event1 and event2 to the stream1 handle
+        3. call stream_wait_event to block stream2 until event1 has finished
+        4. query the elapsed time between the two events
+        5. free resources
+        :return:
+        """
+        device_id = 0
+
+        context, ret = acl.rt.create_context(device_id)
+        self.assertEqual(ret, 0)
+        stream, ret = acl.rt.create_stream()
+        self.assertEqual(ret, 0)
+        stream_2, ret = acl.rt.create_stream()
+        self.assertEqual(ret, 0)
+        event_1, ret = acl.rt.create_event()
+        self.assertEqual(ret, 0)
+        event_2, ret = acl.rt.create_event()
+        self.assertEqual(ret, 0)
+        ret = acl.rt.record_event(event_1, stream)
+        self.assertEqual(ret, 0)
+        time.sleep(0.005)
+        ret = acl.rt.record_event(event_2, stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.synchronize_event(event_1)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.stream_wait_event(stream_2, event_1)
+        self.assertEqual(ret, 0)
+        status, ret = acl.rt.query_event_wait_status(event_1)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.synchronize_event(event_2)
+        self.assertEqual(ret, 0)
+        status, ret = acl.rt.query_event_status(event_2)
+        self.assertEqual(ret, 0)
+        self.assertEqual(status, ACL_EVENT_RECORD_STATUS_COMPLETE)
+
+        ret = acl.rt.synchronize_stream(stream)
+        self.assertEqual(ret, 0)
+        ms, ret = acl.rt.event_elapsed_time(event_1, event_2)
+        self.assertEqual(ret, 0)
+        self.assertLessEqual(ms, 10)
+
+        ret = acl.rt.destroy_event(event_1)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_event(event_2)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_stream(stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_stream(stream_2)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_context(context)
+        self.assertEqual(ret, 0)
+
+    def test_event_009_elapsed_time(self):
+        """
+        test case for acl event elapsed_time
+        :return:
+        """
+        device_id = 0
+        context, ret =acl.rt.create_context(device_id)
+        self.assertEqual(ret, 0)
+        stream, ret =acl.rt.create_stream()
+        self.assertEqual(ret, 0)
+        event_1, ret =acl.rt.create_event_with_flag(ACL_EVENT_TIME_LINE)
+        self.assertEqual(ret, 0)
+        event_2, ret = acl.rt.create_event_with_flag(ACL_EVENT_TIME_LINE)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.record_event(event_1, stream)
+        self.assertEqual(ret, 0)
+        # sleep 2s to simulate the computational task
+        time.sleep(2)
+        ret = acl.rt.record_event(event_2, stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.synchronize_stream(stream)
+        self.assertEqual(ret, 0)
+        ms, ret = acl.rt.event_elapsed_time(event_1,event_2)
+        self.assertEqual(ret, 0)
+        print("[INFO] ms = {}".format(ms))
+
+        ret = acl.rt.destroy_event(event_1)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_event(event_2)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_stream(stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_context(context)
+        self.assertEqual(ret, 0)
+
+    def launch_callback_with_pythreading(self, blocked, sleep_time, blocked_time):
+        """
+        1. create the callback-processing thread
+        2. wrap sleep in a function that matches the callback signature
+        3. call launch_callback with the blocking argument set to ``blocked``
+        4. verify that the stream was blocked for ``blocked_time`` seconds
+        5. stop the thread
+        """
+        ret = acl.rt.set_device(0)
+        self.assertEqual(ret, 0)
+        stm, ret = acl.rt.create_stream()
+        self.assertEqual(ret, 0)
+        thd_flag = True
+
+        def process_report_loop():
+            # callback-processing thread
+            max_time = 15 # run for at most 15 s
+            t = time.perf_counter()
+            while thd_flag and time.perf_counter() - t < max_time:
+                # call process_report again every 200 ms
+                acl.rt.process_report(200)
+
+        thd = threading.Thread(target=process_report_loop)
+        thd.start()
+
+        ret = acl.rt.subscribe_report(thd.ident, stm)
+        self.assertEqual(0, ret)
+
+        def sleep_cbk(t):
+            for i in t:
+                time.sleep(i)
+
+        ret = acl.rt.synchronize_stream(stm)
+        self.assertEqual(ret, 0)
+        st = time.perf_counter()
+        # launch a callback that sleeps for sleep_time seconds
+        ret = acl.rt.launch_callback(sleep_cbk, [sleep_time], blocked, stm)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.synchronize_stream(stm)
+        self.assertAlmostEqual(blocked_time, time.perf_counter() -st, delta=0.01)
+        self.assertEqual(ret, 0)
+
+        # set thd_flag to False so the callback thread leaves its loop
+        thd_flag = False
+        self.assertEqual(ret, 0)
+
+        thd.join()
+
+        ret = acl.rt.unsubscribe_report(thd.ident, stm)
+
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_stream(stm)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.reset_device(0)
+        self.assertEqual(ret, 0)
+
+    def test_event_013_launch_callback_with_pythread_blocaked(self):
+        """
+        use a Python threading thread as the subscribe_report callback thread and verify that a callback launched with blocked=1 blocks the stream
+        with a blocked stream the measured blocking time equals the sleep time (2 s)
+        """
+        self.launch_callback_with_pythreading(1, 2, 2)
+
+    def test_event_017_ex_event(self):
+        """
+        test case for acl event elapsed_time
+        :return:
+        """
+        device_id = 0
+        context, ret =acl.rt.create_context(device_id)
+        self.assertEqual(ret, 0)
+        stream, ret =acl.rt.create_stream()
+        self.assertEqual(ret, 0)
+        event_1, ret =acl.rt.create_event_ex_with_flag(ACL_EVENT_TIME_LINE)
+        self.assertEqual(ret, 0)
+        event_2, ret = acl.rt.create_event_ex_with_flag(ACL_EVENT_TIME_LINE)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.record_event(event_1, stream)
+        self.assertEqual(ret, 0)
+        # sleep 2s to simulate the computational task
+        time.sleep(2)
+        ret = acl.rt.record_event(event_2, stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.synchronize_stream(stream)
+        self.assertEqual(ret, 0)
+        ms, ret = acl.rt.event_elapsed_time(event_1,event_2)
+        self.assertEqual(ret, 0)
+        print("[INFO] ms = {}".format(ms))
+
+        ret = acl.rt.destroy_event(event_1)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_event(event_2)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_stream(stream)
+        self.assertEqual(ret, 0)
+        ret = acl.rt.destroy_context(context)
+        self.assertEqual(ret, 0)
+
+if __name__ == "__main__":
+    suite = util.switch_cases(TestEvent, "all")
+    result = unittest.TextTestRunner(verbosity=2).run(suite)
+    # The exit status mirrors the test result; exit(1) stays inside the guard so imports do not exit
+    if result.wasSuccessful():
+        exit(0)
+    exit(1)
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_log.py b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_log.py
new file mode 100644
index 0000000000000000000000000000000000000000..79971c63ee75d712b98e7c44ab80919c6093f992
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_log.py
@@ -0,0 +1,58 @@
+import unittest +import acl +import utils as util + + +class TestLog(unittest.TestCase): + + def setUp(self) -> None: + pass + + def tearDownClass(cls) -> None: + pass + + @classmethod + def tearDownClass(cls) -> None: + pass + + @classmethod + def setUpClass(cls): + pass + + def test_log_001_normal(self): + """ + test case for acl app_log, recording debug/info/warning/error logs + """ + a = [1, 2, 3] + b = (3, 2 ,1) + c = {4, 5, 6} + d = {"a": 1, "b":2 ,"c": 3} + e= 1.1111 + f = 1111111111 + g = "1111111111" + # log level = 0, debug + acl.app_log(0, "a = {}, b = {}, c = {}, d = {}, e = {}, f = {}, g = {}".format(a,b,c,d,e,f,g)) + # log level = 1, info + acl.app_log(1, "a = {}, b = {}, c = {}, d = {}, e = {}, f = {}, g = {}".format(a,b,c,d,e,f,g)) + # log level = 2, warning + acl.app_log(2, "a = {}, b = {}, c = {}, d = {}, e = {}, f = {}, g = {}".format(a,b,c,d,e,f,g)) + # log level = 3, error + acl.app_log(3, "a = {}, b = {}, c = {}, d = {}, e = {}, f = {}, g = {}".format(a,b,c,d,e,f,g)) + + def test_log_002_err_parameter(self): + """ + test case for acl app_log with invalid parameter + """ + # log level = 3, error + params = {'type': 'is', + 'params': [(), ('', ''), (123, 321)]} + self.assertEqual(util.params_check(self, params, acl.app_log), 0) + +if __name__ == "__main__": + + suite = util.switch_cases(TestLog, "all") + result = unittest.TextTestRunner(verbosity=2).run(suite) + if result.wasSuccessful(): + exit(0) +exit(1) + \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_op.py b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_op.py new file mode 100644 index 0000000000000000000000000000000000000000..d995dcb809c87549566f39cf2f978cb83ca82d66 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/test_acl_op.py @@ -0,0 +1,392 @@ +# -*- coding:utf-8 -*- +# 版权所有 (c) 华为技术有限公司 2022-2023 +import unittest +from threading import Lock +import os +import shutil +import 
numpy as np +import acl +import utils as util +import sys +import subprocess +import json +#from constant import Const + +data_path = sys.argv[1] +print(f"data path is {data_path}") + +output_dir = sys.argv[2] +print(f"output dir is {output_dir}") + +#get soc version +soc_version = acl.get_soc_name() +print(f"soc version is {soc_version}") + +add_json=""" +[ + { + "op": "Add", + "input_desc": [ + { + "format": "ND", + "shape": [8, 16], + "type": "int32" + }, + { + "format": "ND", + "shape": [8, 16], + "type": "int32" + } + ], + "output_desc": [ + { + "format": "ND", + "shape": [8, 16], + "type": "int32" + } + ] + } +] +""" +add_json = json.loads(add_json) + +with open(f"{output_dir}/add.json", 'w', encoding='utf-8') as json_file: + json.dump(add_json, json_file, ensure_ascii=False, indent=4) + +#transfer op model +subprocess.run(f"atc --singleop={output_dir}/add.json --output={output_dir} --soc_version={soc_version}", shell=True, cwd=f"{output_dir}") + + +acl_dtype = { + "float32": 0, + "float16": 1, + "int8": 2, + "int32": 3, + "uint8": 4, + "int16": 6, + "uint16": 7, + "uint32": 8, + "int64": 9, + "double": 11, + "bool": 12 +} + +ACL_FORMAT_UNDEFINED = -1 +ACL_FORMAT_NCHW = 0 +ACL_FORMAT_NHWC = 1 +ACL_FORMAT_ND = 2 +ACL_FORMAT_NC1HWC0 = 3 +ACL_FORMAT_FRACTAL_Z = 4 +ACL_MEMCPY_HOST_TO_HOST = 0 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +NPY_BYTE = 1 +ACL_FORMAT_ND = 2 +ACL_MEM_MALLOC_HUGE_FIRST = 0 +ACL_MEM_MALLOC_NORMAL_ONLY = 2 +ACL_STEP_START = 0 +ACL_STEP_END = 1 +ACL_OP_DUMP_OP_AICORE_ARGS = 0x00000001 + +def op_select(in_num, in_desc, out_num, out_desc, op_attr, op_kernel_desc): + """ + operator selector + """ + # get input + tilling_args = [] + args_list = [] + for i in range(in_num): + tilling_args.append(str(acl.get_tensor_desc_dim_v2(in_desc[i], 0)[0])) + tilling_args.append(str(acl.get_tensor_desc_dim_v2(in_desc[i], 1)[0])) + tilling_args.append(tilling_type[str(acl.get_tensor_desc_type(in_desc[i]))]) + + # get output + for i in 
class AclOp(object):
    """Run a two-input single operator through pyACL and fetch its result.

    The instance keeps every ACL handle it creates (tensor descriptors, data
    buffers, device and host memory, the attribute handle and the stream) in
    its own lists and releases them in ``__del__``.
    """

    def __init__(self, a, b):
        """Store the two numpy inputs and create the attr handle and stream.

        Args:
            a: first input array; its dtype and shape are also used for the
                output tensor descriptor.
            b: second input array.
        """
        self.in_list = []
        self.in_host_list = []
        self.in_desc_list = []
        self.in_dev_list = []
        self.out_dev_list = []
        self.host_list = []
        self.out_list = []
        self.out_desc_list = []
        # Keeps the bytes objects whose addresses were handed to ACL alive.
        # NOTE(review): presumably a pointer from bytes_to_ptr is only valid
        # while its bytes object exists - confirm against the pyACL docs.
        self._host_bytes = []
        self.data = [a, b]
        self.type = a.dtype
        self.shape = a.shape
        self.spec_type = ACL_FORMAT_ND
        # attr
        self.attr = acl.op.create_attr()
        assert self.attr != 0
        # stream
        self.stream, ret = acl.rt.create_stream()
        assert ret == 0

    def __del__(self):
        """Release every ACL resource recorded on the instance.

        Each list is walked on its own and missing attributes are tolerated,
        so a partially constructed object (an assert fired half-way through
        ``__init__`` or ``tensor_desc_init``) is still cleaned up.
        """
        for list_name in ("in_list", "in_host_list", "out_list"):
            for data_buf in getattr(self, list_name, ()):
                ret = acl.destroy_data_buffer(data_buf)
                assert ret == 0

        for list_name in ("in_desc_list", "out_desc_list"):
            for desc in getattr(self, list_name, ()):
                acl.destroy_tensor_desc(desc)

        for list_name in ("in_dev_list", "out_dev_list"):
            for dev_ptr in getattr(self, list_name, ()):
                ret = acl.rt.free(dev_ptr)
                assert ret == 0

        for host_ptr in getattr(self, "host_list", ()):
            ret = acl.rt.free_host(host_ptr)
            assert ret == 0

        attr = getattr(self, "attr", None)
        if attr is not None:
            acl.op.destroy_attr(attr)

        stream = getattr(self, "stream", None)
        if stream is not None:
            ret = acl.rt.destroy_stream(stream)
            assert ret == 0

    def tensor_desc_init(self, gen_dataset=True):
        """Create descriptors and buffers for both inputs.

        For every input a tensor descriptor, a host-side data buffer and a
        device-side data buffer (filled with the input bytes) are created.

        Args:
            gen_dataset: when true, also create the output descriptor from
                the dtype/shape of the first input and allocate the output
                buffers.
        """
        # create input output tensors
        for data in self.data:
            desc = acl.create_tensor_desc(acl_dtype[str(data.dtype)], list(data.shape), self.spec_type)
            assert desc != 0
            self.in_desc_list.append(desc)

            size = acl.get_tensor_desc_size(desc)
            bytes_data = data.tobytes()
            self._host_bytes.append(bytes_data)
            data_ptr = acl.util.bytes_to_ptr(bytes_data)
            host_data_buf = acl.create_data_buffer(data_ptr, size)
            assert host_data_buf != 0
            self.in_host_list.append(host_data_buf)

            dev_ptr, ret = acl.rt.malloc(size, ACL_MEM_MALLOC_HUGE_FIRST)
            assert ret == 0
            # Recorded before the copy so __del__ frees it if the copy fails.
            self.in_dev_list.append(dev_ptr)
            ret = acl.rt.memcpy(dev_ptr, size, data_ptr, size, ACL_MEMCPY_HOST_TO_DEVICE)
            assert ret == 0
            data_buf = acl.create_data_buffer(dev_ptr, size)
            assert data_buf != 0
            self.in_list.append(data_buf)

        if gen_dataset:
            out_desc = acl.create_tensor_desc(acl_dtype[str(self.type)], list(self.shape), self.spec_type)
            assert out_desc != 0
            self.out_desc_list.append(out_desc)
            self.gen_output_data_set()

    def gen_output_data_set(self):
        """Allocate device and host memory sized for the first output descriptor."""
        size = acl.get_tensor_desc_size(self.out_desc_list[0])
        out_dev, ret = acl.rt.malloc(size, ACL_MEM_MALLOC_HUGE_FIRST)
        assert ret == 0
        # Recorded before the next assert so __del__ frees it on failure.
        self.out_dev_list.append(out_dev)
        out_data_buf = acl.create_data_buffer(out_dev, size)
        assert out_data_buf != 0
        self.out_list.append(out_data_buf)

        host_ptr, ret = acl.rt.malloc_host(size)
        assert ret == 0
        self.host_list.append(host_ptr)

    def model_update_params(self, op_type):
        """Call acl.op.update_params for ``op_type`` with the current descriptors."""
        ret = acl.op.update_params(op_type, self.in_desc_list, self.out_desc_list, self.attr)
        assert ret == 0

    def _fetch_output(self):
        """Copy the first output from device to host and return it as raw bytes.

        Returns:
            A numpy array of dtype ``np.byte`` holding the output bytes.
        """
        size = acl.get_tensor_desc_size(self.out_desc_list[0])
        ret = acl.rt.memcpy(self.host_list[0], size, self.out_dev_list[0], size, ACL_MEMCPY_DEVICE_TO_HOST)
        assert ret == 0
        bytes_out = acl.util.ptr_to_bytes(self.host_list[0], size)
        return np.frombuffer(bytes_out, dtype=np.byte)

    def model_execute(self, op_type="Add"):
        """Run ``op_type`` with acl.op.execute_v2, wait for the stream, return raw output bytes."""
        # model execute
        ret = acl.op.execute_v2(op_type, self.in_desc_list, self.in_list, self.out_desc_list,
                                self.out_list, self.attr, self.stream)
        print("ret:", ret)
        assert ret == 0, "acl.op.execute_v2 failed, ret = {}".format(ret)
        ret = acl.rt.synchronize_stream(self.stream)
        assert ret == 0, "acl.rt.synchronize_stream failed, ret = {}".format(ret)

        # device to host
        return self._fetch_output()

    def model_op_execute(self, op_type="Add"):
        """Run ``op_type`` with acl.op.execute, wait for the stream, return raw output bytes."""
        # model execute
        ret = acl.op.execute(op_type, self.in_desc_list, self.in_list, self.out_desc_list,
                             self.out_list, self.attr, self.stream)
        assert ret == 0
        ret = acl.rt.synchronize_stream(self.stream)
        assert ret == 0

        # device to host
        return self._fetch_output()

    def np_data_format(self, data, dtype):
        """Reinterpret the bytes of ``data`` as a flat array of ``dtype``."""
        b_arr = data.tobytes()
        arr_2 = np.frombuffer(b_arr, dtype=dtype)
        return arr_2

    def tensor_desc(self):
        """Print element count, name and format of the first input descriptor."""
        size = acl.get_tensor_desc_element_count(self.in_desc_list[0])
        print("size = ", size)
        acl.set_tensor_desc_name(self.in_desc_list[0], "abc")
        print("desc name= ", acl.get_tensor_desc_name(self.in_desc_list[0]))
        # The attribute is in_desc_list; the former spelling "indesc_list"
        # raised AttributeError.
        fmt = acl.get_tensor_desc_format(self.in_desc_list[0])
        print("fmt = ", fmt)

    def op_attr(self):
        """Exercise every acl.op.set_attr_* setter on a temporary attr handle.

        Returns:
            0 when every setter reported success.
        """
        attr = acl.op.create_attr()
        assert attr != 0

        try:
            ret = acl.op.set_attr_bool(attr, "a", 0)
            assert ret == 0
            ret = acl.op.set_attr_int(attr, "b", 1)
            assert ret == 0
            ret = acl.op.set_attr_float(attr, "c", 2.0)
            assert ret == 0
            ret = acl.op.set_attr_string(attr, "d", "123")
            assert ret == 0
            data = [4, 5, 6]
            ret = acl.op.set_attr_list_bool(attr, "e", data)
            assert ret == 0
            data = [1.5, 2.14, 3.11]
            ret = acl.op.set_attr_list_float(attr, "f", data)
            assert ret == 0
            data = [10, 20, 30]
            ret = acl.op.set_attr_list_int(attr, "g", data)
            assert ret == 0
            ret = acl.op.set_attr_list_string(attr, "h", ["1", "2"])
            assert ret == 0
            data = [[10], [20, 30], [40, 50, 60]]
            ret = acl.op.set_attr_list_list_int(attr, "i", data)
            assert ret == 0
        finally:
            # Released even when one of the setters fails.
            acl.op.destroy_attr(attr)
        return 0

    def exe_with_dynamic_shape(self, op_type):
        """Infer the output shape for ``op_type``, then execute the operator.

        The output descriptor is created with shape [-1, -1]; after
        acl.op.infer_shape any dimension still reported as -1 is replaced by
        the second entry of its dimension range.

        Returns:
            The raw output bytes returned by ``model_execute``.
        """
        out_desc = acl.create_tensor_desc(acl_dtype[str(self.type)], [-1, -1], self.spec_type)
        assert out_desc != 0
        self.out_desc_list.append(out_desc)
        ret = acl.op.infer_shape(op_type, self.in_desc_list, self.in_host_list,
                                 1, self.out_desc_list, self.attr)
        assert ret == 0

        tensor_dims = []
        for desc in self.out_desc_list:
            dim_nums = acl.get_tensor_desc_num_dims(desc)
            dim_size = []
            for j in range(dim_nums):
                dim, ret = acl.get_tensor_desc_dim_v2(desc, j)
                assert ret == 0
                if dim == -1:
                    dim_range, ret = acl.get_tensor_desc_dim_range(desc, j, 2)
                    assert ret == 0
                    dim = dim_range[1]
                dim_size.append(dim)
            tensor_dims.append(dim_size)
        print("[INFO] infer result: {}".format(tensor_dims))

        self.shape = tensor_dims[0]
        self.gen_output_data_set()
        result = self.model_execute(op_type)
        return result
if __name__ == "__main__":
    # Run every test_* method of TestOp and report the outcome through the
    # process exit status: 0 when all tests passed, 1 otherwise.
    # The exit call lives inside the guard so that importing this module no
    # longer terminates the interpreter.
    suite = util.switch_cases(TestOp, "all")
    result = unittest.TextTestRunner(verbosity=2).run(suite)
    sys.exit(0 if result.wasSuccessful() else 1)
break + time.sleep(1) + ret = acl.rt.process_report(timeout) + print(f"acl.rt.process_report(timeout) ret = {ret}") + localtime = time.asctime(time.localtime(time.time())) + print("[INFO] after process_report", g_callbackFunFlag, localtime) + + +class TestThread(unittest.TestCase): + + def setUp(self) -> None: + pass + + def tearDownClass(cls) -> None: + pass + + @classmethod + def tearDownClass(cls) -> None: + ret = acl.rt.reset_device(0) + assert ret == 0 + ret = acl.finalize() + assert ret == 0 + + @classmethod + def setUpClass(cls): + ret = acl.init() + assert ret == 0 + ret == acl.rt.set_device(0) + assert ret == 0 + + def test_thread_001_normal(self): + """ + test case for starting a c thread + """ + + global g_callbackFunFlag + global g_context + global g_timeout + + g_callbackFunFlag = True + ctx, ret = acl.rt.create_context(0) + self.assertEqual(ret, 0) + g_context = ctx + timeout = 1000 + g_timeout = timeout + + args_list = [ctx, timeout] + callback_thr_id, ret = acl.util.start_thread(callback_thr_func, args_list) + self.assertEqual(ret, 0) + g_callbackFunFlag = False + + ret = acl.util.stop_thread(callback_thr_id) + self.assertEqual(ret, 0) + ret = acl.rt.destroy_context(ctx) + self.assertEqual(ret, 0) + + def test_thread_002_two_thread(self): + """ + test case for starting two c threads + """ + global g_callbackFunFlag + global g_context + global g_timeout + + g_callbackFunFlag = True + ctx, ret = acl.rt.create_context(0) + self.assertEqual(ret, 0) + g_context = ctx + timeout = 1000 + g_timeout = timeout + + args_list = [ctx, timeout] + callback_thr_id_1, ret = acl.util.start_thread(callback_thr_func, args_list) + self.assertEqual(ret, 0) + callback_thr_id_2, ret = acl.util.start_thread(callback_thr_func, args_list) + self.assertEqual(ret, 0) + g_callbackFunFlag = False + + ret = acl.util.stop_thread(callback_thr_id_1) + self.assertEqual(ret, 0) + ret = acl.util.stop_thread(callback_thr_id_2) + self.assertEqual(ret, 0) + ret = 
acl.rt.destroy_context(ctx) + self.assertEqual(ret, 0) + + def test_thread_003_error(self): + """ + test case for acl start_thread with no paramter + """ + + try: + acl.util.start_thread() + except Exception as e: + self.assertIn(e.__str__(), ["function takes exactly 2 arguments (0 given)"]) + else: + self.fail("Expected exception not raised") + + def test_thread_004_error(self): + """ + test case for acl start_thread with invalid parmeter + """ + + try: + acl.util.start_thread('', '') + except Exception as e: + self.assertIn(e.__str__(), ["function takes exactly 2 arguments (0 given)", "parameter must be callable"]) + + def test_thread_005_error_param_stop_thread(self): + """ + test case for acl stop_thread with invalid parmeter + """ + + params = { + 'type': 'k', + 'params': [(), ('',)] + } + self.assertEqual(util.params_check(self, params, acl.util.stop_thread), 0) + +if __name__ == "__main__": + + suite = util.switch_cases(TestThread, "all") + result = unittest.TextTestRunner(verbosity=2).run(suite) + if result.wasSuccessful(): + exit(0) +exit(1) + \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/utils.py b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cc6f64a52ab64884897957e6a5be8a54842008c0 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/PYACL/utils.py @@ -0,0 +1,47 @@ +import unittest +import numpy as np +import acl + +def get_class_methods(class_name): + method_list = [method.split("_") for method in dir(class_name) if method.startswith("test_")] + method_list = sorted(method_list, key=lambda x: x[2]) + methods = ["_".join(method) for method in method_list] + return methods + +def switch_cases(case_class, opt): + suite = unittest.TestSuite() + methods = get_class_methods(case_class) + + if opt == "all": + for method in methods: + suite.addTest(case_class(method)) + return suite + +def 
def get_align_size(align_dict, pixel_fotmat, defaule_vale=0, case_value=0):
    """Return the aligned size for ``pixel_fotmat``.

    The first key of ``align_dict`` that contains ``pixel_fotmat`` selects a
    callable, which is invoked as ``fn(defaule_vale, case_value)``.  When no
    key matches, ``defaule_vale`` is returned unchanged.
    """
    for key, align_fn in align_dict.items():
        if pixel_fotmat in key:
            return align_fn(defaule_vale, case_value)
    return defaule_vale


def get_device_type():
    """Map the SoC name reported by ACL to "310", "310P" or "910".

    The leading "Ascend" is dropped; four characters are kept when the rest
    contains a "P", three otherwise, and "910P" is folded into "910".

    Raises:
        Exception: when the result is not one of the three supported types.
    """
    soc_name = acl.get_soc_name()
    soc_suffix = soc_name[len('Ascend'):]
    device_type = soc_suffix[0:4] if "P" in soc_suffix else soc_suffix[0:3]
    if device_type == "910P":
        device_type = "910"
    if device_type not in ["310", "310P", "910"]:
        # The old message blamed npu-smi, which this function never calls.
        raise Exception(
            f"device_type = {device_type} not in 310/310P/910 (soc name: {soc_name})")
    return device_type


def params_check(test_case, param_dic, test_fun):
    """Assert that ``test_fun`` raises TypeError for every argument tuple.

    Args:
        test_case: object providing ``assertRaises`` (a unittest.TestCase).
        param_dic: mapping whose ``'params'`` entry lists argument tuples.
        test_fun: callable invoked as ``test_fun(*param)`` for each tuple.

    Returns:
        0 when every call raised TypeError.
    """
    for param in param_dic['params']:
        with test_case.assertRaises(TypeError):
            test_fun(*param)
    return 0
+target_compile_options(ir_build PRIVATE + ${common_compile_options} +) + +target_compile_definitions(ir_build PRIVATE + ${common_compile_definitions} +) + +target_include_directories(ir_build PRIVATE + ${ASCEND_PATH}/opp/built-in/op_proto/inc + ${ATC_INCLUDE_DIR}/graph + ${ATC_INCLUDE_DIR}/ge + ${ATC_INCLUDE_DIR}/parser + ${ATC_INCLUDE_DIR} +) + +target_link_directories(ir_build PRIVATE + ${ASCEND_PATH}/compiler/lib64/stub +) + +target_link_libraries(ir_build PRIVATE + -Wl,--no-as-needed + graph + ge_compiler + fmk_parser + -Wl,--as-needed +) + +############ install ############ +set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_LIST_DIR}) + +install(TARGETS ir_build OPTIONAL + RUNTIME DESTINATION output +) diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SAMPLE_ASCEND_GRAPH_IR_BUILD/IRBuild/src/main.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SAMPLE_ASCEND_GRAPH_IR_BUILD/IRBuild/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0d7aade45259b06332e4ca9df5eeb4e50573ba6 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SAMPLE_ASCEND_GRAPH_IR_BUILD/IRBuild/src/main.cpp @@ -0,0 +1,415 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include "tensorflow_parser.h" +#include "caffe_parser.h" +#include "graph.h" +#include "types.h" +#include "tensor.h" +#include "attr_value.h" +#include "ge_error_codes.h" +#include "ge_api_types.h" +#include "ge_ir_build.h" +#include "all_ops.h" +#include +#include +//#include "add.h" // custom op ,if you have one new or different op defination with frame's,please + // add head file here.If same with frame , no need to add head file here + +using namespace std; +using namespace ge; +using ge::Operator; + +namespace { +static const int kArgsNum = 3; +static const int kSocVersion = 1; +static const int kDataPath = 2; +std::string kPath = "../data/"; +} // namespace + +void PrepareOptions(std::map& options) { +} + +bool CheckIsLHisi(string soc_version) { + if (soc_version == "Hi3796CV300ES" || soc_version == "Hi3796CV300CS") { + return true; + } + return false; +} + +bool GetConstTensorFromBin(string path, Tensor &weight, uint32_t len) { + ifstream in_file(path.c_str(), std::ios::in | std::ios::binary); + if (!in_file.is_open()) { + std::cout << "failed to open" << path.c_str() << '\n'; + return false; + } + in_file.seekg(0, ios_base::end); + istream::pos_type file_size = in_file.tellg(); + in_file.seekg(0, ios_base::beg); + + if (len != file_size) { + cout << "Invalid Param.len:" << len << " is not equal with binary size(" << file_size << ")\n"; + in_file.close(); + return false; + } + char* pdata = new(std::nothrow) char[len]; + if (pdata == nullptr) { + cout << "Invalid Param.len:" << len << " is not equal with binary size(" << file_size << ")\n"; + in_file.close(); + return false; + } + in_file.read(reinterpret_cast(pdata), len); + auto status = weight.SetData(reinterpret_cast(pdata), len); + if (status != ge::GRAPH_SUCCESS) { + cout << "Set Tensor Data Failed"<< "\n"; + delete [] pdata; + in_file.close(); + return false; + } + in_file.close(); + return true; +} +bool GenGraph(Graph& graph) +{ + auto shape_data = 
vector({ 1,1,28,28 }); + TensorDesc desc_data(ge::Shape(shape_data), FORMAT_ND, DT_FLOAT16); + + // data op + auto data = op::Data("data"); + data.update_input_desc_x(desc_data); + data.update_output_desc_y(desc_data); + // custom op ,using method is the same with frame internal op + // [Notice]: if you want to use custom self-define op, please prepare custom op according to custum op define user guides + auto add = op::Add("add") + .set_input_x1(data) + .set_input_x2(data); + // AscendQuant + auto quant = op::AscendQuant("quant") + .set_input_x(data) + .set_attr_scale(1.0) + .set_attr_offset(0.0); + + // const op: conv2d weight + auto weight_shape = ge::Shape({ 2,2,1,1 }); + TensorDesc desc_weight_1(weight_shape, FORMAT_ND, DT_INT8); + Tensor weight_tensor(desc_weight_1); + uint32_t weight_1_len = weight_shape.GetShapeSize() * sizeof(int8_t); + bool res = GetConstTensorFromBin(kPath+"Conv2D_kernel_quant.bin", weight_tensor, weight_1_len); + if (!res) { + cout << __LINE__ << "GetConstTensorFromBin Failed!" 
<< endl; + return -1; + } + auto conv_weight = op::Const("Conv2D/weight") + .set_attr_value(weight_tensor); + + // conv2d op + auto conv2d = op::Conv2D("Conv2d1") + .set_input_x(quant) + .set_input_filter(conv_weight) + .set_attr_strides({ 1, 1, 1, 1 }) + .set_attr_pads({ 0, 1, 0, 1 }) + .set_attr_dilations({ 1, 1, 1, 1 }); + + TensorDesc conv2d_input_desc_x(ge::Shape(), FORMAT_NCHW, DT_INT8); + TensorDesc conv2d_input_desc_filter(ge::Shape(), FORMAT_HWCN, DT_INT8); + TensorDesc conv2d_output_desc_y(ge::Shape(), FORMAT_NCHW, DT_INT8); + conv2d.update_input_desc_x(conv2d_input_desc_x); + conv2d.update_input_desc_filter(conv2d_input_desc_filter); + conv2d.update_output_desc_y(conv2d_output_desc_y); + // dequant scale + TensorDesc desc_dequant_shape(ge::Shape({ 1 }), FORMAT_ND, DT_UINT64); + Tensor dequant_tensor(desc_dequant_shape); + uint64_t dequant_scale_val = 1; + auto status = dequant_tensor.SetData(reinterpret_cast(&dequant_scale_val), sizeof(uint64_t)); + if (status != ge::GRAPH_SUCCESS) { + cout << __LINE__ << "Set Tensor Data Failed" << "\n"; + return false; + } + auto dequant_scale = op::Const("dequant_scale") + .set_attr_value(dequant_tensor); + + // AscendDequant + auto dequant = op::AscendDequant("dequant") + .set_input_x(conv2d) + .set_input_deq_scale(dequant_scale); + + // const op: BiasAdd weight + auto weight_bias_add_shape_1 = ge::Shape({ 1 }); + TensorDesc desc_weight_bias_add_1(weight_bias_add_shape_1, FORMAT_ND, DT_FLOAT); + Tensor weight_bias_add_tensor_1(desc_weight_bias_add_1); + uint32_t weight_bias_add_len_1 = weight_bias_add_shape_1.GetShapeSize() * sizeof(float); + float weight_bias_add_value = 0.006448820233345032; + status = weight_bias_add_tensor_1.SetData(reinterpret_cast(&weight_bias_add_value), weight_bias_add_len_1); + if (status != ge::GRAPH_SUCCESS) { + cout << __LINE__ << "Set Tensor Data Failed" << "\n"; + return false; + } + auto bias_weight_1 = op::Const("Bias/weight_1") + .set_attr_value(weight_bias_add_tensor_1); + // 
BiasAdd 1 + auto bias_add_1 = op::BiasAdd("bias_add_1") + .set_input_x(dequant) + .set_input_bias(bias_weight_1) + .set_attr_data_format("NCHW"); + + // const + int32_t value[2] = {1,-1}; + + auto value_shape = ge::Shape({ 2 }); + TensorDesc desc_dynamic_const(value_shape, FORMAT_ND, DT_INT32); + Tensor dynamic_const_tensor(desc_dynamic_const); + uint32_t dynamic_const_len = value_shape.GetShapeSize() * sizeof(int32_t); + status = dynamic_const_tensor.SetData(reinterpret_cast(&(value[0])), dynamic_const_len); + if (status != ge::GRAPH_SUCCESS) { + cout << __LINE__ << "Set Tensor Data Failed" << "\n"; + return false; + } + auto dynamic_const = op::Const("dynamic_const").set_attr_value(dynamic_const_tensor); + + // ReShape op + auto reshape = op::Reshape("Reshape") + .set_input_x(bias_add_1) + .set_input_shape(dynamic_const); + // MatMul + BiasAdd + // MatMul weight 1 + auto matmul_weight_shape_1 = ge::Shape({784,512}); + TensorDesc desc_matmul_weight_1(matmul_weight_shape_1, FORMAT_ND, DT_FLOAT); + Tensor matmul_weight_tensor_1(desc_matmul_weight_1); + uint32_t matmul_weight_1_len = matmul_weight_shape_1.GetShapeSize() * sizeof(float); + res = GetConstTensorFromBin(kPath + "dense_kernel.bin", matmul_weight_tensor_1, matmul_weight_1_len); + if (!res) { + cout << __LINE__ << "GetConstTensorFromBin Failed!" 
<< endl; + return -1; + } + auto matmul_weight_1 = op::Const("dense/kernel") + .set_attr_value(matmul_weight_tensor_1); + // MatMul1 + auto matmul_1 = op::MatMul("MatMul_1") + .set_input_x1(reshape) + .set_input_x2(matmul_weight_1); + // BiasAdd const 2 + auto bias_add_shape_2 = ge::Shape({ 512 }); + TensorDesc desc_bias_add_const_1(bias_add_shape_2, FORMAT_ND, DT_FLOAT); + Tensor bias_add_const_tensor_1(desc_bias_add_const_1); + uint32_t bias_add_const_len_1 = bias_add_shape_2.GetShapeSize() * sizeof(float); + res = GetConstTensorFromBin(kPath + "dense_bias.bin", bias_add_const_tensor_1, bias_add_const_len_1); + if (!res) { + cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl; + return -1; + } + auto bias_add_const_1 = op::Const("dense/bias") + .set_attr_value(bias_add_const_tensor_1); + // BiasAdd 2 + auto bias_add_2 = op::BiasAdd("bias_add_2") + .set_input_x(matmul_1) + .set_input_bias(bias_add_const_1) + .set_attr_data_format("NCHW"); + // Relu6 + auto relu6 = op::Relu6("relu6") + .set_input_x(bias_add_2); + // MatMul weight 2 + auto matmul_weight_shape_2 = ge::Shape({ 512, 10 }); + TensorDesc desc_matmul_weight_2(matmul_weight_shape_2, FORMAT_ND, DT_FLOAT); + Tensor matmul_weight_tensor_2(desc_matmul_weight_2); + uint32_t matmul_weight_2_len = matmul_weight_shape_2.GetShapeSize() * sizeof(float); + res = GetConstTensorFromBin(kPath + "OutputLayer_kernel.bin", matmul_weight_tensor_2, matmul_weight_2_len); + if (!res) { + cout << __LINE__ << "GetConstTensorFromBin Failed!" 
<< endl; + return -1; + } + auto matmul_weight_2 = op::Const("OutputLayer/kernel") + .set_attr_value(matmul_weight_tensor_2); + // MatMul 2 + auto matmul_2 = op::MatMul("MatMul_2") + .set_input_x1(relu6) + .set_input_x2(matmul_weight_2); + // BiasAdd const 3 + auto bias_add_shape_3 = ge::Shape({ 10 }); + TensorDesc desc_bias_add_const_3(bias_add_shape_3, FORMAT_ND, DT_FLOAT); + Tensor bias_add_const_tensor_3(desc_bias_add_const_3); + uint32_t bias_add_const_len_3 = bias_add_shape_3.GetShapeSize() * sizeof(float); + res = GetConstTensorFromBin(kPath + "OutputLayer_bias.bin", bias_add_const_tensor_3, bias_add_const_len_3); + if (!res) { + cout << __LINE__ << "GetConstTensorFromBin Failed!" << endl; + return -1; + } + auto bias_add_const_3 = op::Const("OutputLayer/bias") + .set_attr_value(bias_add_const_tensor_3); + // BiasAdd 3 + /* + * When set input for some node, there are two methodes for you. + * Method 1: operator level method. Frame will auto connect the node's output edge to netoutput nodes for user + * we recommend this method when some node own only one out node + * Method 2: edge of operator level. Frame will find the edge according to the output edge name + * we recommend this method when some node own multi out nodes and only one out edge data wanted back + */ + auto bias_add_3 = op::BiasAdd("bias_add_3") + .set_input_x_by_name(matmul_2, "y") + .set_input_bias_by_name(bias_add_const_3, "y") + .set_attr_data_format("NCHW"); + // Softmax op + auto softmax = op::SoftmaxV2("Softmax") + .set_input_x_by_name(bias_add_3, "y"); + + std::vector inputs{ data }; + /* + * The same as set input, when point net output ,Davince framework alos support multi method to set outputs info + * Method 1: operator level method. Frame will auto connect the node's output edge to netoutput nodes for user + * we recommend this method when some node own only one out node + * Method 2: edge of operator level. 
Frame will find the edge according to the output edge name + * we recommend this method when some node own multi out nodes and only one out edge data wanted back + * Using method is like follows: + */ + std::vector outputs{ softmax, add }; + std::vector> outputs_with_name = {{softmax, "y"}}; + + graph.SetInputs(inputs).SetOutputs(outputs); + + return true; +} + +// |o>------------------------- +// |o> data +// |o> | +// |o> data abs const +// |o> | | / +// |o> abs add +// |o> \ / +// |o> add +// modify tf graph +bool ModifyGraph(Graph &graph) { + /* First, you need to know where to insert new node , and find src node and dest node of new node + * by Node name from all nodes of graph; + * Second, remove edge between src node and dest node(data or control edge). + * Third, create new node by operator. + * Last, add edge(data or control) between src node and new node. + * add edge between new node and dest node. + * Here, we will insert Abs between Add and Const. + */ + // Option: If you need to know shape and type info of node, you can call infer shape interface: + // aclgrphInferShapeAndType ,then view this info by graph file which generated by dump graph + // interface: aclgrphDumpGraph. + std::cout<<"Modify Graph Start."< nodes = graph.GetAllNodes(); + graphStatus ret = GRAPH_FAILED; + for (auto &node : nodes) { + ge::AscendString name; + ret = node.GetName(name); + if (ret != GRAPH_SUCCESS) { + std::cout<<"Get node name failed."< global_options = { + {AscendString(ge::ir_option::SOC_VERSION), AscendString(argv[kSocVersion])} , + }; + auto status = aclgrphBuildInitialize(global_options); + // 3. Build Ir Model1 + ModelBufferData model1; + std::map options; + PrepareOptions(options); + + status = aclgrphBuildModel(graph1, options, model1); + if (status == GRAPH_SUCCESS) { + cout << "Build Model1 SUCCESS!" << endl; + } + else { + cout << "Build Model1 Failed!" << endl; + return 1; + } + // 4. 
Save Ir Model + status = aclgrphSaveModel("ir_build_sample1", model1); + if (status == GRAPH_SUCCESS) { + cout << "Save Offline Model1 SUCCESS!" << endl; + } + else { + cout << "Save Offline Model1 Failed!" << endl; + return 1; + } + + // release resource + aclgrphBuildFinalize(); + return 0; +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SAMPLE_ASCEND_GRAPH_IR_BUILD/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SAMPLE_ASCEND_GRAPH_IR_BUILD/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..6a1f690652a5ca168c43dd49372838827af8cafe --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SAMPLE_ASCEND_GRAPH_IR_BUILD/TEST.sh @@ -0,0 +1,14 @@ + +unsupported=(A3) +for product in "${unsupported[@]}"; do + if [[ "$product" == "$OEC_PRODUCT" ]]; then + exit 192 + fi +done +set -e +mkdir -p "$OEC_OUTPUT_PATH" +current_dir=$(pwd)/IRBuild +cd "$OEC_OUTPUT_PATH" +cmake "${current_dir}" +make +./ir_build Ascend310P3 "${OEC_DATA_PATH}/ir_build_data/" \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SIP_DEMO/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SIP_DEMO/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..6f0f9c3bb2e20ad3731b3050778dd5b9cdd450d2 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/Basic/SIP_DEMO/TEST.sh @@ -0,0 +1,25 @@ +set -e +echo `date` +SIP_ENV="$ASCEND_HOME_PATH/../../nnal/asdsip/set_env.sh" + +source ${SIP_ENV} +mkdir -p "$OEC_OUTPUT_PATH" + +g++ example.cpp \ + -I${ASCEND_HOME_PATH}/include/aclnn \ + -I${ASCEND_HOME_PATH}/include \ + -L${ASCEND_HOME_PATH}/lib64/ -lascendcl -lopapi -lnnopbase \ + -I${ASDSIP_HOME_PATH}/include \ + -L${ASDSIP_HOME_PATH}/lib -lmki \ + -L${ASDSIP_HOME_PATH}/lib -lasdsip \ + -L${ASDSIP_HOME_PATH}/lib -lasdsip_core \ + -L${ASDSIP_HOME_PATH}/lib -lasdsip_host \ + -o $OEC_OUTPUT_PATH/example +cd "$OEC_OUTPUT_PATH" + +# export 
// Number of elements described by `shape`: the product of all dimensions
// (1 for an empty shape).
int64_t GetShapeSize(const std::vector<int64_t> &shape)
{
    int64_t elementCount = 1;
    for (size_t idx = 0; idx < shape.size(); ++idx) {
        elementCount *= shape[idx];
    }
    return elementCount;
}
ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector &hostData, const std::vector &shape, void **deviceAddr, + aclDataType dataType, aclTensor **tensor) +{ + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据复制到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), + shape.size(), + dataType, + strides.data(), + 0, + aclFormat::ACL_FORMAT_ND, + shape.data(), + shape.size(), + *deviceAddr); + return 0; +} + +int main(int argc, char **argv) +{ + int deviceId = 0; + + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. 
ERROR: %d\n", ret); return ret); + + int64_t n = 5; + int64_t incx = 1; + int64_t incy = 1; + + int64_t xSize = 5; + std::vector tensorInXData; + tensorInXData.reserve(xSize); + for (int64_t i = 0; i < xSize; i++) { + tensorInXData[i] = 1.0 + i; + } + + int64_t ySize = 5; + std::vector tensorInYData; + tensorInYData.reserve(xSize); + for (int64_t i = 0; i < ySize; i++) { + tensorInYData[i] = 10.0 + i; + } + + int64_t resultSize = 1; + std::vector resultData; + resultData.reserve(resultSize); + + std::cout << "------- input x -------" << std::endl; + for (int64_t i = 0; i < xSize; i++) { + std::cout << tensorInXData[i] << " "; + } + std::cout << std::endl; + + std::cout << "------- input y -------" << std::endl; + for (int64_t i = 0; i < ySize; i++) { + std::cout << tensorInYData[i] << " "; + } + std::cout << std::endl; + + std::vector xShape = {xSize}; + std::vector yShape = {ySize}; + std::vector resultShape = {resultSize}; + aclTensor *inputX = nullptr; + aclTensor *inputY = nullptr; + aclTensor *result = nullptr; + void *inputXDeviceAddr = nullptr; + void *inputYDeviceAddr = nullptr; + void *resultDeviceAddr = nullptr; + ret = CreateAclTensor(tensorInXData, xShape, &inputXDeviceAddr, aclDataType::ACL_FLOAT, &inputX); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(tensorInYData, yShape, &inputYDeviceAddr, aclDataType::ACL_FLOAT, &inputY); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(resultData, resultShape, &resultDeviceAddr, aclDataType::ACL_FLOAT, &result); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + AsdSip::asdBlasHandle handle; + AsdSip::asdBlasCreate(handle); + + size_t lwork = 0; + void *buffer = nullptr; + AsdSip::asdBlasMakeDotPlan(handle); + AsdSip::asdBlasGetWorkspaceSize(handle, lwork); + std::cout << "lwork = " << lwork << std::endl; + if (lwork > 0) { + ret = aclrtMalloc(&buffer, static_cast(lwork), ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. 
ERROR: %d\n", ret); return ret); + } + AsdSip::asdBlasSetWorkspace(handle, buffer); + AsdSip::asdBlasSetStream(handle, stream); + auto xxx = AsdSip::asdBlasSdot(handle, n, inputX, incx, inputY, incy, result); + std::cout<< "AsdSip::asdBlasSdot(handle, n, inputX, incx, inputY, incy, result) = " << xxx <&1) +exit_code=$? + +# 将输出打印到终端 +echo "$output" + +# 检查输出中是否包含[ERROR] +if echo "$output" | grep -q "\[ERROR\]"; then + exit 1 +fi + +# 如果没有错误,返回原始命令的退出码 +exit $exit_code \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_DIAGNOSE/TEST_HBM.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_DIAGNOSE/TEST_HBM.sh new file mode 100644 index 0000000000000000000000000000000000000000..f0311c8b03ecd86ea8684e9a179d03b8d7291ad9 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_DIAGNOSE/TEST_HBM.sh @@ -0,0 +1,26 @@ +unsupported=(A300) +for product in "${unsupported[@]}"; do + if [[ "$product" == "$OEC_PRODUCT" ]]; then + exit 192 + fi +done +source $ASCEND_HOME_PATH/bin/setenv.bash + + + +#!/bin/bash + +# 执行命令并捕获所有输出 +output=$(asys diagnose -r=hbm_detect -d 0 --output="$OEC_OUTPUT_PATH" 2>&1) +exit_code=$? 
# Echo the captured output to the terminal.
echo "$output"

# Any [ERROR] marker in the output fails the test, even when asys exited 0.
if echo "$output" | grep -q "\[ERROR\]"; then
    exit 1
fi

# No error marker found: propagate the exit code of the asys command.
exit "$exit_code"
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_DIAGNOSE/TEST_STRESS.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_DIAGNOSE/TEST_STRESS.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7a47f88b134a45c8fba6ed039825765507d2fee7
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_DIAGNOSE/TEST_STRESS.sh
@@ -0,0 +1,25 @@
#!/bin/bash
# Stress-detect diagnosis through `asys diagnose`.
# The shebang must be the first line to take effect; the script relies on
# bash arrays and [[ ]].

# Products on which this test does not apply.
# NOTE(review): exit code 192 is presumably the harness's "unsupported/skip"
# status -- confirm against the runner.
unsupported=(A300)
for product in "${unsupported[@]}"; do
    if [[ "$product" == "$OEC_PRODUCT" ]]; then
        exit 192
    fi
done
# Quoted so an install path containing spaces still resolves.
source "$ASCEND_HOME_PATH/bin/setenv.bash"

# Run the diagnosis and capture stdout and stderr together.
output=$(asys diagnose -r=stress_detect -d 0 --output="$OEC_OUTPUT_PATH" 2>&1)
exit_code=$?

# Echo the captured output to the terminal.
echo "$output"

# Any [ERROR] marker in the output fails the test, even when asys exited 0.
if echo "$output" | grep -q "\[ERROR\]"; then
    exit 1
fi

# No error marker found: propagate the exit code of the asys command.
exit "$exit_code"
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/TEST_COLLECT.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/TEST_COLLECT.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d4eeed24ba51847eb5358d1f1e9f47520053b86a
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/TEST_COLLECT.sh
@@ -0,0 +1,5 @@
# Build the resnet50 sample, run it, then collect its artifacts with asys.
# Stop early if the build or the directory change fails; otherwise the
# remaining commands would run against a missing binary or the wrong directory.
bash build.sh || exit 1
source "$ASCEND_HOME_PATH/bin/setenv.bash"
cd "$OEC_OUTPUT_PATH/tmp/resnet50" || exit 1
./resnet50 "$OEC_OUTPUT_PATH/tmp/resnet50" 5000
# The script's exit status is that of `asys collect` (last command).
asys collect --task_dir="$(pwd)" --tar="TRUE" --output="$OEC_OUTPUT_PATH/asys_output"
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/TEST_RERUN.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/TEST_RERUN.sh
new file mode 100644
index 
0000000000000000000000000000000000000000..24314dd0ee070952225f6a3ec4411324e9d5a69b --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/TEST_RERUN.sh @@ -0,0 +1,5 @@ +bash build.sh +source $ASCEND_HOME_PATH/bin/setenv.bash +cd "$OEC_OUTPUT_PATH/tmp/resnet50" + +asys launch --task="./resnet50 \"$OEC_OUTPUT_PATH/tmp/resnet50\" 5000" --tar="TRUE" --output=$OEC_OUTPUT_PATH/asys_output diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/build.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/build.sh new file mode 100755 index 0000000000000000000000000000000000000000..c1c370889022491aee12164ec3d373e1bde73639 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/build.sh @@ -0,0 +1,24 @@ + +data="$OEC_DATA_PATH" +output="$OEC_OUTPUT_PATH" +npu=$(python3 -c " +try: + import acl + print(acl.get_soc_name()) +except: + print('unknow') +") +mkdir -p "$output/tmp/resnet50/model" +g++ resnet50.cpp -o "$output/tmp/resnet50/resnet50" -lascendcl -ldl -lpthread -L${ASCEND_HOME_PATH}/lib64 -I${ASCEND_HOME_PATH}/include +if [[ $? != 0 ]]; then + cd .. + rm -rf build + exit -1 +fi + +cd "$output/tmp/resnet50" +if [[ ! 
-f "model/resnet50.om" ]]; then + atc --model="$data/model/resnet50.onnx" --framework=5 --output="model/resnet50" --input_shape="actual_input_1:1,3,224,224" --soc_version=$npu +fi + +cp -r "$data/data" "$output/tmp/resnet50" diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/resnet50.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/resnet50.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da022fde1838561438b121ff3cfa992990a2f80a --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/DevTools/ASYS_TOOL_ERROR/resnet50.cpp @@ -0,0 +1,327 @@ +#include "acl/acl.h" +#include +#include // 添加此行以引入accumulate函数 +#include +#include +#include +#include +#include +#include +using namespace std; + +// ---------------------- 全局变量定义 ---------------------- +int32_t deviceId = 0; // 计算设备ID +uint32_t modelId = 0; // 模型ID +size_t pictureDataSize = 0; // 图片数据大小 +void* pictureHostData = nullptr; // 主机侧图片数据 +void* pictureDeviceData = nullptr; // 设备侧图片数据 +aclmdlDataset* inputDataSet = nullptr;// 输入数据集 +aclDataBuffer* inputDataBuffer = nullptr; +aclmdlDataset* outputDataSet = nullptr;// 输出数据集 +aclDataBuffer* outputDataBuffer = nullptr; +aclmdlDesc* modelDesc = nullptr; // 模型描述信息 +size_t outputDataSize = 0; // 输出数据大小 +void* outputDeviceData = nullptr; // 设备侧输出数据 +void* outputHostData = nullptr; // 主机侧输出数据 + +// ---------------------- 预期结果配置 ---------------------- +const unsigned int EXPECTED_TOP1_INDEX = 162; // 预期Top1类别索引(需根据模型数据集调整) +const double MIN_CONFIDENCE_THRESHOLD = 0.9; // 最小置信度阈值(建议≥0.9) + +// ---------------------- 函数声明 ---------------------- +void InitResource(); // 资源初始化 +void LoadModel(const char* modelPath); // 加载模型 +void LoadPicture(const char* picturePath); // 加载图片(主机+设备内存) +void Inference(); // 执行推理 +int PrintResultAndValidate(); // 打印结果并验证 +void UnloadModel(); // 卸载模型 +void UnloadPicture(); // 释放图片相关资源 +void DestroyResource(); // 释放全局资源 + +// ---------------------- 函数定义 
---------------------- +// 1. 资源初始化(AscendCL初始化 + 指定计算设备) +void InitResource() { + aclError ret = aclInit(nullptr); // 初始化AscendCL,使用默认配置 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclInit failed, error code: " << ret << endl; + exit(1); + } + ret = aclrtSetDevice(deviceId); // 指定计算设备 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtSetDevice failed, error code: " << ret << endl; + exit(1); + } +} + +// 2. 加载模型(.om文件) +void LoadModel(const char* modelPath) { + aclError ret = aclmdlLoadFromFile(modelPath, &modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to load model from " << modelPath << ", error code: " << ret << endl; + exit(1); + } + cout << "[INFO] Model loaded successfully: " << modelPath << endl; +} + +// 3. 读取图片到主机内存 +void ReadPictureToHost(const char* picturePath) { + ifstream binFile(picturePath, ios::binary); + if (!binFile.is_open()) { + cerr << "[ERROR] Failed to open picture file: " << picturePath << endl; + exit(1); + } + // 获取文件大小并读取数据 + binFile.seekg(0, ios::end); + pictureDataSize = binFile.tellg(); + binFile.seekg(0, ios::beg); + + aclError ret = aclrtMallocHost(&pictureHostData, pictureDataSize); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMallocHost failed, error code: " << ret << endl; + exit(1); + } + binFile.read((char*)pictureHostData, pictureDataSize); + binFile.close(); + cout << "[INFO] Picture loaded to host memory: " << picturePath << endl; +} + +// 4. 复制数据到设备内存 +void CopyDataFromHostToDevice() { + aclError ret = aclrtMalloc(&pictureDeviceData, pictureDataSize, ACL_MEM_MALLOC_HUGE_FIRST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMalloc failed, error code: " << ret << endl; + exit(1); + } + ret = aclrtMemcpy(pictureDeviceData, pictureDataSize, pictureHostData, pictureDataSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMemcpy failed, error code: " << ret << endl; + exit(1); + } + cout << "[INFO] Picture data copied to device memory" << endl; +} + +// 5. 
加载图片(组合函数) +void LoadPicture(const char* picturePath) { + ReadPictureToHost(picturePath); + CopyDataFromHostToDevice(); +} + +// 6. 创建模型输入数据结构 +void CreateModelInput() { + inputDataSet = aclmdlCreateDataset(); + inputDataBuffer = aclCreateDataBuffer(pictureDeviceData, pictureDataSize); + aclError ret = aclmdlAddDatasetBuffer(inputDataSet, inputDataBuffer); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to create model input, error code: " << ret << endl; + exit(1); + } +} + +// 7. 创建模型输出数据结构 +void CreateModelOutput() { + modelDesc = aclmdlCreateDesc(); + aclError ret = aclmdlGetDesc(modelDesc, modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to get model description, error code: " << ret << endl; + exit(1); + } + + outputDataSet = aclmdlCreateDataset(); + outputDataSize = aclmdlGetOutputSizeByIndex(modelDesc, 0); // 获取第一个输出的大小 + + ret = aclrtMalloc(&outputDeviceData, outputDataSize, ACL_MEM_MALLOC_HUGE_FIRST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to allocate output memory, error code: " << ret << endl; + exit(1); + } + outputDataBuffer = aclCreateDataBuffer(outputDeviceData, outputDataSize); + ret = aclmdlAddDatasetBuffer(outputDataSet, outputDataBuffer); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to create model output, error code: " << ret << endl; + exit(1); + } +} + +// 8. 执行推理 +void Inference() { + CreateModelInput(); + CreateModelOutput(); + aclError ret = aclmdlExecute(modelId, inputDataSet, outputDataSet); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Inference failed, error code: " << ret << endl; + exit(1); + } +} + +// 9. 
打印结果并验证 +int PrintResultAndValidate() { + // 复制输出数据到主机内存 + aclError ret = aclrtMallocHost(&outputHostData, outputDataSize); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to malloc host memory for output, error code: " << ret << endl; + return 1; + } + ret = aclrtMemcpy(outputHostData, outputDataSize, outputDeviceData, outputDataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to copy output data to host, error code: " << ret << endl; + return 1; + } + + // 解析输出数据(转换为float数组) + float* outFloatData = reinterpret_cast(outputHostData); + map> resultMap; // 按置信度降序排序 + for (unsigned int j = 0; j < outputDataSize / sizeof(float); ++j) { + resultMap[outFloatData[j]] = j; + } + + // 检查是否有推理结果 + if (resultMap.empty()) { + cerr << "[ERROR] No inference results found" << endl; + return 1; + } + + // 提取Top1结果 + auto top1 = resultMap.begin(); + unsigned int top1Index = top1->second; + double top1Score = top1->first; + double top1Confidence = exp(top1Score) / accumulate(resultMap.begin(), resultMap.end(), 0.0, + [](double sum, const pair& item) { return sum + exp(item.first); }); + + // 打印Top5结果 + cout << "\nTop 5 Inference Results:" << endl; + int cnt = 0; + for (auto it = resultMap.begin(); it != resultMap.end() && cnt < 5; ++it, ++cnt) { + double prob = exp(it->first) / accumulate(resultMap.begin(), resultMap.end(), 0.0, + [](double sum, const pair& item) { return sum + exp(item.first); }); + cout << "Top " << cnt + 1 << ": Index[" << it->second << "] Confidence[" << fixed << prob << "]" << endl; + } + + // 结果验证 + bool isSuccess = (top1Index == EXPECTED_TOP1_INDEX && top1Confidence >= MIN_CONFIDENCE_THRESHOLD); + if (isSuccess) { + cout << "\n[VALIDATION SUCCESS] Top1 matches expectations: Index[" << top1Index + << "] Confidence[" << fixed << top1Confidence << "]" << endl; + return 0; // 验证通过,返回0 + } else { + cerr << "\n[VALIDATION FAILED] Top1 does not match expectations:" << endl + << " Expected Index: " << EXPECTED_TOP1_INDEX 
<< ", Confidence ≥ " << MIN_CONFIDENCE_THRESHOLD << endl + << " Actual Index: " << top1Index << ", Confidence: " << fixed << top1Confidence << endl; + return 1; // 验证失败,返回1 + } +} + +// 10. 卸载模型 +void UnloadModel() { + if (modelDesc != nullptr) { + aclmdlDestroyDesc(modelDesc); + modelDesc = nullptr; + } + aclError ret = aclmdlUnload(modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to unload model, error code: " << ret << endl; + } + cout << "[INFO] Model unloaded successfully" << endl; +} + +// 11. 释放图片相关资源 +void UnloadPicture() { + if (pictureHostData != nullptr) { + aclrtFreeHost(pictureHostData); + pictureHostData = nullptr; + } + if (pictureDeviceData != nullptr) { + aclrtFree(pictureDeviceData); + pictureDeviceData = nullptr; + } + if (inputDataBuffer != nullptr) { + aclDestroyDataBuffer(inputDataBuffer); + inputDataBuffer = nullptr; + } + if (inputDataSet != nullptr) { + aclmdlDestroyDataset(inputDataSet); + inputDataSet = nullptr; + } + if (outputHostData != nullptr) { + aclrtFreeHost(outputHostData); + outputHostData = nullptr; + } + if (outputDeviceData != nullptr) { + aclrtFree(outputDeviceData); + outputDeviceData = nullptr; + } + if (outputDataBuffer != nullptr) { + aclDestroyDataBuffer(outputDataBuffer); + outputDataBuffer = nullptr; + } + if (outputDataSet != nullptr) { + aclmdlDestroyDataset(outputDataSet); + outputDataSet = nullptr; + } + cout << "[INFO] Picture resources unloaded successfully" << endl; +} + +// 12. 
释放全局资源 +void DestroyResource() { + aclError ret = aclrtResetDevice(deviceId); // 重置计算设备 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtResetDevice failed, error code: " << ret << endl; + } + aclFinalize(); // 去初始化AscendCL + cout << "[INFO] Global resources released successfully" << endl; +} + +// ---------------------- 主函数 ---------------------- +int main(int argc, char* argv[]) { + // 检查命令行参数 + if (argc != 3) { + cerr << "[ERROR] Usage: " << argv[0] << " "<<" " << endl; + cerr << " Example: " << argv[0] << " /path/to/resources" <<" 1000" << endl; + cerr << " Model will be loaded from: /model/resnet50.om" << endl; + cerr << " Picture will be loaded from: /data/dog1_1024_683.bin" << endl; + return 1; + } + + // 构建模型和图片路径 + string basePath = argv[1]; + int test_times = atoi(argv[2]); + string modelPath = basePath + "/model/resnet50.om"; + string picturePath = basePath + "/data/dog1_1024_683.bin"; + + cout << "[INFO] Base path: " << basePath << endl; + cout << "[INFO] Model path: " << modelPath << endl; + cout << "[INFO] Picture path: " << picturePath << endl; + + // 1. 资源初始化 + InitResource(); + + // 2. 加载模型 + LoadModel(modelPath.c_str()); + + // 3. 加载测试图片 + LoadPicture(picturePath.c_str()); + + auto start = std::chrono::high_resolution_clock::now(); + for(int i =0; i < test_times; ++i){ + // 4. 执行推理 + Inference(); + } + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + double fps = static_cast(test_times) / duration.count() * 1000000; + + // 5. 打印结果并验证 + int status = PrintResultAndValidate(); + + std::cout <<"\n" << "FPS: " << fps << "\n" < +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_add.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) 
\ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 
构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector otherShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* otherDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* other = nullptr; + aclScalar* alpha = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector otherHostData = {1, 1, 1, 2, 2, 2, 3, 3}; + std::vector outHostData(8, 0); + float alphaValue = 1.2f; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建other aclTensor + ret = CreateAclTensor(otherHostData, otherShape, &otherDeviceAddr, aclDataType::ACL_FLOAT, &other); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建alpha aclScalar + alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT); + CHECK_RET(alpha != nullptr, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + + // aclnnAdd接口调用示例 + // 3. 调用CANN算子库API + // 调用aclnnAdd第一段接口 + ret = aclnnAddGetWorkspaceSize(self, other, alpha, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAddGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnAdd第二段接口 + ret = aclnnAdd(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAdd failed. ERROR: %d\n", ret); return ret); + + // 4. 
(固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + + // aclnnInplaceAdd接口调用示例 + // 3. 调用CANN算子库API + LOG_PRINT("\ntest aclnnInplaceAdd\n"); + // 调用aclnnInplaceAdd第一段接口 + ret = aclnnInplaceAddGetWorkspaceSize(self, other, alpha, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnInplaceAddGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnInplaceAdd第二段接口 + ret = aclnnInplaceAdd(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnInplaceAdd failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), selfDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. 
ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(other); + aclDestroyScalar(alpha); + aclDestroyTensor(out); + + // 7. 释放Device资源,需要根据具体API的接口定义修改 + aclrtFree(selfDeviceAddr); + aclrtFree(otherDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c129437bef1560759fefd2929e57c2493f1a87dc --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 

# CMake lowest version requirement
cmake_minimum_required(VERSION 3.14)

# Project name
project(ACLNN_EXAMPLE)

# Language standard, requested through CMake instead of a raw -std flag.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Build options. The output directory is anchored to the build tree so that
# TEST.sh's `cd bin` does not depend on a relative path.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")

# Executable name and its source file
add_executable(testcase
        testcase.cpp)

# ASCEND_PATH is the CANN install directory: taken from ASCEND_HOME_PATH when
# set, otherwise the default install location. INCLUDE_BASE_DIR is its header
# directory.
if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "")
    set(ASCEND_PATH $ENV{ASCEND_HOME_PATH})
else()
    set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest")
endif()
set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include")
target_include_directories(testcase PRIVATE
    ${INCLUDE_BASE_DIR}
    ${INCLUDE_BASE_DIR}/aclnn
)

# Libraries to link
target_link_libraries(testcase PRIVATE
        ${ASCEND_PATH}/lib64/libascendcl.so
        ${ASCEND_PATH}/lib64/libnnopbase.so
        ${ASCEND_PATH}/lib64/libopapi.so)

# The executable is installed into the bin directory of the build tree
install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/TEST.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2c5e2731d17c58fb5a338cd68706071f2a38aaa9
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/TEST.sh
@@ -0,0 +1,11 @@
# Configure and build the test case out of tree in $OEC_OUTPUT_PATH, then run it.
# With `set -e`, any failing step aborts the script with that step's status.
set -e
output_path="$OEC_OUTPUT_PATH"
src_path=$(pwd)
mkdir -p "$output_path"
cd "${output_path}"
cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE
make

cd bin
./testcase
exit $?
\ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/testcase.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/testcase.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c1856e98e3799bf730a67d58f8ac90309f2c04e --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ADD_LAYER_NORM/testcase.cpp @@ -0,0 +1,129 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_softmax.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. 
ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector outHostData = {0, 0, 0, 0, 0, 0, 0, 0}; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 调用CANN算子库API,需要修改为具体的Api名称 + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + int64_t dim = 0; + // 调用aclnnSoftmax第一段接口 + ret = aclnnSoftmaxGetWorkspaceSize(self, dim, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmaxGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. 
ERROR: %d\n", ret); return ret); + } + // 调用aclnnSoftmax第二段接口 + ret = aclnnSoftmax(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmax failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(out); + + // 7. 释放device 资源 + aclrtFree(selfDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee714c3249a98b5b8287fce9854ec52581889d9e --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) +if(DEFINED DEV_NUM) + target_compile_definitions(testcase PRIVATE DEV_NUM=${DEV_NUM}) +endif() +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) +# 设置链接的库文件路径 +find_package(Threads REQUIRED) +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so + ${ASCEND_PATH}/lib64/libhccl.so # 集合通信库文件 + pthread) # 多线程依赖的库文件 +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..b08a7ea8d8d793514f7d062186aed43e22107270 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/TEST.sh @@ -0,0 +1,22 @@ +set -e +output_path="$OEC_OUTPUT_PATH" +device_num=$(python3 -c " +try: + import acl + count,ret = acl.rt.get_device_count() + assert ret == 0 + print(count) 
+except Exception as err: + raise SystemExit('failed to query NPU device count via acl: %r' % (err,)) # non-zero exit aborts TEST.sh through set -e +") +echo device_num = $device_num +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +# device_num is the number of NPUs on this host; on A2 this test currently only succeeds when it is 2, 4 or 8 +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE -DDEV_NUM=${device_num} +make + +cd bin +./testcase +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/testcase.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/testcase.cpp new file mode 100644 index 0000000000000000000000000000000000000000..104f7821aa6cc32ccc50ffbbf512c12274070e82 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_ALL_GATHER_MATMUL/testcase.cpp @@ -0,0 +1,207 @@ +#include +#include +#include +#include "aclnnop/aclnn_all_gather_matmul.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while(0) + +#ifndef DEV_NUM +#warning "DEV_NUM is not defined, default is 8" +#define DEV_NUM 8 +#endif + +int64_t GetShapeSize(const std::vector &shape) +{ + int64_t shape_size = 1; + for (auto i : shape) { + shape_size *= i; + } + return shape_size; +} + +template +int CreateAclTensor(const std::vector &hostData, const std::vector &shape, void **deviceAddr, + aclDataType dataType, aclTensor **tensor) +{ + auto size = GetShapeSize(shape) * sizeof(T); + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtMalloc failed. ret: %d\n", ret); return ret); + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtMemcpy failed. 
ret: %d\n", ret); return ret); + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i +1] * strides[i + 1]; + } + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +struct Args { + int rankId; + HcclComm hcclComm; + aclrtStream stream; + }; + +int launchOneThread_AllGatherMm(Args &args) +{ + int ret = aclrtSetDevice(args.rankId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtSetDevice failed. ret = %d \n", ret); return ret); + + char hcomName[128] = {0}; + ret = HcclGetCommName(args.hcclComm, hcomName); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] HcclGetCommName failed. ret: %d\n", ret); return -1); + LOG_PRINT("[INFO] rank = %d, hcomName = %s, stream = %p\n", args.rankId, hcomName, args.stream); + std::vector x1Shape = {128, 256}; + std::vector x2Shape = {256, 512}; + std::vector biasShape = {512}; + std::vector outShape = {128 * DEV_NUM, 512}; + std::vector gatherOutShape = {128 * DEV_NUM, 256}; + void *x1DeviceAddr = nullptr; + void *x2DeviceAddr = nullptr; + void *biasDeviceAddr = nullptr; + void *outDeviceAddr = nullptr; + void *gatherOutDeviceAddr = nullptr; + aclTensor *x1 = nullptr; + aclTensor *x2 = nullptr; + aclTensor *bias = nullptr; + aclTensor *out = nullptr; + aclTensor *gatherOut = nullptr; + + int64_t gatherIndex = 0; + int64_t commTurn = 0; + int64_t streamMode = 1; + uint64_t workspaceSize = 0; + aclOpExecutor *executor = nullptr; + void *workspaceAddr = nullptr; + + long long x1ShapeSize = GetShapeSize(x1Shape); + long long x2ShapeSize = GetShapeSize(x2Shape); + long long biasShapeSize = GetShapeSize(biasShape); + long long outShapeSize = GetShapeSize(outShape); + long long gatherOutShapeSize = GetShapeSize(gatherOutShape); + + std::vector x1HostData(x1ShapeSize, 0); + std::vector x2HostData(x2ShapeSize, 0); + std::vector biasHostData(biasShapeSize, 
0); + std::vector outHostData(outShapeSize, 0); + std::vector gatherOutHostData(gatherOutShapeSize, 0); + + ret = CreateAclTensor(x1HostData, x1Shape, &x1DeviceAddr, aclDataType::ACL_FLOAT16, &x1); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(x2HostData, x2Shape, &x2DeviceAddr, aclDataType::ACL_FLOAT16, &x2); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT16, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(gatherOutHostData, gatherOutShape, &gatherOutDeviceAddr, + aclDataType::ACL_FLOAT16, &gatherOut); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 调用第一阶段接口 + ret = aclnnAllGatherMatmulGetWorkspaceSize( + x1, x2, bias, hcomName, gatherIndex, commTurn, streamMode, out, gatherOut, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, + LOG_PRINT("[ERROR] aclnnAllGatherMatmulGetWorkspaceSize failed. ret = %d \n", ret); return ret); + // 根据第一阶段接口计算出的workspaceSize申请device内存 + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtMalloc workspace failed. ret = %d \n", ret); return ret); + } + // 调用第二阶段接口 + ret = aclnnAllGatherMatmul(workspaceAddr, workspaceSize, executor, args.stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclnnAllGatherMatmul failed. ret = %d \n", ret); return ret); + // (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStreamWithTimeout(args.stream, 10000); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtSynchronizeStreamWithTimeout failed. 
ret = %d \n", ret); + return ret); + LOG_PRINT("[INFO] device_%d aclnnAllGatherMatmul execute successfully.\n", args.rankId); + // 释放device资源,需要根据具体API的接口定义修改 + if (x1 != nullptr) { + aclDestroyTensor(x1); + } + if (x2 != nullptr) { + aclDestroyTensor(x2); + } + if (bias != nullptr) { + aclDestroyTensor(bias); + } + if (out != nullptr) { + aclDestroyTensor(out); + } + if (gatherOut != nullptr) { + aclDestroyTensor(gatherOut); + } + if (x1DeviceAddr != nullptr) { + aclrtFree(x1DeviceAddr); + } + if (x2DeviceAddr != nullptr) { + aclrtFree(x2DeviceAddr); + } + if (biasDeviceAddr != nullptr) { + aclrtFree(biasDeviceAddr); + } + if (outDeviceAddr != nullptr) { + aclrtFree(outDeviceAddr); + } + if (gatherOutDeviceAddr != nullptr) { + aclrtFree(gatherOutDeviceAddr); + } + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + ret = aclrtDestroyStream(args.stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtDestroyStream failed. ret = %d \n", ret); return ret); + ret = aclrtResetDevice(args.rankId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtResetDevice failed. ret = %d \n", ret); return ret); + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclInit failed. ret = %d \n", ret); return ret); + aclrtStream stream[DEV_NUM]; + for (uint32_t rankId = 0; rankId < DEV_NUM; rankId++) { + ret = aclrtSetDevice(rankId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtSetDevice failed. ret = %d \n", ret); return ret); + ret = aclrtCreateStream(&stream[rankId]); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] aclrtCreateStream failed. ret = %d \n", ret); return ret); + } + int32_t devices[DEV_NUM]; + for (int i = 0; i < DEV_NUM; i++) { + devices[i] = i; + } + // 初始化集合通信域 + HcclComm comms[DEV_NUM]; + ret = HcclCommInitAll(DEV_NUM, devices, comms); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("[ERROR] HcclCommInitAll failed. 
ret = %d \n", ret); return ret); + + Args args[DEV_NUM]; + // Exit code of each rank: std::thread discards the return value of its callable, so the worker result is stored here instead. + int rankRet[DEV_NUM] = {0}; + // Launch one worker thread per rank. + std::vector> threads(DEV_NUM); + for (uint32_t rankId = 0; rankId < DEV_NUM; rankId++) { + args[rankId].rankId = rankId; + args[rankId].hcclComm = comms[rankId]; + args[rankId].stream = stream[rankId]; + threads[rankId].reset(new(std::nothrow) std::thread( + [&args, &rankRet, rankId]() { rankRet[rankId] = launchOneThread_AllGatherMm(args[rankId]); })); + } + // Wait for every rank and count the ones that could not be started or returned an error. + int failedRanks = 0; + for (uint32_t rankId = 0; rankId < DEV_NUM; rankId++) { + if (threads[rankId] == nullptr) { + LOG_PRINT("[ERROR] create thread for rank %u failed.\n", rankId); + failedRanks++; + continue; + } + threads[rankId]->join(); + if (rankRet[rankId] != 0) { + LOG_PRINT("[ERROR] rank %u failed. ret = %d \n", rankId, rankRet[rankId]); + failedRanks++; + } + } + for (int i = 0; i < DEV_NUM; i++) { + auto hcclRet = HcclCommDestroy(comms[i]); + CHECK_RET(hcclRet == HCCL_SUCCESS, LOG_PRINT("[ERROR] HcclCommDestroy failed. ret = %d \n", hcclRet); return -1); + } + aclFinalize(); + // Exit with a non-zero status when any rank failed so the test is not reported as passed. + return failedRanks == 0 ? 0 : -1; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c129437bef1560759fefd2929e57c2493f1a87dc --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..ffdcf890b33145b703283d1289e9831a99fb8f04 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/TEST.sh @@ -0,0 +1,17 @@ +unsupported=(A300) +for product in "${unsupported[@]}"; do + if [[ "$product" == "$OEC_PRODUCT" ]]; then + exit 192 + fi +done +set -e +output_path="$OEC_OUTPUT_PATH" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +exit $? 
\ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/testcase.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/testcase.cpp new file mode 100644 index 0000000000000000000000000000000000000000..41e4074f8ba1494cb335260dbcfb9338211c7564 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_FLASH_ATTENTION_SCORE/testcase.cpp @@ -0,0 +1,206 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_flash_attention_score.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +void PrintOutResult(std::vector &shape, void** deviceAddr) { + auto size = GetShapeSize(shape); + std::vector resultData(size, 0); + auto ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), + *deviceAddr, size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("mean result[%ld] is: %f\n", i, resultData[i]); + } +} + +int Init(int32_t deviceId, aclrtContext* context, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateContext(context, deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateContext failed. 
ERROR: %d\n", ret); return ret); + ret = aclrtSetCurrentContext(*context); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetCurrentContext failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/context/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtContext context; + aclrtStream stream; + auto ret = Init(deviceId, &context, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 
构造输入与输出,需要根据API的接口自定义构造 + std::vector qShape = {256, 1, 128}; + std::vector kShape = {256, 1, 128}; + std::vector vShape = {256, 1, 128}; + std::vector attenmaskShape = {256, 256}; + + std::vector attentionOutShape = {256, 1, 128}; + std::vector softmaxMaxShape = {1, 1, 256, 8}; + std::vector softmaxSumShape = {1, 1, 256, 8}; + + void* qDeviceAddr = nullptr; + void* kDeviceAddr = nullptr; + void* vDeviceAddr = nullptr; + void* attenmaskDeviceAddr = nullptr; + void* attentionOutDeviceAddr = nullptr; + void* softmaxMaxDeviceAddr = nullptr; + void* softmaxSumDeviceAddr = nullptr; + + aclTensor* q = nullptr; + aclTensor* k = nullptr; + aclTensor* v = nullptr; + aclTensor* pse = nullptr; + aclTensor* dropMask = nullptr; + aclTensor* padding = nullptr; + aclTensor* attenmask = nullptr; + aclTensor* attentionOut = nullptr; + aclTensor* softmaxMax = nullptr; + aclTensor* softmaxSum = nullptr; + aclTensor* softmaxOut = nullptr; + + std::vector qHostData(32768, 1); + std::vector kHostData(32768, 1); + std::vector vHostData(32768, 1); + std::vector attenmaskHostData(65536, 0); + std::vector attentionOutHostData(32768, 0); + std::vector softmaxMaxHostData(2048, 3.0); + std::vector softmaxSumHostData(2048, 3.0); + + ret = CreateAclTensor(qHostData, qShape, &qDeviceAddr, aclDataType::ACL_FLOAT16, &q); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(kHostData, kShape, &kDeviceAddr, aclDataType::ACL_FLOAT16, &k); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(vHostData, vShape, &vDeviceAddr, aclDataType::ACL_FLOAT16, &v); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(attenmaskHostData, attenmaskShape, &attenmaskDeviceAddr, aclDataType::ACL_UINT8, &attenmask); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(attentionOutHostData, attentionOutShape, &attentionOutDeviceAddr, aclDataType::ACL_FLOAT16, &attentionOut); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = 
CreateAclTensor(softmaxMaxHostData, softmaxMaxShape, &softmaxMaxDeviceAddr, aclDataType::ACL_FLOAT, &softmaxMax); + CHECK_RET(ret == ACL_SUCCESS, return ret); + ret = CreateAclTensor(softmaxSumHostData, softmaxSumShape, &softmaxSumDeviceAddr, aclDataType::ACL_FLOAT, &softmaxSum); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + std::vector prefixOp = {0}; + aclIntArray *prefix = aclCreateIntArray(prefixOp.data(), 1); + double scaleValue = 0.088388; + double keepProb = 1; + int64_t preTokens = 65536; + int64_t nextTokens = 65536; + int64_t headNum = 1; + int64_t innerPrecise = 0; + int64_t sparseMod = 0; + + char layOut[5] = {'S', 'B', 'H', 0}; + + // 3. 调用CANN算子库API,需要修改为具体的Api名称 + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + + // 调用aclnnFlashAttentionScore第一段接口 + ret = aclnnFlashAttentionScoreGetWorkspaceSize( + q, k, v, pse, dropMask, padding, attenmask, prefix, scaleValue, + keepProb, preTokens, nextTokens, headNum, layOut, innerPrecise, + sparseMod, softmaxMax, softmaxSum, softmaxOut, attentionOut, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnFlashAttentionScoreGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + + // 调用aclnnFlashAttentionScore第二段接口 + ret = aclnnFlashAttentionScore(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnFlashAttentionScore failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 
Fetch the outputs: copy the results from device memory to the host and print them (adapt to the API under test) + PrintOutResult(attentionOutShape, &attentionOutDeviceAddr); + PrintOutResult(softmaxMaxShape, &softmaxMaxDeviceAddr); + PrintOutResult(softmaxSumShape, &softmaxSumDeviceAddr); + + // 6. Release the aclTensor objects and the aclIntArray created for this call (adapt to the API under test) + aclDestroyTensor(q); + aclDestroyTensor(k); + aclDestroyTensor(v); + aclDestroyTensor(attenmask); + aclDestroyTensor(attentionOut); + aclDestroyTensor(softmaxMax); + aclDestroyTensor(softmaxSum); + aclDestroyIntArray(prefix); + + // 7. Release device resources + aclrtFree(qDeviceAddr); + aclrtFree(kDeviceAddr); + aclrtFree(vDeviceAddr); + aclrtFree(attenmaskDeviceAddr); + aclrtFree(attentionOutDeviceAddr); + aclrtFree(softmaxMaxDeviceAddr); + aclrtFree(softmaxSumDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtDestroyContext(context); + aclrtResetDevice(deviceId); + aclFinalize(); + + return 0; +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c129437bef1560759fefd2929e57c2493f1a87dc --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..2c5e2731d17c58fb5a338cd68706071f2a38aaa9 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/TEST.sh @@ -0,0 +1,11 @@ +set -e +output_path="$OEC_OUTPUT_PATH" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +exit $? 
\ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/testcase.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/testcase.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0214ca930010b48f5bb1d0a84d88f6cd18a2e2b7 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_MATMUL/testcase.cpp @@ -0,0 +1,138 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_matmul.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shapeSize = 1; + for (auto i : shape) { + shapeSize *= i; + } + return shapeSize; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. 
ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {16, 32}; + std::vector mat2Shape = {32, 16}; + std::vector outShape = {16, 16}; + void* selfDeviceAddr = nullptr; + void* mat2DeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* mat2 = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData(512, 1); + std::vector mat2HostData(512, 1); + std::vector outHostData(256, 0); + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建mat2 aclTensor + ret = CreateAclTensor(mat2HostData, mat2Shape, &mat2DeviceAddr, aclDataType::ACL_FLOAT, &mat2); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 调用CANN算子库API,需要修改为具体的Api名称 + int8_t cubeMathType = 1; + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + // 调用aclnnMatmul第一段接口 + ret = aclnnMatmulGetWorkspaceSize(self, mat2, out, cubeMathType, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnMatmulGetWorkspaceSize failed. 
ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnMatmul第二段接口 + ret = aclnnMatmul(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnMatmul failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(mat2); + aclDestroyTensor(out); + + // 7. 
释放device资源,需要根据具体API的接口定义修改 + aclrtFree(selfDeviceAddr); + aclrtFree(mat2DeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c129437bef1560759fefd2929e57c2493f1a87dc --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/TEST.sh new file mode 
100755 index 0000000000000000000000000000000000000000..2c5e2731d17c58fb5a338cd68706071f2a38aaa9 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/TEST.sh @@ -0,0 +1,11 @@ +set -e +output_path="$OEC_OUTPUT_PATH" +src_path=$(pwd) +mkdir -p "$output_path" +cd "${output_path}" +cmake "$src_path" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_SKIP_RPATH=TRUE +make + +cd bin +./testcase +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/testcase.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/testcase.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b3680a3775cf079e6a203653e69c66969e1f6953 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SILU/testcase.cpp @@ -0,0 +1,127 @@ +#include +#include +#include "acl/acl.h" +#include "aclnnop/aclnn_silu.h" + +#define CHECK_RET(cond, return_expr) \ + do { \ + if (!(cond)) { \ + return_expr; \ + } \ + } while (0) + +#define LOG_PRINT(message, ...) \ + do { \ + printf(message, ##__VA_ARGS__); \ + } while (0) + +int64_t GetShapeSize(const std::vector& shape) { + int64_t shape_size = 1; + for (auto i : shape) { + shape_size *= i; + } + return shape_size; +} + +int Init(int32_t deviceId, aclrtStream* stream) { + // 固定写法,AscendCL初始化 + auto ret = aclInit(nullptr); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return ret); + ret = aclrtSetDevice(deviceId); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return ret); + ret = aclrtCreateStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. 
ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化, 参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + // check根据自己的需要处理 + CHECK_RET(ret == 0, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector outHostData = {0, 0, 0, 0, 0, 0, 0, 0}; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 
调用CANN算子库API,需要修改为具体的API + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + // 调用aclnnSilu第一段接口 + ret = aclnnSiluGetWorkspaceSize(self, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSiluGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret;); + } + // 调用aclnnSilu第二段接口 + ret = aclnnSilu(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSilu failed. ERROR: %d\n", ret); return ret); + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, size * sizeof(float), + ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(out); + + // 7. 
释放device资源,需要根据具体API的接口定义修改 + aclrtFree(selfDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SOFTMAX/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SOFTMAX/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c129437bef1560759fefd2929e57c2493f1a87dc --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SOFTMAX/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.14) + +# 设置工程名 +project(ACLNN_EXAMPLE) + +# Compile options +add_compile_options(-std=c++11) + +# 设置编译选项 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./bin") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +# 设置可执行文件名(如opapi_test),并指定待运行算子文件*.cpp所在目录 +add_executable(testcase + testcase.cpp) + +# 设置ASCEND_PATH(CANN软件包目录,请根据实际路径修改)和INCLUDE_BASE_DIR(头文件目录) +if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "") + set(ASCEND_PATH $ENV{ASCEND_HOME_PATH}) +else() + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") +endif() +set(INCLUDE_BASE_DIR "${ASCEND_PATH}/include") +include_directories( + ${INCLUDE_BASE_DIR} + ${INCLUDE_BASE_DIR}/aclnn +) + +# 设置链接的库文件路径 +target_link_libraries(testcase PRIVATE + ${ASCEND_PATH}/lib64/libascendcl.so + ${ASCEND_PATH}/lib64/libnnopbase.so + ${ASCEND_PATH}/lib64/libopapi.so) + +# 可执行文件在CMakeLists文件所在目录的bin目录下 +install(TARGETS testcase DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SOFTMAX/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/aclnn/ACLNN_SOFTMAX/TEST.sh new file mode 100755 index 
/**
 * Return the number of elements described by a tensor shape.
 *
 * @param shape dimension sizes; an empty shape yields 1.
 * @return product of all dimensions.
 */
int64_t GetShapeSize(const std::vector<int64_t>& shape) {
  int64_t shapeSize = 1;
  for (auto dim : shape) {
    shapeSize *= dim;
  }
  return shapeSize;
}
ERROR: %d\n", ret); return ret); + return 0; +} + +template +int CreateAclTensor(const std::vector& hostData, const std::vector& shape, void** deviceAddr, + aclDataType dataType, aclTensor** tensor) { + auto size = GetShapeSize(shape) * sizeof(T); + // 调用aclrtMalloc申请device侧内存 + auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return ret); + // 调用aclrtMemcpy将host侧数据拷贝到device侧内存上 + ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return ret); + + // 计算连续tensor的strides + std::vector strides(shape.size(), 1); + for (int64_t i = shape.size() - 2; i >= 0; i--) { + strides[i] = shape[i + 1] * strides[i + 1]; + } + + // 调用aclCreateTensor接口创建aclTensor + *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, strides.data(), 0, aclFormat::ACL_FORMAT_ND, + shape.data(), shape.size(), *deviceAddr); + return 0; +} + +int main() { + // 1. (固定写法)device/stream初始化,参考AscendCL对外接口列表 + // 根据自己的实际device填写deviceId + int32_t deviceId = 0; + aclrtStream stream; + auto ret = Init(deviceId, &stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return ret); + + // 2. 构造输入与输出,需要根据API的接口自定义构造 + std::vector selfShape = {4, 2}; + std::vector outShape = {4, 2}; + void* selfDeviceAddr = nullptr; + void* outDeviceAddr = nullptr; + aclTensor* self = nullptr; + aclTensor* out = nullptr; + std::vector selfHostData = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector outHostData = {0, 0, 0, 0, 0, 0, 0, 0}; + // 创建self aclTensor + ret = CreateAclTensor(selfHostData, selfShape, &selfDeviceAddr, aclDataType::ACL_FLOAT, &self); + CHECK_RET(ret == ACL_SUCCESS, return ret); + // 创建out aclTensor + ret = CreateAclTensor(outHostData, outShape, &outDeviceAddr, aclDataType::ACL_FLOAT, &out); + CHECK_RET(ret == ACL_SUCCESS, return ret); + + // 3. 
调用CANN算子库API,需要修改为具体的Api名称 + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + int64_t dim = 0; + // 调用aclnnSoftmax第一段接口 + ret = aclnnSoftmaxGetWorkspaceSize(self, dim, out, &workspaceSize, &executor); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmaxGetWorkspaceSize failed. ERROR: %d\n", ret); return ret); + // 根据第一段接口计算出的workspaceSize申请device内存 + void* workspaceAddr = nullptr; + if (workspaceSize > 0) { + ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return ret); + } + // 调用aclnnSoftmax第二段接口 + ret = aclnnSoftmax(workspaceAddr, workspaceSize, executor, stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnSoftmax failed. ERROR: %d\n", ret); return ret); + + // 4. (固定写法)同步等待任务执行结束 + ret = aclrtSynchronizeStream(stream); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return ret); + + // 5. 获取输出的值,将device侧内存上的结果拷贝至host侧,需要根据具体API的接口定义修改 + auto size = GetShapeSize(outShape); + std::vector resultData(size, 0); + ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outDeviceAddr, + size * sizeof(resultData[0]), ACL_MEMCPY_DEVICE_TO_HOST); + CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return ret); + for (int64_t i = 0; i < size; i++) { + LOG_PRINT("result[%ld] is: %f\n", i, resultData[i]); + } + + // 6. 释放aclTensor和aclScalar,需要根据具体API的接口定义修改 + aclDestroyTensor(self); + aclDestroyTensor(out); + + // 7. 
释放device 资源 + aclrtFree(selfDeviceAddr); + aclrtFree(outDeviceAddr); + if (workspaceSize > 0) { + aclrtFree(workspaceAddr); + } + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e16c7eedbef07ed7a40c430d26ede753e330977 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_DVPP_RESNET50) + +add_subdirectory("./src") diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..09dc7f18e1c58529347961ed5e00f024f1f5edfd --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/TEST.sh @@ -0,0 +1,12 @@ +#! 
/bin/bash +set -e +src_path=$(pwd) +argv=2 +data_path="$OEC_DATA_PATH" +output_path="$OEC_OUTPUT_PATH" + +mkdir -p "${output_path}" +cd "${output_path}" +cmake "${src_path}" +make +./main "${argv}" "${data_path}" "${output_path}" \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/inc/dvpp_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/inc/dvpp_process.h new file mode 100644 index 0000000000000000000000000000000000000000..ce5ebb7fbacc7c719457e350a05155b5a65d5182 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/inc/dvpp_process.h @@ -0,0 +1,164 @@ +/** +* @file dvpp_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#pragma once +#include +#include "utils.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +class DvppProcess { +public: + /** + * @brief Constructor + * @param [in] stream: stream + */ + explicit DvppProcess(aclrtStream &stream); + + /** + * @brief Destructor + */ + virtual ~DvppProcess(); + + /** + * @brief dvpp init + * @return result + */ + Result InitResource(); + + /** + * @brief init dvpp output para + * @param [in] modelInputWidth: model input width + * @param [in] modelInputHeight: model input height + * @return result + */ + Result InitDvppOutputPara(int modelInputWidth, int modelInputHeight); + + /** + * @brief set jpegd input + * @param [in] inDevBuffer: device buffer of input pic + * @param [in] inDevBufferSize: device buffer size of input pic + * @param [in] picDesc:picture description + */ + void SetInput4JpegD(char *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc); + + /** + * @brief set jpege input + * @param [in] inDevBuffer: device buffer of input yuv file + * @param [in] inDevBufferSize: device input pic buffer size after align + * @param [in] inputWidth:width of pic after encode + * @param [in] inputHeight:height of pic after encode + */ + void SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight); + + /** + * @brief get dvpp output + * @param [in] outputBuffer: pointer which points to dvpp output buffer + * @param [out] outputSize: output size + */ + void GetDvppOutput(void **outputBuffer, int &outputSize); + + /** + * @brief dvpp process + * @return result + */ + Result Process(); + + /** + * @brief set dvpp type after JpegD(vpcResize/vpcCrop/vpcCropAndPaste) + * @return result + */ + void SetDvppType(DvppType dvppType); + + /** + * @brief compute encode input pic desc size + * @return input pic desc size + */ + uint32_t ComputeEncodeInputSize(int inputWidth, int inputHeight); + + /** + * @brief process encode + * @return result + */ + Result ProcessJpegE(); + + /** + * @brief 
process 8k resize + * @return result + */ + Result Process8kResize(); + +private: + Result InitDecodeOutputDesc(); + Result ProcessDecode(); + void DestroyDecodeResource(); + + Result InitResizeInputDesc(); + Result Init8kResizeInputDesc(); + Result InitResizeOutputDesc(); + Result Init8kResizeOutputDesc(); + Result ProcessResize(); + void DestroyResizeResource(); + + Result InitCropInputDesc(); + Result InitCropOutputDesc(); + Result ProcessCrop(); + void DestroyCropResource(); + + Result InitCropAndPasteInputDesc(); + Result InitCropAndPasteOutputDesc(); + Result ProcessCropAndPaste(); + void DestroyCropAndPasteResource(); + + Result InitEncodeResource(); + void DestroyEncodeResource(); + + void DestroyResource(); + void DestroyDvppOutputPara(); + void DestroyDecodeOutBuff(); + + aclrtStream stream_; + acldvppChannelDesc *dvppChannelDesc_; + + DvppType dvppType_; + acldvppRoiConfig *cropArea_; + acldvppRoiConfig *pasteArea_; + acldvppJpegeConfig *jpegeConfig_; + acldvppResizeConfig *resizeConfig_; + + void* decodeOutBufferDev_; // decode output buffer + acldvppPicDesc *decodeOutputDesc_; //decode output desc + + void* encodeOutBufferDev_; // encode output buffer + uint32_t encodeOutBufferSize_; // encode output buffer size + acldvppPicDesc *encodeInputDesc_; //encode input desc + + acldvppPicDesc *vpcInputDesc_; // vpc input desc + acldvppPicDesc *vpcOutputDesc_; // vpc output desc + + char *inDevBuffer_; // input pic dev buffer + uint32_t inDevBufferSizeD_; // input pic size for decode + uint32_t inDevBufferSizeE_; // input pic size for encode + uint32_t jpegDecodeOutputSize_; // jpeg decode output size + + uint32_t decodeOutputWidth_; // decode output width + uint32_t decodeOutputWidthStride_; // decode output width aligned + uint32_t decodeOutputHeight_; // decode output height + + void *vpcInBufferDev_; // vpc input buffer + void *vpcOutBufferDev_; // vpc output buffer + uint32_t vpcOutBufferSize_; // vpc output size + + uint32_t modelInputWidth_; // model 
input width + uint32_t modelInputHeight_; // model input height + + uint32_t jpegeInputWidth_; // encode input width + uint32_t jpegeInputHeight_; // encode input height +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/inc/sample_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/inc/sample_process.h new file mode 100644 index 0000000000000000000000000000000000000000..14c42bf652c29efa1abb0f0f70f20677bf8151b6 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/inc/sample_process.h @@ -0,0 +1,63 @@ +/** +* @file sample_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include "utils.h" +#include "acl/acl.h" + +class SampleProcess { +public: + /** + * @brief Constructor + */ + SampleProcess(); + + /** + * @brief Destructor + */ + virtual ~SampleProcess(); + + /** + * @brief init reousce + * @return result + */ + Result InitResource(); + + /** + * @brief decode, vpc and infer sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result JpegdProcess(DvppType dvpptype); + + /** + * @brief encode sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result JpegeProcess(DvppType dvpptype); + + /** + * @brief resize 8k sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result Resize8kProcess(DvppType dvpptype); + +private: + /** + * @brief destroy resource + */ + void DestroyResource(); + + int32_t deviceId_; + aclrtContext context_; + aclrtStream stream_; +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/inc/utils.h 
/**
 * Static helpers for loading pictures/binary files into buffers, saving output
 * data and small file-system utilities used by the DVPP sample.
 */
class Utils {
public:
    /**
    * @brief create device buffer of pic for jpeg decode
    * @param [in] picDesc: pic desc (passed by non-const reference; NOTE(review):
    *                     confirm whether the implementation also fills in fields)
    * @param [out] picDevBuffer: device memory of picture
    * @param [out] devPicBufferSize: actual pic size
    * @return result
    */
    static Result GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize);

    /**
    * @brief create buffer of bin file
    * @param [in] fileName: file name
    * @param [out] inputBuff: input data buffer
    * @param [out] fileSize: actual file size
    * @return result
    */
    static Result ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize);

    /**
    * @brief create device buffer of pic
    * @param [in] picDesc: pic desc
    * @param [in] PicBufferSize: aligned pic size (passed by non-const reference;
    *                           NOTE(review): confirm whether it is also updated)
    * @return device buffer of pic
    */
    static void *GetPicDevBuffer(const PicDesc &picDesc, uint32_t &PicBufferSize);

    /**
    * @brief pull model output data to file
    * @param [in] modelOutput: model output dataset
    * @param [in] fileName: file name
    * @return result
    */
    static Result PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName);

    /**
    * @brief save dvpp output data
    * @param [in] fileName: file name
    * @param [in] devPtr: dvpp output data device addr
    * @param [in] dataSize: dvpp output data size
    * @return result
    */
    static Result SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize);

    /**
    * @brief check whether a file exists
    * @param [in] fileName: file to check
    * @return result
    */
    static Result CheckFile(const char *fileName);

    /**
    * @brief save model output data to dst file
    * @param [in] srcfileName: src file name
    * @param [in] dstfileName: dst file name
    * @return result
    */
    static Result SaveModelOutputData(const char *srcfileName, const char *dstfileName);

    /**
    * @brief check folder, if it does not exist, create it
    * @param [in] foldName: folder to check
    * @return result
    */
    static Result CheckAndCreateFolder(const char* foldName);

    /**
    * @brief program waiting
    * @param [in] seconds: waiting time in seconds
    * @return void
    */
    static void SleepTime(unsigned int seconds);
};
# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved.

# CMake lowest version requirement
cmake_minimum_required(VERSION 3.5.1)

# project information
project(ACL_DVPP_RESNET50)

# Compile options
add_compile_options(-std=c++11)

add_definitions(-DENABLE_DVPP_INTERFACE)

# Specify target generation path
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../")
set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")

# Locate the CANN installation. Fall back to the default install directory when
# ASCEND_HOME_PATH is not set; otherwise the include and library paths below
# would silently collapse to "/runtime/include/" and "/lib64".
if(NOT "$ENV{ASCEND_HOME_PATH}" STREQUAL "")
    set(ASCEND_PATH "$ENV{ASCEND_HOME_PATH}")
else()
    set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest")
endif()

set(INC_PATH "${ASCEND_PATH}")
message(STATUS "env INC_PATH: ${INC_PATH}")
set(LIB_PATH "${ASCEND_PATH}/lib64")
message(STATUS "env LIB_PATH: ${LIB_PATH}")

# Header path
include_directories(
    ${INC_PATH}/runtime/include/
    ../inc/
)

# add host lib path
link_directories(
    ${LIB_PATH}
)

add_executable(main
        utils.cpp
        dvpp_process.cpp
        sample_process.cpp
        main.cpp)

if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Windows")
    target_link_libraries(main
        libascendcl libacl_dvpp)
else ()
    target_link_libraries(main
        ascendcl acl_dvpp stdc++)
endif ()

install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
/**
 * Round origSize up to the nearest multiple of alignment.
 *
 * Returns 0 when alignment is 0. The addition is plain unsigned arithmetic, so a
 * sum that wraps past UINT32_MAX also produces 0. Callers in this file treat a
 * 0 result as failure.
 */
uint32_t AlignSize(uint32_t origSize, uint32_t alignment)
{
    if (alignment != 0) {
        const uint32_t bumped = origSize + (alignment - 1);
        return (bumped / alignment) * alignment;
    }
    return 0;
}
(aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, errorCode = %d", static_cast(aclRet)); + } + + (void)acldvppDestroyChannelDesc(dvppChannelDesc_); + dvppChannelDesc_ = nullptr; + } +} + +void DvppProcess::SetInput4JpegD(char *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc) +{ + inDevBuffer_ = inDevBuffer; + inDevBufferSizeD_ = inDevBufferSize; + jpegDecodeOutputSize_ = picDesc.jpegDecodeSize; +} + +void DvppProcess::GetDvppOutput(void **outputBuffer, int &outputSize) +{ + *outputBuffer = vpcOutBufferDev_; + outputSize = vpcOutBufferSize_; + vpcOutBufferDev_ = nullptr; + vpcOutBufferSize_ = 0; +} + +Result DvppProcess::InitDvppOutputPara(int modelInputWidth, int modelInputHeight) +{ + if ((modelInputWidth <= 0) || (modelInputHeight <= 0)) { + ERROR_LOG("init dvpp output para invalid, modelInputWidth = %d, modelInputHeight = %d", + modelInputWidth, modelInputHeight); + return FAILED; + } + modelInputWidth_ = modelInputWidth; + modelInputHeight_ = modelInputHeight; + return SUCCESS; +} + +void DvppProcess::DestroyDvppOutputPara() +{ + if (vpcOutBufferDev_ != nullptr) { + (void)acldvppFree(vpcOutBufferDev_); + vpcOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitDecodeOutputDesc() +{ + aclError aclRet = acldvppMalloc(&decodeOutBufferDev_, jpegDecodeOutputSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc decodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + decodeOutputDesc_ = acldvppCreatePicDesc(); + if (decodeOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc decodeOutputDesc_ failed"); + return FAILED; + } + + acldvppSetPicDescData(decodeOutputDesc_, decodeOutBufferDev_); + acldvppSetPicDescFormat(decodeOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + acldvppSetPicDescSize(decodeOutputDesc_, jpegDecodeOutputSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessDecode() +{ + Result ret = InitDecodeOutputDesc(); + if (ret != SUCCESS) { + 
ERROR_LOG("InitDecodeOutputDesc failed"); + return FAILED; + } + + aclError aclRet = acldvppJpegDecodeAsync(dvppChannelDesc_, reinterpret_cast(inDevBuffer_), + inDevBufferSizeD_, decodeOutputDesc_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegDecodeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("decode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + // get yuv image width and height + decodeOutputWidth_ = acldvppGetPicDescWidth(decodeOutputDesc_); + decodeOutputHeight_ = acldvppGetPicDescHeight(decodeOutputDesc_); + decodeOutputWidthStride_ = acldvppGetPicDescWidthStride(decodeOutputDesc_); + + return SUCCESS; +} + +void DvppProcess::DestroyDecodeResource() +{ + if (decodeOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(decodeOutputDesc_); + decodeOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitResizeInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitResizeInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, 
decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitResizeOutputDesc() +{ + int widthAlignment = 16; + int heightAlignment = 2; + int sizeAlignment = 3; + int sizeNum = 2; + int resizeOutWidth = modelInputWidth_; + int resizeOutHeight = modelInputHeight_; + int resizeOutWidthStride = AlignSize(modelInputWidth_, widthAlignment); + int resizeOutHeightStride = AlignSize(modelInputHeight_, heightAlignment); + if (resizeOutWidthStride == 0 || resizeOutHeightStride == 0) { + ERROR_LOG("InitResizeOutputDesc AlignSize failed"); + return FAILED; + } + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} +extern string data_path ,output_path; +Result DvppProcess::Init8kResizeInputDesc() +{ + uint32_t inWidthStride = 8192; // 8k picture width + uint32_t inHeightStride = 8192; // 8k picture height + uint32_t sizeAlignment = 3; + 
uint32_t sizeNum = 2; + + uint32_t inBufferSize = inWidthStride * inWidthStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + std::string dvppImagePath = data_path + "/data/dvpp_vpc_8192x8192_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 8192, 8192} + // other yuv file + }; + vpcInBufferDev_ = Utils::GetPicDevBuffer(testPic[0], inBufferSize); + if (vpcInBufferDev_ == nullptr) { + ERROR_LOG("get picDevBuffer failed, file name = %s", testPic[0].picName.c_str()); + return FAILED; + } + (void)acldvppSetPicDescData(vpcInputDesc_, vpcInBufferDev_); // JpegD -> vpcResize + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeight(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, inBufferSize); + return SUCCESS; +} + +Result DvppProcess::Init8kResizeOutputDesc() +{ + uint32_t resizeOutWidthStride = 4000; // output picture width + uint32_t resizeOutHeightStride = 4000; // output picture height + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, 
PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + + +Result DvppProcess::ProcessResize() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + Result inputRet = SUCCESS; + Result outputRet = SUCCESS; + if (dvppType_ == VPC_RESIZE) { + inputRet = InitResizeInputDesc(); + outputRet = InitResizeOutputDesc(); + } else if (dvppType_ == VPC_8K_RESIZE) { + inputRet = Init8kResizeInputDesc(); + outputRet = Init8kResizeOutputDesc(); + } else { + ERROR_LOG("invalid dvppType_ %d", static_cast(dvppType_)); + return FAILED; + } + if ((inputRet != SUCCESS) || (outputRet != SUCCESS)) { + ERROR_LOG("init resize input or output description failed"); + return FAILED; + } + + // resize pic + aclError aclRet = acldvppVpcResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcResizeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("resize aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyResizeResource() +{ + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + 
(void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } + + if (vpcInBufferDev_ != nullptr) { + (void)acldvppFree(vpcInBufferDev_); + vpcInBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitCropInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCrop + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropOutputDesc() +{ + int sizeAlignment = 3; + int sizeNum = 2; + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + vpcOutBufferSize_ = dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", 
static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCrop() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 550; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 480; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + Result ret = InitCropInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropInputDesc failed"); + return FAILED; + } + + ret = InitCropOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropOutputDesc failed"); + return FAILED; + } + + // crop pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + 
ERROR_LOG("acldvppVpcCropAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyCropResource() +{ + if (cropArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitCropAndPasteInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropAndPasteInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("InitResizeInputDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCropAndPaste + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + 
(void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropAndPasteOutputDesc() +{ + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + int sizeAlignment = 3; + int sizeNum = 2; + vpcOutBufferSize_ = + dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCropAndPaste() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 512; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 512; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = 
acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + uint32_t pasteLeftOffset = 16; // must even + uint32_t pasteRightOffset = pasteLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t pasteTopOffset = 16; // must even + uint32_t pasteBottomOffset = pasteTopOffset + cropSizeHeight - oddNum; // must odd + pasteArea_ = acldvppCreateRoiConfig(pasteLeftOffset, pasteRightOffset, + pasteTopOffset, pasteBottomOffset); + if (pasteArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig pasteArea_ failed"); + return FAILED; + } + + Result ret = InitCropAndPasteInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteInputDesc failed"); + return FAILED; + } + + ret = InitCropAndPasteOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteOutputDesc failed"); + return FAILED; + } + + // crop and patse pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizePasteAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, pasteArea_, + resizeConfig_, stream_); + + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcCropAndPasteAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop and paste aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyCropAndPasteResource() +{ + if (cropArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (pasteArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(pasteArea_); + pasteArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != 
nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +void DvppProcess::SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight) +{ + inDevBuffer_ = inDevBuffer; + inDevBufferSizeE_ = inDevBufferSize; + jpegeInputWidth_ = inputWidth; + jpegeInputHeight_ = inputHeight; +} + +uint32_t DvppProcess::ComputeEncodeInputSize(int inputWidth, int inputHeight) +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t encodeInWidthStride = AlignSize(inputWidth, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(inputHeight, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("ComputeEncodeInputSize AlignSize failed"); + return FAILED; + } + uint32_t encodeInBufferSize = + encodeInWidthStride * encodeInHeightStride * sizeAlignment / sizeNum; + return encodeInBufferSize; +} + +Result DvppProcess::InitEncodeResource() +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t encodeInWidthStride = AlignSize(jpegeInputWidth_, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(jpegeInputHeight_, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("InitEncodeInputDesc AlignSize failed"); + return FAILED; + } + encodeInputDesc_ = acldvppCreatePicDesc(); + if (encodeInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc encodeInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(encodeInputDesc_, reinterpret_cast(inDevBuffer_)); + (void)acldvppSetPicDescFormat(encodeInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(encodeInputDesc_, jpegeInputWidth_); + (void)acldvppSetPicDescHeight(encodeInputDesc_, jpegeInputHeight_); + 
(void)acldvppSetPicDescWidthStride(encodeInputDesc_, encodeInWidthStride); + (void)acldvppSetPicDescHeightStride(encodeInputDesc_, encodeInHeightStride); + (void)acldvppSetPicDescSize(encodeInputDesc_, inDevBufferSizeE_); + + jpegeConfig_ = acldvppCreateJpegeConfig(); + uint32_t encodeLevel = 100; // default optimal level (0-100) + (void)acldvppSetJpegeConfigLevel(jpegeConfig_, encodeLevel); + + aclError aclRet = acldvppJpegPredictEncSize(encodeInputDesc_, jpegeConfig_, &encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("predict encodeOutBufferSize_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = acldvppMalloc(&encodeOutBufferDev_, encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc encodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +Result DvppProcess::ProcessJpegE() +{ + std::string encodeOutFileName = output_path + "/result/jpege_output_"; + std::string dvppImagePath = data_path + "/data/wood_rabbit_1024_1068_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 1024, 1068} + // other yuv file + }; + + for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); ++index) { + INFO_LOG("start to jpege picture %s", testPic[index].picName.c_str()); + + uint32_t jpegInBufferSize; + jpegInBufferSize = ComputeEncodeInputSize(testPic[index].width, testPic[index].height); + + // get input data buffer + char *picDevBuffer = reinterpret_cast(Utils::GetPicDevBuffer(testPic[index], jpegInBufferSize)); + if (picDevBuffer == nullptr) { + ERROR_LOG("get picDevBuffer failed, index is %zu", index); + return FAILED; + } + + // set jpege input data + SetInput4JpegE(picDevBuffer, jpegInBufferSize, testPic[index].width, testPic[index].height); + picDevBuffer = nullptr; + + // init jpege resource + Result ret = InitEncodeResource(); + if (ret != SUCCESS) { + ERROR_LOG("init jpeg encode failed"); + DestroyEncodeResource(); + return FAILED; 
+ } + + aclError aclRet = acldvppJpegEncodeAsync(dvppChannelDesc_, encodeInputDesc_, encodeOutBufferDev_, + &encodeOutBufferSize_, jpegeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegEncodeAsync failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("encode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + // save jpege result + encodeOutFileName = encodeOutFileName + std::to_string(index) + ".jpg"; + ret = Utils::SaveDvppOutputData(encodeOutFileName.c_str(), encodeOutBufferDev_, encodeOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyEncodeResource(); + return FAILED; + } + } + DestroyEncodeResource(); + return SUCCESS; +} + +void DvppProcess::DestroyEncodeResource() +{ + if (jpegeConfig_ != nullptr) { + (void)acldvppDestroyJpegeConfig(jpegeConfig_); + jpegeConfig_ = nullptr; + } + + if (encodeInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(encodeInputDesc_); + encodeInputDesc_ = nullptr; + } + + if (inDevBuffer_ != nullptr) { + (void)acldvppFree(inDevBuffer_); + inDevBuffer_ = nullptr; + } + + if (encodeOutBufferDev_ != nullptr) { + (void)acldvppFree(encodeOutBufferDev_); + encodeOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::Process8kResize() +{ + std::string vpcOutFileName = output_path + "/result/dvpp_vpc_4000x4000_nv12.yuv"; + Result ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + ret = Utils::SaveDvppOutputData(vpcOutFileName.c_str(), vpcOutBufferDev_, vpcOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + + return SUCCESS; +} + +Result DvppProcess::Process() +{ + // pic 
decode + INFO_LOG("call JpegD"); + Result ret = ProcessDecode(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessDecode failed"); + DestroyDecodeOutBuff(); + DestroyDecodeResource(); + return FAILED; + } + + DestroyDecodeResource(); + + switch (dvppType_) { + case VPC_RESIZE: + INFO_LOG("call vpcResize"); + ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + break; + + case VPC_CROP: + INFO_LOG("call vpcCrop"); + ret = ProcessCrop(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessCrop failed"); + DestroyCropResource(); + return FAILED; + } + DestroyCropResource(); + break; + + case VPC_CROP_AND_PASTE: + INFO_LOG("call vpcCropAndPaste"); + ret = ProcessCropAndPaste(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessCropAndPaste failed"); + DestroyCropAndPasteResource(); + return FAILED; + } + DestroyCropAndPasteResource(); + break; + + default: + ERROR_LOG("unsupported type"); + DestroyDecodeOutBuff(); + break; + } + + INFO_LOG("Process dvpp success"); + return SUCCESS; +} + +void DvppProcess::DestroyDecodeOutBuff() +{ + if (decodeOutBufferDev_ != nullptr) { + (void)acldvppFree(decodeOutBufferDev_); + decodeOutBufferDev_ = nullptr; + } +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/main.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8abbaec9af49446b43ba1545ec4a2e605361a024 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/main.cpp @@ -0,0 +1,68 @@ +/** +* @file main.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include +#include +#include +#include "sample_process.h" +#include "utils.h" + +using namespace std; +string data_path ,output_path; +int main(int argc,const char *argv[]) +{ + INFO_LOG("./main param, param represents a vpc feature and must be set"); + if (argc != 4) { + ERROR_LOG("input param not be set"); + return FAILED; + } + data_path = string(argv[2]); + output_path = string(argv[3]); + string result_path_str = output_path + "/result"; + Result ret = Utils::CheckAndCreateFolder(result_path_str.c_str()); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error, dir = %s.", result_path_str.c_str()); + return FAILED; + } + + SampleProcess sampleProcess; + ret = sampleProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("sample init resource failed"); + return FAILED; + } + + DvppType dvppType = static_cast(atoi(argv[1])); + if (dvppType == JPEG_ENCODE) { + ret = sampleProcess.JpegeProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample jpege process failed"); + return FAILED; + } + return SUCCESS; + } + + if (dvppType == VPC_8K_RESIZE) { + ret = sampleProcess.Resize8kProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample resize 8k process failed"); + return FAILED; + } + return SUCCESS; + } + + ret = sampleProcess.JpegdProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample model process failed"); + return FAILED; + } + + INFO_LOG("execute sample success"); + return SUCCESS; +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/sample_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/sample_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d42b34a316667ce9516797b66b5a0bb1e5118279 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/sample_process.cpp @@ -0,0 +1,218 @@ +/** +* @file sample_process.cpp +* +* Copyright (C) 2020. 
Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "sample_process.h" +#include +#include +#include "dvpp_process.h" +#include "acl/acl.h" +#include "utils.h" +using namespace std; + +SampleProcess::SampleProcess() : deviceId_(0), context_(nullptr), stream_(nullptr) +{ +} + +SampleProcess::~SampleProcess() +{ + DestroyResource(); +} + +Result SampleProcess::InitResource() +{ + // ACL init + aclError ret = aclInit(nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl init failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + INFO_LOG("acl init success"); + + // set device + ret = aclrtSetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("set device %d success", deviceId_); + + // create context (set current) + ret = aclrtCreateContext(&context_, deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create context success"); + + // create stream + ret = aclrtCreateStream(&stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create stream failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create stream success"); + + // get run mode + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + ret = aclrtGetRunMode(&runMode); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + bool isDivece = (runMode == ACL_DEVICE); + RunStatus::SetDeviceStatus(isDivece); + INFO_LOG("get run mode success"); + + 
return SUCCESS; +} + +Result SampleProcess::JpegeProcess(DvppType dvpptype) +{ + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.ProcessJpegE(); + if (ret != SUCCESS) { + ERROR_LOG("process jpege failed"); + return FAILED; + } + return SUCCESS; +} + +Result SampleProcess::Resize8kProcess(DvppType dvpptype) +{ + INFO_LOG("dvpp process 8k resize begin"); + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.Process8kResize(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process 8k resize failed"); + return FAILED; + } + INFO_LOG("dvpp process 8k resize success"); + + return SUCCESS; +} +extern string data_path ,output_path; +// jpegd -> vpc -> model execute +Result SampleProcess::JpegdProcess(DvppType dvpptype) +{ + std::string dvppOutputfileName = output_path + "/result/dvpp_output_"; + + // dvpp init + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + int modelInputWidth = 224; + int modelInputHeight = 224; + std::string dvppImagePath1 = data_path + "/data/persian_cat_1024_1536_283.jpg"; + std::string dvppImagePath2 = data_path + "/data/wood_rabbit_1024_1061_330.jpg"; + // input image + PicDesc testPic[] = { + {dvppImagePath1.c_str(), 0, 0}, + {dvppImagePath2.c_str(), 0, 0}, + }; + INFO_LOG( "-------------------------------------------"); + for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); ++index) { + INFO_LOG("start to process picture:%s", testPic[index].picName.c_str()); + // 1.dvpp process + uint32_t devPicBufferSize; + char *picDevBuffer = nullptr; + // 
get input image data buffer + ret = Utils::GetPicDevBuffer4JpegD(testPic[index], picDevBuffer, devPicBufferSize); + if (ret != SUCCESS) { + ERROR_LOG("get pic device buffer failed, index is %zu", index); + return FAILED; + } + + dvppProcess.SetInput4JpegD(picDevBuffer, devPicBufferSize, testPic[index]); + + ret = dvppProcess.InitDvppOutputPara(modelInputWidth, modelInputHeight); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp output para failed"); + return FAILED; + } + + ret = dvppProcess.Process(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process failed"); + return FAILED; + } + + (void)acldvppFree(picDevBuffer); + picDevBuffer = nullptr; + + void *dvppOutputBuffer = nullptr; + int dvppOutputSize; + dvppProcess.GetDvppOutput(&dvppOutputBuffer, dvppOutputSize); + + std::string dvppOutputfileNameCur = dvppOutputfileName + std::to_string(index); + (void)Utils::SaveDvppOutputData(dvppOutputfileNameCur.c_str(), dvppOutputBuffer, dvppOutputSize); + + + (void)acldvppFree(dvppOutputBuffer); + + } + + return SUCCESS; +} + +void SampleProcess::DestroyResource() +{ + aclError ret; + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy stream failed, errorCode = %d", static_cast(ret)); + } + stream_ = nullptr; + } + INFO_LOG("end to destroy stream"); + + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy context failed, errorCode = %d", static_cast(ret)); + } + context_ = nullptr; + } + INFO_LOG("end to destroy context"); + + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + } + INFO_LOG("end to reset device %d", deviceId_); + + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + ERROR_LOG("finalize acl failed, errorCode = %d", static_cast(ret)); + } + INFO_LOG("end to finalize acl"); +} diff --git 
a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/utils.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..25d7456967a426cb9ba8caafc224ae3d0cb3e0f0 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGD_VPC_CROP_PASTE/src/utils.cpp @@ -0,0 +1,422 @@ +/** +* @file utils.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "utils.h" +#include +#include +#include +#include +#include +#if defined(_MSC_VER) +#include +#else +#include +#include +#include +#endif +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +bool RunStatus::isDevice_ = false; + +Result Utils::ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize) +{ + std::ifstream binFile(fileName, std::ifstream::binary); + if (!binFile.is_open()) { + ERROR_LOG("open file %s failed", fileName.c_str()); + return FAILED; + } + + binFile.seekg(0, binFile.end); + auto binFileBufferLen = binFile.tellg(); + if (binFileBufferLen == 0) { + ERROR_LOG("binfile is empty, filename is %s", fileName.c_str()); + binFile.close(); + return FAILED; + } + binFile.seekg(0, binFile.beg); + + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, binFileBufferLen); + if (inputBuff == nullptr) { + ERROR_LOG("host malloc binFileBufferData failed, errorCode = %d", static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } else { // app is running in device + aclRet = acldvppMalloc(&inputBuff, binFileBufferLen); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("device malloc binFileBufferData failed, errorCode = %d", 
static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } + binFile.read(static_cast(inputBuff), binFileBufferLen); + binFile.close(); + fileSize = binFileBufferLen; + + return SUCCESS; +} + +Result Utils::GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return FAILED; + } + + uint32_t inputBuffSize = 0; + void *inputBuff = nullptr; + auto ret = ReadBinFile(picDesc.picName, inputBuff, inputBuffSize); + if (ret != SUCCESS) { + ERROR_LOG("read bin file failed, file name is %s", picDesc.picName.c_str()); + return FAILED; + } + + aclError aclRet = acldvppJpegGetImageInfoV2(inputBuff, inputBuffSize, &picDesc.width, &picDesc.height, + nullptr, nullptr); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg image info failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + aclRet = acldvppJpegPredictDecSize(inputBuff, inputBuffSize, PIXEL_FORMAT_YUV_SEMIPLANAR_420, + &picDesc.jpegDecodeSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg decode size failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + + void *inBufferDev = nullptr; + uint32_t inBufferSize = inputBuffSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = acldvppMalloc(&inBufferDev, inBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc inBufferSize failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(inputBuff); + return FAILED; + } + + // if app is running in host, need copy data from host to device + aclRet = aclrtMemcpy(inBufferDev, inBufferSize, inputBuff, inputBuffSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + 
ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inBufferDev); + (void)aclrtFreeHost(inputBuff); + return FAILED; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + inBufferDev = inputBuff; + } + devPicBufferSize = inBufferSize; + picDevBuffer = reinterpret_cast(inBufferDev); + + return SUCCESS; +} + +void *Utils::GetPicDevBuffer(const PicDesc &picDesc, uint32_t &picBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return nullptr; + } + + FILE *fp = fopen(picDesc.picName.c_str(), "rb"); + if (fp == nullptr) { + ERROR_LOG("open file %s failed", picDesc.picName.c_str()); + return nullptr; + } + + fseek(fp, 0, SEEK_END); + long fileLen = ftell(fp); + fseek(fp, 0, SEEK_SET); + + if (static_cast(fileLen) < picBufferSize) { + ERROR_LOG("need read %u bytes but file %s only %ld bytes", + picBufferSize, picDesc.picName.c_str(), fileLen); + fclose(fp); + return nullptr; + } + + void *inputDevBuff = nullptr; + aclError aclRet = acldvppMalloc(&inputDevBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc device data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + return nullptr; + } + + void *inputBuff = nullptr; + size_t readSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + (void)acldvppFree(inputDevBuff); + return nullptr; + } + + readSize = fread(inputBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)aclrtFreeHost(inputBuff); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + + // if app is running in host, need copy model output data 
from host to device + aclRet = aclrtMemcpy(inputDevBuff, picBufferSize, inputBuff, picBufferSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inputDevBuff); + (void)aclrtFreeHost(inputBuff); + fclose(fp); + return nullptr; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + readSize = fread(inputDevBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + } + + fclose(fp); + return inputDevBuff; +} + +Result Utils::PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName) +{ + size_t outDatasetNum = aclmdlGetDatasetNumBuffers(modelOutput); + if (outDatasetNum == 0) { + ERROR_LOG("model out dataset num can't be 0"); + } + for (size_t i = 0; i < outDatasetNum; ++i) { + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(modelOutput, i); + if (dataBuffer == nullptr) { + ERROR_LOG("aclmdlGetDatasetBuffer failed"); + return FAILED; + } + + void *dataBufferDev = aclGetDataBufferAddr(dataBuffer); + if (dataBufferDev == nullptr) { + ERROR_LOG("aclGetDataBufferAddr failed"); + return FAILED; + } + + uint32_t bufferSize = aclGetDataBufferSizeV2(dataBuffer); + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, bufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, bufferSize, dataBufferDev, bufferSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("aclrtMemcpy device to host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + } + } else { + dataPtr = dataBufferDev; + } 
+ + uint32_t len = static_cast(bufferSize); + FILE *outputFile = fopen(fileName, "wb+"); + if (outputFile != nullptr) { + fwrite(static_cast(dataPtr), len, sizeof(char), outputFile); + fclose(outputFile); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + } else { + ERROR_LOG("create output file %s failed, size is %u", fileName, len); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + } + return SUCCESS; +} + +Result Utils::SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize) +{ + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, dataSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, dataSize, devPtr, dataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("dvpp output memcpy to host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + return FAILED; + } + } else { + dataPtr = devPtr; + } + + FILE *outFileFp = fopen(fileName, "wb+"); + if (outFileFp == nullptr) { + ERROR_LOG("fopen out file %s failed.", fileName); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + size_t writeSize = fwrite(dataPtr, sizeof(char), dataSize, outFileFp); + if (writeSize != dataSize) { + ERROR_LOG("need write %u bytes to %s, but only write %zu bytes.", + dataSize, fileName, writeSize); + fclose(outFileFp); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + fflush(outFileFp); + fclose(outFileFp); + return SUCCESS; +} + +Result Utils::CheckFile(const char *fileName) +{ + int i = 0; + while (i < 10) { + std::ifstream f (fileName); + if (f.good()) { + break; + } + 
SleepTime(1); // slepp 1s + INFO_LOG("check result, wait time %d second", i + 1); + i++; + } + // 10 is max time of checking + if (i == 10) { + ERROR_LOG("check result failed, timeout, expect file:%s", fileName); + return FAILED; + } + return SUCCESS; +} + +Result Utils::SaveModelOutputData(const char *srcfileName, const char *dstfileName) +{ + Result ret = CheckFile(srcfileName); + if (ret != SUCCESS) { + ERROR_LOG("model output file not exist"); + return FAILED; + } + FILE *model_output = fopen(srcfileName, "rb"); + if (model_output == nullptr) { + ERROR_LOG("fopen out file %s failed.", srcfileName); + return FAILED; + } + + FILE *model_output_txt = fopen(dstfileName, "wb+"); + if (model_output_txt == nullptr) { + ERROR_LOG("fopen out file %s failed.", dstfileName); + fclose(model_output); + return FAILED; + } + + int i = 0; + float prop = 0.0; + std::map> mp; + std::map::iterator ite; + while (feof(model_output) == 0) { + ite = mp.end(); + fread(&prop, sizeof(float), 1, model_output); + mp.insert(ite, std::map::value_type(prop, i)); + fprintf(model_output_txt, "%f,%d\n", prop, i); + i++; + } + fclose(model_output); + ite = mp.begin(); + float sum = 0.0; + float max = ite->first; + int classType = ite->second; + for (i = 0 ; i < 5; i++) { + sum += ite->first; + ++ite; + } + fprintf(model_output_txt, "classType[%d], top1[%f], top5[%f]", classType, max, sum); + fclose(model_output_txt); + INFO_LOG("result : classType[%d], top1[%f], top5[%f]", classType, max, sum); + INFO_LOG("-------------------------------------------"); + return SUCCESS; +} + +Result Utils::CheckAndCreateFolder(const char* foldName) +{ + INFO_LOG("start check result fold:%s", foldName); +#if defined(_MSC_VER) + DWORD ret = GetFileAttributes((LPCSTR)foldName); + if (ret == INVALID_FILE_ATTRIBUTES) { + BOOL flag = CreateDirectory((LPCSTR)foldName, nullptr); + if (flag) { + INFO_LOG("make directory successfully."); + } else { + INFO_LOG("make directory errorly."); + return FAILED; + } + } +#else + 
#!/bin/bash
# Build the ACL_MEDIA_JPEGE sample out of tree in $OEC_OUTPUT_PATH and run it.
# The script's exit status is the status of the first failing step, or of ./main.

src_path=$(pwd)
argv=3   # first argument passed to ./main
# Abort early when the caller did not export the required paths; with an empty
# output path the "cd" below would leave the build running in the source tree.
data_path="${OEC_DATA_PATH:?OEC_DATA_PATH is not set}"
output_path="${OEC_OUTPUT_PATH:?OEC_OUTPUT_PATH is not set}"

mkdir -p "${output_path}" || exit 1
cd "${output_path}" || exit 1
# Do not run a stale or missing binary when configuration or compilation fails.
cmake "${src_path}" || exit 1
make || exit 1
./main "${argv}" "${data_path}" "${output_path}"
[in] inDevBufferSize: device input pic buffer size after align + * @param [in] inputWidth:width of pic after encode + * @param [in] inputHeight:height of pic after encode + */ + void SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight); + + /** + * @brief get dvpp output + * @param [in] outputBuffer: pointer which points to dvpp output buffer + * @param [out] outputSize: output size + */ + void GetDvppOutput(void **outputBuffer, int &outputSize); + + /** + * @brief dvpp process + * @return result + */ + Result Process(); + + /** + * @brief set dvpp type after JpegD(vpcResize/vpcCrop/vpcCropAndPaste) + * @return result + */ + void SetDvppType(DvppType dvppType); + + /** + * @brief compute encode input pic desc size + * @return input pic desc size + */ + uint32_t ComputeEncodeInputSize(int inputWidth, int inputHeight); + + /** + * @brief process encode + * @return result + */ + Result ProcessJpegE(); + + /** + * @brief process 8k resize + * @return result + */ + Result Process8kResize(); + +private: + Result InitDecodeOutputDesc(); + Result ProcessDecode(); + void DestroyDecodeResource(); + + Result InitResizeInputDesc(); + Result Init8kResizeInputDesc(); + Result InitResizeOutputDesc(); + Result Init8kResizeOutputDesc(); + Result ProcessResize(); + void DestroyResizeResource(); + + Result InitCropInputDesc(); + Result InitCropOutputDesc(); + Result ProcessCrop(); + void DestroyCropResource(); + + Result InitCropAndPasteInputDesc(); + Result InitCropAndPasteOutputDesc(); + Result ProcessCropAndPaste(); + void DestroyCropAndPasteResource(); + + Result InitEncodeResource(); + void DestroyEncodeResource(); + + void DestroyResource(); + void DestroyDvppOutputPara(); + void DestroyDecodeOutBuff(); + + aclrtStream stream_; + acldvppChannelDesc *dvppChannelDesc_; + + DvppType dvppType_; + acldvppRoiConfig *cropArea_; + acldvppRoiConfig *pasteArea_; + acldvppJpegeConfig *jpegeConfig_; + acldvppResizeConfig *resizeConfig_; + + void* 
decodeOutBufferDev_; // decode output buffer + acldvppPicDesc *decodeOutputDesc_; //decode output desc + + void* encodeOutBufferDev_; // encode output buffer + uint32_t encodeOutBufferSize_; // encode output buffer size + acldvppPicDesc *encodeInputDesc_; //encode input desc + + acldvppPicDesc *vpcInputDesc_; // vpc input desc + acldvppPicDesc *vpcOutputDesc_; // vpc output desc + + char *inDevBuffer_; // input pic dev buffer + uint32_t inDevBufferSizeD_; // input pic size for decode + uint32_t inDevBufferSizeE_; // input pic size for encode + uint32_t jpegDecodeOutputSize_; // jpeg decode output size + + uint32_t decodeOutputWidth_; // decode output width + uint32_t decodeOutputWidthStride_; // decode output width aligned + uint32_t decodeOutputHeight_; // decode output height + + void *vpcInBufferDev_; // vpc input buffer + void *vpcOutBufferDev_; // vpc output buffer + uint32_t vpcOutBufferSize_; // vpc output size + + uint32_t modelInputWidth_; // model input width + uint32_t modelInputHeight_; // model input height + + uint32_t jpegeInputWidth_; // encode input width + uint32_t jpegeInputHeight_; // encode input height +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/inc/sample_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/inc/sample_process.h new file mode 100644 index 0000000000000000000000000000000000000000..14c42bf652c29efa1abb0f0f70f20677bf8151b6 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/inc/sample_process.h @@ -0,0 +1,63 @@ +/** +* @file sample_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/**
 * Top-level driver of the DVPP sample.
 * Holds the device id, context and stream used by the sample; the
 * implementation lives in sample_process.cpp (not shown in this header).
 */
class SampleProcess {
public:
    /**
    * @brief Constructor
    */
    SampleProcess();

    /**
    * @brief Destructor
    */
    virtual ~SampleProcess();

    /**
    * @brief init resource
    * @return result
    */
    Result InitResource();

    /**
    * @brief decode, vpc and infer sample process
    * @param [in] dvpptype: dvpp type
    * @return result
    */
    Result JpegdProcess(DvppType dvpptype);

    /**
    * @brief encode sample process
    * @param [in] dvpptype: dvpp type
    * @return result
    */
    Result JpegeProcess(DvppType dvpptype);

    /**
    * @brief resize 8k sample process
    * @param [in] dvpptype: dvpp type
    * @return result
    */
    Result Resize8kProcess(DvppType dvpptype);

private:
    /**
    * @brief destroy resource
    */
    void DestroyResource();

    int32_t deviceId_;      // id of the device used by the sample
    aclrtContext context_;  // ACL runtime context
    aclrtStream stream_;    // ACL runtime stream
};
/**
 * Static helpers for loading pictures into DVPP memory and saving results.
 * Host/device behaviour is selected through RunStatus::GetDeviceStatus().
 */
class Utils {
public:
    /**
    * @brief read a jpeg file into a device buffer usable by the jpeg decoder
    * @param [in,out] picDesc: picName is read; width, height and jpegDecodeSize are filled in
    * @param [out] picDevBuffer: device memory of picture
    * @param [out] devPicBufferSize: actual pic size (size of the file in bytes)
    * @return result
    */
    static Result GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize);

    /**
    * @brief create buffer of bin file
    * @param [in] fileName: file name
    * @param [out] inputBuff: input data buffer
    * @param [out] fileSize: actual file size
    * @return result
    */
    static Result ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize);

    /**
    * @brief create device buffer of pic and fill it with the first PicBufferSize bytes of the file
    * @param [in] picDesc: pic desc (only picName is used)
    * @param [in] PicBufferSize: aligned pic size; the file must contain at least this many bytes
    * @return device buffer of pic allocated with acldvppMalloc, nullptr on failure
    */
    static void *GetPicDevBuffer(const PicDesc &picDesc, uint32_t &PicBufferSize);

    /**
    * @brief pull model output data to file
    *        (the file is reopened with "wb+" for every output buffer)
    * @param [in] modelOutput: model output dataset
    * @param [in] fileName: file name
    * @return result
    */
    static Result PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName);

    /**
    * @brief save dvpp output data
    * @param [in] fileName: file name
    * @param [in] devPtr: dvpp output data device addr
    * @param [in] dataSize: dvpp output data size
    * @return result
    */
    static Result SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize);

    /**
    * @brief check whether a file exists, retrying once per second for up to 10 attempts
    * @param [in] fileName: file to check
    * @return result (FAILED when the file is still missing after the last attempt)
    */
    static Result CheckFile(const char *fileName);

    /**
    * @brief save model output data to dst file as "value,index" text lines
    *        followed by a classType/top1/top5 summary line
    * @param [in] srcfileName: src file name
    * @param [in] dstfileName: dst file name
    * @return result
    */
    static Result SaveModelOutputData(const char *srcfileName, const char *dstfileName);

    /**
    * @brief check folder, if not exist, create it
    * @param [in] foldName: folder to check
    * @return result
    */
    static Result CheckAndCreateFolder(const char* foldName);

    /**
    * @brief program waiting
    * @param [in] seconds: waiting time in seconds
    * @return void
    */
    static void SleepTime(unsigned int seconds);
};
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_DVPP_RESNET50) + +# Compile options +add_compile_options(-std=c++11) + +add_definitions(-DENABLE_DVPP_INTERFACE) + +# Specify target generation path +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +set(INC_PATH $ENV{ASCEND_HOME_PATH}) +message(STATUS "env INC_PATH: ${INC_PATH}") +set(LIB_PATH "$ENV{ASCEND_HOME_PATH}/lib64") +message(STATUS "env LIB_PATH: ${LIB_PATH}") + +# Header path +include_directories( + ${INC_PATH}/runtime/include/ + ../inc/ +) + +# add host lib path +link_directories( + ${LIB_PATH} +) + +add_executable(main + utils.cpp + dvpp_process.cpp + sample_process.cpp + main.cpp) + +if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Windows") + target_link_libraries(main + libascendcl libacl_dvpp) +else () + target_link_libraries(main + ascendcl acl_dvpp stdc++) +endif () + +install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/dvpp_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/dvpp_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e255a6a5f784871ca97fe9aa3260849a34480c40 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/dvpp_process.cpp @@ -0,0 +1,895 @@ +/** +* @file dvpp_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "dvpp_process.h" +#include +#include +#include "acl/acl.h" +#include "utils.h" +using namespace std; + +DvppProcess::DvppProcess(aclrtStream &stream) + : stream_(stream), dvppChannelDesc_(nullptr), dvppType_(VPC_RESIZE), + cropArea_(nullptr), pasteArea_(nullptr), jpegeConfig_(nullptr), resizeConfig_(nullptr), + decodeOutBufferDev_(nullptr), decodeOutputDesc_(nullptr), encodeOutBufferDev_(nullptr), + encodeInputDesc_(nullptr), vpcInputDesc_(nullptr), vpcOutputDesc_(nullptr), inDevBuffer_(nullptr), + inDevBufferSizeD_(0), inDevBufferSizeE_(0), jpegDecodeOutputSize_(0), decodeOutputWidth_(0), + decodeOutputWidthStride_(0), decodeOutputHeight_(0), vpcInBufferDev_(nullptr), vpcOutBufferDev_(nullptr), + vpcOutBufferSize_(0), modelInputWidth_(0), modelInputHeight_(0), jpegeInputWidth_(0), jpegeInputHeight_(0) +{ +} + +DvppProcess::~DvppProcess() +{ + DestroyResource(); + DestroyDvppOutputPara(); +} + +uint32_t AlignSize(uint32_t origSize, uint32_t alignment) +{ + if (alignment == 0) { + return 0; + } + uint32_t alignmentH = alignment - 1; + return (origSize + alignmentH) / alignment * alignment; +} + +void DvppProcess::SetDvppType(DvppType dvppType) +{ + dvppType_ = dvppType; +} + +Result DvppProcess::InitResource() +{ + dvppChannelDesc_ = acldvppCreateChannelDesc(); + if (dvppChannelDesc_ == nullptr) { + ERROR_LOG("acldvppCreateChannelDesc failed"); + return FAILED; + } + + aclError aclRet = acldvppCreateChannel(dvppChannelDesc_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppCreateChannel failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + INFO_LOG("dvpp init resource success"); + return SUCCESS; +} + +void DvppProcess::DestroyResource() +{ + if (dvppChannelDesc_ != nullptr) { + aclError aclRet = acldvppDestroyChannel(dvppChannelDesc_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, errorCode = %d", static_cast(aclRet)); + } + + (void)acldvppDestroyChannelDesc(dvppChannelDesc_); + dvppChannelDesc_ = 
nullptr; + } +} + +void DvppProcess::SetInput4JpegD(char *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc) +{ + inDevBuffer_ = inDevBuffer; + inDevBufferSizeD_ = inDevBufferSize; + jpegDecodeOutputSize_ = picDesc.jpegDecodeSize; +} + +void DvppProcess::GetDvppOutput(void **outputBuffer, int &outputSize) +{ + *outputBuffer = vpcOutBufferDev_; + outputSize = vpcOutBufferSize_; + vpcOutBufferDev_ = nullptr; + vpcOutBufferSize_ = 0; +} + +Result DvppProcess::InitDvppOutputPara(int modelInputWidth, int modelInputHeight) +{ + if ((modelInputWidth <= 0) || (modelInputHeight <= 0)) { + ERROR_LOG("init dvpp output para invalid, modelInputWidth = %d, modelInputHeight = %d", + modelInputWidth, modelInputHeight); + return FAILED; + } + modelInputWidth_ = modelInputWidth; + modelInputHeight_ = modelInputHeight; + return SUCCESS; +} + +void DvppProcess::DestroyDvppOutputPara() +{ + if (vpcOutBufferDev_ != nullptr) { + (void)acldvppFree(vpcOutBufferDev_); + vpcOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitDecodeOutputDesc() +{ + aclError aclRet = acldvppMalloc(&decodeOutBufferDev_, jpegDecodeOutputSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc decodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + decodeOutputDesc_ = acldvppCreatePicDesc(); + if (decodeOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc decodeOutputDesc_ failed"); + return FAILED; + } + + acldvppSetPicDescData(decodeOutputDesc_, decodeOutBufferDev_); + acldvppSetPicDescFormat(decodeOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + acldvppSetPicDescSize(decodeOutputDesc_, jpegDecodeOutputSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessDecode() +{ + Result ret = InitDecodeOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitDecodeOutputDesc failed"); + return FAILED; + } + + aclError aclRet = acldvppJpegDecodeAsync(dvppChannelDesc_, reinterpret_cast(inDevBuffer_), + inDevBufferSizeD_, decodeOutputDesc_, stream_); 
+ if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegDecodeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("decode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + // get yuv image width and height + decodeOutputWidth_ = acldvppGetPicDescWidth(decodeOutputDesc_); + decodeOutputHeight_ = acldvppGetPicDescHeight(decodeOutputDesc_); + decodeOutputWidthStride_ = acldvppGetPicDescWidthStride(decodeOutputDesc_); + + return SUCCESS; +} + +void DvppProcess::DestroyDecodeResource() +{ + if (decodeOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(decodeOutputDesc_); + decodeOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitResizeInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitResizeInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, 
jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitResizeOutputDesc() +{ + int widthAlignment = 16; + int heightAlignment = 2; + int sizeAlignment = 3; + int sizeNum = 2; + int resizeOutWidth = modelInputWidth_; + int resizeOutHeight = modelInputHeight_; + int resizeOutWidthStride = AlignSize(modelInputWidth_, widthAlignment); + int resizeOutHeightStride = AlignSize(modelInputHeight_, heightAlignment); + if (resizeOutWidthStride == 0 || resizeOutHeightStride == 0) { + ERROR_LOG("InitResizeOutputDesc AlignSize failed"); + return FAILED; + } + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} +extern string data_path ,output_path; +Result DvppProcess::Init8kResizeInputDesc() +{ + uint32_t inWidthStride = 8192; // 8k picture width + uint32_t inHeightStride = 8192; // 8k picture height + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + uint32_t inBufferSize = inWidthStride * inWidthStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc 
vpcInputDesc_ failed"); + return FAILED; + } + std::string dvppImagePath = data_path + "/data/dvpp_vpc_8192x8192_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 8192, 8192} + // other yuv file + }; + vpcInBufferDev_ = Utils::GetPicDevBuffer(testPic[0], inBufferSize); + if (vpcInBufferDev_ == nullptr) { + ERROR_LOG("get picDevBuffer failed, file name = %s", testPic[0].picName.c_str()); + return FAILED; + } + (void)acldvppSetPicDescData(vpcInputDesc_, vpcInBufferDev_); // JpegD -> vpcResize + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeight(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, inBufferSize); + return SUCCESS; +} + +Result DvppProcess::Init8kResizeOutputDesc() +{ + uint32_t resizeOutWidthStride = 4000; // output picture width + uint32_t resizeOutHeightStride = 4000; // output picture height + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, 
resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + + +Result DvppProcess::ProcessResize() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + Result inputRet = SUCCESS; + Result outputRet = SUCCESS; + if (dvppType_ == VPC_RESIZE) { + inputRet = InitResizeInputDesc(); + outputRet = InitResizeOutputDesc(); + } else if (dvppType_ == VPC_8K_RESIZE) { + inputRet = Init8kResizeInputDesc(); + outputRet = Init8kResizeOutputDesc(); + } else { + ERROR_LOG("invalid dvppType_ %d", static_cast(dvppType_)); + return FAILED; + } + if ((inputRet != SUCCESS) || (outputRet != SUCCESS)) { + ERROR_LOG("init resize input or output description failed"); + return FAILED; + } + + // resize pic + aclError aclRet = acldvppVpcResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcResizeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("resize aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyResizeResource() +{ + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } + + if (vpcInBufferDev_ != nullptr) { + (void)acldvppFree(vpcInBufferDev_); + vpcInBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitCropInputDesc() +{ + uint32_t heightAlignment = 16; 
+ uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCrop + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropOutputDesc() +{ + int sizeAlignment = 3; + int sizeNum = 2; + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + vpcOutBufferSize_ = dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + 
(void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCrop() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 550; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 480; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + Result ret = InitCropInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropInputDesc failed"); + return FAILED; + } + + ret = InitCropOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropOutputDesc failed"); + return FAILED; + } + + // crop pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcCropAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + 
return SUCCESS; +} + +void DvppProcess::DestroyCropResource() +{ + if (cropArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitCropAndPasteInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropAndPasteInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("InitResizeInputDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCropAndPaste + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropAndPasteOutputDesc() +{ + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = 
modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + int sizeAlignment = 3; + int sizeNum = 2; + vpcOutBufferSize_ = + dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCropAndPaste() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 512; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 512; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + uint32_t pasteLeftOffset = 16; // must even + uint32_t pasteRightOffset = pasteLeftOffset + cropSizeWidth - oddNum; // must odd 
+ uint32_t pasteTopOffset = 16; // must even + uint32_t pasteBottomOffset = pasteTopOffset + cropSizeHeight - oddNum; // must odd + pasteArea_ = acldvppCreateRoiConfig(pasteLeftOffset, pasteRightOffset, + pasteTopOffset, pasteBottomOffset); + if (pasteArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig pasteArea_ failed"); + return FAILED; + } + + Result ret = InitCropAndPasteInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteInputDesc failed"); + return FAILED; + } + + ret = InitCropAndPasteOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteOutputDesc failed"); + return FAILED; + } + + // crop and patse pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizePasteAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, pasteArea_, + resizeConfig_, stream_); + + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcCropAndPasteAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop and paste aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyCropAndPasteResource() +{ + if (cropArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (pasteArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(pasteArea_); + pasteArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +void DvppProcess::SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight) +{ + inDevBuffer_ = 
inDevBuffer; + inDevBufferSizeE_ = inDevBufferSize; + jpegeInputWidth_ = inputWidth; + jpegeInputHeight_ = inputHeight; +} + +uint32_t DvppProcess::ComputeEncodeInputSize(int inputWidth, int inputHeight) +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t encodeInWidthStride = AlignSize(inputWidth, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(inputHeight, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("ComputeEncodeInputSize AlignSize failed"); + return FAILED; + } + uint32_t encodeInBufferSize = + encodeInWidthStride * encodeInHeightStride * sizeAlignment / sizeNum; + return encodeInBufferSize; +} + +Result DvppProcess::InitEncodeResource() +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t encodeInWidthStride = AlignSize(jpegeInputWidth_, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(jpegeInputHeight_, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("InitEncodeInputDesc AlignSize failed"); + return FAILED; + } + encodeInputDesc_ = acldvppCreatePicDesc(); + if (encodeInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc encodeInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(encodeInputDesc_, reinterpret_cast(inDevBuffer_)); + (void)acldvppSetPicDescFormat(encodeInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(encodeInputDesc_, jpegeInputWidth_); + (void)acldvppSetPicDescHeight(encodeInputDesc_, jpegeInputHeight_); + (void)acldvppSetPicDescWidthStride(encodeInputDesc_, encodeInWidthStride); + (void)acldvppSetPicDescHeightStride(encodeInputDesc_, encodeInHeightStride); + (void)acldvppSetPicDescSize(encodeInputDesc_, inDevBufferSizeE_); + + jpegeConfig_ = acldvppCreateJpegeConfig(); + uint32_t encodeLevel = 100; // default optimal level (0-100) + 
(void)acldvppSetJpegeConfigLevel(jpegeConfig_, encodeLevel); + + aclError aclRet = acldvppJpegPredictEncSize(encodeInputDesc_, jpegeConfig_, &encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("predict encodeOutBufferSize_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = acldvppMalloc(&encodeOutBufferDev_, encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc encodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +Result DvppProcess::ProcessJpegE() +{ + std::string encodeOutFileName = output_path + "/result/jpege_output_"; + std::string dvppImagePath = data_path + "/data/wood_rabbit_1024_1068_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 1024, 1068} + // other yuv file + }; + + for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); ++index) { + INFO_LOG("start to jpege picture %s", testPic[index].picName.c_str()); + + uint32_t jpegInBufferSize; + jpegInBufferSize = ComputeEncodeInputSize(testPic[index].width, testPic[index].height); + + // get input data buffer + char *picDevBuffer = reinterpret_cast(Utils::GetPicDevBuffer(testPic[index], jpegInBufferSize)); + if (picDevBuffer == nullptr) { + ERROR_LOG("get picDevBuffer failed, index is %zu", index); + return FAILED; + } + + // set jpege input data + SetInput4JpegE(picDevBuffer, jpegInBufferSize, testPic[index].width, testPic[index].height); + picDevBuffer = nullptr; + + // init jpege resource + Result ret = InitEncodeResource(); + if (ret != SUCCESS) { + ERROR_LOG("init jpeg encode failed"); + DestroyEncodeResource(); + return FAILED; + } + + aclError aclRet = acldvppJpegEncodeAsync(dvppChannelDesc_, encodeInputDesc_, encodeOutBufferDev_, + &encodeOutBufferSize_, jpegeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegEncodeAsync failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + aclRet = 
aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("encode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + // save jpege result + encodeOutFileName = encodeOutFileName + std::to_string(index) + ".jpg"; + ret = Utils::SaveDvppOutputData(encodeOutFileName.c_str(), encodeOutBufferDev_, encodeOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyEncodeResource(); + return FAILED; + } + } + DestroyEncodeResource(); + return SUCCESS; +} + +void DvppProcess::DestroyEncodeResource() +{ + if (jpegeConfig_ != nullptr) { + (void)acldvppDestroyJpegeConfig(jpegeConfig_); + jpegeConfig_ = nullptr; + } + + if (encodeInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(encodeInputDesc_); + encodeInputDesc_ = nullptr; + } + + if (inDevBuffer_ != nullptr) { + (void)acldvppFree(inDevBuffer_); + inDevBuffer_ = nullptr; + } + + if (encodeOutBufferDev_ != nullptr) { + (void)acldvppFree(encodeOutBufferDev_); + encodeOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::Process8kResize() +{ + std::string vpcOutFileName = output_path + "/result/dvpp_vpc_4000x4000_nv12.yuv"; + Result ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + ret = Utils::SaveDvppOutputData(vpcOutFileName.c_str(), vpcOutBufferDev_, vpcOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + + return SUCCESS; +} + +Result DvppProcess::Process() +{ + // pic decode + INFO_LOG("call JpegD"); + Result ret = ProcessDecode(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessDecode failed"); + DestroyDecodeOutBuff(); + DestroyDecodeResource(); + return FAILED; + } + + DestroyDecodeResource(); + + switch (dvppType_) { + case VPC_RESIZE: + INFO_LOG("call vpcResize"); + ret = ProcessResize(); + if 
(ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + break; + + case VPC_CROP: + INFO_LOG("call vpcCrop"); + ret = ProcessCrop(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessCrop failed"); + DestroyCropResource(); + return FAILED; + } + DestroyCropResource(); + break; + + case VPC_CROP_AND_PASTE: + INFO_LOG("call vpcCropAndPaste"); + ret = ProcessCropAndPaste(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessCropAndPaste failed"); + DestroyCropAndPasteResource(); + return FAILED; + } + DestroyCropAndPasteResource(); + break; + + default: + ERROR_LOG("unsupported type"); + DestroyDecodeOutBuff(); + break; + } + + INFO_LOG("Process dvpp success"); + return SUCCESS; +} + +void DvppProcess::DestroyDecodeOutBuff() +{ + if (decodeOutBufferDev_ != nullptr) { + (void)acldvppFree(decodeOutBufferDev_); + decodeOutBufferDev_ = nullptr; + } +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/main.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8abbaec9af49446b43ba1545ec4a2e605361a024 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/main.cpp @@ -0,0 +1,68 @@ +/** +* @file main.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include +#include +#include +#include "sample_process.h" +#include "utils.h" + +using namespace std; +string data_path ,output_path; +int main(int argc,const char *argv[]) +{ + INFO_LOG("./main param, param represents a vpc feature and must be set"); + if (argc != 4) { + ERROR_LOG("input param not be set"); + return FAILED; + } + data_path = string(argv[2]); + output_path = string(argv[3]); + string result_path_str = output_path + "/result"; + Result ret = Utils::CheckAndCreateFolder(result_path_str.c_str()); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error, dir = %s.", result_path_str.c_str()); + return FAILED; + } + + SampleProcess sampleProcess; + ret = sampleProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("sample init resource failed"); + return FAILED; + } + + DvppType dvppType = static_cast(atoi(argv[1])); + if (dvppType == JPEG_ENCODE) { + ret = sampleProcess.JpegeProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample jpege process failed"); + return FAILED; + } + return SUCCESS; + } + + if (dvppType == VPC_8K_RESIZE) { + ret = sampleProcess.Resize8kProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample resize 8k process failed"); + return FAILED; + } + return SUCCESS; + } + + ret = sampleProcess.JpegdProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample model process failed"); + return FAILED; + } + + INFO_LOG("execute sample success"); + return SUCCESS; +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/sample_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/sample_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d42b34a316667ce9516797b66b5a0bb1e5118279 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/sample_process.cpp @@ -0,0 +1,218 @@ +/** +* @file sample_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "sample_process.h" +#include +#include +#include "dvpp_process.h" +#include "acl/acl.h" +#include "utils.h" +using namespace std; + +SampleProcess::SampleProcess() : deviceId_(0), context_(nullptr), stream_(nullptr) +{ +} + +SampleProcess::~SampleProcess() +{ + DestroyResource(); +} + +Result SampleProcess::InitResource() +{ + // ACL init + aclError ret = aclInit(nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl init failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + INFO_LOG("acl init success"); + + // set device + ret = aclrtSetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("set device %d success", deviceId_); + + // create context (set current) + ret = aclrtCreateContext(&context_, deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create context success"); + + // create stream + ret = aclrtCreateStream(&stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create stream failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create stream success"); + + // get run mode + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + ret = aclrtGetRunMode(&runMode); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + bool isDivece = (runMode == ACL_DEVICE); + RunStatus::SetDeviceStatus(isDivece); + INFO_LOG("get run mode success"); + + return SUCCESS; +} + +Result 
SampleProcess::JpegeProcess(DvppType dvpptype) +{ + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.ProcessJpegE(); + if (ret != SUCCESS) { + ERROR_LOG("process jpege failed"); + return FAILED; + } + return SUCCESS; +} + +Result SampleProcess::Resize8kProcess(DvppType dvpptype) +{ + INFO_LOG("dvpp process 8k resize begin"); + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.Process8kResize(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process 8k resize failed"); + return FAILED; + } + INFO_LOG("dvpp process 8k resize success"); + + return SUCCESS; +} +extern string data_path ,output_path; +// jpegd -> vpc -> model execute +Result SampleProcess::JpegdProcess(DvppType dvpptype) +{ + std::string dvppOutputfileName = output_path + "/result/dvpp_output_"; + + // dvpp init + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + int modelInputWidth = 224; + int modelInputHeight = 224; + std::string dvppImagePath1 = data_path + "/data/persian_cat_1024_1536_283.jpg"; + std::string dvppImagePath2 = data_path + "/data/wood_rabbit_1024_1061_330.jpg"; + // input image + PicDesc testPic[] = { + {dvppImagePath1.c_str(), 0, 0}, + {dvppImagePath2.c_str(), 0, 0}, + }; + INFO_LOG( "-------------------------------------------"); + for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); ++index) { + INFO_LOG("start to process picture:%s", testPic[index].picName.c_str()); + // 1.dvpp process + uint32_t devPicBufferSize; + char *picDevBuffer = nullptr; + // get input image data buffer 
+ ret = Utils::GetPicDevBuffer4JpegD(testPic[index], picDevBuffer, devPicBufferSize); + if (ret != SUCCESS) { + ERROR_LOG("get pic device buffer failed, index is %zu", index); + return FAILED; + } + + dvppProcess.SetInput4JpegD(picDevBuffer, devPicBufferSize, testPic[index]); + + ret = dvppProcess.InitDvppOutputPara(modelInputWidth, modelInputHeight); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp output para failed"); + return FAILED; + } + + ret = dvppProcess.Process(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process failed"); + return FAILED; + } + + (void)acldvppFree(picDevBuffer); + picDevBuffer = nullptr; + + void *dvppOutputBuffer = nullptr; + int dvppOutputSize; + dvppProcess.GetDvppOutput(&dvppOutputBuffer, dvppOutputSize); + + std::string dvppOutputfileNameCur = dvppOutputfileName + std::to_string(index); + (void)Utils::SaveDvppOutputData(dvppOutputfileNameCur.c_str(), dvppOutputBuffer, dvppOutputSize); + + + (void)acldvppFree(dvppOutputBuffer); + + } + + return SUCCESS; +} + +void SampleProcess::DestroyResource() +{ + aclError ret; + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy stream failed, errorCode = %d", static_cast(ret)); + } + stream_ = nullptr; + } + INFO_LOG("end to destroy stream"); + + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy context failed, errorCode = %d", static_cast(ret)); + } + context_ = nullptr; + } + INFO_LOG("end to destroy context"); + + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + } + INFO_LOG("end to reset device %d", deviceId_); + + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + ERROR_LOG("finalize acl failed, errorCode = %d", static_cast(ret)); + } + INFO_LOG("end to finalize acl"); +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/utils.cpp 
b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..25d7456967a426cb9ba8caafc224ae3d0cb3e0f0 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEGE/src/utils.cpp @@ -0,0 +1,422 @@ +/** +* @file utils.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "utils.h" +#include +#include +#include +#include +#include +#if defined(_MSC_VER) +#include +#else +#include +#include +#include +#endif +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +bool RunStatus::isDevice_ = false; + +Result Utils::ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize) +{ + std::ifstream binFile(fileName, std::ifstream::binary); + if (!binFile.is_open()) { + ERROR_LOG("open file %s failed", fileName.c_str()); + return FAILED; + } + + binFile.seekg(0, binFile.end); + auto binFileBufferLen = binFile.tellg(); + if (binFileBufferLen == 0) { + ERROR_LOG("binfile is empty, filename is %s", fileName.c_str()); + binFile.close(); + return FAILED; + } + binFile.seekg(0, binFile.beg); + + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, binFileBufferLen); + if (inputBuff == nullptr) { + ERROR_LOG("host malloc binFileBufferData failed, errorCode = %d", static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } else { // app is running in device + aclRet = acldvppMalloc(&inputBuff, binFileBufferLen); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("device malloc binFileBufferData failed, errorCode = %d", static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } + binFile.read(static_cast(inputBuff), binFileBufferLen); + 
binFile.close(); + fileSize = binFileBufferLen; + + return SUCCESS; +} + +Result Utils::GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return FAILED; + } + + uint32_t inputBuffSize = 0; + void *inputBuff = nullptr; + auto ret = ReadBinFile(picDesc.picName, inputBuff, inputBuffSize); + if (ret != SUCCESS) { + ERROR_LOG("read bin file failed, file name is %s", picDesc.picName.c_str()); + return FAILED; + } + + aclError aclRet = acldvppJpegGetImageInfoV2(inputBuff, inputBuffSize, &picDesc.width, &picDesc.height, + nullptr, nullptr); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg image info failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + aclRet = acldvppJpegPredictDecSize(inputBuff, inputBuffSize, PIXEL_FORMAT_YUV_SEMIPLANAR_420, + &picDesc.jpegDecodeSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg decode size failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + + void *inBufferDev = nullptr; + uint32_t inBufferSize = inputBuffSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = acldvppMalloc(&inBufferDev, inBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc inBufferSize failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(inputBuff); + return FAILED; + } + + // if app is running in host, need copy data from host to device + aclRet = aclrtMemcpy(inBufferDev, inBufferSize, inputBuff, inputBuffSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inBufferDev); + 
(void)aclrtFreeHost(inputBuff); + return FAILED; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + inBufferDev = inputBuff; + } + devPicBufferSize = inBufferSize; + picDevBuffer = reinterpret_cast(inBufferDev); + + return SUCCESS; +} + +void *Utils::GetPicDevBuffer(const PicDesc &picDesc, uint32_t &picBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return nullptr; + } + + FILE *fp = fopen(picDesc.picName.c_str(), "rb"); + if (fp == nullptr) { + ERROR_LOG("open file %s failed", picDesc.picName.c_str()); + return nullptr; + } + + fseek(fp, 0, SEEK_END); + long fileLen = ftell(fp); + fseek(fp, 0, SEEK_SET); + + if (static_cast(fileLen) < picBufferSize) { + ERROR_LOG("need read %u bytes but file %s only %ld bytes", + picBufferSize, picDesc.picName.c_str(), fileLen); + fclose(fp); + return nullptr; + } + + void *inputDevBuff = nullptr; + aclError aclRet = acldvppMalloc(&inputDevBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc device data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + return nullptr; + } + + void *inputBuff = nullptr; + size_t readSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + (void)acldvppFree(inputDevBuff); + return nullptr; + } + + readSize = fread(inputBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)aclrtFreeHost(inputBuff); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + + // if app is running in host, need copy model output data from host to device + aclRet = aclrtMemcpy(inputDevBuff, picBufferSize, inputBuff, picBufferSize, 
ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inputDevBuff); + (void)aclrtFreeHost(inputBuff); + fclose(fp); + return nullptr; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + readSize = fread(inputDevBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + } + + fclose(fp); + return inputDevBuff; +} + +Result Utils::PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName) +{ + size_t outDatasetNum = aclmdlGetDatasetNumBuffers(modelOutput); + if (outDatasetNum == 0) { + ERROR_LOG("model out dataset num can't be 0"); + } + for (size_t i = 0; i < outDatasetNum; ++i) { + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(modelOutput, i); + if (dataBuffer == nullptr) { + ERROR_LOG("aclmdlGetDatasetBuffer failed"); + return FAILED; + } + + void *dataBufferDev = aclGetDataBufferAddr(dataBuffer); + if (dataBufferDev == nullptr) { + ERROR_LOG("aclGetDataBufferAddr failed"); + return FAILED; + } + + uint32_t bufferSize = aclGetDataBufferSizeV2(dataBuffer); + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, bufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, bufferSize, dataBufferDev, bufferSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("aclrtMemcpy device to host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + } + } else { + dataPtr = dataBufferDev; + } + + uint32_t len = static_cast(bufferSize); + FILE *outputFile = fopen(fileName, "wb+"); + if 
(outputFile != nullptr) { + fwrite(static_cast(dataPtr), len, sizeof(char), outputFile); + fclose(outputFile); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + } else { + ERROR_LOG("create output file %s failed, size is %u", fileName, len); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + } + return SUCCESS; +} + +Result Utils::SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize) +{ + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, dataSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, dataSize, devPtr, dataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("dvpp output memcpy to host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + return FAILED; + } + } else { + dataPtr = devPtr; + } + + FILE *outFileFp = fopen(fileName, "wb+"); + if (outFileFp == nullptr) { + ERROR_LOG("fopen out file %s failed.", fileName); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + size_t writeSize = fwrite(dataPtr, sizeof(char), dataSize, outFileFp); + if (writeSize != dataSize) { + ERROR_LOG("need write %u bytes to %s, but only write %zu bytes.", + dataSize, fileName, writeSize); + fclose(outFileFp); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + fflush(outFileFp); + fclose(outFileFp); + return SUCCESS; +} + +Result Utils::CheckFile(const char *fileName) +{ + int i = 0; + while (i < 10) { + std::ifstream f (fileName); + if (f.good()) { + break; + } + SleepTime(1); // slepp 1s + INFO_LOG("check result, wait time %d second", i + 1); + i++; + } + // 10 
is max time of checking + if (i == 10) { + ERROR_LOG("check result failed, timeout, expect file:%s", fileName); + return FAILED; + } + return SUCCESS; +} + +Result Utils::SaveModelOutputData(const char *srcfileName, const char *dstfileName) +{ + Result ret = CheckFile(srcfileName); + if (ret != SUCCESS) { + ERROR_LOG("model output file not exist"); + return FAILED; + } + FILE *model_output = fopen(srcfileName, "rb"); + if (model_output == nullptr) { + ERROR_LOG("fopen out file %s failed.", srcfileName); + return FAILED; + } + + FILE *model_output_txt = fopen(dstfileName, "wb+"); + if (model_output_txt == nullptr) { + ERROR_LOG("fopen out file %s failed.", dstfileName); + fclose(model_output); + return FAILED; + } + + int i = 0; + float prop = 0.0; + std::map> mp; + std::map::iterator ite; + while (feof(model_output) == 0) { + ite = mp.end(); + fread(&prop, sizeof(float), 1, model_output); + mp.insert(ite, std::map::value_type(prop, i)); + fprintf(model_output_txt, "%f,%d\n", prop, i); + i++; + } + fclose(model_output); + ite = mp.begin(); + float sum = 0.0; + float max = ite->first; + int classType = ite->second; + for (i = 0 ; i < 5; i++) { + sum += ite->first; + ++ite; + } + fprintf(model_output_txt, "classType[%d], top1[%f], top5[%f]", classType, max, sum); + fclose(model_output_txt); + INFO_LOG("result : classType[%d], top1[%f], top5[%f]", classType, max, sum); + INFO_LOG("-------------------------------------------"); + return SUCCESS; +} + +Result Utils::CheckAndCreateFolder(const char* foldName) +{ + INFO_LOG("start check result fold:%s", foldName); +#if defined(_MSC_VER) + DWORD ret = GetFileAttributes((LPCSTR)foldName); + if (ret == INVALID_FILE_ATTRIBUTES) { + BOOL flag = CreateDirectory((LPCSTR)foldName, nullptr); + if (flag) { + INFO_LOG("make directory successfully."); + } else { + INFO_LOG("make directory errorly."); + return FAILED; + } + } +#else + if (access(foldName , 0) == -1) { + int flag = mkdir(foldName , 0777); + if (flag == 0) { + 
INFO_LOG("make directory successfully."); + } else { + ERROR_LOG("make directory errorly."); + return FAILED; + } + } +#endif + INFO_LOG("check result success, fold exist"); + return SUCCESS; +} + +void Utils::SleepTime(unsigned int seconds) +{ +#if defined(_MSC_VER) + unsigned long secs = static_cast(seconds); + Sleep(secs * 1000); // sleep 1 second +#else + sleep(seconds); +#endif +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e16c7eedbef07ed7a40c430d26ede753e330977 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_DVPP_RESNET50) + +add_subdirectory("./src") diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..282061b75364295f9e5b26edc2bb336796aa7e6d --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/TEST.sh @@ -0,0 +1,12 @@ +#! 
/bin/bash +set -e +src_path=$(pwd) +argv=4 +data_path="$OEC_DATA_PATH" +output_path="$OEC_OUTPUT_PATH" + +mkdir -p "${output_path}" +cd "${output_path}" +cmake "${src_path}" +make +./main "${argv}" "${data_path}" "${output_path}" \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/dvpp_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/dvpp_process.h new file mode 100644 index 0000000000000000000000000000000000000000..ce5ebb7fbacc7c719457e350a05155b5a65d5182 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/dvpp_process.h @@ -0,0 +1,164 @@ +/** +* @file dvpp_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#pragma once +#include +#include "utils.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +class DvppProcess { +public: + /** + * @brief Constructor + * @param [in] stream: stream + */ + explicit DvppProcess(aclrtStream &stream); + + /** + * @brief Destructor + */ + virtual ~DvppProcess(); + + /** + * @brief dvpp init + * @return result + */ + Result InitResource(); + + /** + * @brief init dvpp output para + * @param [in] modelInputWidth: model input width + * @param [in] modelInputHeight: model input height + * @return result + */ + Result InitDvppOutputPara(int modelInputWidth, int modelInputHeight); + + /** + * @brief set jpegd input + * @param [in] inDevBuffer: device buffer of input pic + * @param [in] inDevBufferSize: device buffer size of input pic + * @param [in] picDesc:picture description + */ + void SetInput4JpegD(char *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc); + + /** + * @brief set jpege input + * @param [in] inDevBuffer: device buffer of input yuv file + * @param [in] inDevBufferSize: device input pic buffer size after align + * @param [in] inputWidth:width of pic after encode + * @param [in] inputHeight:height of pic after encode + */ + void SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight); + + /** + * @brief get dvpp output + * @param [in] outputBuffer: pointer which points to dvpp output buffer + * @param [out] outputSize: output size + */ + void GetDvppOutput(void **outputBuffer, int &outputSize); + + /** + * @brief dvpp process + * @return result + */ + Result Process(); + + /** + * @brief set dvpp type after JpegD(vpcResize/vpcCrop/vpcCropAndPaste) + * @return result + */ + void SetDvppType(DvppType dvppType); + + /** + * @brief compute encode input pic desc size + * @return input pic desc size + */ + uint32_t ComputeEncodeInputSize(int inputWidth, int inputHeight); + + /** + * @brief process encode + * @return result + */ + Result ProcessJpegE(); + + /** + * @brief 
process 8k resize + * @return result + */ + Result Process8kResize(); + +private: + Result InitDecodeOutputDesc(); + Result ProcessDecode(); + void DestroyDecodeResource(); + + Result InitResizeInputDesc(); + Result Init8kResizeInputDesc(); + Result InitResizeOutputDesc(); + Result Init8kResizeOutputDesc(); + Result ProcessResize(); + void DestroyResizeResource(); + + Result InitCropInputDesc(); + Result InitCropOutputDesc(); + Result ProcessCrop(); + void DestroyCropResource(); + + Result InitCropAndPasteInputDesc(); + Result InitCropAndPasteOutputDesc(); + Result ProcessCropAndPaste(); + void DestroyCropAndPasteResource(); + + Result InitEncodeResource(); + void DestroyEncodeResource(); + + void DestroyResource(); + void DestroyDvppOutputPara(); + void DestroyDecodeOutBuff(); + + aclrtStream stream_; + acldvppChannelDesc *dvppChannelDesc_; + + DvppType dvppType_; + acldvppRoiConfig *cropArea_; + acldvppRoiConfig *pasteArea_; + acldvppJpegeConfig *jpegeConfig_; + acldvppResizeConfig *resizeConfig_; + + void* decodeOutBufferDev_; // decode output buffer + acldvppPicDesc *decodeOutputDesc_; //decode output desc + + void* encodeOutBufferDev_; // encode output buffer + uint32_t encodeOutBufferSize_; // encode output buffer size + acldvppPicDesc *encodeInputDesc_; //encode input desc + + acldvppPicDesc *vpcInputDesc_; // vpc input desc + acldvppPicDesc *vpcOutputDesc_; // vpc output desc + + char *inDevBuffer_; // input pic dev buffer + uint32_t inDevBufferSizeD_; // input pic size for decode + uint32_t inDevBufferSizeE_; // input pic size for encode + uint32_t jpegDecodeOutputSize_; // jpeg decode output size + + uint32_t decodeOutputWidth_; // decode output width + uint32_t decodeOutputWidthStride_; // decode output width aligned + uint32_t decodeOutputHeight_; // decode output height + + void *vpcInBufferDev_; // vpc input buffer + void *vpcOutBufferDev_; // vpc output buffer + uint32_t vpcOutBufferSize_; // vpc output size + + uint32_t modelInputWidth_; // model 
input width + uint32_t modelInputHeight_; // model input height + + uint32_t jpegeInputWidth_; // encode input width + uint32_t jpegeInputHeight_; // encode input height +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/sample_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/sample_process.h new file mode 100644 index 0000000000000000000000000000000000000000..14c42bf652c29efa1abb0f0f70f20677bf8151b6 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/sample_process.h @@ -0,0 +1,63 @@ +/** +* @file sample_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include "utils.h" +#include "acl/acl.h" + +class SampleProcess { +public: + /** + * @brief Constructor + */ + SampleProcess(); + + /** + * @brief Destructor + */ + virtual ~SampleProcess(); + + /** + * @brief init reousce + * @return result + */ + Result InitResource(); + + /** + * @brief decode, vpc and infer sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result JpegdProcess(DvppType dvpptype); + + /** + * @brief encode sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result JpegeProcess(DvppType dvpptype); + + /** + * @brief resize 8k sample process + * @param [in] dvpptype: dvpp type + * @return result + */ + Result Resize8kProcess(DvppType dvpptype); + +private: + /** + * @brief destroy resource + */ + void DestroyResource(); + + int32_t deviceId_; + aclrtContext context_; + aclrtStream stream_; +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/utils.h 
b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..375374de20b26b8fbc16714bda7c9616d12bd5c1 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/inc/utils.h @@ -0,0 +1,128 @@ +/** +* @file utils.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include +#include "acl/acl.h" + +#define INFO_LOG(fmt, ...) fprintf(stdout, "[INFO] " fmt "\n", ##__VA_ARGS__) +#define WARN_LOG(fmt, ...) fprintf(stdout, "[WARN] " fmt "\n", ##__VA_ARGS__) +#define ERROR_LOG(fmt, ...) fprintf(stderr, "[ERROR] " fmt "\n", ##__VA_ARGS__) + +typedef enum Result { + SUCCESS = 0, + FAILED = 1 +} Result; + +typedef enum DvppType { + VPC_RESIZE = 0, + VPC_CROP = 1, + VPC_CROP_AND_PASTE = 2, + JPEG_ENCODE = 3, + VPC_8K_RESIZE = 4 +} DvppType; + +typedef struct PicDesc { + std::string picName; + uint32_t width; + uint32_t height; + uint32_t jpegDecodeSize; +} PicDesc; + +class RunStatus { +public: + static void SetDeviceStatus(bool isDevice) + { + isDevice_ = isDevice; + } + static bool GetDeviceStatus() + { + return isDevice_; + } +private: + RunStatus() = default; + ~RunStatus() = default; + static bool isDevice_; +}; + +class Utils { +public: + /** + * @brief create device buffer of pic + * @param [in] picDesc: pic desc + * @param [out] picDevBuffer: device memory of picture + * @param [out] devPicBufferSize: actual pic size + * @return device buffer of pic + */ + static Result GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize); + + /** + * @brief create buffer of bin file + * @param [in] fileName: file name + * @param [out] inputBuff: input data buffer + * 
@param [out] fileSize: actual file szie + * @return buffer of pic + */ + static Result ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize); + + /** + * @brief create device buffer of pic + * @param [in] picDesc: pic desc + * @param [in] PicBufferSize: aligned pic size + * @return device buffer of pic + */ + static void *GetPicDevBuffer(const PicDesc &picDesc, uint32_t &PicBufferSize); + + /** + * @brief pull model output data to file + * @param [in] modelOutput: model output dataset + * @param [in] fileName: file name + * @return result + */ + static Result PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName); + + /** + * @brief save dvpp output data + * @param [in] fileName: file name + * @param [in] devPtr: dvpp output data device addr + * @param [in] dataSize: dvpp output data size + * @return result + */ + static Result SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize); + + /** + * @brief check file if exist + * @param [in] fileName: file to check + * @return result + */ + static Result CheckFile(const char *fileName); + + /** + * @brief save model output data to dst file + * @param [in] srcfileName: src file name + * @param [in] dstfileName: dst file name + * @return result + */ + static Result SaveModelOutputData(const char *srcfileName, const char *dstfileName); + + /** + * @brief check fold, if not exist, create it + * @param [in] fileName: fold to check + * @return result + */ + static Result CheckAndCreateFolder(const char* foldName); + + /** + * @brief program waiting + * @param [in] wating time: seconds + * @return void + */ + static void SleepTime(unsigned int seconds); +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..5a8815aef2c1bc1cfdf1d9692b59a4633ad01f2f --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_DVPP_RESNET50) + +# Compile options +add_compile_options(-std=c++11) + +add_definitions(-DENABLE_DVPP_INTERFACE) + +# Specify target generation path +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +set(INC_PATH $ENV{ASCEND_HOME_PATH}) +message(STATUS "env INC_PATH: ${INC_PATH}") +set(LIB_PATH "$ENV{ASCEND_HOME_PATH}/lib64") +message(STATUS "env LIB_PATH: ${LIB_PATH}") + +# Header path +include_directories( + ${INC_PATH}/runtime/include/ + ../inc/ +) + +# add host lib path +link_directories( + ${LIB_PATH} +) + +add_executable(main + utils.cpp + dvpp_process.cpp + sample_process.cpp + main.cpp) + +if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Windows") + target_link_libraries(main + libascendcl libacl_dvpp) +else () + target_link_libraries(main + ascendcl acl_dvpp stdc++) +endif () + +install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/dvpp_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/dvpp_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e255a6a5f784871ca97fe9aa3260849a34480c40 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/dvpp_process.cpp @@ -0,0 +1,895 @@ +/** +* @file dvpp_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "dvpp_process.h" +#include +#include +#include "acl/acl.h" +#include "utils.h" +using namespace std; + +DvppProcess::DvppProcess(aclrtStream &stream) + : stream_(stream), dvppChannelDesc_(nullptr), dvppType_(VPC_RESIZE), + cropArea_(nullptr), pasteArea_(nullptr), jpegeConfig_(nullptr), resizeConfig_(nullptr), + decodeOutBufferDev_(nullptr), decodeOutputDesc_(nullptr), encodeOutBufferDev_(nullptr), + encodeInputDesc_(nullptr), vpcInputDesc_(nullptr), vpcOutputDesc_(nullptr), inDevBuffer_(nullptr), + inDevBufferSizeD_(0), inDevBufferSizeE_(0), jpegDecodeOutputSize_(0), decodeOutputWidth_(0), + decodeOutputWidthStride_(0), decodeOutputHeight_(0), vpcInBufferDev_(nullptr), vpcOutBufferDev_(nullptr), + vpcOutBufferSize_(0), modelInputWidth_(0), modelInputHeight_(0), jpegeInputWidth_(0), jpegeInputHeight_(0) +{ +} + +DvppProcess::~DvppProcess() +{ + DestroyResource(); + DestroyDvppOutputPara(); +} + +uint32_t AlignSize(uint32_t origSize, uint32_t alignment) +{ + if (alignment == 0) { + return 0; + } + uint32_t alignmentH = alignment - 1; + return (origSize + alignmentH) / alignment * alignment; +} + +void DvppProcess::SetDvppType(DvppType dvppType) +{ + dvppType_ = dvppType; +} + +Result DvppProcess::InitResource() +{ + dvppChannelDesc_ = acldvppCreateChannelDesc(); + if (dvppChannelDesc_ == nullptr) { + ERROR_LOG("acldvppCreateChannelDesc failed"); + return FAILED; + } + + aclError aclRet = acldvppCreateChannel(dvppChannelDesc_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppCreateChannel failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + INFO_LOG("dvpp init resource success"); + return SUCCESS; +} + +void DvppProcess::DestroyResource() +{ + if (dvppChannelDesc_ != nullptr) { + aclError aclRet = acldvppDestroyChannel(dvppChannelDesc_); + if 
(aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, errorCode = %d", static_cast(aclRet)); + } + + (void)acldvppDestroyChannelDesc(dvppChannelDesc_); + dvppChannelDesc_ = nullptr; + } +} + +void DvppProcess::SetInput4JpegD(char *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc) +{ + inDevBuffer_ = inDevBuffer; + inDevBufferSizeD_ = inDevBufferSize; + jpegDecodeOutputSize_ = picDesc.jpegDecodeSize; +} + +void DvppProcess::GetDvppOutput(void **outputBuffer, int &outputSize) +{ + *outputBuffer = vpcOutBufferDev_; + outputSize = vpcOutBufferSize_; + vpcOutBufferDev_ = nullptr; + vpcOutBufferSize_ = 0; +} + +Result DvppProcess::InitDvppOutputPara(int modelInputWidth, int modelInputHeight) +{ + if ((modelInputWidth <= 0) || (modelInputHeight <= 0)) { + ERROR_LOG("init dvpp output para invalid, modelInputWidth = %d, modelInputHeight = %d", + modelInputWidth, modelInputHeight); + return FAILED; + } + modelInputWidth_ = modelInputWidth; + modelInputHeight_ = modelInputHeight; + return SUCCESS; +} + +void DvppProcess::DestroyDvppOutputPara() +{ + if (vpcOutBufferDev_ != nullptr) { + (void)acldvppFree(vpcOutBufferDev_); + vpcOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitDecodeOutputDesc() +{ + aclError aclRet = acldvppMalloc(&decodeOutBufferDev_, jpegDecodeOutputSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc decodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + decodeOutputDesc_ = acldvppCreatePicDesc(); + if (decodeOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc decodeOutputDesc_ failed"); + return FAILED; + } + + acldvppSetPicDescData(decodeOutputDesc_, decodeOutBufferDev_); + acldvppSetPicDescFormat(decodeOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + acldvppSetPicDescSize(decodeOutputDesc_, jpegDecodeOutputSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessDecode() +{ + Result ret = InitDecodeOutputDesc(); + if (ret != SUCCESS) { + 
ERROR_LOG("InitDecodeOutputDesc failed"); + return FAILED; + } + + aclError aclRet = acldvppJpegDecodeAsync(dvppChannelDesc_, reinterpret_cast(inDevBuffer_), + inDevBufferSizeD_, decodeOutputDesc_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegDecodeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("decode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + // get yuv image width and height + decodeOutputWidth_ = acldvppGetPicDescWidth(decodeOutputDesc_); + decodeOutputHeight_ = acldvppGetPicDescHeight(decodeOutputDesc_); + decodeOutputWidthStride_ = acldvppGetPicDescWidthStride(decodeOutputDesc_); + + return SUCCESS; +} + +void DvppProcess::DestroyDecodeResource() +{ + if (decodeOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(decodeOutputDesc_); + decodeOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitResizeInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitResizeInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, 
decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitResizeOutputDesc() +{ + int widthAlignment = 16; + int heightAlignment = 2; + int sizeAlignment = 3; + int sizeNum = 2; + int resizeOutWidth = modelInputWidth_; + int resizeOutHeight = modelInputHeight_; + int resizeOutWidthStride = AlignSize(modelInputWidth_, widthAlignment); + int resizeOutHeightStride = AlignSize(modelInputHeight_, heightAlignment); + if (resizeOutWidthStride == 0 || resizeOutHeightStride == 0) { + ERROR_LOG("InitResizeOutputDesc AlignSize failed"); + return FAILED; + } + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} +extern string data_path ,output_path; +Result DvppProcess::Init8kResizeInputDesc() +{ + uint32_t inWidthStride = 8192; // 8k picture width + uint32_t inHeightStride = 8192; // 8k picture height + uint32_t sizeAlignment = 3; + 
uint32_t sizeNum = 2; + + uint32_t inBufferSize = inWidthStride * inWidthStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + std::string dvppImagePath = data_path + "/data/dvpp_vpc_8192x8192_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 8192, 8192} + // other yuv file + }; + vpcInBufferDev_ = Utils::GetPicDevBuffer(testPic[0], inBufferSize); + if (vpcInBufferDev_ == nullptr) { + ERROR_LOG("get picDevBuffer failed, file name = %s", testPic[0].picName.c_str()); + return FAILED; + } + (void)acldvppSetPicDescData(vpcInputDesc_, vpcInBufferDev_); // JpegD -> vpcResize + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeight(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, inWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, inHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, inBufferSize); + return SUCCESS; +} + +Result DvppProcess::Init8kResizeOutputDesc() +{ + uint32_t resizeOutWidthStride = 4000; // output picture width + uint32_t resizeOutHeightStride = 4000; // output picture height + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + vpcOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, 
PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, resizeOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, resizeOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + + +Result DvppProcess::ProcessResize() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + Result inputRet = SUCCESS; + Result outputRet = SUCCESS; + if (dvppType_ == VPC_RESIZE) { + inputRet = InitResizeInputDesc(); + outputRet = InitResizeOutputDesc(); + } else if (dvppType_ == VPC_8K_RESIZE) { + inputRet = Init8kResizeInputDesc(); + outputRet = Init8kResizeOutputDesc(); + } else { + ERROR_LOG("invalid dvppType_ %d", static_cast(dvppType_)); + return FAILED; + } + if ((inputRet != SUCCESS) || (outputRet != SUCCESS)) { + ERROR_LOG("init resize input or output description failed"); + return FAILED; + } + + // resize pic + aclError aclRet = acldvppVpcResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcResizeAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("resize aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyResizeResource() +{ + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + 
(void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } + + if (vpcInBufferDev_ != nullptr) { + (void)acldvppFree(vpcInBufferDev_); + vpcInBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitCropInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); // 16-byte alignment + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCrop + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropOutputDesc() +{ + int sizeAlignment = 3; + int sizeNum = 2; + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + vpcOutBufferSize_ = dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", 
static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCrop() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 550; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 480; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + Result ret = InitCropInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropInputDesc failed"); + return FAILED; + } + + ret = InitCropOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropOutputDesc failed"); + return FAILED; + } + + // crop pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizeAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, resizeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + 
ERROR_LOG("acldvppVpcCropAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyCropResource() +{ + if (cropArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +Result DvppProcess::InitCropAndPasteInputDesc() +{ + uint32_t heightAlignment = 16; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + + uint32_t jpegOutWidthStride = decodeOutputWidthStride_; // 128-byte alignment on 310, 64-byte alignment on 310P + uint32_t jpegOutHeightStride = AlignSize(decodeOutputHeight_, heightAlignment); + if (jpegOutWidthStride == 0 || jpegOutHeightStride == 0) { + ERROR_LOG("InitCropAndPasteInputDesc AlignSize failed"); + return FAILED; + } + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * sizeAlignment / sizeNum; + vpcInputDesc_ = acldvppCreatePicDesc(); + if (vpcInputDesc_ == nullptr) { + ERROR_LOG("InitResizeInputDesc vpcInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcInputDesc_, decodeOutBufferDev_); // JpegD -> vpcCropAndPaste + (void)acldvppSetPicDescFormat(vpcInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcInputDesc_, decodeOutputWidth_); + (void)acldvppSetPicDescHeight(vpcInputDesc_, decodeOutputHeight_); + (void)acldvppSetPicDescWidthStride(vpcInputDesc_, jpegOutWidthStride); + 
(void)acldvppSetPicDescHeightStride(vpcInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(vpcInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitCropAndPasteOutputDesc() +{ + int dvppOutWidth = modelInputWidth_; + int dvppOutHeight = modelInputHeight_; + int dvppOutWidthStride = modelInputWidth_; + int dvppOutHeightStride = modelInputHeight_; + int sizeAlignment = 3; + int sizeNum = 2; + vpcOutBufferSize_ = + dvppOutWidthStride * dvppOutHeightStride * sizeAlignment / sizeNum; + aclError aclRet = acldvppMalloc(&vpcOutBufferDev_, vpcOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc vpcOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + vpcOutputDesc_ = acldvppCreatePicDesc(); + if (vpcOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc vpcOutputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(vpcOutputDesc_, vpcOutBufferDev_); + (void)acldvppSetPicDescFormat(vpcOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(vpcOutputDesc_, dvppOutWidth); + (void)acldvppSetPicDescHeight(vpcOutputDesc_, dvppOutHeight); + (void)acldvppSetPicDescWidthStride(vpcOutputDesc_, dvppOutWidthStride); + (void)acldvppSetPicDescHeightStride(vpcOutputDesc_, dvppOutHeightStride); + (void)acldvppSetPicDescSize(vpcOutputDesc_, vpcOutBufferSize_); + return SUCCESS; +} + +Result DvppProcess::ProcessCropAndPaste() +{ + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + uint32_t midNum = 2; + uint32_t oddNum = 1; + uint32_t cropSizeWidth = 200; + uint32_t cropSizeHeight = 200; + uint32_t cropLeftOffset = 512; // must even + uint32_t cropRightOffset = cropLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t cropTopOffset = 512; // must even + uint32_t cropBottomOffset = cropTopOffset + cropSizeHeight - oddNum; // must odd + cropArea_ = 
acldvppCreateRoiConfig(cropLeftOffset, cropRightOffset, + cropTopOffset, cropBottomOffset); + if (cropArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig cropArea_ failed"); + return FAILED; + } + + uint32_t pasteLeftOffset = 16; // must even + uint32_t pasteRightOffset = pasteLeftOffset + cropSizeWidth - oddNum; // must odd + uint32_t pasteTopOffset = 16; // must even + uint32_t pasteBottomOffset = pasteTopOffset + cropSizeHeight - oddNum; // must odd + pasteArea_ = acldvppCreateRoiConfig(pasteLeftOffset, pasteRightOffset, + pasteTopOffset, pasteBottomOffset); + if (pasteArea_ == nullptr) { + ERROR_LOG("acldvppCreateRoiConfig pasteArea_ failed"); + return FAILED; + } + + Result ret = InitCropAndPasteInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteInputDesc failed"); + return FAILED; + } + + ret = InitCropAndPasteOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitCropAndPasteOutputDesc failed"); + return FAILED; + } + + // crop and patse pic + aclError aclRet = acldvppSetResizeConfigInterpolation(resizeConfig_, 0); + aclRet = acldvppVpcCropResizePasteAsync(dvppChannelDesc_, vpcInputDesc_, + vpcOutputDesc_, cropArea_, pasteArea_, + resizeConfig_, stream_); + + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcCropAndPasteAsync failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("crop and paste aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyCropAndPasteResource() +{ + if (cropArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(cropArea_); + cropArea_ = nullptr; + } + + if (pasteArea_ != nullptr) { + (void)acldvppDestroyRoiConfig(pasteArea_); + pasteArea_ = nullptr; + } + + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + DestroyDecodeOutBuff(); + + if (vpcInputDesc_ != 
nullptr) { + (void)acldvppDestroyPicDesc(vpcInputDesc_); + vpcInputDesc_ = nullptr; + } + + if (vpcOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(vpcOutputDesc_); + vpcOutputDesc_ = nullptr; + } +} + +void DvppProcess::SetInput4JpegE(char *inDevBuffer, int inDevBufferSize, int inputWidth, int inputHeight) +{ + inDevBuffer_ = inDevBuffer; + inDevBufferSizeE_ = inDevBufferSize; + jpegeInputWidth_ = inputWidth; + jpegeInputHeight_ = inputHeight; +} + +uint32_t DvppProcess::ComputeEncodeInputSize(int inputWidth, int inputHeight) +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t sizeAlignment = 3; + uint32_t sizeNum = 2; + uint32_t encodeInWidthStride = AlignSize(inputWidth, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(inputHeight, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("ComputeEncodeInputSize AlignSize failed"); + return FAILED; + } + uint32_t encodeInBufferSize = + encodeInWidthStride * encodeInHeightStride * sizeAlignment / sizeNum; + return encodeInBufferSize; +} + +Result DvppProcess::InitEncodeResource() +{ + uint32_t widthAlignment = 16; + uint32_t heightAlignment = 2; + uint32_t encodeInWidthStride = AlignSize(jpegeInputWidth_, widthAlignment); + uint32_t encodeInHeightStride = AlignSize(jpegeInputHeight_, heightAlignment); + if (encodeInWidthStride == 0 || encodeInHeightStride == 0) { + ERROR_LOG("InitEncodeInputDesc AlignSize failed"); + return FAILED; + } + encodeInputDesc_ = acldvppCreatePicDesc(); + if (encodeInputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc encodeInputDesc_ failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(encodeInputDesc_, reinterpret_cast(inDevBuffer_)); + (void)acldvppSetPicDescFormat(encodeInputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420); + (void)acldvppSetPicDescWidth(encodeInputDesc_, jpegeInputWidth_); + (void)acldvppSetPicDescHeight(encodeInputDesc_, jpegeInputHeight_); + 
(void)acldvppSetPicDescWidthStride(encodeInputDesc_, encodeInWidthStride); + (void)acldvppSetPicDescHeightStride(encodeInputDesc_, encodeInHeightStride); + (void)acldvppSetPicDescSize(encodeInputDesc_, inDevBufferSizeE_); + + jpegeConfig_ = acldvppCreateJpegeConfig(); + uint32_t encodeLevel = 100; // default optimal level (0-100) + (void)acldvppSetJpegeConfigLevel(jpegeConfig_, encodeLevel); + + aclError aclRet = acldvppJpegPredictEncSize(encodeInputDesc_, jpegeConfig_, &encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("predict encodeOutBufferSize_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = acldvppMalloc(&encodeOutBufferDev_, encodeOutBufferSize_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc encodeOutBufferDev_ failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + return SUCCESS; +} + +Result DvppProcess::ProcessJpegE() +{ + std::string encodeOutFileName = output_path + "/result/jpege_output_"; + std::string dvppImagePath = data_path + "/data/wood_rabbit_1024_1068_nv12.yuv"; + PicDesc testPic[] = { + { dvppImagePath.c_str(), 1024, 1068} + // other yuv file + }; + + for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); ++index) { + INFO_LOG("start to jpege picture %s", testPic[index].picName.c_str()); + + uint32_t jpegInBufferSize; + jpegInBufferSize = ComputeEncodeInputSize(testPic[index].width, testPic[index].height); + + // get input data buffer + char *picDevBuffer = reinterpret_cast(Utils::GetPicDevBuffer(testPic[index], jpegInBufferSize)); + if (picDevBuffer == nullptr) { + ERROR_LOG("get picDevBuffer failed, index is %zu", index); + return FAILED; + } + + // set jpege input data + SetInput4JpegE(picDevBuffer, jpegInBufferSize, testPic[index].width, testPic[index].height); + picDevBuffer = nullptr; + + // init jpege resource + Result ret = InitEncodeResource(); + if (ret != SUCCESS) { + ERROR_LOG("init jpeg encode failed"); + DestroyEncodeResource(); + return FAILED; 
+ } + + aclError aclRet = acldvppJpegEncodeAsync(dvppChannelDesc_, encodeInputDesc_, encodeOutBufferDev_, + &encodeOutBufferSize_, jpegeConfig_, stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acldvppJpegEncodeAsync failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + aclRet = aclrtSynchronizeStream(stream_); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("encode aclrtSynchronizeStream failed, errorCode = %d", static_cast(aclRet)); + DestroyEncodeResource(); + return FAILED; + } + + // save jpege result + encodeOutFileName = encodeOutFileName + std::to_string(index) + ".jpg"; + ret = Utils::SaveDvppOutputData(encodeOutFileName.c_str(), encodeOutBufferDev_, encodeOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyEncodeResource(); + return FAILED; + } + } + DestroyEncodeResource(); + return SUCCESS; +} + +void DvppProcess::DestroyEncodeResource() +{ + if (jpegeConfig_ != nullptr) { + (void)acldvppDestroyJpegeConfig(jpegeConfig_); + jpegeConfig_ = nullptr; + } + + if (encodeInputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(encodeInputDesc_); + encodeInputDesc_ = nullptr; + } + + if (inDevBuffer_ != nullptr) { + (void)acldvppFree(inDevBuffer_); + inDevBuffer_ = nullptr; + } + + if (encodeOutBufferDev_ != nullptr) { + (void)acldvppFree(encodeOutBufferDev_); + encodeOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::Process8kResize() +{ + std::string vpcOutFileName = output_path + "/result/dvpp_vpc_4000x4000_nv12.yuv"; + Result ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + ret = Utils::SaveDvppOutputData(vpcOutFileName.c_str(), vpcOutBufferDev_, vpcOutBufferSize_); + if (ret != SUCCESS) { + ERROR_LOG("save encode output data failed."); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + + return SUCCESS; +} + +Result DvppProcess::Process() +{ + // pic 
decode + INFO_LOG("call JpegD"); + Result ret = ProcessDecode(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessDecode failed"); + DestroyDecodeOutBuff(); + DestroyDecodeResource(); + return FAILED; + } + + DestroyDecodeResource(); + + switch (dvppType_) { + case VPC_RESIZE: + INFO_LOG("call vpcResize"); + ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + break; + + case VPC_CROP: + INFO_LOG("call vpcCrop"); + ret = ProcessCrop(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessCrop failed"); + DestroyCropResource(); + return FAILED; + } + DestroyCropResource(); + break; + + case VPC_CROP_AND_PASTE: + INFO_LOG("call vpcCropAndPaste"); + ret = ProcessCropAndPaste(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessCropAndPaste failed"); + DestroyCropAndPasteResource(); + return FAILED; + } + DestroyCropAndPasteResource(); + break; + + default: + ERROR_LOG("unsupported type"); + DestroyDecodeOutBuff(); + break; + } + + INFO_LOG("Process dvpp success"); + return SUCCESS; +} + +void DvppProcess::DestroyDecodeOutBuff() +{ + if (decodeOutBufferDev_ != nullptr) { + (void)acldvppFree(decodeOutBufferDev_); + decodeOutBufferDev_ = nullptr; + } +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/main.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8abbaec9af49446b43ba1545ec4a2e605361a024 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/main.cpp @@ -0,0 +1,68 @@ +/** +* @file main.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include +#include +#include +#include "sample_process.h" +#include "utils.h" + +using namespace std; +string data_path ,output_path; +int main(int argc,const char *argv[]) +{ + INFO_LOG("./main param, param represents a vpc feature and must be set"); + if (argc != 4) { + ERROR_LOG("input param not be set"); + return FAILED; + } + data_path = string(argv[2]); + output_path = string(argv[3]); + string result_path_str = output_path + "/result"; + Result ret = Utils::CheckAndCreateFolder(result_path_str.c_str()); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error, dir = %s.", result_path_str.c_str()); + return FAILED; + } + + SampleProcess sampleProcess; + ret = sampleProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("sample init resource failed"); + return FAILED; + } + + DvppType dvppType = static_cast(atoi(argv[1])); + if (dvppType == JPEG_ENCODE) { + ret = sampleProcess.JpegeProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample jpege process failed"); + return FAILED; + } + return SUCCESS; + } + + if (dvppType == VPC_8K_RESIZE) { + ret = sampleProcess.Resize8kProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample resize 8k process failed"); + return FAILED; + } + return SUCCESS; + } + + ret = sampleProcess.JpegdProcess(dvppType); + if (ret != SUCCESS) { + ERROR_LOG("sample model process failed"); + return FAILED; + } + + INFO_LOG("execute sample success"); + return SUCCESS; +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/sample_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/sample_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d42b34a316667ce9516797b66b5a0bb1e5118279 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/sample_process.cpp @@ -0,0 +1,218 @@ +/** +* @file sample_process.cpp +* +* Copyright (C) 2020. 
Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "sample_process.h" +#include +#include +#include "dvpp_process.h" +#include "acl/acl.h" +#include "utils.h" +using namespace std; + +SampleProcess::SampleProcess() : deviceId_(0), context_(nullptr), stream_(nullptr) +{ +} + +SampleProcess::~SampleProcess() +{ + DestroyResource(); +} + +Result SampleProcess::InitResource() +{ + // ACL init + aclError ret = aclInit(nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl init failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + INFO_LOG("acl init success"); + + // set device + ret = aclrtSetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("set device %d success", deviceId_); + + // create context (set current) + ret = aclrtCreateContext(&context_, deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create context success"); + + // create stream + ret = aclrtCreateStream(&stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create stream failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create stream success"); + + // get run mode + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + ret = aclrtGetRunMode(&runMode); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + bool isDivece = (runMode == ACL_DEVICE); + RunStatus::SetDeviceStatus(isDivece); + INFO_LOG("get run mode success"); + + 
return SUCCESS; +} + +Result SampleProcess::JpegeProcess(DvppType dvpptype) +{ + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.ProcessJpegE(); + if (ret != SUCCESS) { + ERROR_LOG("process jpege failed"); + return FAILED; + } + return SUCCESS; +} + +Result SampleProcess::Resize8kProcess(DvppType dvpptype) +{ + INFO_LOG("dvpp process 8k resize begin"); + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + ret = dvppProcess.Process8kResize(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process 8k resize failed"); + return FAILED; + } + INFO_LOG("dvpp process 8k resize success"); + + return SUCCESS; +} +extern string data_path ,output_path; +// jpegd -> vpc -> model execute +Result SampleProcess::JpegdProcess(DvppType dvpptype) +{ + std::string dvppOutputfileName = output_path + "/result/dvpp_output_"; + + // dvpp init + DvppProcess dvppProcess(stream_); + dvppProcess.SetDvppType(dvpptype); + Result ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + int modelInputWidth = 224; + int modelInputHeight = 224; + std::string dvppImagePath1 = data_path + "/data/persian_cat_1024_1536_283.jpg"; + std::string dvppImagePath2 = data_path + "/data/wood_rabbit_1024_1061_330.jpg"; + // input image + PicDesc testPic[] = { + {dvppImagePath1.c_str(), 0, 0}, + {dvppImagePath2.c_str(), 0, 0}, + }; + INFO_LOG( "-------------------------------------------"); + for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); ++index) { + INFO_LOG("start to process picture:%s", testPic[index].picName.c_str()); + // 1.dvpp process + uint32_t devPicBufferSize; + char *picDevBuffer = nullptr; + // 
get input image data buffer + ret = Utils::GetPicDevBuffer4JpegD(testPic[index], picDevBuffer, devPicBufferSize); + if (ret != SUCCESS) { + ERROR_LOG("get pic device buffer failed, index is %zu", index); + return FAILED; + } + + dvppProcess.SetInput4JpegD(picDevBuffer, devPicBufferSize, testPic[index]); + + ret = dvppProcess.InitDvppOutputPara(modelInputWidth, modelInputHeight); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp output para failed"); + return FAILED; + } + + ret = dvppProcess.Process(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp process failed"); + return FAILED; + } + + (void)acldvppFree(picDevBuffer); + picDevBuffer = nullptr; + + void *dvppOutputBuffer = nullptr; + int dvppOutputSize; + dvppProcess.GetDvppOutput(&dvppOutputBuffer, dvppOutputSize); + + std::string dvppOutputfileNameCur = dvppOutputfileName + std::to_string(index); + (void)Utils::SaveDvppOutputData(dvppOutputfileNameCur.c_str(), dvppOutputBuffer, dvppOutputSize); + + + (void)acldvppFree(dvppOutputBuffer); + + } + + return SUCCESS; +} + +void SampleProcess::DestroyResource() +{ + aclError ret; + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy stream failed, errorCode = %d", static_cast(ret)); + } + stream_ = nullptr; + } + INFO_LOG("end to destroy stream"); + + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy context failed, errorCode = %d", static_cast(ret)); + } + context_ = nullptr; + } + INFO_LOG("end to destroy context"); + + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + } + INFO_LOG("end to reset device %d", deviceId_); + + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + ERROR_LOG("finalize acl failed, errorCode = %d", static_cast(ret)); + } + INFO_LOG("end to finalize acl"); +} diff --git 
a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/utils.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..25d7456967a426cb9ba8caafc224ae3d0cb3e0f0 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_JPEG_YUV_VPC_RESIZE/src/utils.cpp @@ -0,0 +1,422 @@ +/** +* @file utils.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "utils.h" +#include +#include +#include +#include +#include +#if defined(_MSC_VER) +#include +#else +#include +#include +#include +#endif +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +bool RunStatus::isDevice_ = false; + +Result Utils::ReadBinFile(const std::string &fileName, void *&inputBuff, uint32_t &fileSize) +{ + std::ifstream binFile(fileName, std::ifstream::binary); + if (!binFile.is_open()) { + ERROR_LOG("open file %s failed", fileName.c_str()); + return FAILED; + } + + binFile.seekg(0, binFile.end); + auto binFileBufferLen = binFile.tellg(); + if (binFileBufferLen == 0) { + ERROR_LOG("binfile is empty, filename is %s", fileName.c_str()); + binFile.close(); + return FAILED; + } + binFile.seekg(0, binFile.beg); + + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, binFileBufferLen); + if (inputBuff == nullptr) { + ERROR_LOG("host malloc binFileBufferData failed, errorCode = %d", static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } else { // app is running in device + aclRet = acldvppMalloc(&inputBuff, binFileBufferLen); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("device malloc binFileBufferData failed, errorCode = %d", 
static_cast(aclRet)); + binFile.close(); + return FAILED; + } + } + binFile.read(static_cast(inputBuff), binFileBufferLen); + binFile.close(); + fileSize = binFileBufferLen; + + return SUCCESS; +} + +Result Utils::GetPicDevBuffer4JpegD(PicDesc &picDesc, char *&picDevBuffer, uint32_t &devPicBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return FAILED; + } + + uint32_t inputBuffSize = 0; + void *inputBuff = nullptr; + auto ret = ReadBinFile(picDesc.picName, inputBuff, inputBuffSize); + if (ret != SUCCESS) { + ERROR_LOG("read bin file failed, file name is %s", picDesc.picName.c_str()); + return FAILED; + } + + aclError aclRet = acldvppJpegGetImageInfoV2(inputBuff, inputBuffSize, &picDesc.width, &picDesc.height, + nullptr, nullptr); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg image info failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + aclRet = acldvppJpegPredictDecSize(inputBuff, inputBuffSize, PIXEL_FORMAT_YUV_SEMIPLANAR_420, + &picDesc.jpegDecodeSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("get jpeg decode size failed, errorCode = %d", static_cast(aclRet)); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(inputBuff); + } else { + (void)acldvppFree(inputBuff); + } + return FAILED; + } + + void *inBufferDev = nullptr; + uint32_t inBufferSize = inputBuffSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = acldvppMalloc(&inBufferDev, inBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc inBufferSize failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(inputBuff); + return FAILED; + } + + // if app is running in host, need copy data from host to device + aclRet = aclrtMemcpy(inBufferDev, inBufferSize, inputBuff, inputBuffSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + 
ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inBufferDev); + (void)aclrtFreeHost(inputBuff); + return FAILED; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + inBufferDev = inputBuff; + } + devPicBufferSize = inBufferSize; + picDevBuffer = reinterpret_cast(inBufferDev); + + return SUCCESS; +} + +void *Utils::GetPicDevBuffer(const PicDesc &picDesc, uint32_t &picBufferSize) +{ + if (picDesc.picName.empty()) { + ERROR_LOG("picture file name is empty"); + return nullptr; + } + + FILE *fp = fopen(picDesc.picName.c_str(), "rb"); + if (fp == nullptr) { + ERROR_LOG("open file %s failed", picDesc.picName.c_str()); + return nullptr; + } + + fseek(fp, 0, SEEK_END); + long fileLen = ftell(fp); + fseek(fp, 0, SEEK_SET); + + if (static_cast(fileLen) < picBufferSize) { + ERROR_LOG("need read %u bytes but file %s only %ld bytes", + picBufferSize, picDesc.picName.c_str(), fileLen); + fclose(fp); + return nullptr; + } + + void *inputDevBuff = nullptr; + aclError aclRet = acldvppMalloc(&inputDevBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc device data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + return nullptr; + } + + void *inputBuff = nullptr; + size_t readSize; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + aclRet = aclrtMallocHost(&inputBuff, picBufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + fclose(fp); + (void)acldvppFree(inputDevBuff); + return nullptr; + } + + readSize = fread(inputBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)aclrtFreeHost(inputBuff); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + + // if app is running in host, need copy model output data 
from host to device + aclRet = aclrtMemcpy(inputDevBuff, picBufferSize, inputBuff, picBufferSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("memcpy from host to device failed, errorCode = %d", static_cast(aclRet)); + (void)acldvppFree(inputDevBuff); + (void)aclrtFreeHost(inputBuff); + fclose(fp); + return nullptr; + } + (void)aclrtFreeHost(inputBuff); + } else { // app is running in device + readSize = fread(inputDevBuff, sizeof(char), picBufferSize, fp); + if (readSize < picBufferSize) { + ERROR_LOG("need read file %s %u bytes, but only %zu readed", + picDesc.picName.c_str(), picBufferSize, readSize); + (void)acldvppFree(inputDevBuff); + fclose(fp); + return nullptr; + } + } + + fclose(fp); + return inputDevBuff; +} + +Result Utils::PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName) +{ + size_t outDatasetNum = aclmdlGetDatasetNumBuffers(modelOutput); + if (outDatasetNum == 0) { + ERROR_LOG("model out dataset num can't be 0"); + } + for (size_t i = 0; i < outDatasetNum; ++i) { + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(modelOutput, i); + if (dataBuffer == nullptr) { + ERROR_LOG("aclmdlGetDatasetBuffer failed"); + return FAILED; + } + + void *dataBufferDev = aclGetDataBufferAddr(dataBuffer); + if (dataBufferDev == nullptr) { + ERROR_LOG("aclGetDataBufferAddr failed"); + return FAILED; + } + + uint32_t bufferSize = aclGetDataBufferSizeV2(dataBuffer); + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, bufferSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, bufferSize, dataBufferDev, bufferSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("aclrtMemcpy device to host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + } + } else { + dataPtr = dataBufferDev; + } 
+ + uint32_t len = static_cast(bufferSize); + FILE *outputFile = fopen(fileName, "wb+"); + if (outputFile != nullptr) { + fwrite(static_cast(dataPtr), len, sizeof(char), outputFile); + fclose(outputFile); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + } else { + ERROR_LOG("create output file %s failed, size is %u", fileName, len); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + } + return SUCCESS; +} + +Result Utils::SaveDvppOutputData(const char *fileName, void *devPtr, uint32_t dataSize) +{ + void *dataPtr = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&dataPtr, dataSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d", static_cast(aclRet)); + return FAILED; + } + + aclRet = aclrtMemcpy(dataPtr, dataSize, devPtr, dataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("dvpp output memcpy to host failed, errorCode = %d", static_cast(aclRet)); + (void)aclrtFreeHost(dataPtr); + return FAILED; + } + } else { + dataPtr = devPtr; + } + + FILE *outFileFp = fopen(fileName, "wb+"); + if (outFileFp == nullptr) { + ERROR_LOG("fopen out file %s failed.", fileName); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + size_t writeSize = fwrite(dataPtr, sizeof(char), dataSize, outFileFp); + if (writeSize != dataSize) { + ERROR_LOG("need write %u bytes to %s, but only write %zu bytes.", + dataSize, fileName, writeSize); + fclose(outFileFp); + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + return FAILED; + } + + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(dataPtr); + } + fflush(outFileFp); + fclose(outFileFp); + return SUCCESS; +} + +Result Utils::CheckFile(const char *fileName) +{ + int i = 0; + while (i < 10) { + std::ifstream f (fileName); + if (f.good()) { + break; + } + 
SleepTime(1); // slepp 1s + INFO_LOG("check result, wait time %d second", i + 1); + i++; + } + // 10 is max time of checking + if (i == 10) { + ERROR_LOG("check result failed, timeout, expect file:%s", fileName); + return FAILED; + } + return SUCCESS; +} + +Result Utils::SaveModelOutputData(const char *srcfileName, const char *dstfileName) +{ + Result ret = CheckFile(srcfileName); + if (ret != SUCCESS) { + ERROR_LOG("model output file not exist"); + return FAILED; + } + FILE *model_output = fopen(srcfileName, "rb"); + if (model_output == nullptr) { + ERROR_LOG("fopen out file %s failed.", srcfileName); + return FAILED; + } + + FILE *model_output_txt = fopen(dstfileName, "wb+"); + if (model_output_txt == nullptr) { + ERROR_LOG("fopen out file %s failed.", dstfileName); + fclose(model_output); + return FAILED; + } + + int i = 0; + float prop = 0.0; + std::map> mp; + std::map::iterator ite; + while (feof(model_output) == 0) { + ite = mp.end(); + fread(&prop, sizeof(float), 1, model_output); + mp.insert(ite, std::map::value_type(prop, i)); + fprintf(model_output_txt, "%f,%d\n", prop, i); + i++; + } + fclose(model_output); + ite = mp.begin(); + float sum = 0.0; + float max = ite->first; + int classType = ite->second; + for (i = 0 ; i < 5; i++) { + sum += ite->first; + ++ite; + } + fprintf(model_output_txt, "classType[%d], top1[%f], top5[%f]", classType, max, sum); + fclose(model_output_txt); + INFO_LOG("result : classType[%d], top1[%f], top5[%f]", classType, max, sum); + INFO_LOG("-------------------------------------------"); + return SUCCESS; +} + +Result Utils::CheckAndCreateFolder(const char* foldName) +{ + INFO_LOG("start check result fold:%s", foldName); +#if defined(_MSC_VER) + DWORD ret = GetFileAttributes((LPCSTR)foldName); + if (ret == INVALID_FILE_ATTRIBUTES) { + BOOL flag = CreateDirectory((LPCSTR)foldName, nullptr); + if (flag) { + INFO_LOG("make directory successfully."); + } else { + INFO_LOG("make directory errorly."); + return FAILED; + } + } +#else + 
if (access(foldName , 0) == -1) { + int flag = mkdir(foldName , 0777); + if (flag == 0) { + INFO_LOG("make directory successfully."); + } else { + ERROR_LOG("make directory errorly."); + return FAILED; + } + } +#endif + INFO_LOG("check result success, fold exist"); + return SUCCESS; +} + +void Utils::SleepTime(unsigned int seconds) +{ +#if defined(_MSC_VER) + unsigned long secs = static_cast(seconds); + Sleep(secs * 1000); // sleep 1 second +#else + sleep(seconds); +#endif +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4680624e650abaf2cebf45ee0926c7801047894 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_VDEC_RESNET50) + +add_subdirectory("./src") diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..1dd3647cfe48a666efa468c86f21ca5fd3f94df2 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/TEST.sh @@ -0,0 +1,10 @@ +#! 
/bin/bash +src_path=$(pwd) +data_path="$OEC_DATA_PATH/data" +output_path="$OEC_OUTPUT_PATH" + +mkdir -p "${output_path}" +cd "${output_path}" +cmake "${src_path}" +make +./main "${data_path}" "${output_path}/out_dir" \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/dvpp_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/dvpp_process.h new file mode 100644 index 0000000000000000000000000000000000000000..be541993c1f57d6edde3f138fa719bc78905c2fa --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/dvpp_process.h @@ -0,0 +1,100 @@ +/** +* @file dvpp_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include +#include +#include "utils.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +/** + * DvppProcess + */ +class DvppProcess { +public: + /** + * @brief Constructor + * @param [in] stream: stream + */ + explicit DvppProcess(aclrtStream &stream); + + /** + * @brief Destructor + */ + virtual ~DvppProcess(); + + /** + * @brief dvpp global init + * @return result + */ + Result InitResource(); + + /** + * @brief init dvpp output para + * @param [in] modelInputWidth: model input width + * @param [in] modelInputHeight: model input height + * @return result + */ + Result InitOutputPara(int modelInputWidth, int modelInputHeight); + + /** + * @brief set dvpp input + * @param [in] inputWidth:width of pic + * @param [in] inputHeight:height of pic + * @param [in] format:format of pic + */ + void SetInput(int inputWidth, int inputHeight, acldvppPixelFormat format); + + /** + * @brief gett dvpp output + * @param [in] outputBuffer: pointer which points to dvpp output buffer + * @param [out] outputSize: 
output size + */ + void GetOutput(void **outputBuffer, uint32_t &outputSize); + + /** + * @brief dvpp process + * @return result + */ + Result Process(void *buffer, uint32_t size); + + /** + * @brief destroy resource + */ + void DestroyResource(); + + void DestroyOutputPara(); + +private: + Result InitResizeInputDesc(); + Result InitResizeOutputDesc(); + Result ProcessResize(); + void DestroyResizeResource(); + + + aclrtStream stream_; + acldvppChannelDesc *dvppChannelDesc_; + acldvppResizeConfig *resizeConfig_; + + acldvppPicDesc *resizeOutputDesc_; // resize output desc + acldvppPicDesc *resizeInputDesc_; // resize input desc + + void *resizeOutBufferDev_; // resize output buffer + void *picOutBufferDev_; + + uint32_t resizeInBufferSize_; // resize input size + uint32_t resizeOutBufferSize_; // resize output size + uint32_t inputWidth_; // input pic width + uint32_t inputHeight_; // input pic height + uint32_t modelInputWidth_; // model input width + uint32_t modelInputHeight_; // model input height + acldvppPixelFormat format_; // pic format +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/sample_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/sample_process.h new file mode 100644 index 0000000000000000000000000000000000000000..4bd46eead5848e72cb5bab559a89ab5489086554 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/sample_process.h @@ -0,0 +1,77 @@ +/** +* @file sample_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#pragma once +#include +#include +#include +#include "utils.h" +#include "dvpp_process.h" +#include "acl/acl.h" +#include "vdec_process.h" + +/** +* SampleProcess +*/ +class SampleProcess { +public: + /** + * @brief Constructor + */ + SampleProcess(const char *data, const char *outFolder); + + /** + * @brief Destructor + */ + virtual ~SampleProcess(); + + /** + * @brief init reousce + * @return result + */ + Result InitResource(); + + /** + * @brief vdec process + * @return result + */ + Result DoVdecProcess(); + + /** + * @brief model process + * @return result + */ + Result DoModelProcess(); + +private: + void DestroyResource(); + + int32_t deviceId_; + aclrtContext context_; + aclrtStream stream_; + std::string filePath; + std::thread thread_; + const char *outFolder_; + PicDesc picDesc_; + + /** + * 0:H265 main level + * 1:H264 baseline level + * 2:H264 main level + * 3:H264 high level + */ + acldvppStreamFormat enType_; + + /** + * 1:YUV420 semi-planner(nv12) + * 2:YVU420 semi-planner(nv21) + */ + acldvppPixelFormat format_; +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/utils.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..8de334e22175952275278d043b3b1bc522c99b75 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/utils.h @@ -0,0 +1,133 @@ +/** +* @file utils.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +#define INFO_LOG(fmt, ...) 
fprintf(stdout, "[INFO] " fmt "\n", ##__VA_ARGS__) +#define WARN_LOG(fmt, ...) fprintf(stdout, "[WARN] " fmt "\n", ##__VA_ARGS__) +#define ERROR_LOG(fmt, ...) fprintf(stderr, "[ERROR] " fmt "\n", ##__VA_ARGS__) + +typedef enum Result { + SUCCESS = 0, + FAILED = 1 +} Result; + +typedef struct PicDesc { + std::string picName; + int width; + int height; +} PicDesc; + +class RunStatus { +public: + static void SetDeviceStatus(bool isDevice) + { + isDevice_ = isDevice; + } + static bool GetDeviceStatus() + { + return isDevice_; + } +private: + RunStatus() = default; + ~RunStatus() = default; + static bool isDevice_; +}; + +class Utils { +public: + /** + * @brief write device memory to file + * @param [in] fileName: file name + * @param [in] buffer of input data + * @param [in] dataSize: size of data + * @return success or fail + */ + static bool WriteDeviceMemoryToFile(const char *fileName, void *dataDev, uint32_t dataSize); + + /** + * @brief read file to device memory + * @param [in] fileName: file name + * @param [out] buffer of input data + * @param [out] dataSize: size of data + * @return success or fail + */ + static bool ReadFileToDeviceMem(const char *fileName, void *&dataDev, uint32_t &dataSize); + + /** + * @brief pull model output data to file + * @param [in] modelOutput: model output dataset + * @param [in] fileName: file name + * @return result + */ + static Result PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName); + + /** + * @brief save model output data to dst file + * @param [in] srcfileName: src file name + * @param [in] dstfileName: dst file name + * @return result + */ + static Result SaveModelOutputData(const char *srcfileName, const char *dstfileName); + + /** + * @brief save dvpp output data + * @param [in] fileName: file name + * @param [in] devPtr: dvpp output data device addr + * @param [in] dataSize: dvpp output data size + * @return result + */ + static Result SaveDvppOutputData(const char *fileName, const void *devPtr, 
uint32_t dataSize); + + /** + * @brief check file if exist + * @param [in] fileName: file to check + * @return result + */ + static Result CheckFile(const char* fileName); + + /** + * @brief check fold, if not exist, create it + * @param [in] fileName: fold to check + * @return result + */ + static Result CheckAndCreateFolder(const char *foldName); + + /** + * @brief read file of a dir + * @param [in] fileName: folder + * @return fileList + */ + static std::vector ReadDir(const char *folder); + + /** + * @brief remove dir + * @param [in] fileName: folder + * @return fileList + */ + static void RemoveDir(const char* outFolder_); + + /** + * @brief program waiting + * @param [in] wating time: seconds + * @return void + */ + static void SleepTime(unsigned int seconds); +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/vdec_process.h b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/vdec_process.h new file mode 100644 index 0000000000000000000000000000000000000000..9fc12b0ee64c625991b401372940e39080e9a298 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/inc/vdec_process.h @@ -0,0 +1,91 @@ +/** +* @file vdec_process.h +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#pragma once +#include +#include +#include "utils.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + +class VdecProcess { +public: + /** + * @brief Constructor + */ + VdecProcess(); + + /** + * @brief Destructor + */ + virtual ~VdecProcess(); + + /** + * @brief vdec global init + * @param [in] threadId: index of thread + * @param [in] enType: type of input stream + * @param [in] format: format of pic + * @return result + */ + Result InitResource(uint64_t threadId, acldvppStreamFormat enType, acldvppPixelFormat format); + + /** + * @brief set vdec input + * @param [in] inBufferDev: input buffer + * @param [in] inBufferSize: buffer size + * @param [in] inputWidth:width of pic + * @param [in] inputHeight:height of pic + */ + void SetInput(void *inBufferDev, uint32_t inBufferSize, int inputWidth, int inputHeight); + + /** + * @brief destroy StreamDesc + */ + void DestroyStreamDesc(); + + /** + * @brief destroy PicDesc + */ + void DestroyPicDesc(); + + /** + * @brief destroy resource + */ + void DestroyResource(); + + /** + * @brief vdec process + * @return result + */ + Result Process(); + + /** + * @brief vdec send eos frame + * @return result + */ + Result SendVdecEos(); + +private: + Result CreateStreamDesc(); + Result CreatePicDesc(size_t size); + + uint64_t threadId_; + + aclvdecChannelDesc *vdecChannelDesc_; + acldvppStreamDesc *streamInputDesc_; + acldvppPicDesc *picOutputDesc_; + void *picOutBufferDev_; + void *inBufferDev_; + uint32_t inBufferSize_; + uint32_t inputWidth_; + uint32_t inputHeight_; + acldvppStreamFormat enType_; + acldvppPixelFormat format_; +}; + diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..13ce6e25e546bde16c78f16518a7f4eacec9f3ce --- /dev/null +++ 
b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/CMakeLists.txt @@ -0,0 +1,52 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_VDEC_RESNET50) + +# Compile options +add_compile_options(-std=c++11) + +add_definitions(-DENABLE_DVPP_INTERFACE) + +# Specify target generation path +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../") +set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall") + +set(INC_PATH $ENV{ASCEND_HOME_PATH}) +message(STATUS "env INC_PATH: ${INC_PATH}") +set(LIB_PATH "$ENV{ASCEND_HOME_PATH}/lib64") +message(STATUS "env LIB_PATH: ${LIB_PATH}") +# Header path +include_directories( + ${INC_PATH}/include/ + ../inc/ +) + +# add host lib path +link_directories( + ${LIB_PATH} +) + +add_executable(main + utils.cpp + dvpp_process.cpp + + vdec_process.cpp + sample_process.cpp + main.cpp) + +if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Windows") + target_link_libraries(main libascendcl libacl_dvpp) +elseif (${CMAKE_CXX_COMPILER} MATCHES "android") + target_link_libraries(main + ascendcl acl_dvpp stdc++) +else () + target_link_libraries(main + ascendcl acl_dvpp pthread stdc++) +endif () + +install(TARGETS main DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/dvpp_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/dvpp_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f97f4ded78751953dde828c1e6281c321f13b3d3 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/dvpp_process.cpp @@ -0,0 +1,232 @@ +/** +* @file dvpp_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +#include "dvpp_process.h" +using namespace std; + +DvppProcess::DvppProcess(aclrtStream& stream) : stream_(stream), + dvppChannelDesc_(nullptr), resizeConfig_(nullptr), + resizeInputDesc_(nullptr), resizeOutputDesc_(nullptr), + resizeOutBufferDev_(nullptr), picOutBufferDev_(nullptr), + resizeInBufferSize_(0),resizeOutBufferSize_(0), + inputWidth_(0), inputHeight_(0),modelInputWidth_(0), + modelInputHeight_(0), format_(PIXEL_FORMAT_YUV_400) +{ +} + +DvppProcess::~DvppProcess() +{ + DestroyResizeResource(); + DestroyOutputPara(); + DestroyResource(); +} + +Result DvppProcess::InitResource() +{ + // create vpc channel description + dvppChannelDesc_ = acldvppCreateChannelDesc(); + if (dvppChannelDesc_ == nullptr) { + ERROR_LOG("acldvppCreateChannelDesc failed"); + return FAILED; + } + + // create vpc channel + aclError ret = acldvppCreateChannel(dvppChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppCreateChannel failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + + // create vpc resize config + resizeConfig_ = acldvppCreateResizeConfig(); + if (resizeConfig_ == nullptr) { + ERROR_LOG("acldvppCreateResizeConfig failed"); + return FAILED; + } + + INFO_LOG("dvpp init dvpp resource success"); + return SUCCESS; +} + +void DvppProcess::SetInput(int inputWidth, int inputHeight, acldvppPixelFormat format) +{ + inputWidth_ = inputWidth; + inputHeight_ = inputHeight; + format_ = format; +} + +void DvppProcess::GetOutput(void **outputBuffer, uint32_t &outputSize) +{ + *outputBuffer = resizeOutBufferDev_; + outputSize = resizeOutBufferSize_; + resizeOutBufferDev_ = nullptr; + resizeOutBufferSize_ = 0; +} + +Result DvppProcess::InitOutputPara(int modelInputWidth, int modelInputHeight) +{ + if ((modelInputWidth <= 0) || (modelInputHeight <= 0)) { + 
ERROR_LOG("InitInput para invalid, modelInputWidth = %d, modelInputHeight = %d", + modelInputWidth, modelInputHeight); + return FAILED; + } + + modelInputWidth_ = modelInputWidth; + modelInputHeight_ = modelInputHeight; + + // output buffer, adjust the value based on the actual model + int resizeOutWidth = modelInputWidth_; + int resizeOutHeight = modelInputHeight_; + int resizeOutWidthStride = (resizeOutWidth + 15) / 16 * 16; // 16-byte alignment + int resizeOutHeightStride = (resizeOutHeight + 1) / 2 * 2; // 2-byte alignment + resizeOutBufferSize_ = resizeOutWidthStride * resizeOutHeightStride * 3 / 2; // yuv format size + aclError ret = acldvppMalloc(&resizeOutBufferDev_, resizeOutBufferSize_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppMalloc resizeOutBuffer failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyOutputPara() +{ + if (resizeOutBufferDev_ != nullptr) { + (void)acldvppFree(resizeOutBufferDev_); + resizeOutBufferDev_ = nullptr; + } + if (picOutBufferDev_ != nullptr) { + (void)acldvppFree(picOutBufferDev_); + picOutBufferDev_ = nullptr; + } +} + +Result DvppProcess::InitResizeInputDesc() +{ + uint32_t jpegOutWidthStride = (inputWidth_ + 15) / 16 * 16; // 16-byte alignment + uint32_t jpegOutHeightStride = (inputHeight_ + 1) / 2 * 2; // 2-byte alignment + uint32_t jpegOutBufferSize = jpegOutWidthStride * jpegOutHeightStride * 3 / 2; // yuv format size + resizeInputDesc_ = acldvppCreatePicDesc(); + if (resizeInputDesc_ == nullptr) { + ERROR_LOG("InitResizeInputDesc failed"); + return FAILED; + } + if (jpegOutBufferSize != resizeInBufferSize_) { + ERROR_LOG("jpegOutBufferSize [%u] != resizeInBufferSize_ [%u]", + jpegOutBufferSize, resizeInBufferSize_); + return FAILED; + } + + (void)acldvppSetPicDescData(resizeInputDesc_, picOutBufferDev_); + (void)acldvppSetPicDescFormat(resizeInputDesc_, format_); + (void)acldvppSetPicDescWidth(resizeInputDesc_, inputWidth_); + 
(void)acldvppSetPicDescHeight(resizeInputDesc_, inputHeight_); + (void)acldvppSetPicDescWidthStride(resizeInputDesc_, jpegOutWidthStride); + (void)acldvppSetPicDescHeightStride(resizeInputDesc_, jpegOutHeightStride); + (void)acldvppSetPicDescSize(resizeInputDesc_, jpegOutBufferSize); + return SUCCESS; +} + +Result DvppProcess::InitResizeOutputDesc() +{ + // adjust based on the actual model + int resizeOutputWidthStride = (modelInputWidth_+ 15) / 16 * 16; // 16-byte alignment + int resizeOutputHeightStride = (modelInputHeight_ + 1) / 2 * 2; // 2-byte alignment + resizeOutputDesc_ = acldvppCreatePicDesc(); + if (resizeOutputDesc_ == nullptr) { + ERROR_LOG("acldvppCreatePicDesc failed"); + return FAILED; + } + + (void)acldvppSetPicDescData(resizeOutputDesc_, resizeOutBufferDev_); + (void)acldvppSetPicDescFormat(resizeOutputDesc_, format_); + (void)acldvppSetPicDescWidth(resizeOutputDesc_, modelInputWidth_); + (void)acldvppSetPicDescHeight(resizeOutputDesc_, modelInputHeight_); + (void)acldvppSetPicDescWidthStride(resizeOutputDesc_, resizeOutputWidthStride); + (void)acldvppSetPicDescHeightStride(resizeOutputDesc_, resizeOutputHeightStride); + (void)acldvppSetPicDescSize(resizeOutputDesc_, resizeOutBufferSize_); + + return SUCCESS; +} + +Result DvppProcess::ProcessResize() +{ + // resize pic size + aclError ret = acldvppVpcResizeAsync(dvppChannelDesc_, resizeInputDesc_, + resizeOutputDesc_, resizeConfig_, stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppVpcResizeAsync failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + ret = aclrtSynchronizeStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("aclrtSynchronizeStream failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + + return SUCCESS; +} + +void DvppProcess::DestroyResizeResource() +{ + if (resizeOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(resizeOutputDesc_); + resizeOutputDesc_ = nullptr; + } + if (resizeInputDesc_ != nullptr) { + 
(void)acldvppDestroyPicDesc(resizeInputDesc_); + resizeInputDesc_ = nullptr; + } +} + +void DvppProcess::DestroyResource() +{ + if (resizeConfig_ != nullptr) { + (void)acldvppDestroyResizeConfig(resizeConfig_); + resizeConfig_ = nullptr; + } + + if (dvppChannelDesc_ != nullptr) { + aclError ret = acldvppDestroyChannel(dvppChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, errorCode = %d", static_cast(ret)); + } + (void)acldvppDestroyChannelDesc(dvppChannelDesc_); + dvppChannelDesc_ = nullptr; + } +} + +Result DvppProcess::Process(void *buffer, uint32_t size) +{ + picOutBufferDev_ = buffer; + resizeInBufferSize_ = size; + Result ret = InitResizeInputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitResizeInputDesc failed"); + DestroyResizeResource(); + return FAILED; + } + + ret = InitResizeOutputDesc(); + if (ret != SUCCESS) { + ERROR_LOG("InitResizeOutputDesc failed"); + DestroyResizeResource(); + return FAILED; + } + ret = ProcessResize(); + if (ret != SUCCESS) { + ERROR_LOG("ProcessResize failed"); + DestroyResizeResource(); + return FAILED; + } + DestroyResizeResource(); + + INFO_LOG("Process dvpp success"); + return SUCCESS; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/main.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0a7112acbb731e61bfe01bbd4af55d2a7d5ea5c6 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/main.cpp @@ -0,0 +1,43 @@ +/** +* @file main.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include +#include "sample_process.h" +#include "utils.h" +using namespace std; +const char* output_dir; +int main(int argn, const char* argv[]) +{ + if(argn != 3){ + ERROR_LOG("please provide data and outpu dir to continue!"); + } + output_dir = argv[2]; + SampleProcess sampleProcess(argv[1], argv[2]); + Result ret = sampleProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("sample init resource failed"); + + return FAILED; + } + + ret = sampleProcess.DoVdecProcess(); + if (ret != SUCCESS) { + ERROR_LOG("sample vdec process failed"); + return FAILED; + } + + ret = sampleProcess.DoModelProcess(); + if (ret != SUCCESS) { + ERROR_LOG("sample model process failed"); + return FAILED; + } + + INFO_LOG("execute sample success"); + return SUCCESS; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/sample_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/sample_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f98e04997f20a352e5356d1c08ac402b861da681 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/sample_process.cpp @@ -0,0 +1,270 @@ +/** +* @file sample_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "sample_process.h" +#include +#include +#include "utils.h" + +using namespace std; + +namespace { + // const std::string filePath= "../data/vdec_h265_1frame_rabbit_1280x720.h265"; + bool runFlag = true; +} + +SampleProcess::SampleProcess(const char *data, const char *outFolder) : deviceId_(0), context_(nullptr), stream_(nullptr), thread_(),filePath(string(data)+"/vdec_h265_1frame_rabbit_1280x720.h265"), + outFolder_(outFolder), picDesc_({}), enType_(H265_MAIN_LEVEL), format_(PIXEL_FORMAT_YUV_SEMIPLANAR_420) +{ +} + +SampleProcess::~SampleProcess() +{ + DestroyResource(); +} +void *ThreadFunc(aclrtContext sharedContext) +{ + if (sharedContext == nullptr) { + ERROR_LOG("sharedContext can not be nullptr"); + return ((void*)(-1)); + } + INFO_LOG("use shared context for this thread"); + aclError ret = aclrtSetCurrentContext(sharedContext); + if (ret != ACL_SUCCESS) { + ERROR_LOG("aclrtSetCurrentContext failed, errorCode = %d", static_cast(ret)); + return ((void*)(-1)); + } + + INFO_LOG("thread start "); + while (runFlag) { + // Notice: timeout 1000ms + (void)aclrtProcessReport(1000); + } + return (void*)0; +} + +Result SampleProcess::InitResource() +{ + // ACL init + aclError ret = aclInit(nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl init failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + INFO_LOG("acl init success"); + + // set device + ret = aclrtSetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl set device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("set device %d success", deviceId_); + + // create context (set current) + ret = aclrtCreateContext(&context_, deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create context failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create context success"); + + // create stream + ret = aclrtCreateStream(&stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create 
stream failed, deviceId = %d, errorCode = %d", + deviceId_, static_cast(ret)); + return FAILED; + } + INFO_LOG("create stream success"); + + // get run mode + // runMode is ACL_HOST which represents app is running in host + // runMode is ACL_DEVICE which represents app is running in device + aclrtRunMode runMode; + ret = aclrtGetRunMode(&runMode); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl get run mode failed, errorCode = %d", static_cast(ret)); + return FAILED; + } + bool isDivece = (runMode == ACL_DEVICE); + RunStatus::SetDeviceStatus(isDivece); + INFO_LOG("get run mode success"); + + return SUCCESS; +} + +Result SampleProcess::DoVdecProcess() +{ + // create threadId + thread_ = std::thread(ThreadFunc, context_); + std::ostringstream oss; + oss << thread_.get_id(); + uint64_t tid = std::stoull(oss.str()); + INFO_LOG("create thread successfully, threadId = %lu", tid); + + Result ret = Utils::CheckAndCreateFolder(outFolder_); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error."); + return FAILED; + } + + // dvpp init + VdecProcess vdecProcess; + ret = vdecProcess.InitResource(tid, enType_, format_); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + vdecProcess.DestroyResource(); + return FAILED; + } + + const int inputWidth = 1280; + const int inputHeight = 720; + int rest_len = 10; + picDesc_.width = inputWidth; + picDesc_.height = inputHeight; + + uint64_t count = 0; + while (rest_len > 0) { + void *inBufferDev = nullptr; + uint32_t inBufferSize = 0; + + // read file to device memory + if (!Utils::ReadFileToDeviceMem(filePath.c_str(), inBufferDev, inBufferSize)) { + ERROR_LOG("read file %s to device mem failed.\n", filePath.c_str()); + vdecProcess.DestroyResource(); + return FAILED; + } + vdecProcess.SetInput(inBufferDev, inBufferSize, picDesc_.width, picDesc_.height); + + ret = vdecProcess.Process(); + if (ret != SUCCESS) { + ERROR_LOG("dvpp ProcessVdec failed"); + vdecProcess.DestroyResource(); + return FAILED; + } + ++count; + 
rest_len = rest_len - 1; + INFO_LOG("success to execute aclvdecSendFrame, count = %lu", count); + } + ret = vdecProcess.SendVdecEos(); + if (ret != SUCCESS) { + ERROR_LOG("send vdec eos frame failed, errorCode = %d", static_cast(ret)); + vdecProcess.DestroyResource(); + return FAILED; + } + INFO_LOG("success to send vdec eos frame"); + + vdecProcess.DestroyResource(); + + return SUCCESS; +} + +Result SampleProcess::DoModelProcess() +{ + // model init + + Result ret = Utils::CheckAndCreateFolder("result"); + if (ret != SUCCESS) { + ERROR_LOG("mkdir out folder error."); + return FAILED; + } + + // dvpp init + DvppProcess dvppProcess(stream_); + ret = dvppProcess.InitResource(); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + + int modelInputWidth = 1280; + int modelInputHeight = 720; + + INFO_LOG("modelInputWidth %d modelInputHeight %d",modelInputWidth,modelInputHeight); + std::vector fileList = Utils::ReadDir(outFolder_); + for (size_t frameId = 0; frameId < fileList.size(); frameId++) { + + void *dvppOutputBuffer = nullptr; + uint32_t dvppOutputSize; + // read image file to device memory + std::string fileNameSave = std::string(outFolder_) + "/" + fileList[frameId]; + INFO_LOG("read file %s \n", fileNameSave.c_str()); + if (!Utils::ReadFileToDeviceMem(fileNameSave.c_str(), dvppOutputBuffer, dvppOutputSize)) { + ERROR_LOG("read file %s to device mem failed.\n", fileNameSave.c_str()); + return FAILED; + } + dvppProcess.SetInput(picDesc_.width, picDesc_.height, format_); + ret = dvppProcess.InitOutputPara(modelInputWidth, modelInputHeight); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp output para failed"); + (void)acldvppFree(dvppOutputBuffer); + return FAILED; + } + + // dvpp process + ret = dvppProcess.Process(dvppOutputBuffer, dvppOutputSize); + if (ret != SUCCESS) { + ERROR_LOG("init dvpp resource failed"); + return FAILED; + } + + dvppProcess.GetOutput(&dvppOutputBuffer, dvppOutputSize); + + 
(void)acldvppFree(dvppOutputBuffer); + + dvppProcess.DestroyOutputPara(); + + } + + dvppProcess.DestroyResource(); + // Utils::RemoveDir(outFolder_); + return SUCCESS; +} + +void SampleProcess::DestroyResource() +{ + aclError ret; + runFlag = false; + if (thread_.joinable()) { + thread_.join(); + } + + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy stream failed, errorCode = %d", static_cast(ret)); + } + stream_ = nullptr; + } + INFO_LOG("end to destroy stream"); + + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy context failed, errorCode = %d", static_cast(ret)); + } + context_ = nullptr; + } + INFO_LOG("end to destroy context"); + + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("reset device %d failed, errorCode = %d", deviceId_, static_cast(ret)); + } + INFO_LOG("end to reset device %d", deviceId_); + + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + ERROR_LOG("finalize acl failed, errorCode = %d", static_cast(ret)); + } + INFO_LOG("end to finalize acl"); +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/utils.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f61025be9278793c20d2db2cc4fff6fb1dae1f1 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/utils.cpp @@ -0,0 +1,355 @@ +/** +* @file utils.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "utils.h" +#include +#if defined(_MSC_VER) +#include +#include +#else +#include +#include +#endif + +bool RunStatus::isDevice_ = false; + +bool Utils::ReadFileToDeviceMem(const char *fileName, void *&dataDev, uint32_t &dataSize) +{ + // read data from file. + FILE *fp = fopen(fileName, "rb"); + if (fp == nullptr) { + ERROR_LOG("open file %s failed.", fileName); + return false; + } + + fseek(fp, 0, SEEK_END); + long fileLenLong = ftell(fp); + if (fileLenLong <= 0) { + ERROR_LOG("file %s len is invalid.", fileName); + fclose(fp); + return false; + } + fseek(fp, 0, SEEK_SET); + + auto fileLen = static_cast(fileLenLong); + dataSize = fileLen; + size_t readSize; + // Malloc input device memory + auto aclRet = acldvppMalloc(&dataDev, dataSize); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acl malloc dvpp data failed, dataSize = %u, errorCode = %d.", + dataSize, static_cast(aclRet)); + fclose(fp); + return false; + } + + if (!RunStatus::GetDeviceStatus()) { + void *dataHost = nullptr; + auto aclRet = aclrtMallocHost(&dataHost, fileLen); + if (dataHost == nullptr) { + ERROR_LOG("acl malloc host data buffer failed. 
dataSize = %u, errorCode = %d.", + fileLen, static_cast(aclRet)); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + + readSize = fread(dataHost, 1, fileLen, fp); + if (readSize < fileLen) { + ERROR_LOG("need read file %s %u bytes, but only %zu read.", fileName, fileLen, readSize); + (void)aclrtFreeHost(dataHost); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + + // copy input to device memory + aclRet = aclrtMemcpy(dataDev, dataSize, dataHost, fileLen, ACL_MEMCPY_HOST_TO_DEVICE); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acl memcpy data to dev failed, fileLen = %u, errorCode = %d.", + fileLen, static_cast(aclRet)); + (void)aclrtFreeHost(dataHost); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + (void)aclrtFreeHost(dataHost); + } else { + readSize = fread(dataDev, 1, fileLen, fp); + if (readSize < fileLen) { + ERROR_LOG("need read file %s %u bytes, but only %zu read.", fileName, fileLen, readSize); + (void)acldvppFree(dataDev); + dataDev = nullptr; + fclose(fp); + return false; + } + } + + fclose(fp); + return true; +} + +bool Utils::WriteDeviceMemoryToFile(const char *fileName, void *dataDev, uint32_t dataSize) +{ + if (dataDev == nullptr) { + ERROR_LOG("dataDev is nullptr!"); + return false; + } + + // copy output to host memory + void *data = nullptr; + aclError aclRet; + if (!(RunStatus::GetDeviceStatus())) { + aclRet = aclrtMallocHost(&data, dataSize); + if (data == nullptr) { + ERROR_LOG("malloc host data buffer failed. 
dataSize = %u, errorCode = %d.", + dataSize, static_cast(aclRet)); + return false; + } + aclRet = aclrtMemcpy(data, dataSize, dataDev, dataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (aclRet != ACL_SUCCESS) { + ERROR_LOG("acl memcpy data to host failed, dataSize=%u, ret=%d.", dataSize, aclRet); + (void)aclrtFreeHost(data); + return false; + } + } else { + data = dataDev; + } + + FILE *outFileFp = fopen(fileName, "wb+"); + if (outFileFp == nullptr) { + ERROR_LOG("fopen out file %s failed, error=%s.", fileName, strerror(errno)); + (void)aclrtFreeHost(data); + return false; + } + + bool ret = true; + size_t writeRet = fwrite(data, 1, dataSize, outFileFp); + if (writeRet != dataSize) { + ERROR_LOG("need write %u bytes to %s, but only write %zu bytes, error=%s.\n", + dataSize, fileName, writeRet, strerror(errno)); + ret = false; + } + + if (!(RunStatus::GetDeviceStatus())) { + (void)aclrtFreeHost(data); + } + fflush(outFileFp); + fclose(outFileFp); + return ret; +} + +Result Utils::PullModelOutputData(aclmdlDataset *modelOutput, const char *fileName) +{ + size_t outDatasetNum = aclmdlGetDatasetNumBuffers(modelOutput); + if (outDatasetNum == 0) { + ERROR_LOG("aclmdlGetDatasetNumBuffers from model output failed, outDatasetNum = 0"); + return FAILED; + } + FILE *outputFile = fopen(fileName, "wb+"); + if (outputFile == nullptr) { + ERROR_LOG("create output file %s failed", fileName); + return FAILED; + } + for (size_t i = 0; i < outDatasetNum; ++i) { + // get model output data + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(modelOutput, i); + if (dataBuffer == nullptr) { + ERROR_LOG("aclmdlGetDatasetBuffer from model output failed."); + continue; + } + void *data = aclGetDataBufferAddr(dataBuffer); + if (data == nullptr) { + ERROR_LOG("aclGetDataBufferAddr from dataBuffer failed."); + continue; + } + uint32_t bufferSize = aclGetDataBufferSizeV2(dataBuffer); + INFO_LOG("output[%zu] DataBuffer, buffer addr = %p, buffer size = %u", + i, data, bufferSize); + + void *dataPtr = 
nullptr; + aclError ret; + if (!(RunStatus::GetDeviceStatus())) { // app is running in host + ret = aclrtMallocHost(&dataPtr, bufferSize); + if (ret != ACL_SUCCESS) { + ERROR_LOG("malloc host data buffer failed, errorCode = %d.", static_cast(ret)); + fclose(outputFile); + return FAILED; + } + // if app is running in host, need copy model output data from device to host + ret = aclrtMemcpy(dataPtr, bufferSize, data, bufferSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (ret != ACL_SUCCESS) { + (void)aclrtFreeHost(dataPtr); + ERROR_LOG("memcpy device to host failed, errorCode = %d.", static_cast(ret)); + } + INFO_LOG("memcopy output data from device to host buffer success."); + } else { + dataPtr = data; + } + uint32_t len = static_cast(bufferSize); + fwrite(static_cast(dataPtr), len, sizeof(char), outputFile); + INFO_LOG("create output file success, filename=%s, size=%u", fileName, len); + + if (!(RunStatus::GetDeviceStatus())) { // app is running in device + (void)aclrtFreeHost(dataPtr); + } + } + fclose(outputFile); + return SUCCESS; +} + +Result Utils::CheckFile(const char *fileName) +{ + int i = 0; + INFO_LOG("start check result file:%s", fileName); + while (i < 10) { + std::ifstream f(fileName); + if (f.good()) { + break; + } + SleepTime(1); // sleep 1s + INFO_LOG("check result, wait time [%ds]", i+1); + i++; + } + // 10 is max time of checking + if (i == 10) { + ERROR_LOG("check result failed, timeout,expect file:%s", fileName); + return FAILED; + } + INFO_LOG("check result success, file exist"); + return SUCCESS; +} + +Result Utils::CheckAndCreateFolder(const char *foldName) +{ + INFO_LOG( "start check result fold:%s", foldName); +#if defined(_MSC_VER) + DWORD ret = GetFileAttributes((LPCSTR)foldName); + if (ret == INVALID_FILE_ATTRIBUTES) { + BOOL flag = CreateDirectory((LPCSTR)foldName, nullptr); + if (flag) { + INFO_LOG("make dir successfully."); + } else { + INFO_LOG("make dir errorly."); + return FAILED; + } + } +#else + if (access(foldName , 0) == -1) { + int 
flag=mkdir(foldName , 0777); + if (flag == 0) + { + INFO_LOG("make dir successfully."); + } else { + ERROR_LOG("make dir errorly."); + return FAILED; + } + } +#endif + INFO_LOG("check result success, fold exist"); + return SUCCESS; +} + +Result Utils::SaveModelOutputData(const char *srcfileName, const char *dstfileName) +{ + Result ret = CheckFile(srcfileName); + if (ret != SUCCESS) { + ERROR_LOG("model output file not exist."); + return FAILED; + } + FILE *model_output; + model_output = fopen(srcfileName,"rb" ); + + FILE *model_output_txt; + model_output_txt = fopen(dstfileName, "wb+"); + INFO_LOG("open result file: [%s]", dstfileName); + + int i = 0; + float prop = 0.0; + std::map> mp; + std::map::iterator ite; + while (feof(model_output) == 0) { + ite = mp.end(); + fread(&prop, sizeof(float), 1, model_output); + mp.insert(ite, std::map::value_type(prop, i)); + fprintf(model_output_txt, "%f, %d", prop, i); + i++; + } + fclose(model_output); + ite = mp.begin(); + float sum = 0.0; + float max = ite->first; + int classType = ite->second; + for (i = 0 ; i < 5; i++) { + sum += ite->first; + ++ite; + } + fprintf(model_output_txt, "classType[%d], top1[%f], top5[%f]", classType, max, sum); + fclose(model_output_txt); + INFO_LOG("result:classType[%d], top1[%f], top5[%f]", classType,max,sum); + return SUCCESS; +} + +std::vector Utils::ReadDir(const char* folder) +{ + std::vector fileList; +#if defined(_MSC_VER) + std::string inputDirectory = folder; + inputDirectory = inputDirectory.append("*"); + + _finddata_t fileinfo; + long long handle = (long long)_findfirst(inputDirectory.c_str(), &fileinfo); + if (handle == -1) { + ERROR_LOG("_findfirst failed!"); + return fileList; + } + + do { + DWORD ret = GetFileAttributes((LPCSTR)fileinfo.name); + if (ret == FILE_ATTRIBUTE_DIRECTORY) { + continue; + } + fileList.push_back(fileinfo.name); + } while (!_findnext(handle, &fileinfo)); + + _findclose(handle); +#else + struct dirent *dirp; + DIR* dir = opendir(folder); + while ((dirp 
= readdir(dir)) != nullptr) { + if (dirp->d_type == DT_REG) { + fileList.push_back(dirp->d_name); + } + } + closedir(dir); +#endif + return fileList; +} + +void Utils::RemoveDir(const char* outFolder_) +{ +#if defined(_MSC_VER) + RemoveDirectory((LPCSTR)outFolder_); +#else + rmdir(outFolder_); +#endif +} + +void Utils::SleepTime(unsigned int seconds) +{ +#if defined(_MSC_VER) + unsigned long secs = static_cast(seconds); + Sleep(secs * 1000); // sleep 1 second +#else + sleep(seconds); +#endif +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/vdec_process.cpp b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/vdec_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dff9f33835f2260595ac5ee7ad009cdde9c96557 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/ACL_MEDIA_VDEC/src/vdec_process.cpp @@ -0,0 +1,314 @@ +/** +* @file vdec_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ +#include "vdec_process.h" +#include + +using namespace std; + +VdecProcess::VdecProcess() + : vdecChannelDesc_(nullptr), streamInputDesc_(nullptr), + picOutputDesc_(nullptr), picOutBufferDev_(nullptr), + inBufferDev_(nullptr), inBufferSize_(0), inputWidth_(0), + inputHeight_(0), enType_(H265_MAIN_LEVEL), format_(PIXEL_FORMAT_YUV_400) +{ +} + +VdecProcess::~VdecProcess() +{ +} +extern const char *output_dir; +void callback(acldvppStreamDesc *input, acldvppPicDesc *output, void *userdata) +{ + uint64_t frameIndex = 0; + if (userdata != nullptr) { + // get frame index in callback process + frameIndex = *((uint64_t *)userdata); + INFO_LOG("start processing callback, frame index is %lu", frameIndex); + free(userdata); + userdata = nullptr; + } + // free input vdecInBufferDev and destroy stream desc + if (input != nullptr) { + void *vdecInBufferDev = acldvppGetStreamDescData(input); + if (vdecInBufferDev != nullptr) { + aclError ret = acldvppFree(vdecInBufferDev); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to free input stream desc data, errorCode = %d", static_cast(ret)); + } + } + aclError ret = acldvppDestroyStreamDesc(input); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to destroy input stream desc, errorCode = %d", static_cast(ret)); + } + } + + // if output is nullptr, acldvppGetPicDescData return nullptr + if (output != nullptr) { + void *vdecOutBufferDev = acldvppGetPicDescData(output); + // check whether decode success + int retCode = acldvppGetPicDescRetCode(output); + // decode fail, release resource and retuen + if (retCode != 0) { + ERROR_LOG("vdec decode frame failed, retCode = %d.", retCode); + if (vdecOutBufferDev != nullptr) { + aclError ret = acldvppFree(vdecOutBufferDev); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to free output pic desc data, errorCode = %d", static_cast(ret)); + } + } + aclError ret = acldvppDestroyPicDesc(output); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to destroy output pic desc, errorCode = %d", 
static_cast(ret)); + } + return; + } + + // decode success, process output pic + if (vdecOutBufferDev != nullptr) { + uint32_t size = acldvppGetPicDescSize(output); + std::string fileNameSave = std::string(output_dir) + "/image" + std::to_string(frameIndex); + if (!Utils::WriteDeviceMemoryToFile(fileNameSave.c_str(), vdecOutBufferDev, size)) { + ERROR_LOG("write file failed."); + } + + aclError ret = acldvppFree(vdecOutBufferDev); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to free output pic desc data, errorCode = %d", static_cast(ret)); + } + } + aclError ret = acldvppDestroyPicDesc(output); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to destroy output pic desc, errorCode = %d", static_cast(ret)); + } + } + + INFO_LOG("success to process vdec callback %lu.", frameIndex); +} + +Result VdecProcess::InitResource(uint64_t threadId, acldvppStreamFormat enType, acldvppPixelFormat format) +{ + threadId_ = threadId; + enType_ = enType; + format_ = format; + // create vdec channelDesc + vdecChannelDesc_ = aclvdecCreateChannelDesc(); + if (vdecChannelDesc_ == nullptr) { + ERROR_LOG("fail to create vdec channel desc"); + return FAILED; + } + + // channelId: 0-15 + aclError ret = aclvdecSetChannelDescChannelId(vdecChannelDesc_, 10); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec ChannelId, errorCode = %d", static_cast(ret)); + return FAILED; + } + + ret = aclvdecSetChannelDescThreadId(vdecChannelDesc_, threadId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to create threadId, errorCode = %d", static_cast(ret)); + return FAILED; + } + + // callback func + ret = aclvdecSetChannelDescCallback(vdecChannelDesc_, callback); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec Callback, errorCode = %d", static_cast(ret)); + return FAILED; + } + + ret = aclvdecSetChannelDescEnType(vdecChannelDesc_, enType_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec EnType, errorCode = %d", static_cast(ret)); + return FAILED; + } + + ret = 
aclvdecSetChannelDescOutPicFormat(vdecChannelDesc_, format_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set vdec OutPicFormat, errorCode = %d", static_cast(ret)); + return FAILED; + } + + // create vdec channel + ret = aclvdecCreateChannel(vdecChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to create vdec channel, errorCode = %d", static_cast(ret)); + return FAILED; + } + + INFO_LOG("vdec init resource success"); + return SUCCESS; +} + +void VdecProcess::SetInput(void *inBufferDev, uint32_t inBufferSize, + int inputWidth, int inputHeight) +{ + inBufferDev_ = inBufferDev; + inBufferSize_ = inBufferSize; + inputWidth_ = inputWidth; + inputHeight_ = inputHeight; +} + +Result VdecProcess::CreateStreamDesc() +{ + // create input stream desc + streamInputDesc_ = acldvppCreateStreamDesc(); + if (streamInputDesc_ == nullptr) { + ERROR_LOG("fail to create input stream desc"); + return FAILED; + } + + aclError ret = acldvppSetStreamDescData(streamInputDesc_, inBufferDev_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set data for stream desc, errorCode = %d", static_cast(ret)); + return FAILED; + } + // set size for dvpp stream desc + ret = acldvppSetStreamDescSize(streamInputDesc_, inBufferSize_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set size for stream desc, errorCode = %d", static_cast(ret)); + return FAILED; + } + return SUCCESS; +} + +void VdecProcess::DestroyStreamDesc() +{ + if (inBufferDev_ != nullptr) { + (void)acldvppFree(inBufferDev_); + inBufferDev_ = nullptr; + } + if (streamInputDesc_ != nullptr) { + (void)acldvppDestroyStreamDesc(streamInputDesc_); + streamInputDesc_ = nullptr; + } +} + +Result VdecProcess::CreatePicDesc(size_t size) +{ + // Malloc output device memory + aclError ret = acldvppMalloc(&picOutBufferDev_, size); + if (ret != ACL_SUCCESS) { + ERROR_LOG("aclrtMalloc failed, ret=%d", ret); + return FAILED; + } + picOutputDesc_ = acldvppCreatePicDesc(); + if (picOutputDesc_ == nullptr) { + ERROR_LOG("fail to create 
output pic desc"); + return FAILED; + } + ret = acldvppSetPicDescData(picOutputDesc_, picOutBufferDev_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set PicDescData, errorCode = %d", static_cast(ret)); + return FAILED; + } + ret = acldvppSetPicDescSize(picOutputDesc_, size); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set PicDescSize, errorCode = %d", static_cast(ret)); + return FAILED; + } + ret = acldvppSetPicDescFormat(picOutputDesc_, format_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set PicDescHeight, errorCode = %d", static_cast(ret)); + return FAILED; + } + return SUCCESS; +} + +void VdecProcess::DestroyPicDesc() +{ + if (picOutBufferDev_ != nullptr) { + (void)acldvppFree(picOutBufferDev_); + picOutBufferDev_ = nullptr; + } + if (picOutputDesc_ != nullptr) { + (void)acldvppDestroyPicDesc(picOutputDesc_); + picOutputDesc_ = nullptr; + } +} + +Result VdecProcess::Process() +{ + // create stream desc + Result err = CreateStreamDesc(); + if (err != SUCCESS) { + DestroyStreamDesc(); + return FAILED; + } + // create pic desc + size_t DataSize = (inputWidth_ * inputHeight_ * 3) / 2; // yuv format size + err = CreatePicDesc(DataSize); + if (err != SUCCESS) { + DestroyStreamDesc(); + DestroyPicDesc(); + return FAILED; + } + + // set frame index, callback function can use it + static uint64_t index = 0; + uint64_t *frameIndex = (uint64_t *)malloc(sizeof(uint64_t)); + if (frameIndex != nullptr) { + *frameIndex = index++; + } + + // send vdec frame + aclError ret = aclvdecSendFrame(vdecChannelDesc_, streamInputDesc_, + picOutputDesc_, nullptr, static_cast(frameIndex)); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to send frame, ret=%u", ret); + DestroyStreamDesc(); + DestroyPicDesc(); + if (frameIndex != nullptr) { + free(frameIndex); + frameIndex = nullptr; + } + return FAILED; + } + return SUCCESS; +} + +Result VdecProcess::SendVdecEos() +{ + // create stream desc + acldvppStreamDesc *streamInputDesc = acldvppCreateStreamDesc(); + if 
(streamInputDesc == nullptr) { + ERROR_LOG("fail to create input stream desc"); + return FAILED; + } + aclError ret = acldvppSetStreamDescEos(streamInputDesc, 1); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to set eos for stream desc, errorCode = %d", static_cast(ret)); + (void)acldvppDestroyStreamDesc(streamInputDesc); + return FAILED; + } + + // send vdec eos frame. when all vdec callback are completed, aclvdecSendFrame can be returned. + ret = aclvdecSendFrame(vdecChannelDesc_, streamInputDesc, nullptr, nullptr, nullptr); + if (ret != ACL_SUCCESS) { + ERROR_LOG("fail to send eos frame, ret=%u", ret); + (void)acldvppDestroyStreamDesc(streamInputDesc); + return FAILED; + } + (void)acldvppDestroyStreamDesc(streamInputDesc); + + return SUCCESS; +} + +void VdecProcess::DestroyResource() +{ + if (vdecChannelDesc_ != nullptr) { + aclError ret = aclvdecDestroyChannel(vdecChannelDesc_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acldvppDestroyChannel failed, , errorCode = %d", static_cast(ret)); + } + (void)aclvdecDestroyChannelDesc(vdecChannelDesc_); + vdecChannelDesc_ = nullptr; + } +} diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/PYACL_VPC/TEST.sh b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/PYACL_VPC/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..95c51d5cf5b56b32bc720d6fc0cdd6226942070c --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/PYACL_VPC/TEST.sh @@ -0,0 +1 @@ +python3 ./test_acl_vpc.py "${OEC_DATA_PATH}" \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/ApplicationDev/media/PYACL_VPC/test_acl_vpc.py b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/PYACL_VPC/test_acl_vpc.py new file mode 100644 index 0000000000000000000000000000000000000000..5cb596a7b3059075539a6f111f908b64177e2b35 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/ApplicationDev/media/PYACL_VPC/test_acl_vpc.py @@ -0,0 +1,310 @@ +# -*- coding:utf-8 -*- +import os +import unittest +import 
numpy as np +from decimal import Decimal, getcontext +import acl +import utils as util +from utils import align_size +from utils import get_align_size +import sys + +data_path = sys.argv[1] +print(f"data path is {data_path}") + +YUV400 = 0 +YUV420 = 1 +YUV422 = 3 +ACL_MEMCPY_HOST_TO_DEVICE = 1 +ACL_MEMCPY_DEVICE_TO_HOST = 2 +HEIGHT_STRIDE = (1, 2) +ONE_PIXEL_OCCUPY_TWO_BYTE = 2 +ONE_PIXEL_OCCUPY_THREE_BYTE = 3 +ONE_PIXEL_OCCUPY_FOUR_BYTE = 4 + +WIDTH_STRIDE = { + (0, 1, 2, 3, 4, 5, 6, 1000, 1001):lambda x,y: align_size(y, 16), + (7, 8, 9, 10): lambda x,y: align_size(y, 16) * ONE_PIXEL_OCCUPY_TWO_BYTE, + (11, 12, 13):lambda x,y: align_size(y, 16) * ONE_PIXEL_OCCUPY_THREE_BYTE, + (14, 15, 16, 17):lambda x,y: align_size(y, 16) * ONE_PIXEL_OCCUPY_FOUR_BYTE +} + +BUFFER_SIZE = { + (0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17):lambda x, y:x * y, + (1, 2):lambda x, y:(x * y) * 3 // 2, + (3, 4):lambda x, y:(x * y) * 2, + (6):lambda x, y:(x * y) * 3, +} + + +class AclVpc(object): + def __init__(self, in_batch_size=1, out_batch_size=1): + self.out_batch_size = out_batch_size + self.in_batch_size = in_batch_size + self.out_batch_pic_desc = None + self.in_batch_pic_desc = None + self.crop_area = None + self.paste_area = None + self.output_desc = None + self.input_desc = None + self.in_buffer_dev = None + self.out_buffer_dev = None + self.dvpp_channel_desc = None + self.resize_config = None + self.context, ret = acl.rt.create_context(0) + assert ret == 0 + acl.rt.set_context(self.context) + self.stream, ret = acl.rt.create_stream() + assert ret == 0 + self.dev_buffer = {} + self.corpList, self.pasteList = [], [] + + def __del__(self): + acl.rt.set_context(self.context) + self._free_pic_desc() + for i in range(len(self.corpList)): + ret = acl.media.dvpp_destroy_roi_config(self.corpList[i]) + assert ret == 0 + for i in range(len(self.pasteList)): + ret = acl.media.dvpp_destroy_roi_config(self.pasteList[i]) + assert ret == 0 + for key in self.dev_buffer.keys(): + if 
self.dev_buffer[key]: + ret = acl.media.dvpp_free(self.dev_buffer[key]) + assert ret == 0 + if self.resize_config: + ret = acl.media.dvpp_destroy_resize_config(self.resize_config) + assert ret == 0 + roi_conf = [self.crop_area, self.paste_area] + for i in range(len(roi_conf)): + if roi_conf[i]: + ret = acl.media.dvpp_destroy_roi_config(roi_conf[i]) + assert ret == 0 + buffer_dev = [self.in_buffer_dev, self.out_buffer_dev] + for i in range(len(buffer_dev)): + if buffer_dev[i]: + ret = acl.media.dvpp_free(buffer_dev[i]) + assert ret == 0 + if self.dvpp_channel_desc: + ret = acl.media.dvpp_destroy_channel(self.dvpp_channel_desc) + assert ret == 0 + ret = acl.media.dvpp_destroy_channel_desc(self.dvpp_channel_desc) + assert ret == 0 + ret = acl.rt.destroy_stream(self.stream) + assert ret == 0 + ret = acl.rt.destroy_context(self.context) + assert ret == 0 + print("vpc free resource") + + def _free_pic_desc(self): + desc = [self.output_desc, self.input_desc] + for i in range(len(desc)): + if desc[i]: + ret = acl.mdeia.dvpp_destroy_pic_desc(desc[i]) + assert ret == 0 + + batch_pic_desc = [self.out_batch_pic_desc, self.in_batch_pic_desc] + for i in range(len(batch_pic_desc)): + if batch_pic_desc[i]: + ret = acl.media.dvpp_destroy_batch_pic_desc(batch_pic_desc[i]) + assert ret == 0 + + def dvpp_set_pic_desc(self, desc, buffer, width, height, wstride, hstride, size, format=YUV420): + ret = acl.media.dvpp_set_pic_desc_data(desc, buffer) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_format(desc, format) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_width(desc, width) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_height(desc, height) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_width_stride(desc, wstride) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_height_stride(desc, hstride) + assert ret == 0 + ret = acl.media.dvpp_set_pic_desc_size(desc, size) + assert ret == 0 + + def vpc_init(self): + acl.rt.set_context(self.context) + + #create 
channel desc + self.dvpp_channel_desc = acl.media.dvpp_create_channel_desc() + assert self.dvpp_channel_desc != 0 + + #create channel desc + ret = acl.media.dvpp_create_channel(self.dvpp_channel_desc) + assert ret == 0 + + def get_picture_height_stride(self, format, height): + """ + get picture height stride + 1.YUV420 height stride 2 alignment + 2.other format height stride no aligment. + """ + if format in HEIGHT_STRIDE: + return int(((height + 1) // 2) * 2) + return int(height) + + def get_picture_width_stride(self, format, width): + """ + get picture width stride: + 1.width stride 16 alignment, + 2.width stride 16 alignment, one PIXEL occupy two byte, + 3.width stride 16 alignment, one PIXEL occupy three byte, + 4.width stride 16 alignment, one PIXEL occupy four byte, + """ + return get_align_size(WIDTH_STRIDE, format, 0, width) + + def get_picture_buffer_size(self, format, width_stride, height_stride, flag): + """ + get pictutre buffer size: + 1.YUV400 in 310P memory is width_stride * height_stride + 2.YUV400,YUV420 memory is width_stride * height_stride * 3 //2 + 3.YUV422SP,YUV440SP memory is width_stride * height_stride * 2 + 4.YUV4442SP memory is width_stride * height_stride * 3 + 5.other support format memory is width_stride * height_stride. 
+ """ + if flag: + return width_stride * height_stride * 3 // 2 + return get_align_size(BUFFER_SIZE, + format, + width_stride, + height_stride) + + def set_picture_desc(self, desc, width, height, opt, i, format=YUV420, flag=True): + """"get picture info and set picture description""" + width_stride = self.get_picture_width_stride(format, width) + height_stride = self.get_picture_height_stride(format, height) + buffer_size = self.get_picture_buffer_size(format, + width_stride, + height_stride, + flag) + buffer_size = int(buffer_size) + dev, ret =acl.media.dvpp_malloc(buffer_size) + assert ret == 0 + ret = acl.rt.memset(dev, buffer_size, 0, buffer_size) + assert ret == 0 + key = opt + '_' + str(i) + self.dev_buffer[key] = dev + self.dvpp_set_pic_desc(desc, dev, width, + height, width_stride, height_stride, + buffer_size, format) + return buffer_size + + def get_pic_desc_data(self, pic_desc): + pic_data = acl.media.dvpp_get_pic_desc_data(pic_desc) + pic_data_size = acl.media.dvpp_get_pic_desc_size(pic_desc) + ret_code = acl.media.dvpp_get_pic_desc_ret_code(pic_desc) + assert ret_code == 0 + + # pic memcpy d2h + np_pic = np.zeros(pic_data_size, dtype=np.byte) + bytes_data = np_pic.tobytes() + np_pic_ptr = acl.util.bytes_to_ptr(bytes_data) + ret = acl.rt.memcpy(np_pic_ptr, pic_data_size, + pic_data, pic_data_size, ACL_MEMCPY_DEVICE_TO_HOST) + assert ret == 0 + return np_pic + + def async_vpc_batch_crop_resize_paste_synchronize(self, w, h, path): + self.out_batch_pic_desc = acl.media.dvpp_create_batch_pic_desc(self.out_batch_size) + self.in_batch_pic_desc = acl.media.dvpp_create_batch_pic_desc(self.in_batch_size) + # load data from file + np_yuv = np.fromfile(path, dtype=np.byte) + in_buffer_size = np_yuv.itemsize * np_yuv.size + bytes_data = np_yuv.tobytes() + bytes_yuv_ptr = acl.util.bytes_to_ptr(bytes_data) + roiList = [] + for i in range(self.in_batch_size): + input_desc = acl.media.dvpp_get_pic_desc(self.in_batch_pic_desc, i) + print(self.in_batch_pic_desc, 
input_desc, i) + assert input_desc != 0 + self.set_picture_desc(input_desc, w, h, "input", i) + #copy from host to device + key = "input" + '_' + str(i) + ret = acl.rt.memcpy(self.dev_buffer[key], in_buffer_size, bytes_yuv_ptr, + in_buffer_size, ACL_MEMCPY_HOST_TO_DEVICE) + assert ret == 0 + roiList.append(self.out_batch_size // self.in_batch_size) + + for i in range(self.out_batch_size): + out_desc = acl.media.dvpp_get_pic_desc(self.out_batch_pic_desc, i) + assert out_desc != 0 + self.set_picture_desc(out_desc, w // 2, h // 2, "output", i) + if i % 2 == 0: + crop_area = acl.media.dvpp_create_roi_config(w // 2, w - 1, h // 2, h - 1) + paste_area = acl.media.dvpp_create_roi_config(w // 4, w // 2 - 1, + h // 4, h // 2 - 1 ) + else: + crop_area = acl.media.dvpp_create_roi_config(0, w // 2 -1, 0, h // 2 -1) + paste_area = acl.media.dvpp_create_roi_config(0, w // 4 - 1, 0, h // 4 -1) + self.corpList.append(crop_area) + self.pasteList.append(paste_area) + + total_num = 0 + for i in range(self.in_batch_size): + total_num += roiList[i] + if self.out_batch_size % self.in_batch_size != 0: + roiList[-1] = self.out_batch_size - total_num + roiList[-1] + + self.resize_config = acl.media.dvpp_create_resize_config() + ret = acl.media.dvpp_vpc_batch_crop_resize_paste_async(self.dvpp_channel_desc, self.in_batch_pic_desc, + roiList, self.out_batch_pic_desc, self.corpList, + self.pasteList, self.resize_config, self.stream) + print("ret:",ret) + assert ret == 0 + ret = acl.rt.synchronize_stream(self.stream) + assert ret == 0 + np_list = [] + for i in range(self.out_batch_size): + output_desc = acl.media.dvpp_get_pic_desc(self.out_batch_pic_desc, i) + np_output = self.get_pic_desc_data(output_desc) + np_list.append(np_output) + + return np_list + + +class TestVpc(unittest.TestCase): + @classmethod + def setUpClass(cls): + """called only once before all testcase""" + # init + ret = acl.init("") + assert ret == 0 + ret = acl.rt.set_device(0) + assert ret == 0 + + @classmethod + def 
def get_class_methods(class_name):
    """Return the ``test_*`` attribute names of a class, ordered by case id.

    The case id is the third ``_``-separated component of the name
    (``"019"`` in ``test_vpc_019_batch_crop``). Names with fewer than three
    components sort first instead of raising ``IndexError``. Names with the
    same id keep the alphabetical order produced by ``dir()``.

    Args:
        class_name: class (or object) whose attributes are inspected.

    Returns:
        list[str]: matching attribute names in execution order.
    """
    def case_id(method_name):
        parts = method_name.split("_")
        return parts[2] if len(parts) > 2 else ""

    test_names = [name for name in dir(class_name) if name.startswith("test_")]
    # sorted() is stable, so ties keep dir()'s alphabetical order.
    return sorted(test_names, key=case_id)
def get_device_type():
    """Return the chip family derived from ``acl.get_soc_name()``.

    The leading ``"Ascend"`` is removed from the SoC name. If the remainder
    contains ``"P"`` its first four characters are kept, otherwise the first
    three. ``"910P"`` is then folded into ``"910"``.

    Returns:
        str: one of ``"310"``, ``"310P"`` or ``"910"``.

    Raises:
        RuntimeError: if ACL reports no SoC name, or the derived family is
            not one of the three supported values.
    """
    soc_name = acl.get_soc_name()
    if not soc_name:
        # Previously this surfaced as an opaque TypeError from slicing None.
        raise RuntimeError("acl.get_soc_name() returned no SoC name")

    prefix = "Ascend"
    # Only strip the prefix when it is really there, rather than blindly
    # dropping the first six characters.
    model = soc_name[len(prefix):] if soc_name.startswith(prefix) else soc_name

    device_type = model[:4] if "P" in model else model[:3]
    if device_type == "910P":
        device_type = "910"

    if device_type not in ("310", "310P", "910"):
        # The SoC name comes from ACL; npu-smi is not involved, so the
        # message no longer mentions it.
        raise RuntimeError(
            f"device_type = {device_type} (soc name: {soc_name}) not in 310/310P/910")
    return device_type
# Install the single package in the current directory whose file name
# contains $1 into "$install_path".
#   $1  package name fragment, e.g. "cann-toolkit"
# Exits the script with 1 unless exactly one regular file matches, or with
# the installer's own status if the installer fails.
function install(){
    package=$1

    # Resolve the installer once, from the same glob that is executed below.
    # Counting with `ls | grep` could disagree with the glob (it matches
    # directories and bare substrings) and the unquoted glob could expand to
    # several words.
    local candidates=() candidate
    for candidate in ./*"$package"*; do
        [[ -f "$candidate" ]] && candidates+=("$candidate")
    done
    count=${#candidates[@]}
    if [ "$count" -ne 1 ]; then
        echo "ERROR: numer of $package is not equal to 1, count = $count ."
        exit 1
    fi
    local installer=${candidates[0]}

    echo ===============================================
    echo "INSTALL $installer"
    echo ">>>>>>>>>>>> ASCEND_HOME_PATH <<<<<<<<<<<<<<<<<"
    env | grep ASCEND_HOME_PATH
    echo ===============================================
    echo "$installer" --install --install-path="$install_path" --quiet
    chmod +x "$installer"
    "$installer" --install --quiet --install-path="$install_path"
    rst=$?
    if [[ $rst != 0 ]]; then
        exit $rst
    fi
}
# Install toolkit -> kernels -> nnal, then uninstall in the reverse order.
# install/uninstall already exit the script on failure; `|| exit $?` keeps
# the sequence safe should they ever return a status instead.
install cann-toolkit || exit $?

# The toolkit environment is loaded right after the toolkit is installed and
# before the remaining packages are installed.
source "${install_path}/ascend-toolkit/set_env.sh"

for pkg in cann-kernels cann-nnal; do
    install "$pkg" || exit $?
done

for pkg in cann-nnal cann-kernels cann-toolkit; do
    uninstall "$pkg" || exit $?
done

# Verify the uninstall where the packages were installed. The script has
# cd'ed into the package directory ($1), so the relative path "Ascend" did
# not point at ${install_path}: the check could never fail and the cleanup
# removed the wrong directory.
code=0
if [[ -d "${install_path}/ascend-toolkit" ]]; then
    code=1
fi
# ${var:?} aborts instead of running `rm -rf ""` if install_path is unset.
rm -rf "${install_path:?}"
exit $code
[[ $glibc_version =~ ^[0-9]+\.[0-9]+ ]]; then + echo "错误: 无法解析glibc版本: '$glibc_version'" + return 1 + fi + + # 版本比较 + if awk -v req="$required_version" -v curr="$glibc_version" 'BEGIN { + split(req, r, "."); split(curr, c, "."); + for (i=1; i<=3; i++) { + if (c[i]+0 < r[i]+0) exit 1; + if (c[i]+0 > r[i]+0) exit 0; + } + exit 0 + }'; then + echo "通过: glibc版本满足要求 ($glibc_version >= $required_version)" + return 0 + else + echo "错误: glibc版本过低 (当前: $glibc_version < 要求: $required_version)" + return 191 + fi +} + +# 检查命令是否存在 +check_command() { + if command -v "$1" >/dev/null 2>&1; then + echo "通过: $1 命令可用" + return 0 + else + echo "错误: $1 命令未找到" + return 1 + fi +} + +# 主检查函数 +main() { + local all_success=0 + # 所有需要检查的命令列表 + local commands=( + "gcc" "g++" "cmake" "make" "ifconfig" + "tar" "realpath" "arch" "grep" "sed" "timeout" + ) + + echo "开始依赖检查..." + echo "==============================" + + # 检查glibc版本 + if ! check_glibc; then + all_success=1 + fi + + # 检查必需命令 + for cmd in "${commands[@]}"; do + if ! check_command "$cmd"; then + all_success=1 + fi + done + + echo "==============================" + + # 返回最终状态 + if [ $all_success -ne 0 ]; then + echo "依赖检查失败! 请解决以上问题后再运行程序" + exit 1 + else + echo "所有依赖检查通过! 
可以安全运行程序" + exit 0 + fi +} + +# 执行主函数 +main \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Environment/Dependency/DEPENDENCY_DETECTION_PYTHON/TEST.sh b/oec-ascend/oec/test_cases/cann/Environment/Dependency/DEPENDENCY_DETECTION_PYTHON/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..cd57b53cbc9b60bce2a92451a7ac4db421551ddf --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Environment/Dependency/DEPENDENCY_DETECTION_PYTHON/TEST.sh @@ -0,0 +1 @@ +python3 check_package_version.py \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Environment/Dependency/DEPENDENCY_DETECTION_PYTHON/check_package_version.py b/oec-ascend/oec/test_cases/cann/Environment/Dependency/DEPENDENCY_DETECTION_PYTHON/check_package_version.py new file mode 100644 index 0000000000000000000000000000000000000000..cd299b36e5232f4f282be204f9498aeabe1e65d3 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Environment/Dependency/DEPENDENCY_DETECTION_PYTHON/check_package_version.py @@ -0,0 +1,393 @@ +import sys +import subprocess +from packaging.version import parse as parse_version + +def get_python_version(): + """获取当前Python版本字符串""" + return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + +def get_installed_packages(): + """获取所有已安装的包及其版本 (使用pip list)""" + try: + result = subprocess.run( + [sys.executable, '-m', 'pip', 'list', '--format=freeze'], + capture_output=True, + text=True, + check=True + ) + packages = {} + for line in result.stdout.splitlines(): + if '==' in line: + name, version = line.split('==', 1) + packages[name.lower()] = version.strip() + return packages + except Exception as e: + print(f"错误: 无法获取已安装包列表 - {str(e)}") + print("请确保pip已安装并能正常工作") + sys.exit(1) + +def check_python_version(min_version=None, max_version=None): + """ + 检查Python版本是否在指定范围内 + + 参数: + min_version (str): 最小支持版本 (e.g., "3.8.0") + max_version (str): 最大支持版本 (e.g., "3.10.0") + + 返回: + tuple: (是否满足, 问题描述) + """ + 
def check_dependencies(requirements):
    """Check the Python version and every configured package, printing a report.

    Args:
        requirements (dict): dependency configuration. ``"python"`` may hold
            ``min_version`` / ``max_version``. ``"packages"`` is a list of
            dicts with ``"name"`` and optional ``"version_rules"`` (keyed by
            ``"major.minor"``) and ``"default"`` rules.

    Returns:
        tuple: ``(state, results)``. ``state`` is ``"ok"``, ``"warning"``
        (a Python or package version is out of range) or ``"no"`` (a package
        is not installed). ``results`` holds one dict per package.
    """
    # The overall state may only get worse. Assigning the latest finding
    # directly let a later version mismatch ("warning") overwrite an earlier
    # missing package ("no"), which changed the process exit code.
    severity = {"ok": 0, "warning": 1, "no": 2}

    def escalate(current, candidate):
        return candidate if severity[candidate] > severity[current] else current

    def describe_range(min_version, max_version):
        # Human-readable requirement; a pinned version is shown on its own.
        if min_version and max_version and min_version == max_version:
            return min_version
        parts = []
        if min_version:
            parts.append(f"≥ {min_version}")
        if max_version:
            parts.append(f"≤ {max_version}")
        return " 且 ".join(parts) if parts else "任意版本"

    # Snapshot of the installed packages
    installed_packages = get_installed_packages()

    print("=" * 70)
    print("Python环境与包依赖检查")
    print("=" * 70)

    all_ok = "ok"
    results = []

    # 1. Python interpreter version
    py_req = requirements.get("python", {})
    if py_req:
        min_py = py_req.get("min_version")
        max_py = py_req.get("max_version")
        py_ok, py_problems = check_python_version(min_py, max_py)

        range_str = describe_range(min_py, max_py)
        status = "✓" if py_ok else "✗"
        problems = ", ".join(py_problems) if py_problems else "满足要求"
        print(f"Python版本: {get_python_version()} | 要求: {range_str}")
        print(f" {status} 状态: {problems}")
        print("-" * 70)

        if not py_ok:
            all_ok = escalate(all_ok, "warning")

    # 2. Package dependencies
    packages = requirements.get("packages", [])
    if not packages:
        print("未配置包依赖检查")
    else:
        print("\n包依赖检查:")
        print("-" * 70)

    # Rules are selected by the running interpreter's "major.minor".
    current_py = f"{sys.version_info.major}.{sys.version_info.minor}"

    for pkg in packages:
        name = pkg["name"]
        pypi_name = pkg["name"]

        # A version-specific rule wins over the package's default rule.
        version_rules = pkg.get("version_rules", {})
        rule = version_rules.get(current_py, pkg.get("default", {}))

        satisfied, version, problems = check_package(
            {"pypi_name": pypi_name, **rule},
            installed_packages
        )

        range_str = describe_range(rule.get("min_version"), rule.get("max_version"))

        if satisfied:
            status = "✓"
        else:
            status = "✗"
            # Installed but out of range -> "warning"; not installed -> "no".
            all_ok = escalate(all_ok, "warning" if version else "no")

        results.append({
            "display_name": name,
            "pypi_name": pypi_name,
            "status": status,
            "installed": version or "未安装",
            "required": range_str,
            "problems": problems,
            "rule": rule
        })

        print(f"{status} {name}")
        print(f" 已安装: {version or '未安装'}")
        print(f" 要求: {range_str}")
        if problems:
            print(f" 问题: {', '.join(problems)}")
        print("-" * 70)

    print("=" * 70)
    return all_ok, results
commands.append(f"{pkg_name}>={rule['min_version']}") + elif "max_version" in rule: + commands.append(f"{pkg_name}<={rule['max_version']}") + else: + commands.append(pkg_name) + + commands.append("\n# 然后运行:") + commands.append("pip install -r requirements.txt") + + return "\n".join(commands) + +if __name__ == "__main__": + # ====== 依赖配置 ====== + # 配置说明: + # - "python": 可选的Python版本要求 + # - "packages": 包依赖列表 + # 每个包必须包含: + # - "name": PyPI上的包名 + # - "version_rules": (可选) 针对不同Python版本的规则 + # - "default": (可选) 默认规则 + # + # 规则格式: + # { + # "min_version": "最低版本", + # "max_version": "最高版本" + # } + + DEPENDENCY_CONFIG = { + # Python版本要求 + "python": { + "min_version": "3.7.5", # 最低支持Python 3.8 + "max_version": "3.11.4" # 最高支持Python 3.10 + }, + + "packages": [ + # 通用包 - 所有Python版本使用相同要求 + { + "name": "numpy", + "version_rules": { + # Python 3.7 使用这个要求 + "3.7": { + "min_version": "1.21.6", + "max_version": "1.21.6" + } + }, + "default": { + "min_version": "1.19.2", + "max_version": "1.24.0" + } + }, + { + "name": "decorator", + "default": { + "min_version": "4.4.0" + } + }, + { + "name": "sympy", + "default": { + "min_version": "1.5.1" + } + }, + + # 仅在某些Python版本有特殊要求 + { + "name": "cffi", + # 其他Python版本使用默认要求 + "default": { + "min_version": "1.12.3" + } + }, + { + "name": "protobuf", + # 其他Python版本使用默认要求 + "default": { + "min_version": "3.20", + "max_version": "3.20", + } + }, + + # 仅检查是否安装,不限制版本 + { + "name": "attrs" + }, + { + "name": "cython" + }, + { + "name": "pyyaml" + }, + { + "name": "pathlib2" + }, + { + "name": "scipy" + }, + { + "name": "requests" + }, + { + "name": "psutil" + }, + { + "name": "absl-py" + }, + + ] + } + # ==================== + + # 检查依赖 + all_ok, results = check_dependencies(DEPENDENCY_CONFIG) + + if all_ok == 'ok': + print("\n所有依赖满足! 
可以运行主程序。") + # 这里可以继续执行你的主程序 + # from main import main + # main() + else: + print("\n错误: 环境不满足要求!") + print("请根据以下提示解决问题:") + + # 生成安装建议 + py_req = DEPENDENCY_CONFIG.get("python", {}) + commands = generate_install_commands(results, py_req) + print("\n" + commands) + + sys.exit(191 if all_ok == "warning" else 1) # 非零退出码表示错误 \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_ADD_DEMO/TEST.sh b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_ADD_DEMO/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..55cdfac53aa093af8e80e14b3712c5ec85cd44ab --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_ADD_DEMO/TEST.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e +echo `date` +NNAL_path="$ASCEND_HOME_PATH/../../nnal/atb/set_env.sh" +env_flag=0 +source ${NNAL_path} + +if [ ${env_flag} = 0 ] +then + if test -e ./add_demo.cpp + then + mkdir -p "$OEC_OUTPUT_PATH" + echo 'Compiling file with g++...' + g++ -I "$ATB_HOME_PATH/include" -I "$ASCEND_HOME_PATH/include" -L "$ATB_HOME_PATH/lib" -L "$ASCEND_HOME_PATH/lib64" add_demo.cpp -latb -lascendcl -o "$OEC_OUTPUT_PATH/demo" + cd "$OEC_OUTPUT_PATH" + ./demo + if [ $? = 0 ] + then + echo "Success!" + exit 0 + fi + exit $? 
+ fi +fi +exit 1 \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_ADD_DEMO/add_demo.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_ADD_DEMO/add_demo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d6243c88596183e07bd93df19ccf20355a873ab --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_ADD_DEMO/add_demo.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include + +using namespace atb; +void warmup(atb::Operation *operation, atb::VariantPack &pack, uint64_t &workspaceSize, atb::Context *context) { + operation->Setup(pack, workspaceSize, context); + void *workSpace = nullptr; + int ret = 0; + if (workspaceSize != 0) { + ret = aclrtMalloc(&workSpace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + if (ret != 0) { + std::cout << "alloc error!"; + exit(100); + } + } + + operation->Execute(pack, (uint8_t*)workSpace, workspaceSize, context); +} + +void exeop(atb::Operation *operation, atb::VariantPack &pack, uint64_t &workspaceSize, atb::Context *context) { + operation->Setup(pack, workspaceSize, context); + void *workSpace = nullptr; + int ret = 0; + if (workspaceSize != 0) { + ret =aclrtMalloc(&workSpace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + if(ret != 0) { + std::cout<< "alloc error!"; + exit(200); + } + } + + operation->Execute(pack, (uint8_t*)workSpace, workspaceSize, context); + sleep(1); + std::cout<<"sleep(1)" <SetExecuteStream(stream); + uint64_t workspaceSize = 0; + warmup(op, variantPack, workspaceSize, context); + exeop(op, variantPack, workspaceSize, context); + exeop(op, variantPack, workspaceSize, context); + st = op->Setup(variantPack, workspaceSize, context); + std::cout< 0){ + status = aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + CHECK_STATUS(30); + } + st = op->Execute(variantPack, (uint8_t *)workspace, workspaceSize, context); + CHECK_ST(27); + status = aclrtDestroyStream(stream); + CHECK_STATUS(40); + 
if(workspace){ + status = aclrtFree(workspace); + CHECK_STATUS(50); + } + st =atb::DestroyOperation(op); + CHECK_ST(53); + st = atb::DestroyContext(context); + CHECK_ST(56); + status = aclrtFree(a.deviceData); + CHECK_STATUS(60); + a.deviceData = nullptr; + a.dataSize = 0; + status = aclrtFree(b.deviceData); + CHECK_STATUS(70); + b.deviceData = nullptr; + b.dataSize = 0; + status = aclrtFree(output.deviceData); + CHECK_STATUS(80); + output.deviceData = nullptr; + output.dataSize = 0; + std::cout<<"SUCCESS"< &inTensorDesc, atb::SVector &outTensorDesc) const +{ + LOG_INFO(opName_ + " InferShape start"); + outTensorDesc.at(0).format = inTensorDesc.at(0).format; + outTensorDesc.at(0).dtype = inTensorDesc.at(0).dtype; + outTensorDesc.at(0).shape.dimNum = inTensorDesc.at(0).shape.dimNum; + + if (inTensorDesc.at(0).shape.dimNum == DIM3) { + LOG_INFO("[input0 dimNum = 3] CHECK " + opName_ + " input shape: [input0] " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM0]) + ", " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM1]) + ", " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM2])); + outTensorDesc.at(0).shape.dims[DIM0] = inTensorDesc.at(0).shape.dims[DIM0]; + outTensorDesc.at(0).shape.dims[DIM1] = inTensorDesc.at(0).shape.dims[DIM1]; + outTensorDesc.at(0).shape.dims[DIM2] = inTensorDesc.at(0).shape.dims[DIM2]; + } else if (inTensorDesc.at(0).shape.dimNum == DIM2) { + LOG_INFO("[input0 dimNum = 2] CHECK " + opName_ + " input shape: [input0] " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM0]) + ", " + + std::to_string(inTensorDesc.at(0).shape.dims[DIM1])); + outTensorDesc.at(0).shape.dims[DIM0] = inTensorDesc.at(0).shape.dims[DIM0]; + outTensorDesc.at(0).shape.dims[DIM1] = inTensorDesc.at(0).shape.dims[DIM1]; + } else { + LOG_ERROR(opName_ + " invalid dimNum = " + std::to_string(inTensorDesc.at(0).shape.dimNum)); + } + + LOG_INFO(opName_ + " InferShape end"); + return atb::NO_ERROR; +} + +uint32_t GeluOperation::GetInputNum() const +{ + return 1; // 
gelu入参个数 +} + +uint32_t GeluOperation::GetOutputNum() const +{ + return 1; // gelu出参个数 +} + +// 重写父类方法, 创建输入输出tensor,并存入VariantPack +atb::Status GeluOperation::CreateAclnnVariantPack(const atb::VariantPack &variantPack) +{ + LOG_INFO(opName_ + " CreateAclnnVariantPack start"); + + auto ret = CreateAclnnInTensor(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " CreateAclnnInTensor fail"); + return atb::ERROR_INVALID_PARAM; + } + + ret = CreateAclnnOutTensor(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " CreateAclNNOutTensorVariantPack fail"); + return atb::ERROR_INVALID_PARAM; + } + + LOG_INFO(opName_ + " CreateAclnnVariantPack end"); + return atb::NO_ERROR; +} + +atb::Status GeluOperation::CreateAclnnInTensor(const atb::VariantPack &variantPack) +{ + aclInTensors_.resize(GetInputNum()); + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + auto aclnnTensor = CreateAclnnTensor(variantPack.inTensors.at(i), i); + if (aclnnTensor->tensor == nullptr) { + LOG_ERROR(opName_ + " InTensor aclCreateTensor index " + std::to_string(i) + " fail"); + return atb::ERROR_INTERNAL_ERROR; + } + aclInTensors_[i] = aclnnTensor; + } + return atb::NO_ERROR; +} + +atb::Status GeluOperation::CreateAclnnOutTensor(const atb::VariantPack &variantPack) +{ + aclOutTensors_.resize(GetOutputNum()); + for (size_t i = 0; i < aclOutTensors_.size(); ++i) { + auto aclnnTensor = CreateAclnnTensor(variantPack.outTensors.at(i), i); + if (aclnnTensor->tensor == nullptr) { + LOG_ERROR(opName_ + " outTensor aclCreateTensor index " + std::to_string(i) + " fail"); + return atb::ERROR_INTERNAL_ERROR; + } + LOG_INFO(opName_ + " input[" + std::to_string(i) + "] CreateAclnnTensor start"); + aclOutTensors_[i] = aclnnTensor; + } + return atb::NO_ERROR; +} + +atb::SVector GetCopyTensorStride(atb::Dims &tensorDims) +{ + atb::SVector tmpStrides(tensorDims.dimNum, 1); + if (tensorDims.dimNum > 8) { // 8: tensor最大维度数量 + LOG_ERROR("tensor's dimNum is larger than 8, GetCopyTensorStride failed."); + return 
tmpStrides; + } + for (int64_t i = static_cast(tensorDims.dimNum) - 2; i >= 0; i--) { + tmpStrides[i] = (tensorDims.dims[i + 1] * tmpStrides[i + 1]); + } + return tmpStrides; +} + +std::shared_ptr GeluOperation::CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx) +{ + auto aclnnTensor = std::make_shared(); + aclnnTensor->tensorIdx = static_cast(tensorIdx); + aclnnTensor->needUpdateTensorDataPtr = true; + aclnnTensor->atbTensor = atbTensor; + aclnnTensor->strides = GetCopyTensorStride(atbTensor.desc.shape); + + // 创建Aclnn tensor + aclnnTensor->tensor = aclCreateTensor(atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.desc.dtype, + aclnnTensor->strides.data(), + 0, + atbTensor.desc.format, + atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.deviceData); + return aclnnTensor; +} + +// 重写父类方法, 创建workspace和aclexecutor +atb::Status GeluOperation::SetAclnnWorkspaceExecutor() +{ + // 调用aclnn接口获取workspace大小 + LOG_INFO(opName_ + " SetAclnnWorkspaceExecutor start"); + if (param_.geluApproximate == -1) { + auto ret = aclnnGeluGetWorkspaceSize(aclInTensors_.at(0)->tensor, // self + aclOutTensors_.at(0)->tensor, // out + &workspaceSize_, + &aclExecutor_); + CHECK_RET(ret, opName_ + " aclnnGeluGetWorkspaceSize failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " SetAclnnWorkspaceExecutor end, workspaceSize_: " + std::to_string(workspaceSize_)); + return ret; + } + auto ret = aclnnGeluV2GetWorkspaceSize(aclInTensors_.at(0)->tensor, // x + param_.geluApproximate, // approximate + aclOutTensors_.at(0)->tensor, // y + &workspaceSize_, + &aclExecutor_); + CHECK_RET(ret, opName_ + " aclnnGeluV2GetWorkspaceSize failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " SetAclnnWorkspaceExecutor end, workspaceSize_: " + std::to_string(workspaceSize_)); + return ret; +} + +// 重写父类方法, 执行aclnn算子 +atb::Status GeluOperation::ExecuteAclnnOp(uint8_t *workspace, aclrtStream &stream) +{ + // 调用aclnn算子进行算子下发 + LOG_INFO(opName_ + " 
ExecuteAclnnOp start"); + if (param_.geluApproximate == -1) { + auto ret = aclnnGelu(workspace, workspaceSize_, aclExecutor_, stream); + CHECK_RET(ret, opName_ + " ExecuteAclnnOp failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " ExecuteAclnnOp end"); + return ret; + } + auto ret = aclnnGeluV2(workspace, workspaceSize_, aclExecutor_, stream); + CHECK_RET(ret, opName_ + " aclnnGeluV2 failed, ret: " + std::to_string(ret)); + LOG_INFO(opName_ + " ExecuteAclnnOp end"); + return ret; +} diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_gelu_operation.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_gelu_operation.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1008301a15973622d55c171239fae2f32f9f7d --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_gelu_operation.h @@ -0,0 +1,32 @@ +#ifndef ACLNN_GELU_OPERATION_H +#define ACLNN_GELU_OPERATION_H + +#include "aclnn/aclnn_operation_base.h" + +struct AclnnGeluParam +{ + int64_t geluApproximate = -1; // gelu_v2计算的入参,指定高斯近似算法,0: "none", 1: "tanh" , -1: 不使用gelu_v2 +}; + +class GeluOperation : public AclnnBaseOperation +{ +public: + GeluOperation(const std::string &name, AclnnGeluParam param); + atb::Status InferShape( + const atb::SVector &inTensorDesc, atb::SVector &outTensorDesc) const override; + uint32_t GetInputNum() const override; + uint32_t GetOutputNum() const override; + + atb::Status CreateAclnnVariantPack(const atb::VariantPack &variantPack) override; + atb::Status SetAclnnWorkspaceExecutor() override; + atb::Status ExecuteAclnnOp(uint8_t *workspace, aclrtStream &stream) override; + +private: + atb::Status CreateAclnnInTensor(const atb::VariantPack &variantPack); + atb::Status CreateAclnnOutTensor(const atb::VariantPack &variantPack); + std::shared_ptr CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx); + + AclnnGeluParam param_; +}; + +#endif 
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_operation_base.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_operation_base.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c64fdb4149c571c8e544f7116d5a53dc6d08bee2 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_operation_base.cpp @@ -0,0 +1,116 @@ +#include "aclnn/aclnn_operation_base.h" +#include "utils/log.h" + +AclnnBaseOperation::AclnnBaseOperation(const std::string &opName) : opName_(opName) +{} + +AclnnBaseOperation::~AclnnBaseOperation() +{ + aclExecutor_ = nullptr; +} + +std::string AclnnBaseOperation::GetName() const +{ + return opName_; +} + +atb::Status AclnnBaseOperation::Setup( + const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) +{ + LOG_INFO(opName_ + " setup start"); + + // 调用子类,创建输入输出tensor,并存入VariantPack + int ret = CreateAclnnVariantPack(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " call CreateAclnnVariantPack fail, error: " + std::to_string(ret)); + return atb::ERROR_INVALID_PARAM; + } + + // 调用子类,获取Executor和Workspace + ret = SetAclnnWorkspaceExecutor(); + if (ret != 0) { + LOG_ERROR( + opName_ + " call CreateAclnnVaSetAclnnWorkspaceExecutorriantPack fail, error: " + std::to_string(ret)); + return atb::ERROR_INVALID_PARAM; + } + // 返回计算出的workspaceSize + workspaceSize = workspaceSize_; + LOG_INFO(opName_ + " setup end"); + return ret; +} + +atb::Status AclnnBaseOperation::Execute( + const atb::VariantPack &variantPack, uint8_t *workspace, uint64_t workspaceSize, atb::Context *context) +{ + LOG_INFO(opName_ + " execute start"); + if (!context) { + LOG_ERROR(opName_ + " execute fail, context param is null"); + return atb::ERROR_INVALID_PARAM; + } + + aclrtStream stream = context->GetExecuteStream(); + if (!stream) { + LOG_ERROR(opName_ + " execute fail, execute stream in context is null"); + 
return atb::ERROR_INVALID_PARAM; + } + + // 更新数据传入的地址 + int ret = UpdateAclnnVariantPack(variantPack); + if (ret != 0) { + LOG_ERROR(opName_ + " call UpdateAclnnVariantPack fail, error: " + std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + + LOG_INFO("Input workspaceSize " + std::to_string(workspaceSize) + " localCache workspaceSize " + + std::to_string(workspaceSize_)); + ret = ExecuteAclnnOp(workspace, stream); // 调用aclnn接口 + if (ret != 0) { + LOG_ERROR(opName_ + " call ExecuteAclnnOp fail, error: " + std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + LOG_INFO(opName_ + " execute start"); + + return ret; +} + +atb::Status AclnnBaseOperation::UpdateAclnnVariantPack(const atb::VariantPack &variantPack) +{ + // 更新inTensor的device地址 + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + int ret = -1; + if (!aclInTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclInTensors_[i]->atbTensor = variantPack.inTensors.at(i); + ret = aclSetInputTensorAddr(aclExecutor_, + aclInTensors_[i]->tensorIdx, + aclInTensors_[i]->tensor, + aclInTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + LOG_ERROR( + "inTensor " + std::to_string(i) + " call UpdateAclTensorDataPtr fail, error: " + std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + } + + // 更新outTensor的device地址 + for (size_t i = 0; i < aclOutTensors_.size(); ++i) { + int ret = -1; + if (!aclOutTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclOutTensors_[i]->atbTensor = variantPack.outTensors.at(i); + ret = aclSetOutputTensorAddr(aclExecutor_, + aclOutTensors_[i]->tensorIdx, + aclOutTensors_[i]->tensor, + aclOutTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + LOG_ERROR( + "outTensor " + std::to_string(i) + " call UpdateAclTensorDataPtr fail, error: " + std::to_string(ret)); + return atb::ERROR_CANN_ERROR; + } + } + return atb::NO_ERROR; +} diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_operation_base.h 
b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_operation_base.h
new file mode 100644
index 0000000000000000000000000000000000000000..6b384eb033f0d73e482e37b8809f129075609476
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/aclnn/aclnn_operation_base.h
@@ -0,0 +1,57 @@
+#ifndef ACLNN_OPERATION_BASE_H
+#define ACLNN_OPERATION_BASE_H
+
+#include
+#include
+#include
+#include
+#include
+#include "atb/infer_op_params.h"
+
+// Thin wrapper pairing an atb::Tensor with the aclTensor built from it
+struct AclnnTensor
+{
+public:
+    atb::Tensor atbTensor; // ATB-side tensor; its deviceData is what gets bound to the executor
+    aclTensor *tensor = nullptr;
+    int tensorIdx = -1; // index of the aclTensor inside the aclOpExecutor
+    bool needUpdateTensorDataPtr = false;
+    atb::SVector strides = {};
+};
+
+// Base class that lets an aclnn operator be driven through the atb::Operation interface
+class AclnnBaseOperation : public atb::Operation
+{
+public:
+    explicit AclnnBaseOperation(const std::string &opName);
+    ~AclnnBaseOperation() override;
+    std::string GetName() const override;
+
+    // ATB-style Setup: prepares the operator and reports the required workspace size
+    atb::Status Setup(const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) override;
+
+    // ATB-style Execute: runs the operator on the context's execute stream
+    atb::Status Execute(const atb::VariantPack &variantPack, uint8_t *workspace, uint64_t workspaceSize,
+        atb::Context *context) override;
+
+    // Subclass hook: create the aclnn input/output tensors
+    virtual atb::Status CreateAclnnVariantPack(const atb::VariantPack &variantPack) = 0;
+
+    // Subclass hook: compute the workspace size (and, per its name, the executor)
+    virtual atb::Status SetAclnnWorkspaceExecutor() = 0;
+
+    // Subclass hook: launch the aclnn operator
+    virtual atb::Status ExecuteAclnnOp(uint8_t *workspace, aclrtStream &stream) = 0;
+
+    // Rebind the device addresses of the aclnn input and output tensors
+    atb::Status UpdateAclnnVariantPack(const atb::VariantPack &variantPack);
+
+    std::string opName_;
+    aclOpExecutor *aclExecutor_ = nullptr;
+    atb::SVector> aclInTensors_;
+    atb::SVector> aclOutTensors_;
+    uint64_t workspaceSize_ = 0;
+    int workspaceBlockId_ = -1;
+};
+
+#endif
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/atb/atb_graph_op.cpp
b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/atb/atb_graph_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6185920c67a850be4cdf2adfcb795230b1e89f3a
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/atb/atb_graph_op.cpp
@@ -0,0 +1,59 @@
+#include "atb/atb_graph_op.h"
+#include "utils/utils.h"
+
+atb::Status CreateGraphOperation(atb::Operation **operation)
+{
+    // Graph construction
+    // Graph inputs: a, b, c, d
+    // Formula: (a+b) + (c+d)
+    // 4 inputs, 1 output, 3 add nodes, 2 internal (temporary) tensors
+    atb::GraphParam opGraph;
+
+    opGraph.inTensorNum = 4;
+    opGraph.outTensorNum = 1;
+    opGraph.internalTensorNum = 2;
+    opGraph.nodes.resize(3);
+
+    enum InTensorId
+    { // tensor ids: the four graph inputs, then the graph output, then the two internal tensors
+        IN_TENSOR_A = 0,
+        IN_TENSOR_B,
+        IN_TENSOR_C,
+        IN_TENSOR_D,
+        ADD3_OUT,
+        ADD1_OUT,
+        ADD2_OUT
+    };
+
+    size_t nodeId = 0;
+    atb::Node &addNode = opGraph.nodes.at(nodeId++);
+    atb::Node &addNode2 = opGraph.nodes.at(nodeId++);
+    atb::Node &addNode3 = opGraph.nodes.at(nodeId++);
+
+    // Each node below is an element-wise ADD created from its own ElewiseParam instance.
+    atb::infer::ElewiseParam addParam;
+    addParam.elewiseType = atb::infer::ElewiseParam::ElewiseType::ELEWISE_ADD;
+    auto status = atb::CreateOperation(addParam, &addNode.operation);
+    CHECK_RET(status, "addParam CreateOperation failed. status: " + std::to_string(status));
+    addNode.inTensorIds = {IN_TENSOR_A, IN_TENSOR_B};
+    addNode.outTensorIds = {ADD1_OUT};
+
+    atb::infer::ElewiseParam addParam2;
+    addParam2.elewiseType = atb::infer::ElewiseParam::ElewiseType::ELEWISE_ADD;
+    status = atb::CreateOperation(addParam2, &addNode2.operation);
+    CHECK_RET(status, "addParam2 CreateOperation failed. status: " + std::to_string(status));
+    addNode2.inTensorIds = {IN_TENSOR_C, IN_TENSOR_D};
+    addNode2.outTensorIds = {ADD2_OUT};
+
+    atb::infer::ElewiseParam addParam3;
+    addParam3.elewiseType = atb::infer::ElewiseParam::ElewiseType::ELEWISE_ADD;
+    status = atb::CreateOperation(addParam3, &addNode3.operation);
+    CHECK_RET(status, "addParam3 CreateOperation failed. status: " + std::to_string(status));
+    addNode3.inTensorIds = {ADD1_OUT, ADD2_OUT};
+    addNode3.outTensorIds = {ADD3_OUT};
+
+    status = atb::CreateOperation(opGraph, operation);
+    CHECK_RET(status, "GraphParam CreateOperation failed. status: " + std::to_string(status));
+
+    return atb::NO_ERROR;
+}
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/atb/atb_graph_op.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/atb/atb_graph_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..05ca62a4672e895152aa919cc7e35d550998068f
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/atb/atb_graph_op.h
@@ -0,0 +1,15 @@
+#ifndef ATB_GRAPH_OP_H
+#define ATB_GRAPH_OP_H
+
+#include
+#include
+#include
+#include
+#include "atb/infer_op_params.h"
+
+// Two things need care when building graph parameters. First, tensor ids: the ATB graph interface has three tensor kinds (input, output, internal); inputs and outputs belong to the whole graph,
+// while internal tensors live only inside it. Ids must ascend in the order input, output, internal, and the count of each kind must match the numbers set in the graph parameters.
+// Second, node order: the nodes must be listed as a topologically ordered queue of the compute graph, with tensor-to-node wiring consistent with that graph.
+atb::Status CreateGraphOperation(atb::Operation **operation);
+
+#endif
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/main.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3b40f337be58a5e302379fe644ae00fb6aba6649
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/main.cpp
@@ -0,0 +1,57 @@
+#include "model/model.h"
+#include "memory/memory_utils.h"
+#include
+#include "utils/utils.h"
+
+void ModelExecute(uint32_t deviceId, Model &model)
+{
+    // Initialise the model: set the device and create its context and stream
+    model.InitResource(deviceId);
+
+    // Build the model graph
+    model.CreateModelGraph();
+
+    // Create the model inputs and fill them with values
+    model.CreateModelInput();
+
+    // Create the model outputs (sized via shape inference)
+    model.CreateModelOutput();
+
+    // Run the model
+    model.Execute();
+
+    // Print the values of the output tensor
+    PrintOutTensorValue(model.modelOutTensors_.at(0));
+
+    // Release resources
+    model.FreeResource();
+}
+
+int main()
+{
+    // Initialise AscendCL
+    auto ret = aclInit(nullptr);
+    CHECK_RET(ret, "aclInit failed. ret: " + std::to_string(ret));
+
+    // Create the per-device memory pools
+    size_t poolSize = 104857600; // Allocated memory 100 MiB.
+    GetMemoryManager().CreateMemoryPool(poolSize);
+
+    // Create one model per device
+    uint32_t deviceCount = 0;
+    CHECK_RET(aclrtGetDeviceCount(&deviceCount), "get devicecount fail");
+    std::vector modelArray(deviceCount);
+
+    // Dispatch each model graph on its own thread
+    std::vector threadArray(deviceCount);
+    for (size_t i = 0; i < deviceCount; i++) {
+        Model &model = modelArray.at(i);
+        threadArray.at(i) = std::thread([i, &model]{ModelExecute(i, model);}); // create the thread and bind its entry function
+    }
+    for (size_t i = 0; i < deviceCount; i++) {
+        threadArray.at(i).join(); // wait for the worker thread to finish
+    }
+
+    aclFinalize();
+    return 0;
+}
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_env.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_env.h
new file mode 100644
index 0000000000000000000000000000000000000000..6b4bafa80a1842755057c49abc0e8720c9e0f4f1
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_env.h
@@ -0,0 +1,12 @@
+#ifndef MEMORY_ENV_H
+#define MEMORY_ENV_H
+
+#include
+
+struct MemoryBlock {
+    int64_t blockId = -1; // block index; -1 until assigned
+    size_t blockSize = 0; // block size in bytes
+    void *address = nullptr; // start address of the block
+};
+
+#endif
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_utils.cpp
b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..65c90597c0bead210cd8df6fe2ae02a6e07c8189
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_utils.cpp
@@ -0,0 +1,63 @@
+#include
+#include "memory_utils.h"
+#include "utils/log.h"
+#include "utils/utils.h"
+
+// Global MemoryManager instance
+static MemoryManager g_memoryManager;
+
+MemoryManager::MemoryManager()
+{}
+
+void MemoryManager::CreateMemoryPool(size_t poolSize)
+{
+    uint32_t deviceCount = 0;
+
+    // Query how many devices are present
+    CHECK_RET(aclrtGetDeviceCount(&deviceCount), "get devicecount fail");
+    for (size_t i = 0; i < deviceCount; i++) {
+
+        // Select device i; stop on failure instead of creating the pool on whichever device is current
+        CHECK_RET(aclrtSetDevice(i), "set device " + std::to_string(i) + " fail");
+
+        // Create the pool; poolSize is the number of bytes to pre-allocate
+        std::shared_ptr memoryPool = std::make_shared(poolSize);
+        memoryPools_.push_back(memoryPool);
+        LOG_INFO("create mempool for device " + std::to_string(i) + " success");
+    }
+}
+
+int32_t MemoryManager::GetDeviceId()
+{
+    int32_t deviceId = -1;
+    CHECK_RET(aclrtGetDevice(&deviceId), "get device ID fail");
+    return deviceId;
+}
+
+std::shared_ptr &MemoryManager::GetMemoryPool()
+{
+    // Look up the current device and return its pool; std::to_string is required because adding an integer to a string literal is pointer arithmetic
+    size_t deviceId = static_cast(GetDeviceId());
+    CHECK_RET(deviceId >= memoryPools_.size(), "Invalid device id " + std::to_string(deviceId));
+    return memoryPools_[deviceId];
+}
+
+void MemoryManager::AllocateBlock(uint32_t size, int &blockId)
+{
+    GetMemoryPool()->AllocateBlock(size, blockId);
+}
+
+void MemoryManager::FreeBlock(int blockId)
+{
+    GetMemoryPool()->FreeBlock(blockId);
+}
+
+void MemoryManager::GetBlockPtr(int blockId, void *&addr)
+{
+    GetMemoryPool()->GetBlockPtr(blockId, addr);
+}
+
+MemoryManager &GetMemoryManager()
+{
+    return g_memoryManager;
+}
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_utils.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_utils.h
new file
mode 100644
index 0000000000000000000000000000000000000000..d45a0a8fe87e8cc39a52b2edc6f9593d5f5dd28d
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memory_utils.h
@@ -0,0 +1,38 @@
+#ifndef MEMORY_UTILS_H
+#define MEMORY_UTILS_H
+
+#include
+#include
+#include "memorypool.h"
+
+// Memory manager: owns one memory pool per device
+class MemoryManager {
+public:
+    MemoryManager();
+
+    // Create a memory pool on every device
+    void CreateMemoryPool(size_t poolSize);
+
+    // Get the device currently selected by the calling thread
+    int32_t GetDeviceId();
+
+    // Get the memory pool of the calling thread's current device
+    std::shared_ptr &GetMemoryPool();
+
+    // Allocate a block
+    void AllocateBlock(uint32_t size, int &blockId);
+
+    // Free a block
+    void FreeBlock(int blockId);
+
+    // Get the address of a block
+    void GetBlockPtr(int blockId, void *&addr);
+
+private:
+    std::vector> memoryPools_;
+};
+
+// Access the global MemoryManager instance
+MemoryManager &GetMemoryManager();
+
+#endif
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memorypool.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memorypool.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..adeab1c19a471ded23818bbe7be8534d71a477fa
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memorypool.cpp
@@ -0,0 +1,104 @@
+#include
+#include
+#include "memorypool.h"
+#include "utils/log.h"
+#include "utils/utils.h"
+
+constexpr size_t POOL_SIZE = 104857600; // reference pool size of 100 MiB; callers pass the actual size explicitly
+
+MemoryPool::MemoryPool(size_t poolSize)
+{
+    // Pre-allocate the whole pool through the acl interface
+    CHECK_RET(aclrtMalloc(&baseMemPtr_, poolSize, ACL_MEM_MALLOC_HUGE_FIRST),
+        "malloc huge size memrory " + std::to_string(poolSize) + " bytes fail");
+    curMemPtr_ = baseMemPtr_;
+    remainSize_ = poolSize;
+}
+
+MemoryPool::~MemoryPool()
+{
+    // Release the pool through the acl interface on destruction
+    if (baseMemPtr_ != nullptr) {
+        CHECK_RET(aclrtFree(baseMemPtr_), "free huge memory fail");
+    }
+    LOG_INFO("release MemoryPool success");
+}
+
+uint64_t MemoryPool::GenerateBlocksId()
+{
+    return static_cast(id_.fetch_add(1, std::memory_order_relaxed));
+}
+
+void MemoryPool::AllocateBlock(uint32_t size, int &blockId)
+{
+    std::unique_lock lock(blockMutex_);
+
+    size_t alignSize = ((size + 31) & ~31) + 32; // round the request up to 32 bytes, then add another 32 bytes
+
+    // Reuse a free block if one is large enough
+    for (auto it = freeBlocks_.begin(); it != freeBlocks_.end(); it++) {
+        if (it->second.blockSize >= alignSize) {
+            blockId = it->second.blockId;
+            usedBlocks_.insert(*it);
+            freeBlocks_.erase(it);
+            LOG_INFO("find free block id " + std::to_string(blockId) + " to allocate");
+            return;
+        }
+    }
+
+    // No reusable block: carve a new one, reserving room for up to 63 bytes of alignment padding
+    if (remainSize_ > static_cast<int64_t>(alignSize + 63)) {
+        blockId = GenerateBlocksId();
+        uint64_t curMemPtrAlign = (reinterpret_cast(curMemPtr_) + 63) & ~63; // block addresses are 64-byte aligned
+        remainSize_ -= (curMemPtrAlign - reinterpret_cast(curMemPtr_));
+        curMemPtr_ = reinterpret_cast(curMemPtrAlign);
+
+        MemoryBlock block = {blockId, alignSize, curMemPtr_};
+        usedBlocks_.insert({blockId, block});
+        remainSize_ -= alignSize;
+        curMemPtr_ = reinterpret_cast(curMemPtr_) + alignSize;
+        LOG_INFO("allocate block id " + std::to_string(blockId) + " for size " + std::to_string(alignSize));
+        return;
+    }
+
+    blockId = -1; // pool exhausted: return an invalid id so the caller cannot keep using a stale one
+    LOG_ERROR("allocate block fail");
+}
+
+void MemoryPool::FreeBlock(int blockId)
+{
+    std::unique_lock lock(blockMutex_);
+
+    // Negative ids mean "never allocated"; nothing to free
+    if (blockId < 0) {
+        LOG_INFO("skip over the invalid block id " + std::to_string(blockId));
+        return;
+    }
+
+    // Move the block from the used map to the free map
+    auto it = usedBlocks_.find(blockId);
+    if (it != usedBlocks_.end()) {
+        freeBlocks_.insert(*it);
+        usedBlocks_.erase(it);
+    } else {
+        LOG_ERROR("Double free block id " + std::to_string(blockId));
+    }
+}
+
+void MemoryPool::GetBlockPtr(int blockId, void *&addr)
+{
+    std::unique_lock lock(blockMutex_);
+
+    if (blockId < 0) {
+        LOG_INFO("Invalid block id " + std::to_string(blockId) + " to get ptr");
+        return;
+    }
+
+    // Look the block up among the used blocks; addr is left untouched when it is not found
+    auto it = usedBlocks_.find(blockId);
+    if (it != usedBlocks_.end()) {
+        addr = it->second.address;
+    } else {
+        LOG_ERROR("Get block address error, block id " + std::to_string(blockId));
+    }
+}
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memorypool.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memorypool.h
new file mode 100644
index 0000000000000000000000000000000000000000..9ffcd662c1f8623bdf97648f631de3a3ef48acfd
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/memory/memorypool.h
@@ -0,0 +1,38 @@
+#ifndef MEMORYPOOL_H
+#define MEMORYPOOL_H
+
+#include
+#include
+#include
+#include
+#include "memory_env.h"
+
+// Device memory pool
+class MemoryPool {
+public:
+    explicit MemoryPool(size_t poolSize);
+    ~MemoryPool();
+
+    // Allocate a block
+    void AllocateBlock(uint32_t size, int &blockId);
+
+    // Free a block
+    void FreeBlock(int blockId);
+
+    // Get the address of a block
+    void GetBlockPtr(int blockId, void *&addr);
+
+private:
+    // Generate the next block index
+    uint64_t GenerateBlocksId();
+
+    std::atomic id_ = 0;
+    std::mutex blockMutex_;
+    void *baseMemPtr_ = nullptr;
+    void *curMemPtr_ = nullptr;
+    int64_t remainSize_ = 0;
+    std::unordered_map freeBlocks_;
+    std::unordered_map usedBlocks_;
+};
+
+#endif
\ No newline at end of file
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/model/model.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/model/model.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..508b9d15daea92452fd892dd58e913f8666b7705
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/model/model.cpp
@@ -0,0 +1,263 @@
+#define USE_MEMPOOL
+
+#include "model/model.h"
+#include "aclnn/aclnn_gelu_operation.h"
+#include "utils/utils.h"
+#include "atb/atb_graph_op.h"
+#include "memory/memory_utils.h"
+
+void Model::InitResource(uint32_t deviceId)
+{
+    // Select the device
+    deviceId_ = deviceId;
+    auto ret = aclrtSetDevice(deviceId_);
+    CHECK_RET(ret, "aclrtSetDevice failed. ret: " + std::to_string(ret));
+
+    // Create the ATB context
+    ret = atb::CreateContext(&modeContext_);
+    CHECK_RET(ret, "ATB CreateContext failed. ret: " + std::to_string(ret));
+
+    // Create the stream
+    ret = aclrtCreateStream(&modelStream_);
+    CHECK_RET(ret, "aclrtCreateStream failed. ret: " + std::to_string(ret));
+
+    // Attach the stream to the context
+    modeContext_->SetExecuteStream(modelStream_);
+}
+
+void Model::CreateModelGraph()
+{
+    LOG_INFO("CreateModelGraph start");
+    // This demo model consists of 2 nodes
+    nodes_.resize(2);
+    for (size_t i = 0; i < nodes_.size(); i++) {
+        auto node = Node();
+        nodes_[i] = node;
+    }
+
+    modelInTensors_.resize(Mode_INPUT_SIZE);
+    modelOutTensors_.resize(Mode_OUTPUT_SIZE);
+
+    internalTensors_.resize(1);
+    size_t nodeId = 0;
+    CreateGraphOpLayer(nodeId++);
+
+    // Second node: the aclnn operator
+    CreateAclnnOpLayer(nodeId);
+    LOG_INFO("CreateModelGraph end");
+}
+
+void Model::CreateGraphOpLayer(size_t nodeId)
+{
+    // Create the graph operation
+    Node &graph_node = nodes_[nodeId];
+    auto ret = CreateGraphOperation(&graph_node.operation_);
+    CHECK_RET(ret, "CreateGraphOperation failed");
+    graph_node.inTensors_.resize(graph_node.operation_->GetInputNum());
+
+    // Wire the inputs of the graph-operation node.
+    // They are the inputs of the whole model, so they point straight at the model's input tensors.
+    size_t layerInTensorId = 0;
+    graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_A);
+    graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_B);
+    graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_C);
+    graph_node.inTensors_.at(layerInTensorId++) = &modelInTensors_.at(IN_TENSOR_D);
+
+    // Wire the output of the graph-operation node to the single internal tensor
+    graph_node.outTensors_ = {&internalTensors_.at(0)};
+    graph_node.outTensorTypes_ = {TensorType::INTERNAL_TENSOR};
+}
+
+void Model::CreateAclnnOpLayer(size_t nodeId)
+{
+    // Create the aclnn operation
+    Node &aclnn_node = nodes_[nodeId];
+    AclnnGeluParam AclnnGeluParam;
+    AclnnGeluParam.geluApproximate = -1;
+    aclnn_node.operation_ = new GeluOperation("Gelu", AclnnGeluParam);
+    aclnn_node.inTensors_.resize(aclnn_node.operation_->GetInputNum());
+
+    // Wire the input of the aclnn node.
+    // The output of the graph operation is the input of the aclnn operator.
+    size_t layerInTensorId = 0;
+    aclnn_node.inTensors_.at(layerInTensorId++) = &internalTensors_.at(0);
+
+    // Wire the output of the aclnn node to the model output
+    aclnn_node.outTensors_ = {&modelOutTensors_.at(GLUE_OUT)};
+    aclnn_node.outTensorTypes_ = {TensorType::NOT_INTERNAL_TENSOR};
+}
+
+void Model::CreateModelInput()
+{
+    LOG_INFO("CreateModelInput start");
+    atb::SVector intensorDescs;
+    intensorDescs.resize(Mode_INPUT_SIZE);
+    CreateInTensorDescs(intensorDescs);
+    CreateInTensors(modelInTensors_, intensorDescs);
+    LOG_INFO("CreateModelInput end");
+}
+
+void Model::CreateModelOutput()
+{
+    LOG_INFO("CreateModelOutput start");
+    atb::SVector outtensorDescs;
+    outtensorDescs.resize(Mode_OUTPUT_SIZE);
+
+    // Collect the descriptors of the model inputs
+    atb::SVector inTensorDescs;
+    inTensorDescs.resize(Mode_INPUT_SIZE);
+    for (size_t i = 0; i < modelInTensors_.size(); ++i) {
+        inTensorDescs.at(i) = modelInTensors_.at(i).desc;
+    }
+
+    // Run shape inference to derive the model output descriptors
+    InferShape(inTensorDescs, outtensorDescs);
+    CreateOutTensors(modelOutTensors_, outtensorDescs);
+    LOG_INFO("CreateModelOutput end");
+}
+
+atb::Status Model::InferShape(
+    const atb::SVector &inTensorDescs, atb::SVector &outTensorDescs)
+{
+    // The output shape equals the input shape, so the first input's descriptor is reused
+    outTensorDescs.at(0) = modelInTensors_.at(0).desc;
+    return atb::NO_ERROR;
+}
+
+void Model::Execute()
+{
+    LOG_INFO(modelName_ + " Execute start");
+    for (size_t nodeId = 0; nodeId < nodes_.size(); ++nodeId) {
+        BuildNodeVariantPack(nodeId);
+        atb::Status status = ExecuteNode(nodeId);
+        CHECK_RET(status, "ExecuteNode " + std::to_string(nodeId) + " failed. status: " + std::to_string(status));
+    }
+
+    WaitFinish();
+    LOG_INFO(modelName_ + " Execute end");
+}
+
+void Model::BuildNodeVariantPack(int nodeId)
+{
+    LOG_INFO("buildNodeVariantPack nodes[" + std::to_string(nodeId) + "] start");
+
+    auto &node = nodes_.at(nodeId);
+    atb::SVector inTensorDescs;
+    node.variantPack_.inTensors.resize(node.operation_->GetInputNum());
+    inTensorDescs.resize(node.operation_->GetInputNum());
+
+    // Copy the node's input tensors into the VariantPack and collect their descriptors
+    for (size_t i = 0; i < node.inTensors_.size(); ++i) {
+        node.variantPack_.inTensors.at(i) = *node.inTensors_.at(i);
+        inTensorDescs.at(i) = node.inTensors_.at(i)->desc;
+    }
+
+    atb::SVector outTensorDescs;
+    outTensorDescs.resize(node.operation_->GetOutputNum());
+
+    // Derive the output descriptors via the operation's InferShape; stop if inference fails
+    CHECK_RET(node.operation_->InferShape(inTensorDescs, outTensorDescs), "InferShape node " + std::to_string(nodeId) + " failed");
+
+    node.variantPack_.outTensors.resize(node.operation_->GetOutputNum());
+    for (size_t i = 0; i < node.outTensors_.size(); ++i) {
+        node.variantPack_.outTensors.at(i) = *node.outTensors_.at(i);
+        if (node.outTensorTypes_.at(i) == TensorType::INTERNAL_TENSOR) {
+            // Internal tensors have no storage yet: allocate it from the inferred descriptor
+            CreateTensorFromDesc(node.variantPack_.outTensors.at(i), outTensorDescs.at(i));
+            *node.outTensors_.at(i) = node.variantPack_.outTensors.at(i);
+        }
+    }
+    LOG_INFO("buildNodeVariantPack nodes[" + std::to_string(nodeId) + "] end");
+}
+
+atb::Status Model::ExecuteNode(int nodeId)
+{
+    auto &node = nodes_.at(nodeId);
+
+    // Call Setup to obtain the workspace size
+    uint64_t workspaceSize = 0;
+    atb::Status status = node.operation_->Setup(node.variantPack_, workspaceSize, modeContext_);
+    CHECK_RET(status, "Setup node " + std::to_string(nodeId) + " failed. status: " + std::to_string(status));
+
+    LOG_INFO("Get node[" + std::to_string(nodeId) + "] workspace size:" + std::to_string(workspaceSize));
+
+    // Allocate the workspace
+#ifdef USE_MEMPOOL
+    CreateWorkspaceBuffer(nodeId, workspaceSize);
+#else
+    if (workspaceSize != 0) {
+        status = aclrtMalloc(&node.workspace_, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
+        CHECK_RET(status, "alloc error!");
+    }
+#endif
+
+    // Call Execute
+    LOG_INFO("Execute node[" + std::to_string(nodeId) + "] start");
+    status = node.operation_->Execute(node.variantPack_, (uint8_t *)(node.workspace_), workspaceSize, modeContext_);
+    CHECK_RET(status, "Execute node " + std::to_string(nodeId) + " failed. status: " + std::to_string(status));
+    LOG_INFO("Execute node[" + std::to_string(nodeId) + "] end");
+    return atb::NO_ERROR;
+}
+
+void Model::CreateWorkspaceBuffer(int nodeId, uint64_t workspaceSizeNeeded)
+{
+    auto &node = nodes_.at(nodeId);
+    if (workspaceSizeNeeded == 0) {
+        LOG_INFO("skip the workspacebuffer for size 0");
+        return;
+    }
+    if (node.workspaceBlockId_ == -1 || node.workspaceSize_ == 0) {
+        node.workspaceSize_ = workspaceSizeNeeded;
+        GetMemoryManager().AllocateBlock(node.workspaceSize_, node.workspaceBlockId_);
+    }
+    if (node.workspaceSize_ < workspaceSizeNeeded) {
+        GetMemoryManager().FreeBlock(node.workspaceBlockId_);
+        GetMemoryManager().AllocateBlock(workspaceSizeNeeded, node.workspaceBlockId_);
+        node.workspaceSize_ = workspaceSizeNeeded;
+    }
+    node.workspace_ = nullptr; // GetBlockPtr only writes on success; never keep a pointer to a released block
+    GetMemoryManager().GetBlockPtr(node.workspaceBlockId_, node.workspace_);
+}
+
+void Model::FreeResource()
+{
+    LOG_INFO("FreeResource start");
+    auto status = aclrtDestroyStream(modelStream_); // destroy the stream
+    CHECK_RET(status, "aclrtDestroyStream failed");
+
+    // Destroy the operations and return their workspace blocks to the pool
+    for (auto &node : nodes_) {
+        atb::DestroyOperation(node.operation_);
+#ifdef USE_MEMPOOL
+        GetMemoryManager().FreeBlock(node.workspaceBlockId_);
+#endif
+    }
+    // Destroy the context
+    status = atb::DestroyContext(modeContext_);
+    CHECK_RET(status, "ATB DestroyContext failed");
+    // Free the input tensors
+    for (size_t i = 0; i < modelInTensors_.size(); i++) {
+        aclrtFree(modelInTensors_.at(i).deviceData);
+    }
+
+    // Free the output tensors
+    for (size_t i = 0; i < modelOutTensors_.size(); i++) {
+        aclrtFree(modelOutTensors_.at(i).deviceData);
+    }
+
+    // Free the internal tensors
+    for (size_t i = 0; i < internalTensors_.size(); i++) {
+        aclrtFree(internalTensors_.at(i).deviceData);
+    }
+
+    aclrtResetDevice(deviceId_); // reset the device
+    LOG_INFO("FreeResource end");
+}
+
+void Model::WaitFinish()
+{
+    // Nothing is released here; this only waits for the stream.
+    // Stream synchronisation: block until the device-side tasks have completed
+    auto ret = aclrtSynchronizeStream(modelStream_);
+    CHECK_RET(ret, "sync error!");
+}
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/model/model.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/model/model.h
new file mode 100644
index 0000000000000000000000000000000000000000..f00419b4e3265667bd654a4a394b56bf68cf8cf4
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/model/model.h
@@ -0,0 +1,122 @@
+#ifndef MODEL_H
+#define MODEL_H
+
+#include
+#include
+#include
+#include
+#include
+#include "atb/infer_op_params.h"
+#include "utils/log.h"
+
+enum class TensorType
+{
+    INTERNAL_TENSOR = 0,
+    NOT_INTERNAL_TENSOR,
+};
+
+// Graph node; each Node represents one Operation or GraphOperation
+struct Node
+{
+    // The operation or graph operation behind this node
+    atb::Operation *operation_ = nullptr;
+
+    // Input tensors of the node
+    atb::SVector inTensors_{};
+
+    // Output tensors of the node
+    atb::SVector outTensors_{};
+
+    // For each output, whether it is an internal tensor
+    atb::SVector outTensorTypes_{};
+
+    atb::VariantPack variantPack_{};
+
+    uint64_t workspaceSize_ = 0;
+    int workspaceBlockId_ = -1;
+    void *workspace_ = nullptr;
+};
+
+// All nodes together form the complete graph
+class Model
+{
+public:
+    // Inputs of the model
+    enum InTensorId : int
+    { // tensor ids
+        IN_TENSOR_A = 0,
+        IN_TENSOR_B,
+        IN_TENSOR_C,
+        IN_TENSOR_D,
+        Mode_INPUT_SIZE,
+    };
+
+    enum OutTensorId : int
+    {
+        GLUE_OUT = 0,
+        Mode_OUTPUT_SIZE,
+    };
+
+    explicit Model(std::string &&modelName = "") : modelName_(std::move(modelName))
+    {
+        LOG_INFO("Create model: " + modelName_);
+    }
+
+    // Initialise the model: select the device and create its context and stream
+    void InitResource(uint32_t deviceId);
+
+    // Build the model graph
+    void CreateModelGraph();
+
+    // Create the model's input tensors
+    void CreateModelInput();
+
+    // Create the model's output tensors
+    void CreateModelOutput();
+
+    // Run the model
+    void Execute();
+
+    // Synchronise the stream
+    void WaitFinish();
+
+    // Release resources
+    void FreeResource();
+
+    // Input tensors of the model
+    atb::SVector modelInTensors_;
+
+    // Output tensors of the model
+    atb::SVector modelOutTensors_;
+
+private:
+    // Create the graph operation node
+    void CreateGraphOpLayer(size_t nodeId);
+
+    // Create the aclnn operation node
+    void CreateAclnnOpLayer(size_t nodeId);
+
+    // Build the VariantPack of the node with the given id
+    void BuildNodeVariantPack(int nodeId);
+
+    // Launch the operation of the node with the given id
+    atb::Status ExecuteNode(int nodeId);
+
+    // Obtain a workspace buffer of at least workspaceSizeNeeded bytes for the node
+    void CreateWorkspaceBuffer(int nodeId, uint64_t workspaceSizeNeeded);
+
+    // Shape inference for the model graph
+    atb::Status InferShape(
+        const atb::SVector &inTensorDescs, atb::SVector &outTensorDescs);
+
+    std::string modelName_;
+    uint32_t deviceId_ = 1;
+    atb::Context *modeContext_ = nullptr;
+    aclrtStream modelStream_ = nullptr;
+    std::vector nodes_;
+
+    // Internal tensors of the model; layers are connected through them, so their order matters
+    std::vector internalTensors_;
+};
+
+#endif
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/log.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/log.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a7ba9be688ce158a21830ed9dd83758828a7f5d
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/log.cpp
@@ -0,0 +1,28 @@
+#include "utils/log.h"
+
+const char *logLevelToString(LogLevel level)
+{
+    switch (level)
+    {
+    case LogLevel::DEBUG:
+        return "DEBUG";
+    case LogLevel::INFO:
+        return "INFO";
+    case LogLevel::WARNING:
+        return "WARNING";
+    case LogLevel::ERROR:
+        return "ERROR";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+std::string getCurrentTime()
+{
+    auto now = std::chrono::system_clock::now();
+    auto in_time_t = std::chrono::system_clock::to_time_t(now);
+    std::tm localTm{}; // caller-owned buffer: std::localtime returns shared static storage, unsafe across threads
+    std::stringstream ss;
+    ss << std::put_time(localtime_r(&in_time_t, &localTm), "%Y-%m-%d %X");
+    return ss.str();
+}
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/log.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/log.h
new file mode 100644
index 0000000000000000000000000000000000000000..06f38c014d7c68f2dd0cdae4c41ee7a89a1a10bd
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/log.h
@@ -0,0 +1,94 @@
+#ifndef LOG_H
+#define LOG_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Log levels, in ascending severity
+enum class LogLevel
+{
+    DEBUG,
+    INFO,
+    WARNING,
+    ERROR
+};
+
+// Convert a log level to its string name
+const char *logLevelToString(LogLevel level);
+
+// Current local time formatted as a string
+std::string getCurrentTime();
+
+// Logger that writes each message to stdout and, when the file opened, to a log file
+class Logger
+{
+public:
+    // Open the log file in append mode; a failure is reported on stderr and file output is skipped
+    Logger(const std::string &filename, LogLevel minLevel = LogLevel::INFO) : minLogLevel(minLevel)
+    {
+        logFile.open(filename, std::ios::out | std::ios::app);
+        if (!logFile.is_open())
+        {
+            std::cerr << "Failed to open log file: " << filename << std::endl;
+        }
+    }
+
+    // Close the log file if it is open
+    ~Logger()
+    {
+        if (logFile.is_open())
+        {
+            logFile.close();
+        }
+    }
+
+    // Set the minimum level that gets emitted
+    void setMinLogLevel(LogLevel level)
+    {
+        minLogLevel = level;
+    }
+
+    // Emit one log record; `format` is accepted but not used, the args are streamed in order
+    template
+    void log(LogLevel level, const char *file, int line, const char *format, Args... args)
+    {
+        std::lock_guard lock(mutex);
+        if (level >= minLogLevel)
+        {
+            std::stringstream ss;
+            ss << "[" << getCurrentTime() << "] [" << logLevelToString(level) << "] [" << file << ":" << line << "] ";
+            (ss << ... << args);
+
+            std::string logMessage = ss.str();
+            std::cout << logMessage << std::endl;
+            if (logFile.is_open())
+            {
+                logFile << logMessage << std::endl;
+            }
+        }
+    }
+
+private:
+    std::ofstream logFile;
+    LogLevel minLogLevel;
+    std::mutex mutex;
+};
+
+// Global logger object; `inline` gives one shared instance (and one mutex) instead of a copy per translation unit
+inline Logger g_logger("app.log", LogLevel::DEBUG);
+
+// Helper macro that forwards the variadic arguments together with file and line
+#define LOG_HELPER(level, ...) g_logger.log(level, __FILE__, __LINE__, "%s", ##__VA_ARGS__)
+
+// Convenience macros, one per level
+#define LOG_DEBUG(...) LOG_HELPER(LogLevel::DEBUG, ##__VA_ARGS__)
+#define LOG_INFO(...) LOG_HELPER(LogLevel::INFO, ##__VA_ARGS__)
+#define LOG_WARNING(...) LOG_HELPER(LogLevel::WARNING, ##__VA_ARGS__)
+#define LOG_ERROR(...) LOG_HELPER(LogLevel::ERROR, ##__VA_ARGS__)
+
+#endif
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/utils.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..431aa84f780dbe299e8b1bce7b8dd2d2228fade4
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/utils.cpp
@@ -0,0 +1,70 @@
+#include "utils/log.h"
+#include "utils/utils.h"
+
+void CreateInTensorDescs(atb::SVector &intensorDescs)
+{
+    for (size_t i = 0; i < intensorDescs.size(); i++)
+    {
+        intensorDescs.at(i).dtype = ACL_FLOAT16;
+        intensorDescs.at(i).format = ACL_FORMAT_ND;
+        intensorDescs.at(i).shape.dimNum = 2;
+        intensorDescs.at(i).shape.dims[0] = 2;
+        intensorDescs.at(i).shape.dims[1] = 2;
+    }
+}
+
+void CreateInTensors(atb::SVector &inTensors, atb::SVector &intensorDescs)
+{
+    for (size_t i = 0; i < inTensors.size(); i++)
+    {
+        inTensors.at(i).desc = intensorDescs.at(i);
+        inTensors.at(i).dataSize = atb::Utils::GetTensorSize(inTensors.at(i));
+        std::vector hostData(atb::Utils::GetTensorNumel(inTensors.at(i)), 2); // host buffer with every element set to 2
+        int ret = aclrtMalloc(
+            &inTensors.at(i).deviceData, inTensors.at(i).dataSize, ACL_MEM_MALLOC_HUGE_FIRST); // allocate NPU memory
+        CHECK_RET(ret, "alloc error!");
+
+        ret = aclrtMemcpy(inTensors.at(i).deviceData,
+            inTensors.at(i).dataSize,
+            hostData.data(),
+            hostData.size() * sizeof(uint16_t),
+            ACL_MEMCPY_HOST_TO_DEVICE); // copy the host buffer to the NPU
+        CHECK_RET(ret, "aclrtMemcpy error!");
+    }
+}
+
+void CreateOutTensors(atb::SVector &outTensors, atb::SVector &outtensorDescs)
+{
+    for (size_t i = 0; i < outTensors.size(); i++)
+    {
+        outTensors.at(i).desc = outtensorDescs.at(i);
+        outTensors.at(i).dataSize = atb::Utils::GetTensorSize(outTensors.at(i));
+        int ret = aclrtMalloc(&outTensors.at(i).deviceData, outTensors.at(i).dataSize, ACL_MEM_MALLOC_HUGE_FIRST);
+        CHECK_RET(ret, "aclrtMalloc error!");
+    }
+}
+
+void CreateTensorFromDesc(atb::Tensor &tensor, atb::TensorDesc &tensorDescs)
+{
+    tensor.desc = tensorDescs;
+    tensor.dataSize = atb::Utils::GetTensorSize(tensor);
+    int ret = aclrtMalloc(&tensor.deviceData, tensor.dataSize, ACL_MEM_MALLOC_HUGE_FIRST);
+    CHECK_RET(ret, "aclrtMalloc error!");
+}
+
+void PrintOutTensorValue(atb::Tensor &outTensor)
+{
+    // Copy the output tensor back to the host and print each element
+    std::vector outBuffer(atb::Utils::GetTensorNumel(outTensor));
+    int ret = aclrtMemcpy(outBuffer.data(),
+        outBuffer.size() * sizeof(uint16_t),
+        outTensor.deviceData,
+        outTensor.dataSize,
+        ACL_MEMCPY_DEVICE_TO_HOST);
+    CHECK_RET(ret, "copy error!");
+
+    for (size_t i = 0; i < outBuffer.size(); i = i + 1)
+    {
+        LOG_INFO("out[" + std::to_string(i) + "] = " + std::to_string((uint32_t)outBuffer.at(i)));
+    }
+}
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/utils.h b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..411f5be1b34061ba3395242449cf7400e3add06f
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/ATB/ATB_MASH_UP_GRAPH/utils/utils.h
@@ -0,0 +1,39 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include
+#include
+#include
+#include
+#include
+#include "atb/infer_op_params.h"
+#include "utils/log.h"
+// When cond is non-zero: log str and exit with a non-zero status so the failure is visible to the caller
+#define CHECK_RET(cond, str)    \
+    do                          \
+    {                           \
+        if (cond)               \
+        {                       \
+            LOG_ERROR(str);     \
+            exit(1);            \
+        }                       \
+    } while (0)
+
+// Fill in the attributes of every input tensor descriptor
+void CreateInTensorDescs(atb::SVector &intensorDescs);
+
+// Set up each input tensor and allocate its memory; inputs are filled by hand here, a real project could convert from a torch tensor or another simple data structure
+void CreateInTensors(atb::SVector &inTensors, atb::SVector &intensorDescs);
+
+// Set up each output tensor and allocate its memory, same as for the inputs
+void CreateOutTensors(atb::SVector &outTensors, atb::SVector &outtensorDescs);
+
+void CreateTensorFromDesc(atb::Tensor &tensor, atb::TensorDesc &tensorDescs);
+
+// Print the output tensor
+void PrintOutTensorValue(atb::Tensor &outTensor);
+
+// Create the graph operation
+atb::Status CreateGraphOperation(atb::Operation **operation);
+
+#endif
diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/OfflineInference/OFFLINE_ACL_RESNET50/TEST.sh b/oec-ascend/oec/test_cases/cann/IntegrationTest/OfflineInference/OFFLINE_ACL_RESNET50/TEST.sh
new file mode 100755
index 0000000000000000000000000000000000000000..34b535bdca01c7ed856f76407f2db7cef43454da
--- /dev/null
+++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/OfflineInference/OFFLINE_ACL_RESNET50/TEST.sh
@@ -0,0 +1,31 @@
+
+set -e
+data="$OEC_DATA_PATH"
+output="$OEC_OUTPUT_PATH"
+npu=$(python3 -c "
+try:
+    import acl
+    print(acl.get_soc_name())
+except:
+    print('unknow')
+")
+mkdir -p "$output/tmp/resnet50/model"
+g++ resnet50.cpp -o "$output/tmp/resnet50/resnet50" -lascendcl -ldl -lpthread -L${ASCEND_HOME_PATH}/lib64 -I${ASCEND_HOME_PATH}/include
+if [[ $? != 0 ]]; then
+    cd ..
+    rm -rf build
+    exit -1
+fi
+
+cd "$output/tmp/resnet50"
+if [[ ! -f "model/resnet50.om" ]]; then
+    atc --model="$data/model/resnet50.onnx" --framework=5 --output="model/resnet50" --input_shape="actual_input_1:1,3,224,224" --soc_version=$npu
+fi
+
+cp -r "$data/data" "$output/tmp/resnet50"
+
+./resnet50 "$output/tmp/resnet50" 5000
+rst=$?
+echo rst=$rst + +exit $rst \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/IntegrationTest/OfflineInference/OFFLINE_ACL_RESNET50/resnet50.cpp b/oec-ascend/oec/test_cases/cann/IntegrationTest/OfflineInference/OFFLINE_ACL_RESNET50/resnet50.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b7f9de47a2d78ff34a80cc6091f66c90119f7683 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/IntegrationTest/OfflineInference/OFFLINE_ACL_RESNET50/resnet50.cpp @@ -0,0 +1,352 @@ +#include "acl/acl.h" +#include +#include // 添加此行以引入accumulate函数 +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +// ---------------------- 全局变量定义 ---------------------- +int32_t deviceId = 0; // 计算设备ID +uint32_t modelId = 0; // 模型ID +size_t pictureDataSize = 0; // 图片数据大小 +void* pictureHostData = nullptr; // 主机侧图片数据 +void* pictureDeviceData = nullptr; // 设备侧图片数据 +aclmdlDataset* inputDataSet = nullptr;// 输入数据集 +aclDataBuffer* inputDataBuffer = nullptr; +aclmdlDataset* outputDataSet = nullptr;// 输出数据集 +aclDataBuffer* outputDataBuffer = nullptr; +aclmdlDesc* modelDesc = nullptr; // 模型描述信息 +size_t outputDataSize = 0; // 输出数据大小 +void* outputDeviceData = nullptr; // 设备侧输出数据 +void* outputHostData = nullptr; // 主机侧输出数据 + +// ---------------------- 预期结果配置 ---------------------- +const unsigned int EXPECTED_TOP1_INDEX = 162; // 预期Top1类别索引(需根据模型数据集调整) +const double MIN_CONFIDENCE_THRESHOLD = 0.9; // 最小置信度阈值(建议≥0.9) + +// ---------------------- 函数声明 ---------------------- +void InitResource(); // 资源初始化 +void LoadModel(const char* modelPath); // 加载模型 +void LoadPicture(const char* picturePath); // 加载图片(主机+设备内存) +void Inference(); // 执行推理 +int PrintResultAndValidate(); // 打印结果并验证 +void UnloadModel(); // 卸载模型 +void UnloadPicture(); // 释放图片相关资源 +void DestroyResource(); // 释放全局资源 + +// ---------------------- 函数定义 ---------------------- +// 1. 
资源初始化(AscendCL初始化 + 指定计算设备) +void InitResource() { + aclError ret = aclInit(nullptr); // 初始化AscendCL,使用默认配置 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclInit failed, error code: " << ret << endl; + exit(1); + } + ret = aclrtSetDevice(deviceId); // 指定计算设备 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtSetDevice failed, error code: " << ret << endl; + exit(1); + } +} + +// 2. 加载模型(.om文件) +void LoadModel(const char* modelPath) { + aclError ret = aclmdlLoadFromFile(modelPath, &modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to load model from " << modelPath << ", error code: " << ret << endl; + exit(1); + } + cout << "[INFO] Model loaded successfully: " << modelPath << endl; +} + +// 3. 读取图片到主机内存 +void ReadPictureToHost(const char* picturePath) { + ifstream binFile(picturePath, ios::binary); + if (!binFile.is_open()) { + cerr << "[ERROR] Failed to open picture file: " << picturePath << endl; + exit(1); + } + // 获取文件大小并读取数据 + binFile.seekg(0, ios::end); + pictureDataSize = binFile.tellg(); + binFile.seekg(0, ios::beg); + + aclError ret = aclrtMallocHost(&pictureHostData, pictureDataSize); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMallocHost failed, error code: " << ret << endl; + exit(1); + } + binFile.read((char*)pictureHostData, pictureDataSize); + binFile.close(); + cout << "[INFO] Picture loaded to host memory: " << picturePath << endl; +} + +// 4. 复制数据到设备内存 +void CopyDataFromHostToDevice() { + aclError ret = aclrtMalloc(&pictureDeviceData, pictureDataSize, ACL_MEM_MALLOC_HUGE_FIRST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMalloc failed, error code: " << ret << endl; + exit(1); + } + ret = aclrtMemcpy(pictureDeviceData, pictureDataSize, pictureHostData, pictureDataSize, ACL_MEMCPY_HOST_TO_DEVICE); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtMemcpy failed, error code: " << ret << endl; + exit(1); + } + cout << "[INFO] Picture data copied to device memory" << endl; +} + +// 5. 
加载图片(组合函数) +void LoadPicture(const char* picturePath) { + ReadPictureToHost(picturePath); + CopyDataFromHostToDevice(); +} + +// 6. 创建模型输入数据结构 +void CreateModelInput() { + inputDataSet = aclmdlCreateDataset(); + inputDataBuffer = aclCreateDataBuffer(pictureDeviceData, pictureDataSize); + aclError ret = aclmdlAddDatasetBuffer(inputDataSet, inputDataBuffer); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to create model input, error code: " << ret << endl; + exit(1); + } +} + +// 7. 创建模型输出数据结构 +void CreateModelOutput() { + modelDesc = aclmdlCreateDesc(); + aclError ret = aclmdlGetDesc(modelDesc, modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to get model description, error code: " << ret << endl; + exit(1); + } + + outputDataSet = aclmdlCreateDataset(); + outputDataSize = aclmdlGetOutputSizeByIndex(modelDesc, 0); // 获取第一个输出的大小 + + ret = aclrtMalloc(&outputDeviceData, outputDataSize, ACL_MEM_MALLOC_HUGE_FIRST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to allocate output memory, error code: " << ret << endl; + exit(1); + } + outputDataBuffer = aclCreateDataBuffer(outputDeviceData, outputDataSize); + ret = aclmdlAddDatasetBuffer(outputDataSet, outputDataBuffer); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to create model output, error code: " << ret << endl; + exit(1); + } +} + +// 8. 执行推理 +void Inference() { + CreateModelInput(); + CreateModelOutput(); + aclError ret = aclmdlExecute(modelId, inputDataSet, outputDataSet); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Inference failed, error code: " << ret << endl; + exit(1); + } +} + +// 9. 
打印结果并验证 +int PrintResultAndValidate() { + // 复制输出数据到主机内存 + aclError ret = aclrtMallocHost(&outputHostData, outputDataSize); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to malloc host memory for output, error code: " << ret << endl; + return 1; + } + ret = aclrtMemcpy(outputHostData, outputDataSize, outputDeviceData, outputDataSize, ACL_MEMCPY_DEVICE_TO_HOST); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to copy output data to host, error code: " << ret << endl; + return 1; + } + + // 解析输出数据(转换为float数组) + float* outFloatData = reinterpret_cast(outputHostData); + map> resultMap; // 按置信度降序排序 + for (unsigned int j = 0; j < outputDataSize / sizeof(float); ++j) { + resultMap[outFloatData[j]] = j; + } + + // 检查是否有推理结果 + if (resultMap.empty()) { + cerr << "[ERROR] No inference results found" << endl; + return 1; + } + + // 提取Top1结果 + auto top1 = resultMap.begin(); + unsigned int top1Index = top1->second; + double top1Score = top1->first; + double top1Confidence = exp(top1Score) / accumulate(resultMap.begin(), resultMap.end(), 0.0, + [](double sum, const pair& item) { return sum + exp(item.first); }); + + // 打印Top5结果 + cout << "\nTop 5 Inference Results:" << endl; + int cnt = 0; + for (auto it = resultMap.begin(); it != resultMap.end() && cnt < 5; ++it, ++cnt) { + double prob = exp(it->first) / accumulate(resultMap.begin(), resultMap.end(), 0.0, + [](double sum, const pair& item) { return sum + exp(item.first); }); + cout << "Top " << cnt + 1 << ": Index[" << it->second << "] Confidence[" << fixed << prob << "]" << endl; + } + + // 结果验证 + bool isSuccess = (top1Index == EXPECTED_TOP1_INDEX && top1Confidence >= MIN_CONFIDENCE_THRESHOLD); + if (isSuccess) { + cout << "\n[VALIDATION SUCCESS] Top1 matches expectations: Index[" << top1Index + << "] Confidence[" << fixed << top1Confidence << "]" << endl; + return 0; // 验证通过,返回0 + } else { + cerr << "\n[VALIDATION FAILED] Top1 does not match expectations:" << endl + << " Expected Index: " << EXPECTED_TOP1_INDEX 
<< ", Confidence ≥ " << MIN_CONFIDENCE_THRESHOLD << endl + << " Actual Index: " << top1Index << ", Confidence: " << fixed << top1Confidence << endl; + return 1; // 验证失败,返回1 + } +} + +// 10. 卸载模型 +void UnloadModel() { + if (modelDesc != nullptr) { + aclmdlDestroyDesc(modelDesc); + modelDesc = nullptr; + } + aclError ret = aclmdlUnload(modelId); + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] Failed to unload model, error code: " << ret << endl; + } + cout << "[INFO] Model unloaded successfully" << endl; +} + +// 11. 释放图片相关资源 +void UnloadPicture() { + if (pictureHostData != nullptr) { + aclrtFreeHost(pictureHostData); + pictureHostData = nullptr; + } + if (pictureDeviceData != nullptr) { + aclrtFree(pictureDeviceData); + pictureDeviceData = nullptr; + } + if (inputDataBuffer != nullptr) { + aclDestroyDataBuffer(inputDataBuffer); + inputDataBuffer = nullptr; + } + if (inputDataSet != nullptr) { + aclmdlDestroyDataset(inputDataSet); + inputDataSet = nullptr; + } + if (outputHostData != nullptr) { + aclrtFreeHost(outputHostData); + outputHostData = nullptr; + } + if (outputDeviceData != nullptr) { + aclrtFree(outputDeviceData); + outputDeviceData = nullptr; + } + if (outputDataBuffer != nullptr) { + aclDestroyDataBuffer(outputDataBuffer); + outputDataBuffer = nullptr; + } + if (outputDataSet != nullptr) { + aclmdlDestroyDataset(outputDataSet); + outputDataSet = nullptr; + } + cout << "[INFO] Picture resources unloaded successfully" << endl; +} + +// 12. 
释放全局资源 +void DestroyResource() { + aclError ret = aclrtResetDevice(deviceId); // 重置计算设备 + if (ret != ACL_SUCCESS) { + cerr << "[ERROR] aclrtResetDevice failed, error code: " << ret << endl; + } + aclFinalize(); // 去初始化AscendCL + cout << "[INFO] Global resources released successfully" << endl; +} + +// ---------------------- 主函数 ---------------------- +int main(int argc, char* argv[]) { + // 检查命令行参数 + if (argc != 3) { + cerr << "[ERROR] Usage: " << argv[0] << " "<<" " << endl; + cerr << " Example: " << argv[0] << " /path/to/resources" <<" 1000" << endl; + cerr << " Model will be loaded from: /model/resnet50.om" << endl; + cerr << " Picture will be loaded from: /data/dog1_1024_683.bin" << endl; + return 1; + } + + // 构建模型和图片路径 + string basePath = argv[1]; + int test_times = atoi(argv[2]); + string modelPath = basePath + "/model/resnet50.om"; + string picturePath = basePath + "/data/dog1_1024_683.bin"; + + cout << "[INFO] Base path: " << basePath << endl; + cout << "[INFO] Model path: " << modelPath << endl; + cout << "[INFO] Picture path: " << picturePath << endl; + + // 1. 资源初始化 + InitResource(); + + // 2. 加载模型 + LoadModel(modelPath.c_str()); + + // 3. 加载测试图片 + LoadPicture(picturePath.c_str()); + double maxFps = 0; + for(int j=0;j<3;++j){ + auto start = std::chrono::high_resolution_clock::now(); + for(int i =0; i < test_times; ++i){ + // 4. 执行推理 + Inference(); + } + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + double fps = static_cast(test_times) / duration.count() * 1000000; + maxFps = fps > maxFps? 
fps : maxFps; + std::cout < baseLine; + baseLine["Ascend310P3"] = 1632.7; + baseLine["Ascend910B2"] = 1950.33; + baseLine["Ascend910B3"] = 1941.3; + baseLine["Ascend910B4"] = 1407.368; + baseLine["Ascend910_9392"] = 2000.866; + + if (baseLine.find(socName) != baseLine.end()){ + double base = baseLine[socName]; + double delta = abs(maxFps - base) / base * 100; + std::cout <<"soc: "< 5){ + std::cout <<"ERROR: delta > 5%" < +#include + +#ifdef ASCENDC_CPU_DEBUG +#define __aicore__ +#else +#define __aicore__ [aicore] +#endif + +#define BLOCKS 4 +#define CACHELINE_SZ 64 + +// Define a kernel +__global__ __aicore__ void foo(__gm__ uint8_t *Out, int Stride) { + Out[block_idx * Stride] = block_idx; +} + +int main(int argc, char *argv[]) { + aclInit(nullptr); + aclrtSetDevice(0); + aclrtStream stream; + aclrtCreateStream(&stream); + + uint8_t ExpectedValue[] = {0, 1, 2, 3}; + uint8_t *OutputValue = nullptr; + aclrtMalloc((void **)&OutputValue, BLOCKS, ACL_MEM_MALLOC_HUGE_FIRST); + + uint8_t InitValue[BLOCKS] = {0}; + aclrtMemcpyAsync((void *)OutputValue, sizeof(InitValue), InitValue, + sizeof(InitValue), ACL_MEMCPY_HOST_TO_DEVICE, stream); + aclrtSynchronizeStream(stream); + + // Invoke a kernel + foo<<>>(OutputValue, CACHELINE_SZ); + + uint8_t *OutHost = nullptr; + aclrtMallocHost((void **)&OutHost, BLOCKS * CACHELINE_SZ); + aclrtMemcpyAsync(OutHost, BLOCKS * CACHELINE_SZ, OutputValue, + BLOCKS * CACHELINE_SZ, ACL_MEMCPY_DEVICE_TO_HOST, stream); + aclrtSynchronizeStream(stream); + + for (int I = 0; I < sizeof(ExpectedValue) / sizeof(uint8_t); I++) { + printf("i%d\t Expect: 0x%04x\t\t\t\tResult: 0x%04x\n", I, ExpectedValue[I], + OutHost[I * CACHELINE_SZ]); + } + + aclrtFreeHost(OutHost); + aclrtFree(OutputValue); + + aclrtDestroyStream(stream); + aclrtResetDevice(0); + aclFinalize(); + return 0; +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_BISHENG_DEMO/TEST.sh 
b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_BISHENG_DEMO/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..0c24bd226bf8960352232ab47c9ac8b584d281da --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_BISHENG_DEMO/TEST.sh @@ -0,0 +1,30 @@ +unsupported=(A3) +for product in "${unsupported[@]}"; do + if [[ "$product" == "$OEC_PRODUCT" ]]; then + exit 192 + fi +done +RT_INC=${ASCEND_HOME_PATH}/runtime/include +RT_LIB=${ASCEND_HOME_PATH}/runtime/lib64 +NPU=$(python3 -c " +try: + import acl + print(acl.get_soc_name()) +except: + print('unknow') +") +ouput="$OEC_OUTPUT_PATH" +mkdir -p "$ouput" +inputpath=$(pwd) +cd "$ouput" +# 功能:Host & Device代码混合编译,生成可执行文件,仅需链接libruntime.so +# 编译选项--cce-soc-version和--cce-soc-core-type指的是编译AscendXXXYY上的Vector核程序 +bisheng -O2 --cce-soc-version=$NPU --cce-soc-core-type=VecCore -I$RT_INC -L$RT_LIB -lascendcl -lruntime "$inputpath/QuickStartDemo.cce" -o "QuickStartDemo" +rst=$? +if [[ $rst != 0 ]]; then + exit $rst +fi +if [[ ! 
-f QuickStartDemo ]]; then + exit 1 +fi + diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_DSL_VABS/TEST.sh b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_DSL_VABS/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..82874a94a37778de8d2a9e919c674c56883d53a3 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_DSL_VABS/TEST.sh @@ -0,0 +1 @@ +python3 dsl_vabs.py "$OEC_OUTPUT_PATH" \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_DSL_VABS/dsl_vabs.py b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_DSL_VABS/dsl_vabs.py new file mode 100644 index 0000000000000000000000000000000000000000..307400d977869173b542cb04243aa8dc09d5fc50 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_DSL_VABS/dsl_vabs.py @@ -0,0 +1,33 @@ +from tbe import tvm +from tbe import dsl +import argparse +parser = argparse.ArgumentParser( + prog="dsl-test", + ) + +parser.add_argument( + "output_dir", + default=".", + help="The path where the kernel mate data is saved, which is the current directory by default", +) +args = parser.parse_args() + + +shape = (28,28) +dtype = "float16" +# 定义输入占位符 +data = tvm.placeholder(shape, name="data", dtype=dtype) +with tvm.target.cce(): + # 描述算子计算过程 + res = dsl.vabs(data) + # 生成schedule对象 + sch = dsl.auto_schedule(res) +# 定义build配置参数 +config = {"print_ir" : True, + "need_build" : True, + "name" : "abs_28_28_float16", + "tensor_list" : [data,res], + "kernel_meta_parent_dir": args.output_dir + } +# build算子 +dsl.build(sch, config) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_GCC_PATH_CHECK/TEST.sh b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_GCC_PATH_CHECK/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..288c7d6c1dc0f9ac61359fd2ed02c2ee0d48e33a --- /dev/null +++ 
b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_GCC_PATH_CHECK/TEST.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# 使用数组直接存储路径(避免空格问题) +paths=( + "/usr/lib/gcc/x86_64-linux-gnu/" + "/usr/lib/gcc/aarch64-linux-gnu" + + "/usr/lib/gcc/x86_64-openEuler-linux" + "/usr/lib/gcc/aarch64-openEuler-linux" +) + +for path in "${paths[@]}"; do + if [[ -e "$path" ]]; then + echo "存在路径: $path" + exit 0 + fi +done + +echo "所有路径均不存在" +exit 255 \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d4c47d84bec9be9c372ac0d637c412cb045b08a7 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/CMakeLists.txt @@ -0,0 +1,47 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.16.0) + +# project information +project(Ascend_C) +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +if(DEFINED ENV{USER} AND "$ENV{USER}" STREQUAL "root") + set(DEFAULT_ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package default installation directory for root user") +else() + set(DEFAULT_ASCEND_CANN_PACKAGE_PATH "$ENV{HOME}/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package default installation directory for other user") +endif() + +if(DEFINED ASCEND_CANN_PACKAGE_PATH) +elseif(DEFINED ENV{ASCEND_HOME_PATH}) + set(ASCEND_CANN_PACKAGE_PATH "$ENV{ASCEND_HOME_PATH}" CACHE PATH "ASCEND CANN package installation directory" FORCE) +else() + set(ASCEND_CANN_PACKAGE_PATH "${DEFAULT_ASCEND_CANN_PACKAGE_PATH}" CACHE PATH "ASCEND CANN package installation directory") +endif() + +set(RUN_MODE "npu" CACHE STRING "run mode: npu") +set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type 
Release/Debug (default Debug)" FORCE) +set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()") + +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed." ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +endif() + +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +ascendc_library(kernels STATIC + hello_world.cpp +) + +add_executable(main main.cpp) + +target_link_libraries(main PRIVATE + kernels +) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/TEST.sh b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..8f639c5276d95b7571ca9a7d33edd3603246ce44 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/TEST.sh @@ -0,0 +1,50 @@ +SOC_VERSION=$(python3 -c " +try: + import acl + print(acl.get_soc_name()) +except: + print('unknow') +") +output="$OEC_OUTPUT_PATH" +type=build +function make_run(){ + mkdir -p "$output/HelloWorld/build" + + cmake -B "$output/HelloWorld/build" \ + -DSOC_VERSION=${SOC_VERSION} \ + -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_PATH} \ + -DCMAKE_INSTALL_PREFIX="$output/HelloWorld/out" + if [ $? 
-ne 0 ]; then + echo "cmake hello world failed" + return 1 + fi + cd "$output/HelloWorld" + cmake --build build -j + if [ $? -ne 0 ]; then + echo "buid hello world failed" + return 2 + fi + cmake --install build + if [ $? -ne 0 ]; then + echo "install hello world failed" + return 3 + fi + if [[ $type == "build" ]];then + return 0 # 算子编译场景下无需执行用例,算子开发需要执行用例 + fi + check_msg="Hello World" + file_path=output_msg.txt + + ./build/main | tee $file_path + count=$(grep -c "$check_msg" $file_path) + + if [ $count -ne 8 ]; then + echo "Error, Expected 8 occurrences of $check_msg, but found $count occurrences." + return 3 + fi + +} + + +make_run +exit $? \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/hello_world.cpp b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/hello_world.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1d758710ddacefb231648ca13b8e568e55285ed2 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/hello_world.cpp @@ -0,0 +1,10 @@ +#include "kernel_operator.h" +extern "C" __global__ __aicore__ void hello_world() +{ + AscendC::printf("Hello World!!!\n"); +} + +void hello_world_do(uint32_t blockDim, void* stream) +{ + hello_world<<>>(); +} diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/main.cpp b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4cb6424f1994a95384fdc0cd6ad1787de8131bb9 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_HELLO_WORLD/main.cpp @@ -0,0 +1,24 @@ +#include "acl/acl.h" +extern void hello_world_do(uint32_t coreDim, void* stream); + +int32_t main(int argc, char const *argv[]) +{ + // AscendCL初始化 + aclInit(nullptr); + // 运行管理资源申请 + int32_t deviceId = 0; + aclrtSetDevice(deviceId); + aclrtStream stream = nullptr; + 
aclrtCreateStream(&stream); + + // 设置参与运算的核数为8(核数可根据实际需求设置) + constexpr uint32_t blockDim = 8; + // 用内核调用符<<<>>>调用核函数,hello_world_do中封装了<<<>>>调用 + hello_world_do(blockDim, stream); + aclrtSynchronizeStream(stream); + // 资源释放和AscendCL去初始化 + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_NPU_IR_DEMO/TEST.sh b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_NPU_IR_DEMO/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..26d21e58d3c3306806bbb6e50ffdbc7b9c41814a --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_NPU_IR_DEMO/TEST.sh @@ -0,0 +1,7 @@ + +set -e +current_dir=$(pwd) +mkdir -p "$OEC_OUTPUT_PATH" + +cd "$OEC_OUTPUT_PATH" +bishengir-compile "${current_dir}/demo.mlir" -o test diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_NPU_IR_DEMO/demo.mlir b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_NPU_IR_DEMO/demo.mlir new file mode 100644 index 0000000000000000000000000000000000000000..6e460608ef08527faa0ba7f26476a6205552f11c --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_NPU_IR_DEMO/demo.mlir @@ -0,0 +1,16 @@ +func.func @test_basic_kernel0(%valueA: memref<16xf16, #hivm.address_space>, + %valueB: memref<16xf16, #hivm.address_space>, + %valueC: memref<16xf16, #hivm.address_space>) + attributes {hacc.entry} +{ + %ubA = memref.alloc() : memref<16xf16, #hivm.address_space> + hivm.hir.load ins(%valueA : memref<16xf16, #hivm.address_space>) outs(%ubA : memref<16xf16, #hivm.address_space>) + + %ubB = memref.alloc() : memref<16xf16, #hivm.address_space> + hivm.hir.load ins(%valueB : memref<16xf16, #hivm.address_space>) outs(%ubB : memref<16xf16, #hivm.address_space>) + + %ubC = memref.alloc() : memref<16xf16, #hivm.address_space> + hivm.hir.vadd ins(%ubA, %ubB: memref<16xf16, #hivm.address_space>, memref<16xf16, 
#hivm.address_space>) outs(%ubC: memref<16xf16, #hivm.address_space>) + hivm.hir.store ins(%ubC : memref<16xf16, #hivm.address_space>) outs(%valueC : memref<16xf16, #hivm.address_space>) + return +} \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_TIK_MATMUL/TEST.sh b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_TIK_MATMUL/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..540965926fae59615a67ba2d5c5aa80de4dbf111 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_TIK_MATMUL/TEST.sh @@ -0,0 +1 @@ +python3 tik_matmul.py "$OEC_OUTPUT_PATH" \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_TIK_MATMUL/tik_matmul.py b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_TIK_MATMUL/tik_matmul.py new file mode 100644 index 0000000000000000000000000000000000000000..765bf03a10c98af5dd260ae29f568b6d5e9963d7 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Compile/KERNEL_COMPILE_TIK_MATMUL/tik_matmul.py @@ -0,0 +1,174 @@ +from tbe import tik +import argparse +parser = argparse.ArgumentParser( + prog="tik-test", +) +parser.add_argument( + "output_dir", + default=".", + help="The path where the kernel mate data is saved, which is the current directory by default", +) +args = parser.parse_args() + +DTYPE_SIZE = { + 'int8': 1, + 'float16': 2, + 'float32': 4, +} + +def MK_TO_K1MK0(tik_instance, mk_input_tensor, k1mk0_tensor, dtype, k1, m, k0): + """change data format mk to k1mk0""" + src_ub = tik_instance.Tensor(dtype, (k1, m, k0), name="src_ub", scope=tik.scope_ubuf) + + # data_move(m,k) --> (k1,m,k0) + with tik_instance.for_range(0, k1) as i: + tik_instance.data_move(src_ub[i * m * k0:], mk_input_tensor[i * k0:], 0, m, k0 * DTYPE_SIZE[dtype] // 32, + (k1 - 1) * k0 * DTYPE_SIZE[dtype] // 32, 0) + # data_move out + tik_instance.data_move(k1mk0_tensor, src_ub, 0, 1, k1 * m * k0 * 
DTYPE_SIZE[dtype] // 32, 0, 0) + + +def KN_TO_K1NK0(tik_instance, kn_input_tensor, k1nk0_tensor, dtype, k1, n, k0): + """change data format kn to k1nk0""" + with tik_instance.for_range(0, k1) as index: + k1nk0_ub = tik_instance.Tensor(dtype, (n, k0), tik.scope_ubuf, "k1nk0_ub") + src_ub = tik_instance.Tensor(dtype, (k0, n), tik.scope_ubuf, "src_ub") + burst_len = k0 * n * DTYPE_SIZE[dtype] // 32 + tik_instance.data_move(src_ub, kn_input_tensor[index * k0 * n], 0, 1, burst_len, 0, 0) + dst_list = [k1nk0_ub[16 * i] for i in range(16)] + src_list = [src_ub[n * i] for i in range(16)] + rep_times = n // k0 + dst_rep_stride = k0 + src_rep_stride = 1 + tik_instance.vec_trans_scatter(False, False, dst_list, src_list, rep_times, dst_rep_stride, src_rep_stride) + tik_instance.data_move(k1nk0_tensor[index * k0 * n], k1nk0_ub, 0, 1, burst_len, 0, 0) + +def N1MN0_TO_MN(tik_instance, mn_output_tensor, n1mn0_tensor, dtype, n1, m, n0): + """change data format n1mn0 to mn""" + src_ub = tik_instance.Tensor(dtype, (m, n1 * n0), name="src_ub", scope=tik.scope_ubuf) + + # data_move (n1,m,n0) --> (m,n) + with tik_instance.for_range(0, n1) as i: + tik_instance.data_move(src_ub[i * n0:], n1mn0_tensor[i * m * n0:], 0, m, + n0 * DTYPE_SIZE[dtype] // 32, 0, (n1 - 1) * n0 * DTYPE_SIZE[dtype] // 32) + # data_move out + tik_instance.data_move(mn_output_tensor, src_ub, 0, 1, m * n1 * n0 * DTYPE_SIZE[dtype] // 32, 0, 0) + + +def matmul_tik_compute(params, kernel_name): + """ + matmul tik compute + @param params: matmul data + @param kernel_name: kernel name + @return: tik instance + """ + tik_instance = tik.Tik() + if not isinstance(params, dict): + params = params.__dict__ + m_size, k_size, n_size = params['M'], params['K'], params['N'] + data_type = params["data_type"] + m_tiling_size = int(params["m_tiling_size"]) + n_tiling_size = int(params["n_tiling_size"]) + k_tiling_size = int(params['k_tiling_size']) + + m_cycle_times = params["m_cycle_times"] + n_cycle_times = params["n_cycle_times"] + 
k_cycle_times = params["k_cycle_times"] + + # Determine the output type + if data_type == "float16": + C_loc_out_type = "float32" + K0 = 16 + else: + C_loc_out_type = "int32" + K0 = 32 + block_size = 16 + + n_thread_num = params['n_thread_num'] + m_thread_num = params['m_thread_num'] + k_thread_num = params['k_thread_num'] + + mk_gm_input = tik_instance.Tensor(data_type, (m_size, k_size), name="mk_input_gm", scope=tik.scope_gm) + kn_gm_input = tik_instance.Tensor(data_type, (k_size, n_size), name="kn_input_gm", scope=tik.scope_gm) + k1mk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, m_size, K0), name="k1mk0_workspace", + scope=tik.scope_gm, is_workspace=True) + k1nk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, n_size, K0), name="k1nk0_workspace", + scope=tik.scope_gm, is_workspace=True) + + mn_gm_output = tik_instance.Tensor(C_loc_out_type, (m_size, n_size), tik.scope_gm, "mn_output_gm") + nmk0_workspace = tik_instance.Tensor(C_loc_out_type, (n_size // block_size, m_size, block_size), + name="nmk0_workspace", scope=tik.scope_gm, is_workspace=True) + + MK_TO_K1MK0(tik_instance, mk_gm_input, k1mk0_workspace, data_type, k_size // K0, m_size, K0) + KN_TO_K1NK0(tik_instance, kn_gm_input, k1nk0_workspace, data_type, k_size // K0, n_size, K0) + + # Tiling is realized through the for_range() loop. + with tik_instance.for_range(0, 2, block_num=1) as core_id: + with tik_instance.for_range(0, n_cycle_times // 2, thread_num=n_thread_num) as n_idx: + with tik_instance.for_range(0, m_cycle_times, thread_num=m_thread_num) as m_idx: + dst_l0c = tik_instance.Tensor(C_loc_out_type, [n_tiling_size // 16, m_tiling_size, 16], name='dst_l0c', + scope=tik.scope_cbuf_out) + with tik_instance.for_range(0, k_cycle_times, + thread_num=k_thread_num) as k_idx: + # Calculation result data transfer. 
+ inputa_l1 = tik_instance.Tensor(params['data_type'], [k_tiling_size // K0, m_tiling_size, K0], + name="A_tiling_l1", scope=tik.scope_cbuf) + tik_instance.data_move(inputa_l1, + k1mk0_workspace[k_idx * k_tiling_size // K0, m_idx * m_tiling_size, :], + 0, k_tiling_size // K0, m_tiling_size, m_size - m_tiling_size, 0) + inputb_l1 = tik_instance.Tensor(params["data_type"], [k_tiling_size // K0, n_tiling_size, K0], + name="B_tiling_l1", scope=tik.scope_cbuf) + if n_size - n_tiling_size > 65535: + with tik_instance.for_range(0, k_tiling_size // K0) \ + as dma_k_idx: + tik_instance.data_move(inputb_l1[dma_k_idx, :, :], + k1nk0_workspace[k_idx * k_tiling_size // K0 + dma_k_idx, + (core_id * n_cycle_times // 2 + n_idx) * n_tiling_size, :], + 0, 1, n_tiling_size, 0, 0) + else: + tik_instance.data_move(inputb_l1, k1nk0_workspace[k_idx * k_tiling_size // K0, + (core_id * n_cycle_times // 2 + n_idx) * n_tiling_size, :], 0, + k_tiling_size // K0, n_tiling_size, n_size - n_tiling_size, 0) + # Call matmul API to matrix multiplication calculation. 
+ with tik_instance.if_scope(k_idx == 0): + tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size, + init_l1out=True) + with tik_instance.else_scope(): + tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size, + init_l1out=False) + tik_instance.fixpipe(nmk0_workspace[n_tiling_size // 16 * (core_id * n_cycle_times // 2 + n_idx), + m_idx * m_tiling_size, :], dst_l0c, n_tiling_size // 16, m_tiling_size * 16 * + DTYPE_SIZE[C_loc_out_type] // 32, + (m_size - m_tiling_size) * 16 * DTYPE_SIZE[C_loc_out_type] // 32, 0) + + N1MN0_TO_MN(tik_instance, mn_gm_output, nmk0_workspace, C_loc_out_type, n_size // K0, m_size, K0) + + tik_instance.BuildCCE(kernel_name=kernel_name, + inputs=[mk_gm_input, kn_gm_input], outputs=[mn_gm_output], + output_files_path=args.output_dir, + config={'l2_mode': 1}) + return tik_instance + +def test_matmul_tik(): + shape_a = [16, 64] + shape_b = [64, 1024] + # 输入参数和tiling信息 + params = { + 'M': shape_a[0], + 'K': shape_a[1], + 'N': shape_b[1], + 'data_type': "float16", + 'm_tiling_size': 16, + 'm_cycle_times': 1, + 'm_thread_num': 1, + 'n_tiling_size': 64, + 'n_cycle_times': 16, + 'n_thread_num': 1, + 'k_tiling_size': 32, + 'k_cycle_times': 2, + 'k_thread_num': 2, + } + tik_instance = matmul_tik_compute(params, "simple_matmul") + +if __name__ == "__main__": + test_matmul_tik() \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e4d6de99eb77103ff1448394a307ac436a44f4c --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) + +set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") 
+set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "ASCEND CANN package installation directory" +) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +# ${KERNEL_FILES} are used to compile library, push files written by ascendc in ${KERNEL_FILES}. +# ref to cmake/npu.cmake ascendc_library, cmake/cpu.cmake add_library +file(GLOB KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/add_custom.cpp) + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() +add_executable(ascendc_kernels_bbit ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp) + +target_compile_options(ascendc_kernels_bbit PRIVATE + $:-g>> + -O2 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=0 -Wall -Werror +) + +target_link_libraries(ascendc_kernels_bbit PRIVATE + $,$>:host_intf_pub>> + $:ascendcl>> + ascendc_kernels_${RUN_MODE} +) + +install(TARGETS ascendc_kernels_bbit + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/add_custom.cpp b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/add_custom.cpp new file mode 100644 index 0000000000000000000000000000000000000000..896b5c81b4fc3481e93087cf702ac16de6a33cd2 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/add_custom.cpp @@ -0,0 +1,89 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2024. 
Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "kernel_operator.h" + +constexpr int32_t TOTAL_LENGTH = 8 * 2048; // total length of data +constexpr int32_t USE_CORE_NUM = 8; // num of core used +constexpr int32_t BLOCK_LENGTH = TOTAL_LENGTH / USE_CORE_NUM; // length computed of each core +constexpr int32_t TILE_NUM = 8; // split data into 8 tiles for each core +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue +constexpr int32_t TILE_LENGTH = BLOCK_LENGTH / TILE_NUM / BUFFER_NUM; // separate to 2 parts, due to double buffer + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z) + { + xGm.SetGlobalBuffer((__gm__ half *)x + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + yGm.SetGlobalBuffer((__gm__ half *)y + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + zGm.SetGlobalBuffer((__gm__ half *)z + BLOCK_LENGTH * AscendC::GetBlockIdx(), BLOCK_LENGTH); + pipe.InitBuffer(inQueueX, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, TILE_LENGTH * sizeof(half)); + } + __aicore__ inline void Process() + { + int32_t loopCount = TILE_NUM * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * TILE_LENGTH], TILE_LENGTH); + AscendC::DataCopy(yLocal, yGm[progress * TILE_LENGTH], TILE_LENGTH); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + 
AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, TILE_LENGTH); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * TILE_LENGTH], zLocal, TILE_LENGTH); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; +}; + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z) +{ + KernelAdd op; + op.Init(x, y, z); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +void add_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z) +{ + add_custom<<>>(x, y, z); +} +#endif diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/cmake/cpu_lib.cmake b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/cmake/cpu_lib.cmake new file mode 100644 index 0000000000000000000000000000000000000000..5362c8b5a53b1f730ac6fe542ee226a42dff40ff --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/cmake/cpu_lib.cmake @@ -0,0 +1,9 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) +target_link_libraries(ascendc_kernels_${RUN_MODE} PUBLIC tikicpulib::${SOC_VERSION}) +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE -g -O0 -std=c++17) +install(TARGETS ascendc_kernels_${RUN_MODE} DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git 
a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/cmake/npu_lib.cmake b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/cmake/npu_lib.cmake new file mode 100644 index 0000000000000000000000000000000000000000..f92b095d1f4f258af274b98dfcba2dccf6165b30 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/cmake/npu_lib.cmake @@ -0,0 +1,11 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +ascendc_library(ascendc_kernels_${RUN_MODE} SHARED ${KERNEL_FILES}) diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/data_utils.h b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/data_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..09d906371fb8fd86ba988199ea61642d02110c49 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/data_utils.h @@ -0,0 +1,203 @@ +/** + * @file data_utils.h + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#ifndef DATA_UTILS_H +#define DATA_UTILS_H +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" + +typedef enum { + DT_UNDEFINED = -1, + FLOAT = 0, + HALF = 1, + INT8_T = 2, + INT32_T = 3, + UINT8_T = 4, + INT16_T = 6, + UINT16_T = 7, + UINT32_T = 8, + INT64_T = 9, + UINT64_T = 10, + DOUBLE = 11, + BOOL = 12, + STRING = 13, + COMPLEX64 = 16, + COMPLEX128 = 17, + BF16 = 27 +} printDataType; + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) +#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args) +#define ERROR_LOG(fmt, args...) fprintf(stdout, "[ERROR] " fmt "\n", ##args) +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +/** + * @brief Read data from file + * @param [in] filePath: file path + * @param [out] fileSize: file size + * @return read result + */ +bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize) +{ + struct stat sBuf; + int fileStatus = stat(filePath.data(), &sBuf); + if (fileStatus == -1) { + ERROR_LOG("failed to get file"); + return false; + } + if (S_ISREG(sBuf.st_mode) == 0) { + ERROR_LOG("%s is not a file, please enter a file", filePath.c_str()); + return false; + } + + std::ifstream file; + file.open(filePath, std::ios::binary); + if (!file.is_open()) { + ERROR_LOG("Open file failed. 
path = %s", filePath.c_str()); + return false; + } + + std::filebuf *buf = file.rdbuf(); + size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in); + if (size == 0) { + ERROR_LOG("file size is 0"); + file.close(); + return false; + } + if (size > bufferSize) { + ERROR_LOG("file size is larger than buffer size"); + file.close(); + return false; + } + buf->pubseekpos(0, std::ios::in); + buf->sgetn(static_cast(buffer), size); + fileSize = size; + file.close(); + return true; +} + +/** + * @brief Write data to file + * @param [in] filePath: file path + * @param [in] buffer: data to write to file + * @param [in] size: size to write + * @return write result + */ +bool WriteFile(const std::string &filePath, const void *buffer, size_t size) +{ + if (buffer == nullptr) { + ERROR_LOG("Write file failed. buffer is nullptr"); + return false; + } + + int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE); + if (fd < 0) { + ERROR_LOG("Open file failed. path = %s", filePath.c_str()); + return false; + } + + size_t writeSize = write(fd, buffer, size); + (void)close(fd); + if (writeSize != size) { + ERROR_LOG("Write file Failed."); + return false; + } + + return true; +} + +template void DoPrintData(const T *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << data[i]; + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void DoPrintHalfData(const aclFloat16 *data, size_t count, size_t elementsPerRow) +{ + assert(elementsPerRow != 0); + for (size_t i = 0; i < count; ++i) { + std::cout << std::setw(10) << std::setprecision(6) << aclFloat16ToFloat(data[i]); + if (i % elementsPerRow == elementsPerRow - 1) { + std::cout << std::endl; + } + } +} + +void PrintData(const void *data, size_t count, printDataType dataType, size_t elementsPerRow = 16) +{ + if (data == nullptr) { + ERROR_LOG("Print data failed. 
data is nullptr"); + return; + } + + switch (dataType) { + case BOOL: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT8_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT16_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT32_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case INT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case UINT64_T: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case HALF: + DoPrintHalfData(reinterpret_cast(data), count, elementsPerRow); + break; + case FLOAT: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + case DOUBLE: + DoPrintData(reinterpret_cast(data), count, elementsPerRow); + break; + default: + ERROR_LOG("Unsupported type: %d", dataType); + } + std::cout << std::endl; +} +#endif // DATA_UTILS_H diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/main.cpp b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d3d8fea3359d5218d51ee66a5182c3ca0192b648 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/main.cpp @@ -0,0 +1,82 @@ +/** + * @file main.cpp + * + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ */ +#include "data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +extern void add_custom_do(uint32_t blockDim, void *stream, uint8_t *x, uint8_t *y, uint8_t *z); +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z); +#endif + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = 8; + size_t inputByteSize = 8 * 2048 * sizeof(uint16_t); + size_t outputByteSize = 8 * 2048 * sizeof(uint16_t); + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *x = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *y = (uint8_t *)AscendC::GmAlloc(inputByteSize); + uint8_t *z = (uint8_t *)AscendC::GmAlloc(outputByteSize); + + ReadFile("./input/input_x.bin", inputByteSize, x, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, y, inputByteSize); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); + ICPU_RUN_KF(add_custom, blockDim, x, y, z); // use this macro for cpu debug + + WriteFile("./output/output_z.bin", z, outputByteSize); + + AscendC::GmFree((void *)x); + AscendC::GmFree((void *)y); + AscendC::GmFree((void *)z); +#else + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost, *yHost, *zHost; + uint8_t *xDevice, *yDevice, *zDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), inputByteSize)); + CHECK_ACL(aclrtMallocHost((void **)(&zHost), outputByteSize)); + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, inputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&zDevice, outputByteSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("./input/input_x.bin", inputByteSize, xHost, inputByteSize); + ReadFile("./input/input_y.bin", inputByteSize, yHost, inputByteSize); + + CHECK_ACL(aclrtMemcpy(xDevice, 
inputByteSize, xHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(yDevice, inputByteSize, yHost, inputByteSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + add_custom_do(blockDim, stream, xDevice, yDevice, zDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtMemcpy(zHost, outputByteSize, zDevice, outputByteSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("./output/output_z.bin", zHost, outputByteSize); + + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(zDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(zHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/run.sh b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..097440d29623b2eed58517184b24eeaf3ddbbdb9 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/run.sh @@ -0,0 +1,132 @@ +#!/bin/bash +CURRENT_DIR=$( + cd $(dirname ${BASH_SOURCE:-$0}) + pwd +) + +BUILD_TYPE="Debug" + + +SHORT=r:,v:,i:,b:,p:,o:, +LONG=run-mode:,soc-version:,install-path:,build-type:,install-prefix:,output:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +SOC_VERSION="Ascend310P3" +export OUTPUT_DIR=$CURRENT_DIR +while :; do + case "$1" in + -r | --run-mode) + RUN_MODE="$2" + shift 2 + ;; + -v | --soc-version) + SOC_VERSION="$2" + shift 2 + ;; + -i | --install-path) + ASCEND_INSTALL_PATH="$2" + shift 2 + ;; + -b | --build-type) + BUILD_TYPE="$2" + shift 2 + ;; + -p | --install-prefix) + INSTALL_PREFIX="$2" + shift 2 + ;; + -o | --output) + export OUTPUT_DIR="$2" + export CAMODEL_LOG_PATH="${OUTPUT_DIR}/sim_log" + 
INSTALL_PREFIX="${OUTPUT_DIR}/out" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "[ERROR] Unexpected option: $1" + break + ;; + esac +done + +RUN_MODE_LIST="cpu sim npu" +if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then + echo "ERROR: RUN_MODE error, This sample only support specify cpu, sim or npu!" + exit -1 +fi + +# VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4" +# if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then +# echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]" +# exit -1 +# fi +echo $ASCEND_INSTALL_PATH +echo $ASCEND_HOME_PATH +if [ -n "$ASCEND_INSTALL_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH +elif [ -n "$ASCEND_HOME_PATH" ]; then + _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH +else + if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then + _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest + else + _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest + fi +fi + +export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH} +export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH} +echo "Current compile soc version is ${SOC_VERSION}" +source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash +if [ "${RUN_MODE}" = "sim" ]; then + # in case of running op in simulator, use stub .so instead + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH + if [ ! 
$CAMODEL_LOG_PATH ]; then + export CAMODEL_LOG_PATH=$(pwd)/sim_log + fi + if [ -d "$CAMODEL_LOG_PATH" ]; then + rm -rf $CAMODEL_LOG_PATH + fi + mkdir -p $CAMODEL_LOG_PATH +elif [ "${RUN_MODE}" = "cpu" ]; then + export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib:${_ASCEND_INSTALL_PATH}/tools/tikicpulib/lib/${SOC_VERSION}:${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:${_ASCEND_INSTALL_PATH}/lib64:$LD_LIBRARY_PATH +fi +mkdir -p "$OUTPUT_DIR" +cd "$OUTPUT_DIR" +set -e +rm -rf build out +mkdir -p build +echo ${_ASCEND_INSTALL_PATH} +cmake "$CURRENT_DIR" -B build \ + -DRUN_MODE=${RUN_MODE} \ + -DSOC_VERSION=${SOC_VERSION} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ + -DASCEND_CANN_PACKAGE_PATH=${_ASCEND_INSTALL_PATH} +cmake --build build -j +cmake --install build + +rm -f ascendc_kernels_bbit +cp ./out/bin/ascendc_kernels_bbit ./ +rm -rf input output +mkdir -p input output +python3 "${CURRENT_DIR}/scripts/gen_data.py" +( + export LD_LIBRARY_PATH=$(pwd)/out/lib:$(pwd)/out/lib64:$LD_LIBRARY_PATH + if [[ "$RUN_WITH_TOOLCHAIN" -eq 1 ]]; then + if [ "${RUN_MODE}" = "npu" ]; then + msprof op --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "sim" ]; then + msprof op simulator --application=./ascendc_kernels_bbit + elif [ "${RUN_MODE}" = "cpu" ]; then + ./ascendc_kernels_bbit + fi + else + ./ascendc_kernels_bbit + fi +) +md5sum output/*.bin +python3 "${CURRENT_DIR}/scripts/verify_result.py" "output/output_z.bin" "output/golden.bin" diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/scripts/gen_data.py b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/scripts/gen_data.py new file mode 100644 index 0000000000000000000000000000000000000000..ea8ce828aea146c9ab462290be403c4cfd483b75 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/scripts/gen_data.py @@ -0,0 +1,25 @@ 
+#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# =============================================================================== + +import numpy as np + + +def gen_golden_data_simple(): + input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16) + golden = (input_x + input_y).astype(np.float16) + + input_x.tofile("./input/input_x.bin") + input_y.tofile("./input/input_y.bin") + golden.tofile("./output/golden.bin") + + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/scripts/verify_result.py b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/scripts/verify_result.py new file mode 100644 index 0000000000000000000000000000000000000000..2dd46f80375dcd9c687e2789ee9e9f69cd3b1a45 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/AddKernelInvocationNeo/scripts/verify_result.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# coding=utf-8 +# +# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# =============================================================================== + +import sys +import numpy as np + +# for float16 +relative_tol = 1e-3 +absolute_tol = 1e-5 +error_tol = 1e-3 + + +def verify_result(output, golden): + output = np.fromfile(output, dtype=np.float16).reshape(-1) + golden = np.fromfile(golden, dtype=np.float16).reshape(-1) + different_element_results = np.isclose(output, + golden, + rtol=relative_tol, + atol=absolute_tol, + equal_nan=True) + different_element_indexes = np.where(different_element_results == False)[0] + for index in range(len(different_element_indexes)): + real_index = different_element_indexes[index] + golden_data = golden[real_index] + output_data = output[real_index] + print( + "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" % + (real_index, golden_data, output_data, + abs(output_data - golden_data) / golden_data)) + if index == 100: + break + error_ratio = float(different_element_indexes.size) / golden.size + print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol)) + return error_ratio <= error_tol + + +if __name__ == '__main__': + try: + res = verify_result(sys.argv[1], sys.argv[2]) + if not res: + raise ValueError("[ERROR] result error") + else: + print("test pass") + except Exception as e: + print(e) + sys.exit(1) diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_CPU.sh b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_CPU.sh new file mode 100644 index 0000000000000000000000000000000000000000..3adbb990827b5a4d677dab659719f73a769328ce --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_CPU.sh @@ -0,0 +1,2 @@ +cd AddKernelInvocationNeo +bash run.sh -r cpu -v Ascend910B1 -o "$OEC_OUTPUT_PATH" diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_NPU.sh b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_NPU.sh new file mode 100644 index 
0000000000000000000000000000000000000000..cfdbab7c5fc8b3ceadd6217f214b24cb85bdb0a2 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_NPU.sh @@ -0,0 +1,10 @@ +set -e +cd AddKernelInvocationNeo +SOC_VERSION=$(python3 -c " +try: + import acl + print(acl.get_soc_name()) +except: + print('unknow') +") +bash run.sh -r npu -v $SOC_VERSION -o "$OEC_OUTPUT_PATH" diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_SIM.sh b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_SIM.sh new file mode 100644 index 0000000000000000000000000000000000000000..701b59ee015835feaa47412f27e963f34d9090e4 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD/TEST_SIM.sh @@ -0,0 +1,10 @@ +set -e +cd AddKernelInvocationNeo +SOC_VERSION=$(python3 -c " +try: + import acl + print(acl.get_soc_name()) +except: + print('unknow') +") +bash run.sh -r sim -v $SOC_VERSION -o "$OEC_OUTPUT_PATH" diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD_MSOPGEN/TEST.sh b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD_MSOPGEN/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..f34d50258837b770ff7dd35436bbc5f5a56ef6ba --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD_MSOPGEN/TEST.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -e +output_path="$OEC_OUTPUT_PATH" +special=(A300) +npu=$(python3 -c " +try: + import acl + print(acl.get_soc_name()) +except: + print('unknow') +") +target=ai_core-Ascend910B3 +for product in "${special[@]}"; do + if [[ "$product" == "$OEC_PRODUCT" ]]; then + target="ai_core-$npu" + fi +done +msopgen gen -i add_custom.json -c $target -lan cpp -out "$output_path" +cd "$output_path" +bash build.sh \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD_MSOPGEN/add_custom.json b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD_MSOPGEN/add_custom.json new 
file mode 100644 index 0000000000000000000000000000000000000000..dce1ed85f7413fdeea2a974ba1f80b2158a6b123 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_ADD_MSOPGEN/add_custom.json @@ -0,0 +1,40 @@ +[ + { + "op": "AddCustom", + "language": "cpp", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + }, + { + "name": "y", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ], + "output_desc": [ + { + "name": "z", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "float16" + ] + } + ] + } +] \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/CMakeLists.txt b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d4c47d84bec9be9c372ac0d637c412cb045b08a7 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/CMakeLists.txt @@ -0,0 +1,47 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. 
+ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.16.0) + +# project information +project(Ascend_C) +set(SOC_VERSION "Ascend310P3" CACHE STRING "system on chip type") +if(DEFINED ENV{USER} AND "$ENV{USER}" STREQUAL "root") + set(DEFAULT_ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package default installation directory for root user") +else() + set(DEFAULT_ASCEND_CANN_PACKAGE_PATH "$ENV{HOME}/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package default installation directory for other user") +endif() + +if(DEFINED ASCEND_CANN_PACKAGE_PATH) +elseif(DEFINED ENV{ASCEND_HOME_PATH}) + set(ASCEND_CANN_PACKAGE_PATH "$ENV{ASCEND_HOME_PATH}" CACHE PATH "ASCEND CANN package installation directory" FORCE) +else() + set(ASCEND_CANN_PACKAGE_PATH "${DEFAULT_ASCEND_CANN_PACKAGE_PATH}" CACHE PATH "ASCEND CANN package installation directory") +endif() + +set(RUN_MODE "npu" CACHE STRING "run mode: npu") +set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()") + +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed." 
${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +endif() + +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +# ascendc_library use to add kernel file to generate ascendc library +ascendc_library(kernels STATIC + hello_world.cpp +) + +add_executable(main main.cpp) + +target_link_libraries(main PRIVATE + kernels +) \ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/TEST.sh b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/TEST.sh new file mode 100755 index 0000000000000000000000000000000000000000..93ce88ec2b05c156a8975576e3444043e1c820d7 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/TEST.sh @@ -0,0 +1,50 @@ +SOC_VERSION=$(python3 -c " +try: + import acl + print(acl.get_soc_name()) +except: + print('unknow') +") +output="$OEC_OUTPUT_PATH" +type=dev +function make_run(){ + mkdir -p "$output/HelloWorld/build" + + cmake -B "$output/HelloWorld/build" \ + -DSOC_VERSION=${SOC_VERSION} \ + -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_PATH} \ + -DCMAKE_INSTALL_PREFIX="$output/HelloWorld/out" + if [ $? -ne 0 ]; then + echo "cmake hello world failed" + return 1 + fi + cd "$output/HelloWorld" + cmake --build build -j + if [ $? -ne 0 ]; then + echo "buid hello world failed" + return 2 + fi + cmake --install build + if [ $? -ne 0 ]; then + echo "install hello world failed" + return 3 + fi + if [[ $type == "build" ]];then + return 0 # 算子编译场景下无需执行用例,算子开发需要执行用例 + fi + check_msg="Hello World" + file_path=output_msg.txt + + ./build/main | tee $file_path + count=$(grep -c "$check_msg" $file_path) + + if [ $count -ne 8 ]; then + echo "Error, Expected 8 occurrences of $check_msg, but found $count occurrences." + return 3 + fi + +} + + +make_run +exit $? 
\ No newline at end of file diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/hello_world.cpp b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/hello_world.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1d758710ddacefb231648ca13b8e568e55285ed2 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/hello_world.cpp @@ -0,0 +1,10 @@ +#include "kernel_operator.h" +extern "C" __global__ __aicore__ void hello_world() +{ + AscendC::printf("Hello World!!!\n"); +} + +void hello_world_do(uint32_t blockDim, void* stream) +{ + hello_world<<>>(); +} diff --git a/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/main.cpp b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4cb6424f1994a95384fdc0cd6ad1787de8131bb9 --- /dev/null +++ b/oec-ascend/oec/test_cases/cann/Kernel/Develop/KERNEL_DEV_HELLO_WORLD/main.cpp @@ -0,0 +1,24 @@ +#include "acl/acl.h" +extern void hello_world_do(uint32_t coreDim, void* stream); + +int32_t main(int argc, char const *argv[]) +{ + // AscendCL初始化 + aclInit(nullptr); + // 运行管理资源申请 + int32_t deviceId = 0; + aclrtSetDevice(deviceId); + aclrtStream stream = nullptr; + aclrtCreateStream(&stream); + + // 设置参与运算的核数为8(核数可根据实际需求设置) + constexpr uint32_t blockDim = 8; + // 用内核调用符<<<>>>调用核函数,hello_world_do中封装了<<<>>>调用 + hello_world_do(blockDim, stream); + aclrtSynchronizeStream(stream); + // 资源释放和AscendCL去初始化 + aclrtDestroyStream(stream); + aclrtResetDevice(deviceId); + aclFinalize(); + return 0; +} diff --git a/oec-ascend/oec/test_cases/cann/Model/Compile/MODEL_ATC_PB_TO_JSON_DESC/TEST.sh b/oec-ascend/oec/test_cases/cann/Model/Compile/MODEL_ATC_PB_TO_JSON_DESC/TEST.sh new file mode 100644 index 0000000000000000000000000000000000000000..b536c4debcea85edb8de6dcb3c763220746cef17 --- /dev/null +++ 
# Compile the single-operator description add.json with ATC.

# Resolve the JSON path before changing directory; it is looked up relative to
# the directory the script is started from.
jsonfile=$(realpath "add.json")

# Stop if the output directory cannot be created or entered, so that atc never
# writes its artefacts into the directory the script was started from.
mkdir -p "$OEC_OUTPUT_PATH" || exit 1
cd "$OEC_OUTPUT_PATH" || exit 1

atc --singleop="$jsonfile" --output=out --soc_version=Ascend910B3
# Compile the dynamic-shape single-operator description dynamic_shape.json
# with ATC.

# Resolve the JSON path before changing directory; it is looked up relative to
# the directory the script is started from.
jsonfile=$(realpath "dynamic_shape.json")

# Stop if the output directory cannot be created or entered, so that atc never
# writes its artefacts into the directory the script was started from.
mkdir -p "$OEC_OUTPUT_PATH" || exit 1
cd "$OEC_OUTPUT_PATH" || exit 1

atc --singleop="$jsonfile" --output=out --soc_version=Ascend910B3
"""Check that the Python interpreter and required packages are available.

Run as a script, the module checks ``DEPENDENCY_CONFIG``, prints a report and
exits with status 1 when a requirement is not met.
"""
import sys
import subprocess
from packaging.utils import canonicalize_name
from packaging.version import parse as parse_version

# Status markers stored in the result entries and printed in the report.
_STATUS_OK = "✓"
_STATUS_FAIL = "✗"

# Dependency configuration.
#   "python":   optional interpreter requirement (min_version / max_version).
#   "packages": list of package requirements; each entry has
#       "name":          distribution name on PyPI (required)
#       "version_rules": optional rules keyed by "major.minor" Python version
#       "default":       optional rule used when no version rule matches
#   A rule is a dict with optional "min_version" / "max_version" strings.
DEPENDENCY_CONFIG = {
    "python": {
        "min_version": "3.7"
    },
    "packages": [
        {
            "name": "ais_bench_net_test"
        },
    ]
}


def get_python_version():
    """Return the running interpreter version as ``major.minor.micro``."""
    return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"


def get_installed_packages():
    """Return ``{lower-cased name: version}`` for every installed package.

    The list comes from ``pip list --format=freeze`` run with the current
    interpreter. If pip cannot be run, a message is printed and the process
    exits with status 1.
    """
    try:
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'list', '--format=freeze'],
            capture_output=True,
            text=True,
            check=True
        )
        packages = {}
        for line in result.stdout.splitlines():
            if '==' in line:
                name, version = line.split('==', 1)
                packages[name.lower()] = version.strip()
        return packages
    except Exception as e:
        print(f"错误: 无法获取已安装包列表 - {str(e)}")
        print("请确保pip已安装并能正常工作")
        sys.exit(1)


def check_python_version(min_version=None, max_version=None):
    """Check the running interpreter against an optional version range.

    Args:
        min_version: lowest accepted version string (e.g. "3.8.0"), or None.
        max_version: highest accepted version string (e.g. "3.10.0"), or None.

    Returns:
        ``(ok, problems)``; ``problems`` lists one message per violated bound.
    """
    current_ver = parse_version(get_python_version())
    problems = []

    if min_version and current_ver < parse_version(min_version):
        problems.append(f"需要 ≥ {min_version}")

    if max_version and current_ver > parse_version(max_version):
        problems.append(f"需要 ≤ {max_version}")

    return (len(problems) == 0, problems)


def _find_installed_version(pypi_name, installed_packages):
    """Look up a package version, tolerating ``-``/``_``/``.`` spelling."""
    version = installed_packages.get(pypi_name.lower())
    if version:
        return version
    # PEP 503: names that differ only in case or in runs of "-", "_" and "."
    # refer to the same distribution, and pip may report either spelling.
    wanted = canonicalize_name(pypi_name)
    for name, candidate in installed_packages.items():
        if canonicalize_name(name) == wanted:
            return candidate
    return version


def check_package(pkg_info, installed_packages):
    """Check one package against its version rule.

    Args:
        pkg_info: dict with "pypi_name" and optional "min_version" /
            "max_version".
        installed_packages: mapping of installed package name to version.

    Returns:
        ``(ok, installed_version, problems)``; ``installed_version`` is None
        when the package is not installed.
    """
    installed_version = _find_installed_version(
        pkg_info["pypi_name"], installed_packages
    )

    if not installed_version:
        return (False, None, ["未安装"])

    # No bound configured: being installed is enough.
    if "min_version" not in pkg_info and "max_version" not in pkg_info:
        return (True, installed_version, [])

    problems = []
    try:
        installed_ver = parse_version(installed_version)

        if "min_version" in pkg_info:
            if installed_ver < parse_version(pkg_info["min_version"]):
                problems.append(f"需要 ≥ {pkg_info['min_version']}")

        if "max_version" in pkg_info:
            if installed_ver > parse_version(pkg_info["max_version"]):
                problems.append(f"需要 ≤ {pkg_info['max_version']}")
    except Exception as e:
        # A version string that cannot be parsed is reported, not raised.
        problems.append(f"版本解析错误: {str(e)}")

    return (len(problems) == 0, installed_version, problems)


def _format_range(min_version, max_version):
    """Render a version range for the report (exact, bounded or any)."""
    if min_version and max_version and min_version == max_version:
        return min_version
    parts = []
    if min_version:
        parts.append(f"≥ {min_version}")
    if max_version:
        parts.append(f"≤ {max_version}")
    return " 且 ".join(parts) if parts else "任意版本"


def check_dependencies(requirements):
    """Check the interpreter and all configured packages and print a report.

    Args:
        requirements: configuration dict shaped like ``DEPENDENCY_CONFIG``.

    Returns:
        ``(all_ok, results)``; ``results`` holds one dict per package with the
        keys display_name, pypi_name, status, installed, required, problems
        and rule.
    """
    installed_packages = get_installed_packages()

    print("=" * 70)
    print("Python环境与包依赖检查")
    print("=" * 70)

    all_ok = True
    results = []

    # 1. Interpreter version.
    py_req = requirements.get("python", {})
    if py_req:
        min_py = py_req.get("min_version")
        max_py = py_req.get("max_version")
        py_ok, py_problems = check_python_version(min_py, max_py)

        status = _STATUS_OK if py_ok else _STATUS_FAIL
        problems = ", ".join(py_problems) if py_problems else "满足要求"
        print(f"Python版本: {get_python_version()} | 要求: {_format_range(min_py, max_py)}")
        print(f" {status} 状态: {problems}")
        print("-" * 70)

        if not py_ok:
            all_ok = False

    # 2. Package requirements.
    packages = requirements.get("packages", [])
    if not packages:
        print("未配置包依赖检查")
    else:
        print("\n包依赖检查:")
        print("-" * 70)

        # Rules may be specialised per "major.minor" interpreter version.
        current_py = f"{sys.version_info.major}.{sys.version_info.minor}"

        for pkg in packages:
            name = pkg["name"]
            rule = pkg.get("version_rules", {}).get(current_py, pkg.get("default", {}))

            satisfied, version, problems = check_package(
                {"pypi_name": name, **rule},
                installed_packages
            )

            range_str = _format_range(rule.get("min_version"), rule.get("max_version"))

            if satisfied:
                status = _STATUS_OK
            else:
                status = _STATUS_FAIL
                all_ok = False

            results.append({
                "display_name": name,
                "pypi_name": name,
                "status": status,
                "installed": version or "未安装",
                "required": range_str,
                "problems": problems,
                "rule": rule
            })

            print(f"{status} {name}")
            print(f" 已安装: {version or '未安装'}")
            print(f" 要求: {range_str}")
            if problems:
                print(f" 问题: {', '.join(problems)}")
            print("-" * 70)

    print("=" * 70)
    return all_ok, results


def generate_install_commands(results, py_req=None):
    """Build the remediation hints shown when the environment check fails.

    Args:
        results: package result list returned by ``check_dependencies``.
        py_req: optional interpreter requirement dict.

    Returns:
        The hints joined with newlines. Packages whose status marks them as
        satisfied are left out; entries without a status are still listed.
    """
    commands = []

    if py_req:
        min_py = py_req.get("min_version")
        max_py = py_req.get("max_version")
        if min_py or max_py:
            commands.append("# 请确保使用正确的Python版本")
            if min_py and max_py:
                if max_py == min_py:
                    commands.append(f"# 推荐使用 Python {min_py}")
                else:
                    commands.append(f"# 推荐使用 Python {min_py} 到 {max_py} 之间的版本")
            elif min_py:
                commands.append(f"# 需要 Python {min_py} 或更高版本")
            elif max_py:
                commands.append(f"# 需要 Python {max_py} 或更低版本")

    # Asking the user to install something that already passed is misleading.
    pending = [res for res in results if res.get("status") != _STATUS_OK]
    if pending:
        commands.append("\n# 包安装命令:")

    for res in pending:
        pkg_name = res["pypi_name"]
        rule = res["rule"]

        if "min_version" in rule and "max_version" in rule:
            if rule['min_version'] == rule['max_version']:
                commands.append(f"请先安装 '{pkg_name}=={rule['min_version']}'")
            else:
                commands.append(f"请先安装 '{pkg_name}>={rule['min_version']},<={rule['max_version']}'")
        elif "min_version" in rule:
            commands.append(f"请先安装 '{pkg_name}>={rule['min_version']}'")
        elif "max_version" in rule:
            commands.append(f"请先安装 '{pkg_name}<={rule['max_version']}'")
        else:
            commands.append(f"请先安装 {pkg_name}")

    return "\n".join(commands)


def main():
    """Run the configured checks and return the process exit status."""
    all_ok, results = check_dependencies(DEPENDENCY_CONFIG)

    if all_ok:
        print("\n所有依赖满足! 可以运行主程序。")
        return 0

    print("\n错误: 环境不满足要求!")
    print("请根据以下提示解决问题:")

    py_req = DEPENDENCY_CONFIG.get("python", {})
    commands = generate_install_commands(results, py_req)
    print("\n" + commands)

    # A non-zero status tells the calling test script that the check failed.
    return 1


if __name__ == "__main__":
    sys.exit(main())
# Run the ais_bench all_reduce collective test on every NPU pyACL reports.

# Number of devices according to pyACL; 0 when ACL cannot be imported or the
# query does not return success.
device_count=$(python3 -c "
try:
    import acl
    count,ret = acl.rt.get_device_count()
    assert ret == 0
    print(count)
except Exception:
    print(0)
")

# Without a device the benchmark cannot run; fail with a clear message instead
# of starting it with "-n 0".
if [ -z "$device_count" ] || [ "$device_count" -le 0 ] 2>/dev/null; then
    echo "no NPU device detected, cannot run all_reduce_test"
    exit 1
fi

# Run the benchmark (killed after 60s) and capture stdout and stderr together.
output=$(timeout -k 1s 60s python3 -m ais_bench -n "$device_count" all_reduce_test -p "$device_count" -b 8K -e 64M -f 2 -d fp32 -o sum 2>&1)
exit_code=$?

# Show the captured output on the terminal.
echo "$output"

# The tool can report a data mismatch in its output; treat the
# "check result failed" marker as a failure regardless of the exit status.
if echo "$output" | grep -q "check result failed"; then
    exit 1
fi

# Otherwise propagate the exit status of the benchmark command.
exit $exit_code
"""Packaging script for the oec-ascend OS compatibility verification tool."""
from setuptools import setup, find_packages

setup(
    name="oec-ascend-compatibility",
    version="1.0.0",
    packages=find_packages(),
    # Ship the non-Python files selected in MANIFEST.in (test cases, data,
    # common resources) together with the package.
    include_package_data=True,
    # The tool declares Python 3.7 as its minimum (see the dependency check in
    # check_package_version.py); let pip refuse older interpreters up front.
    python_requires=">=3.7",
    install_requires=["openpyxl", "pandas", "distro", "packaging", "psutil"],
    author="spicy-bittern",
    author_email=" ",
    description="Ascend Operating System Compatibility Verification Tool",
    license="Apache-2.0",
    # Console command: command name = module:function.
    entry_points={
        "console_scripts": ["oec-ascend = oec.main:main"]
    },
    keywords="Ascend Operating System Compatibility Verification Tool oec-ascend",
    url="https://gitee.com/ascend/tools/tree/master/oec-ascend",
)