From e510d7bd4998b93433aba63a544120d8a8307599 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Thu, 29 Aug 2024 12:37:47 +0800 Subject: [PATCH 01/14] Operator generation --- .../generate_op_script/op_generator.py | 144 ++++++++++++++---- .../operator_replication.template | 113 ++++++++++---- 2 files changed, 198 insertions(+), 59 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 7d3e2b226..49a2f9606 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -26,38 +26,54 @@ except ImportError: pass from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker, FileOpen +from msprobe.core.common.utils import check_file_or_directory_path +import re TENSOR_DATA_LIST = ["torch.Tensor"] TORCH_BOOL_TYPE = ["torch.bool"] TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", "torch.int64", "torch.long"] -TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -def check_json(json_path): +def check_json(json_path, propagation): + check_file_or_directory_path(json_path) json_file = os.path.realpath(json_path) - with open(json_file) as f: + with FileOpen(json_file, 'r') as f: json_content = json.load(f) if not isinstance(json_content, dict): raise ValueError("content of json file is not a dictionary!") - if len(list(json_content.items())) > 1: + if len(list(json_content.items())) > 2: raise ValueError("json file has more than one API, only one API is allowed!") - (api_full_name, api_info_dict) = list(json_content.items())[0] - (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + (api_full_name_forward, api_info_dict_forward) = list(json_content.items())[0] + if propagation == 'backward': + (api_full_name_backward, api_info_dict_backward) = list(json_content.items())[1] + + (api_type, api_name, ordinal_number, _) = api_full_name_forward.split(".", -1) if api_type not in ("Functional", "Tensor", "Torch"): raise ValueError("type {0} of API is not supported!".format(api_type)) - return (api_full_name, api_info_dict) + if propagation == 'backward': + return (api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward) + else: + return (api_full_name_forward, api_info_dict_forward) def check_user_settings(cmd_args): iter_t = cmd_args.iter_times if iter_t <= 0: raise ValueError("iter_times should be an integer bigger than zero!") - (api_full_name, api_info_dict) = check_json(cmd_args.forward_json_path) - return api_full_name, api_info_dict + # 通过 check_json 函数读取JSON文件并检查其内容是否合法。并根据传播方向返回相应的API信息 + if cmd_args.propagation == "backward": + (api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward) = check_json( + cmd_args.output_file, cmd_args.propagation) + return api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward + else: + (api_full_name_forward, api_info_dict_forward) = check_json(cmd_args.output_file, cmd_args.propagation) + return api_full_name_forward, api_info_dict_forward def get_compare_standard(api_name): @@ -71,7 +87,7 @@ def get_compare_standard(api_name): def get_settings(cmd_args): - ''' + ''' internal_settings contain all information needed for the operator program. keys: api_full_name: api_type.api_name.ordinal_number @@ -87,14 +103,28 @@ def get_settings(cmd_args): kwargs_value_assignment: code for kwargs assignment kwargs_dict_generator_device: code for generate kwargs dict on device kwargs_dict_generator_bench: code for generate kwargs dict on bench - ''' - api_full_name, api_info_dict = check_user_settings(cmd_args) - args_info = api_info_dict.get("args") - kwargs_info = api_info_dict.get("kwargs") + ''' + if cmd_args.propagation == "backward": + # 读取和检查json文件 + api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward = check_user_settings(cmd_args) + args_info_forward = api_info_dict_forward.get("input_args") + kwargs_info_forward = api_info_dict_forward.get("input_kwargs") + args_info_backward = api_info_dict_backward.get("grad_input") + else: + # 读取和检查json文件 + api_full_name_forward, api_info_dict_forward = check_user_settings(cmd_args) + args_info_forward = api_info_dict_forward.get("input_args") + kwargs_info_forward = api_info_dict_forward.get("input_kwargs") + # 根据用户设置生成内部设置字典,包含API名称、类型、比较标准、随机种子、迭代次数等信息 internal_settings = {} - internal_settings["api_full_name"] = api_full_name - (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + internal_settings["propagation"] = cmd_args.propagation + if cmd_args.propagation == "backward": + internal_settings['api_full_name'] = api_full_name_backward + (api_type, api_name, ordinal_number, _) = api_full_name_backward.split(".", -1) + else: + internal_settings["api_full_name"] = api_full_name_forward + (api_type, api_name, ordinal_number, _) = api_full_name_forward.split(".", -1) if api_type == "Functional": internal_settings["api_type"] = "torch.nn.functional" elif api_type == "Tensor": @@ -104,18 +134,27 @@ def get_settings(cmd_args): internal_settings["api_name"] = api_name internal_settings["compare_standard"] = get_compare_standard(api_name) internal_settings["ordinal_number"] = ordinal_number - internal_settings["direction_status"] = "forward" + internal_settings["direction_status"] = cmd_args.propagation internal_settings["random_seed"] = cmd_args.random_seed if cmd_args.mode == "real_data": internal_settings["iter_times"] = 1 else: internal_settings["iter_times"] = cmd_args.iter_times - internal_settings["args_element_assignment"] = generate_args_element_assignment_code(args_info) - internal_settings["args_list_generator_device"] = generate_args_list_device(args_info) - internal_settings["args_list_generator_bench"] = generate_args_list_bench(args_info) - internal_settings["kwargs_value_assignment"] = generate_kwargs_value_assignment_code(kwargs_info) - internal_settings["kwargs_dict_generator_device"] = generate_kwargs_dict_device(kwargs_info) - internal_settings["kwargs_dict_generator_bench"] = generate_kwargs_dict_bench(kwargs_info) + internal_settings["args_element_assignment"] = generate_args_element_assignment_code(args_info_forward) + internal_settings["args_list_generator_device"] = generate_args_list_device(args_info_forward) + internal_settings["args_list_generator_bench"] = generate_args_list_bench(args_info_forward) + internal_settings["kwargs_value_assignment"] = generate_kwargs_value_assignment_code(kwargs_info_forward) + internal_settings["kwargs_dict_generator_device"] = generate_kwargs_dict_device(kwargs_info_forward) + internal_settings["kwargs_dict_generator_bench"] = generate_kwargs_dict_bench(kwargs_info_forward) + if cmd_args.propagation == "backward": + internal_settings["args_element_assignment_backward"] = generate_args_element_assignment_code(args_info_backward) + internal_settings["args_list_generator_device_backward"] = generate_args_list_device(args_info_backward) + internal_settings["args_list_generator_bench_backward"] = generate_args_list_bench(args_info_backward) + else: + internal_settings["args_element_assignment_backward"] = '' + internal_settings["args_list_generator_device_backward"] = '' + internal_settings["args_list_generator_bench_backward"] = '' + return internal_settings @@ -226,10 +265,58 @@ def generate_kwargs_dict_bench(kwargs_info): kwargs_dict_generator_bench += recursive_kwargs_dict(value, flag_bench=True) + ", " return kwargs_dict_generator_bench +def update_data_name(data, dump_data_dir): + if isinstance(data, list): + for item in data: + update_data_name(item, dump_data_dir) + elif 'data_name' in data: + data['data_name'] = dump_data_dir + data['data_name'] + +def load_real_data_path(value, dump_data_dir): + if 'input_args' in value: + for v in value['input_args']: + update_data_name(v, dump_data_dir) + if 'grad_input' in value: + for v in value['grad_input']: + if v is not None: + update_data_name(v, dump_data_dir) + for v in value['output']: + update_data_name(v, dump_data_dir) + return value + +def extract_op(args): + check_file_or_directory_path(args.dump_json_path) + with FileOpen(args.dump_json_path, 'r') as file: + data = json.load(file) + extract_key_pattern = re.compile(f"^{re.escape(args.module_name)}\..+") + new_data = {} + for key, value in data['data'].items(): + if extract_key_pattern.match(key): + if len(args.real_data_path) != 0: + value = load_real_data_path(value, args.real_data_path) + new_data[key] = value + if not new_data: + print(f"Error: The module {args.module_name} does not exist in the file.") + else: + with FileOpen(args.output_file, 'w') as file: + json.dump(new_data, file, indent=4) + print(f"The module {args.module_name} has been successfully extracted and saved in: {args.output_file}") def op_generator_parser(parser): - parser.add_argument("-forward", "--forward_json_path", dest="forward_json_path", type=str, - help=" Path of forward API json file.", + parser.add_argument("-dump", "--dump_json_path", dest="dump_json_path", default='', type=str, + help=" Path of dump json file.", + required=False) + parser.add_argument("-o", "--output_file", dest="output_file", type=str, + help=" Path of API json file.", + required=True) + parser.add_argument("-n", "--module_name", dest="module_name", type=str, + help=" extract module name.", + required=True) + parser.add_argument("-r", "--real_data_path", dest="real_data_path", default='', type=str, + help=" Path of real data files.", + required=False) + parser.add_argument("-p", "--propagation", dest="propagation", default='forward', type=str, + help=" forward or backward.", required=True) parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), help=" Execute mode, should be random_data or real_data.", @@ -237,7 +324,7 @@ def op_generator_parser(parser): parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, help=" If mode is random_data, it is random seed.", required=False) - parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, + parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=1, help=" If mode is random_data, generate iter_times group of data.", required=False) @@ -246,13 +333,16 @@ def main(): parser = argparse.ArgumentParser() op_generator_parser(parser) cmd_args = parser.parse_args() + + if len(cmd_args.dump_json_path) != 0: + extract_op(cmd_args) internal_settings = get_settings(cmd_args) template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) try: - with open(template_path, 'r') as ftemp, open(operator_script_path, 'w') as fout: + with FileOpen(template_path, 'r') as ftemp, FileOpen(operator_script_path, 'w') as fout: code_template = ftemp.read() fout.write(code_template.format(**internal_settings)) except OSError: diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 7630839aa..625150960 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -6,14 +6,17 @@ import torch try: import torch_npu except ImportError: + from torch.cuda.amp import autocast pass +from tabulate import tabulate +from msprobe.core.common.utils import check_file_or_directory_path TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] TORCH_BOOL_TYPE = ["torch.bool"] TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", "torch.int64", "torch.long"] -TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] RAISE_PRECISION = {{ @@ -30,6 +33,7 @@ class CompareStandard(Enum): ABSOLUTE_THRESHOLD_STANDARD = auto() ULP_ERROR_STANDARD = auto() BENCHMARK_STANDARD = auto() + THOUSANDTH_STANDARD = auto() def get_device(): @@ -80,14 +84,19 @@ def generate_random_tensor(info): def generate_real_tensor(data_path): + check_file_or_directory_path(data_path) data_path = os.path.realpath(data_path) - data = torch.load(data_path) + try: + data = torch.load(data_path, map_location='cpu') + except Exception as e: + raise RuntimeError(f"An unexpected error occurred:{{e}} when loading grad_file.") from e return data def generate_data(info): data_type = info.get("type") - data_path = info.get("datapath") + data_path = info.get("data_name") + data_grad = info.get("requires_grad") if data_type in TENSOR_DATA_LIST: if data_path: data = generate_real_tensor(data_path) @@ -95,26 +104,44 @@ def generate_data(info): data = generate_random_tensor(info) else: data = info.get("value") + if data_grad == True: + data.requires_grad_(True) return data -def get_input(): +def get_input(propagation): {args_element_assignment} args_device = [{args_list_generator_device}] args_bench = [{args_list_generator_bench}] {kwargs_value_assignment} kwargs_device = {{{kwargs_dict_generator_device}}} kwargs_bench = {{{kwargs_dict_generator_bench}}} +{args_element_assignment_backward} + args_device_backward = [{args_list_generator_device_backward}] + args_bench_backward = [{args_list_generator_bench_backward}] + if propagation == 'backward': + return args_device, kwargs_device, args_bench, kwargs_bench, args_device_backward, args_bench_backward return args_device, kwargs_device, args_bench, kwargs_bench - -def exec_api_device(args, kwargs): +def exec_api_device(args, kwargs, args_grad_input, propagation): output_device = {api_type}.{api_name}(*args, **kwargs) + if propagation == 'backward': + args_input_tensor = [tensor for tensor in args if isinstance(tensor, torch.Tensor) and tensor.requires_grad] + args_input_tensor.extend( + [value for value in kwargs.values() if isinstance(value, torch.Tensor) and value.requires_grad]) + output_device_new = torch.autograd.grad(outputs=output_device, inputs=args_input_tensor, grad_outputs=args_grad_input) + return output_device_new return output_device -def exec_api_bench(args, kwargs): +def exec_api_bench(args, kwargs, args_grad_input, propagation): output_bench = {api_type}.{api_name}(*args, **kwargs) + if propagation == 'backward': + args_input_tensor = [tensor for tensor in args if isinstance(tensor, torch.Tensor) and tensor.requires_grad] + args_input_tensor.extend( + [value for value in kwargs.values() if isinstance(value, torch.Tensor) and value.requires_grad]) + output_bench_new = torch.autograd.grad(outputs=output_bench, inputs=args_input_tensor, grad_outputs=args_grad_input) + return output_bench_new return output_bench @@ -160,20 +187,23 @@ def compare_tensor(out_device, out_bench, api_name): if torch.numel(out_bench) == 0: print("Both out_device and out_bench have zero elements.") return None - print(f"shape is {{out_bench.shape}}") - print(f"dtype of out_device is {{out_device.dtype}}") - print(f"dtype of out_bench is {{out_bench.dtype}}") dtype_device = out_device.dtype dtype_bench = out_bench.dtype + headers = ["Metric", "Value"] + table = [ + ["Shape", out_bench.shape], + ["Dtype of out_device", out_device.dtype], + ["Dtype of out_bench", out_bench.dtype] + ] if str(dtype_device) in TORCH_FLOAT_TYPE and str(dtype_bench) in TORCH_FLOAT_TYPE \ or str(dtype_device) in TORCH_INT_TYPE and str(dtype_bench) in TORCH_INT_TYPE \ or str(dtype_device) in TORCH_BOOL_TYPE and str(dtype_bench) in TORCH_BOOL_TYPE: out_device = out_device.to(torch.device("cpu")) if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or compare_standard == CompareStandard.BINARY_EQUALITY_STANDARD: - print("compare standard: binary equality standard:") error_number = torch.sum(out_device != out_bench).item() error_rate = error_number / torch.numel(out_bench) - print(f"error rate is {{error_rate}}.") + table.append(["Compare Standard", "Binary Equality Standard"]) + table.append(["Error Rate", error_rate]) else: abs_err = torch.abs(out_device - out_bench) abs_bench = torch.abs(out_bench) @@ -210,9 +240,9 @@ def compare_tensor(out_device, out_bench, api_name): abs_err_proportion = 0 else: abs_err_proportion = torch.sum(abs_err_mask) / torch.sum(small_value_mask) - print("compare standard: absolute threshold standard") - print(f"relative error ratio is {{rel_err_proportion}}") - print(f"absolute error ratio is {{abs_err_proportion}}") + table.append(["Compare Standard", "Absolute Threshold Standard"]) + table.append(["Relative Error Ratio", rel_err_proportion]) + table.append(["Absolute Error Ratio", abs_err_proportion]) elif compare_standard == CompareStandard.ULP_ERROR_STANDARD: if dtype_device == torch.float16: min_eb, exponent_num = -14, 10 @@ -233,10 +263,20 @@ def compare_tensor(out_device, out_bench, api_name): ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) else: ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) - print("compare standard: ulp error standard") - print(f"maximum ulp error is {{max_ulp_err}}") - print(f"mean ulp error is {{mean_ulp_err}}") - print(f"ulp error proportion is {{ulp_err_proportion}}") + table.append(["Compare Standard", "ULP error Standard"]) + table.append(["Maximum ULP Error", max_ulp_err]) + table.append(["Mean ULP Error", mean_ulp_err]) + table.append(["ULP Error Proportion", ulp_err_proportion]) + elif compare_standard == CompareStandard.THOUSANDTH_STANDARD: + rel_err_origin = np.abs(abs_err / abs_bench_with_eps) + thresholding = 0.001 + if np.size(rel_err_origin) == 0: + thousand_res = 1 + else: + thousand_res = np.divide(np.sum(rel_err < thresholding), np.size(rel_err_origin)) + thousand_status = thousand_res > (1 - thresholding) + table.append(["Compare Standard", "Thousandth Standard"]) + table.append(["Thousandth ratio", thousand_res]) else: if dtype_device == torch.float16: small_value, small_value_atol = 1.0e-3, 1.0e-5 @@ -264,14 +304,16 @@ def compare_tensor(out_device, out_bench, api_name): mean_rel_err = torch.sum(torch.clamp(rel_err, min=0)) / torch.sum(normal_value_mask) rmse = compute_rmse(abs_err, normal_value_mask) error_balance = compute_error_balance(out_device, out_bench) - print("compare standard: benchmark standard") - print(f"small value error proportion is {{small_value_err_proportion}}") - print(f"maximum relative error is {{max_rel_err}}") - print(f"mean relative error is {{mean_rel_err}}") - print(f"root mean squared error is {{rmse}}") - print(f"error balance is {{error_balance}}") + table.append(["Compare Standard", "Benchmark Standard"]) + table.append(["Small Value Error Proportion", small_value_err_proportion]) + table.append(["Maximum Relative Error", max_rel_err]) + table.append(["Mean Relative Error", mean_rel_err]) + table.append(["Root Mean Squared Error", rmse]) + table.append(["Error Balance", error_balance]) else: print(f"ERROR: out_device dtype is {{dtype_device}}, out_bench dtype is {{dtype_bench}}, not comparable.") + return None + print(tabulate(table, headers, tablefmt='grid')) return None @@ -280,10 +322,10 @@ def compare_element(out_device, out_bench, api_name): print("ERROR: out_device and out_bench is not the same type!") return None if isinstance(out_bench, torch.Tensor): - print(f"data type: {{type(out_bench)}}") + # print(f"data type: {{type(out_bench)}}") compare_tensor(out_device, out_bench, api_name) elif isinstance(out_bench, (bool, int, float, str)): - print(f"data type: {{type(out_bench)}}") + # print(f"data type: {{type(out_bench)}}") if out_device == out_bench: print("PASS: out_device and out_bench equals.") else: @@ -300,7 +342,7 @@ def compare(out_device, out_bench, api_name): print("Compare finished.") return None if isinstance(out_bench, (list, tuple)): - print(f"data type: {{type(out_bench)}}") + # print(f"data type: {{type(out_bench)}}") if len(out_device) != len(out_bench): print("ERROR: len of out_device and out_bench is different!") print("Compare finished.") @@ -315,11 +357,18 @@ def compare(out_device, out_bench, api_name): device = get_device() api_name = "{api_name}" +propagation = "{propagation}" compare_standard = {compare_standard} torch.manual_seed({random_seed}) for i in range({iter_times}): print(f"iter: {{i}}:") - args_device, kwargs_device, args_bench, kwargs_bench = get_input() - output_device = exec_api_device(args_device, kwargs_device) - output_bench = exec_api_bench(args_bench, kwargs_bench) - compare(output_device, output_bench, api_name) + if propagation == 'backward': + args_device, kwargs_device, args_bench, kwargs_bench, args_device_backward, args_bench_backward = get_input(propagation) + output_device = exec_api_device(args_device, kwargs_device, args_device_backward, propagation) + output_bench = exec_api_bench(args_bench, kwargs_bench, args_bench_backward, propagation) + compare(output_device, output_bench, api_name) + else: + args_device, kwargs_device, args_bench, kwargs_bench = get_input(propagation) + output_device = exec_api_device(args_device, kwargs_device, None, propagation) + output_bench = exec_api_bench(args_bench, kwargs_bench, None, propagation) + compare(output_device, output_bench, api_name) -- Gitee From 574ef7784a801668d45e418285dab00cbeae6d88 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 2 Sep 2024 11:09:16 +0800 Subject: [PATCH 02/14] Zero Division Verification --- .../generate_op_script/op_generator.py | 4 ++-- .../operator_replication.template | 21 +++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 49a2f9606..40affad97 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -296,11 +296,11 @@ def extract_op(args): value = load_real_data_path(value, args.real_data_path) new_data[key] = value if not new_data: - print(f"Error: The module {args.module_name} does not exist in the file.") + print(f"Error: The module '{args.module_name}' does not exist in the file.") else: with FileOpen(args.output_file, 'w') as file: json.dump(new_data, file, indent=4) - print(f"The module {args.module_name} has been successfully extracted and saved in: {args.output_file}") + print(f"The module '{args.module_name}' has been successfully extracted and saved in: {args.output_file}") def op_generator_parser(parser): parser.add_argument("-dump", "--dump_json_path", dest="dump_json_path", default='', type=str, diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 625150960..a6be61a50 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -176,7 +176,10 @@ def compute_error_balance(out_device, out_bench): larger_count = torch.sum(torch.greater(out_device - out_bench.to(out_device.dtype), 0)) smaller_count = torch.sum(torch.less(out_device - out_bench.to(out_device.dtype), 0)) total_count = torch.numel(out_bench) - error_balance = abs(larger_count - smaller_count) / total_count + try: + error_balance = abs(larger_count - smaller_count) / total_count + except ZeroDivisionError: + print(f"ERROR: please check torch.numel out_bench", torch.numel(out_bench)) return error_balance @@ -201,7 +204,10 @@ def compare_tensor(out_device, out_bench, api_name): out_device = out_device.to(torch.device("cpu")) if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or compare_standard == CompareStandard.BINARY_EQUALITY_STANDARD: error_number = torch.sum(out_device != out_bench).item() - error_rate = error_number / torch.numel(out_bench) + try: + error_rate = error_number / torch.numel(out_bench) + except ZeroDivisionError: + print(f"ERROR: please check torch.numel out_bench", torch.numel(out_bench)) table.append(["Compare Standard", "Binary Equality Standard"]) table.append(["Error Rate", error_rate]) else: @@ -259,10 +265,13 @@ def compare_tensor(out_device, out_bench, api_name): ulp_err = torch.abs(ulp_err) max_ulp_err = torch.max(ulp_err) mean_ulp_err = torch.mean(ulp_err) - if dtype_device == torch.float32: - ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) - else: - ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) + try: + if dtype_device == torch.float32: + ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) + else: + ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) + except ZeroDivisionError: + print(f"ERROR: please check torch.numel out_bench", torch.numel(out_bench)) table.append(["Compare Standard", "ULP error Standard"]) table.append(["Maximum ULP Error", max_ulp_err]) table.append(["Mean ULP Error", mean_ulp_err]) -- Gitee From 6725f5596b9648e2baaba3dd260648962e37b715 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 2 Sep 2024 19:52:29 +0800 Subject: [PATCH 03/14] Reduce parameters --- .../generate_op_script/op_generator.py | 115 +++++++++++------- 1 file changed, 68 insertions(+), 47 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 40affad97..d35081783 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -20,6 +20,7 @@ import os import math import numpy as np import torch +import re try: import torch_npu except ImportError: @@ -28,7 +29,8 @@ except ImportError: from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker, FileOpen from msprobe.core.common.utils import check_file_or_directory_path -import re +from msprobe.core.common.file_check import create_directory + TENSOR_DATA_LIST = ["torch.Tensor"] @@ -40,40 +42,53 @@ TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.floa TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -def check_json(json_path, propagation): - check_file_or_directory_path(json_path) - json_file = os.path.realpath(json_path) - with FileOpen(json_file, 'r') as f: - json_content = json.load(f) - if not isinstance(json_content, dict): - raise ValueError("content of json file is not a dictionary!") - if len(list(json_content.items())) > 2: - raise ValueError("json file has more than one API, only one API is allowed!") - (api_full_name_forward, api_info_dict_forward) = list(json_content.items())[0] - if propagation == 'backward': - (api_full_name_backward, api_info_dict_backward) = list(json_content.items())[1] - - (api_type, api_name, ordinal_number, _) = api_full_name_forward.split(".", -1) - if api_type not in ("Functional", "Tensor", "Torch"): - raise ValueError("type {0} of API is not supported!".format(api_type)) - if propagation == 'backward': - return (api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward) - else: - return (api_full_name_forward, api_info_dict_forward) +class APIResult: + def __init__(self, api_full_name, api_info_dict, backward_info=None): + self.api_full_name = api_full_name + self.api_info_dict = api_info_dict + self.backward_info = backward_info + + @property + def api_type(self): + return self.api_full_name.split(".", -1)[0] + def is_supported_type(self): + return self.api_type in ("Functional", "Tensor", "Torch") + + @classmethod + def from_json(cls, json_content, propagation): + if not isinstance(json_content, dict): + raise ValueError("content of json file is not a dictionary!") + if len(list(json_content.items())) > 2: + raise ValueError("json file has more than one API, only one API is allowed!") + + forward_name, forward_dict = list(json_content.items())[0] + forward_info = cls(api_full_name=forward_name, api_info_dict=forward_dict) + + if propagation == 'backward': + backward_name, backward_dict = list(json_content.items())[1] + backward_info = cls(api_full_name=backward_name, api_info_dict=backward_dict) + forward_info.backward_info = backward_info + + if not forward_info.is_supported_type(): + raise ValueError(f"type {forward_info.api_type} of API is not supported!") + + return forward_info def check_user_settings(cmd_args): iter_t = cmd_args.iter_times if iter_t <= 0: raise ValueError("iter_times should be an integer bigger than zero!") - # 通过 check_json 函数读取JSON文件并检查其内容是否合法。并根据传播方向返回相应的API信息 - if cmd_args.propagation == "backward": - (api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward) = check_json( - cmd_args.output_file, cmd_args.propagation) - return api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward - else: - (api_full_name_forward, api_info_dict_forward) = check_json(cmd_args.output_file, cmd_args.propagation) - return api_full_name_forward, api_info_dict_forward + + json_file = cmd_args.output_file + propagation = cmd_args.propagation + + with FileOpen(json_file, 'r') as f: + json_content = json.load(f) + + api_result = APIResult.from_json(json_content, propagation) + + return api_result def get_compare_standard(api_name): @@ -106,13 +121,17 @@ def get_settings(cmd_args): ''' if cmd_args.propagation == "backward": # 读取和检查json文件 - api_full_name_forward, api_info_dict_forward, api_full_name_backward, api_info_dict_backward = check_user_settings(cmd_args) + api_result = check_user_settings(cmd_args) + api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict + api_full_name_backward, api_info_dict_backward = (api_result.backward_info.api_full_name, + api_result.backward_info.api_info_dict) args_info_forward = api_info_dict_forward.get("input_args") kwargs_info_forward = api_info_dict_forward.get("input_kwargs") args_info_backward = api_info_dict_backward.get("grad_input") else: # 读取和检查json文件 - api_full_name_forward, api_info_dict_forward = check_user_settings(cmd_args) + api_result = check_user_settings(cmd_args) + api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict args_info_forward = api_info_dict_forward.get("input_args") kwargs_info_forward = api_info_dict_forward.get("input_kwargs") @@ -270,7 +289,7 @@ def update_data_name(data, dump_data_dir): for item in data: update_data_name(item, dump_data_dir) elif 'data_name' in data: - data['data_name'] = dump_data_dir + data['data_name'] + data['data_name'] = os.path.join(dump_data_dir, data['data_name']) def load_real_data_path(value, dump_data_dir): if 'input_args' in value: @@ -288,39 +307,41 @@ def extract_op(args): check_file_or_directory_path(args.dump_json_path) with FileOpen(args.dump_json_path, 'r') as file: data = json.load(file) - extract_key_pattern = re.compile(f"^{re.escape(args.module_name)}\..+") new_data = {} + real_data_path = '' + output_path = os.path.dirname(args.output_file) + create_directory(output_path) + module_name = os.path.splitext(os.path.basename(args.output_file))[0] + extract_key_pattern = re.compile(f"^{re.escape(module_name)}\..+") + try: + real_data_path = data['dump_data_dir'] + except: + pass for key, value in data['data'].items(): if extract_key_pattern.match(key): - if len(args.real_data_path) != 0: + if real_data_path: value = load_real_data_path(value, args.real_data_path) new_data[key] = value if not new_data: - print(f"Error: The module '{args.module_name}' does not exist in the file.") + print(f"Error: The module '{module_name}' does not exist in the file.") else: with FileOpen(args.output_file, 'w') as file: json.dump(new_data, file, indent=4) - print(f"The module '{args.module_name}' has been successfully extracted and saved in: {args.output_file}") + print(f"The module '{module_name}' has been successfully extracted and saved in: {args.output_file}") def op_generator_parser(parser): parser.add_argument("-dump", "--dump_json_path", dest="dump_json_path", default='', type=str, help=" Path of dump json file.", required=False) parser.add_argument("-o", "--output_file", dest="output_file", type=str, - help=" Path of API json file.", + help=" Path of extract api_name.json.", required=True) - parser.add_argument("-n", "--module_name", dest="module_name", type=str, - help=" extract module name.", - required=True) - parser.add_argument("-r", "--real_data_path", dest="real_data_path", default='', type=str, - help=" Path of real data files.", - required=False) parser.add_argument("-p", "--propagation", dest="propagation", default='forward', type=str, help=" forward or backward.", - required=True) - parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), + required=False) + parser.add_argument("-m", "--mode", dest="mode", type=str, default="random_data", help=" Execute mode, should be random_data or real_data.", - required=True) + required=False) parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, help=" If mode is random_data, it is random seed.", required=False) @@ -334,7 +355,7 @@ def main(): op_generator_parser(parser) cmd_args = parser.parse_args() - if len(cmd_args.dump_json_path) != 0: + if cmd_args.dump_json_path: extract_op(cmd_args) internal_settings = get_settings(cmd_args) -- Gitee From 9a30dd7fe52e5c2159a77957bfaff26b4347bd9e Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 2 Sep 2024 19:55:06 +0800 Subject: [PATCH 04/14] bugfix --- .../api_accuracy_checker/generate_op_script/op_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index d35081783..4f31d4026 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -320,7 +320,7 @@ def extract_op(args): for key, value in data['data'].items(): if extract_key_pattern.match(key): if real_data_path: - value = load_real_data_path(value, args.real_data_path) + value = load_real_data_path(value, real_data_path) new_data[key] = value if not new_data: print(f"Error: The module '{module_name}' does not exist in the file.") -- Gitee From 8896a30e823b7efea4eb948c72de9008ebfc6767 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 2 Sep 2024 20:07:52 +0800 Subject: [PATCH 05/14] bugfix --- .../generate_op_script/op_generator.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 4f31d4026..6c33e62ef 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -48,13 +48,13 @@ class APIResult: self.api_info_dict = api_info_dict self.backward_info = backward_info + def is_supported_type(self): + return self.api_type in ("Functional", "Tensor", "Torch") + @property def api_type(self): return self.api_full_name.split(".", -1)[0] - def is_supported_type(self): - return self.api_type in ("Functional", "Tensor", "Torch") - @classmethod def from_json(cls, json_content, propagation): if not isinstance(json_content, dict): @@ -308,7 +308,6 @@ def extract_op(args): with FileOpen(args.dump_json_path, 'r') as file: data = json.load(file) new_data = {} - real_data_path = '' output_path = os.path.dirname(args.output_file) create_directory(output_path) module_name = os.path.splitext(os.path.basename(args.output_file))[0] @@ -316,7 +315,7 @@ def extract_op(args): try: real_data_path = data['dump_data_dir'] except: - pass + real_data_path = '' for key, value in data['data'].items(): if extract_key_pattern.match(key): if real_data_path: -- Gitee From 0d0dd15d5a2765bb7e9ec533f2fe44453201297b Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 2 Sep 2024 20:21:12 +0800 Subject: [PATCH 06/14] bugfix --- .../generate_op_script/op_generator.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 6c33e62ef..897d2b297 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -17,10 +17,11 @@ import argparse import json import os +import re import math import numpy as np import torch -import re + try: import torch_npu except ImportError: @@ -48,9 +49,6 @@ class APIResult: self.api_info_dict = api_info_dict self.backward_info = backward_info - def is_supported_type(self): - return self.api_type in ("Functional", "Tensor", "Torch") - @property def api_type(self): return self.api_full_name.split(".", -1)[0] @@ -75,6 +73,9 @@ class APIResult: return forward_info + def is_supported_type(self): + return self.api_type in ("Functional", "Tensor", "Torch") + def check_user_settings(cmd_args): iter_t = cmd_args.iter_times if iter_t <= 0: @@ -312,9 +313,9 @@ def extract_op(args): create_directory(output_path) module_name = os.path.splitext(os.path.basename(args.output_file))[0] extract_key_pattern = re.compile(f"^{re.escape(module_name)}\..+") - try: + if 'dump_data_dir' in data: real_data_path = data['dump_data_dir'] - except: + else: real_data_path = '' for key, value in data['data'].items(): if extract_key_pattern.match(key): -- Gitee From 2d1dce4741d1c4bcb9bab0490224b35de9903d0c Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 9 Sep 2024 10:05:24 +0800 Subject: [PATCH 07/14] Add the api name parameter --- .../generate_op_script/op_generator.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 897d2b297..7934b9f98 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -311,8 +311,7 @@ def extract_op(args): new_data = {} output_path = os.path.dirname(args.output_file) create_directory(output_path) - module_name = os.path.splitext(os.path.basename(args.output_file))[0] - extract_key_pattern = re.compile(f"^{re.escape(module_name)}\..+") + extract_key_pattern = re.compile(f"^{re.escape(args.api_name)}\..+") if 'dump_data_dir' in data: real_data_path = data['dump_data_dir'] else: @@ -323,11 +322,11 @@ def extract_op(args): value = load_real_data_path(value, real_data_path) new_data[key] = value if not new_data: - print(f"Error: The module '{module_name}' does not exist in the file.") + print(f"Error: The api '{args.api_name}' does not exist in the file.") else: with FileOpen(args.output_file, 'w') as file: json.dump(new_data, file, indent=4) - print(f"The module '{module_name}' has been successfully extracted and saved in: {args.output_file}") + print(f"The api '{args.api_name}' has been successfully extracted and saved in: {args.output_file}") def op_generator_parser(parser): parser.add_argument("-dump", "--dump_json_path", dest="dump_json_path", default='', type=str, @@ -336,6 +335,9 @@ def op_generator_parser(parser): parser.add_argument("-o", "--output_file", dest="output_file", type=str, help=" Path of extract api_name.json.", required=True) + parser.add_argument("-n", "--api_name", dest="api_name", type=str, + help=" extract api_name.", + required=True) parser.add_argument("-p", "--propagation", dest="propagation", default='forward', type=str, help=" forward or backward.", required=False) @@ -355,8 +357,8 @@ def main(): op_generator_parser(parser) cmd_args = parser.parse_args() - if cmd_args.dump_json_path: - extract_op(cmd_args) + # if cmd_args.dump_json_path: + extract_op(cmd_args) internal_settings = get_settings(cmd_args) template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") -- Gitee From bd7cde6fc0d93c49766791847f2c38c76a817e09 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 9 Sep 2024 15:31:57 +0800 Subject: [PATCH 08/14] Modified Review --- .../generate_op_script/op_generator.py | 84 ++++++++++------- .../operator_replication.template | 89 ++++++++++--------- 2 files changed, 96 insertions(+), 77 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 7934b9f98..a1912e05b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -28,10 +28,10 @@ except ImportError: pass from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi -from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker, FileOpen +from msprobe.core.common.file_check import FileOpen from msprobe.core.common.utils import check_file_or_directory_path from msprobe.core.common.file_check import create_directory - +from msprobe.core.common.const import Const TENSOR_DATA_LIST = ["torch.Tensor"] @@ -51,19 +51,19 @@ class APIResult: @property def api_type(self): - return self.api_full_name.split(".", -1)[0] + return self.api_full_name.split(Const.SEP, -1)[0] @classmethod def from_json(cls, json_content, propagation): if not isinstance(json_content, dict): raise ValueError("content of json file is not a dictionary!") - if len(list(json_content.items())) > 2: + if len(json_content) > 2: raise ValueError("json file has more than one API, only one API is allowed!") forward_name, forward_dict = list(json_content.items())[0] forward_info = cls(api_full_name=forward_name, api_info_dict=forward_dict) - if propagation == 'backward': + if propagation == Const.BACKWARD: backward_name, backward_dict = list(json_content.items())[1] backward_info = cls(api_full_name=backward_name, api_info_dict=backward_dict) forward_info.backward_info = backward_info @@ -101,6 +101,27 @@ def get_compare_standard(api_name): return "CompareStandard.ULP_ERROR_STANDARD" return "CompareStandard.BENCHMARK_STANDARD" +def extract_detailed_api_segments(full_api_name_with_direction_status): + """ + Function Description: + Extract the name of the API. + Parameter: + full_api_name_with_direction_status: Full name of the API. Example: torch.matmul.0.forward.output.0 + Return: + api_name: Name of api. Example: matmul, mul, etc. + full_api_name: Full name of api. Example: torch.matmul.0 + direction_status: Direction status of api. Example: forward, backward, etc. + """ + api_parts = full_api_name_with_direction_status.split(Const.SEP) + api_parts_length = len(api_parts) + if api_parts_length == 6: + api_type, api_name, api_order, direction_status, _, _ = api_parts + elif api_parts_length == 7: + api_type, prefix, api_name, api_order, direction_status, _, _ = api_parts + api_name = Const.SEP.join([prefix, api_name]) + else: + api_type, api_name, api_order, _= api_parts + return api_type, api_name, api_order def get_settings(cmd_args): ''' @@ -120,31 +141,34 @@ def get_settings(cmd_args): kwargs_dict_generator_device: code for generate kwargs dict on device kwargs_dict_generator_bench: code for generate kwargs dict on bench ''' - if cmd_args.propagation == "backward": + if cmd_args.propagation == Const.BACKWARD: # 读取和检查json文件 api_result = check_user_settings(cmd_args) api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict api_full_name_backward, api_info_dict_backward = (api_result.backward_info.api_full_name, api_result.backward_info.api_info_dict) - args_info_forward = api_info_dict_forward.get("input_args") - kwargs_info_forward = api_info_dict_forward.get("input_kwargs") - args_info_backward = api_info_dict_backward.get("grad_input") + args_info_forward = api_info_dict_forward.get(Const.INPUT_ARGS) + kwargs_info_forward = api_info_dict_forward.get(Const.INPUT_KWARGS) + if Const.GRAD_INPUT in api_info_dict_backward: + args_info_backward = api_info_dict_backward.get(Const.GRAD_INPUT) + elif Const.INPUT in api_info_dict_backward: + args_info_backward = api_info_dict_backward.get(Const.INPUT) else: # 读取和检查json文件 api_result = check_user_settings(cmd_args) api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict - args_info_forward = api_info_dict_forward.get("input_args") - kwargs_info_forward = api_info_dict_forward.get("input_kwargs") + args_info_forward = api_info_dict_forward.get(Const.INPUT_ARGS) + kwargs_info_forward = api_info_dict_forward.get(Const.INPUT_KWARGS) # 根据用户设置生成内部设置字典,包含API名称、类型、比较标准、随机种子、迭代次数等信息 internal_settings = {} internal_settings["propagation"] = cmd_args.propagation - if cmd_args.propagation == "backward": - internal_settings['api_full_name'] = api_full_name_backward - (api_type, api_name, ordinal_number, _) = api_full_name_backward.split(".", -1) + if cmd_args.propagation == Const.BACKWARD: + internal_settings["api_full_name"] = api_full_name_backward + (api_type, api_name, ordinal_number, _) = extract_detailed_api_segments(api_full_name_backward) else: internal_settings["api_full_name"] = api_full_name_forward - (api_type, api_name, ordinal_number, _) = api_full_name_forward.split(".", -1) + (api_type, api_name, ordinal_number, _) = extract_detailed_api_segments(api_full_name_forward) if api_type == "Functional": internal_settings["api_type"] = "torch.nn.functional" elif api_type == "Tensor": @@ -166,7 +190,7 @@ def get_settings(cmd_args): internal_settings["kwargs_value_assignment"] = generate_kwargs_value_assignment_code(kwargs_info_forward) internal_settings["kwargs_dict_generator_device"] = generate_kwargs_dict_device(kwargs_info_forward) internal_settings["kwargs_dict_generator_bench"] = generate_kwargs_dict_bench(kwargs_info_forward) - if cmd_args.propagation == "backward": + if cmd_args.propagation == Const.BACKWARD: internal_settings["args_element_assignment_backward"] = generate_args_element_assignment_code(args_info_backward) internal_settings["args_list_generator_device_backward"] = generate_args_list_device(args_info_backward) internal_settings["args_list_generator_bench_backward"] = generate_args_list_bench(args_info_backward) @@ -293,16 +317,11 @@ def update_data_name(data, dump_data_dir): data['data_name'] = os.path.join(dump_data_dir, data['data_name']) def load_real_data_path(value, dump_data_dir): - if 'input_args' in value: - for v in value['input_args']: - update_data_name(v, dump_data_dir) - if 'grad_input' in value: - for v in value['grad_input']: + parameters = [Const.INPUT_ARGS, Const.GRAD_INPUT, Const.INPUT, Const.OUTPUT, Const.GRAD_OUTPUT] + for parameter in parameters: + for v in value.get(parameter, []): if v is not None: update_data_name(v, dump_data_dir) - for v in value['output']: - update_data_name(v, dump_data_dir) - return value def extract_op(args): check_file_or_directory_path(args.dump_json_path) @@ -312,11 +331,8 @@ def extract_op(args): output_path = os.path.dirname(args.output_file) create_directory(output_path) extract_key_pattern = re.compile(f"^{re.escape(args.api_name)}\..+") - if 'dump_data_dir' in data: - real_data_path = data['dump_data_dir'] - else: - real_data_path = '' - for key, value in data['data'].items(): + real_data_path = data.get('dump_data_dir', '') + for key, value in data.get('data', {}).items(): if extract_key_pattern.match(key): if real_data_path: value = load_real_data_path(value, real_data_path) @@ -335,13 +351,15 @@ def op_generator_parser(parser): parser.add_argument("-o", "--output_file", dest="output_file", type=str, help=" Path of extract api_name.json.", required=True) - parser.add_argument("-n", "--api_name", dest="api_name", type=str, + parser.add_argument("-a", "--api_name", dest="api_name", type=str, help=" extract api_name.", - required=True) + required=False) parser.add_argument("-p", "--propagation", dest="propagation", default='forward', type=str, + choices=['forward', 'backward'], help=" forward or backward.", required=False) parser.add_argument("-m", "--mode", dest="mode", type=str, default="random_data", + choices=['random_data', 'real_data'], help=" Execute mode, should be random_data or real_data.", required=False) parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, @@ -357,8 +375,8 @@ def main(): op_generator_parser(parser) cmd_args = parser.parse_args() - # if cmd_args.dump_json_path: - extract_op(cmd_args) + if cmd_args.dump_json_path: + extract_op(cmd_args) internal_settings = get_settings(cmd_args) template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index a6be61a50..bc3493c5a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -6,11 +6,11 @@ import torch try: import torch_npu except ImportError: - from torch.cuda.amp import autocast pass from tabulate import tabulate from msprobe.core.common.utils import check_file_or_directory_path - +from msprobe.core.common.log import logger +from msprobe.core.common.const import Const TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] TORCH_BOOL_TYPE = ["torch.bool"] @@ -35,6 +35,17 @@ class CompareStandard(Enum): BENCHMARK_STANDARD = auto() THOUSANDTH_STANDARD = auto() +def load_pt(pt_path, to_cpu=False): + pt_path = os.path.realpath(pt_path) + check_file_or_directory_path(pt_path) + try: + if to_cpu: + pt = torch.load(pt_path, map_location=torch.device("cpu")) + else: + pt = torch.load(pt_path) + except Exception as e: + raise RuntimeError(f"load pt file {{pt_path}} failed") from e + return pt def get_device(): if torch.cuda.is_available(): @@ -86,10 +97,7 @@ def generate_random_tensor(info): def generate_real_tensor(data_path): check_file_or_directory_path(data_path) data_path = os.path.realpath(data_path) - try: - data = torch.load(data_path, map_location='cpu') - except Exception as e: - raise RuntimeError(f"An unexpected error occurred:{{e}} when loading grad_file.") from e + data = load_pt(data_path, to_cpu = True) return data @@ -119,13 +127,13 @@ def get_input(propagation): {args_element_assignment_backward} args_device_backward = [{args_list_generator_device_backward}] args_bench_backward = [{args_list_generator_bench_backward}] - if propagation == 'backward': + if propagation == Const.BACKWARD: return args_device, kwargs_device, args_bench, kwargs_bench, args_device_backward, args_bench_backward return args_device, kwargs_device, args_bench, kwargs_bench def exec_api_device(args, kwargs, args_grad_input, propagation): output_device = {api_type}.{api_name}(*args, **kwargs) - if propagation == 'backward': + if propagation == Const.BACKWARD: args_input_tensor = [tensor for tensor in args if isinstance(tensor, torch.Tensor) and tensor.requires_grad] args_input_tensor.extend( [value for value in kwargs.values() if isinstance(value, torch.Tensor) and value.requires_grad]) @@ -136,7 +144,7 @@ def exec_api_device(args, kwargs, args_grad_input, propagation): def exec_api_bench(args, kwargs, args_grad_input, propagation): output_bench = {api_type}.{api_name}(*args, **kwargs) - if propagation == 'backward': + if propagation == Const.BACKWARD: args_input_tensor = [tensor for tensor in args if isinstance(tensor, torch.Tensor) and tensor.requires_grad] args_input_tensor.extend( [value for value in kwargs.values() if isinstance(value, torch.Tensor) and value.requires_grad]) @@ -175,20 +183,18 @@ def compute_rmse(abs_err, normal_value_mask): def compute_error_balance(out_device, out_bench): larger_count = torch.sum(torch.greater(out_device - out_bench.to(out_device.dtype), 0)) smaller_count = torch.sum(torch.less(out_device - out_bench.to(out_device.dtype), 0)) - total_count = torch.numel(out_bench) - try: - error_balance = abs(larger_count - smaller_count) / total_count - except ZeroDivisionError: - print(f"ERROR: please check torch.numel out_bench", torch.numel(out_bench)) + if torch.numel(out_bench) == 0 + raise ZeroDivisionError(f"ERROR: please check torch.numel out_bench, its value is {{torch.numel(out_bench)}}") + error_balance = abs(larger_count - smaller_count) / torch.numel(out_bench) return error_balance def compare_tensor(out_device, out_bench, api_name): if out_device.shape != out_bench.shape: - print("ERROR: shape of out_device and out_bench is not equal!") + logger.error("ERROR: shape of out_device and out_bench is not equal!") return None if torch.numel(out_bench) == 0: - print("Both out_device and out_bench have zero elements.") + logger.error("Both out_device and out_bench have zero elements.") return None dtype_device = out_device.dtype dtype_bench = out_bench.dtype @@ -204,10 +210,9 @@ def compare_tensor(out_device, out_bench, api_name): out_device = out_device.to(torch.device("cpu")) if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or compare_standard == CompareStandard.BINARY_EQUALITY_STANDARD: error_number = torch.sum(out_device != out_bench).item() - try: - error_rate = error_number / torch.numel(out_bench) - except ZeroDivisionError: - print(f"ERROR: please check torch.numel out_bench", torch.numel(out_bench)) + if torch.numel(out_bench) == 0 + raise ZeroDivisionError(f"ERROR: please check torch.numel out_bench, its value is {{torch.numel(out_bench)}}") + error_rate = error_number / torch.numel(out_bench) table.append(["Compare Standard", "Binary Equality Standard"]) table.append(["Error Rate", error_rate]) else: @@ -265,13 +270,12 @@ def compare_tensor(out_device, out_bench, api_name): ulp_err = torch.abs(ulp_err) max_ulp_err = torch.max(ulp_err) mean_ulp_err = torch.mean(ulp_err) - try: - if dtype_device == torch.float32: - ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) - else: - ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) - except ZeroDivisionError: - print(f"ERROR: please check torch.numel out_bench", torch.numel(out_bench)) + if torch.numel(out_bench) == 0 + raise ZeroDivisionError(f"ERROR: please check torch.numel out_bench, its value is {{torch.numel(out_bench)}}") + if dtype_device == torch.float32: + ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) + else: + ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) table.append(["Compare Standard", "ULP error Standard"]) table.append(["Maximum ULP Error", max_ulp_err]) table.append(["Mean ULP Error", mean_ulp_err]) @@ -320,7 +324,7 @@ def compare_tensor(out_device, out_bench, api_name): table.append(["Root Mean Squared Error", rmse]) table.append(["Error Balance", error_balance]) else: - print(f"ERROR: out_device dtype is {{dtype_device}}, out_bench dtype is {{dtype_bench}}, not comparable.") + logger.error(f"ERROR: out_device dtype is {{dtype_device}}, out_bench dtype is {{dtype_bench}}, not comparable.") return None print(tabulate(table, headers, tablefmt='grid')) return None @@ -328,40 +332,37 @@ def compare_tensor(out_device, out_bench, api_name): def compare_element(out_device, out_bench, api_name): if type(out_device) != type(out_bench): - print("ERROR: out_device and out_bench is not the same type!") + logger.error("ERROR: out_device and out_bench is not the same type!") return None if isinstance(out_bench, torch.Tensor): - # print(f"data type: {{type(out_bench)}}") compare_tensor(out_device, out_bench, api_name) elif isinstance(out_bench, (bool, int, float, str)): - # print(f"data type: {{type(out_bench)}}") if out_device == out_bench: - print("PASS: out_device and out_bench equals.") + logger.info("PASS: out_device and out_bench equals.") else: - print("ERROR: out_device and out_bench is not equal!") + logger.error("ERROR: out_device and out_bench is not equal!") else: - print(f"ERROR: comparison of type {{type(out_bench)}} is not supported.") + logger.error(f"ERROR: comparison of type {{type(out_bench)}} is not supported.") return None def compare(out_device, out_bench, api_name): - print("Compare result:") + logger.info("Compare result:") if type(out_device) != type(out_bench): - print("ERROR: out_device and out_bench is not the same type!") - print("Compare finished.") + logger.error("ERROR: out_device and out_bench is not the same type!") + logger.info("Compare finished.") return None if isinstance(out_bench, (list, tuple)): - # print(f"data type: {{type(out_bench)}}") if len(out_device) != len(out_bench): - print("ERROR: len of out_device and out_bench is different!") - print("Compare finished.") + logger.error("ERROR: len of out_device and out_bench is different!") + logger.info("Compare finished.") return None for index, _ in enumerate(out_bench): - print(f"index {{index}}:") + logger.info(f"index {{index}}:") compare_element(out_device[index], out_bench[index], api_name) else: compare_element(out_device, out_bench, api_name) - print("Compare finished.") + logger.info("Compare finished.") device = get_device() @@ -370,8 +371,8 @@ propagation = "{propagation}" compare_standard = {compare_standard} torch.manual_seed({random_seed}) for i in range({iter_times}): - print(f"iter: {{i}}:") - if propagation == 'backward': + logger.info(f"iter: {{i}}:") + if propagation == Const.BACKWARD: args_device, kwargs_device, args_bench, kwargs_bench, args_device_backward, args_bench_backward = get_input(propagation) output_device = exec_api_device(args_device, kwargs_device, args_device_backward, propagation) output_bench = exec_api_bench(args_bench, kwargs_bench, args_bench_backward, propagation) -- Gitee From 8348d56bf89534d0f337158d3d92957b3e1b02b7 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 9 Sep 2024 15:45:28 +0800 Subject: [PATCH 09/14] bugfix --- .../generate_op_script/op_generator.py | 9 +++++---- .../generate_op_script/operator_replication.template | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index a1912e05b..6f2630faa 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -101,7 +101,7 @@ def get_compare_standard(api_name): return "CompareStandard.ULP_ERROR_STANDARD" return "CompareStandard.BENCHMARK_STANDARD" -def extract_detailed_api_segments(full_api_name_with_direction_status): +def extract_detailed_api_segments(full_api_name): """ Function Description: Extract the name of the API. @@ -112,7 +112,7 @@ def extract_detailed_api_segments(full_api_name_with_direction_status): full_api_name: Full name of api. Example: torch.matmul.0 direction_status: Direction status of api. Example: forward, backward, etc. """ - api_parts = full_api_name_with_direction_status.split(Const.SEP) + api_parts = full_api_name.split(Const.SEP) api_parts_length = len(api_parts) if api_parts_length == 6: api_type, api_name, api_order, direction_status, _, _ = api_parts @@ -322,6 +322,7 @@ def load_real_data_path(value, dump_data_dir): for v in value.get(parameter, []): if v is not None: update_data_name(v, dump_data_dir) + return value def extract_op(args): check_file_or_directory_path(args.dump_json_path) @@ -355,11 +356,11 @@ def op_generator_parser(parser): help=" extract api_name.", required=False) parser.add_argument("-p", "--propagation", dest="propagation", default='forward', type=str, - choices=['forward', 'backward'], + choices=('forward', 'backward'), help=" forward or backward.", required=False) parser.add_argument("-m", "--mode", dest="mode", type=str, default="random_data", - choices=['random_data', 'real_data'], + choices=('random_data', 'real_data'), help=" Execute mode, should be random_data or real_data.", required=False) parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index bc3493c5a..ffda98a7f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -183,7 +183,7 @@ def compute_rmse(abs_err, normal_value_mask): def compute_error_balance(out_device, out_bench): larger_count = torch.sum(torch.greater(out_device - out_bench.to(out_device.dtype), 0)) smaller_count = torch.sum(torch.less(out_device - out_bench.to(out_device.dtype), 0)) - if torch.numel(out_bench) == 0 + if torch.numel(out_bench) == 0: raise ZeroDivisionError(f"ERROR: please check torch.numel out_bench, its value is {{torch.numel(out_bench)}}") error_balance = abs(larger_count - smaller_count) / torch.numel(out_bench) return error_balance @@ -210,7 +210,7 @@ def compare_tensor(out_device, out_bench, api_name): out_device = out_device.to(torch.device("cpu")) if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or compare_standard == CompareStandard.BINARY_EQUALITY_STANDARD: error_number = torch.sum(out_device != out_bench).item() - if torch.numel(out_bench) == 0 + if torch.numel(out_bench) == 0: raise ZeroDivisionError(f"ERROR: please check torch.numel out_bench, its value is {{torch.numel(out_bench)}}") error_rate = error_number / torch.numel(out_bench) table.append(["Compare Standard", "Binary Equality Standard"]) @@ -270,7 +270,7 @@ def compare_tensor(out_device, out_bench, api_name): ulp_err = torch.abs(ulp_err) max_ulp_err = torch.max(ulp_err) mean_ulp_err = torch.mean(ulp_err) - if torch.numel(out_bench) == 0 + if torch.numel(out_bench) == 0: raise ZeroDivisionError(f"ERROR: please check torch.numel out_bench, its value is {{torch.numel(out_bench)}}") if dtype_device == torch.float32: ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) -- Gitee From b8838aedf4f66a4e63a3977569e748fb95700146 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 9 Sep 2024 15:52:46 +0800 Subject: [PATCH 10/14] bugfix --- .../generate_op_script/op_generator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 6f2630faa..c5a11404c 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -165,10 +165,10 @@ def get_settings(cmd_args): internal_settings["propagation"] = cmd_args.propagation if cmd_args.propagation == Const.BACKWARD: internal_settings["api_full_name"] = api_full_name_backward - (api_type, api_name, ordinal_number, _) = extract_detailed_api_segments(api_full_name_backward) + api_type, api_name, ordinal_number = extract_detailed_api_segments(api_full_name_backward) else: internal_settings["api_full_name"] = api_full_name_forward - (api_type, api_name, ordinal_number, _) = extract_detailed_api_segments(api_full_name_forward) + api_type, api_name, ordinal_number = extract_detailed_api_segments(api_full_name_forward) if api_type == "Functional": internal_settings["api_type"] = "torch.nn.functional" elif api_type == "Tensor": @@ -356,11 +356,11 @@ def op_generator_parser(parser): help=" extract api_name.", required=False) parser.add_argument("-p", "--propagation", dest="propagation", default='forward', type=str, - choices=('forward', 'backward'), + choices=("forward", "backward"), help=" forward or backward.", required=False) parser.add_argument("-m", "--mode", dest="mode", type=str, default="random_data", - choices=('random_data', 'real_data'), + choices=("random_data", "real_data"), help=" Execute mode, should be random_data or real_data.", required=False) parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, -- Gitee From ca430011ae092e11d68761e168f802a93c8d15ab Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Sat, 21 Sep 2024 10:52:28 +0800 Subject: [PATCH 11/14] code review --- .../generate_op_script/op_generator.py | 162 ++++++++++++------ .../operator_replication.template | 7 +- 2 files changed, 111 insertions(+), 58 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index c5a11404c..5d3b843fe 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -32,7 +32,7 @@ from msprobe.core.common.file_check import FileOpen from msprobe.core.common.utils import check_file_or_directory_path from msprobe.core.common.file_check import create_directory from msprobe.core.common.const import Const - +from msprobe.core.common.log import logger TENSOR_DATA_LIST = ["torch.Tensor"] TORCH_BOOL_TYPE = ["torch.bool"] @@ -41,8 +41,13 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] +OPERATOR_TYPE = ("Functional", "Tensor", "Torch") - +API_INFO = 2 +FOUR_SEGMENT = 4 +FIVE_SEGMENT = 5 +DATA_NAME = 'data_name' +API_MAX_LENGTH = 30 class APIResult: def __init__(self, api_full_name, api_info_dict, backward_info=None): self.api_full_name = api_full_name @@ -55,11 +60,6 @@ class APIResult: @classmethod def from_json(cls, json_content, propagation): - if not isinstance(json_content, dict): - raise ValueError("content of json file is not a dictionary!") - if len(json_content) > 2: - raise ValueError("json file has more than one API, only one API is allowed!") - forward_name, forward_dict = list(json_content.items())[0] forward_info = cls(api_full_name=forward_name, api_info_dict=forward_dict) @@ -74,7 +74,40 @@ class APIResult: return forward_info def is_supported_type(self): - return self.api_type in ("Functional", "Tensor", "Torch") + return self.api_type in OPERATOR_TYPE + +def check_path_pattern_valid(path_pattern): + pattern = re.compile(r'(\.|/|:|_|-|\s|[~0-9a-zA-Z])+') + if not pattern.fullmatch(path_pattern): + raise ValueError('Only the following characters are allowed in the path: A-Z a-z 0-9 - _ . / :') + if len(path_pattern) > API_MAX_LENGTH: + raise ValueError(f'API name {path_pattern} is too long!') + +def check_json_content(json_content, propagation): + # ensure json_content is of type dict + if not isinstance(json_content, dict): + raise ValueError(f'content of json file is not a dict!') + # ensure the dict is not empty + if not json_content: + raise ValueError(f'json file is empty!') + # ensure the length of json_content is within allowed limits + if len(json_content) > API_INFO: + raise ValueError(f'json file has more than one API, the API only contains forward and backward info') + # Retrieve the first API name and dictionary + forward_item = next(iter(json_content.items()), None) + if not forward_item or not isinstance(forward_item[1], dict): + raise ValueError(f'Invalid forward API data in json_content!') + + # if propagation is backward, ensure json file contains forward and backward info + if propagation == Const.BACKWARD and len(json_content) < API_INFO: + raise ValueError(f'Backward propagation requires contains forward and backward info!') + + # if propagation is backward, ensure it has valid data + if propagation == Const.BACKWARD: + backward_item = list(json_content.items())[1] + if not isinstance(backward_item[1], dict): + raise ValueError(f'Invalid backward API data in json_content!') + def check_user_settings(cmd_args): iter_t = cmd_args.iter_times @@ -87,6 +120,7 @@ def check_user_settings(cmd_args): with FileOpen(json_file, 'r') as f: json_content = json.load(f) + check_json_content(json_content, propagation) api_result = APIResult.from_json(json_content, propagation) return api_result @@ -114,35 +148,17 @@ def extract_detailed_api_segments(full_api_name): """ api_parts = full_api_name.split(Const.SEP) api_parts_length = len(api_parts) - if api_parts_length == 6: - api_type, api_name, api_order, direction_status, _, _ = api_parts - elif api_parts_length == 7: - api_type, prefix, api_name, api_order, direction_status, _, _ = api_parts + api_type, api_name, api_order = None, None, None + if api_parts_length == FOUR_SEGMENT: + api_type, api_name, api_order, _ = api_parts + elif api_parts_length == FIVE_SEGMENT: + api_type, prefix, api_name, api_order, _ = api_parts api_name = Const.SEP.join([prefix, api_name]) - else: - api_type, api_name, api_order, _= api_parts return api_type, api_name, api_order -def get_settings(cmd_args): - ''' - internal_settings contain all information needed for the operator program. - keys: - api_full_name: api_type.api_name.ordinal_number - api_type: type of API, one of torch.nn.functional, torch.Tensor or Torch - api_name: name of API - ordinal_number: how many times the same api has been called - direction_status: forward - random_seed: if mode is random_data, random seed is random_seed - iter_times: if mode is random_data, generate iter_times group of data; if mode is real_data, iter_times does not matter - args_element_assignment: code for args assignment - args_list_generator_device: code for generate args list on device - args_list_generator_bench: code for generate args list on bench - kwargs_value_assignment: code for kwargs assignment - kwargs_dict_generator_device: code for generate kwargs dict on device - kwargs_dict_generator_bench: code for generate kwargs dict on bench - ''' +def check_json_legality(cmd_args): if cmd_args.propagation == Const.BACKWARD: - # 读取和检查json文件 + # read and check json api_result = check_user_settings(cmd_args) api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict api_full_name_backward, api_info_dict_backward = (api_result.backward_info.api_full_name, @@ -153,22 +169,39 @@ def get_settings(cmd_args): args_info_backward = api_info_dict_backward.get(Const.GRAD_INPUT) elif Const.INPUT in api_info_dict_backward: args_info_backward = api_info_dict_backward.get(Const.INPUT) + return api_full_name_backward, args_info_forward, kwargs_info_forward, args_info_backward else: - # 读取和检查json文件 + # read and check json api_result = check_user_settings(cmd_args) api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict args_info_forward = api_info_dict_forward.get(Const.INPUT_ARGS) kwargs_info_forward = api_info_dict_forward.get(Const.INPUT_KWARGS) + return api_full_name_forward, args_info_forward, kwargs_info_forward - # 根据用户设置生成内部设置字典,包含API名称、类型、比较标准、随机种子、迭代次数等信息 +def get_settings(cmd_args, api_full_name, args_info_forward, kwargs_info_forward, args_info_backward): + ''' + internal_settings contain all information needed for the operator program. + keys: + api_full_name: api_type.api_name.ordinal_number + api_type: type of API, one of torch.nn.functional, torch.Tensor or Torch + api_name: name of API + ordinal_number: how many times the same api has been called + direction_status: forward + random_seed: if mode is random_data, random seed is random_seed + iter_times: if mode is random_data, generate iter_times group of data; if mode is real_data, iter_times does not matter + args_element_assignment: code for args assignment + args_list_generator_device: code for generate args list on device + args_list_generator_bench: code for generate args list on bench + kwargs_value_assignment: code for kwargs assignment + kwargs_dict_generator_device: code for generate kwargs dict on device + kwargs_dict_generator_bench: code for generate kwargs dict on bench + ''' + # Generate an internal setting dictionary based on user settings + # including API name, type, comparison standard, random seed, number of iterations and other information internal_settings = {} internal_settings["propagation"] = cmd_args.propagation - if cmd_args.propagation == Const.BACKWARD: - internal_settings["api_full_name"] = api_full_name_backward - api_type, api_name, ordinal_number = extract_detailed_api_segments(api_full_name_backward) - else: - internal_settings["api_full_name"] = api_full_name_forward - api_type, api_name, ordinal_number = extract_detailed_api_segments(api_full_name_forward) + internal_settings["api_full_name"] = api_full_name + api_type, api_name, ordinal_number = extract_detailed_api_segments(api_full_name) if api_type == "Functional": internal_settings["api_type"] = "torch.nn.functional" elif api_type == "Tensor": @@ -313,8 +346,8 @@ def update_data_name(data, dump_data_dir): if isinstance(data, list): for item in data: update_data_name(item, dump_data_dir) - elif 'data_name' in data: - data['data_name'] = os.path.join(dump_data_dir, data['data_name']) + elif DATA_NAME in data: + data[DATA_NAME] = os.path.join(dump_data_dir, data[DATA_NAME]) def load_real_data_path(value, dump_data_dir): parameters = [Const.INPUT_ARGS, Const.GRAD_INPUT, Const.INPUT, Const.OUTPUT, Const.GRAD_OUTPUT] @@ -326,6 +359,7 @@ def load_real_data_path(value, dump_data_dir): def extract_op(args): check_file_or_directory_path(args.dump_json_path) + check_path_pattern_valid(args.api_name) with FileOpen(args.dump_json_path, 'r') as file: data = json.load(file) new_data = {} @@ -339,35 +373,40 @@ def extract_op(args): value = load_real_data_path(value, real_data_path) new_data[key] = value if not new_data: - print(f"Error: The api '{args.api_name}' does not exist in the file.") + logger.error(f"Error: The api '{args.api_name}' does not exist in the file.") else: with FileOpen(args.output_file, 'w') as file: json.dump(new_data, file, indent=4) - print(f"The api '{args.api_name}' has been successfully extracted and saved in: {args.output_file}") + logger.info(f"The api '{args.api_name}' has been successfully extracted and saved in: {args.output_file}") def op_generator_parser(parser): parser.add_argument("-dump", "--dump_json_path", dest="dump_json_path", default='', type=str, - help=" Path of dump json file.", + help=" Path of dump json file, if the API has already been extracted," + "don't need to set it up.", required=False) parser.add_argument("-o", "--output_file", dest="output_file", type=str, help=" Path of extract api_name.json.", required=True) parser.add_argument("-a", "--api_name", dest="api_name", type=str, - help=" extract api_name.", + help=" extract api_name, if the API has already been extracted," + "don't need to set it up.", required=False) parser.add_argument("-p", "--propagation", dest="propagation", default='forward', type=str, choices=("forward", "backward"), - help=" forward or backward.", + help=" forward or backward, the default value is forward.", required=False) parser.add_argument("-m", "--mode", dest="mode", type=str, default="random_data", choices=("random_data", "real_data"), - help=" Execute mode, should be random_data or real_data.", + help=" Execute mode, should be random_data or real_data," + "the default value is random_data.", required=False) parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, - help=" If mode is random_data, it is random seed.", + help=" If mode is random_data, it is random seed, " + "the default value is 1234.", required=False) parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=1, - help=" If mode is random_data, generate iter_times group of data.", + help=" If mode is random_data, generate iter_times group of data," + "the default value is 1.", required=False) @@ -376,9 +415,24 @@ def main(): op_generator_parser(parser) cmd_args = parser.parse_args() + output_path = os.path.dirname(cmd_args.output_file) + if not os.path.exists(output_path): + create_directory(output_path) + logger.info(f"Directory '{output_path}' created.") + else: + logger.info(f"Directory '{output_path}' already exists.") + check_file_or_directory_path(output_path, isdir=True) if cmd_args.dump_json_path: extract_op(cmd_args) - internal_settings = get_settings(cmd_args) + check_file_or_directory_path(cmd_args.output_file) + + if cmd_args.propagation == Const.BACKWARD: + api_full_name, args_info_forward, kwargs_info_forward, args_info_backward = check_json_legality(cmd_args) + internal_settings = get_settings(cmd_args, api_full_name, args_info_forward, kwargs_info_forward, + args_info_backward) + else: + api_full_name, args_info_forward, kwargs_info_forward = check_json_legality(cmd_args) + internal_settings = get_settings(cmd_args, api_full_name, args_info_forward, kwargs_info_forward,None) template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) @@ -388,9 +442,9 @@ def main(): code_template = ftemp.read() fout.write(code_template.format(**internal_settings)) except OSError: - print(f"Failed to open file. Please check file {template_path} or {operator_script_path}.") + logger.error(f"Failed to open file. Please check file {template_path} or {operator_script_path}.") - print(f"Generate operator script successfully and the name is {operator_script_path}.") + logger.info(f"Generate operator script successfully and the name is {operator_script_path}.") if __name__ == "__main__": diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index ffda98a7f..df090c752 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -26,7 +26,7 @@ RAISE_PRECISION = {{ "torch.float32": torch.float64, "torch.float": torch.float64 }} - +THOUSANDTH_THRESHOLDING = 0.001 class CompareStandard(Enum): BINARY_EQUALITY_STANDARD = auto() @@ -282,12 +282,11 @@ def compare_tensor(out_device, out_bench, api_name): table.append(["ULP Error Proportion", ulp_err_proportion]) elif compare_standard == CompareStandard.THOUSANDTH_STANDARD: rel_err_origin = np.abs(abs_err / abs_bench_with_eps) - thresholding = 0.001 if np.size(rel_err_origin) == 0: thousand_res = 1 else: - thousand_res = np.divide(np.sum(rel_err < thresholding), np.size(rel_err_origin)) - thousand_status = thousand_res > (1 - thresholding) + thousand_res = np.divide(np.sum(rel_err < THOUSANDTH_THRESHOLDING), np.size(rel_err_origin)) + thousand_status = thousand_res > (1 - THOUSANDTH_THRESHOLDING) table.append(["Compare Standard", "Thousandth Standard"]) table.append(["Thousandth ratio", thousand_res]) else: -- Gitee From 2d989c63c37fe91128907725376abd68351a462b Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Sat, 21 Sep 2024 10:58:31 +0800 Subject: [PATCH 12/14] code review --- .../api_accuracy_checker/generate_op_script/op_generator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 5d3b843fe..04c673f40 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -363,8 +363,6 @@ def extract_op(args): with FileOpen(args.dump_json_path, 'r') as file: data = json.load(file) new_data = {} - output_path = os.path.dirname(args.output_file) - create_directory(output_path) extract_key_pattern = re.compile(f"^{re.escape(args.api_name)}\..+") real_data_path = data.get('dump_data_dir', '') for key, value in data.get('data', {}).items(): -- Gitee From 8f6a6892959f4a58e0e2f1b41ae6aca15717fbc2 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Mon, 23 Sep 2024 08:59:38 +0800 Subject: [PATCH 13/14] code review --- .../generate_op_script/op_generator.py | 44 ++++++++----------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 04c673f40..324b5e30d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -156,28 +156,6 @@ def extract_detailed_api_segments(full_api_name): api_name = Const.SEP.join([prefix, api_name]) return api_type, api_name, api_order -def check_json_legality(cmd_args): - if cmd_args.propagation == Const.BACKWARD: - # read and check json - api_result = check_user_settings(cmd_args) - api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict - api_full_name_backward, api_info_dict_backward = (api_result.backward_info.api_full_name, - api_result.backward_info.api_info_dict) - args_info_forward = api_info_dict_forward.get(Const.INPUT_ARGS) - kwargs_info_forward = api_info_dict_forward.get(Const.INPUT_KWARGS) - if Const.GRAD_INPUT in api_info_dict_backward: - args_info_backward = api_info_dict_backward.get(Const.GRAD_INPUT) - elif Const.INPUT in api_info_dict_backward: - args_info_backward = api_info_dict_backward.get(Const.INPUT) - return api_full_name_backward, args_info_forward, kwargs_info_forward, args_info_backward - else: - # read and check json - api_result = check_user_settings(cmd_args) - api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict - args_info_forward = api_info_dict_forward.get(Const.INPUT_ARGS) - kwargs_info_forward = api_info_dict_forward.get(Const.INPUT_KWARGS) - return api_full_name_forward, args_info_forward, kwargs_info_forward - def get_settings(cmd_args, api_full_name, args_info_forward, kwargs_info_forward, args_info_backward): ''' internal_settings contain all information needed for the operator program. @@ -425,12 +403,26 @@ def main(): check_file_or_directory_path(cmd_args.output_file) if cmd_args.propagation == Const.BACKWARD: - api_full_name, args_info_forward, kwargs_info_forward, args_info_backward = check_json_legality(cmd_args) - internal_settings = get_settings(cmd_args, api_full_name, args_info_forward, kwargs_info_forward, + # read and check json + api_result = check_user_settings(cmd_args) + api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict + api_full_name_backward, api_info_dict_backward = (api_result.backward_info.api_full_name, + api_result.backward_info.api_info_dict) + args_info_forward = api_info_dict_forward.get(Const.INPUT_ARGS) + kwargs_info_forward = api_info_dict_forward.get(Const.INPUT_KWARGS) + if Const.GRAD_INPUT in api_info_dict_backward: + args_info_backward = api_info_dict_backward.get(Const.GRAD_INPUT) + elif Const.INPUT in api_info_dict_backward: + args_info_backward = api_info_dict_backward.get(Const.INPUT) + internal_settings = get_settings(cmd_args, api_full_name_backward, args_info_forward, kwargs_info_forward, args_info_backward) else: - api_full_name, args_info_forward, kwargs_info_forward = check_json_legality(cmd_args) - internal_settings = get_settings(cmd_args, api_full_name, args_info_forward, kwargs_info_forward,None) + # read and check json + api_result = check_user_settings(cmd_args) + api_full_name_forward, api_info_dict_forward = api_result.api_full_name, api_result.api_info_dict + args_info_forward = api_info_dict_forward.get(Const.INPUT_ARGS) + kwargs_info_forward = api_info_dict_forward.get(Const.INPUT_KWARGS) + internal_settings = get_settings(cmd_args, api_full_name_forward, args_info_forward, kwargs_info_forward,None) template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) -- Gitee From 43b725da4331abb9368776a1dfc1927205a7a274 Mon Sep 17 00:00:00 2001 From: xie <1272599165@qq.com> Date: Thu, 26 Sep 2024 10:57:44 +0800 Subject: [PATCH 14/14] thousandth bugfix --- .../generate_op_script/operator_replication.template | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index df090c752..2a555dc43 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -281,11 +281,11 @@ def compare_tensor(out_device, out_bench, api_name): table.append(["Mean ULP Error", mean_ulp_err]) table.append(["ULP Error Proportion", ulp_err_proportion]) elif compare_standard == CompareStandard.THOUSANDTH_STANDARD: - rel_err_origin = np.abs(abs_err / abs_bench_with_eps) - if np.size(rel_err_origin) == 0: + rel_err_origin = torch.abs(abs_err / abs_bench_with_eps) + if torch.numel(rel_err_origin) == 0: thousand_res = 1 else: - thousand_res = np.divide(np.sum(rel_err < THOUSANDTH_THRESHOLDING), np.size(rel_err_origin)) + thousand_res = torch.divide(torch.sum(rel_err < THOUSANDTH_THRESHOLDING), torch.numel(rel_err_origin)) thousand_status = thousand_res > (1 - THOUSANDTH_THRESHOLDING) table.append(["Compare Standard", "Thousandth Standard"]) table.append(["Thousandth ratio", thousand_res]) -- Gitee