From edb5c704439e672d185be6839051a41adfe5fcc7 Mon Sep 17 00:00:00 2001 From: weili10 Date: Fri, 16 Jul 2021 18:19:48 +0800 Subject: [PATCH] =?UTF-8?q?=E5=90=8C=E6=AD=A5b98f7ec21ee0d6c14be8b0bd09301?= =?UTF-8?q?d9f6e686816?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- patch/npu.patch | 275 +++++++++--------- .../src/ATen/native/native_functions.yaml | 14 +- .../src/ATen/native/npu/ArgminKernelNpu.cpp | 0 src/aten/src/ATen/native/npu/CatKernelNpu.cpp | 14 + .../native/npu/ConstantPadNdKernelNpu.cpp | 14 +- .../native/npu/EmbeddingRenormKernelNpu.cpp | 176 +++++------ .../ATen/native/npu/GiouBackwardKernelNpu.cpp | 73 +++++ .../src/ATen/native/npu/GiouKernelNpu.cpp | 87 ++++++ .../src/ATen/native/npu/IndexPutKernelNpu.cpp | 3 + src/aten/src/ATen/native/npu/MinKernelNpu.cpp | 27 +- src/aten/src/ATen/native/npu/MmKernelNpu.cpp | 11 +- .../src/ATen/native/npu/NormKernelNpu.cpp | 31 +- .../native/npu/ReflectionPad2dKernelNpu.cpp | 127 -------- .../native/npu/ReplicationPad2dKernelNpu.cpp | 0 .../native/npu/common/FormatCastHelper.cpp | 4 +- .../npu/convolution/ConvolutionKernelNpu.cpp | 24 +- .../ATen/native/npu/frame/FormatHelper.cpp | 30 -- .../src/ATen/native/npu/frame/FormatHelper.h | 2 - .../native/npu/interface/EnvVariables.cpp | 2 - .../src/ATen/native/npu/utils/CalcuOpUtil.cpp | 13 +- .../native/npu/utils/KernelNpuOutputSize.cpp | 6 - .../native/npu/utils/KernelNpuOutputSize.h | 4 - src/aten/src/ATen/utils/DumpUtils.h | 4 + src/aten/src/ATen/utils/LoadUtils.cpp | 53 +++- src/tools/autograd/derivatives.yaml | 5 +- test/test_npu/test_constant_pad_nd.py | 70 ----- test/test_npu/test_network_ops/test_abs.py | 0 test/test_npu/test_network_ops/test_add.py | 0 test/test_npu/test_network_ops/test_addmm.py | 0 test/test_npu/test_network_ops/test_all.py | 0 test/test_npu/test_network_ops/test_any.py | 0 test/test_npu/test_network_ops/test_arange.py | 0 test/test_npu/test_network_ops/test_argmax.py | 0 
.../test_avg_pool2d_backward.py | 0 .../{ => test_network_ops}/test_bilinear.py | 65 +++-- ...nary_cross_entropy_with_logits_backward.py | 0 test/test_npu/test_network_ops/test_bmm.py | 0 .../test_network_ops/test_broadcastToD.py | 0 test/test_npu/test_network_ops/test_cat.py | 0 test/test_npu/test_network_ops/test_clamp.py | 0 .../test_network_ops/test_constant_pad_nd.py | 1 - test/test_npu/test_network_ops/test_conv2d.py | 0 .../test_conv_depthwise2d_backward.py | 0 test/test_npu/test_network_ops/test_div.py | 0 .../test_npu/test_network_ops/test_dropout.py | 0 .../test_embedding_backward.py | 0 .../test_embedding_renorm.py | 6 +- test/test_npu/test_network_ops/test_exp.py | 0 test/test_npu/test_network_ops/test_fill_.py | 0 test/test_npu/test_network_ops/test_floor.py | 0 test/test_npu/test_network_ops/test_fmod.py | 0 test/test_npu/test_network_ops/test_full.py | 0 test/test_npu/test_network_ops/test_ge.py | 0 .../test_gelu_backward.py | 25 +- test/test_npu/test_network_ops/test_gt.py | 0 .../test_network_ops/test_hardtanh.py | 0 .../test_network_ops/test_index_put.py | 10 + test/test_npu/test_network_ops/test_le.py | 0 .../test_leaky_relu_backward.py | 0 test/test_npu/test_network_ops/test_log.py | 0 test/test_npu/test_network_ops/test_log2.py | 0 .../test_network_ops/test_log_softmax.py | 0 .../test_log_softmax_backward.py | 0 test/test_npu/test_network_ops/test_lt.py | 0 test/test_npu/test_network_ops/test_matmul.py | 0 test/test_npu/test_network_ops/test_max.py | 0 test/test_npu/test_network_ops/test_min.py | 0 test/test_npu/test_network_ops/test_mm.py | 0 test/test_npu/test_network_ops/test_muls.py | 0 test/test_npu/test_network_ops/test_neg.py | 0 .../test_npu/test_network_ops/test_nllloss.py | 0 .../test_network_ops/test_not_equal.py | 0 .../test_network_ops/test_npu_giou.py | 133 +++++++++ .../test_npu_giou_backward.py | 86 ++++++ test/test_npu/test_network_ops/test_pow.py | 0 test/test_npu/test_network_ops/test_prod.py | 0 
.../test_network_ops/test_reciprocal.py | 0 test/test_npu/test_network_ops/test_relu.py | 0 .../test_network_ops/test_remainder.py | 0 test/test_npu/test_network_ops/test_rsqrt.py | 0 test/test_npu/test_network_ops/test_rsub.py | 0 test/test_npu/test_network_ops/test_sign.py | 0 .../test_npu/test_network_ops/test_softmax.py | 0 test/test_npu/test_network_ops/test_split.py | 0 test/test_npu/test_network_ops/test_sqrt.py | 0 test/test_npu/test_network_ops/test_stack.py | 0 test/test_npu/test_network_ops/test_sub.py | 0 test/test_npu/test_network_ops/test_sum.py | 0 .../test_upsample_bilinear_backward.py | 0 test/test_npu/test_network_ops/test_where.py | 0 test/test_npu/test_network_ops/test_zero.py | 0 test/test_npu/test_network_ops/test_zeros.py | 0 .../test_network_ops/test_zeroslike.py | 0 test/test_npu/test_network_ops/util_test.py | 0 .../torch.onnx/eval/onnx/cp_onnx_eval.py | 0 .../torch.onnx/eval/onnxrt/onnxrt_eval.py | 0 .../test_onnx/torch.onnx/export/cp_parser.py | 0 .../torch.onnx/export/export_onnx.py | 0 .../torch.onnx/export/model_export-cpu.py | 0 .../torch.onnx/export/model_export-gpu.py | 0 .../torch.onnx/export/model_export-npu.py | 0 .../torch.onnx/export/model_export.py | 0 .../torch.onnx/export/onnx_parser.py | 0 test/test_npu/test_onnx/torch.onnx/main.py | 0 test/test_npu/test_reflection_pad2d.py | 238 --------------- 105 files changed, 807 insertions(+), 826 deletions(-) mode change 100644 => 100755 src/aten/src/ATen/native/npu/ArgminKernelNpu.cpp create mode 100644 src/aten/src/ATen/native/npu/GiouBackwardKernelNpu.cpp create mode 100644 src/aten/src/ATen/native/npu/GiouKernelNpu.cpp delete mode 100644 src/aten/src/ATen/native/npu/ReflectionPad2dKernelNpu.cpp mode change 100644 => 100755 src/aten/src/ATen/native/npu/ReplicationPad2dKernelNpu.cpp delete mode 100644 test/test_npu/test_constant_pad_nd.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_abs.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_add.py 
mode change 100644 => 100755 test/test_npu/test_network_ops/test_addmm.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_all.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_any.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_arange.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_argmax.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_avg_pool2d_backward.py rename test/test_npu/{ => test_network_ops}/test_bilinear.py (74%) mode change 100644 => 100755 test/test_npu/test_network_ops/test_binary_cross_entropy_with_logits_backward.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_bmm.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_broadcastToD.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_cat.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_clamp.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_conv2d.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_conv_depthwise2d_backward.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_div.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_dropout.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_embedding_backward.py rename test/test_npu/{ => test_network_ops}/test_embedding_renorm.py (97%) mode change 100644 => 100755 test/test_npu/test_network_ops/test_exp.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_fill_.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_floor.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_fmod.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_full.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_ge.py rename test/test_npu/{ => test_network_ops}/test_gelu_backward.py (77%) mode change 100644 => 100755 test/test_npu/test_network_ops/test_gt.py mode 
change 100644 => 100755 test/test_npu/test_network_ops/test_hardtanh.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_index_put.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_le.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_leaky_relu_backward.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_log.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_log2.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_log_softmax.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_log_softmax_backward.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_lt.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_matmul.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_max.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_min.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_mm.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_muls.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_neg.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_nllloss.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_not_equal.py create mode 100644 test/test_npu/test_network_ops/test_npu_giou.py create mode 100644 test/test_npu/test_network_ops/test_npu_giou_backward.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_pow.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_prod.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_reciprocal.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_relu.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_remainder.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_rsqrt.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_rsub.py mode change 100644 => 100755 
test/test_npu/test_network_ops/test_sign.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_softmax.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_split.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_sqrt.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_stack.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_sub.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_sum.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_upsample_bilinear_backward.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_where.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_zero.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_zeros.py mode change 100644 => 100755 test/test_npu/test_network_ops/test_zeroslike.py mode change 100644 => 100755 test/test_npu/test_network_ops/util_test.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/eval/onnx/cp_onnx_eval.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/eval/onnxrt/onnxrt_eval.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/export/cp_parser.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/export/export_onnx.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/export/model_export-cpu.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/export/model_export-gpu.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/export/model_export-npu.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/export/model_export.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/export/onnx_parser.py mode change 100644 => 100755 test/test_npu/test_onnx/torch.onnx/main.py delete mode 100644 test/test_npu/test_reflection_pad2d.py diff --git a/patch/npu.patch b/patch/npu.patch index 14c9a2b015..b16fb85396 100644 --- a/patch/npu.patch +++ 
b/patch/npu.patch @@ -1,6 +1,6 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/CMakeLists.txt pytorch-develop/aten/CMakeLists.txt --- pytorch-v1.5.0/aten/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/CMakeLists.txt 2021-07-13 15:30:57.594267657 +0800 ++++ pytorch-develop/aten/CMakeLists.txt 2021-07-16 18:19:46.298791052 +0800 @@ -22,8 +22,10 @@ set(ATen_CPU_INCLUDE) set(ATen_THIRD_PARTY_INCLUDE) @@ -51,7 +51,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt pytorch-develop/aten/src/ATen/CMakeLists.txt --- pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-13 15:30:57.594267657 +0800 ++++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-16 18:19:46.298791052 +0800 @@ -67,6 +67,9 @@ FILE(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h") FILE(GLOB native_cpu_h "native/cpu/*.h") @@ -129,7 +129,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h --- pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-13 15:30:57.602267943 +0800 ++++ 
pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-16 18:19:46.306791339 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -170,7 +170,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/function_wrapper.py pytorch-develop/aten/src/ATen/function_wrapper.py --- pytorch-v1.5.0/aten/src/ATen/function_wrapper.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-13 15:30:57.610268230 +0800 ++++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-16 18:19:46.314791625 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -354,7 +354,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for option in declaration['options']: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/gen.py pytorch-develop/aten/src/ATen/gen.py --- pytorch-v1.5.0/aten/src/ATen/gen.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/gen.py 2021-07-13 15:30:57.610268230 +0800 ++++ pytorch-develop/aten/src/ATen/gen.py 2021-07-16 18:19:46.314791625 +0800 @@ -1,3 +1,18 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -512,7 +512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= generate_outputs() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp --- pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-13 15:30:57.622268661 +0800 ++++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-16 18:19:46.326792056 +0800 @@ -339,20 +339,20 @@ void hardsigmoid_backward_kernel(TensorIterator& iter) { @@ -540,7 +540,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp pytorch-develop/aten/src/ATen/native/Memory.cpp --- pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-13 15:30:57.614268374 +0800 ++++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-16 18:19:46.318791769 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -595,7 +595,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= detail::computeStorageSize(self.sizes(), self.strides()), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml pytorch-develop/aten/src/ATen/native/native_functions.yaml --- pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-13 15:30:57.634269091 +0800 ++++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-16 18:19:46.342792630 +0800 @@ -1,6 +1,5 @@ # See README.md in this directory for more guidance @@ -5916,24 +5916,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max_unpool2d.out(Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6118,12 +7584,16 @@ - dispatch: - CPU: reflection_pad2d_out_cpu - CUDA: reflection_pad2d_out_cuda -+ npu_dispatch: -+ NPU: reflection_pad2d_out_npu - - - func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor - python_module: nn - dispatch: - CPU: reflection_pad2d_cpu - CUDA: reflection_pad2d_cuda -+ npu_dispatch: -+ NPU: reflection_pad2d_npu - - - func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) - python_module: nn -@@ -6166,12 +7636,16 @@ +@@ -6166,12 +7632,16 @@ dispatch: CPU: replication_pad2d_out_cpu CUDA: replication_pad2d_out_cuda @@ -5950,7 +5933,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: replication_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) 
python_module: nn -@@ -6214,12 +7688,16 @@ +@@ -6214,12 +7684,16 @@ dispatch: CPU: upsample_linear1d_out_cpu CUDA: upsample_linear1d_out_cuda @@ -5967,7 +5950,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_linear1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, bool align_corners, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6232,12 +7710,16 @@ +@@ -6232,12 +7706,16 @@ dispatch: CPU: upsample_linear1d_backward_cpu CUDA: upsample_linear1d_backward_cuda @@ -5984,7 +5967,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6245,96 +7727,128 @@ +@@ -6245,96 +7723,128 @@ CPU: upsample_bilinear2d_cpu CUDA: upsample_bilinear2d_cuda QuantizedCPU: quantized_upsample_bilinear2d_cpu @@ -6113,7 +6096,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6342,24 +7856,32 @@ +@@ -6342,24 +7852,32 @@ CPU: upsample_nearest2d_cpu CUDA: upsample_nearest2d_cuda QuantizedCPU: quantized_upsample_nearest2d_cpu @@ -6146,7 +6129,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6367,38 +7889,52 @@ +@@ -6367,38 +7885,52 @@ CPU: upsample_nearest3d_cpu CUDA: upsample_nearest3d_cuda QuantizedCPU: quantized_upsample_nearest3d_cpu @@ -6199,7 +6182,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # What's a thnn_conv_ versus a slow_conv_? 
# -@@ -6423,24 +7959,32 @@ +@@ -6423,24 +7955,32 @@ dispatch: CPU: slow_conv_transpose2d_out_cpu CUDA: slow_conv_transpose2d_out_cuda @@ -6232,7 +6215,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6468,21 +8012,29 @@ +@@ -6468,21 +8008,29 @@ - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -6262,7 +6245,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight, Tensor(c!)? grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) python_module: nn -@@ -6495,32 +8047,46 @@ +@@ -6495,32 +8043,46 @@ dispatch: CPU: slow_conv2d_backward_cpu CUDA: legacy::cuda::_thnn_conv2d_backward @@ -6309,7 +6292,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6553,12 +8119,16 @@ +@@ -6553,12 +8115,16 @@ dispatch: CPU: slow_conv_dilated2d_cpu CUDA: slow_conv_dilated2d_cuda @@ -6326,7 +6309,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? 
bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor python_module: nn -@@ -6577,57 +8147,405 @@ +@@ -6577,57 +8143,413 @@ dispatch: CPU: col2im_out_cpu CUDA: col2im_out_cuda @@ -6732,10 +6715,18 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +- func: npu_bert_apply_adam(Tensor(a!) var, Tensor(b!) m, Tensor(c!) v, Scalar lr, Scalar beta1, Scalar beta2, Scalar epsilon, Tensor grad, Scalar max_grad_norm, Scalar global_grad_norm, Scalar weight_decay) -> (Tensor(a!), Tensor(b!), Tensor(c!)) + npu_dispatch_only: + NPU: bert_apply_adam_npu ++ ++- func: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor ++ npu_dispatch_only: ++ NPU: giou_npu ++ ++- func: npu_giou_backward(Tensor grad, Tensor bboxes, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> (Tensor, Tensor) ++ npu_dispatch_only: ++ NPU: giou_backward_npu \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S --- pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-13 15:30:57.674270525 +0800 ++++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-16 18:19:46.378793920 +0800 @@ -659,14 +659,14 @@ SUB x1, x1, 4 @@ -6761,7 +6752,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CMP x1, 2 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur 
pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp pytorch-develop/aten/src/ATen/native/TensorCompare.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-13 15:30:57.618268517 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-16 18:19:46.322791912 +0800 @@ -64,7 +64,7 @@ Tensor isinf(const Tensor &self) { @@ -6773,7 +6764,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.scalar_type(), "isinf", [&]() { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp pytorch-develop/aten/src/ATen/native/TensorFactories.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-13 15:30:57.618268517 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-16 18:19:46.326792056 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -6818,7 +6809,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp pytorch-develop/aten/src/ATen/native/TensorProperties.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-13 15:30:57.618268517 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-16 18:19:46.326792056 +0800 @@ -87,6 +87,7 @@ if (self.is_contiguous(memory_format)) { return self; @@ -6829,7 +6820,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= "preserve memory format is unsupported by the contiguous operator"); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp --- pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-13 15:30:57.622268661 +0800 ++++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-16 18:19:46.326792056 +0800 @@ -26,7 +26,7 @@ const scalar_t* in = &idata[output_y * input_width + output_x]; scalar_t* out = &odata[output_y * output_width + output_x]; @@ -6841,7 +6832,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= out += output_width * output_height; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native_parse.py pytorch-develop/aten/src/ATen/native_parse.py --- 
pytorch-v1.5.0/aten/src/ATen/native_parse.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-13 15:30:57.686270955 +0800 ++++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-16 18:19:46.394794494 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -6879,7 +6870,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= msg = '''Exception raised in processing function: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py pytorch-develop/aten/src/ATen/preprocess_declarations.py --- pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-13 15:30:57.690271099 +0800 ++++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-16 18:19:46.394794494 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -6911,7 +6902,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h pytorch-develop/aten/src/ATen/templates/TensorBody.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-13 15:30:57.690271099 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-16 18:19:46.394794494 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -6944,7 +6935,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h pytorch-develop/aten/src/ATen/templates/TensorMethods.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-13 15:30:57.690271099 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-16 18:19:46.394794494 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6978,7 +6969,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/CMakeLists.txt pytorch-develop/aten/src/TH/CMakeLists.txt --- pytorch-v1.5.0/aten/src/TH/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-13 15:30:57.694271242 +0800 ++++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-16 18:19:46.394794494 +0800 @@ -48,6 +48,11 @@ ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE) @@ -6993,7 +6984,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp pytorch-develop/aten/src/TH/generic/THStorage.cpp --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-13 15:30:57.694271242 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-16 18:19:46.398794637 
+0800 @@ -1,9 +1,32 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7102,7 +7093,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.h pytorch-develop/aten/src/TH/generic/THStorage.h --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-13 15:30:57.694271242 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-16 18:19:46.398794637 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7141,7 +7132,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/CMakeLists.txt pytorch-develop/c10/CMakeLists.txt --- pytorch-v1.5.0/c10/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/CMakeLists.txt 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/CMakeLists.txt 2021-07-16 18:19:46.410795068 +0800 @@ -63,6 +63,14 @@ message(STATUS "don't use NUMA") endif() @@ -7170,7 +7161,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # not checked in diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Backend.h pytorch-develop/c10/core/Backend.h --- pytorch-v1.5.0/c10/core/Backend.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Backend.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Backend.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ 
+// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7265,7 +7256,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.cpp pytorch-develop/c10/core/Device.cpp --- pytorch-v1.5.0/c10/core/Device.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.cpp 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Device.cpp 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7305,7 +7296,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= types.begin(), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.h pytorch-develop/c10/core/Device.h --- pytorch-v1.5.0/c10/core/Device.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Device.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7340,7 +7331,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return type_ == DeviceType::CPU; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.cpp pytorch-develop/c10/core/DeviceType.cpp --- pytorch-v1.5.0/c10/core/DeviceType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7380,7 +7371,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return false; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.h pytorch-develop/c10/core/DeviceType.h --- pytorch-v1.5.0/c10/core/DeviceType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DeviceType.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7423,7 +7414,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= constexpr DeviceType kXLA = DeviceType::XLA; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.cpp pytorch-develop/c10/core/DispatchKey.cpp --- pytorch-v1.5.0/c10/core/DispatchKey.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7455,7 +7446,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= case DispatchKey::TESTING_ONLY_GenericModeTensorId: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.h pytorch-develop/c10/core/DispatchKey.h --- pytorch-v1.5.0/c10/core/DispatchKey.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/DispatchKey.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7487,7 +7478,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Storage.h pytorch-develop/c10/core/Storage.h --- pytorch-v1.5.0/c10/core/Storage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Storage.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/Storage.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7521,7 +7512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/StorageImpl.h pytorch-develop/c10/core/StorageImpl.h --- pytorch-v1.5.0/c10/core/StorageImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/StorageImpl.h 2021-07-13 15:30:57.706271672 +0800 ++++ pytorch-develop/c10/core/StorageImpl.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,12 +1,39 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7578,7 +7569,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorImpl.h pytorch-develop/c10/core/TensorImpl.h --- pytorch-v1.5.0/c10/core/TensorImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorImpl.h 2021-07-13 15:30:57.710271816 +0800 ++++ pytorch-develop/c10/core/TensorImpl.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7648,7 +7639,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorOptions.h pytorch-develop/c10/core/TensorOptions.h --- pytorch-v1.5.0/c10/core/TensorOptions.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorOptions.h 2021-07-13 15:30:57.710271816 +0800 ++++ pytorch-develop/c10/core/TensorOptions.h 2021-07-16 18:19:46.410795068 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7689,7 +7680,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/macros/Export.h pytorch-develop/c10/macros/Export.h --- pytorch-v1.5.0/c10/macros/Export.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/macros/Export.h 2021-07-13 15:30:57.710271816 +0800 ++++ pytorch-develop/c10/macros/Export.h 2021-07-16 18:19:46.414795211 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7816,7 +7807,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -... 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/caffe2/CMakeLists.txt pytorch-develop/caffe2/CMakeLists.txt --- pytorch-v1.5.0/caffe2/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-13 15:30:57.718272102 +0800 ++++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-16 18:19:46.422795498 +0800 @@ -32,6 +32,7 @@ # Add source, includes, and libs to lists list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS}) @@ -7963,7 +7954,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs. diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.clang-format pytorch-develop/.clang-format --- pytorch-v1.5.0/.clang-format 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.clang-format 2021-07-13 15:30:57.586267370 +0800 ++++ pytorch-develop/.clang-format 2021-07-16 18:19:46.294790909 +0800 @@ -84,5 +84,4 @@ SpacesInSquareBrackets: false Standard: Cpp11 @@ -7974,7 +7965,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/BuildVariables.cmake pytorch-develop/cmake/BuildVariables.cmake --- pytorch-v1.5.0/cmake/BuildVariables.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-16 18:19:46.530799370 +0800 @@ -11,6 +11,7 @@ # CMakeLists.txt files under each folder respectively. 
set(Caffe2_CPU_SRCS) @@ -8001,7 +7992,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # symbols. However, if the lib is whole linked in caffe2 lib, we don't want diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Codegen.cmake pytorch-develop/cmake/Codegen.cmake --- pytorch-v1.5.0/cmake/Codegen.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Codegen.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/Codegen.cmake 2021-07-16 18:19:46.530799370 +0800 @@ -191,13 +191,14 @@ file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt generated_cpp) file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt-cuda cuda_generated_cpp) @@ -8032,7 +8023,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Dependencies.cmake pytorch-develop/cmake/Dependencies.cmake --- pytorch-v1.5.0/cmake/Dependencies.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Dependencies.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/Dependencies.cmake 2021-07-16 18:19:46.534799514 +0800 @@ -1509,6 +1509,13 @@ ENDIF(NOT C_HAS_THREAD) endif() @@ -8049,7 +8040,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Summary.cmake pytorch-develop/cmake/Summary.cmake --- pytorch-v1.5.0/cmake/Summary.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Summary.cmake 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/Summary.cmake 2021-07-16 18:19:46.534799514 +0800 @@ 
-134,6 +134,7 @@ if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") @@ -8060,7 +8051,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endfunction() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/TorchConfig.cmake.in pytorch-develop/cmake/TorchConfig.cmake.in --- pytorch-v1.5.0/cmake/TorchConfig.cmake.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-13 15:30:57.830276118 +0800 ++++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-16 18:19:46.534799514 +0800 @@ -112,6 +112,11 @@ list(APPEND TORCH_LIBRARIES ${TORCH_CUDA_LIBRARIES}) endif() @@ -8075,7 +8066,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/CMakeLists.txt pytorch-develop/CMakeLists.txt --- pytorch-v1.5.0/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/CMakeLists.txt 2021-07-13 15:30:57.590267513 +0800 ++++ pytorch-develop/CMakeLists.txt 2021-07-16 18:19:46.294790909 +0800 @@ -205,6 +205,10 @@ option(USE_TBB "Use TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." 
ON) @@ -8142,7 +8133,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.dockerignore pytorch-develop/.dockerignore --- pytorch-v1.5.0/.dockerignore 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.dockerignore 2021-07-13 15:30:57.586267370 +0800 ++++ pytorch-develop/.dockerignore 2021-07-16 18:19:46.294790909 +0800 @@ -1,257 +1 @@ -# READ THIS BEFORE YOU REFACTOR ME -# @@ -8405,7 +8396,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/docs/make.bat pytorch-develop/docs/make.bat --- pytorch-v1.5.0/docs/make.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/docs/make.bat 2021-07-13 15:30:57.834276262 +0800 ++++ pytorch-develop/docs/make.bat 2021-07-16 18:19:46.538799657 +0800 @@ -1,36 +1,36 @@ -@ECHO OFF - @@ -8494,7 +8485,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/requirements.txt pytorch-develop/requirements.txt --- pytorch-v1.5.0/requirements.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/requirements.txt 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/requirements.txt 2021-07-16 18:19:46.554800231 +0800 @@ -4,4 +4,12 @@ requests setuptools @@ -8513,7 +8504,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' 
'--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/appveyor/install.bat pytorch-develop/scripts/appveyor/install.bat --- pytorch-v1.5.0/scripts/appveyor/install.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/appveyor/install.bat 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/appveyor/install.bat 2021-07-16 18:19:46.554800231 +0800 @@ -1,10 +1,10 @@ -:: Installation scripts for appveyor. - @@ -8537,7 +8528,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +conda install -y numpy diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/appveyor/install_cuda.bat pytorch-develop/scripts/appveyor/install_cuda.bat --- pytorch-v1.5.0/scripts/appveyor/install_cuda.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/appveyor/install_cuda.bat 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/appveyor/install_cuda.bat 2021-07-16 18:19:46.554800231 +0800 @@ -1,22 +1,22 @@ -@echo on - @@ -8585,7 +8576,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +nvcc -V || exit /b diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/build_windows.bat pytorch-develop/scripts/build_windows.bat --- pytorch-v1.5.0/scripts/build_windows.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/build_windows.bat 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/build_windows.bat 2021-07-16 18:19:46.554800231 +0800 @@ -1,84 +1,84 @@ -:: ############################################################################# -:: Example command to build on Windows. 
@@ -8757,7 +8748,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +exit /b 1 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/proto.ps1 pytorch-develop/scripts/proto.ps1 --- pytorch-v1.5.0/scripts/proto.ps1 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/proto.ps1 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/scripts/proto.ps1 2021-07-16 18:19:46.554800231 +0800 @@ -1,17 +1,17 @@ -param( - [string]$protoc, @@ -8795,7 +8786,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +Invoke-Expression $cmd diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/setup.py pytorch-develop/setup.py --- pytorch-v1.5.0/setup.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/setup.py 2021-07-13 15:30:57.850276836 +0800 ++++ pytorch-develop/setup.py 2021-07-16 18:19:46.554800231 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -8894,7 +8885,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'python/serialized_test/data/operator_test/*.zip', diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/derivatives.yaml pytorch-develop/tools/autograd/derivatives.yaml --- pytorch-v1.5.0/tools/autograd/derivatives.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-16 18:19:47.710841680 +0800 @@ -107,6 +107,10 @@ # # NB: The parameter names here MUST be consistent with the parameter names @@ -8951,7 +8942,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # The above backward definitions are equivalent to the definitions below. Why do we bundle # everything up? It's because it's more convenient to define double backwards # when there is a single function that manages everything. -@@ -1630,3 +1643,52 @@ +@@ -1630,3 +1643,55 @@ - name: nonzero(Tensor self) -> Tensor output_differentiability: [False] @@ -9004,11 +8995,14 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +- name: npu_linear(Tensor input, Tensor weight, Tensor? 
bias=None) -> Tensor + input, weight: npu_linear_backward(grad, input, weight) + bias: maybe_multiply(grad, 1) ++ ++- name: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor ++ self, gtboxes: npu_giou_backward(grad, self, gtboxes, trans, is_cross, mode) \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/dump_utils.py pytorch-develop/tools/autograd/dump_utils.py --- pytorch-v1.5.0/tools/autograd/dump_utils.py 1970-01-01 08:00:00.000000000 +0800 -+++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-13 15:30:58.990317711 +0800 -@@ -0,0 +1,114 @@ ++++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-16 18:19:47.710841680 +0800 +@@ -0,0 +1,115 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# All rights reserved. +# @@ -9121,11 +9115,12 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + "pin_memory", + "to_device", + "numpy_T", -+ "slice_Tensor" ++ "slice_Tensor", ++ "select_int" +] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py pytorch-develop/tools/autograd/gen_autograd_functions.py --- pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-16 18:19:47.710841680 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -9311,7 +9306,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_python_functions.py pytorch-develop/tools/autograd/gen_python_functions.py --- pytorch-v1.5.0/tools/autograd/gen_python_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-16 18:19:47.710841680 +0800 @@ -1,3 +1,20 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9353,7 +9348,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'value': argname, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_variable_type.py pytorch-develop/tools/autograd/gen_variable_type.py --- pytorch-v1.5.0/tools/autograd/gen_variable_type.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-16 18:19:47.714841823 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -9526,7 +9521,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/Functions.cpp pytorch-develop/tools/autograd/templates/Functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/Functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-16 18:19:47.714841823 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9606,7 +9601,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto sparse = sparse_.coalesce(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp pytorch-develop/tools/autograd/templates/python_torch_functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-16 18:19:47.714841823 +0800 @@ -22,7 +22,7 @@ #include "torch/csrc/autograd/generated/variable_factories.h" #include "torch/csrc/utils/structseq.h" @@ -9690,7 +9685,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp pytorch-develop/tools/autograd/templates/python_variable_methods.cpp --- 
pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-16 18:19:47.714841823 +0800 @@ -15,7 +15,13 @@ #include "torch/csrc/cuda/Stream.h" #include "torch/csrc/cuda/Event.h" @@ -9777,7 +9772,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"has_names", (PyCFunction)THPVariable_has_names, METH_NOARGS, NULL}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp pytorch-develop/tools/autograd/templates/VariableType.cpp --- pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-16 18:19:47.714841823 +0800 @@ -1,7 +1,27 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -9808,7 +9803,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.h pytorch-develop/tools/autograd/templates/VariableType.h --- pytorch-v1.5.0/tools/autograd/templates/VariableType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-13 15:30:58.990317711 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-16 18:19:47.714841823 +0800 @@ -1,3 +1,20 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9840,7 +9835,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= const at::Tensor & unpack(const Tensor & t, const char * name, int pos); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/build_variables.bzl pytorch-develop/tools/build_variables.bzl --- pytorch-v1.5.0/tools/build_variables.bzl 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/build_variables.bzl 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/tools/build_variables.bzl 2021-07-16 18:19:47.714841823 +0800 @@ -46,6 +46,7 @@ "torch/csrc/autograd/functions/utils.cpp", "torch/csrc/autograd/input_buffer.cpp", @@ -9926,7 +9921,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def grad(outputs: _TensorOrTensors, inputs: _TensorOrTensors, grad_outputs: Optional[_TensorOrTensors]=..., retain_graph: Optional[bool]=..., create_graph: bool=..., only_inputs: bool=..., allow_unused: bool=...) -> Tuple[Tensor, ...]: ... 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/autograd/profiler.py pytorch-develop/torch/autograd/profiler.py --- pytorch-v1.5.0/torch/autograd/profiler.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/autograd/profiler.py 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/autograd/profiler.py 2021-07-16 18:19:47.718841966 +0800 @@ -1,8 +1,25 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -10399,7 +10394,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return ''.join(result) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/CMakeLists.txt pytorch-develop/torch/CMakeLists.txt --- pytorch-v1.5.0/torch/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/CMakeLists.txt 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/torch/CMakeLists.txt 2021-07-16 18:19:47.714841823 +0800 @@ -97,6 +97,7 @@ ${TORCH_SRC_DIR}/csrc/tensor/python_tensor.cpp ${TORCH_SRC_DIR}/csrc/utils.cpp @@ -10431,7 +10426,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/engine.cpp pytorch-develop/torch/csrc/autograd/engine.cpp --- pytorch-v1.5.0/torch/csrc/autograd/engine.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10554,7 +10549,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto event = c10::Event{c10::DeviceType::CUDA}; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp pytorch-develop/torch/csrc/autograd/functions/tensor.cpp --- pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10586,7 +10581,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= /*non_blocking=*/false, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/init.cpp pytorch-develop/torch/csrc/autograd/init.cpp --- pytorch-v1.5.0/torch/csrc/autograd/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10629,7 +10624,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= m.def("_enable_profiler", enableProfiler); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp pytorch-develop/torch/csrc/autograd/input_buffer.cpp --- pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10681,7 +10676,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto& old_var = buffer[pos]; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp pytorch-develop/torch/csrc/autograd/profiler.cpp --- pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10877,7 +10872,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CUDAStubs::~CUDAStubs() = default; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.h pytorch-develop/torch/csrc/autograd/profiler.h --- pytorch-v1.5.0/torch/csrc/autograd/profiler.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11002,7 +10997,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp pytorch-develop/torch/csrc/autograd/python_variable.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11056,7 +11051,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"is_complex", (getter)THPVariable_is_complex, nullptr, nullptr, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11097,7 +11092,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h --- pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-13 15:30:59.010318428 +0800 ++++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-16 18:19:47.730842396 +0800 @@ -168,6 +168,45 @@ return r.release(); } @@ -11146,7 +11141,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!r) throw python_error(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp --- 
pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-13 15:30:59.006318284 +0800 ++++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-16 18:19:47.730842396 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11180,7 +11175,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!t.defined()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp pytorch-develop/torch/csrc/distributed/c10d/comm.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-13 15:30:59.014318571 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-16 18:19:47.734842540 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11286,7 +11281,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= while (!in_flight.empty()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp pytorch-develop/torch/csrc/distributed/c10d/init.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-13 15:30:59.014318571 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-16 18:19:47.734842540 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11343,7 +11338,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= .def("is_success", &::c10d::ProcessGroup::Work::isSuccess) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-13 15:30:59.014318571 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-16 18:19:47.734842540 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11468,7 +11463,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp pytorch-develop/torch/csrc/DynamicTypes.cpp --- pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-16 18:19:47.718841966 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11517,7 +11512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return it->second; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Generator.cpp pytorch-develop/torch/csrc/Generator.cpp --- pytorch-v1.5.0/torch/csrc/Generator.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-16 18:19:47.722842110 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11585,7 +11580,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= #endif diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/serialization.cpp pytorch-develop/torch/csrc/generic/serialization.cpp --- pytorch-v1.5.0/torch/csrc/generic/serialization.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-13 15:30:59.018318714 +0800 ++++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-16 18:19:47.738842684 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11685,7 +11680,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/Storage.cpp pytorch-develop/torch/csrc/generic/Storage.cpp --- pytorch-v1.5.0/torch/csrc/generic/Storage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-13 15:30:59.018318714 +0800 ++++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-16 18:19:47.738842684 +0800 @@ -1,7 +1,25 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11764,7 +11759,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for (Py_ssize_t i = 0; i < length; i++) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp pytorch-develop/torch/csrc/generic/StorageMethods.cpp --- pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-13 15:30:59.018318714 +0800 ++++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-16 18:19:47.738842684 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11812,7 +11807,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"_write_file", (PyCFunction)THPStorage_(writeFile), METH_VARARGS, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Module.cpp pytorch-develop/torch/csrc/Module.cpp --- pytorch-v1.5.0/torch/csrc/Module.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Module.cpp 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/csrc/Module.cpp 2021-07-16 18:19:47.722842110 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11956,7 +11951,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto set_module_attr = [&](const char* name, PyObject* v, bool incref = true) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp pytorch-develop/torch/csrc/tensor/python_tensor.cpp --- pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-16 18:19:47.758843401 +0800 @@ -1,18 +1,35 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -12333,7 +12328,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +} // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.cpp pytorch-develop/torch/csrc/utils/init.cpp --- pytorch-v1.5.0/torch/csrc/utils/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-16 18:19:47.758843401 +0800 @@ -1,6 +1,10 @@ #include #include @@ -12421,7 +12416,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.h pytorch-develop/torch/csrc/utils/init.h --- pytorch-v1.5.0/torch/csrc/utils/init.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.h 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/init.h 2021-07-16 18:19:47.758843401 +0800 @@ -8,4 +8,7 @@ void initThroughputBenchmarkBindings(PyObject* module); @@ -12432,7 +12427,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h pytorch-develop/torch/csrc/utils/python_arg_parser.h --- pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-16 18:19:47.758843401 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., 
Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12467,7 +12462,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return at::Device(device_str); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp pytorch-develop/torch/csrc/utils/tensor_layouts.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-16 18:19:47.762843544 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12498,7 +12493,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= registerLayoutObject((THPLayout*)strided_layout, at::Backend::QuantizedCPU); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp pytorch-develop/torch/csrc/utils/tensor_new.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-16 18:19:47.762843544 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -12634,7 +12629,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } else if(expected_layout == c10::kSparse) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp pytorch-develop/torch/csrc/utils/tensor_types.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-13 15:30:59.038319432 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-16 18:19:47.762843544 +0800 @@ -1,58 +1,91 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12847,7 +12842,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def get_rng_state(): ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributed/distributed_c10d.py pytorch-develop/torch/distributed/distributed_c10d.py --- pytorch-v1.5.0/torch/distributed/distributed_c10d.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-13 15:30:59.042319575 +0800 ++++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-16 18:19:47.762843544 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -12928,7 +12923,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributions/von_mises.py pytorch-develop/torch/distributions/von_mises.py --- pytorch-v1.5.0/torch/distributions/von_mises.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributions/von_mises.py 2021-07-13 15:30:59.042319575 +0800 ++++ pytorch-develop/torch/distributions/von_mises.py 2021-07-16 18:19:47.766843687 +0800 @@ -1,140 +1,140 @@ -from __future__ import absolute_import, division, print_function - @@ -13212,7 +13207,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + _log_modified_bessel_fn(self.concentration, order=0)).exp() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/__init__.py pytorch-develop/torch/__init__.py --- pytorch-v1.5.0/torch/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/__init__.py 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/torch/__init__.py 2021-07-16 18:19:47.714841823 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -13255,7 +13250,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt pytorch-develop/torch/lib/c10d/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-13 15:30:59.046319718 +0800 ++++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-16 18:19:47.766843687 +0800 @@ -28,6 +28,10 @@ option(USE_C10D_NCCL "USE C10D NCCL" ON) endif() @@ -13308,7 +13303,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= copy_header(ProcessGroupMPI.hpp) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt pytorch-develop/torch/lib/libshm/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-13 15:30:59.046319718 +0800 ++++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-16 18:19:47.770843831 +0800 @@ -37,8 +37,11 @@ SET_TARGET_PROPERTIES(shm PROPERTIES PREFIX "lib" @@ -13365,7 +13360,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -_maybe_indices_t = _scalar_or_tuple_2_t[Tensor] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/functional.py pytorch-develop/torch/nn/functional.py --- pytorch-v1.5.0/torch/nn/functional.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/functional.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/functional.py 2021-07-16 18:19:47.770843831 +0800 
@@ -1611,7 +1611,7 @@ else: output = input.matmul(weight.t()) @@ -13388,7 +13383,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -from . import parallel as parallel diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/batchnorm.py pytorch-develop/torch/nn/modules/batchnorm.py --- pytorch-v1.5.0/torch/nn/modules/batchnorm.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-16 18:19:47.770843831 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13420,7 +13415,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= self.register_parameter('running_var', None) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/module.py pytorch-develop/torch/nn/modules/module.py --- pytorch-v1.5.0/torch/nn/modules/module.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/module.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/modules/module.py 2021-07-16 18:19:47.774843974 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -13563,7 +13558,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return t.to(device, dtype if t.is_floating_point() else None, non_blocking, memory_format=convert_to_format) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/normalization.py pytorch-develop/torch/nn/modules/normalization.py --- pytorch-v1.5.0/torch/nn/modules/normalization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-13 15:30:59.050319862 +0800 ++++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-16 18:19:47.774843974 +0800 @@ -128,13 +128,14 @@ """ __constants__ = ['normalized_shape', 'eps', 'elementwise_affine'] @@ -13596,7 +13591,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return '{normalized_shape}, eps={eps}, ' \ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/transformer.pyi.in pytorch-develop/torch/nn/modules/transformer.pyi.in --- pytorch-v1.5.0/torch/nn/modules/transformer.pyi.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/transformer.pyi.in 2021-07-13 15:30:59.054320005 +0800 ++++ pytorch-develop/torch/nn/modules/transformer.pyi.in 2021-07-16 18:19:47.774843974 +0800 @@ -1,60 +1,60 @@ -from ..init import xavier_uniform_ -from .activation import MultiheadAttention @@ -13756,7 +13751,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - module_kwargs: Optional[Any] = ...) -> Tensor: ... 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/parallel/distributed.py pytorch-develop/torch/nn/parallel/distributed.py --- pytorch-v1.5.0/torch/nn/parallel/distributed.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-13 15:30:59.054320005 +0800 ++++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-16 18:19:47.774843974 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14107,7 +14102,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def remove_weight_norm(module: T_module, name: str = ...) -> T_module: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/onnx/symbolic_opset9.py pytorch-develop/torch/onnx/symbolic_opset9.py --- pytorch-v1.5.0/torch/onnx/symbolic_opset9.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-13 15:30:59.054320005 +0800 ++++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-16 18:19:47.778844118 +0800 @@ -1621,14 +1621,23 @@ slices = [sym_help._slice_helper(g, w, axes=[0], starts=[x * n], ends=[y * n]) for x, y in intervals] return g.op('Concat', *slices, axis_i=0) @@ -14185,7 +14180,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., lr_decay: float=..., weight_decay: float=..., initial_accumulator_value: float=..., eps: float=...) -> None: ... 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/optim/adamax.py pytorch-develop/torch/optim/adamax.py --- pytorch-v1.5.0/torch/optim/adamax.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/optim/adamax.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/optim/adamax.py 2021-07-16 18:19:47.778844118 +0800 @@ -80,8 +80,8 @@ exp_inf.mul_(beta2).unsqueeze(0), grad.abs().add_(eps).unsqueeze_(0) @@ -14362,7 +14357,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=...) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/serialization.py pytorch-develop/torch/serialization.py --- pytorch-v1.5.0/torch/serialization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/serialization.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/serialization.py 2021-07-16 18:19:47.778844118 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -14446,7 +14441,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def location_tag(storage): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/storage.py pytorch-develop/torch/storage.py --- pytorch-v1.5.0/torch/storage.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/storage.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/storage.py 2021-07-16 18:19:47.778844118 +0800 @@ -7,6 +7,7 @@ class _StorageBase(object): @@ -14466,7 +14461,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= else: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/tensor.py pytorch-develop/torch/tensor.py --- pytorch-v1.5.0/torch/tensor.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/tensor.py 2021-07-13 15:30:59.058320149 +0800 ++++ pytorch-develop/torch/tensor.py 2021-07-16 18:19:47.778844118 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14528,7 +14523,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def __reversed__(self): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_tensor_str.py pytorch-develop/torch/_tensor_str.py --- pytorch-v1.5.0/torch/_tensor_str.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_tensor_str.py 2021-07-13 15:30:58.994317854 +0800 ++++ pytorch-develop/torch/_tensor_str.py 2021-07-16 18:19:47.718841966 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -14582,7 +14577,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= has_default_dtype = self.dtype in (torch.get_default_dtype(), torch.int64, torch.bool) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/dataloader.py pytorch-develop/torch/utils/data/dataloader.py --- pytorch-v1.5.0/torch/utils/data/dataloader.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-13 15:30:59.062320292 +0800 ++++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-16 18:19:47.782844261 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14791,7 +14786,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, sampler: Sampler[int], batch_size: int, drop_last: bool) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py pytorch-develop/torch/utils/data/_utils/pin_memory.py --- pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-13 15:30:59.062320292 +0800 ++++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-16 18:19:47.782844261 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -14852,7 +14847,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/__init__.py pytorch-develop/torch/utils/__init__.py --- pytorch-v1.5.0/torch/utils/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/__init__.py 2021-07-13 15:30:59.062320292 +0800 ++++ pytorch-develop/torch/utils/__init__.py 2021-07-16 18:19:47.782844261 +0800 @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals @@ -14863,7 +14858,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def set_module(obj, mod): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_utils.py pytorch-develop/torch/_utils.py --- pytorch-v1.5.0/torch/_utils.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_utils.py 2021-07-13 15:30:58.998317998 +0800 ++++ pytorch-develop/torch/_utils.py 2021-07-16 18:19:47.718841966 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
diff --git a/src/aten/src/ATen/native/native_functions.yaml b/src/aten/src/ATen/native/native_functions.yaml index 30c7a8aeb1..74c22e5b3a 100644 --- a/src/aten/src/ATen/native/native_functions.yaml +++ b/src/aten/src/ATen/native/native_functions.yaml @@ -7584,16 +7584,12 @@ dispatch: CPU: reflection_pad2d_out_cpu CUDA: reflection_pad2d_out_cuda - npu_dispatch: - NPU: reflection_pad2d_out_npu - func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor python_module: nn dispatch: CPU: reflection_pad2d_cpu CUDA: reflection_pad2d_cuda - npu_dispatch: - NPU: reflection_pad2d_npu - func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn @@ -8548,4 +8544,12 @@ - func: npu_bert_apply_adam(Tensor(a!) var, Tensor(b!) m, Tensor(c!) v, Scalar lr, Scalar beta1, Scalar beta2, Scalar epsilon, Tensor grad, Scalar max_grad_norm, Scalar global_grad_norm, Scalar weight_decay) -> (Tensor(a!), Tensor(b!), Tensor(c!)) npu_dispatch_only: - NPU: bert_apply_adam_npu \ No newline at end of file + NPU: bert_apply_adam_npu + +- func: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor + npu_dispatch_only: + NPU: giou_npu + +- func: npu_giou_backward(Tensor grad, Tensor bboxes, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> (Tensor, Tensor) + npu_dispatch_only: + NPU: giou_backward_npu \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/ArgminKernelNpu.cpp b/src/aten/src/ATen/native/npu/ArgminKernelNpu.cpp old mode 100644 new mode 100755 diff --git a/src/aten/src/ATen/native/npu/CatKernelNpu.cpp b/src/aten/src/ATen/native/npu/CatKernelNpu.cpp index 8c3ac87647..4bc949120d 100644 --- a/src/aten/src/ATen/native/npu/CatKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/CatKernelNpu.cpp @@ -154,6 +154,20 @@ Tensor& _cat_out_npu(Tensor& result, TensorList tensors, int64_t dim) { } Tensor& cat_out_npu(Tensor& result, 
TensorList tensors, int64_t dim) { + SmallVector inputTensors = cat_dest_tensor_list(tensors); + + int64_t dim_post_expr = 0; + if (inputTensors.size() > 0) { + dim_post_expr = inputTensors[0].dim(); + } + dim = CalcuOpUtil::make_wrap_dim(dim, dim_post_expr); + auto outputSize = cat_npu_output_size(inputTensors, dim); + OpPreparation::CheckOut( + {tensors[0]}, + result, + ACL_FORMAT_ND, + tensors[0].scalar_type(), + outputSize); return at::_cat_out(result, tensors, dim); } diff --git a/src/aten/src/ATen/native/npu/ConstantPadNdKernelNpu.cpp b/src/aten/src/ATen/native/npu/ConstantPadNdKernelNpu.cpp index 442034bec2..e29b20600b 100644 --- a/src/aten/src/ATen/native/npu/ConstantPadNdKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/ConstantPadNdKernelNpu.cpp @@ -88,13 +88,21 @@ Tensor constant_pad_nd_npu(const Tensor& self, IntArrayRef pad, Scalar value){ } if (is_backward(pad)) { - TORCH_CHECK(self.dim() == 4, "only support 4D now, but self.dim is",self.dim()); - TORCH_CHECK(pad.size() == 4, "Length of pad must is 4 now, but pad.size() is", pad.size()); + TORCH_CHECK(self.dim() == 4 || self.dim() == 5, + "Only support 4D and 5D now, but self.dim is",self.dim()); + TORCH_CHECK(pad.size() == 4 || pad.size() == 6, + "Length of pad must is 4 or 6 now, but pad.size() is", pad.size()); SmallVector begin_list = {0, 0, -pad[2], -pad[0]}; SmallVector end_list = {self.size(0), self.size(1), self.size(-2) + pad[3], self.size(-1) + pad[1]}; SmallVector strides = {1, 1, 1, 1}; + if (self.dim() == 5) { + begin_list = {0, 0, -pad[4], -pad[2], -pad[0]}; + end_list = {self.size(0), self.size(1), self.size(-3) + pad[5], self.size(-2) + pad[3], self.size(-1) + pad[1]}; + strides = {1, 1, 1, 1, 1}; + } + return at::npu_indexing(self, begin_list, end_list, strides); } @@ -109,4 +117,4 @@ Tensor constant_pad_nd_npu(const Tensor& self, IntArrayRef pad, Scalar value){ } } // namespace native -} // namespace at \ No newline at end of file +} // namespace at diff --git 
a/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp b/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp index 3a22c9157d..517c96b7f2 100644 --- a/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/EmbeddingRenormKernelNpu.cpp @@ -12,71 +12,39 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" +#include "ATen/native/npu/utils/OpAdapter.h" namespace at { namespace native { using namespace at::native::npu; -SmallVector embedding_renorm_npu_input( - const SmallVector& inputTensor) { - return CalcuOpUtil::create_npu_input_tensor_desc(inputTensor); -} - -SmallVector embedding_renorm_npu_output( - const SmallVector& outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector embedding_renorm_npu_attr( - double max_norm, - double norm_type){ - int64_t dim = 0; - float max_norm_float = (float) max_norm; - float norm_type_float = (float) norm_type; - NPUAttrDesc npuAttrScalarP = NPUAttrDesc("p", norm_type_float); - NPUAttrDesc npuAttrScalarMaxnorm = NPUAttrDesc("maxnorm", max_norm_float); - NPUAttrDesc npuAttrDim = NPUAttrDesc("dim", dim); - SmallVector attrs = {npuAttrScalarP, npuAttrDim, npuAttrScalarMaxnorm}; - return attrs; -} -SmallVector embedding_gather2d_npu_attr() { - NPUAttrDesc npuAttrAxis = NPUAttrDesc("axis", (int64_t)0); - SmallVector attrs = {npuAttrAxis}; - return attrs; -} - -SmallVector embedding_renorm_scatter_update_npu_attr(){ - NPUAttrDesc npuAttrAxis = NPUAttrDesc("use_locking", false); - SmallVector attrs = {npuAttrAxis}; - return attrs; -} - Tensor& embedding_renorm_gather2d_out_npu( Tensor& result, const Tensor& self, - const Tensor& indices - ){ -// execute the NPU operate GatherV2D - auto inputs = embedding_renorm_npu_input({self, indices}); - auto 
outputs = embedding_renorm_npu_output({result}); - auto attrs = embedding_gather2d_npu_attr(); - CalcuOpUtil::execute_npu_operate("GatherV2D", inputs, outputs, attrs); + const Tensor& indices) { + OpCommand cmd; + cmd.Name("GatherV2D") + .Input(self) + .Input(indices) + .Output(result) + .Attr("axis", (int64_t)0) + .Run(); return result; } Tensor& embedding_renorm_execute_out_npu( Tensor& result, const Tensor& self, - double max_norm, - double norm_type){ -//execute the NPU operate Renorm - auto inputs = embedding_renorm_npu_input({self}); - auto outputs = embedding_renorm_npu_output({result}); - auto attrs = embedding_renorm_npu_attr(max_norm, norm_type); - CalcuOpUtil::execute_npu_operate("Renorm", inputs, outputs, attrs); + double max_norm, + double norm_type) { + OpCommand cmd; + cmd.Name("Renorm") + .Input(self) + .Output(result) + .Attr("p", (float)norm_type) + .Attr("dim", (int64_t)0) + .Attr("maxnorm", (float)max_norm) + .Run(); return result; } @@ -85,82 +53,84 @@ Tensor& embedding_renorm_scatter_update_out_npu( Tensor& result, const Tensor& self, const Tensor& indices, - const Tensor& update){ - auto inputs = embedding_renorm_npu_input({self, indices, update}); - auto outputs = embedding_renorm_npu_output({result}); - auto attrs = embedding_renorm_scatter_update_npu_attr(); - CalcuOpUtil::execute_npu_operate("ScatterUpdate", inputs, outputs, attrs); + const Tensor& update) { + OpCommand cmd; + cmd.Name("ScatterUpdate") + .Input(self) + .Input(indices) + .Input(update) + .Output(result) + .Attr("use_locking", false) + .Run(); return result; } - Tensor& embedding_renorm_out_npu( Tensor& result, const Tensor& self, const Tensor& indices, - Tensor& mid_input, - Tensor& mid_output, - double max_norm, + double max_norm, double norm_type){ -// execute the NPU operate GatherV2D,generate new tensor by indices - embedding_renorm_gather2d_out_npu( - mid_input, - self, - indices); -//execute the NPU operate Renorm - embedding_renorm_execute_out_npu( - mid_output, - 
mid_input, - max_norm, - norm_type); -// executing the NPU operator ScatterUpdate - embedding_renorm_scatter_update_out_npu( - result, - self, - indices, - mid_output); + + //get the outSize of GatherV2 , the middle tensor + SmallVector midSize = {indices.size(0), self.size(1)}; + Tensor mid_input = OpPreparation::ApplyTensor(self, midSize); + Tensor mid_output = OpPreparation::ApplyTensor(self, midSize); + + // execute the NPU operate GatherV2D, generate new tensor by indices + embedding_renorm_gather2d_out_npu(mid_input,self,indices); + + //execute the NPU operate Renorm + embedding_renorm_execute_out_npu(mid_output, mid_input, max_norm, norm_type); + + //execute the NPU operate ZerosLike or RangeD, generate new tensor by indices.numel() + Tensor mid_output_copy = mid_output.clone(); + auto num_indices = indices.numel(); + Tensor input_indices; + + // RangeD not support range(0,0) + if (num_indices - 1 == 0) { + input_indices = at::zeros({1}, self.options()).to(at::kLong); + } else { + input_indices = at::range(0, num_indices-1, self.options()).to(at::kLong); + } + + //execute the NPU operate MUL, generate change result + auto num_mid_output = mid_output.numel(); + resize_npu_(mid_output_copy, num_mid_output); + Tensor scalar_out = OpPreparation::ApplyTensor(self, {num_indices, 1}); + embedding_renorm_gather2d_out_npu(scalar_out, mid_output_copy, input_indices); + Tensor out_res = mid_input * scalar_out; + + // executing the NPU operator ScatterUpdate + embedding_renorm_scatter_update_out_npu(result, self, indices, out_res); + return result; } Tensor& embedding_renorm_npu_( Tensor& self, const Tensor& indices, - double max_norm, + double max_norm, double norm_type) { -//check dim and type + //check dim and type auto self_arg = TensorArg(self, "self", 1); auto indices_arg = TensorArg(indices, "indices", 2); checkDim("embedding_renorm_", self_arg, 2); checkScalarType("embedding_renorm_", indices_arg, kLong); -// indices must be int64 in pytorch, but npu can only 
support int32 - auto indices_int32 = indices.to("cpu"); - indices_int32 = indices_int32.to(at::kInt); - indices_int32 = indices_int32.to("npu"); - -//resize indices to 1D + //resize indices to 1D Tensor indices_copy = indices.clone(); auto num_indices = indices.numel(); resize_npu_(indices_copy, num_indices); - - SmallVector inputs = {self}; - SmallVector outputs = {self}; - CalcuOpUtil::check_memory_over_laps(inputs, outputs); - -//get the outSize of GatherV2 , the middle tensor - auto midSize = embedding_renorm_mid_npu_output_size(self, indices_copy); - Tensor mid = at::empty_with_format(midSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - Tensor mid1 = at::empty_with_format(midSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - -//inplace operate - if (!NpuUtils::check_match(&self)) { - Tensor contiguousSelf = NpuUtils::format_contiguous(self); - Tensor result = embedding_renorm_out_npu(contiguousSelf, contiguousSelf, indices_copy, mid, mid1, max_norm, norm_type); - NpuUtils::format_fresh_view(self, result); - } else { - embedding_renorm_out_npu(self, self, indices_copy, mid, mid1, max_norm, norm_type); - } + + OpPipeWithDefinedOut pipe; + pipe.CheckMemory({self, indices_copy}, {self}) + .Func([&self, &indices_copy, max_norm, norm_type](Tensor& result){ + embedding_renorm_out_npu(self, self, indices_copy, max_norm, norm_type);}) + .Call(self); + return self; } diff --git a/src/aten/src/ATen/native/npu/GiouBackwardKernelNpu.cpp b/src/aten/src/ATen/native/npu/GiouBackwardKernelNpu.cpp new file mode 100644 index 0000000000..5672a390df --- /dev/null +++ b/src/aten/src/ATen/native/npu/GiouBackwardKernelNpu.cpp @@ -0,0 +1,73 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +std::tuple +giou_backward_inner_out_npu( + Tensor& dbboxes, + Tensor& dgtboxes, + const Tensor& grad, + const Tensor& bboxes, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + string mode_str = mode == 1 ? "iof" : "iou"; + + OpCommand cmd; + cmd.Name("GIoUGrad") + .Input(grad) + .Input(bboxes) + .Input(gtboxes) + .Output(dbboxes) + .Output(dgtboxes) + .Attr("trans", trans) + .Attr("is_cross", is_cross) + .Attr("mode", mode_str) + .Run(); + return std::tie(dbboxes, dgtboxes); +} + +std::tuple +giou_backward_npu( + const Tensor& grad, + const Tensor& bboxes, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + TORCH_CHECK(!trans && !is_cross && mode == 0, + "giou backward only support trans==False, ", + "is_cross==False, ", + "mode==0('iou') current version ", + "if you need to back propagation, ", + "please ensure your parameter is correct!"); + // Op need form of [n] grad + Tensor gradCp = at::squeeze(grad, 0); + Tensor dbboxes = OpPreparation::ApplyTensor(bboxes); + Tensor dgtboxes = OpPreparation::ApplyTensor(gtboxes); + + giou_backward_inner_out_npu(dbboxes, dgtboxes, gradCp, bboxes, gtboxes, trans, is_cross, mode); + return std::tie(dbboxes, dgtboxes); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/GiouKernelNpu.cpp b/src/aten/src/ATen/native/npu/GiouKernelNpu.cpp new file mode 100644 index 0000000000..5360ee39c8 --- /dev/null +++ 
b/src/aten/src/ATen/native/npu/GiouKernelNpu.cpp @@ -0,0 +1,87 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +SmallVector giou_output_size( + const Tensor& self, + const Tensor& gtboxes, + bool is_cross){ + SmallVector output_size; + if(is_cross){ + output_size = {gtboxes.size(0), self.size(0)}; + } else { + output_size = {1, self.size(0)}; + } + return output_size; +} + +Tensor& giou_inner_out_npu( + Tensor& result, + const Tensor& self, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + auto output_size = giou_output_size(self, gtboxes, is_cross); + OpPreparation::CheckOut( + {self}, + result, + self, + output_size); + string mode_str = mode == 1 ? 
"iof" : "iou"; + + OpCommand cmd; + cmd.Name("GIoU") + .Input(self) + .Input(gtboxes) + .Output(result) + .Attr("trans", trans) + .Attr("is_cross", is_cross) + .Attr("mode", mode_str) + .Run(); + return result; +} + +Tensor giou_npu( + const Tensor& self, + const Tensor& gtboxes, + bool trans, + bool is_cross, + int64_t mode){ + TORCH_CHECK(!trans && !is_cross && mode == 0, + "giou backward only support trans==False, ", + "is_cross==False, ", + "mode==0('iou') current version ", + "if you need to back propagation, ", + "please ensure your parameter is correct!"); + // Op need form of [n, 4], but pass should be [4, n]; + Tensor selfCp = self.permute({1, 0}); + Tensor gtboxesCp = gtboxes.permute({1, 0}); + auto output_size = giou_output_size(selfCp, gtboxesCp, is_cross); + Tensor result = OpPreparation::ApplyTensor(selfCp, output_size); + + giou_inner_out_npu(result, selfCp, gtboxesCp, trans, is_cross, mode); + result = result.permute({1, 0}); + return result; +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp b/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp index 9cbbf8f841..6814d60261 100644 --- a/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/IndexPutKernelNpu.cpp @@ -26,6 +26,9 @@ Tensor& index_put_nocheck( const TensorList& indices, const Tensor& value, bool accumulate) { + if (value.numel() == 0) { + return result; + } // masks corresponds to indices. 0 indicates undefined tensor. 
SmallVector masks; std::vector allDefinedIndices; diff --git a/src/aten/src/ATen/native/npu/MinKernelNpu.cpp b/src/aten/src/ATen/native/npu/MinKernelNpu.cpp index f45ae27e9b..680ec91179 100644 --- a/src/aten/src/ATen/native/npu/MinKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MinKernelNpu.cpp @@ -67,21 +67,32 @@ tuple min_out_npu( } tuple min_npu(const Tensor& self, int64_t dim, bool keepdim) { + Tensor selfCast = self; + if(self.dtype() == ScalarType::Bool){ + selfCast = self.to(ScalarType::Float); + } + SmallVector dims = {dim}; - auto outputSize = reduce_ops_npu_output_size(self, dims, keepdim); + auto outputSize = reduce_ops_npu_output_size(selfCast, dims, keepdim); SmallVector indicesSize = outputSize; - auto func = [&self, dim, keepdim](Tensor outputs, Tensor indices) { - min_out_npu_nocheck(outputs, indices, self, dim, keepdim); + auto func = [&selfCast, dim, keepdim](Tensor outputs, Tensor indices) { + min_out_npu_nocheck(outputs, indices, selfCast, dim, keepdim); }; Tensor outputs, indices; OpPipeWithDefinedMultiOut pipe(outputs, indices); - return pipe.ApplyOutputWithSpecailParams<0>(outputSize, self.options(), ACL_FORMAT_ND) - .ApplyOutputWithSpecailParams<1>(indicesSize, self.options().dtype(ScalarType::Int), ACL_FORMAT_NCHW) - .Call(func) - .ReflushOutputDtype<1>(ScalarType::Long) - .Return(); + std::tie(outputs, indices) = pipe.ApplyOutputWithSpecailParams<0>(outputSize, selfCast.options(), ACL_FORMAT_ND) + .ApplyOutputWithSpecailParams<1>(indicesSize, selfCast.options().dtype(ScalarType::Int), ACL_FORMAT_NCHW) + .Call(func) + .ReflushOutputDtype<1>(ScalarType::Long) + .Return(); + + if(self.dtype() == ScalarType::Bool){ + outputs = outputs.to(ScalarType::Bool); + } + + return std::tie(outputs, indices); } tuple min_out_npu( diff --git a/src/aten/src/ATen/native/npu/MmKernelNpu.cpp b/src/aten/src/ATen/native/npu/MmKernelNpu.cpp index 91af42d2af..28ab0aa981 100644 --- a/src/aten/src/ATen/native/npu/MmKernelNpu.cpp +++ 
b/src/aten/src/ATen/native/npu/MmKernelNpu.cpp @@ -18,8 +18,6 @@ #include "ATen/native/npu/utils/KernelNpuOutputSize.h" #include "ATen/native/npu/utils/NpuUtils.h" #include "ATen/native/npu/utils/OpAdapter.h" -#include "ATen/native/npu/common/InnerNpuNativeFunction.h" -#include "ATen/native/npu/frame/StorageDescHelper.h" namespace at { namespace native { @@ -28,7 +26,7 @@ using namespace at::native::npu; // Flexible transpose judgement for view+transpose+Matmul, // i.e., tensors with dim=2 and base_size_.size=3 can also be Matmul directly! bool is_transpose_last_two_dims_flex(const Tensor& tensor) { - if (tensor.dim() != 2) { + if (tensor.dim() < 2 || tensor.dim() > 3) { return false; } int64_t numel = 1; @@ -115,17 +113,10 @@ Tensor mm_npu(const Tensor& self, const Tensor& mat2) { // Matmul cannot directly deal with view+transposed tensor with NZ format, so Transdata is necessary if (self.sizes().size() != self_desc.base_sizes_.size()) { selfFormatCast = OpPreparation::CastBackToOriFormat(self); - // refresh storage desc info [origin shape and storage shape] of reshaped Tensor - if (is_transpose_last_two_dims_flex(selfFormatCast)) { - StorageDescHelper::ReflushDescBySelf(selfFormatCast.transpose(-2, -1)); - } } if (mat2.sizes().size() != mat2_desc.base_sizes_.size()) { mat2FormatCast = OpPreparation::CastBackToOriFormat(mat2); - if (is_transpose_last_two_dims_flex(mat2FormatCast)) { - StorageDescHelper::ReflushDescBySelf(mat2FormatCast.transpose(-2, -1)); - } } // construct the output tensor of the NPU diff --git a/src/aten/src/ATen/native/npu/NormKernelNpu.cpp b/src/aten/src/ATen/native/npu/NormKernelNpu.cpp index 8308e4763a..2f25260240 100644 --- a/src/aten/src/ATen/native/npu/NormKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/NormKernelNpu.cpp @@ -38,7 +38,7 @@ int64_t calculate_p(optional p) { // norm.dtype_out -Tensor& norm_out_npu( +Tensor& norm_out_npu_nocheck( Tensor& out, const Tensor& self, optional p, @@ -80,11 +80,36 @@ Tensor& norm_out_npu( optional 
p, IntArrayRef dim, bool keepdim) { - norm_out_npu(out, self, p, dim, keepdim, self.scalar_type()); + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + OpPreparation::CheckOut( + {self}, + out, + ACL_FORMAT_ND, + self.scalar_type(), + outputSize); + norm_out_npu_nocheck(out, self, p, dim, keepdim, self.scalar_type()); return out; } +Tensor& norm_out_npu( + Tensor& out, + const Tensor& self, + optional p, + IntArrayRef dim, + bool keepdim, + ScalarType dtype) { + auto outputSize = reduce_ops_npu_output_size(self, dim, keepdim); + OpPreparation::CheckOut( + {self}, + out, + ACL_FORMAT_ND, + self.scalar_type(), + outputSize); + norm_out_npu_nocheck(out, self, p, dim, keepdim, dtype); + + return out; +} // norm.ScalarOpt_dim_dtype Tensor norm_npu( const Tensor& self, @@ -99,7 +124,7 @@ Tensor norm_npu( Tensor out = OpPreparation::ApplyTensorWithSizes(outputSize, self.options().dtype(dtype)); // calculate the output result of the NPU - norm_out_npu(out, self, p, dim, keepdim, dtype); + norm_out_npu_nocheck(out, self, p, dim, keepdim, dtype); return out; } diff --git a/src/aten/src/ATen/native/npu/ReflectionPad2dKernelNpu.cpp b/src/aten/src/ATen/native/npu/ReflectionPad2dKernelNpu.cpp deleted file mode 100644 index c3daebe725..0000000000 --- a/src/aten/src/ATen/native/npu/ReflectionPad2dKernelNpu.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2020, Huawei Technologies.All rights reserved. -// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/NpuUtils.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; -SmallVector reflection_pad2d_npu_input(SmallVector inputs) { - return CalcuOpUtil::create_npu_input_tensor_desc(inputs); -} - -SmallVector reflection_pad2d_npu_output(const SmallVector &outputTensor) { - return CalcuOpUtil::create_npu_output_tensor_desc(outputTensor); -} - -SmallVector reflection_pad2d_npu_attr(const Tensor& input, IntArrayRef paddingSize) { - int64_t pad_l = 0; - int64_t pad_r = 0; - int64_t pad_t = 0; - int64_t pad_b = 0; - int64_t pad_zeros = 0; - - TORCH_CHECK(paddingSize.size() == 4, "padding size is expected to be 4"); - - pad_l = paddingSize[0]; - pad_r = paddingSize[1]; - pad_t = paddingSize[2]; - pad_b = paddingSize[3]; - - SmallVector vectorInt = {}; - SmallVector, SIZE> vectorVectorInt = {}; - SmallVector vectorListInt = {}; - SmallVector paddingsVector = array_to_small_vector(paddingSize); - paddingsVector.resize(input.dim(), 0); - - for (int i = 0; i < paddingsVector.size(); i ++) { - if (i<2) { - vectorInt.emplace_back(pad_zeros); - vectorInt.emplace_back(pad_zeros); - } - else if (i == 2) { - vectorInt.emplace_back(pad_t); - vectorInt.emplace_back(pad_b); - } - else { - vectorInt.emplace_back(pad_l); - vectorInt.emplace_back(pad_r); - } - vectorVectorInt.emplace_back(vectorInt); - vectorInt.clear(); - vectorListInt.emplace_back(IntArrayRef(vectorVectorInt.back())); - } - int64_t constant_values = 0; - // string mode = "constant"; - string mode = "reflect"; - bool padding_contiguous = true; - NPUAttrDesc npuAttrConstantValues = NPUAttrDesc("constant_values", constant_values); - NPUAttrDesc npuAttrMode = NPUAttrDesc("mode", mode); - NPUAttrDesc npuAttrPaddingContiguous = NPUAttrDesc("padding_contiguous", padding_contiguous); - NPUAttrDesc npuAttrPadding = 
NPUAttrDesc("paddings", vectorListInt); - SmallVector attrs = { - npuAttrPadding, - npuAttrConstantValues, - npuAttrMode, - npuAttrPaddingContiguous - }; - return attrs; -} - -Tensor& reflection_pad2d_out_npu_nocheck(Tensor& out, const Tensor& self, IntArrayRef padding) { - //constructs the input and output NPUTensorDesc - auto inputs = reflection_pad2d_npu_input({self}); - auto outputs = reflection_pad2d_npu_output({out}); - - //constructs the attr of the NPUAttrDesc - auto attrs = reflection_pad2d_npu_attr(self, padding); - - //executing the NPU operator - CalcuOpUtil::execute_npu_operate("PadV3D", inputs, outputs, attrs); - - return out; -} - -Tensor& reflection_pad2d_out_npu(Tensor& result, const Tensor& self, IntArrayRef padding){ - //calculate the output size - auto outputSize = reflection_pad2d_npu_output_size(self, padding); - //construct the output tensor of the NPU - result = at::empty_with_format(outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - OpPreparation::CheckOut( - {self}, - result, - CalcuOpUtil::get_tensor_npu_format(self), - self.scalar_type(), - outputSize); - reflection_pad2d_out_npu_nocheck(result, self, padding); - - return result; -} - -Tensor reflection_pad2d_npu(const Tensor& self, IntArrayRef padding) { - //calculate the output size - auto outputSize = reflection_pad2d_npu_output_size(self, padding); - //construct the output tensor of the NPU - Tensor out = at::empty_with_format(outputSize, self.options(), CalcuOpUtil::get_tensor_npu_format(self)); - - //calculate the output result of the NPU - reflection_pad2d_out_npu_nocheck(out, self, padding); - - return out; -} -} -} // namespace at::native diff --git a/src/aten/src/ATen/native/npu/ReplicationPad2dKernelNpu.cpp b/src/aten/src/ATen/native/npu/ReplicationPad2dKernelNpu.cpp old mode 100644 new mode 100755 diff --git a/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp b/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp index 2890926250..bf72d425e5 
100644 --- a/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp +++ b/src/aten/src/ATen/native/npu/common/FormatCastHelper.cpp @@ -36,7 +36,9 @@ void FormatCastHelper::format_cast_as_base_format(const Tensor& src, aclFormat f AT_ASSERT(FormatHelper::IsBaseFormatType(src), "src format must be base format"); auto& src_desc = src.storage().unsafeGetStorageImpl()->npu_desc_; - src_desc.storage_sizes_ = FormatHelper::GetSizeOfBaseFormat(src, format); + // due to CANN principle : if the ori format of a tensor is the + // same as the npu format, then its base shape must be same as storage shape + // so we should not change the storage shape when format cast between base format src_desc.origin_format_ = format; src_desc.npu_format_ = format; return; diff --git a/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp b/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp index 46054203a6..f84a0656c0 100644 --- a/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/convolution/ConvolutionKernelNpu.cpp @@ -28,18 +28,6 @@ constexpr int output_channels_dim = 1; constexpr int weight_output_channels_dim = 0; constexpr int weight_input_channels_dim = 1; -bool is_depthwise( - const at::Tensor& input, - const at::Tensor& weight, - int64_t groups, - bool transposed) { - return input.is_npu() && !transposed && input.ndimension() == 4 && - input.size(1) == groups && - groups > 1 && // no point if there is only a single group - weight.size(0) % input.size(1) == - 0; // output channels must be a multiple of input channels -} - inline SmallVector expand_dim_if_needed( IntArrayRef list_param, const char* param_name, @@ -261,17 +249,7 @@ Tensor _convolution_npu( } Tensor output; - if (is_depthwise(input, weight, groups, transposed)) { - auto kernel_size = weight.sizes().slice(2); - output = at::thnn_conv_depthwise2d( - input.contiguous(), - weight, - kernel_size, - bias, - stride, - padding, - dilation); - } else 
if (!transposed) { + if (!transposed) { output = at::npu_convolution( input, weight, bias, stride, padding, dilation, groups); } else { diff --git a/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp b/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp index 018f6e2707..d13bccb4c0 100644 --- a/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp +++ b/src/aten/src/ATen/native/npu/frame/FormatHelper.cpp @@ -59,22 +59,6 @@ std::unordered_map FormatHelper::info = { {ACL_FRACTAL_Z_3D, (FormatInfo){ACL_FRACTAL_Z_3D, ACL_FORMAT_NCDHW, InferShapeOfFZ3D, "FRACTAL_Z_3D", true}}, }; -std::unordered_map> FormatHelper::base_format_convert_info = { - {ACL_FORMAT_ND, { - {ACL_FORMAT_NCHW, InferShapeNDToNCHW}, - {ACL_FORMAT_NCDHW, InferShapeNDToNCDHW}, - } - }, - {ACL_FORMAT_NCHW, { - {ACL_FORMAT_ND, InferShapeNCHWToND}, - } - }, - {ACL_FORMAT_NCDHW, { - {ACL_FORMAT_ND, InferShapeNCDHWToND}, - } - }, -}; - bool FormatHelper::IsPadded(const Tensor* tensor) { auto format = tensor->storage().unsafeGetStorageImpl()->npu_desc_.npu_format_; return IsPadded(format); @@ -136,20 +120,6 @@ FormatShape FormatHelper::GetStorageSizes(NPUStorageDesc desc) { return GetStorageSizes(format, ori_size); } -FormatShape FormatHelper::GetSizeOfBaseFormat(const Tensor& src, aclFormat dst_format) { - auto src_format = GetBaseFormat(src); - auto itr = base_format_convert_info.find(src_format); - if (itr != base_format_convert_info.end()) { - auto next_itr = itr->second.find(dst_format); - if (next_itr != itr->second.end()) { - auto src_desc = src.storage().unsafeGetStorageImpl()->npu_desc_; - return next_itr->second(src_desc.storage_sizes_, src_desc.base_sizes_); - } - } - AT_ERROR("unsupport InferShape from ", GetFormatName(src_format), " to ", GetFormatName(dst_format)); - return {}; -} - // namespace { FormatShape InferShapeLessTo4(IntArrayRef dims) { diff --git a/src/aten/src/ATen/native/npu/frame/FormatHelper.h b/src/aten/src/ATen/native/npu/frame/FormatHelper.h index 862ff1b7d3..9f0d1f0242 100644 
--- a/src/aten/src/ATen/native/npu/frame/FormatHelper.h +++ b/src/aten/src/ATen/native/npu/frame/FormatHelper.h @@ -48,7 +48,6 @@ public: static FormatShape GetStorageSizes(aclFormat format, sizeType ori_size); // GetStorageSizes used to calculate the storage sizes of op at npu device at different format. static FormatShape GetStorageSizes(NPUStorageDesc desc); - static FormatShape GetSizeOfBaseFormat(const Tensor& src, aclFormat dst_format); private: static bool IsPadded(aclFormat format); @@ -64,7 +63,6 @@ private: bool isPadded = false; } FormatInfo; static std::unordered_map info; - static std::unordered_map> base_format_convert_info; }; // class FormatHelper // template impl diff --git a/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp b/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp index 1985cbffbb..46abd15c00 100644 --- a/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp +++ b/src/aten/src/ATen/native/npu/interface/EnvVariables.cpp @@ -42,9 +42,7 @@ REGISTER_OPTION_HOOK(ACL_OP_COMPILER_CACHE_DIR, [](const std::string& val) { aclSetCompileopt(aclCompileOpt::ACL_OP_COMPILER_CACHE_DIR, val.c_str()); }) REGISTER_OPTION_HOOK(NPU_FUZZY_COMPILE_BLACKLIST, [](const std::string& val) { - if (CheckFuzzyEnable()) { FuzzyCompileBlacklist::GetInstance().RegisterBlacklist(val); - } }) REGISTER_OPTION_INIT_BY_ENV(PROFILING_MODE) diff --git a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp index a49aa9b994..412d1fc32b 100644 --- a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp +++ b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp @@ -347,7 +347,18 @@ NPUStatus CalcuOpUtil::CreateAclTensorDescInfo( input[i].tensorDescType == NPUTensorDesc::TensorDescType::TENSOR) { Tensor* aclInput = &input[i].tensor; SmallVector dims; - dims = aclInput->storage().get_npu_desc().base_sizes_; + if (opName == "MatMul") { + auto dims_pre = aclInput->sizes(); + if (attrs[i].boolAttrValue == 1) { + 
dims.push_back(dims_pre[1]); + dims.push_back(dims_pre[0]); + } else if (attrs[i].boolAttrValue == 0) { + dims.push_back(dims_pre[0]); + dims.push_back(dims_pre[1]); + } + } else { + dims = aclInput->storage().get_npu_desc().base_sizes_; + } auto storageDims = aclInput->storage().get_npu_desc().storage_sizes_; int64_t numel = 1; for (int j = 0; j < storageDims.size(); j++) { diff --git a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp index 773f25ab30..10672bf113 100644 --- a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp +++ b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.cpp @@ -333,12 +333,6 @@ SmallVector embedding_dense_backward_npu_output_size( return {num_weights, grad_output.size(-1)}; } -SmallVector embedding_renorm_mid_npu_output_size( - const Tensor& self, - const Tensor& indices){ - return {indices.size(0), self.size(1)}; -} - SmallVector equal_npu_output_size(void) { int64_t outputshape = 1; SmallVector outputSize = {outputshape}; diff --git a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h index 9290da7ddd..b676141652 100644 --- a/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h +++ b/src/aten/src/ATen/native/npu/utils/KernelNpuOutputSize.h @@ -182,10 +182,6 @@ SmallVector embedding_dense_backward_npu_output_size( int64_t padding_idx, bool scale_grad_by_freq); -SmallVector embedding_renorm_mid_npu_output_size( - const Tensor& self, - const Tensor& indices); - SmallVector index_npu_output_size( const Tensor& self, TensorList indices); diff --git a/src/aten/src/ATen/utils/DumpUtils.h b/src/aten/src/ATen/utils/DumpUtils.h index 630e9a94f2..f728ed3027 100644 --- a/src/aten/src/ATen/utils/DumpUtils.h +++ b/src/aten/src/ATen/utils/DumpUtils.h @@ -71,6 +71,10 @@ public: void SetValue(const T &value) { value_ = value; } + + void SetName(const string& newName) { + name_ = newName; + } private: 
string name_; T value_; diff --git a/src/aten/src/ATen/utils/LoadUtils.cpp b/src/aten/src/ATen/utils/LoadUtils.cpp index 10ed418f73..f526fb5a22 100644 --- a/src/aten/src/ATen/utils/LoadUtils.cpp +++ b/src/aten/src/ATen/utils/LoadUtils.cpp @@ -89,9 +89,11 @@ namespace at { using stringmap = std::unordered_map; stringmap IrNameMapper = { {"NpuConvolutionBackward", "CudnnConvolutionBackward"}, + {"NativeBatchNormBackward", "CudnnBatchNormBackward"}, }; std::unordered_map IrParamNameMapper = { {"NpuConvolutionBackward", {{"input", "self"},}}, + {"NativeBatchNormBackward", {{"eps", "epsilon"},}}, }; void MaybeMapTensorName(const string& irName, std::vector& tensorDescVec) { @@ -103,6 +105,26 @@ namespace at { } } + template + void MaybeMapValueName(const string& irName, T& value) { + for (auto it = value.begin(); it != value.end(); it++) { + auto valueName = (*it).Name(); + if (IrParamNameMapper[irName].find(valueName) != IrParamNameMapper[irName].end()) { + (*it).SetName(IrParamNameMapper[irName][valueName]); + } + } + } + + template + void MaybeMapScalarName(const string& irName, T& value) { + for (auto it = value.begin(); it != value.end(); it++) { + auto valueName = (*it)->Name(); + if (IrParamNameMapper[irName].find(valueName) != IrParamNameMapper[irName].end()) { + (*it)->SetName(IrParamNameMapper[irName][valueName]); + } + } + } + void MaybeMapName(CommDesc& commDesc, const H5File* file) { std::string h5IRPath = "/" + commDesc.nameIr; if (file->nameExists(h5IRPath)) { @@ -112,6 +134,17 @@ namespace at { auto oriNameIr = commDesc.nameIr; commDesc.nameIr = IrNameMapper[commDesc.nameIr]; MaybeMapTensorName(oriNameIr, commDesc.tensorDescVec); + MaybeMapValueName(oriNameIr, commDesc.int64VecDescVec); + MaybeMapValueName(oriNameIr, commDesc.int64DescVec); + MaybeMapValueName(oriNameIr, commDesc.boolDescVec); + MaybeMapValueName(oriNameIr, commDesc.doubleDescVec); + MaybeMapValueName(oriNameIr, commDesc.optionalDoubleDescVec); + MaybeMapScalarName(oriNameIr, 
commDesc.scalarDescVec); + MaybeMapValueName(oriNameIr, commDesc.optionalInt64DescVec); + MaybeMapScalarName(oriNameIr, commDesc.optionalScalarDescVec); + MaybeMapValueName(oriNameIr, commDesc.scalarTypeDescVec); + MaybeMapValueName(oriNameIr, commDesc.sizePairDescVec); + MaybeMapValueName(oriNameIr, commDesc.longIntArrayDescVec); } } @@ -689,17 +722,23 @@ namespace at { } + void ZeroStrideClear(Tensor& dst, Tensor& src) { + auto strides = dst.strides().vec(); + auto position = std::find(strides.begin(), strides.end(), 0); + if (position != strides.end()) { + dst = dst.select(position - strides.begin(), 0); + src = src.select(position - strides.begin(), 0); + } else { + return; + } + ZeroStrideClear(dst, src); + } + // when the stride of some dim is zero, the tensor may has been "expand", copy should only // process on any axis of that dim // To do: is this kind of copy matches other zero stride cases? void CopyMaybeWithZeroStride(Tensor dst, Tensor src) { - auto strides = dst.strides().vec(); - for (int i = 0; i < strides.size(); i++) { - if (strides[i] == 0) { - dst = dst.select(i, 0); - src = src.select(i, 0); - } - } + ZeroStrideClear(dst, src); dst.copy_(src); } diff --git a/src/tools/autograd/derivatives.yaml b/src/tools/autograd/derivatives.yaml index 1db83b1c5a..ee68e09e8d 100644 --- a/src/tools/autograd/derivatives.yaml +++ b/src/tools/autograd/derivatives.yaml @@ -1691,4 +1691,7 @@ - name: npu_linear(Tensor input, Tensor weight, Tensor? 
bias=None) -> Tensor input, weight: npu_linear_backward(grad, input, weight) - bias: maybe_multiply(grad, 1) \ No newline at end of file + bias: maybe_multiply(grad, 1) + +- name: npu_giou(Tensor self, Tensor gtboxes, bool trans=False, bool is_cross=False, int mode=0) -> Tensor + self, gtboxes: npu_giou_backward(grad, self, gtboxes, trans, is_cross, mode) \ No newline at end of file diff --git a/test/test_npu/test_constant_pad_nd.py b/test/test_npu/test_constant_pad_nd.py deleted file mode 100644 index 59d0bbae99..0000000000 --- a/test/test_npu/test_constant_pad_nd.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestConstantPadNd(TestCase): - - def op_exec_cpu(self, input1, pad_shape): - output = torch.constant_pad_nd(input1, pad_shape) - output = output.numpy() - - return output - - def op_exec_npu(self, input1, pad_shape): - input1 = input1.to("npu") - output = torch.constant_pad_nd(input1, pad_shape) - output = output.to("cpu") - output = output.numpy() - return output - - def test_constant_pad_nd_shape_format(self, device): - shape_format = [ - [[np.float32, 3, (25, 32, 1, 1)], (1,1)], - [[np.float32, 0, [25, 32, 11, 11]], (2,2,2,2)], - [[np.float32, 0, [25, 3, 22, 22]],(2,2,2,2,20,20)], - [[np.float16, 3, [25, 12, 7, 7]], (20,20,20,20)], - [[np.float16, 0, [25, 3, 22, 22]], (20,20,20,20,5,5,5,5)], - [[np.float16, 4, (2, 3, 3, 3)], (1,1,1,20,5,5,5,5)], - [[np.float16, 4, [100, 20, 7, 7]], (0,0,0,0,0,0,0,0)], - [[np.float16, 0, [2,3,4,5]], (1,0,1,0,1,0,1,0)], - [[np.float16, 4, [2]],(0,1)], - [[np.float16, 0, [20,20]],(0,1,0,2)], - [[np.float16, 0, [20,20,20]],(1,1,1,1) ], - [[np.float16, 3, [1,1,1,1]], (1,1)], - [[np.float16, 3, [1]], (1,1)], - [[np.float16, 0, [50, 24, 56, 56]], (100, 100, 100, 100, 100, 100, 100, 100)], - ] - - for item in shape_format: - input_cpu, input_npu = create_common_tensor(item[0], 1, 1) - pad_shape = item[1] - cpu_output = self.op_exec_cpu(input_cpu, pad_shape) - npu_output = self.op_exec_npu(input_npu, pad_shape) - - - self.assertRtolEqual(cpu_output, npu_output) - - - -instantiate_device_type_tests(TestConstantPadNd, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() diff --git a/test/test_npu/test_network_ops/test_abs.py b/test/test_npu/test_network_ops/test_abs.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_add.py 
b/test/test_npu/test_network_ops/test_add.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_addmm.py b/test/test_npu/test_network_ops/test_addmm.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_all.py b/test/test_npu/test_network_ops/test_all.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_any.py b/test/test_npu/test_network_ops/test_any.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_arange.py b/test/test_npu/test_network_ops/test_arange.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_argmax.py b/test/test_npu/test_network_ops/test_argmax.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_avg_pool2d_backward.py b/test/test_npu/test_network_ops/test_avg_pool2d_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_bilinear.py b/test/test_npu/test_network_ops/test_bilinear.py similarity index 74% rename from test/test_npu/test_bilinear.py rename to test/test_npu/test_network_ops/test_bilinear.py index dbb919e5a7..4bfdb837f9 100644 --- a/test/test_npu/test_bilinear.py +++ b/test/test_npu/test_network_ops/test_bilinear.py @@ -33,7 +33,7 @@ class test_bilinear(TestCase): outputs = outputs.cpu().detach().numpy() return outputs - def test_add_common_shape_format1(self, device): + def test_bilinear_common_shape_format1(self, device): shape_format = [ [[np.float32, -1, (10,30)], [np.float32, -1, (10, 40)], [np.float32, -1, (5, 30, 40)], [np.float32, -1, (5,)]], @@ -43,12 +43,12 @@ class test_bilinear(TestCase): [[np.float32, -1, (10, 30, 40, 30)], [np.float32, -1, (10, 30, 40, 30)], [np.float32, -1, (30, 30, 30)], [np.float32, -1, (30,)]], - [[np.float32, -1, (100,3)], [np.float32, -1, (1000, 4)], [np.float32, -1, (5, 3, 4)], + [[np.float32, -1, (100,3)], [np.float32, -1, (100, 4)], [np.float32, -1, (5, 3, 4)], [np.float32, -1, (5,)]], 
[[np.float16, -1, (2, 1, 1, 1)], [np.float16, -1, (2, 1, 1, 1)], [np.float16, -1, (5, 1, 1)], [np.float16, -1, (5,)]], [[np.float16, -1, (2, 50)], [np.float16, -1, (2, 50)], [np.float16, -1, (5, 50, 50)], - [np.float16, -1, (2, 4)]], + [np.float16, -1, (5)]], [[np.float16, -1, (2, 3)], [np.float16, -1, (2, 4)], [np.float16, -1, (2, 3, 4)],], [[np.float16, -1, (2, 3)], [np.float16, -1, (2, 4)], [np.float16, -1, (4, 3, 4)], [np.float16, -1, (4,)]], @@ -61,11 +61,19 @@ class test_bilinear(TestCase): if len(item)>3: cpu_input4, npu_input4 = create_common_tensor(item[3], 0, 1) bias = [cpu_input4, npu_input4] - cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) + if cpu_input1.dtype == torch.float16: + if bias[0] != None: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0].float()).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0]).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) - def test_add_common_shape_format2(self, device): + def test_bilinear_common_shape_format2(self, device): shape_format = [ [[np.int32, -1, (10,30)], [np.int32, -1, (10, 40)], [np.int32, -1, (5, 30, 40)], [np.int32, -1, (5,)]], @@ -87,7 +95,7 @@ class test_bilinear(TestCase): npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) - def test_add_common_shape_format3(self, device): + def test_bilinear_common_shape_format3(self, device): shape_format = [ [[np.float32, 0, (10,30)], [np.float32, 0, (10, 40)], [np.float32, 0, (5, 30, 40)], [np.float32, 0, (5,)]], @@ -97,12 +105,12 @@ class test_bilinear(TestCase): [[np.float32, 0, (10, 30, 40, 30)], [np.float32, 0, (10, 30, 40, 30)], [np.float32, 0, (30, 
30, 30)], [np.float32, 0, (30,)]], - [[np.float32, 0, (100,3)], [np.float32, 0, (1000, 4)], [np.float32, 0, (5, 3, 4)], + [[np.float32, 0, (100,3)], [np.float32, 0, (100, 4)], [np.float32, 0, (5, 3, 4)], [np.float32, 0, (5,)]], [[np.float16, 0, (2, 1, 1, 1)], [np.float16, 0, (2, 1, 1, 1)], [np.float16, 0, (5, 1, 1)], [np.float16, 0, (5,)]], [[np.float16, 0, (2, 50)], [np.float16, 0, (2, 50)], [np.float16, 0, (5, 50, 50)], - [np.float16, 0, (2, 4)]], + [np.float16, 0, (5)]], [[np.float16, 0, (2, 3)], [np.float16, 0, (2, 4)], [np.float16, 0, (2, 3, 4)],], [[np.float16, 0, (2, 3)], [np.float16, 0, (2, 4)], [np.float16, 0, (4, 3, 4)], [np.float16, 0, (4,)]], @@ -115,11 +123,19 @@ class test_bilinear(TestCase): if len(item)>3: cpu_input4, npu_input4 = create_common_tensor(item[3], 0, 1) bias = [cpu_input4, npu_input4] - cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) + if cpu_input1.dtype == torch.float16: + if bias[0] != None: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0].float()).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0]).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) - def test_add_common_shape_format4(self, device): + def test_bilinear_common_shape_format4(self, device): shape_format = [ [[np.float32, 3, (10,30)], [np.float32, 3, (10, 40)], [np.float32, 3, (5, 30, 40)], [np.float32, 3, (5,)]], @@ -129,15 +145,15 @@ class test_bilinear(TestCase): [[np.float32, 3, (10, 30, 40, 30)], [np.float32, 3, (10, 30, 40, 30)], [np.float32, 3, (30, 30, 30)], [np.float32, 3, (30,)]], - [[np.float32, 29, (100,3)], [np.float32, 29, (1000, 4)], [np.float32, 29, (5, 3, 4)], - [np.float32, 29, (5,)]], - [[np.float16, 29, (2, 1, 1, 1)], 
[np.float16, 29, (2, 1, 1, 1)], [np.float16, 29, (5, 1, 1)], - [np.float16, 29, (5,)]], - [[np.float16, 29, (2, 50)], [np.float16, 29, (2, 50)], [np.float16, 29, (5, 50, 50)], - [np.float16, 29, (2, 4)]], - [[np.float16, 29, (2, 3)], [np.float16, 29, (2, 4)], [np.float16, 29, (2, 3, 4)],], - [[np.float16, 29, (2, 3)], [np.float16, 29, (2, 4)], [np.float16, 29, (4, 3, 4)], - [np.float16, 29, (4,)]], + [[np.float32, 2, (100,3)], [np.float32, 2, (100, 4)], [np.float32, 2, (5, 3, 4)], + [np.float32, 2, (5,)]], + [[np.float16, 2, (2, 1, 1, 1)], [np.float16, 2, (2, 1, 1, 1)], [np.float16, 2, (5, 1, 1)], + [np.float16, 2, (5,)]], + [[np.float16, 2, (2, 50)], [np.float16, 2, (2, 50)], [np.float16, 2, (5, 50, 50)], + [np.float16, 2, (5)]], + [[np.float16, 2, (2, 3)], [np.float16, 2, (2, 4)], [np.float16, 2, (2, 3, 4)],], + [[np.float16, 2, (2, 3)], [np.float16, 2, (2, 4)], [np.float16, 2, (4, 3, 4)], + [np.float16, 2, (4,)]], ] for item in shape_format: bias = [None, None] @@ -147,11 +163,18 @@ class test_bilinear(TestCase): if len(item)>3: cpu_input4, npu_input4 = create_common_tensor(item[3], 0, 1) bias = [cpu_input4, npu_input4] - cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) + if cpu_input1.dtype == torch.float16: + if bias[0] != None: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0].float()).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec( + cpu_input1.float(), cpu_input2.float(), cpu_input3.float(), bias[0]).astype(np.float16) + else: + cpu_outputs = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, bias[0]) npu_outputs = self.npu_op_exec(npu_input1, npu_input2, npu_input3, bias[1]) self.assertRtolEqual(cpu_outputs, npu_outputs) instantiate_device_type_tests(test_bilinear, globals(), except_for='cpu') if __name__ == "__main__": - torch.npu.set_device("npu:5") run_tests() diff --git a/test/test_npu/test_network_ops/test_binary_cross_entropy_with_logits_backward.py 
b/test/test_npu/test_network_ops/test_binary_cross_entropy_with_logits_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_bmm.py b/test/test_npu/test_network_ops/test_bmm.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_broadcastToD.py b/test/test_npu/test_network_ops/test_broadcastToD.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_cat.py b/test/test_npu/test_network_ops/test_cat.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_clamp.py b/test/test_npu/test_network_ops/test_clamp.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_constant_pad_nd.py b/test/test_npu/test_network_ops/test_constant_pad_nd.py index 5572e8af6e..06efe9dcf6 100644 --- a/test/test_npu/test_network_ops/test_constant_pad_nd.py +++ b/test/test_npu/test_network_ops/test_constant_pad_nd.py @@ -35,7 +35,6 @@ class TestConstantPadNd(TestCase): def constant_pad_nd_shape_format(self, shape_format): for item in shape_format: - print(item) input_cpu, input_npu = create_common_tensor(item[0], 1, 1) pad_shape = item[1] if input_cpu.dtype == torch.float16: diff --git a/test/test_npu/test_network_ops/test_conv2d.py b/test/test_npu/test_network_ops/test_conv2d.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_conv_depthwise2d_backward.py b/test/test_npu/test_network_ops/test_conv_depthwise2d_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_div.py b/test/test_npu/test_network_ops/test_div.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_dropout.py b/test/test_npu/test_network_ops/test_dropout.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_embedding_backward.py b/test/test_npu/test_network_ops/test_embedding_backward.py old mode 100644 new mode 100755 diff --git 
a/test/test_npu/test_embedding_renorm.py b/test/test_npu/test_network_ops/test_embedding_renorm.py similarity index 97% rename from test/test_npu/test_embedding_renorm.py rename to test/test_npu/test_network_ops/test_embedding_renorm.py index 51f06efe73..2da53426a0 100644 --- a/test/test_npu/test_embedding_renorm.py +++ b/test/test_npu/test_network_ops/test_embedding_renorm.py @@ -26,7 +26,7 @@ class TestEmbeddingRenorm(TestCase): input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) npu_input1 = torch.from_numpy(input1) npu_input2 = torch.LongTensor(np.random.uniform(0,shape[0], int(shape[0]/2,)).astype(np.int32)) - #npu_input2=torch.LongTensor([[0,1,1,0,1],[0,1,1,0,1],[1,0,1,1,2]]) + return npu_input1, npu_input2 def cpu_op_exec(self, input1, input2, max_norm, norm_type): @@ -36,7 +36,6 @@ class TestEmbeddingRenorm(TestCase): output = torch.embedding_renorm_(input1, input2, max_norm=max_norm, norm_type=norm_type) if stype == torch.float16: output = output.half() - output = output.numpy() return output def npu_op_exec(self, input1, input2, max_norm,norm_type): @@ -44,7 +43,6 @@ class TestEmbeddingRenorm(TestCase): input2 = input2.to("npu") output = torch.embedding_renorm_(input1, input2, max_norm=max_norm, norm_type=norm_type) output = output.to("cpu") - output = output.numpy() return output def test_embedding_renorm_float16_2(self, device): @@ -60,7 +58,7 @@ class TestEmbeddingRenorm(TestCase): cpu_input1 = copy.deepcopy(npu_input1) cpu_input2 = copy.deepcopy(npu_input2) npu_output = self.npu_op_exec(npu_input1, npu_input2, 0.2, 0) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.2, 0) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, 0.2, 0) self.assertRtolEqual(cpu_output, npu_output) def test_embedding_renorm_float16_1(self, device): diff --git a/test/test_npu/test_network_ops/test_exp.py b/test/test_npu/test_network_ops/test_exp.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_fill_.py 
b/test/test_npu/test_network_ops/test_fill_.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_floor.py b/test/test_npu/test_network_ops/test_floor.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_fmod.py b/test/test_npu/test_network_ops/test_fmod.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_full.py b/test/test_npu/test_network_ops/test_full.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_ge.py b/test/test_npu/test_network_ops/test_ge.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_gelu_backward.py b/test/test_npu/test_network_ops/test_gelu_backward.py similarity index 77% rename from test/test_npu/test_gelu_backward.py rename to test/test_npu/test_network_ops/test_gelu_backward.py index a21092c621..439e57e28c 100644 --- a/test/test_npu/test_gelu_backward.py +++ b/test/test_npu/test_network_ops/test_gelu_backward.py @@ -33,7 +33,7 @@ class TestGeluBackward(TestCase): z = output.sum() z.backward() res = input1.grad - return res.detach() + return res.detach().numpy() def npu_op_exec(self, input1): input1 = input1.to("npu") @@ -42,44 +42,37 @@ class TestGeluBackward(TestCase): z = output.sum() z.backward() res = input1.grad.to("cpu") - return res.detach() + return res.detach().numpy() def test_gelu_backward_float32_1(self, device): - input1= self.generate_single_data(0, 100, (4,3,1,1), np.float32) + input1= self.generate_single_data(0, 100, (4, 3, 1, 1), np.float32) cpu_input1 = copy.deepcopy(input1) cpu_output = self.cpu_op_exec(cpu_input1) npu_output = self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) def test_gelu_backward_float32_2(self, device): - input1= self.generate_single_data(0, 100, (4,3,10), np.float32) + input1= self.generate_single_data(0, 100, (15, 3, 1), np.float32) cpu_input1 = copy.deepcopy(input1) cpu_output = self.cpu_op_exec(cpu_input1) npu_output = 
self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) def test_gelu_backward_float32_3(self, device): - input1= self.generate_single_data(0, 100, (400,30,10), np.float32) - cpu_input1 = copy.deepcopy(input1) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_gelu_backward_float32_4(self, device): - input1= self.generate_single_data(-30, 0, (4,4), np.float32) + input1= self.generate_single_data(0, 100, (4, 4), np.float32) cpu_input1 = copy.deepcopy(input1) cpu_output = self.cpu_op_exec(cpu_input1) npu_output = self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) def test_gelu_backward_float16(self, device): - input1 = self.generate_single_data(0, 100, (5, 10, 100) , np.float16) - input1 = input1.to(torch.float32) - cpu_input1 = copy.deepcopy(input1) + input1 = self.generate_single_data(0, 100, (5, 10, 100), np.float16) + cpu_input1 = input1.to(torch.float32) cpu_output = self.cpu_op_exec(cpu_input1) + cpu_output = cpu_output.astype(np.float16) npu_output = self.npu_op_exec(input1) self.assertRtolEqual(cpu_output, npu_output) instantiate_device_type_tests(TestGeluBackward, globals(), except_for="cpu") if __name__ == "__main__": - run_tests() \ No newline at end of file + run_tests() diff --git a/test/test_npu/test_network_ops/test_gt.py b/test/test_npu/test_network_ops/test_gt.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_hardtanh.py b/test/test_npu/test_network_ops/test_hardtanh.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_index_put.py b/test/test_npu/test_network_ops/test_index_put.py old mode 100644 new mode 100755 index 2ab3b47b94..3a01077180 --- a/test/test_npu/test_network_ops/test_index_put.py +++ b/test/test_npu/test_network_ops/test_index_put.py @@ -127,6 +127,16 @@ class TestIndexPut(TestCase): self.case_exec_fp16(shape_format) 
self.case_inp_exec_fp16(shape_format) + def test_index_put_null(self, device): + cpu_input1 = torch.rand(2, 2) + cpu_input2 = torch.rand(2, 2) + cpu_mask_index = torch.tensor([[False, False], [False, False]]) + npu_mask_index = cpu_mask_index.to("npu") + npu_input1 = cpu_input1.to("npu") + npu_input2 = cpu_input2.to("npu") + cpu_input1[cpu_mask_index] = cpu_input2.detach()[cpu_mask_index] + npu_input1[npu_mask_index] = npu_input2.detach()[npu_mask_index] + self.assertEqual(cpu_input1, npu_input1.to("cpu")) instantiate_device_type_tests(TestIndexPut, globals(), except_for="cpu") diff --git a/test/test_npu/test_network_ops/test_le.py b/test/test_npu/test_network_ops/test_le.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_leaky_relu_backward.py b/test/test_npu/test_network_ops/test_leaky_relu_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_log.py b/test/test_npu/test_network_ops/test_log.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_log2.py b/test/test_npu/test_network_ops/test_log2.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_log_softmax.py b/test/test_npu/test_network_ops/test_log_softmax.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_log_softmax_backward.py b/test/test_npu/test_network_ops/test_log_softmax_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_lt.py b/test/test_npu/test_network_ops/test_lt.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_matmul.py b/test/test_npu/test_network_ops/test_matmul.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_max.py b/test/test_npu/test_network_ops/test_max.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_min.py b/test/test_npu/test_network_ops/test_min.py old mode 100644 new mode 
100755 diff --git a/test/test_npu/test_network_ops/test_mm.py b/test/test_npu/test_network_ops/test_mm.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_muls.py b/test/test_npu/test_network_ops/test_muls.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_neg.py b/test/test_npu/test_network_ops/test_neg.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_nllloss.py b/test/test_npu/test_network_ops/test_nllloss.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_not_equal.py b/test/test_npu/test_network_ops/test_not_equal.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_npu_giou.py b/test/test_npu/test_network_ops/test_npu_giou.py new file mode 100644 index 0000000000..c6f55768d0 --- /dev/null +++ b/test/test_npu/test_network_ops/test_npu_giou.py @@ -0,0 +1,133 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import numpy as np +import math +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuGiou(TestCase): + def generate_giou_data(self, n, m, dtype): + data_bboxes = np.array([]).astype(dtype) + for i in range(4): + data_bboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, n).astype(dtype) + data_bboxes = np.append(data_bboxes, data_bboxes_array) + data_bboxes = data_bboxes.reshape([4, n]) + data_gtboxes = np.array([]).astype(dtype) + for i in range(4): + data_gtboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, m).astype(dtype) + data_gtboxes = np.append(data_gtboxes, data_gtboxes_array) + data_gtboxes = data_gtboxes.reshape([4, m]) + cpu_input1 = torch.from_numpy(data_bboxes) + cpu_input2 = torch.from_numpy(data_gtboxes) + npu_input1 = cpu_input1.npu() + npu_input2 = cpu_input2.npu() + return cpu_input1, cpu_input2, npu_input1, npu_input2 + + def cpu_op_exec(self, box1, box2, trans=False, is_cross=False, mode="iou"): + box1 = box1.numpy() + box2 = box2.numpy() + dtype = box1.dtype + _, n = box1.shape + _, m = box2.shape + if trans: + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + else: + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + area1 = w1 * h1 + area2 = w2 * h2 + giou_res =np.array([], dtype=dtype) + + for i in range(n): + for j in range(m): + inter_x1 = max(b1_x1[i], b2_x1[j]) + inter_x2 = min(b1_x2[i], b2_x2[j]) + inter_y1 = max(b1_y1[i], b2_y1[j]) + inter_y2 = min(b1_y2[i], b2_y2[j]) + outer_x1 = min(b1_x1[i], b2_x1[j]) + 
outer_x2 = max(b1_x2[i], b2_x2[j]) + outer_y1 = min(b1_y1[i], b2_y1[j]) + outer_y2 = max(b1_y2[i], b2_y2[j]) + inter_area = max(0, (inter_x2 - inter_x1)) * max(0, (inter_y2 - inter_y1)) + outer_area = abs(outer_x2 - outer_x1) * abs(outer_y2 - outer_y1) + union_area = area1[i] + area2[j] - inter_area + 1e-16 + other_area = outer_area - union_area + giou_ij = inter_area / union_area - other_area / outer_area + if not is_cross: + if i == j: + giou_res = np.append(giou_res, giou_ij) + else: + giou_res = np.append(giou_res, giou_ij) + + if not is_cross: + res = giou_res.reshape(1, n) + else: + res = giou_res.reshape(n, m) + res = np.transpose(res) + res = np.transpose(res) + return res + + def npu_op_exec(self, box1, box2, trans=False, is_cross=False, mode=0): + output = torch.npu_giou(box1, box2, trans, is_cross, mode) + output = output.detach().cpu().numpy() + return output + + def test_npu_giou_shape_format_fp32(self, device): + self._test_npu_giou_shape_format(np.float32) + + def test_npu_giou_shape_format_fp16(self, device): + self._test_npu_giou_shape_format(np.float16) + + def _test_npu_giou_shape_format(self, dtype): + shape_list = [ + [10, 10], + [12, 10], + [100, 100] + ] + is_trans_list = [False] + mode_list = ["iou"] + # TODO(Ascend): 反向只支持 mode=="iof", is_cross==False, + # is_trans==False场景,这里同步验证相同场景 + shape_format = [[j, k, m] + for j in shape_list + for k in is_trans_list + for m in mode_list] + + for item in shape_format: + mode_digit = 0 if item[-1] == "iou" else 1 + is_cross = False if item[0][0] == item[0][1] else True + cpu_input1, cpu_input2, npu_input1, npu_input2 = self.generate_giou_data(*item[0], dtype) + cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, item[1], is_cross, item[-1]) + npu_output = self.npu_op_exec(npu_input1, npu_input2, item[1], is_cross, mode_digit) + cpu_output = cpu_output.astype(npu_output.dtype) + if dtype == np.float16: + # TODO(Ascend): fp16 insufficient precision + self.assertRtolEqual(cpu_output, npu_output, 
prec16=1e-2) + else: + self.assertRtolEqual(cpu_output, npu_output) + + +instantiate_device_type_tests(TestNpuGiou, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_npu_giou_backward.py b/test/test_npu/test_network_ops/test_npu_giou_backward.py new file mode 100644 index 0000000000..1cf564d74b --- /dev/null +++ b/test/test_npu/test_network_ops/test_npu_giou_backward.py @@ -0,0 +1,86 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +import numpy as np +import math +from common_utils import TestCase, run_tests +from common_device_type import dtypes, instantiate_device_type_tests +from util_test import create_common_tensor + +class TestNpuGiouBackward(TestCase): + def generate_giou_data(self, n, m, dtype): + data_bboxes = np.array([]).astype(dtype) + for i in range(4): + data_bboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, n).astype(dtype) + data_bboxes = np.append(data_bboxes, data_bboxes_array) + data_bboxes = data_bboxes.reshape([4, n]) + data_gtboxes = np.array([]).astype(dtype) + for i in range(4): + data_gtboxes_array = i // 2 + math.pow(-1, i // 2) * 0.5 * np.random.rand(1, m).astype(dtype) + data_gtboxes = np.append(data_gtboxes, data_gtboxes_array) + data_gtboxes = data_gtboxes.reshape([4, m]) + cpu_input1 = torch.from_numpy(data_bboxes) + cpu_input2 = torch.from_numpy(data_gtboxes) + npu_input1 = cpu_input1.npu() + npu_input2 = cpu_input2.npu() + return cpu_input1, cpu_input2, npu_input1, npu_input2 + + def npu_op_exec(self, box1, box2, trans=False, is_cross=False, mode=0): + box1.requires_grad = True + box2.requires_grad = True + output = torch.npu_giou(box1, box2, trans, is_cross, mode) + output.backward(torch.ones_like(output)) + box1_grad = box1.grad + box2_grad = box2.grad + box1_grad = box1_grad.detach().cpu().numpy() + box2_grad = box2_grad.detach().cpu().numpy() + output = output.detach().cpu().numpy() + return output, box1_grad, box2_grad + + def test_npu_giou_backward_shape_format(self, dtype): + shape_list = [ + [1, 1] + ] + is_trans_list = [False] + mode_list = ["iou"] + # TODO(Ascend): only support mode=="iof", is_cross==False, + # is_trans==False currently + shape_format = [[j, k, m] + for j in shape_list + for k in is_trans_list + for m in mode_list] + + for item in shape_format: + mode_digit = 0 if item[-1] == "iou" else 1 + is_cross = False if item[0][0] == item[0][1] else True + expected_cpu_grad1 = np.array([[0.51091206], + 
[-0.70909655], + [0.3726323], + [0.349545]], dtype=np.float32) + expected_cpu_grad2 = np.array([[-0.51091206], + [0.70909655], + [0.3599837], + [0.47306436]], dtype=np.float32) + _, _, npu_input1, npu_input2 = self.generate_giou_data(*item[0], np.float32) + _, npu_grad1, npu_grad2 = self.npu_op_exec(npu_input1, npu_input2, item[1], is_cross, mode_digit) + self.assertRtolEqual(expected_cpu_grad1, npu_grad1) + self.assertRtolEqual(expected_cpu_grad2, npu_grad2) + + +instantiate_device_type_tests(TestNpuGiouBackward, globals(), except_for="cpu") +if __name__ == "__main__": + run_tests() diff --git a/test/test_npu/test_network_ops/test_pow.py b/test/test_npu/test_network_ops/test_pow.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_prod.py b/test/test_npu/test_network_ops/test_prod.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_reciprocal.py b/test/test_npu/test_network_ops/test_reciprocal.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_relu.py b/test/test_npu/test_network_ops/test_relu.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_remainder.py b/test/test_npu/test_network_ops/test_remainder.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_rsqrt.py b/test/test_npu/test_network_ops/test_rsqrt.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_rsub.py b/test/test_npu/test_network_ops/test_rsub.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_sign.py b/test/test_npu/test_network_ops/test_sign.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_softmax.py b/test/test_npu/test_network_ops/test_softmax.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_split.py b/test/test_npu/test_network_ops/test_split.py old mode 100644 new mode 100755 diff --git 
a/test/test_npu/test_network_ops/test_sqrt.py b/test/test_npu/test_network_ops/test_sqrt.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_stack.py b/test/test_npu/test_network_ops/test_stack.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_sub.py b/test/test_npu/test_network_ops/test_sub.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_sum.py b/test/test_npu/test_network_ops/test_sum.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_upsample_bilinear_backward.py b/test/test_npu/test_network_ops/test_upsample_bilinear_backward.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_where.py b/test/test_npu/test_network_ops/test_where.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_zero.py b/test/test_npu/test_network_ops/test_zero.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_zeros.py b/test/test_npu/test_network_ops/test_zeros.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/test_zeroslike.py b/test/test_npu/test_network_ops/test_zeroslike.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_network_ops/util_test.py b/test/test_npu/test_network_ops/util_test.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/eval/onnx/cp_onnx_eval.py b/test/test_npu/test_onnx/torch.onnx/eval/onnx/cp_onnx_eval.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/eval/onnxrt/onnxrt_eval.py b/test/test_npu/test_onnx/torch.onnx/eval/onnxrt/onnxrt_eval.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/cp_parser.py b/test/test_npu/test_onnx/torch.onnx/export/cp_parser.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/export_onnx.py 
b/test/test_npu/test_onnx/torch.onnx/export/export_onnx.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export-cpu.py b/test/test_npu/test_onnx/torch.onnx/export/model_export-cpu.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export-gpu.py b/test/test_npu/test_onnx/torch.onnx/export/model_export-gpu.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export-npu.py b/test/test_npu/test_onnx/torch.onnx/export/model_export-npu.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/model_export.py b/test/test_npu/test_onnx/torch.onnx/export/model_export.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/export/onnx_parser.py b/test/test_npu/test_onnx/torch.onnx/export/onnx_parser.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_onnx/torch.onnx/main.py b/test/test_npu/test_onnx/torch.onnx/main.py old mode 100644 new mode 100755 diff --git a/test/test_npu/test_reflection_pad2d.py b/test/test_npu/test_reflection_pad2d.py deleted file mode 100644 index d150c4c955..0000000000 --- a/test/test_npu/test_reflection_pad2d.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestReflectionPad2d(TestCase): - def cpu_op_out_exec(self, input1, pad, output): - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - return m - - def npu_op_out_exec(self, input1, pad, output): - m_n = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m_n = m_n.to("cpu") - m_n = m_n.numpy() - return m_n - - def cpu_op_exec(self, input1, pad): - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - return output - - def npu_op_exec(self, input1, pad): - m = torch.nn.ReflectionPad2d(pad).to("npu") - output = m(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_reflectionPad2d_out_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (1, 1, 3, 3)], [2, 2, 2, 2]], - [[np.float32, 3, (1, 1, 4, 3)], 2] - ] - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_out_exec(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.float16, 3, (1, 1, 4, 3)], 2] - ] - - def cpu_op_out_exec_fp16(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.float16) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_fp16(cpu_input1, item[1], cpuout) - npu_output = 
self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_int8(self, device): - shape_format = [ - [[np.int8, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int8, 0, (1, 1, 5, 3)], 2] - ] - - def cpu_op_out_exec_int8(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.int8) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_int8(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_uint8(self, device): - shape_format = [ - [[np.uint8, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.uint8, 0, (1, 1, 4, 9)], 3] - ] - - def cpu_op_out_exec_uint8(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.uint8) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_uint8(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_out_shape_format_int32(self, device): - shape_format = [ - [[np.int32, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int32, 0, (1, 1, 4, 9)], 2] - ] - - def cpu_op_out_exec_int32(input1, pad, output): - input1 = input1.to(torch.float32) - m = torch._C._nn.reflection_pad2d(input1, pad, out=output) - m = m.numpy() - m = m.astype(np.int32) - return m - - for item in shape_format: - cpuout = torch.randn(1, 1, 3, 3) - npuout = cpuout.npu() - cpu_input1, npu_input1 = 
create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_out_exec_int32(cpu_input1, item[1], cpuout) - npu_output = self.npu_op_out_exec(npu_input1, item[1], npuout) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (1, 1, 3, 3)], [2, 2, 2, 2]], - [[np.float32, 3, (1, 1, 4, 3)], 2] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = self.cpu_op_exec(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.float16, 3, (1, 1, 4, 3)], 2] - ] - - def cpu_op_exec_fp16(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_fp16(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_int8(self, device): - shape_format = [ - [[np.int8, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int8, 0, (1, 1, 5, 3)], 2] - ] - - def cpu_op_exec_int8(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.int8) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_int8(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_uint8(self, device): - shape_format = [ - [[np.uint8, 0, (1, 1, 4, 3)], [2, 2, 
2, 2]], - [[np.uint8, 0, (1, 1, 4, 9)], 3] - ] - - def cpu_op_exec_uint8(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.uint8) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_uint8(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_reflectionPad2d_shape_format_int32(self, device): - shape_format = [ - [[np.int32, 0, (1, 1, 4, 3)], [2, 2, 2, 2]], - [[np.int32, 0, (1, 1, 4, 9)], 2] - ] - - def cpu_op_exec_int32(input1, pad): - input1 = input1.to(torch.float32) - m = torch.nn.ReflectionPad2d(pad) - output = m(input1) - output = output.numpy() - output = output.astype(np.int32) - return output - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_output = cpu_op_exec_int32(cpu_input1, item[1]) - npu_output = self.npu_op_exec(npu_input1, item[1]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestReflectionPad2d, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:2") - run_tests() -- Gitee