class TestAbs(TestCase):
    """Compares ``torch.abs`` on the NPU tensor against the CPU tensor."""

    def cpu_op_exec(self, input1):
        """Return ``abs(input1)`` as a NumPy array."""
        return torch.abs(input1).numpy()

    def npu_op_exec(self, input1):
        """Return ``abs(input1)`` as a NumPy array after moving the result to CPU."""
        return torch.abs(input1).to("cpu").numpy()

    def test_abs_shape_format_fp16(self, device):
        """abs on float16 inputs; the CPU reference is computed in float32."""
        for npu_format in (0, 3):
            for shape in ([5], [5, 10], [1, 3, 2], [52, 15, 15, 20]):
                descriptor = [np.float16, npu_format, shape]
                cpu_input, npu_input = create_common_tensor(descriptor, -10, 10)
                # Reference runs in float32 and is cast back to float16
                # before the comparison.
                expected = self.cpu_op_exec(cpu_input.to(torch.float32))
                actual = self.npu_op_exec(npu_input)
                self.assertRtolEqual(expected.astype(np.float16), actual)

    def test_abs_shape_format_fp32(self, device):
        """abs on float32 inputs for every format/shape combination."""
        for npu_format in (0, 3):
            for shape in ([5], [5, 10], [1, 3, 2], [52, 15, 15, 20]):
                descriptor = [np.float32, npu_format, shape]
                cpu_input, npu_input = create_common_tensor(descriptor, -10, 10)
                expected = self.cpu_op_exec(cpu_input)
                actual = self.npu_op_exec(npu_input)
                self.assertRtolEqual(expected, actual)
class TestAcos(TestCase):
    """Compares ``torch.acos`` on the NPU tensor against the CPU tensor."""

    def cpu_op_exec(self, input_para):
        """Return ``acos(input_para)`` as a NumPy array."""
        return torch.acos(input_para).numpy()

    def npu_op_exec(self, input_para):
        """Return ``acos(input_para)`` as a NumPy array after moving it to CPU."""
        return torch.acos(input_para).to("cpu").numpy()

    def test_acos_common_shape_format(self, device):
        """acos on float32 inputs drawn from [-1, 1]."""
        descriptors = [
            [np.float32, -1, 1],
            [np.float32, -1, (64, 10)],
            [np.float32, -1, (32, 1, 3)],
        ]
        for descriptor in descriptors:
            cpu_input, npu_input = create_common_tensor(descriptor, -1, 1)
            expected = self.cpu_op_exec(cpu_input)
            actual = self.npu_op_exec(npu_input)
            self.assertRtolEqual(expected, actual)

    def test_acos_float16_shape_format(self, device):
        """acos on float16 inputs; the CPU reference is computed in float32."""
        descriptors = [
            [np.float16, -1, 1],
            [np.float16, -1, (64, 10)],
            [np.float16, -1, (31, 1, 3)],
        ]
        for descriptor in descriptors:
            cpu_input, npu_input = create_common_tensor(descriptor, -1, 1)
            # Reference path: float32 acos, then cast the NumPy result
            # back to float16.
            reference = torch.acos(cpu_input.to(torch.float32)).numpy()
            expected = reference.astype(np.float16)
            actual = self.npu_op_exec(npu_input)
            self.assertRtolEqual(expected, actual)
class TestAdaptiveAvgPool1d(TestCase):
    """Compares ``nn.AdaptiveAvgPool1d`` on the NPU module against CPU."""

    def cpu_op_exec(self, input1, output_size):
        """Apply AdaptiveAvgPool1d(output_size) to ``input1`` and return the tensor."""
        pool = nn.AdaptiveAvgPool1d(output_size)
        return pool(input1)

    def npu_op_exec(self, input1, output_size):
        """Apply the pooling module after ``.npu()`` and return the result on CPU."""
        pool = nn.AdaptiveAvgPool1d(output_size).npu()
        return pool(input1).cpu()

    def test_AdaptiveAvgPool1d_shape_format_fp16(self, device):
        """float16 inputs; the CPU reference is pooled in float32 and cast to half."""
        descriptors = [
            [np.float16, 0, (64, 10, 16)],
            [np.float16, -1, (256, 2048, 8)],
            [np.float16, 3, (32, 16, 16)],
        ]
        target_sizes = [4, 3]
        for descriptor in descriptors:
            # One tensor pair per descriptor, reused for every output size.
            cpu_input, npu_input = create_common_tensor(descriptor, 1, 10)
            for output_size in target_sizes:
                expected = self.cpu_op_exec(cpu_input.float(), output_size).half()
                actual = self.npu_op_exec(npu_input, output_size)
                self.assertRtolEqual(expected, actual, prec16=0.002)

    def test_AdaptiveAvgPool1d_shape_format_fp32(self, device):
        """float32 inputs for output sizes 4, 3 and 1."""
        descriptors = [
            [np.float32, 0, (64, 10, 16)],
            [np.float32, -1, (256, 2048, 8)],
            [np.float32, 3, (32, 16, 16)],
        ]
        target_sizes = [4, 3, 1]
        for descriptor in descriptors:
            cpu_input, npu_input = create_common_tensor(descriptor, 1, 10)
            for output_size in target_sizes:
                expected = self.cpu_op_exec(cpu_input, output_size)
                actual = self.npu_op_exec(npu_input, output_size)
                self.assertRtolEqual(expected, actual, 0.001)
class TestAdaptiveAvgPool2dBackward(TestCase):
    """Compares forward output and input gradient of AdaptiveAvgPool2d, CPU vs NPU."""

    def cpu_op_exec(self, input_x, input_grad):
        """Run pooling plus backward on the CPU tensor.

        ``input_grad`` is passed to ``AdaptiveAvgPool2d`` as its output size
        (despite the name). Half inputs are pooled in float32 and cast back
        to half. Returns ``(output.detach(), input_x.grad)``.
        """
        input_x.requires_grad_(True)
        pool = torch.nn.AdaptiveAvgPool2d(input_grad)
        is_half = input_x.dtype == torch.half
        output = pool(input_x.float()).half() if is_half else pool(input_x)
        # The forward output doubles as the upstream gradient.
        output.backward(output)
        return output.detach(), input_x.grad

    def npu_op_exec(self, input_x, input_grad):
        """Run pooling plus backward on the NPU tensor; both results are moved to CPU."""
        input_x.requires_grad_(True)
        pool = torch.nn.AdaptiveAvgPool2d(input_grad)
        output = pool(input_x)
        output.backward(output)
        return output.detach().cpu(), input_x.grad.cpu()

    def test_adaptiveAvgPool2d_backward_1(self, device):
        """3-D float32 input of shape (1, 8, 9), output size (2, 3)."""
        cpu_input = torch.randn((1, 8, 9), dtype=torch.float32)
        npu_input = cpu_input.npu()
        output_size = np.array((2, 3))
        cpu_results = self.cpu_op_exec(cpu_input, output_size)
        npu_results = self.npu_op_exec(npu_input, output_size)
        # Index 0 is the forward output, index 1 the input gradient.
        for expected, actual in zip(cpu_results, npu_results):
            self.assertRtolEqual(expected, actual)

    def test_adaptiveAvgPool2d_backward_2(self, device):
        """4-D float32 input of shape (1, 3, 3, 3), output size (2, 2)."""
        cpu_input = torch.randn((1, 3, 3, 3), dtype=torch.float32)
        npu_input = cpu_input.npu()
        output_size = np.array((2, 2))
        cpu_results = self.cpu_op_exec(cpu_input, output_size)
        npu_results = self.npu_op_exec(npu_input, output_size)
        for expected, actual in zip(cpu_results, npu_results):
            self.assertRtolEqual(expected, actual)

    def test_adaptiveAvgPool2d_backward_fp16(self, device):
        """4-D float16 input of shape (1, 3, 6, 6), output size (5, 5)."""
        raw = np.random.uniform(0, 1, (1, 3, 6, 6)).astype(np.float16)
        cpu_input = torch.from_numpy(raw)
        npu_input = cpu_input.npu()
        output_size = np.array((5, 5))
        cpu_results = self.cpu_op_exec(cpu_input, output_size)
        npu_results = self.npu_op_exec(npu_input, output_size)
        for expected, actual in zip(cpu_results, npu_results):
            self.assertRtolEqual(expected, actual)
class TestAddbmm(TestCase):
    """Checks ``torch.addbmm`` (functional, ``out=`` and in-place) on NPU against CPU."""

    def generate_scalar(self, dtype, min_d, max_d):
        """Draw a random scalar used for the ``beta`` / ``alpha`` arguments.

        Args:
            dtype: string tag, either "float32" or "int32".
            min_d: lower bound of the sampling range.
            max_d: upper bound of the sampling range (passed straight to
                ``np.random.uniform`` / ``np.random.randint``).

        Raises:
            ValueError: if ``dtype`` is not one of the supported tags
                (previously this fell through to an unbound local).
        """
        if dtype == "float32":
            return np.random.uniform(min_d, max_d)
        if dtype == "int32":
            return np.random.randint(min_d, max_d)
        raise ValueError("unsupported dtype tag: {!r}".format(dtype))

    def cpu_op_exec(self, input1, input2, input3, scalar1, scalar2):
        """Return ``addbmm(input1, input2, input3, beta=scalar1, alpha=scalar2)`` as NumPy."""
        output = torch.addbmm(input1, input2, input3, beta=scalar1, alpha=scalar2)
        return output.numpy()

    def npu_op_exec(self, input1, input2, input3, scalar1, scalar2):
        """Same as ``cpu_op_exec`` but with all operands moved to "npu" first."""
        input1 = input1.to("npu")
        input2 = input2.to("npu")
        input3 = input3.to("npu")
        output = torch.addbmm(input1, input2, input3, beta=scalar1, alpha=scalar2)
        return output.to("cpu").numpy()

    def npu_op_exec_out(self, input1, input2, input3, scalar1, scalar2, input4):
        """Run the ``out=`` variant, writing into ``input4`` (moved to "npu")."""
        input1 = input1.to("npu")
        input2 = input2.to("npu")
        input3 = input3.to("npu")
        output = input4.to("npu")
        torch.addbmm(input1, input2, input3, beta=scalar1, alpha=scalar2, out=output)
        return output.to("cpu").numpy()

    def npu_op_exec_inplace(self, input1, input2, input3, scalar1, scalar2):
        """Run ``input1.addbmm_`` on the NPU and return the result as NumPy.

        NOTE: when ``input1`` is already on the NPU, ``.to("npu")`` may return
        the very same tensor, so the caller's tensor can be modified in place.
        """
        input1 = input1.to("npu")
        input2 = input2.to("npu")
        input3 = input3.to("npu")
        input1.addbmm_(input2, input3, beta=scalar1, alpha=scalar2)
        return input1.to("cpu").numpy()

    def cpu_op_transpose_exec(self, input1, input2, input3, scalar1, scalar2):
        """addbmm where ``input3`` has its last two dimensions swapped first."""
        # permute is the native torch equivalent of np.transpose(x, (0, 2, 1))
        # and does not round-trip the tensor through NumPy.
        input3_t = input3.permute(0, 2, 1)
        output = torch.addbmm(input1, input2, input3_t, beta=scalar1, alpha=scalar2)
        return output.numpy()

    def npu_op_transpose_exec(self, input1, input2, input3, scalar1, scalar2):
        """NPU counterpart of ``cpu_op_transpose_exec``."""
        input1 = input1.to("npu")
        input2 = input2.to("npu")
        input3 = input3.to("npu")
        # Stay on the device: np.transpose on a device tensor would need a
        # host conversion, permute does not.
        input3_t = input3.permute(0, 2, 1)
        output = torch.addbmm(input1, input2, input3_t, beta=scalar1, alpha=scalar2)
        return output.to("cpu").numpy()

    def test_addbmm(self, device):
        """Functional, out= and in-place addbmm for float32 and int32 operands."""
        shape_format = [
            [[np.float32, 0, [3, 5]], [np.float32, 0, [10, 3, 4]], [np.float32, 0, [10, 4, 5]], "float32"],
            [[np.int32, 0, [3, 5]], [np.int32, 0, [10, 3, 4]], [np.int32, 0, [10, 4, 5]], "int32"],
        ]

        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100)
            cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100)
            cpu_input3, npu_input3 = create_common_tensor(item[2], 0, 100)
            # Only the NPU half is needed: it serves as the out= buffer.
            _, npu_input4 = create_common_tensor(item[0], 0, 100)

            scalar1 = self.generate_scalar(item[3], 0, 10)
            scalar2 = self.generate_scalar(item[3], 0, 10)

            cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, scalar1, scalar2)
            npu_output = self.npu_op_exec(npu_input1, npu_input2, npu_input3, scalar1, scalar2)

            npu_output1 = self.npu_op_exec_out(npu_input1, npu_input2, npu_input3, scalar1, scalar2, npu_input4)
            # Keep the in-place variant last: it may overwrite npu_input1.
            npu_output2 = self.npu_op_exec_inplace(npu_input1, npu_input2, npu_input3, scalar1, scalar2)

            self.assertRtolEqual(cpu_output, npu_output)
            self.assertRtolEqual(cpu_output, npu_output1)
            self.assertRtolEqual(cpu_output, npu_output2)

    def test_addbmm_transpose(self, device):
        """addbmm with a transposed (non-contiguous) second batch operand."""
        shape_format = [
            [[np.float32, 0, [4, 5]], [np.float32, 0, [10, 4, 7]], [np.float32, 0, [10, 5, 7]], "float32"],
            [[np.int32, 0, [4, 5]], [np.int32, 0, [10, 4, 7]], [np.int32, 0, [10, 5, 7]], "int32"],
        ]

        for item in shape_format:
            cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 100)
            cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 100)
            cpu_input3, npu_input3 = create_common_tensor(item[2], 0, 100)

            scalar1 = self.generate_scalar(item[3], 0, 10)
            scalar2 = self.generate_scalar(item[3], 0, 10)

            cpu_transpose_output = self.cpu_op_transpose_exec(cpu_input1, cpu_input2, cpu_input3, scalar1, scalar2)
            npu_transpose_output = self.npu_op_transpose_exec(npu_input1, npu_input2, npu_input3, scalar1, scalar2)

            self.assertRtolEqual(cpu_transpose_output, npu_transpose_output)
class TestBatchMatMul(TestCase):
    """Compares ``torch.bmm`` on NPU tensors against CPU tensors."""

    def cpu_op_exec(self, input1, input2):
        """Return ``bmm(input1, input2)`` as a NumPy array."""
        return torch.bmm(input1, input2).numpy()

    def npu_op_exec(self, input1, input2):
        """Return ``bmm(input1, input2)`` as a NumPy array after moving it to CPU."""
        return torch.bmm(input1, input2).to("cpu").numpy()

    def bmm_auto_list_exec(self, shape):
        """Run the CPU/NPU comparison for every ``[lhs, rhs]`` descriptor pair in ``shape``."""
        for lhs_desc, rhs_desc in ((pair[0], pair[1]) for pair in shape):
            cpu_input1, npu_input1 = create_common_tensor(lhs_desc, 0, 10)
            cpu_input2, npu_input2 = create_common_tensor(rhs_desc, 0, 10)
            # float16 operands are promoted to float32 for the CPU reference.
            if cpu_input1.dtype == torch.float16:
                cpu_input1 = cpu_input1.to(torch.float32)
            if cpu_input2.dtype == torch.float16:
                cpu_input2 = cpu_input2.to(torch.float32)
            expected = self.cpu_op_exec(cpu_input1, cpu_input2)
            actual = self.npu_op_exec(npu_input1, npu_input2)
            # Compare in the dtype produced by the NPU path.
            self.assertRtolEqual(expected.astype(actual.dtype), actual)

    def test_batchmatmul_shape_format_fp16_3d(self, device):
        """float16 (1, 3, 2) x (1, 2, 3) over every pair of formats 0, 3, 29."""
        formats = [0, 3, 29]
        lhs_descs = [[np.float16, fmt, (1, 3, 2)] for fmt in formats]
        rhs_descs = [[np.float16, fmt, (1, 2, 3)] for fmt in formats]
        pairs = [[lhs, rhs] for lhs in lhs_descs for rhs in rhs_descs]
        self.bmm_auto_list_exec(pairs)

    def test_batchmatmul_shape_format_fp32_3d(self, device):
        """float32 (1, 3, 2) x (1, 2, 3) over every pair of formats 0, 3, 29."""
        formats = [0, 3, 29]
        lhs_descs = [[np.float32, fmt, (1, 3, 2)] for fmt in formats]
        rhs_descs = [[np.float32, fmt, (1, 2, 3)] for fmt in formats]
        pairs = [[lhs, rhs] for lhs in lhs_descs for rhs in rhs_descs]
        self.bmm_auto_list_exec(pairs)
+#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +at::Tensor& abs_out_npu_nocheck(at::Tensor& result, const at::Tensor& self) { + OpCommand cmd; + cmd.Name("Abs") + .Input(self) + .Output(result) + .Run(); + return result; +} + +at::Tensor& NPUNativeFunctions::abs_out(const at::Tensor& self, at::Tensor& result) { + OpPreparation::CheckOut( + {self}, + result, + self); + OpPipeWithDefinedOut pipe; + return pipe.CheckMemory({self}, {result}) + .Func([&self](at::Tensor& result){abs_out_npu_nocheck(result, self);}) + .Call(result); +} + +at::Tensor NPUNativeFunctions::abs(const at::Tensor& self) { + OpPipeWithApplyOut pipe; + return pipe.ApplyOutputSameAs(self) + .Func([&self](at::Tensor& result) {abs_out_npu_nocheck(result, self);}) + .Call(); +} + +at::Tensor& NPUNativeFunctions::abs_(at::Tensor& self) { + abs_out(self, self); + return self; +} + +} // namespace native +} // namespace at_npu \ No newline at end of file diff --git a/torch_npu/csrc/aten/ops/AcosKernelNpu.cpp b/torch_npu/csrc/aten/ops/AcosKernelNpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..731ac93999f1e00034c98866c3fc7d8457491b6f --- /dev/null +++ b/torch_npu/csrc/aten/ops/AcosKernelNpu.cpp @@ -0,0 +1,56 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +at::Tensor& acos_out_npu_nocheck(const at::Tensor& self, at::Tensor& result) { + OpCommand cmd; + cmd.Name("Acos") + .Input(self) + .Output(result) + .Run(); + + return result; +} + +at::Tensor& NPUNativeFunctions::acos_out(const at::Tensor& self, at::Tensor& result) { + OpPipeWithDefinedOut pipe; + OpPreparation::CheckOut( + {self}, + result, + self); + return pipe.CheckMemory({self}, {result}) + .Func([&self](at::Tensor& result){acos_out_npu_nocheck(self, result);}) + .Call(result); +} + +at::Tensor NPUNativeFunctions::acos(const at::Tensor& self) { + OpPipeWithApplyOut pipe; + return pipe.ApplyOutputSameAs(self) + .Func([&self](at::Tensor& result) {acos_out_npu_nocheck(self, result);}) + .Call(); +} + +at::Tensor& NPUNativeFunctions::acos_(at::Tensor& self) { + acos_out(self, self); + return self; +} + +} // namespace native +} // namespace at_npu \ No newline at end of file diff --git a/torch_npu/csrc/aten/ops/AddbmmKernelNpu.cpp b/torch_npu/csrc/aten/ops/AddbmmKernelNpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dff97430b145785997c9b7187b90b2f6f534445a --- /dev/null +++ b/torch_npu/csrc/aten/ops/AddbmmKernelNpu.cpp @@ -0,0 +1,70 @@ +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +at::Tensor& NPUNativeFunctions::addbmm_out( + const at::Tensor& self, + const at::Tensor& batch1, + const at::Tensor& batch2, + at::Scalar beta, + at::Scalar alpha, + at::Tensor& result) { + at::Tensor MulResult = at::mul(batch1, alpha); + at::Tensor bmmResult = at::bmm(MulResult,batch2); + int64_t dim[2] = {batch1.size(1), batch2.size(2)}; + at::Tensor sumResult = at::sum_to(bmmResult, dim); + // sumResult + self*beta + at::add_out(result, sumResult, self, beta); + return result; +} + +at::Tensor NPUNativeFunctions::addbmm( + const at::Tensor& self, + const at::Tensor& batch1, + const at::Tensor& batch2, + at::Scalar beta, + at::Scalar alpha) { + // calculate the output size + auto outputSize = addbmm_npu_output_size(self, batch1, batch2, beta, alpha); + // construct the output tensor of the NPU + at::Tensor result = OpPreparation::ApplyTensor(self, outputSize); + // calculate the output result of the NPU + addbmm_out(self, batch1, batch2, beta, alpha, result); + return result; +} + +at::Tensor& NPUNativeFunctions::addbmm_( + at::Tensor& self, + const at::Tensor& batch1, + const at::Tensor& batch2, + at::Scalar beta, + at::Scalar alpha) { + OpPreparation::CheckMemory({self, batch1, batch2}, {self}); + if (!NpuUtils::check_match(&self)) { + at::Tensor contiguousSelf = NpuUtils::format_contiguous(self); + at::Tensor result = addbmm_out(contiguousSelf, batch1, batch2, beta, alpha, contiguousSelf); + NpuUtils::format_fresh_view(self, result); + } else { + addbmm_out(self, batch1, batch2, beta, alpha, self); + } + return self; +} + +} // namespace native +} // namespace at \ No newline at end of file diff --git a/torch_npu/csrc/aten/ops/BmmKernelNpu.cpp b/torch_npu/csrc/aten/ops/BmmKernelNpu.cpp new file mode 
namespace at_npu {
namespace native {

// out= variant of bmm: runs the "BatchMatMul" operator on `self` and `mat2`
// and stores the product in `result`.
at::Tensor& NPUNativeFunctions::bmm_out(const at::Tensor& self, const at::Tensor& mat2, at::Tensor& result) {
  // The operator writes into a contiguous tensor; a non-contiguous `result`
  // is filled by the copy_ at the end of this function.
  at::Tensor contiguousResult = result.is_contiguous() ? result : result.contiguous();

  at::Tensor contiguousSelf = self;
  at::Tensor contiguousMat2 = mat2;
  bool isSelfT = CalcuOpUtil::is_transpose_last_two_dims(self);
  bool isMat2T = CalcuOpUtil::is_transpose_last_two_dims(mat2);

  // An operand flagged by is_transpose_last_two_dims is passed through
  // unchanged and reported to the operator via adj_x1 / adj_x2 below;
  // any other operand goes through format_contiguous_add_copy_optimize first.
  if(!isSelfT){
    contiguousSelf = NpuUtils::format_contiguous_add_copy_optimize(self);
  }
  if(!isMat2T){
    contiguousMat2 = NpuUtils::format_contiguous_add_copy_optimize(mat2);
  }

  // NOTE(review): std::tie(pass, ...) builds a tuple of references and `pass`
  // is local to the lambda, so the deduced return type may carry a reference
  // to a dead local once the lambda returns. Confirm against the signature
  // InputWithFunc expects (an explicit `-> std::tuple<bool, at::Tensor&>`
  // return type would copy the bool) -- TODO confirm.
  auto func1 = [&contiguousSelf]() {
    bool pass = false;
    return std::tie(pass, contiguousSelf);
  };
  auto func2 = [&contiguousMat2]() {
    bool pass = false;
    return std::tie(pass, contiguousMat2);
  };

  // executing the NPU operator
  OpCommand cmd;
  cmd.Name("BatchMatMul")
      .InputWithFunc(func1)
      .InputWithFunc(func2)
      .Output(contiguousResult)
      .Attr("adj_x1", isSelfT)
      .Attr("adj_x2", isMat2T)
      .Run();

  // Copy back only when the operator wrote into a temporary.
  if (!result.is_contiguous()) {
    result.copy_(contiguousResult);
  }
  return result;
}

// Functional bmm: allocates the output (format chosen below) and delegates
// to bmm_out.
at::Tensor NPUNativeFunctions::bmm(const at::Tensor& self, const at::Tensor& mat2) {
  // calculate the output size
  auto outputSize = {self.size(0), self.size(1), mat2.size(2)};

  // construct the output tensor of the NPU
  at::Tensor result;

  // Check whether the mm output is specified as NCHW. To be removed once the
  // overall strategy for NLP models has been settled.
  // Float/Half inputs get a FRACTAL_NZ output unless the
  // CheckSwitchMMOutputEnable option is set; everything else uses ND.
  if ((self.scalar_type() == at::ScalarType::Float || self.scalar_type() == at::ScalarType::Half) &&
      !c10::npu::OptionsManager::CheckSwitchMMOutputEnable()) {
    result = at::empty_with_format(outputSize, self.options(), ACL_FORMAT_FRACTAL_NZ);
  } else {
    result = at::empty_with_format(outputSize, self.options(), ACL_FORMAT_ND);
  }

  // calculate the output result of the NPU
  NPUNativeFunctions::bmm_out(self, mat2, result);

  return result;
}

} // namespace native
} // namespace at_npu
b/torch_npu/csrc/aten/ops/pooling/AdaptiveAvgPool1dKernelNpu.cpp @@ -0,0 +1,42 @@ +// Copyright (c) 2020, Huawei Technologies.All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "torch_npu/csrc/framework/utils/OpAdapter.h" +#include "torch_npu/csrc/aten/NPUNativeFunctions.h" + +namespace at_npu { +namespace native { + +static void check1d( + const char* function_name, + const char* argument_name, + at::IntArrayRef x) { + TORCH_CHECK( + x.size() == 1, + function_name, "() argument '", argument_name, + "' should contain one int (got ", x.size(), ")"); +} + +at::Tensor NPUNativeFunctions::adaptive_avg_pool1d(const at::Tensor& self, at::IntArrayRef output_size) { + at::checkDim("adaptive_avg_pool1d", at::TensorArg(self, "self", 1), 3); + check1d("adaptive_avg_pool1d", "output_size", output_size); +// construct the output tensor of the NPU + auto output = NPUNativeFunctions::adaptive_avg_pool2d( + self.unsqueeze(2), + {1, output_size[0]}); + + return output.squeeze(2); +} + +} // namespace native +} // namespace at_npu \ No newline at end of file