diff --git a/patch/npu.patch b/patch/npu.patch index 4e01faf27cccb57db1074605f20a5c9883360123..14c9a2b015f9a880197f7275d29f92be65b92dc7 100644 --- a/patch/npu.patch +++ b/patch/npu.patch @@ -1,6 +1,6 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/CMakeLists.txt pytorch-develop/aten/CMakeLists.txt --- pytorch-v1.5.0/aten/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/CMakeLists.txt 2021-07-09 17:16:47.786789915 +0800 ++++ pytorch-develop/aten/CMakeLists.txt 2021-07-13 15:30:57.594267657 +0800 @@ -22,8 +22,10 @@ set(ATen_CPU_INCLUDE) set(ATen_THIRD_PARTY_INCLUDE) @@ -51,7 +51,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt pytorch-develop/aten/src/ATen/CMakeLists.txt --- pytorch-v1.5.0/aten/src/ATen/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-09 17:16:47.786789915 +0800 ++++ pytorch-develop/aten/src/ATen/CMakeLists.txt 2021-07-13 15:30:57.594267657 +0800 @@ -67,6 +67,9 @@ FILE(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h") FILE(GLOB native_cpu_h "native/cpu/*.h") @@ -129,7 +129,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h --- pytorch-v1.5.0/aten/src/ATen/core/dispatch/DispatchTable.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-09 17:16:47.794790202 +0800 ++++ pytorch-develop/aten/src/ATen/core/dispatch/DispatchTable.h 2021-07-13 15:30:57.602267943 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -170,7 +170,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/function_wrapper.py pytorch-develop/aten/src/ATen/function_wrapper.py --- pytorch-v1.5.0/aten/src/ATen/function_wrapper.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-09 17:16:47.802790488 +0800 ++++ pytorch-develop/aten/src/ATen/function_wrapper.py 2021-07-13 15:30:57.610268230 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -248,7 +248,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # options should be List[FunctionOption] 'options': Any, 'schema_string': str, -@@ -1037,12 +1081,33 @@ +@@ -1037,12 +1081,32 @@ return_types.append(rtype) return return_types @@ -267,9 +267,8 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + elif 'TensorOptions' in argu_types: + check.append(argu_names[argu_types.index('TensorOptions')] + ".device()") + else: -+ print("Can not find right dispatch key of argument Type of Tensor, TensorList, TensorOptions") + print("argument:", option['schema_string']) -+ raise ++ raise ValueError("Can not find right dispatch key of argument Type of Tensor, TensorList, TensorOptions.") + return check def process_native(option): @@ -282,7 +281,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= formals = native_get_formals(option) option['formals_list'] = formals option['formals'] = [format_formal(f) for f in formals] -@@ -1203,17 +1268,22 @@ +@@ -1203,17 +1267,22 @@ # we just implement it in the base Type. This is exposed # in Declarations.yaml via a field named 'abstract'. abstract = False @@ -307,7 +306,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if option['use_c10_dispatcher'] == 'full': op_registrations.append(OpRegistration( operator_name=OPERATOR_NAME.substitute(option), -@@ -1236,6 +1306,17 @@ +@@ -1236,6 +1305,17 @@ option['native_type_method_dispatch'] = value top_env['native_function_declarations'].append(NATIVE_DECLARATION.substitute(option)) generated_native_functions.append(value) @@ -325,7 +324,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= else: top_env['native_function_declarations'].append(NATIVE_DECLARATION.substitute(option)) -@@ -1552,7 +1633,7 @@ +@@ -1552,7 +1632,7 @@ # type: (FunctionOption) -> None dispatch = option['type_method_definition_dispatch'] env = nested_dict(option, backend_type_env) @@ -334,7 +333,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if isinstance(dispatch, dict): # If we're here, then our native_functions.yaml entry has dispatch configuration. # Having manual kernel registration doesn't make sense. -@@ -1576,6 +1657,18 @@ +@@ -1576,6 +1656,18 @@ op_registrations.append(OpRegistration( operator_name=OPERATOR_NAME.substitute(option), registration_code=BACKEND_UNBOXEDONLY_FUNCTION_REGISTRATION.substitute(env))) @@ -355,7 +354,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for option in declaration['options']: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/gen.py pytorch-develop/aten/src/ATen/gen.py --- pytorch-v1.5.0/aten/src/ATen/gen.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/gen.py 2021-07-09 17:16:47.802790488 +0800 ++++ pytorch-develop/aten/src/ATen/gen.py 2021-07-13 15:30:57.610268230 +0800 @@ -1,3 +1,18 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -513,7 +512,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= generate_outputs() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp --- pytorch-v1.5.0/aten/src/ATen/native/cpu/Activation.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-09 17:16:47.814790918 +0800 ++++ pytorch-develop/aten/src/ATen/native/cpu/Activation.cpp 2021-07-13 15:30:57.622268661 +0800 @@ -339,20 +339,20 @@ void hardsigmoid_backward_kernel(TensorIterator& iter) { @@ -541,7 +540,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp pytorch-develop/aten/src/ATen/native/Memory.cpp --- pytorch-v1.5.0/aten/src/ATen/native/Memory.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-09 17:16:47.806790632 +0800 ++++ pytorch-develop/aten/src/ATen/native/Memory.cpp 2021-07-13 15:30:57.614268374 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -596,7 +595,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= detail::computeStorageSize(self.sizes(), self.strides()), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml pytorch-develop/aten/src/ATen/native/native_functions.yaml --- pytorch-v1.5.0/aten/src/ATen/native/native_functions.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-09 17:16:47.830791493 +0800 ++++ pytorch-develop/aten/src/ATen/native/native_functions.yaml 2021-07-13 15:30:57.634269091 +0800 @@ -1,6 +1,5 @@ # See README.md in this directory for more guidance @@ -2274,13 +2273,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: matrix_rank.tol(Tensor self, float tol, bool symmetric=False) -> Tensor use_c10_dispatcher: full -@@ -1761,26 +2246,40 @@ - - func: matrix_power(Tensor self, int n) -> Tensor - use_c10_dispatcher: full - variants: function, method -+ npu_dispatch: -+ NPU: matrix_power_npu - +@@ -1765,22 +2250,34 @@ - func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) variants: function, method supports_named_tensor: True @@ -2315,7 +2308,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -1791,6 +2290,8 @@ +@@ -1791,6 +2288,8 @@ - func: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor supports_named_tensor: True @@ -2324,7 +2317,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> 
Tensor requires_tensor: True -@@ -1814,6 +2315,8 @@ +@@ -1814,6 +2313,8 @@ CPU: mean_cpu_gpu CUDA: mean_cpu_gpu QuantizedCPU: quantized_mean_cpu @@ -2333,7 +2326,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mean.dim(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor variants: function, method -@@ -1822,6 +2325,8 @@ +@@ -1822,6 +2323,8 @@ CPU: mean_cpu_gpu CUDA: mean_cpu_gpu QuantizedCPU: quantized_mean_cpu @@ -2342,7 +2335,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mean.out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -1829,47 +2334,73 @@ +@@ -1829,47 +2332,73 @@ CPU: mean_out_cpu_gpu CUDA: mean_out_cpu_gpu QuantizedCPU: quantized_mean_out_cpu @@ -2416,7 +2409,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor -@@ -1958,6 +2489,8 @@ +@@ -1958,6 +2487,8 @@ CUDA: legacy::cuda::_th_mm SparseCPU: _sparse_mm SparseCUDA: _sparse_mm @@ -2425,7 +2418,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!) -@@ -1966,6 +2499,8 @@ +@@ -1966,6 +2497,8 @@ CUDA: legacy::cuda::_th_mm_out SparseCPU: _sparse_mm_out SparseCUDA: _sparse_mm_out @@ -2434,7 +2427,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor -@@ -1994,6 +2529,8 @@ +@@ -1994,6 +2527,8 @@ SparseCPU: mul_sparse SparseCUDA: mul_sparse MkldnnCPU: mkldnn_mul @@ -2443,7 +2436,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) -@@ -2004,6 +2541,8 @@ +@@ -2004,6 +2539,8 @@ SparseCPU: mul_sparse_ SparseCUDA: mul_sparse_ MkldnnCPU: mkldnn_mul_ @@ -2452,7 +2445,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
-@@ -2013,15 +2552,21 @@ +@@ -2013,15 +2550,21 @@ SparseCPU: mul_out_sparse_cpu SparseCUDA: mul_out_sparse_cuda MkldnnCPU: mkldnn_mul_out @@ -2474,7 +2467,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mv(Tensor self, Tensor vec) -> Tensor use_c10_dispatcher: full -@@ -2030,12 +2575,16 @@ +@@ -2030,12 +2573,16 @@ CPU: mv_cpu CUDA: legacy::cuda::_th_mv supports_named_tensor: True @@ -2491,7 +2484,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mvlgamma(Tensor self, int p) -> Tensor use_c10_dispatcher: full -@@ -2052,6 +2601,8 @@ +@@ -2052,6 +2599,8 @@ CUDA: narrow_copy_dense SparseCPU: narrow_copy_sparse SparseCUDA: narrow_copy_sparse @@ -2500,7 +2493,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a) variants: function, method -@@ -2068,6 +2619,8 @@ +@@ -2068,6 +2617,8 @@ CPU: batch_norm_cpu CUDA: batch_norm_cuda MkldnnCPU: mkldnn_batch_norm @@ -2509,7 +2502,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!)) dispatch: -@@ -2098,6 +2651,8 @@ +@@ -2098,6 +2649,8 @@ dispatch: CPU: batch_norm_backward_cpu CUDA: batch_norm_backward_cuda @@ -2518,7 +2511,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor) dispatch: -@@ -2117,6 +2672,8 @@ +@@ -2117,6 +2670,8 @@ - func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, int[2] padding, int[2] stride=1) -> Tensor variants: function @@ -2527,7 +2520,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor) variants: function -@@ -2129,42 +2686,60 @@ +@@ -2129,42 +2684,60 @@ - func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor device_guard: False @@ -2590,7 +2583,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Only exposed from C++ -- in Python, # we expose it as an attribute `T`, not a function. -@@ -2253,54 +2828,82 @@ +@@ -2253,54 +2826,82 @@ supports_named_tensor: True - func: randperm(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor @@ -2674,7 +2667,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: repeat_interleave.Tensor(Tensor repeats) -> Tensor use_c10_dispatcher: full -@@ -2316,6 +2919,8 @@ +@@ -2316,6 +2917,8 @@ - func: repeat_interleave.self_int(Tensor self, int repeats, int? 
dim=None) -> Tensor use_c10_dispatcher: full variants: function, method @@ -2683,7 +2676,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: reshape(Tensor self, int[] shape) -> Tensor variants: function, method -@@ -2337,16 +2942,22 @@ +@@ -2337,16 +2940,22 @@ use_c10_dispatcher: full supports_named_tensor: True variants: function, method @@ -2706,7 +2699,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor -@@ -2360,6 +2971,8 @@ +@@ -2360,6 +2969,8 @@ CUDA: relu MkldnnCPU: mkldnn_relu QuantizedCPU: quantized_relu @@ -2715,7 +2708,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: relu_(Tensor(a!) self) -> Tensor(a!) -@@ -2370,6 +2983,8 @@ +@@ -2370,6 +2981,8 @@ CUDA: relu_ MkldnnCPU: mkldnn_relu_ QuantizedCPU: quantized_relu_ @@ -2724,7 +2717,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: prelu(Tensor self, Tensor weight) -> Tensor use_c10_dispatcher: full -@@ -2377,12 +2992,16 @@ +@@ -2377,12 +2990,16 @@ dispatch: CPU: prelu_cpu CUDA: prelu_cuda @@ -2741,7 +2734,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gelu(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2390,6 +3009,8 @@ +@@ -2390,6 +3007,8 @@ dispatch: CPU: gelu_cpu CUDA: gelu_cuda @@ -2750,7 +2743,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gelu_backward(Tensor grad, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2397,29 +3018,41 @@ +@@ -2397,29 +3016,41 @@ dispatch: CPU: gelu_backward_cpu CUDA: gelu_backward_cuda @@ -2792,7 +2785,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a) variants: function, method -@@ -2433,14 +3066,21 @@ +@@ -2433,14 +3064,21 @@ - func: selu(Tensor self) -> Tensor use_c10_dispatcher: full @@ -2815,7 +2808,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sigmoid(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2451,6 +3091,8 @@ +@@ -2451,6 +3089,8 @@ CUDA: sigmoid QuantizedCPU: quantized_sigmoid MkldnnCPU: mkldnn_sigmoid @@ -2824,7 +2817,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sigmoid_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -2459,36 +3101,52 @@ +@@ -2459,36 +3099,52 @@ CPU: sigmoid_ CUDA: sigmoid_ MkldnnCPU: mkldnn_sigmoid_ @@ -2877,7 +2870,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Returns a copy of this `Variable` that is detached from its autograd graph. # This method is OK to call if the `Variable` is a view. -@@ -2533,6 +3191,8 @@ +@@ -2533,6 +3189,8 @@ - func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet) variants: function, method @@ -2886,7 +2879,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: smm(Tensor self, Tensor mat2) -> Tensor use_c10_dispatcher: full -@@ -2542,10 +3202,14 @@ +@@ -2542,10 +3200,14 @@ - func: softmax.int(Tensor self, int dim, ScalarType? 
dtype=None) -> Tensor variants: function, method supports_named_tensor: True @@ -2901,7 +2894,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _softmax(Tensor self, int dim, bool half_to_float) -> Tensor use_c10_dispatcher: full -@@ -2553,12 +3217,16 @@ +@@ -2553,12 +3215,16 @@ CPU: softmax_cpu CUDA: softmax_cuda MkldnnCPU: mkldnn_softmax @@ -2918,7 +2911,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[] variants: function, method -@@ -2609,8 +3277,12 @@ +@@ -2609,8 +3275,12 @@ SparseCUDA: _sspaddmm_out_cuda - func: stack(Tensor[] tensors, int dim=0) -> Tensor @@ -2931,7 +2924,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # The signature is designed to be consistent with librosa except that it is # missing the `pad_mode` and `center` arguments, which are taken care of at -@@ -2633,20 +3305,30 @@ +@@ -2633,20 +3303,30 @@ - func: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor variants: function, method supports_named_tensor: True @@ -2962,7 +2955,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sum_to_size(Tensor self, int[] size) -> Tensor variants: method -@@ -2656,13 +3338,19 @@ +@@ -2656,13 +3336,19 @@ use_c10_dispatcher: full supports_named_tensor: True variants: function, method @@ -2982,7 +2975,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: square(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2677,51 +3365,81 @@ +@@ -2677,51 +3363,81 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -3065,7 +3058,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: t(Tensor(a) self) -> Tensor(a) device_guard: False -@@ -2736,6 +3454,8 @@ +@@ -2736,6 +3452,8 @@ use_c10_dispatcher: full supports_named_tensor: True variants: function, method @@ -3074,7 +3067,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tan_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -2743,12 +3463,16 @@ +@@ -2743,12 +3461,16 @@ dispatch: CPU: _tan__cpu CUDA: _tan__cuda @@ -3091,7 +3084,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tanh(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -2758,6 +3482,8 @@ +@@ -2758,6 +3480,8 @@ CPU: tanh CUDA: tanh QuantizedCPU: quantized_tanh @@ -3100,7 +3093,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tanh_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -2765,12 +3491,16 @@ +@@ -2765,12 +3489,16 @@ dispatch: CPU: _tanh__cpu CUDA: _tanh__cuda @@ -3117,7 +3110,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor variants: function -@@ -2783,6 +3513,8 @@ +@@ -2783,6 +3511,8 @@ dispatch: CPU: threshold CUDA: threshold_cuda @@ -3126,7 +3119,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!) 
variants: function -@@ -2790,12 +3522,16 @@ +@@ -2790,12 +3520,16 @@ dispatch: CPU: threshold_ CUDA: threshold__cuda @@ -3143,7 +3136,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor use_c10_dispatcher: full -@@ -2803,6 +3539,8 @@ +@@ -2803,6 +3537,8 @@ dispatch: CPU: threshold_backward CUDA: threshold_backward_cuda @@ -3152,7 +3145,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a) variants: function, method -@@ -2835,18 +3573,24 @@ +@@ -2835,18 +3571,24 @@ use_c10_dispatcher: full python_module: nn variants: function @@ -3177,7 +3170,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # default int[] value [0,1] should not add space after comma, since native_parse.py uses ', ' to split args -@@ -2872,6 +3616,8 @@ +@@ -2872,6 +3614,8 @@ CUDA: true_divide SparseCPU: true_divide_sparse SparseCUDA: true_divide_sparse @@ -3186,7 +3179,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) -@@ -2881,6 +3627,8 @@ +@@ -2881,6 +3625,8 @@ CUDA: true_divide_ SparseCPU: true_divide_sparse_ SparseCUDA: true_divide_sparse_ @@ -3195,7 +3188,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) -@@ -2889,31 +3637,43 @@ +@@ -2889,31 +3635,43 @@ CUDA: true_divide_out SparseCPU: true_divide_out_sparse_zerodim SparseCUDA: true_divide_out_sparse_zerodim @@ -3239,7 +3232,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: type_as(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -2956,6 +3716,8 @@ +@@ -2956,6 +3714,8 @@ dispatch: CPU: _unique2_cpu CUDA: _unique2_cuda @@ -3248,7 +3241,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _unsafe_view(Tensor self, int[] size) -> Tensor -@@ -2971,32 +3733,48 @@ +@@ -2971,32 +3731,48 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -3297,7 +3290,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: view_as(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -3009,13 +3787,19 @@ +@@ -3009,13 +3785,19 @@ - func: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full variants: function, method @@ -3317,7 +3310,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor variants: function -@@ -3041,13 +3825,21 @@ +@@ -3041,13 +3823,21 @@ - func: zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor device_guard: False @@ -3339,7 +3332,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor use_c10_dispatcher: full -@@ -3100,25 +3892,37 @@ +@@ -3100,25 +3890,37 @@ - func: _sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor dispatch: @@ -3379,7 +3372,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor variants: function, method -@@ -3162,12 +3966,16 @@ +@@ -3162,12 +3964,16 @@ SparseCUDA: clone_sparse MkldnnCPU: mkldnn_clone QuantizedCPU: quantized_clone @@ -3396,7 +3389,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -3176,6 +3984,8 @@ +@@ -3176,6 +3982,8 @@ CUDA: pow_out SparseCPU: pow_out_sparse_scalar SparseCUDA: pow_out_sparse_scalar @@ -3405,7 +3398,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor use_c10_dispatcher: full -@@ -3186,6 +3996,8 @@ +@@ -3186,6 +3994,8 @@ CUDA: pow SparseCPU: pow_sparse_scalar SparseCUDA: pow_sparse_scalar @@ -3414,7 +3407,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: zero_(Tensor(a!) self) -> Tensor(a!) supports_named_tensor: True -@@ -3196,6 +4008,14 @@ +@@ -3196,6 +4006,14 @@ SparseCPU: zero_sparse_ SparseCUDA: zero_sparse_ MkldnnCPU: mkldnn_zero_ @@ -3429,7 +3422,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) dispatch: -@@ -3204,6 +4024,8 @@ +@@ -3204,6 +4022,8 @@ SparseCPU: sub_out_sparse SparseCUDA: sub_out_sparse supports_named_tensor: True @@ -3438,7 +3431,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor use_c10_dispatcher: full -@@ -3213,6 +4035,8 @@ +@@ -3213,6 +4033,8 @@ CUDA: sub SparseCPU: sub_sparse SparseCUDA: sub_sparse @@ -3447,7 +3440,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) -@@ -3222,6 +4046,8 @@ +@@ -3222,6 +4044,8 @@ CUDA: sub_ SparseCPU: sub_sparse_ SparseCUDA: sub_sparse_ @@ -3456,7 +3449,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True # For C++ only, until we have conversion from C++ numbers to Tensor -@@ -3229,21 +4055,29 @@ +@@ -3229,21 +4053,29 @@ use_c10_dispatcher: full variants: function, method supports_named_tensor: True @@ -3486,7 +3479,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Functionally the same as addmm, but we give it a different derivative formula # that doesn't propagate gradients to non-present entries on sparse. 
-@@ -3257,6 +4091,8 @@ +@@ -3257,6 +4089,8 @@ CUDA: legacy::cuda::_th_addmm_out SparseCPU: addmm_out_sparse_dense_cpu SparseCUDA: addmm_out_sparse_dense_cuda @@ -3495,7 +3488,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor -@@ -3267,6 +4103,8 @@ +@@ -3267,6 +4101,8 @@ CUDA: legacy::cuda::_th_addmm SparseCPU: addmm_sparse_dense_cpu SparseCUDA: addmm_sparse_dense_cuda @@ -3504,7 +3497,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) -@@ -3278,9 +4116,10 @@ +@@ -3278,9 +4114,10 @@ # broadcasting SparseCPU: s_addmm_sparse_dense_cpu_ SparseCUDA: s_addmm_sparse_dense_cuda_ @@ -3516,7 +3509,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # NOTE [ Sparse: autograd and API ] # # -@@ -3396,7 +4235,6 @@ +@@ -3396,7 +4233,6 @@ # shared. In other words, their outputs are non-differentiable views of the # sparse tensor. @@ -3524,7 +3517,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given # the default would never make sense. - func: sparse_coo_tensor.size(int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor -@@ -3433,7 +4271,6 @@ +@@ -3433,7 +4269,6 @@ SparseCUDA: sparse_resize_and_clear_ requires_tensor: True @@ -3532,7 +3525,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sparse_mask(Tensor self, Tensor mask) -> Tensor use_c10_dispatcher: full variants: method -@@ -3442,7 +4279,6 @@ +@@ -3442,7 +4277,6 @@ SparseCUDA: sparse_mask_cuda requires_tensor: True @@ -3540,7 +3533,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: to_dense(Tensor self) -> Tensor use_c10_dispatcher: full variants: method -@@ -3474,7 +4310,6 @@ +@@ -3474,7 +4308,6 @@ requires_tensor: True device_guard: False @@ -3548,7 +3541,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dense_dim(Tensor self) -> int use_c10_dispatcher: full variants: method -@@ -3494,7 +4329,6 @@ +@@ -3494,7 +4327,6 @@ requires_tensor: True device_guard: False @@ -3556,7 +3549,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _nnz(Tensor self) -> int use_c10_dispatcher: full variants: method -@@ -3504,7 +4338,6 @@ +@@ -3504,7 +4336,6 @@ requires_tensor: True device_guard: False @@ -3564,7 +3557,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: coalesce(Tensor self) -> Tensor use_c10_dispatcher: full variants: method -@@ -3513,7 +4346,6 @@ +@@ -3513,7 +4344,6 @@ SparseCUDA: coalesce_sparse_cuda requires_tensor: True @@ -3572,7 +3565,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: is_coalesced(Tensor self) -> bool use_c10_dispatcher: full variants: method -@@ -3524,7 +4356,6 @@ +@@ -3524,7 +4354,6 @@ device_guard: False supports_named_tensor: True @@ -3580,7 +3573,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _indices(Tensor(a) self) -> Tensor(a) variants: method dispatch: -@@ -3568,7 +4399,6 @@ +@@ 
-3568,7 +4397,6 @@ requires_tensor: True device_guard: False @@ -3588,7 +3581,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!) dispatch: SparseCPU: hspmm_out_sparse_cpu -@@ -3630,11 +4460,15 @@ +@@ -3630,11 +4458,15 @@ variants: function dispatch: CPU: quantize_per_tensor_cpu @@ -3604,7 +3597,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dequantize(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -3713,20 +4547,28 @@ +@@ -3713,20 +4545,28 @@ variants: method device_guard: False supports_named_tensor: True @@ -3633,7 +3626,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: meshgrid(Tensor[] tensors) -> Tensor[] -@@ -3765,6 +4607,8 @@ +@@ -3765,6 +4605,8 @@ dispatch: CPU: _local_scalar_dense_cpu CUDA: _local_scalar_dense_cuda @@ -3642,7 +3635,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= variants: function supports_named_tensor: True -@@ -3791,10 +4635,16 @@ +@@ -3791,10 +4633,16 @@ # RNN cells and layers - func: lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor) @@ -3659,7 +3652,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor) -@@ -3839,10 +4689,14 @@ +@@ -3839,10 +4687,14 @@ # PackedSequence utilities - func: _pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor) @@ -3674,7 +3667,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # wrappers for legacy TH methods -@@ -3852,6 +4706,8 @@ +@@ -3852,6 +4704,8 @@ dispatch: CPU: set_ CUDA: set_ @@ -3683,7 +3676,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!) variants: method -@@ -3860,6 +4716,8 @@ +@@ -3860,6 +4714,8 @@ CPU: legacy::cpu::_th_set_ CUDA: legacy::cuda::_th_set_ QuantizedCPU: set_storage @@ -3692,7 +3685,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!) variants: method -@@ -3867,12 +4725,16 @@ +@@ -3867,12 +4723,16 @@ dispatch: CPU: set_tensor_ CUDA: set_tensor_ @@ -3709,7 +3702,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: set_quantizer_(Tensor(a!) self, ConstQuantizerPtr quantizer) -> Tensor(a!) 
variants: method -@@ -3892,6 +4754,8 @@ +@@ -3892,6 +4752,8 @@ dispatch: CPU: masked_fill__cpu CUDA: masked_fill__cuda @@ -3718,7 +3711,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor -@@ -3904,6 +4768,8 @@ +@@ -3904,6 +4766,8 @@ dispatch: CPU: masked_fill__cpu CUDA: masked_fill__cuda @@ -3727,7 +3720,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor -@@ -3916,6 +4782,8 @@ +@@ -3916,6 +4780,8 @@ dispatch: CPU: masked_scatter__cpu CUDA: masked_scatter__cuda @@ -3736,7 +3729,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor use_c10_dispatcher: full -@@ -3929,25 +4797,35 @@ +@@ -3929,25 +4795,35 @@ CUDA: view MkldnnCPU: mkldnn_view QuantizedCPU: view @@ -3772,7 +3765,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!) variants: method -@@ -3955,11 +4833,15 @@ +@@ -3955,11 +4831,15 @@ dispatch: CPU: legacy::cpu::_th_index_fill_ CUDA: legacy::cuda::_th_index_fill_ @@ -3788,7 +3781,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!) variants: method -@@ -3967,11 +4849,15 @@ +@@ -3967,11 +4847,15 @@ CPU: index_fill_ CUDA: index_fill_ supports_named_tensor: True @@ -3804,7 +3797,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!) variants: method -@@ -3994,6 +4880,8 @@ +@@ -3994,6 +4878,8 @@ dispatch: CPU: scatter_cpu_ CUDA: legacy::cuda::_th_scatter_ @@ -3813,7 +3806,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor use_c10_dispatcher: full -@@ -4004,6 +4892,8 @@ +@@ -4004,6 +4890,8 @@ dispatch: CPU: scatter_fill_cpu_ CUDA: legacy::cuda::_th_scatter_ @@ -3822,7 +3815,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor use_c10_dispatcher: full -@@ -4020,81 +4910,127 @@ +@@ -4020,81 +4908,127 @@ dispatch: CPU: scatter_add_cpu_ CUDA: legacy::cuda::_th_scatter_add_ @@ -3950,7 +3943,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: __iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) variants: method -@@ -4107,70 +5043,106 @@ +@@ -4107,70 +5041,106 @@ dispatch: CPU: bitwise_or_out CUDA: bitwise_or_out @@ -4057,7 +4050,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: __ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) variants: method -@@ -4240,18 +5212,24 @@ +@@ -4240,18 +5210,24 @@ - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!) supports_named_tensor: True variants: method @@ -4082,7 +4075,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: digamma_(Tensor(a!) self) -> Tensor(a!) 
supports_named_tensor: True -@@ -4266,6 +5244,8 @@ +@@ -4266,6 +5242,8 @@ dispatch: CPU: legacy::cpu::_th_renorm_ CUDA: legacy::cuda::_th_renorm_ @@ -4091,7 +4084,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!) supports_named_tensor: True -@@ -4273,6 +5253,8 @@ +@@ -4273,6 +5251,8 @@ dispatch: CPU: pow_ CUDA: pow_ @@ -4100,7 +4093,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!) supports_named_tensor: True -@@ -4280,53 +5262,71 @@ +@@ -4280,53 +5260,71 @@ dispatch: CPU: pow_ CUDA: pow_ @@ -4172,7 +4165,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor use_c10_dispatcher: full -@@ -4334,28 +5334,40 @@ +@@ -4334,28 +5332,40 @@ dispatch: CPU: legacy::cpu::_th_addbmm CUDA: legacy::cuda::_th_addbmm @@ -4213,7 +4206,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!) -@@ -4380,6 +5392,8 @@ +@@ -4380,6 +5390,8 @@ dispatch: CPU: legacy::cpu::_th_diag_out CUDA: legacy::cuda::_th_diag_out @@ -4222,7 +4215,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: diag(Tensor self, int diagonal=0) -> Tensor use_c10_dispatcher: full -@@ -4387,30 +5401,44 @@ +@@ -4387,30 +5399,44 @@ dispatch: CPU: legacy::cpu::_th_diag CUDA: legacy::cuda::_th_diag @@ -4267,7 +4260,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor dispatch: -@@ -4435,6 +5463,8 @@ +@@ -4435,6 +5461,8 @@ CPU: ne_out CUDA: ne_out QuantizedCPU: ne_out_quantized_cpu @@ -4276,7 +4269,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ne.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4444,6 +5474,8 @@ +@@ -4444,6 +5472,8 @@ CPU: ne CUDA: ne QuantizedCPU: ne_quantized_cpu @@ -4285,7 +4278,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ne.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4451,6 +5483,8 @@ +@@ -4451,6 +5481,8 @@ CPU: ne_out CUDA: ne_out QuantizedCPU: ne_out_quantized_cpu @@ -4294,7 +4287,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ne.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4460,6 +5494,8 @@ +@@ -4460,6 +5492,8 @@ CPU: ne CUDA: ne QuantizedCPU: ne_quantized_cpu @@ -4303,7 +4296,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -4467,6 +5503,8 @@ +@@ -4467,6 +5501,8 @@ CPU: eq_out CUDA: eq_out QuantizedCPU: eq_out_quantized_cpu @@ -4312,7 +4305,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4476,6 +5514,8 @@ +@@ -4476,6 +5512,8 @@ CPU: eq CUDA: eq QuantizedCPU: eq_quantized_cpu @@ -4321,7 +4314,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4483,6 +5523,8 @@ +@@ -4483,6 +5521,8 @@ CPU: eq_out CUDA: eq_out QuantizedCPU: eq_out_quantized_cpu @@ -4330,7 +4323,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: eq.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4492,6 +5534,8 @@ +@@ -4492,6 +5532,8 @@ CPU: eq CUDA: eq QuantizedCPU: eq_quantized_cpu @@ -4339,7 +4332,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4499,6 +5543,8 @@ +@@ -4499,6 +5541,8 @@ CPU: ge_out CUDA: ge_out QuantizedCPU: ge_out_quantized_cpu @@ -4348,7 +4341,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4508,6 +5554,8 @@ +@@ -4508,6 +5552,8 @@ CPU: ge CUDA: ge QuantizedCPU: ge_quantized_cpu @@ -4357,7 +4350,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4515,6 +5563,8 @@ +@@ -4515,6 +5561,8 @@ CPU: ge_out CUDA: ge_out QuantizedCPU: ge_out_quantized_cpu @@ -4366,7 +4359,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: ge.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4524,6 +5574,8 @@ +@@ -4524,6 +5572,8 @@ CPU: ge CUDA: ge QuantizedCPU: ge_quantized_cpu @@ -4375,7 +4368,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4531,6 +5583,8 @@ +@@ -4531,6 +5581,8 @@ CPU: le_out CUDA: le_out QuantizedCPU: le_out_quantized_cpu @@ -4384,7 +4377,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4540,6 +5594,8 @@ +@@ -4540,6 +5592,8 @@ CPU: le CUDA: le QuantizedCPU: le_quantized_cpu @@ -4393,7 +4386,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -4547,6 +5603,8 @@ +@@ -4547,6 +5601,8 @@ CPU: le_out CUDA: le_out QuantizedCPU: le_out_quantized_cpu @@ -4402,7 +4395,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: le.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4556,6 +5614,8 @@ +@@ -4556,6 +5612,8 @@ CPU: le CUDA: le QuantizedCPU: le_quantized_cpu @@ -4411,7 +4404,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4563,6 +5623,8 @@ +@@ -4563,6 +5621,8 @@ CPU: gt_out CUDA: gt_out QuantizedCPU: gt_out_quantized_cpu @@ -4420,7 +4413,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4572,6 +5634,8 @@ +@@ -4572,6 +5632,8 @@ CPU: gt CUDA: gt QuantizedCPU: gt_quantized_cpu @@ -4429,7 +4422,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4579,6 +5643,8 @@ +@@ -4579,6 +5641,8 @@ CPU: gt_out CUDA: gt_out QuantizedCPU: gt_out_quantized_cpu @@ -4438,7 +4431,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gt.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4588,6 +5654,8 @@ +@@ -4588,6 +5652,8 @@ CPU: gt CUDA: gt QuantizedCPU: gt_quantized_cpu @@ -4447,7 +4440,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True -@@ -4595,6 +5663,8 @@ +@@ -4595,6 +5661,8 @@ CPU: lt_out CUDA: lt_out QuantizedCPU: lt_out_quantized_cpu @@ -4456,7 +4449,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Scalar(Tensor self, Scalar other) -> Tensor supports_named_tensor: True -@@ -4604,6 +5674,8 @@ +@@ -4604,6 +5672,8 @@ CPU: lt CUDA: lt QuantizedCPU: lt_quantized_cpu @@ -4465,7 +4458,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
supports_named_tensor: True -@@ -4611,6 +5683,8 @@ +@@ -4611,6 +5681,8 @@ CPU: lt_out CUDA: lt_out QuantizedCPU: lt_out_quantized_cpu @@ -4474,7 +4467,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lt.Tensor(Tensor self, Tensor other) -> Tensor supports_named_tensor: True -@@ -4620,11 +5694,16 @@ +@@ -4620,11 +5692,16 @@ CPU: lt CUDA: lt QuantizedCPU: lt_quantized_cpu @@ -4491,7 +4484,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: take(Tensor self, Tensor index) -> Tensor use_c10_dispatcher: full -@@ -4632,11 +5711,16 @@ +@@ -4632,11 +5709,16 @@ dispatch: CPU: legacy::cpu::_th_take CUDA: legacy::cuda::_th_take @@ -4508,7 +4501,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: index_select(Tensor self, int dim, Tensor index) -> Tensor use_c10_dispatcher: full -@@ -4646,17 +5730,25 @@ +@@ -4646,17 +5728,25 @@ CUDA: legacy::cuda::_th_index_select SparseCPU: index_select_sparse SparseCUDA: index_select_sparse @@ -4534,7 +4527,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: masked_select(Tensor self, Tensor mask) -> Tensor use_c10_dispatcher: full -@@ -4665,11 +5757,15 @@ +@@ -4665,11 +5755,15 @@ CPU: masked_select_cpu CUDA: masked_select_cuda supports_named_tensor: True @@ -4550,7 +4543,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: nonzero(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -4677,6 +5773,8 @@ +@@ -4677,6 +5771,8 @@ dispatch: CPU: legacy::cpu::_th_nonzero CUDA: legacy::cuda::_th_nonzero @@ -4559,7 +4552,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: nonzero_numpy(Tensor self) -> Tensor[] variants: method, function -@@ -4685,6 +5783,8 @@ +@@ -4685,6 +5781,8 @@ dispatch: CPU: gather_out_cpu CUDA: gather_out_cuda @@ -4568,7 +4561,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor use_c10_dispatcher: full -@@ -4692,34 +5792,50 @@ +@@ -4692,34 +5790,50 @@ dispatch: CPU: gather_cpu CUDA: gather_cuda @@ -4619,7 +4612,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lstsq.X(Tensor self, Tensor A, *, Tensor(a!) X, Tensor(b!) qr) -> (Tensor(a!) solution, Tensor(b!) QR) dispatch: -@@ -4826,9 +5942,13 @@ +@@ -4826,9 +5940,13 @@ CUDA: legacy::cuda::_th_potri - func: qr.Q(Tensor self, bool some=True, *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R) @@ -4633,7 +4626,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _qr_helper(Tensor self, bool some) -> (Tensor, Tensor) variants: function -@@ -4891,12 +6011,16 @@ +@@ -4891,12 +6009,16 @@ dispatch: CPU: multinomial_out CUDA: multinomial_out @@ -4650,7 +4643,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _multinomial_alias_setup(Tensor probs) -> (Tensor, Tensor) variants: function -@@ -4947,6 +6071,8 @@ +@@ -4947,6 +6069,8 @@ dispatch: CPU: erfinv CUDA: erfinv @@ -4659,7 +4652,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: erfinv_(Tensor(a!) self) -> Tensor(a!) 
supports_named_tensor: True -@@ -4954,26 +6080,36 @@ +@@ -4954,26 +6078,36 @@ dispatch: CPU: _erfinv__cpu CUDA: _erfinv__cuda @@ -4696,7 +4689,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor use_c10_dispatcher: full -@@ -4981,21 +6117,29 @@ +@@ -4981,21 +6115,29 @@ - func: atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) supports_named_tensor: True @@ -4726,7 +4719,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor use_c10_dispatcher: full -@@ -5003,6 +6147,8 @@ +@@ -5003,6 +6145,8 @@ dispatch: CPU: lerp_cpu_scalar CUDA: lerp_cuda_scalar @@ -4735,7 +4728,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor use_c10_dispatcher: full -@@ -5010,6 +6156,8 @@ +@@ -5010,6 +6154,8 @@ dispatch: CPU: lerp_cpu_tensor CUDA: lerp_cuda_tensor @@ -4744,7 +4737,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!) dispatch: -@@ -5027,6 +6175,8 @@ +@@ -5027,6 +6173,8 @@ dispatch: CPU: fmod_out CUDA: legacy::cuda::_th_fmod_out @@ -4753,7 +4746,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: fmod.Scalar(Tensor self, Scalar other) -> Tensor use_c10_dispatcher: full -@@ -5034,11 +6184,15 @@ +@@ -5034,11 +6182,15 @@ dispatch: CPU: fmod CUDA: legacy::cuda::_th_fmod @@ -4769,7 +4762,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -5046,11 +6200,15 @@ +@@ -5046,11 +6198,15 @@ dispatch: CPU: fmod CUDA: legacy::cuda::_th_fmod @@ -4785,7 +4778,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: remainder.Scalar(Tensor self, Scalar other) -> Tensor use_c10_dispatcher: full -@@ -5058,11 +6216,15 @@ +@@ -5058,11 +6214,15 @@ dispatch: CPU: remainder CUDA: remainder @@ -4801,7 +4794,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: remainder.Tensor(Tensor self, Tensor other) -> Tensor use_c10_dispatcher: full -@@ -5070,12 +6232,18 @@ +@@ -5070,12 +6230,18 @@ dispatch: CPU: remainder CUDA: remainder @@ -4820,7 +4813,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: min(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5084,13 +6252,19 @@ +@@ -5084,13 +6250,19 @@ CPU: min CUDA: legacy::cuda::_th_min QuantizedCPU: min_quant @@ -4840,7 +4833,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5099,6 +6273,8 @@ +@@ -5099,6 +6271,8 @@ CPU: max CUDA: legacy::cuda::_th_max QuantizedCPU: max_quant @@ -4849,7 +4842,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: median(Tensor self) -> Tensor -@@ -5107,12 +6283,16 @@ +@@ -5107,12 +6281,16 @@ dispatch: CPU: median_cpu CUDA: median_cuda @@ -4866,7 +4859,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) variants: method, function 
-@@ -5120,23 +6300,45 @@ +@@ -5120,23 +6298,45 @@ CPU: legacy::cpu::_th_sort CUDA: legacy::cuda::_th_sort QuantizedCPU: sort_quant @@ -4912,7 +4905,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices) variants: method, function -@@ -5144,11 +6346,15 @@ +@@ -5144,11 +6344,15 @@ CPU: topk CUDA: topk QuantizedCPU: quantized_topk_cpu @@ -4928,7 +4921,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: any(Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5159,11 +6365,15 @@ +@@ -5159,11 +6363,15 @@ CUDA: any SparseCPU: any_sparse SparseCUDA: any_sparse @@ -4944,7 +4937,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor use_c10_dispatcher: full -@@ -5171,6 +6381,8 @@ +@@ -5171,6 +6379,8 @@ dispatch: CPU: legacy::cpu::_th_renorm CUDA: legacy::cuda::_th_renorm @@ -4953,7 +4946,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a) variants: method -@@ -5178,6 +6390,8 @@ +@@ -5178,6 +6388,8 @@ dispatch: CPU: unfold CUDA: unfold @@ -4962,7 +4955,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: equal(Tensor self, Tensor other) -> bool use_c10_dispatcher: full -@@ -5186,6 +6400,8 @@ +@@ -5186,6 +6398,8 @@ CPU: legacy::cpu::_th_equal CUDA: legacy::cuda::_th_equal QuantizedCPU: quantized_equal @@ -4971,7 +4964,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!) -@@ -5193,6 +6409,8 @@ +@@ -5193,6 +6407,8 @@ dispatch: CPU: pow_out CUDA: pow_out @@ -4980,7 +4973,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor use_c10_dispatcher: full -@@ -5201,12 +6419,16 @@ +@@ -5201,12 +6417,16 @@ dispatch: CPU: pow CUDA: pow @@ -4997,7 +4990,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor use_c10_dispatcher: full -@@ -5214,6 +6436,8 @@ +@@ -5214,6 +6434,8 @@ dispatch: CPU: pow CUDA: pow @@ -5006,7 +4999,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!) 
variants: method -@@ -5221,40 +6445,58 @@ +@@ -5221,40 +6443,58 @@ CPU: normal_cpu_ CUDA: normal_cuda_ supports_named_tensor: True @@ -5065,7 +5058,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: alias(Tensor(a) self) -> Tensor(a) variants: method, function -@@ -5265,43 +6507,59 @@ +@@ -5265,43 +6505,59 @@ dispatch: CPU: legacy::cpu::_th_addr CUDA: legacy::cuda::_th_addr @@ -5126,7 +5119,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _var(Tensor self, bool unbiased=True) -> Tensor use_c10_dispatcher: full -@@ -5309,6 +6567,8 @@ +@@ -5309,6 +6565,8 @@ CPU: legacy::cpu::_th_var CUDA: legacy::cuda::_th_var supports_named_tensor: True @@ -5135,7 +5128,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _std(Tensor self, bool unbiased=True) -> Tensor use_c10_dispatcher: full -@@ -5321,6 +6581,8 @@ +@@ -5321,6 +6579,8 @@ variants: function dispatch: CUDA: _amp_non_finite_check_and_unscale_cuda_ @@ -5144,7 +5137,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _amp_update_scale(Tensor(a!) growth_tracker, Tensor current_scale, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor variants: function -@@ -5332,12 +6594,16 @@ +@@ -5332,12 +6592,16 @@ CPU: _cat_cpu CUDA: cat_cuda QuantizedCPU: quantized_cat @@ -5161,7 +5154,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor, Tensor) dispatch: -@@ -5353,36 +6619,50 @@ +@@ -5353,36 +6617,50 @@ dispatch: CPU: legacy::cpu::_th_max CUDA: legacy::cuda::_th_max @@ -5212,7 +5205,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor use_c10_dispatcher: full -@@ -5390,23 +6670,33 @@ +@@ -5390,23 +6668,33 @@ dispatch: CPU: mse_loss_backward CUDA: mse_loss_backward @@ -5246,7 +5239,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: multi_margin_loss.out(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5434,22 +6724,30 @@ +@@ -5434,22 +6722,30 @@ - func: multilabel_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -5277,7 +5270,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: multilabel_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, Tensor is_target, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -5466,97 +6764,137 @@ +@@ -5466,97 +6762,137 @@ - func: nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -5415,7 +5408,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!) 
python_module: nn -@@ -5564,6 +6902,8 @@ +@@ -5564,6 +6900,8 @@ CPU: elu_out CUDA: elu_out QuantizedCPU: quantized_elu_out @@ -5424,7 +5417,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor use_c10_dispatcher: full -@@ -5572,16 +6912,22 @@ +@@ -5572,16 +6910,22 @@ CPU: elu CUDA: elu QuantizedCPU: quantized_elu @@ -5447,7 +5440,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!) python_module: nn -@@ -5589,12 +6935,16 @@ +@@ -5589,12 +6933,16 @@ CPU: elu_ CUDA: elu_ QuantizedCPU: quantized_elu_ @@ -5464,7 +5457,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: glu(Tensor self, int dim=-1) -> Tensor use_c10_dispatcher: full -@@ -5602,12 +6952,16 @@ +@@ -5602,12 +6950,16 @@ dispatch: CPU: glu CUDA: legacy::cuda::_thnn_glu_forward @@ -5481,7 +5474,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor use_c10_dispatcher: full -@@ -5615,20 +6969,30 @@ +@@ -5615,20 +6967,30 @@ dispatch: CPU: glu_backward CUDA: legacy::cuda::_thnn_glu_backward @@ -5512,7 +5505,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5636,6 +7000,8 @@ +@@ -5636,6 +6998,8 @@ CPU: hardtanh_out CUDA: hardtanh_out QuantizedCPU: quantized_hardtanh_out @@ -5521,7 +5514,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor use_c10_dispatcher: full -@@ -5644,16 +7010,22 @@ +@@ -5644,16 +7008,22 @@ CPU: hardtanh CUDA: hardtanh QuantizedCPU: quantized_hardtanh @@ -5544,7 +5537,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!) python_module: nn -@@ -5661,6 +7033,8 @@ +@@ -5661,6 +7031,8 @@ CPU: hardtanh_ CUDA: hardtanh_ QuantizedCPU: quantized_hardtanh_ @@ -5553,7 +5546,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5668,6 +7042,8 @@ +@@ -5668,6 +7040,8 @@ CPU: leaky_relu_out CUDA: leaky_relu_out QuantizedCPU: quantized_leaky_relu_out @@ -5562,7 +5555,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor use_c10_dispatcher: full -@@ -5676,10 +7052,14 @@ +@@ -5676,10 +7050,14 @@ CPU: leaky_relu CUDA: leaky_relu QuantizedCPU: quantized_leaky_relu @@ -5577,7 +5570,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!) 
python_module: nn -@@ -5687,31 +7067,44 @@ +@@ -5687,31 +7065,44 @@ CPU: leaky_relu_ CUDA: leaky_relu_ QuantizedCPU: quantized_leaky_relu_ @@ -5622,7 +5615,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor use_c10_dispatcher: full -@@ -5719,6 +7112,8 @@ +@@ -5719,6 +7110,8 @@ dispatch: CPU: log_sigmoid_backward_cpu CUDA: legacy::cuda::_thnn_log_sigmoid_backward @@ -5631,7 +5624,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5744,37 +7139,53 @@ +@@ -5744,37 +7137,53 @@ - func: softplus.out(Tensor self, Scalar beta=1, Scalar threshold=20, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -5685,7 +5678,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_avg_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -5782,9 +7193,13 @@ +@@ -5782,9 +7191,13 @@ CPU: adaptive_avg_pool2d_out_cpu CUDA: adaptive_avg_pool2d_out_cuda MkldnnCPU: mkldnn_adaptive_avg_pool2d_out @@ -5699,7 +5692,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor dispatch: -@@ -5796,6 +7211,8 @@ +@@ -5796,6 +7209,8 @@ CPU: adaptive_avg_pool2d_cpu CUDA: adaptive_avg_pool2d_cuda QuantizedCPU: quantized_adaptive_avg_pool2d @@ -5708,7 +5701,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: _adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5803,24 +7220,32 @@ +@@ -5803,24 +7218,32 @@ dispatch: CPU: adaptive_avg_pool2d_backward_cpu CUDA: adaptive_avg_pool2d_backward_cuda @@ -5741,7 +5734,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_avg_pool3d_backward(Tensor grad_output, Tensor self) -> Tensor use_c10_dispatcher: full -@@ -5828,6 +7253,8 @@ +@@ -5828,6 +7251,8 @@ dispatch: CPU: adaptive_avg_pool3d_backward_cpu CUDA: adaptive_avg_pool3d_backward_cuda @@ -5750,7 +5743,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out, Tensor(b!) 
indices) -> (Tensor(a!), Tensor(b!)) -@@ -5835,6 +7262,8 @@ +@@ -5835,6 +7260,8 @@ dispatch: CPU: adaptive_max_pool2d_out_cpu CUDA: adaptive_max_pool2d_out_cuda @@ -5759,7 +5752,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor) -@@ -5842,12 +7271,16 @@ +@@ -5842,12 +7269,16 @@ dispatch: CPU: adaptive_max_pool2d_cpu CUDA: adaptive_max_pool2d_cuda @@ -5776,7 +5769,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor use_c10_dispatcher: full -@@ -5855,6 +7288,8 @@ +@@ -5855,6 +7286,8 @@ dispatch: CPU: adaptive_max_pool2d_backward_cpu CUDA: adaptive_max_pool2d_backward_cuda @@ -5785,7 +5778,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: adaptive_max_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5889,6 +7324,8 @@ +@@ -5889,6 +7322,8 @@ CPU: avg_pool2d_out_cpu CUDA: avg_pool2d_out_cuda MkldnnCPU: mkldnn_avg_pool2d_out @@ -5794,7 +5787,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor python_module: nn -@@ -5897,24 +7334,32 @@ +@@ -5897,24 +7332,32 @@ CUDA: avg_pool2d_cuda MkldnnCPU: mkldnn_avg_pool2d QuantizedCPU: quantized_avg_pool2d @@ -5827,7 +5820,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor python_module: nn -@@ -5922,18 +7367,24 @@ +@@ -5922,18 +7365,24 @@ CPU: avg_pool3d_cpu CUDA: avg_pool3d_cuda QuantizedCPU: quantized_avg_pool3d @@ -5852,7 +5845,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: fractional_max_pool2d.output(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -5993,6 +7444,8 @@ +@@ -5993,6 +7442,8 @@ dispatch: CPU: max_pool2d_with_indices_out_cpu CUDA: max_pool2d_with_indices_out_cuda @@ -5861,7 +5854,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -6000,6 +7453,8 @@ +@@ -6000,6 +7451,8 @@ dispatch: CPU: max_pool2d_with_indices_cpu CUDA: max_pool2d_with_indices_cuda @@ -5870,7 +5863,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: max_pool2d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) 
-@@ -6007,12 +7462,16 @@ +@@ -6007,12 +7460,16 @@ dispatch: CPU: max_pool2d_with_indices_backward_out_cpu CUDA: max_pool2d_with_indices_backward_out_cuda @@ -5887,7 +5880,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool3d_with_indices.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) -@@ -6020,6 +7479,8 @@ +@@ -6020,6 +7477,8 @@ dispatch: CPU: max_pool3d_with_indices_out_cpu CUDA: max_pool3d_with_indices_out_cuda @@ -5896,7 +5889,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Return: (Tensor output, Tensor indices) - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) -@@ -6027,6 +7488,8 @@ +@@ -6027,6 +7486,8 @@ dispatch: CPU: max_pool3d_with_indices_cpu CUDA: max_pool3d_with_indices_cuda @@ -5905,7 +5898,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= supports_named_tensor: True - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) -@@ -6034,12 +7497,17 @@ +@@ -6034,12 +7495,17 @@ dispatch: CPU: max_pool3d_with_indices_backward_out_cpu CUDA: max_pool3d_with_indices_backward_out_cuda @@ -5923,7 +5916,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: max_unpool2d.out(Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6118,12 +7586,16 @@ +@@ -6118,12 +7584,16 @@ dispatch: CPU: reflection_pad2d_out_cpu CUDA: reflection_pad2d_out_cuda @@ -5940,7 +5933,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6166,12 +7638,16 @@ +@@ -6166,12 +7636,16 @@ dispatch: CPU: replication_pad2d_out_cpu CUDA: replication_pad2d_out_cuda @@ -5957,7 +5950,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: replication_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6214,12 +7690,16 @@ +@@ -6214,12 +7688,16 @@ dispatch: CPU: upsample_linear1d_out_cpu CUDA: upsample_linear1d_out_cuda @@ -5974,7 +5967,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_linear1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, bool align_corners, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn -@@ -6232,12 +7712,16 @@ +@@ -6232,12 +7710,16 @@ dispatch: CPU: upsample_linear1d_backward_cpu CUDA: upsample_linear1d_backward_cuda @@ -5991,7 +5984,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? 
scales_w=None) -> Tensor python_module: nn -@@ -6245,96 +7729,128 @@ +@@ -6245,96 +7727,128 @@ CPU: upsample_bilinear2d_cpu CUDA: upsample_bilinear2d_cuda QuantizedCPU: quantized_upsample_bilinear2d_cpu @@ -6120,7 +6113,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6342,24 +7858,32 @@ +@@ -6342,24 +7856,32 @@ CPU: upsample_nearest2d_cpu CUDA: upsample_nearest2d_cuda QuantizedCPU: quantized_upsample_nearest2d_cpu @@ -6153,7 +6146,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn -@@ -6367,38 +7891,52 @@ +@@ -6367,38 +7889,52 @@ CPU: upsample_nearest3d_cpu CUDA: upsample_nearest3d_cuda QuantizedCPU: quantized_upsample_nearest3d_cpu @@ -6206,7 +6199,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # What's a thnn_conv_ versus a slow_conv_? # -@@ -6423,24 +7961,32 @@ +@@ -6423,24 +7959,32 @@ dispatch: CPU: slow_conv_transpose2d_out_cpu CUDA: slow_conv_transpose2d_out_cuda @@ -6239,7 +6232,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6468,21 +8014,29 @@ +@@ -6468,21 +8012,29 @@ - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -6269,7 +6262,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight, Tensor(c!)? grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) python_module: nn -@@ -6495,32 +8049,46 @@ +@@ -6495,32 +8047,46 @@ dispatch: CPU: slow_conv2d_backward_cpu CUDA: legacy::cuda::_thnn_conv2d_backward @@ -6316,7 +6309,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn -@@ -6553,12 +8121,16 @@ +@@ -6553,12 +8119,16 @@ dispatch: CPU: slow_conv_dilated2d_cpu CUDA: slow_conv_dilated2d_cuda @@ -6333,7 +6326,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? 
bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor python_module: nn -@@ -6577,57 +8149,401 @@ +@@ -6577,57 +8147,405 @@ dispatch: CPU: col2im_out_cpu CUDA: col2im_out_cuda @@ -6354,7 +6347,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CPU: col2im_backward_out_cpu CUDA: col2im_backward_out_cuda + npu_dispatch: -+ NPU: col2im_backward_out_npu ++ NPU: im2col_out_npu - func: col2im_backward(Tensor grad_output, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor python_module: nn @@ -6362,7 +6355,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CPU: col2im_backward_cpu CUDA: col2im_backward_cuda + npu_dispatch: -+ NPU: col2im_backward_npu ++ NPU: im2col_npu - func: im2col.out(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -6735,10 +6728,14 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +- func: npu_linear_backward(Tensor grad, Tensor input, Tensor weight) -> (Tensor, Tensor) + npu_dispatch_only: + NPU: linear_backward_npu ++ ++- func: npu_bert_apply_adam(Tensor(a!) var, Tensor(b!) m, Tensor(c!) v, Scalar lr, Scalar beta1, Scalar beta2, Scalar epsilon, Tensor grad, Scalar max_grad_norm, Scalar global_grad_norm, Scalar weight_decay) -> (Tensor(a!), Tensor(b!), Tensor(c!)) ++ npu_dispatch_only: ++ NPU: bert_apply_adam_npu \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S --- pytorch-v1.5.0/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-09 17:16:47.866792783 +0800 ++++ pytorch-develop/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S 2021-07-13 15:30:57.674270525 +0800 @@ -659,14 +659,14 @@ SUB x1, x1, 4 @@ -6764,7 +6761,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CMP x1, 2 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp pytorch-develop/aten/src/ATen/native/TensorCompare.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorCompare.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-09 17:16:47.810790775 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorCompare.cpp 2021-07-13 15:30:57.618268517 +0800 @@ -64,7 +64,7 @@ Tensor isinf(const Tensor &self) { @@ -6776,7 +6773,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return AT_DISPATCH_FLOATING_TYPES_AND_HALF(self.scalar_type(), "isinf", [&]() { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp pytorch-develop/aten/src/ATen/native/TensorFactories.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorFactories.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ 
pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-09 17:16:47.810790775 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorFactories.cpp 2021-07-13 15:30:57.618268517 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6821,7 +6818,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp pytorch-develop/aten/src/ATen/native/TensorProperties.cpp --- pytorch-v1.5.0/aten/src/ATen/native/TensorProperties.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-09 17:16:47.810790775 +0800 ++++ pytorch-develop/aten/src/ATen/native/TensorProperties.cpp 2021-07-13 15:30:57.618268517 +0800 @@ -87,6 +87,7 @@ if (self.is_contiguous(memory_format)) { return self; @@ -6832,7 +6829,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= "preserve memory format is unsupported by the contiguous operator"); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp --- pytorch-v1.5.0/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-09 17:16:47.814790918 +0800 ++++ pytorch-develop/aten/src/ATen/native/UpSampleBicubic2d.cpp 2021-07-13 15:30:57.622268661 +0800 @@ -26,7 +26,7 @@ const scalar_t* in = &idata[output_y * input_width + output_x]; scalar_t* out = &odata[output_y * output_width + output_x]; @@ -6844,7 +6841,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= out += output_width * output_height; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/native_parse.py pytorch-develop/aten/src/ATen/native_parse.py --- pytorch-v1.5.0/aten/src/ATen/native_parse.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-09 17:16:47.878793213 +0800 ++++ pytorch-develop/aten/src/ATen/native_parse.py 2021-07-13 15:30:57.686270955 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -6882,7 +6879,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= msg = '''Exception raised in processing function: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py pytorch-develop/aten/src/ATen/preprocess_declarations.py --- pytorch-v1.5.0/aten/src/ATen/preprocess_declarations.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-09 17:16:47.882793357 +0800 ++++ pytorch-develop/aten/src/ATen/preprocess_declarations.py 2021-07-13 15:30:57.690271099 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -6914,7 +6911,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h pytorch-develop/aten/src/ATen/templates/TensorBody.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorBody.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-09 17:16:47.882793357 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorBody.h 2021-07-13 15:30:57.690271099 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6947,7 +6944,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h pytorch-develop/aten/src/ATen/templates/TensorMethods.h --- pytorch-v1.5.0/aten/src/ATen/templates/TensorMethods.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-09 17:16:47.882793357 +0800 ++++ pytorch-develop/aten/src/ATen/templates/TensorMethods.h 2021-07-13 15:30:57.690271099 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -6981,7 +6978,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/CMakeLists.txt pytorch-develop/aten/src/TH/CMakeLists.txt --- pytorch-v1.5.0/aten/src/TH/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-09 17:16:47.886793500 +0800 ++++ pytorch-develop/aten/src/TH/CMakeLists.txt 2021-07-13 15:30:57.694271242 +0800 @@ -48,6 +48,11 @@ ${CMAKE_CURRENT_SOURCE_DIR} PARENT_SCOPE) @@ -6996,7 +6993,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp pytorch-develop/aten/src/TH/generic/THStorage.cpp --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-09 17:16:47.886793500 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.cpp 2021-07-13 15:30:57.694271242 +0800 @@ -1,9 +1,32 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7105,7 +7102,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/aten/src/TH/generic/THStorage.h pytorch-develop/aten/src/TH/generic/THStorage.h --- pytorch-v1.5.0/aten/src/TH/generic/THStorage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-09 17:16:47.886793500 +0800 ++++ pytorch-develop/aten/src/TH/generic/THStorage.h 2021-07-13 15:30:57.694271242 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7144,7 +7141,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/CMakeLists.txt pytorch-develop/c10/CMakeLists.txt --- pytorch-v1.5.0/c10/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/CMakeLists.txt 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/CMakeLists.txt 2021-07-13 15:30:57.706271672 +0800 @@ -63,6 +63,14 @@ message(STATUS "don't use NUMA") endif() @@ -7173,7 +7170,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # not checked in diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Backend.h pytorch-develop/c10/core/Backend.h --- pytorch-v1.5.0/c10/core/Backend.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Backend.h 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/Backend.h 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7268,7 +7265,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.cpp pytorch-develop/c10/core/Device.cpp --- pytorch-v1.5.0/c10/core/Device.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.cpp 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/Device.cpp 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7308,7 +7305,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= types.begin(), diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Device.h pytorch-develop/c10/core/Device.h --- pytorch-v1.5.0/c10/core/Device.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Device.h 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/Device.h 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7343,7 +7340,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return type_ == DeviceType::CPU; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.cpp pytorch-develop/c10/core/DeviceType.cpp --- pytorch-v1.5.0/c10/core/DeviceType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/DeviceType.cpp 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7383,7 +7380,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return false; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DeviceType.h pytorch-develop/c10/core/DeviceType.h --- pytorch-v1.5.0/c10/core/DeviceType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DeviceType.h 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/DeviceType.h 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7426,7 +7423,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= constexpr DeviceType kXLA = DeviceType::XLA; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.cpp pytorch-develop/c10/core/DispatchKey.cpp --- pytorch-v1.5.0/c10/core/DispatchKey.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/DispatchKey.cpp 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7458,7 +7455,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= case DispatchKey::TESTING_ONLY_GenericModeTensorId: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/DispatchKey.h pytorch-develop/c10/core/DispatchKey.h --- pytorch-v1.5.0/c10/core/DispatchKey.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/DispatchKey.h 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/DispatchKey.h 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7490,7 +7487,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/Storage.h pytorch-develop/c10/core/Storage.h --- pytorch-v1.5.0/c10/core/Storage.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/Storage.h 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/Storage.h 2021-07-13 15:30:57.706271672 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -7524,7 +7521,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= }; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/StorageImpl.h pytorch-develop/c10/core/StorageImpl.h --- pytorch-v1.5.0/c10/core/StorageImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/StorageImpl.h 2021-07-09 17:16:47.902794074 +0800 ++++ pytorch-develop/c10/core/StorageImpl.h 2021-07-13 15:30:57.706271672 +0800 @@ -1,12 +1,39 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7581,7 +7578,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorImpl.h pytorch-develop/c10/core/TensorImpl.h --- pytorch-v1.5.0/c10/core/TensorImpl.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorImpl.h 2021-07-09 17:16:47.906794218 +0800 ++++ pytorch-develop/c10/core/TensorImpl.h 2021-07-13 15:30:57.710271816 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7651,7 +7648,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/core/TensorOptions.h pytorch-develop/c10/core/TensorOptions.h --- pytorch-v1.5.0/c10/core/TensorOptions.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/core/TensorOptions.h 2021-07-09 17:16:47.906794218 +0800 ++++ pytorch-develop/c10/core/TensorOptions.h 2021-07-13 15:30:57.710271816 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7692,7 +7689,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/c10/macros/Export.h pytorch-develop/c10/macros/Export.h --- pytorch-v1.5.0/c10/macros/Export.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/c10/macros/Export.h 2021-07-09 17:16:47.906794218 +0800 ++++ pytorch-develop/c10/macros/Export.h 2021-07-13 15:30:57.710271816 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -7819,7 +7816,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/caffe2/CMakeLists.txt pytorch-develop/caffe2/CMakeLists.txt --- pytorch-v1.5.0/caffe2/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-09 17:16:47.918794647 +0800 ++++ pytorch-develop/caffe2/CMakeLists.txt 2021-07-13 15:30:57.718272102 +0800 @@ -32,6 +32,7 @@ # Add source, includes, and libs to lists list(APPEND Caffe2_CPU_SRCS ${ATen_CPU_SRCS}) @@ -7966,7 +7963,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # Call again since Caffe2_HIP_INCLUDE is extended with ATen include dirs. 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.clang-format pytorch-develop/.clang-format --- pytorch-v1.5.0/.clang-format 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.clang-format 2021-07-09 17:16:47.778789628 +0800 ++++ pytorch-develop/.clang-format 2021-07-13 15:30:57.586267370 +0800 @@ -84,5 +84,4 @@ SpacesInSquareBrackets: false Standard: Cpp11 @@ -7977,7 +7974,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/BuildVariables.cmake pytorch-develop/cmake/BuildVariables.cmake --- pytorch-v1.5.0/cmake/BuildVariables.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-09 17:16:48.030798663 +0800 ++++ pytorch-develop/cmake/BuildVariables.cmake 2021-07-13 15:30:57.830276118 +0800 @@ -11,6 +11,7 @@ # CMakeLists.txt files under each folder respectively. set(Caffe2_CPU_SRCS) @@ -8004,7 +8001,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # symbols. However, if the lib is whole linked in caffe2 lib, we don't want diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Codegen.cmake pytorch-develop/cmake/Codegen.cmake --- pytorch-v1.5.0/cmake/Codegen.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Codegen.cmake 2021-07-09 17:16:48.030798663 +0800 ++++ pytorch-develop/cmake/Codegen.cmake 2021-07-13 15:30:57.830276118 +0800 @@ -191,13 +191,14 @@ file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt generated_cpp) file(READ ${CMAKE_BINARY_DIR}/aten/src/ATen/generated_cpp.txt-cuda cuda_generated_cpp) @@ -8035,7 +8032,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Dependencies.cmake pytorch-develop/cmake/Dependencies.cmake --- pytorch-v1.5.0/cmake/Dependencies.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Dependencies.cmake 2021-07-09 17:16:48.030798663 +0800 ++++ pytorch-develop/cmake/Dependencies.cmake 2021-07-13 15:30:57.830276118 +0800 @@ -1509,6 +1509,13 @@ ENDIF(NOT C_HAS_THREAD) endif() @@ -8052,7 +8049,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= # diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/cmake/Summary.cmake pytorch-develop/cmake/Summary.cmake --- pytorch-v1.5.0/cmake/Summary.cmake 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/Summary.cmake 2021-07-09 17:16:48.034798807 +0800 ++++ pytorch-develop/cmake/Summary.cmake 2021-07-13 15:30:57.830276118 +0800 @@ -134,6 +134,7 @@ if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") @@ -8063,7 +8060,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endfunction() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur 
pytorch-v1.5.0/cmake/TorchConfig.cmake.in pytorch-develop/cmake/TorchConfig.cmake.in --- pytorch-v1.5.0/cmake/TorchConfig.cmake.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-09 17:16:48.034798807 +0800 ++++ pytorch-develop/cmake/TorchConfig.cmake.in 2021-07-13 15:30:57.830276118 +0800 @@ -112,6 +112,11 @@ list(APPEND TORCH_LIBRARIES ${TORCH_CUDA_LIBRARIES}) endif() @@ -8078,7 +8075,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/CMakeLists.txt pytorch-develop/CMakeLists.txt --- pytorch-v1.5.0/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/CMakeLists.txt 2021-07-09 17:16:47.782789771 +0800 ++++ pytorch-develop/CMakeLists.txt 2021-07-13 15:30:57.590267513 +0800 @@ -205,6 +205,10 @@ option(USE_TBB "Use TBB" OFF) option(ONNX_ML "Enable traditional ONNX ML API." ON) @@ -8145,7 +8142,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/.dockerignore pytorch-develop/.dockerignore --- pytorch-v1.5.0/.dockerignore 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/.dockerignore 2021-07-09 17:16:47.778789628 +0800 ++++ pytorch-develop/.dockerignore 2021-07-13 15:30:57.586267370 +0800 @@ -1,257 +1 @@ -# READ THIS BEFORE YOU REFACTOR ME -# @@ -8408,7 +8405,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/docs/make.bat pytorch-develop/docs/make.bat --- pytorch-v1.5.0/docs/make.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/docs/make.bat 2021-07-09 17:16:48.038798950 +0800 ++++ pytorch-develop/docs/make.bat 2021-07-13 15:30:57.834276262 +0800 @@ -1,36 +1,36 @@ -@ECHO OFF - @@ -8497,7 +8494,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/requirements.txt pytorch-develop/requirements.txt --- pytorch-v1.5.0/requirements.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/requirements.txt 2021-07-09 17:16:48.054799524 +0800 ++++ pytorch-develop/requirements.txt 2021-07-13 15:30:57.850276836 +0800 @@ -4,4 +4,12 @@ requests setuptools @@ -8516,7 +8513,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/appveyor/install.bat pytorch-develop/scripts/appveyor/install.bat --- pytorch-v1.5.0/scripts/appveyor/install.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/appveyor/install.bat 2021-07-09 17:16:48.054799524 +0800 ++++ pytorch-develop/scripts/appveyor/install.bat 2021-07-13 15:30:57.850276836 +0800 @@ -1,10 +1,10 @@ -:: Installation scripts for 
appveyor. - @@ -8540,7 +8537,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +conda install -y numpy diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/appveyor/install_cuda.bat pytorch-develop/scripts/appveyor/install_cuda.bat --- pytorch-v1.5.0/scripts/appveyor/install_cuda.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/appveyor/install_cuda.bat 2021-07-09 17:16:48.054799524 +0800 ++++ pytorch-develop/scripts/appveyor/install_cuda.bat 2021-07-13 15:30:57.850276836 +0800 @@ -1,22 +1,22 @@ -@echo on - @@ -8588,7 +8585,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +nvcc -V || exit /b diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/build_windows.bat pytorch-develop/scripts/build_windows.bat --- pytorch-v1.5.0/scripts/build_windows.bat 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/build_windows.bat 2021-07-09 17:16:48.054799524 +0800 ++++ pytorch-develop/scripts/build_windows.bat 2021-07-13 15:30:57.850276836 +0800 @@ -1,84 +1,84 @@ -:: ############################################################################# -:: Example command to build on Windows. @@ -8760,7 +8757,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +exit /b 1 diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/scripts/proto.ps1 pytorch-develop/scripts/proto.ps1 --- pytorch-v1.5.0/scripts/proto.ps1 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/scripts/proto.ps1 2021-07-09 17:16:48.054799524 +0800 ++++ pytorch-develop/scripts/proto.ps1 2021-07-13 15:30:57.850276836 +0800 @@ -1,17 +1,17 @@ -param( - [string]$protoc, @@ -8798,7 +8795,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +Invoke-Expression $cmd diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/setup.py pytorch-develop/setup.py --- pytorch-v1.5.0/setup.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/setup.py 2021-07-09 17:16:48.054799524 +0800 ++++ pytorch-develop/setup.py 2021-07-13 15:30:57.850276836 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -8897,7 +8894,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'python/serialized_test/data/operator_test/*.zip', diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/derivatives.yaml pytorch-develop/tools/autograd/derivatives.yaml --- pytorch-v1.5.0/tools/autograd/derivatives.yaml 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/derivatives.yaml 2021-07-13 15:30:58.990317711 +0800 @@ -107,6 +107,10 @@ # # NB: The parameter names here MUST be consistent with the parameter names @@ -9010,8 +9007,8 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/dump_utils.py pytorch-develop/tools/autograd/dump_utils.py --- pytorch-v1.5.0/tools/autograd/dump_utils.py 1970-01-01 08:00:00.000000000 +0800 -+++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-09 17:16:49.194840399 +0800 -@@ -0,0 +1,112 @@ ++++ pytorch-develop/tools/autograd/dump_utils.py 2021-07-13 15:30:58.990317711 +0800 +@@ -0,0 +1,114 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# All rights reserved. +# @@ -9122,11 +9119,13 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + "item", + "set__source_Storage_storage_offset", + "pin_memory", -+ "to_device" ++ "to_device", ++ "numpy_T", ++ "slice_Tensor" +] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py pytorch-develop/tools/autograd/gen_autograd_functions.py --- pytorch-v1.5.0/tools/autograd/gen_autograd_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/gen_autograd_functions.py 2021-07-13 15:30:58.990317711 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9312,7 +9311,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_python_functions.py pytorch-develop/tools/autograd/gen_python_functions.py --- pytorch-v1.5.0/tools/autograd/gen_python_functions.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/gen_python_functions.py 2021-07-13 15:30:58.990317711 +0800 @@ -1,3 +1,20 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -9354,7 +9353,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= 'value': argname, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/gen_variable_type.py pytorch-develop/tools/autograd/gen_variable_type.py --- pytorch-v1.5.0/tools/autograd/gen_variable_type.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/gen_variable_type.py 2021-07-13 15:30:58.990317711 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2021 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -9527,7 +9526,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/Functions.cpp pytorch-develop/tools/autograd/templates/Functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/Functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/templates/Functions.cpp 2021-07-13 15:30:58.990317711 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9607,7 +9606,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto sparse = sparse_.coalesce(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp pytorch-develop/tools/autograd/templates/python_torch_functions.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_torch_functions.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/templates/python_torch_functions.cpp 2021-07-13 15:30:58.990317711 +0800 @@ -22,7 +22,7 @@ #include "torch/csrc/autograd/generated/variable_factories.h" #include "torch/csrc/utils/structseq.h" @@ -9691,7 +9690,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp pytorch-develop/tools/autograd/templates/python_variable_methods.cpp --- pytorch-v1.5.0/tools/autograd/templates/python_variable_methods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/templates/python_variable_methods.cpp 2021-07-13 15:30:58.990317711 +0800 @@ -15,7 +15,13 @@ #include "torch/csrc/cuda/Stream.h" #include "torch/csrc/cuda/Event.h" @@ -9778,7 +9777,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"has_names", (PyCFunction)THPVariable_has_names, METH_NOARGS, NULL}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp 
pytorch-develop/tools/autograd/templates/VariableType.cpp --- pytorch-v1.5.0/tools/autograd/templates/VariableType.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.cpp 2021-07-13 15:30:58.990317711 +0800 @@ -1,7 +1,27 @@ +// Copyright (c) 2021 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9809,7 +9808,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/autograd/templates/VariableType.h pytorch-develop/tools/autograd/templates/VariableType.h --- pytorch-v1.5.0/tools/autograd/templates/VariableType.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-09 17:16:49.194840399 +0800 ++++ pytorch-develop/tools/autograd/templates/VariableType.h 2021-07-13 15:30:58.990317711 +0800 @@ -1,3 +1,20 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -9841,7 +9840,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= const at::Tensor & unpack(const Tensor & t, const char * name, int pos); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/tools/build_variables.bzl pytorch-develop/tools/build_variables.bzl --- pytorch-v1.5.0/tools/build_variables.bzl 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/tools/build_variables.bzl 2021-07-09 17:16:49.198840543 +0800 ++++ pytorch-develop/tools/build_variables.bzl 2021-07-13 15:30:58.994317854 +0800 @@ -46,6 +46,7 @@ "torch/csrc/autograd/functions/utils.cpp", "torch/csrc/autograd/input_buffer.cpp", @@ -9927,7 +9926,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def grad(outputs: _TensorOrTensors, inputs: _TensorOrTensors, grad_outputs: Optional[_TensorOrTensors]=..., retain_graph: Optional[bool]=..., create_graph: bool=..., only_inputs: bool=..., allow_unused: bool=...) -> Tuple[Tensor, ...]: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/autograd/profiler.py pytorch-develop/torch/autograd/profiler.py --- pytorch-v1.5.0/torch/autograd/profiler.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/autograd/profiler.py 2021-07-09 17:16:49.202840686 +0800 ++++ pytorch-develop/torch/autograd/profiler.py 2021-07-13 15:30:58.998317998 +0800 @@ -1,8 +1,25 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -10400,7 +10399,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return ''.join(result) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/CMakeLists.txt pytorch-develop/torch/CMakeLists.txt --- pytorch-v1.5.0/torch/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/CMakeLists.txt 2021-07-09 17:16:49.198840543 +0800 ++++ pytorch-develop/torch/CMakeLists.txt 2021-07-13 15:30:58.994317854 +0800 @@ -97,6 +97,7 @@ ${TORCH_SRC_DIR}/csrc/tensor/python_tensor.cpp ${TORCH_SRC_DIR}/csrc/utils.cpp @@ -10432,7 +10431,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= endif() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/engine.cpp pytorch-develop/torch/csrc/autograd/engine.cpp --- pytorch-v1.5.0/torch/csrc/autograd/engine.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/engine.cpp 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10555,7 +10554,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto event = c10::Event{c10::DeviceType::CUDA}; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp pytorch-develop/torch/csrc/autograd/functions/tensor.cpp --- pytorch-v1.5.0/torch/csrc/autograd/functions/tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/functions/tensor.cpp 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10587,7 +10586,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= /*non_blocking=*/false, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/init.cpp pytorch-develop/torch/csrc/autograd/init.cpp --- pytorch-v1.5.0/torch/csrc/autograd/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/init.cpp 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -10630,7 +10629,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= m.def("_enable_profiler", enableProfiler); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp pytorch-develop/torch/csrc/autograd/input_buffer.cpp --- pytorch-v1.5.0/torch/csrc/autograd/input_buffer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/input_buffer.cpp 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10682,7 +10681,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto& old_var = buffer[pos]; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp pytorch-develop/torch/csrc/autograd/profiler.cpp --- pytorch-v1.5.0/torch/csrc/autograd/profiler.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.cpp 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -10878,7 +10877,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= CUDAStubs::~CUDAStubs() = default; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/profiler.h pytorch-develop/torch/csrc/autograd/profiler.h --- pytorch-v1.5.0/torch/csrc/autograd/profiler.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/profiler.h 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11003,7 +11002,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp pytorch-develop/torch/csrc/autograd/python_variable.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable.cpp 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11057,7 +11056,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"is_complex", (getter)THPVariable_is_complex, nullptr, nullptr, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp --- pytorch-v1.5.0/torch/csrc/autograd/python_variable_indexing.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/python_variable_indexing.cpp 2021-07-13 15:30:59.010318428 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11098,7 +11097,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h --- pytorch-v1.5.0/torch/csrc/autograd/utils/wrap_outputs.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-09 17:16:49.214841116 +0800 ++++ pytorch-develop/torch/csrc/autograd/utils/wrap_outputs.h 2021-07-13 15:30:59.010318428 +0800 @@ -168,6 +168,45 @@ return r.release(); } @@ -11147,7 +11146,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!r) throw python_error(); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp --- pytorch-v1.5.0/torch/csrc/autograd/VariableTypeManual.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-09 17:16:49.210840973 +0800 ++++ pytorch-develop/torch/csrc/autograd/VariableTypeManual.cpp 2021-07-13 15:30:59.006318284 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11181,7 +11180,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= if (!t.defined()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp pytorch-develop/torch/csrc/distributed/c10d/comm.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/comm.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-09 17:16:49.218841259 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/comm.cpp 2021-07-13 15:30:59.014318571 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11287,7 +11286,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= while (!in_flight.empty()) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp pytorch-develop/torch/csrc/distributed/c10d/init.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-09 17:16:49.218841259 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/init.cpp 2021-07-13 15:30:59.014318571 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11344,7 +11343,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= .def("is_success", &::c10d::ProcessGroup::Work::isSuccess) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp --- pytorch-v1.5.0/torch/csrc/distributed/c10d/reducer.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-09 17:16:49.218841259 +0800 ++++ pytorch-develop/torch/csrc/distributed/c10d/reducer.cpp 2021-07-13 15:30:59.014318571 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11469,7 +11468,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp pytorch-develop/torch/csrc/DynamicTypes.cpp --- pytorch-v1.5.0/torch/csrc/DynamicTypes.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-09 17:16:49.202840686 +0800 ++++ pytorch-develop/torch/csrc/DynamicTypes.cpp 2021-07-13 15:30:58.998317998 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11518,7 +11517,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return it->second; diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Generator.cpp pytorch-develop/torch/csrc/Generator.cpp --- pytorch-v1.5.0/torch/csrc/Generator.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-09 17:16:49.202840686 +0800 ++++ pytorch-develop/torch/csrc/Generator.cpp 2021-07-13 15:30:58.998317998 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11586,7 +11585,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= #endif diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/serialization.cpp pytorch-develop/torch/csrc/generic/serialization.cpp --- pytorch-v1.5.0/torch/csrc/generic/serialization.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-09 17:16:49.222841403 +0800 ++++ pytorch-develop/torch/csrc/generic/serialization.cpp 2021-07-13 15:30:59.018318714 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11686,7 +11685,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/Storage.cpp pytorch-develop/torch/csrc/generic/Storage.cpp --- pytorch-v1.5.0/torch/csrc/generic/Storage.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-09 17:16:49.218841259 +0800 ++++ pytorch-develop/torch/csrc/generic/Storage.cpp 2021-07-13 15:30:59.018318714 +0800 @@ -1,7 +1,25 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11765,7 +11764,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= for (Py_ssize_t i = 0; i < length; i++) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp pytorch-develop/torch/csrc/generic/StorageMethods.cpp --- pytorch-v1.5.0/torch/csrc/generic/StorageMethods.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-09 17:16:49.222841403 +0800 ++++ pytorch-develop/torch/csrc/generic/StorageMethods.cpp 2021-07-13 15:30:59.018318714 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -11813,7 +11812,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= {"_write_file", (PyCFunction)THPStorage_(writeFile), METH_VARARGS, nullptr}, diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/Module.cpp pytorch-develop/torch/csrc/Module.cpp --- pytorch-v1.5.0/torch/csrc/Module.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/Module.cpp 2021-07-09 17:16:49.202840686 +0800 ++++ pytorch-develop/torch/csrc/Module.cpp 2021-07-13 15:30:58.998317998 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -11957,7 +11956,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= auto set_module_attr = [&](const char* name, PyObject* v, bool incref = true) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp pytorch-develop/torch/csrc/tensor/python_tensor.cpp --- pytorch-v1.5.0/torch/csrc/tensor/python_tensor.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-09 17:16:49.242842120 +0800 ++++ pytorch-develop/torch/csrc/tensor/python_tensor.cpp 2021-07-13 15:30:59.038319432 +0800 @@ -1,18 +1,35 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12334,7 +12333,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= +} // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.cpp pytorch-develop/torch/csrc/utils/init.cpp --- pytorch-v1.5.0/torch/csrc/utils/init.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-09 17:16:49.242842120 +0800 ++++ pytorch-develop/torch/csrc/utils/init.cpp 2021-07-13 15:30:59.038319432 +0800 @@ -1,6 +1,10 @@ #include #include @@ -12422,7 +12421,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/init.h pytorch-develop/torch/csrc/utils/init.h --- pytorch-v1.5.0/torch/csrc/utils/init.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/init.h 2021-07-09 17:16:49.242842120 +0800 ++++ pytorch-develop/torch/csrc/utils/init.h 2021-07-13 15:30:59.038319432 +0800 @@ -8,4 +8,7 @@ void initThroughputBenchmarkBindings(PyObject* module); @@ -12433,7 +12432,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } // namespace torch diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h pytorch-develop/torch/csrc/utils/python_arg_parser.h --- pytorch-v1.5.0/torch/csrc/utils/python_arg_parser.h 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-09 17:16:49.242842120 +0800 ++++ pytorch-develop/torch/csrc/utils/python_arg_parser.h 2021-07-13 15:30:59.038319432 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. 
@@ -12468,7 +12467,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return at::Device(device_str); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp pytorch-develop/torch/csrc/utils/tensor_layouts.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_layouts.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-09 17:16:49.242842120 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_layouts.cpp 2021-07-13 15:30:59.038319432 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12499,7 +12498,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= registerLayoutObject((THPLayout*)strided_layout, at::Backend::QuantizedCPU); diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp pytorch-develop/torch/csrc/utils/tensor_new.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_new.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-09 17:16:49.242842120 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_new.cpp 2021-07-13 15:30:59.038319432 +0800 @@ -1,3 +1,19 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12635,7 +12634,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= } else if(expected_layout == c10::kSparse) { diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp pytorch-develop/torch/csrc/utils/tensor_types.cpp --- pytorch-v1.5.0/torch/csrc/utils/tensor_types.cpp 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-09 17:16:49.242842120 +0800 ++++ pytorch-develop/torch/csrc/utils/tensor_types.cpp 2021-07-13 15:30:59.038319432 +0800 @@ -1,58 +1,91 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. @@ -12848,7 +12847,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def get_rng_state(): ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributed/distributed_c10d.py pytorch-develop/torch/distributed/distributed_c10d.py --- pytorch-v1.5.0/torch/distributed/distributed_c10d.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-09 17:16:49.246842264 +0800 ++++ pytorch-develop/torch/distributed/distributed_c10d.py 2021-07-13 15:30:59.042319575 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -12929,7 +12928,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/distributions/von_mises.py pytorch-develop/torch/distributions/von_mises.py --- pytorch-v1.5.0/torch/distributions/von_mises.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/distributions/von_mises.py 2021-07-09 17:16:49.246842264 +0800 ++++ pytorch-develop/torch/distributions/von_mises.py 2021-07-13 15:30:59.042319575 +0800 @@ -1,140 +1,140 @@ -from __future__ import absolute_import, division, print_function - @@ -13213,7 +13212,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= + _log_modified_bessel_fn(self.concentration, order=0)).exp() diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/__init__.py pytorch-develop/torch/__init__.py --- pytorch-v1.5.0/torch/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/__init__.py 2021-07-09 17:16:49.198840543 +0800 ++++ pytorch-develop/torch/__init__.py 2021-07-13 15:30:58.994317854 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13256,7 +13255,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= \ No newline at end of file diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt pytorch-develop/torch/lib/c10d/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/c10d/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-09 17:16:49.250842407 +0800 ++++ pytorch-develop/torch/lib/c10d/CMakeLists.txt 2021-07-13 15:30:59.046319718 +0800 @@ -28,6 +28,10 @@ option(USE_C10D_NCCL "USE C10D NCCL" ON) endif() @@ -13309,7 +13308,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= copy_header(ProcessGroupMPI.hpp) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt pytorch-develop/torch/lib/libshm/CMakeLists.txt --- pytorch-v1.5.0/torch/lib/libshm/CMakeLists.txt 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-09 17:16:49.250842407 +0800 ++++ pytorch-develop/torch/lib/libshm/CMakeLists.txt 2021-07-13 15:30:59.046319718 +0800 @@ -37,8 +37,11 @@ SET_TARGET_PROPERTIES(shm PROPERTIES PREFIX "lib" @@ -13366,7 +13365,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -_maybe_indices_t = _scalar_or_tuple_2_t[Tensor] diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/functional.py pytorch-develop/torch/nn/functional.py --- pytorch-v1.5.0/torch/nn/functional.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/functional.py 2021-07-09 17:16:49.254842550 +0800 ++++ pytorch-develop/torch/nn/functional.py 2021-07-13 15:30:59.050319862 +0800 @@ -1611,7 +1611,7 @@ else: output = input.matmul(weight.t()) @@ -13389,7 +13388,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' 
'--exclude= -from . import parallel as parallel diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/batchnorm.py pytorch-develop/torch/nn/modules/batchnorm.py --- pytorch-v1.5.0/torch/nn/modules/batchnorm.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-09 17:16:49.254842550 +0800 ++++ pytorch-develop/torch/nn/modules/batchnorm.py 2021-07-13 15:30:59.050319862 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13421,7 +13420,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= self.register_parameter('running_var', None) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/module.py pytorch-develop/torch/nn/modules/module.py --- pytorch-v1.5.0/torch/nn/modules/module.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/module.py 2021-07-09 17:16:49.254842550 +0800 ++++ pytorch-develop/torch/nn/modules/module.py 2021-07-13 15:30:59.050319862 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -13564,7 +13563,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return t.to(device, dtype if t.is_floating_point() else None, non_blocking, memory_format=convert_to_format) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/normalization.py pytorch-develop/torch/nn/modules/normalization.py --- pytorch-v1.5.0/torch/nn/modules/normalization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-09 17:16:49.254842550 +0800 ++++ pytorch-develop/torch/nn/modules/normalization.py 2021-07-13 15:30:59.050319862 +0800 @@ -128,13 +128,14 @@ """ __constants__ = ['normalized_shape', 'eps', 'elementwise_affine'] @@ -13597,7 +13596,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= return '{normalized_shape}, eps={eps}, ' \ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/modules/transformer.pyi.in pytorch-develop/torch/nn/modules/transformer.pyi.in --- pytorch-v1.5.0/torch/nn/modules/transformer.pyi.in 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/modules/transformer.pyi.in 2021-07-09 17:16:49.254842550 +0800 ++++ pytorch-develop/torch/nn/modules/transformer.pyi.in 2021-07-13 15:30:59.054320005 +0800 @@ -1,60 +1,60 @@ -from ..init import xavier_uniform_ -from .activation import MultiheadAttention @@ -13757,7 +13756,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - module_kwargs: Optional[Any] = ...) -> Tensor: ... 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/nn/parallel/distributed.py pytorch-develop/torch/nn/parallel/distributed.py --- pytorch-v1.5.0/torch/nn/parallel/distributed.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-09 17:16:49.258842694 +0800 ++++ pytorch-develop/torch/nn/parallel/distributed.py 2021-07-13 15:30:59.054320005 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14108,7 +14107,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= -def remove_weight_norm(module: T_module, name: str = ...) -> T_module: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/onnx/symbolic_opset9.py pytorch-develop/torch/onnx/symbolic_opset9.py --- pytorch-v1.5.0/torch/onnx/symbolic_opset9.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-09 17:16:49.258842694 +0800 ++++ pytorch-develop/torch/onnx/symbolic_opset9.py 2021-07-13 15:30:59.054320005 +0800 @@ -1621,14 +1621,23 @@ slices = [sym_help._slice_helper(g, w, axes=[0], starts=[x * n], ends=[y * n]) for x, y in intervals] return g.op('Concat', *slices, axis_i=0) @@ -14186,7 +14185,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., lr_decay: float=..., weight_decay: float=..., initial_accumulator_value: float=..., eps: float=...) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/optim/adamax.py pytorch-develop/torch/optim/adamax.py --- pytorch-v1.5.0/torch/optim/adamax.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/optim/adamax.py 2021-07-09 17:16:49.262842837 +0800 ++++ pytorch-develop/torch/optim/adamax.py 2021-07-13 15:30:59.058320149 +0800 @@ -80,8 +80,8 @@ exp_inf.mul_(beta2).unsqueeze(0), grad.abs().add_(eps).unsqueeze_(0) @@ -14363,7 +14362,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=...) -> None: ... diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/serialization.py pytorch-develop/torch/serialization.py --- pytorch-v1.5.0/torch/serialization.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/serialization.py 2021-07-09 17:16:49.262842837 +0800 ++++ pytorch-develop/torch/serialization.py 2021-07-13 15:30:59.058320149 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. 
@@ -14447,7 +14446,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def location_tag(storage): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/storage.py pytorch-develop/torch/storage.py --- pytorch-v1.5.0/torch/storage.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/storage.py 2021-07-09 17:16:49.262842837 +0800 ++++ pytorch-develop/torch/storage.py 2021-07-13 15:30:59.058320149 +0800 @@ -7,6 +7,7 @@ class _StorageBase(object): @@ -14467,7 +14466,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= else: diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/tensor.py pytorch-develop/torch/tensor.py --- pytorch-v1.5.0/torch/tensor.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/tensor.py 2021-07-09 17:16:49.262842837 +0800 ++++ pytorch-develop/torch/tensor.py 2021-07-13 15:30:59.058320149 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14529,7 +14528,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def __reversed__(self): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_tensor_str.py pytorch-develop/torch/_tensor_str.py --- pytorch-v1.5.0/torch/_tensor_str.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_tensor_str.py 2021-07-09 17:16:49.198840543 +0800 ++++ pytorch-develop/torch/_tensor_str.py 2021-07-13 15:30:58.994317854 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14583,7 +14582,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= has_default_dtype = self.dtype in (torch.get_default_dtype(), torch.int64, torch.bool) diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/dataloader.py pytorch-develop/torch/utils/data/dataloader.py --- pytorch-v1.5.0/torch/utils/data/dataloader.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-09 17:16:49.266842980 +0800 ++++ pytorch-develop/torch/utils/data/dataloader.py 2021-07-13 15:30:59.062320292 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14792,7 +14791,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - def __init__(self, sampler: Sampler[int], batch_size: int, drop_last: bool) -> None: ... 
diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py pytorch-develop/torch/utils/data/_utils/pin_memory.py --- pytorch-v1.5.0/torch/utils/data/_utils/pin_memory.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-09 17:16:49.266842980 +0800 ++++ pytorch-develop/torch/utils/data/_utils/pin_memory.py 2021-07-13 15:30:59.062320292 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. @@ -14853,7 +14852,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= - diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/utils/__init__.py pytorch-develop/torch/utils/__init__.py --- pytorch-v1.5.0/torch/utils/__init__.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/utils/__init__.py 2021-07-09 17:16:49.266842980 +0800 ++++ pytorch-develop/torch/utils/__init__.py 2021-07-13 15:30:59.062320292 +0800 @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals @@ -14864,7 +14863,7 @@ diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude= def set_module(obj, mod): diff -Nur '--exclude=.git*' '--exclude=.jenkins' '--exclude=android' '--exclude=OWNERS' '--exclude=third_party' '--exclude=README*' -Nur pytorch-v1.5.0/torch/_utils.py pytorch-develop/torch/_utils.py --- pytorch-v1.5.0/torch/_utils.py 2021-04-10 18:39:32.000000000 +0800 -+++ pytorch-develop/torch/_utils.py 2021-07-09 17:16:49.202840686 +0800 ++++ pytorch-develop/torch/_utils.py 2021-07-13 15:30:58.998317998 +0800 @@ -1,3 +1,19 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# Copyright (c) 2019, Facebook CORPORATION. diff --git a/src/aten/src/ATen/native/native_functions.yaml b/src/aten/src/ATen/native/native_functions.yaml index 4b3a1b7ded4f60281cf4d8dffa66d024ed3f5ef8..30c7a8aeb19a82f8bffa37cd4947172f082db5be 100644 --- a/src/aten/src/ATen/native/native_functions.yaml +++ b/src/aten/src/ATen/native/native_functions.yaml @@ -2246,8 +2246,6 @@ - func: matrix_power(Tensor self, int n) -> Tensor use_c10_dispatcher: full variants: function, method - npu_dispatch: - NPU: matrix_power_npu - func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) variants: function, method @@ -8166,7 +8164,7 @@ CPU: col2im_backward_out_cpu CUDA: col2im_backward_out_cuda npu_dispatch: - NPU: col2im_backward_out_npu + NPU: im2col_out_npu - func: col2im_backward(Tensor grad_output, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor python_module: nn @@ -8174,7 +8172,7 @@ CPU: col2im_backward_cpu CUDA: col2im_backward_cuda npu_dispatch: - NPU: col2im_backward_npu + NPU: im2col_npu - func: im2col.out(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!) python_module: nn @@ -8546,4 +8544,8 @@ - func: npu_linear_backward(Tensor grad, Tensor input, Tensor weight) -> (Tensor, Tensor) npu_dispatch_only: - NPU: linear_backward_npu \ No newline at end of file + NPU: linear_backward_npu + +- func: npu_bert_apply_adam(Tensor(a!) var, Tensor(b!) m, Tensor(c!) 
v, Scalar lr, Scalar beta1, Scalar beta2, Scalar epsilon, Tensor grad, Scalar max_grad_norm, Scalar global_grad_norm, Scalar weight_decay) -> (Tensor(a!), Tensor(b!), Tensor(c!)) + npu_dispatch_only: + NPU: bert_apply_adam_npu \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/BaddbmmKernelNpu.cpp b/src/aten/src/ATen/native/npu/BaddbmmKernelNpu.cpp index 4e68df64788ca9d606cd23324424823e13ac4933..600c7e09a8db8a42e48b83ff27b097e4bac6fb62 100644 --- a/src/aten/src/ATen/native/npu/BaddbmmKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/BaddbmmKernelNpu.cpp @@ -11,9 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #include "ATen/native/npu/utils/CalcuOpUtil.h" -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" +#include "ATen/native/npu/utils/OpAdapter.h" + namespace at { namespace native { using namespace at::native::npu; @@ -47,7 +48,8 @@ Tensor& baddbmm_out_npu( const Tensor& tensor2, Scalar beta, Scalar alpha) { - Tensor BatchMatMulTensor = result; + auto outputSize = baddbmm_npu_output_size(tensor1, tensor2); + Tensor BatchMatMulTensor = OpPreparation::ApplyTensor(self, outputSize); auto inputs = baddbmm_npu_input(tensor1, tensor2); auto outputs = baddbmm_npu_output({BatchMatMulTensor}); @@ -100,4 +102,4 @@ Tensor& baddbmm_npu_( return self; } } -} \ No newline at end of file +} diff --git a/src/aten/src/ATen/native/npu/BertApplyAdamKernelNpu.cpp b/src/aten/src/ATen/native/npu/BertApplyAdamKernelNpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..380f65f1aac08321d6a9b72e091454e8be042f0d --- /dev/null +++ b/src/aten/src/ATen/native/npu/BertApplyAdamKernelNpu.cpp @@ -0,0 +1,108 @@ +// Copyright (c) 2020 Huawei Technologies Co., Ltd +// Copyright (c) 2019, Facebook CORPORATION. +// All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "ATen/native/npu/utils/OpAdapter.h" + +namespace at { +namespace native { +using namespace at::native::npu; + +tuple bert_apply_adam_out_npu_nocheck( + Tensor& var_out, + Tensor& m_out, + Tensor& v_out, + const Tensor& var, + const Tensor& m, + const Tensor& v, + Scalar lr, + Scalar beta1, + Scalar beta2, + Scalar epsilon, + const Tensor& grad, + Scalar max_grad_norm, + Scalar global_grad_norm, + Scalar weight_decay) { + OpCommand cmd; + cmd.Name("ApplyAdamV2") + .Input(var) + .Input(m) + .Input(v) + .Input(lr, var.scalar_type()) + .Input(beta1, var.scalar_type()) + .Input(beta2, var.scalar_type()) + .Input(epsilon, var.scalar_type()) + .Input(grad) + .Input(max_grad_norm, var.scalar_type()) + .Input(global_grad_norm, var.scalar_type()) + .Input(weight_decay, var.scalar_type()) + .Output(var_out) + .Output(m_out) + .Output(v_out) + .Run(); + return std::tie(var_out, m_out, v_out); +} + +tuple bert_apply_adam_out_npu( + Tensor& var_out, + Tensor& m_out, + Tensor& v_out, + const Tensor& var, + const Tensor& m, + const Tensor& v, + Scalar lr, + Scalar beta1, + Scalar beta2, + Scalar epsilon, + const Tensor& grad, + Scalar max_grad_norm, + Scalar global_grad_norm, + Scalar weight_decay) { + OpPipeWithDefinedOut check; + check.CheckMemory({var, m, v, grad}, {var_out, m_out, v_out}); + + auto func = [&var, &m, &v, &lr, &beta1, &beta2, &epsilon, &grad, &max_grad_norm, &global_grad_norm, &weight_decay] ( + Tensor& var_out, + Tensor& m_out, + Tensor& v_out) { + bert_apply_adam_out_npu_nocheck(var_out, m_out, v_out, var, m, v, + lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay); + }; + + OpPipeWithMultiOut pipe(var_out, m_out, v_out); + return pipe.Call(func) + .ReturnRef(); +} + +tuple bert_apply_adam_npu( + Tensor& var, + Tensor& m, + Tensor& v, + Scalar lr, + Scalar beta1, + Scalar beta2, + Scalar epsilon, + const Tensor& grad, + Scalar max_grad_norm, + Scalar global_grad_norm, + Scalar weight_decay) { + bert_apply_adam_out_npu( + var, m, v, var, m, v, + lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay); + return std::tie(var, m, v); +} + +} // namespace native +} // namespace at diff --git a/src/aten/src/ATen/native/npu/BinaryCrossEntropyKernelNpu.cpp b/src/aten/src/ATen/native/npu/BinaryCrossEntropyKernelNpu.cpp index 722dc7e2e8c874ec311083ad2b945909c3787f37..f7b608dcaef0e37afa371e7738fa458dd8fbaec5 100644 --- a/src/aten/src/ATen/native/npu/BinaryCrossEntropyKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/BinaryCrossEntropyKernelNpu.cpp @@ -68,6 +68,12 @@ Tensor binary_cross_entropy_npu( // construct the output tensor of the NPU Tensor result = OpPreparation::ApplyTensor(self, outputSize); + if (self.numel() == 0) { + // In this scenario, needs to return nan. And the nan of the NPU can only be fp32. + result = result.to(at::kFloat).fill_(0); + result = result / 0; + return result; + } // calculate the output result of the NPU binary_cross_entropy_out_npu(result, self, target, weight, reduction); diff --git a/src/aten/src/ATen/native/npu/CholeskyKernelNpu.cpp b/src/aten/src/ATen/native/npu/CholeskyKernelNpu.cpp deleted file mode 100644 index 4db76efd8096ea033de19d68b123604ff85a83d1..0000000000000000000000000000000000000000 --- a/src/aten/src/ATen/native/npu/CholeskyKernelNpu.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2020 Huawei Technologies Co., Ltd -// Copyright (c) 2019, Facebook CORPORATION. -// All rights reserved. 
-// -// Licensed under the BSD 3-Clause License (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://opensource.org/licenses/BSD-3-Clause -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ATen/native/npu/utils/KernelNpuOutputSize.h" -#include "ATen/native/npu/utils/NpuUtils.h" -#include "ATen/native/npu/utils/OpTemplate.h" - -namespace at { -namespace native { -using namespace at::native::npu; - -Tensor& cholesky_out_npu( - Tensor & y, - const Tensor & x, - bool upper) { - TORCH_CHECK( - upper == false, - "cholesky: The upper parameter currently only supports upper == false"); - - OpCommand cmd; - cmd.Name("Cholesky") - .Input(x) - .Output(y) - .Run(); - return y; -} - -Tensor cholesky_npu(const Tensor& x, bool upper) { - Tensor formatCastOfX = x.npu_format_cast(ACL_FORMAT_NCHW); - // calculate the output size - auto outputSize = input_same_output_size(formatCastOfX); - - // construct the output tensor of the NPU - Tensor y = at::empty_with_format( - outputSize, formatCastOfX.options(), ACL_FORMAT_NCHW); - - // calculate the output result of the NPU - cholesky_out_npu(y, formatCastOfX, upper); - - return y; -} - -} // namespace native -} // namespace at \ No newline at end of file diff --git a/src/aten/src/ATen/native/npu/EqKernelNpu.cpp b/src/aten/src/ATen/native/npu/EqKernelNpu.cpp index becfb6f46da8c924b65a3c8c334ec950d1b37c58..c4374fadf75bf0ce213c85783c68d9d996c09db1 100644 --- a/src/aten/src/ATen/native/npu/EqKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/EqKernelNpu.cpp @@ -60,8 +60,8 @@ Tensor& eq_out_npu(Tensor& result, const Tensor& self, const Tensor& other) { OpPreparation::CheckOut( {self, other}, result, - CalcuOpUtil::get_tensor_npu_format(self), - ScalarType::Bool, + ACL_FORMAT_ND, + result.scalar_type(), IntArrayRef(outputSize)); eq_out_npu_nocheck(result, self, other); return result; @@ -71,8 +71,8 @@ Tensor& eq_out_npu(Tensor& result, const Tensor& self, Scalar other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(self), - ScalarType::Bool, + ACL_FORMAT_ND, + result.scalar_type(), self.sizes()); eq_out_npu_nocheck(result, self, other); return result; diff --git a/src/aten/src/ATen/native/npu/GeKernelNpu.cpp b/src/aten/src/ATen/native/npu/GeKernelNpu.cpp index 6169b9de059eb470ed71172330301d88ea77d319..64c08e5617fa8213ccc96571934b3f0ba3e25064 100644 --- a/src/aten/src/ATen/native/npu/GeKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/GeKernelNpu.cpp @@ -49,7 +49,7 @@ Tensor& ge_out_npu(Tensor& result, const Tensor& self, const Tensor& other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); @@ -78,7 +78,7 @@ Tensor& ge_out_npu(Tensor& result, const Tensor& self, Scalar other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); diff --git a/src/aten/src/ATen/native/npu/GtKernelNpu.cpp b/src/aten/src/ATen/native/npu/GtKernelNpu.cpp index 63970a9d50ce6ce8a50b284dca5d047d11f910a0..48dc7f3235f037215c06a65a44f00931acfb2ad9 100644 --- 
a/src/aten/src/ATen/native/npu/GtKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/GtKernelNpu.cpp @@ -50,7 +50,7 @@ Tensor& gt_out_npu(Tensor& result, const Tensor& self, const Tensor& other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); @@ -80,7 +80,7 @@ Tensor& gt_out_npu(Tensor& result, const Tensor& self, Scalar other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); diff --git a/src/aten/src/ATen/native/npu/LeKernelNpu.cpp b/src/aten/src/ATen/native/npu/LeKernelNpu.cpp index 88ad478bd8b658c5f503e481e51b6c5823a430d3..af19185518fcd91208573d6200118ca5f3c134f4 100644 --- a/src/aten/src/ATen/native/npu/LeKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/LeKernelNpu.cpp @@ -38,7 +38,7 @@ Tensor& le_out_npu(Tensor& result, const Tensor& self, Scalar other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); @@ -67,7 +67,7 @@ Tensor& le_out_npu(Tensor& result, const Tensor& self, const Tensor& other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); diff --git a/src/aten/src/ATen/native/npu/LogKernelNpu.cpp b/src/aten/src/ATen/native/npu/LogKernelNpu.cpp index 89409d80f542257f85c859cf35e24fea1806a7e9..d536021b4e6885402c430df4d1b11542c1213628 100644 --- a/src/aten/src/ATen/native/npu/LogKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/LogKernelNpu.cpp @@ -34,10 +34,14 @@ Tensor& log_out_npu_nocheck(Tensor& result, const Tensor& self) { } Tensor& log_out_npu(Tensor& result, const Tensor& self) { - OpPreparation::CheckOut( - {self}, - result, - self); + if (!result.is_same(self)) { + OpPreparation::CheckOut( + {self}, + result, + ACL_FORMAT_ND, + self.scalar_type(), + self.sizes()); + } OpPipeWithDefinedOut pipe; return pipe.CheckMemory({self}, {result}) diff --git a/src/aten/src/ATen/native/npu/LtKernelNpu.cpp b/src/aten/src/ATen/native/npu/LtKernelNpu.cpp index 8e2ab7095eea9d592b3982d2c34c2d9f945af094..59c5ba684053863585846acf4dd3c6f62ec48308 100644 --- a/src/aten/src/ATen/native/npu/LtKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/LtKernelNpu.cpp @@ -48,7 +48,7 @@ Tensor& lt_out_npu(Tensor& result, const Tensor& self, const Tensor& other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); @@ -77,7 +77,7 @@ Tensor& lt_out_npu(Tensor& result, const Tensor& self, Scalar other) { OpPreparation::CheckOut( {self}, result, - CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf), + ACL_FORMAT_ND, result.scalar_type(), outputSize); diff --git a/src/aten/src/ATen/native/npu/MaxKernelNpu.cpp b/src/aten/src/ATen/native/npu/MaxKernelNpu.cpp index 1f3c8cd185110e7a3dce8ce8bb9f9c33315ef584..ddd370a06e977b02c52e84b2cc8467d5014d8106 100644 --- a/src/aten/src/ATen/native/npu/MaxKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MaxKernelNpu.cpp @@ -21,21 +21,6 @@ namespace at { namespace native { using namespace at::native::npu; -static inline tuple, int64_t> max_output_calc( - const Tensor& self, - IntArrayRef dims, - bool keepdim) { - SmallVector outputSize = - reduce_ops_npu_output_size(self, dims, keepdim); - - int64_t npu_format = CalcuOpUtil::get_tensor_npu_format(self); - if (outputSize.empty()) { - 
npu_format = ACL_FORMAT_ND; // use default format - } - - return std::tie(outputSize, npu_format); -} - tuple max_out_npu_nocheck( Tensor& output, Tensor& indices, @@ -45,7 +30,7 @@ tuple max_out_npu_nocheck( OpCommand cmd; cmd.Name("ArgMaxWithValue") .Input(self) - .Output(indices) + .Output(indices) .Output(output) .Attr("dimension", dim) .Attr("keep_dims", keepdim) @@ -59,10 +44,9 @@ tuple max_out_npu( const Tensor& self, int64_t dim, bool keepdim) { - auto params = max_output_calc(self, {dim}, keepdim); - auto outputSize = std::get<0>(params); - auto indicesSize = std::get<0>(params); - auto npu_format = std::get<1>(params); + SmallVector dims = {dim}; + auto outputSize = reduce_ops_npu_output_size(self, dims, keepdim); + SmallVector indicesSize = outputSize; auto func = [&self, dim, keepdim](Tensor& output, Tensor& indices) { max_out_npu_nocheck(output, indices, self, dim, keepdim); @@ -73,19 +57,19 @@ tuple max_out_npu( Tensor indices_tmp; OpPipeWithMultiOut pipe(output, indices_tmp); - return pipe.FixOutputSizeAndFormat<0>({self}, self, npu_format, outputSize) + return pipe.FixOutputSizeAndFormat<0>({self}, self, ACL_FORMAT_ND, outputSize) .ApplyOutputWithSpecailParams<1>(indicesSize, self.options().dtype(ScalarType::Int), ACL_FORMAT_ND) // use default format .Call(func) .ReflushOutputDtype<1>(ScalarType::Long) + .FixOutputExceptDtype<1>({self}, ACL_FORMAT_ND, ScalarType::Long, indicesSize) .FixOutputWithReplace<1>(indices) .ReturnRef(); } tuple max_npu(const Tensor& self, int64_t dim, bool keepdim) { - auto params = max_output_calc(self, {dim}, keepdim); - auto outputSize = std::get<0>(params); - auto indicesSize = std::get<0>(params); - auto npu_format = std::get<1>(params); + SmallVector dims = {dim}; + auto outputSize = reduce_ops_npu_output_size(self, dims, keepdim); + SmallVector indicesSize = outputSize; auto func = [&self, dim, keepdim](Tensor outputs, Tensor indices) { max_out_npu_nocheck(outputs, indices, self, dim, keepdim); @@ -93,7 +77,7 @@ tuple max_npu(const Tensor& self, int64_t dim, bool keepdim) { Tensor outputs, indices; OpPipeWithDefinedMultiOut pipe(outputs, indices); - return pipe.ApplyOutputWithSpecailParams<0>(outputSize, self.options(), npu_format) + return pipe.ApplyOutputWithSpecailParams<0>(outputSize, self.options(), ACL_FORMAT_ND) .ApplyOutputWithSpecailParams<1>(indicesSize, self.options().dtype(ScalarType::Int), ACL_FORMAT_ND) // use default format .Call(func) .ReflushOutputDtype<1>(ScalarType::Long) @@ -144,7 +128,12 @@ Tensor& max_out_npu( Tensor& result, const Tensor& self, const Tensor& other) { - OpPreparation::CheckOut({self}, result, self); + OpPreparation::CheckOut( + {self}, + result, + ACL_FORMAT_ND, + self.scalar_type(), + self.sizes()); max_out_npu_nocheck(result, self, other); return result; diff --git a/src/aten/src/ATen/native/npu/MinKernelNpu.cpp b/src/aten/src/ATen/native/npu/MinKernelNpu.cpp index 821424393afeddddd346d448c3656a6d6b1c671c..f45ae27e9b5f9eedb40a54b483f692d9cd6f6129 100644 --- a/src/aten/src/ATen/native/npu/MinKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MinKernelNpu.cpp @@ -22,21 +22,6 @@ namespace at { namespace native { using namespace at::native::npu; -static inline tuple, int64_t> min_output_calc( - const Tensor& self, - IntArrayRef dims, - bool keepdim) { - SmallVector outputSize = - reduce_ops_npu_output_size(self, dims, keepdim); - - int64_t npu_format = CalcuOpUtil::get_tensor_npu_format(self); - if (outputSize.empty()) { - npu_format = ACL_FORMAT_ND; // scalar tensor use default format - } - - return 
std::tie(outputSize, npu_format); -} - tuple min_out_npu_nocheck( Tensor& output, Tensor& indices, @@ -62,10 +47,9 @@ tuple min_out_npu( const Tensor& self, int64_t dim, bool keepdim) { - auto params = min_output_calc(self, {dim}, keepdim); - auto outputSize = std::get<0>(params); - auto indicesSize = std::get<0>(params); - auto npu_format = std::get<1>(params); + SmallVector dims = {dim}; + auto outputSize = reduce_ops_npu_output_size(self, dims, keepdim); + SmallVector indicesSize = outputSize; auto func = [&self, dim, keepdim](Tensor& output, Tensor& indices) { min_out_npu_nocheck(output, indices, self, dim, keepdim); @@ -73,19 +57,19 @@ tuple min_out_npu( Tensor indices_tmp; OpPipeWithMultiOut pipe(output, indices_tmp); - return pipe.FixOutputSizeAndFormat<0>({self}, self, npu_format, outputSize) - .ApplyOutputWithSpecailParams<1>(indicesSize, self.options().dtype(ScalarType::Int), ACL_FORMAT_NCHW) // indices must be nchw format + return pipe.FixOutputSizeAndFormat<0>({self}, self, ACL_FORMAT_ND, outputSize) + .ApplyOutputWithSpecailParams<1>(indicesSize, self.options().dtype(ScalarType::Int), ACL_FORMAT_ND) .Call(func) .ReflushOutputDtype<1>(ScalarType::Long) + .FixOutputExceptDtype<1>({self}, ACL_FORMAT_ND, ScalarType::Long, indicesSize) .FixOutputWithReplace<1>(indices) .ReturnRef(); } tuple min_npu(const Tensor& self, int64_t dim, bool keepdim) { - auto params = min_output_calc(self, {dim}, keepdim); - auto outputSize = std::get<0>(params); - auto indicesSize = std::get<0>(params); - auto npu_format = std::get<1>(params); + SmallVector dims = {dim}; + auto outputSize = reduce_ops_npu_output_size(self, dims, keepdim); + SmallVector indicesSize = outputSize; auto func = [&self, dim, keepdim](Tensor outputs, Tensor indices) { min_out_npu_nocheck(outputs, indices, self, dim, keepdim); @@ -93,7 +77,7 @@ tuple min_npu(const Tensor& self, int64_t dim, bool keepdim) { Tensor outputs, indices; OpPipeWithDefinedMultiOut pipe(outputs, indices); - return pipe.ApplyOutputWithSpecailParams<0>(outputSize, self.options(), npu_format) + return pipe.ApplyOutputWithSpecailParams<0>(outputSize, self.options(), ACL_FORMAT_ND) .ApplyOutputWithSpecailParams<1>(indicesSize, self.options().dtype(ScalarType::Int), ACL_FORMAT_NCHW) .Call(func) .ReflushOutputDtype<1>(ScalarType::Long) @@ -144,7 +128,12 @@ Tensor& min_out_npu( Tensor& result, const Tensor& self, const Tensor& other) { - OpPreparation::CheckOut({self}, result, self); + OpPreparation::CheckOut( + {self}, + result, + ACL_FORMAT_ND, + self.scalar_type(), + self.sizes()); min_out_npu_nocheck(result, self, other); return result; diff --git a/src/aten/src/ATen/native/npu/MmKernelNpu.cpp b/src/aten/src/ATen/native/npu/MmKernelNpu.cpp index 28ab0aa98118511eec34cdd63b981685a87124e2..91af42d2af1248cc22cdd88ce4f58dc578fe4e6a 100644 --- a/src/aten/src/ATen/native/npu/MmKernelNpu.cpp +++ b/src/aten/src/ATen/native/npu/MmKernelNpu.cpp @@ -18,6 +18,8 @@ #include "ATen/native/npu/utils/KernelNpuOutputSize.h" #include "ATen/native/npu/utils/NpuUtils.h" #include "ATen/native/npu/utils/OpAdapter.h" +#include "ATen/native/npu/common/InnerNpuNativeFunction.h" +#include "ATen/native/npu/frame/StorageDescHelper.h" namespace at { namespace native { @@ -26,7 +28,7 @@ using namespace at::native::npu; // Flexible transpose judgement for view+transpose+Matmul, // i.e., tensors with dim=2 and base_size_.size=3 can also be Matmul directly! 
 bool is_transpose_last_two_dims_flex(const Tensor& tensor) {
-  if (tensor.dim() < 2 || tensor.dim() > 3) {
+  if (tensor.dim() != 2) {
     return false;
   }
   int64_t numel = 1;
@@ -113,10 +115,17 @@ Tensor mm_npu(const Tensor& self, const Tensor& mat2) {
   // Matmul cannot directly deal with view+transposed tensor with NZ format, so Transdata is necessary
   if (self.sizes().size() != self_desc.base_sizes_.size()) {
     selfFormatCast = OpPreparation::CastBackToOriFormat(self);
+    // refresh storage desc info [origin shape and storage shape] of reshaped Tensor
+    if (is_transpose_last_two_dims_flex(selfFormatCast)) {
+      StorageDescHelper::ReflushDescBySelf(selfFormatCast.transpose(-2, -1));
+    }
   }

   if (mat2.sizes().size() != mat2_desc.base_sizes_.size()) {
     mat2FormatCast = OpPreparation::CastBackToOriFormat(mat2);
+    if (is_transpose_last_two_dims_flex(mat2FormatCast)) {
+      StorageDescHelper::ReflushDescBySelf(mat2FormatCast.transpose(-2, -1));
+    }
   }

   // construct the output tensor of the NPU
diff --git a/src/aten/src/ATen/native/npu/NeKernelNpu.cpp b/src/aten/src/ATen/native/npu/NeKernelNpu.cpp
index 4e4c377b7a1ca69228f7e8fa50b401477e7d87a2..a1de7e8793b6b39ccc29841bffcaab1e4b8e3694 100644
--- a/src/aten/src/ATen/native/npu/NeKernelNpu.cpp
+++ b/src/aten/src/ATen/native/npu/NeKernelNpu.cpp
@@ -15,7 +15,6 @@
 // limitations under the License.

 #include "ATen/native/npu/utils/OpAdapter.h"
-#include "ATen/native/npu/utils/CalcuOpUtil.h"

 namespace at {
 namespace native {
@@ -70,8 +69,8 @@ Tensor& ne_out_npu(Tensor& result, const Tensor& self, const Tensor& other) {
   OpPreparation::CheckOut(
       {self, other},
       result,
-      CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf),
-      ScalarType::Bool,
+      ACL_FORMAT_ND,
+      result.scalar_type(),
       IntArrayRef(outputSize));
   ne_out_npu_nocheck(result, formatCastOfSelf, formatCastOfOther);
   return result;
@@ -83,8 +82,8 @@ Tensor& ne_out_npu(Tensor& result, const Tensor& self, Scalar other) {
   OpPreparation::CheckOut(
       {self},
       result,
-      CalcuOpUtil::get_tensor_npu_format(formatCastOfSelf),
-      ScalarType::Bool,
+      ACL_FORMAT_ND,
+      result.scalar_type(),
       outputSize);
   ne_out_npu_nocheck(result, formatCastOfSelf, other);
   return result;
diff --git a/src/aten/src/ATen/native/npu/NegKernelNpu.cpp b/src/aten/src/ATen/native/npu/NegKernelNpu.cpp
index aceb969bebf5a3d8f96e0ab6fcca47f0ce578d8b..463845886638c384f6072c3b5c7d5e1724308394 100644
--- a/src/aten/src/ATen/native/npu/NegKernelNpu.cpp
+++ b/src/aten/src/ATen/native/npu/NegKernelNpu.cpp
@@ -33,7 +33,12 @@ Tensor& neg_out_npu_nocheck(Tensor& result, const Tensor& self) {
 }

 Tensor& neg_out_npu(Tensor& result, const Tensor& self) {
-  OpPreparation::CheckOut({self}, result, self);
+  OpPreparation::CheckOut(
+      {self},
+      result,
+      ACL_FORMAT_ND,
+      self.scalar_type(),
+      self.sizes());
   neg_out_npu_nocheck(result, self);
   return result;
diff --git a/src/aten/src/ATen/native/npu/StdKernelNpu.cpp b/src/aten/src/ATen/native/npu/StdKernelNpu.cpp
index d299fde881a3b0778ed46ca6777ea039a7de41fd..91b36d5d73f1b92e9d497f0ae1731b2426d3bfe1 100644
--- a/src/aten/src/ATen/native/npu/StdKernelNpu.cpp
+++ b/src/aten/src/ATen/native/npu/StdKernelNpu.cpp
@@ -79,7 +79,8 @@ Tensor& std_out_npu(
   OpPreparation::CheckOut(
       {self},
       result,
-      self,
+      ACL_FORMAT_ND,
+      self.scalar_type(),
       outputSize);

   // executing the NPU operator
@@ -100,12 +101,14 @@ tuple std_mean_out_npu(
   OpPreparation::CheckOut(
       {self},
       result1,
-      self,
+      ACL_FORMAT_ND,
+      self.scalar_type(),
       outputSize);
   OpPreparation::CheckOut(
       {self},
       result2,
-      self,
+      ACL_FORMAT_ND,
+      self.scalar_type(),
       outputSize);

   // executing the NPU operator
diff --git a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp
index 412d1fc32b7bca4bb8f5d7bcac31eee8458a5bc8..a49aa9b9945afa46ac43295733adce86ded1ece0 100644
--- a/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp
+++ b/src/aten/src/ATen/native/npu/utils/CalcuOpUtil.cpp
@@ -347,18 +347,7 @@ NPUStatus CalcuOpUtil::CreateAclTensorDescInfo(
         input[i].tensorDescType == NPUTensorDesc::TensorDescType::TENSOR) {
       Tensor* aclInput = &input[i].tensor;
       SmallVector dims;
-      if (opName == "MatMul") {
-        auto dims_pre = aclInput->sizes();
-        if (attrs[i].boolAttrValue == 1) {
-          dims.push_back(dims_pre[1]);
-          dims.push_back(dims_pre[0]);
-        } else if (attrs[i].boolAttrValue == 0) {
-          dims.push_back(dims_pre[0]);
-          dims.push_back(dims_pre[1]);
-        }
-      } else {
-        dims = aclInput->storage().get_npu_desc().base_sizes_;
-      }
+      dims = aclInput->storage().get_npu_desc().base_sizes_;
       auto storageDims = aclInput->storage().get_npu_desc().storage_sizes_;
       int64_t numel = 1;
       for (int j = 0; j < storageDims.size(); j++) {
diff --git a/test/test_npu/test_alpha_dropout.py b/test/test_npu/test_alpha_dropout.py
deleted file mode 100644
index 9bd3266f91bb06a8e06cc2ad3369781ee38ec758..0000000000000000000000000000000000000000
--- a/test/test_npu/test_alpha_dropout.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Copyright (c) 2020, Huawei Technologies.All rights reserved.
-#
-# Licensed under the BSD 3-Clause License (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import torch -import numpy as np -import sys -import random -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestAlphaDropout(TestCase): - def cpu_op_exec(self,input, p): - m = torch.nn.AlphaDropout(p) - output = m(input) - output = output.numpy() - return output - - def npu_op_exec(self,input, p): - m = torch.nn.AlphaDropout(p) - output = m(input) - output = output.to("cpu") - output = output.numpy() - return output - - def test_alpha_dropout_common_shape_format(self, device): - shape_format = [ - [np.float32, -1, (14, 3, 2)], - [np.float32, -1, (4, 13, 1)], - [np.float32, -1, (3, 1)], - [np.float32, -1, (4, 1, 5)], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - p = random.uniform(0,1) - cpu_output = self.cpu_op_exec(cpu_input1, p) - npu_output = self.npu_op_exec(npu_input1, p) - self.assertRtolEqual(cpu_output, npu_output) - - def test_alpha_dropout_float16_shape_format(self, device): - def cpu_op_exec_fp16(input, p): - m = torch.nn.AlphaDropout(p) - input = input.to(torch.float32) - output = m(input) - output = output.numpy() - return output - shape_format = [ - [np.float16, -1, (4, 3)], - [np.float16, -1, (4, 3)], - [np.float16, -1, (4, 3, 1)], - [np.float16, -1, (4, 1, 5)], - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item, 1, 100) - p = random.uniform(0,1) - cpu_output = cpu_op_exec_fp16(cpu_input1, p) - npu_output = self.npu_op_exec(npu_input1, p) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestAlphaDropout, globals(), except_for="cpu") - -if __name__ == "__main__": - run_tests() diff --git a/test/test_npu/test_cholesky.py b/test/test_npu/test_cholesky.py deleted file mode 100644 index 052ccc04f209afd7a725088cc4d0eba1d43fc235..0000000000000000000000000000000000000000 --- a/test/test_npu/test_cholesky.py +++ /dev/null @@ -1,101 +0,0 @@ -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor -import random -import math - -class TestCholesky(TestCase): -# pylint: disable=unused-variable,unused-argument -# pylint: disable=W,C - def create_2d_tensor(self, item, minValue, maxValue): - dtype = item[0] - format = item[1] - shape = item[2] - input1 = np.random.uniform(minValue, maxValue, shape).astype(dtype) - a = torch.from_numpy(input1) - cpu_input = torch.matmul(a, a.t()) - npu_input = torch.matmul(a, a.t()).to("npu") - if format != -1: - npu_input = npu_input.npu_format_cast(format) - return cpu_input, npu_input - - def create_nd_tensor(self, item, minValue, maxValue): - dtype = item[0] - format = item[1] - shape = item[2] - input1 = np.random.uniform(minValue, maxValue, shape).astype(dtype) - a = torch.from_numpy(input1) - a = a.to(torch.float32) - cpu_input = torch.matmul(a, a.transpose(-1, -2)) + 1e-05 # make symmetric positive-definite - npu_input = torch.matmul(a, a.transpose(-1, -2)) + 1e-05 - npu_input = npu_input.to("npu") - if format != -1: - npu_input = npu_input.npu_format_cast(format) - return cpu_input, npu_input - - def cpu_op_exec(self, input1): - output = torch.cholesky(input1) - output = output.numpy() - return output - - def cpu_op_exec_fp16(self, input1): - output = torch.cholesky(input1) - output = output.numpy() - output = output.astype(np.float16) - return output - - 
def npu_op_exec(self, input1): - output = torch.cholesky(input1) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_fp16(self, input1): - output = torch.cholesky(input1) - output = output.to("cpu") - output = output.numpy() - output = output.astype(np.float16) - return output - - def test_cholesky_common_shape_format(self, device): - shape_format = [ - [[np.float32, -1, (1, 1)]], - [[np.float32, -1, (2, 2)]], - [[np.float32, -1, (4, 4)]], - [[np.float32, -1, (8, 8)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = self.create_2d_tensor(item[0], 1, 10) - cpu_output = self.cpu_op_exec(cpu_input1) - npu_output = self.npu_op_exec(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cholesky_float16_shape_format(self, device): - shape_format = [ - [[np.float16, -1, (4, 2, 4, 4)]], - [[np.float16, -1, (2, 3, 4, 4)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = self.create_nd_tensor(item[0], 1, 2) - cpu_output = self.cpu_op_exec_fp16(cpu_input1) - npu_output = self.npu_op_exec_fp16(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_cholesky_float16_2_shape_format(self, device): - shape_format = [ - [[np.float16, -1, (2, 4, 4)]], - [[np.float16, -1, (3, 8, 8)]] - ] - for item in shape_format: - cpu_input1, npu_input1 = self.create_nd_tensor(item[0], 1, 2) - cpu_output = self.cpu_op_exec_fp16(cpu_input1) - npu_output = self.npu_op_exec_fp16(npu_input1) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestCholesky, globals(), except_for='cpu') -if __name__ == '__main__': - run_tests() \ No newline at end of file diff --git a/test/test_npu/test_index_put.py b/test/test_npu/test_index_put.py deleted file mode 100644 index ea036e503c848505510ebc546bb6f0602a5ef0a3..0000000000000000000000000000000000000000 --- a/test/test_npu/test_index_put.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import numpy as np -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestIndexPut(TestCase): - - def generate_data(self, min_d, max_d, shape, dtype): - input1 = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input1 = torch.from_numpy(input1) - return npu_input1 - - # cpu - def cpu_op_exec(self, input_x, index, value, accumulate): - output = torch.index_put(input_x, index, value, accumulate) - output = output.numpy() - return output - - def cpu_op_exec_fp16(self, input_x, index, value, accumulate): - input_x = input_x.to(torch.float32) - output = torch.index_put(input_x, index, value, accumulate) - output = output.numpy().astype(np.float16) - return output - - def cpu_op_exec_interface(self, input_x, index, value): - input_x[index] = value - output = input_x - output = output.numpy() - return output - - # npu - def npu_op_exec_interface1(self, input_x, index, value, accumulate): - input_x = input_x.to("npu") - index_npu1 = index[0].to("npu") - index_npu2 = index[1].to("npu") - index_npu = (index_npu1, index_npu2) - if type(value) == torch.Tensor: - value = value.to("npu") - output = input_x.index_put(index_npu, value, accumulate) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_interface2(self, input_x, index, value, accumulate): - input_x = input_x.to("npu") - index_npu1 = index[0].to("npu") - index_npu2 = index[1].to("npu") - index_npu = (index_npu1, index_npu2) - if type(value) == torch.Tensor: - value = value.to("npu") - output = input_x.index_put_(index_npu, value, accumulate) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_interface3(self, input_x, index, value): - input_x = input_x.to("npu") - index_npu1 = index[0].to("npu") - index_npu2 = index[1].to("npu") - index_npu = (index_npu1, index_npu2) - if type(value) == torch.Tensor: - value = value.to("npu") - input_x[index_npu] = value - output = input_x - output = output.to("cpu") - output = output.numpy() - return output - - # assertRtolEqual - def index_put(self, testcases, value, dtype = "fp32"): - for i, item in enumerate(testcases): - index = (torch.LongTensor(item[4][0]), torch.LongTensor(item[4][1])) - #test for index_put - npuinput_x1 = self.generate_data(item[0], item[1], item[2], item[5]) - if dtype == "fp16": - cpu_output1 = self.cpu_op_exec_fp16(npuinput_x1, index, value, item[3]) - npu_output1 = self.npu_op_exec_interface1(npuinput_x1, index, value, item[3]) - self.assertRtolEqual(cpu_output1, npu_output1) - else: - cpu_output1 = self.cpu_op_exec(npuinput_x1, index, value, item[3]) - npu_output1 = self.npu_op_exec_interface1(npuinput_x1, index, value, item[3]) - self.assertRtolEqual(cpu_output1, npu_output1) - - #test for index_put_ - npuinput_x2 = self.generate_data(item[0], item[1], item[2], item[5]) - if dtype == "fp16": - cpu_output2 = self.cpu_op_exec_fp16(npuinput_x2, index, value, item[3]) - npu_output2 = self.npu_op_exec_interface2(npuinput_x2, index, value, item[3]) - self.assertRtolEqual(cpu_output2, npu_output2) - else: - cpu_output2 = self.cpu_op_exec(npuinput_x2, index, value, item[3]) - npu_output2 = self.npu_op_exec_interface2(npuinput_x2, index, value, item[3]) - self.assertRtolEqual(cpu_output2, npu_output2) - - #test for input[index] = value - npuinput_x3 = self.generate_data(item[0], item[1], item[2], item[5]) - if dtype == "fp16": - cpu_output3 = 
self.cpu_op_exec_interface(npuinput_x3.to(torch.float32), index, value) - npu_output3 = self.npu_op_exec_interface3(npuinput_x3, index, value) - self.assertRtolEqual(cpu_output3.astype(npu_output3.dtype), npu_output3) - else: - cpu_output3 = self.cpu_op_exec_interface(npuinput_x3, index, value) - npu_output3 = self.npu_op_exec_interface3(npuinput_x3, index, value) - self.assertRtolEqual(cpu_output3, npu_output3) - - def test_index_put_d(self, device): - testcases_fp32 = [ - #minV, maxV, shape, accumulate, index, dtype - # fp32 - #IndexPut_fp32_accumulate1_001 - [-10, 10, (2, 2, 3, 3), True, [[1,1],[0,1]], np.float32], - - # IndexPut_fp32_accumulate0_002 - [-10, 10, (2, 2, 3, 3), False, [[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate1_003 - [-100, 100, (2, 4, 6, 8, 10, 12), True, [[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate0_004 - [-100, 100, (2, 4, 6, 8, 10, 12), False, [[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate1_R0.5e16_005 - [-0.000030517578125, 0.000030517578125, (2,32,149,31), True, [[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate0_R0.5e16_006 - [-0.000030517578125, 0.000030517578125, (2,32,149,31), False,[[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate1_R2e32_007 - [-3402823500.0, 3402823500.0, (2,32,149,31), True, [[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate0_R2e32_008 - [-3402823500.0, 3402823500.0, (2,32,149,31), False, [[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate1_S2e16_009 - [-100, 100, (65535, 2, 2, 2, 2, 2), True, [[1,1],[0,1]], np.float32], - - #IndexPut_fp32_accumulate0_S2e16_010 - [-100, 100, (65535, 2, 2, 2, 2, 2), False, [[1,1],[0,1]], np.float32], - - ] - testcases_fp16 = [ - #IndexPut_fp16_accumulate1_011 - [-10, 10, (2, 2, 3, 3), True, [[1,1],[0,1]], np.float16], - - #IndexPut_fp16_accumulate0_012 - [-10, 10, (2, 2, 3, 3), False, [[1, 1],[0,1]], np.float16], - - #IndexPut_fp16_accumulate1_013 - [-100, 100, (2, 4, 6, 8, 10, 12), True, [[1,1],[0,1]], np.float16], - - #IndexPut_fp16_accumulate0_014 - [-100, 100, (2, 4, 6, 8, 10, 12), False, [[1,1],[0,1]], np.float16], - - #IndexPut_fp16_accumulate1_R2e16_015 - [-60000,60000, (2,32,149,31), True, [[1,1],[0,1]], np.float16], - - #IndexPut_fp16_accumulate0_R2e16_016 - [-60000,60000, (2,32,149,31), True, [[1,1],[0,1]], np.float16], - - #IndexPut_fp16_accumulate1_S2e16_017 - [-100, 100, (65535, 2, 2, 2, 2, 2), True, [[1,1],[0,1]], np.float16], - - #IndexPut_fp16_accumulate0_S2e16_018 - [-100, 100, (65535, 2, 2, 2, 2, 2), False, [[1,1],[0,1]], np.float16], - ] - value = np.random.uniform(-10000, 10000) - value_tensor = torch.tensor(value) - self.index_put(testcases=testcases_fp32, value=value_tensor) - self.index_put(testcases=testcases_fp16, value=value_tensor, dtype="fp16") - -instantiate_device_type_tests(TestIndexPut, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:1") - run_tests() - \ No newline at end of file diff --git a/test/test_npu/test_kl_div.py b/test/test_npu/test_kl_div.py deleted file mode 100644 index 67b6c87304a24d48448b8872c1ebfaccd4e925cd..0000000000000000000000000000000000000000 --- a/test/test_npu/test_kl_div.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - - -class TestKlDiv(TestCase): - - def cpu_op_exec(self, input1, input2, reduction): - output = torch.kl_div(input1, input2, reduction=reduction) - output = output.numpy() - return output - - def npu_op_exec(self, input1, input2, reduction): - input1 = input1.to("npu") - input2 = input2.to("npu") - output = torch.kl_div(input1, input2, reduction=reduction) - output = output.to("cpu") - output = output.numpy() - return output - - def test_kl_div_common_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (4, 1, 2, 3)], [np.float32, 0, (4, 1, 2, 3)], 0], - [[np.float32, 0, (4, 1, 5)], [np.float32, 0, (4, 1, 5)], 1], - [[np.float32, 0, (14, 21, 52, 10, 22)], [ - np.float32, 0, (14, 21, 52, 10, 22)], 2], - # 130device unsupports float64 - # [[np.float64, 0, (24, 9, 15)], [np.float64, 0, (24, 9, 15)], 2], - # [[np.float64, -1, (24, 11)], [np.float64, -1, (24, 11)], 1], - # [[np.float64, 0, (14, 21, 52, 10, 22)], [np.float64, 0, (14, 21, 52, 10, 22)], 0] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 1) - reduction = item[2] - cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, reduction) - npu_output = self.npu_op_exec(npu_input1, npu_input2, reduction) - self.assertRtolEqual(cpu_output, npu_output) - - def test_kl_div_float16_shape_format(self, device): - def cpu_op_exec_fp16(input1, input2, reduction): - input1 = input1.to(torch.float32) - input2 = input2.to(torch.float32) - output = torch.kl_div(input1, input2, reduction=reduction) - output = output.numpy() - output = output.astype(np.float16) - return output - - shape_format = [ - [[np.float16, 0, (14, 21, 22, 33)], [ - np.float16, 0, (14, 21, 22, 33)], 0], - [[np.float16, 0, (4, 10, 5)], [np.float16, 0, (4, 10, 5)], 1], - [[np.float16, 0, (4, 1, 50)], [np.float16, 0, (4, 1, 50)], 2], - ] - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 0, 1) - cpu_input2, npu_input2 = create_common_tensor(item[1], 0, 1) - reduction = item[2] - cpu_output = cpu_op_exec_fp16(cpu_input1, cpu_input2, reduction) - npu_output = self.npu_op_exec(npu_input1, npu_input2, reduction) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(TestKlDiv, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:5") - run_tests() diff --git a/test/test_npu/test_leaky_relu_backward.py b/test/test_npu/test_leaky_relu_backward.py deleted file mode 100644 index 8c69a368f7673f7785fe68688ec395525ba5ba32..0000000000000000000000000000000000000000 --- a/test/test_npu/test_leaky_relu_backward.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -def input_grad_hook(grad): - global input_grad - input_grad = grad - - -def npu_input_grad_hook(grad): - global npu_input_grad - npu_input_grad = grad.to("cpu") - - -class TestLeakyReluBackward(TestCase): - - def cpu_op_exec(self, input, negative_slope=0): - input.requires_grad = True - input.register_hook(input_grad_hook) - - output = torch.nn.functional.leaky_relu(input, negative_slope=negative_slope) - z = output.sum() - z.backward() - - def npu_op_exec(self, input, negative_slope=0): - input.requires_grad = True - input.register_hook(npu_input_grad_hook) - - output = torch.nn.functional.leaky_relu(input, negative_slope=negative_slope) - z = output.sum() - z.backward() - input = input.cpu() - - def test_leaky_relu_backward_shape_format_fp32(self, device): - shape_format = [ - [[np.float32, 0, (3, 3)], 2], - [[np.float32, 0, (64, 64)], 5], - [[np.float32, 0, (4, 5, 6)], -3], - [[np.float32, 0, (3, 3, 3, 4)], 0.8], - [[np.float32, 0, (1, 2, 3, 4, 5)], -0.9] - ] - for item in shape_format: - input, npu_input = create_common_tensor(item[0], 1, 100) - - self.cpu_op_exec(input, item[1]) - self.npu_op_exec(npu_input, item[1]) - self.assertRtolEqual(input_grad.numpy(), npu_input_grad.numpy()) - - def test_leaky_relu_backward_shape_format_fp16(self, device): - shape_format = [ - [[np.float16, 0, (3, 3)], 2], - [[np.float16, 0, (64, 64)], 5], - [[np.float16, 0, (4, 5, 6)], -3], - [[np.float16, 0, (3, 3, 3, 4)], 0.8], - [[np.float16, 0, (1, 2, 3, 4, 5)], -0.9] - ] - for item in shape_format: - input, npu_input = create_common_tensor(item[0], 1, 100) - - input = input.to(torch.float32) - self.cpu_op_exec(input, item[1]) - self.npu_op_exec(npu_input, item[1]) - self.assertRtolEqual(input_grad.numpy().astype(np.float16), npu_input_grad.numpy().astype(np.float16)) - - -instantiate_device_type_tests(TestLeakyReluBackward, globals(), except_for="cpu") -if __name__ == "__main__": - run_tests() diff --git a/test/test_npu/test_maxUnpool2d.py b/test/test_npu/test_maxUnpool2d.py deleted file mode 100644 index c387f3a1527fea705a852711d85cf5efd6806f91..0000000000000000000000000000000000000000 --- a/test/test_npu/test_maxUnpool2d.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class testMaxUnpool2d(TestCase): - - def cpu_op_exec(self, input1, indices, output_size): - m = torch.nn.MaxUnpool2d(output_size) - output = m(input1, indices) - output = output.numpy() - return output - - def npu_op_exec(self, input1, indices, output_size): - m = torch.nn.MaxUnpool2d(output_size) - output = m(input1, indices) - output = output.to("cpu") - output = output.numpy() - return output - - - def test_max_unpool_common_shape_format(self, device): - shape_format = [ - [[np.float32, 0, (1, 4, 2, 3)], [np.int64, 0, (1, 4, 2, 3)], (2,2)], - [[np.float32, 0, (1, 3, 8, 4)], [np.int64, 0, (1, 3, 8, 4)], (1,2)], - [[np.float64, 0, (1, 4, 2, 3)], [np.int64, 0, (1, 4, 2, 3)], (2,2)], - [[np.float64, 0, (1, 3, 8, 4)], [np.int64, 0, (1, 3, 8, 4)], (1,2)] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_indices, npu_indices = create_common_tensor(item[1], 1, 10) - cpu_output = self.cpu_op_exec(cpu_input1, cpu_indices, item[2]) - npu_output = self.npu_op_exec(npu_input1, npu_indices, item[2]) - self.assertRtolEqual(cpu_output, npu_output) - - def test_max_unpool_fp16_common_shape_format(self, device): - shape_format = [ - [[np.float16, 0, (1, 4, 2, 3)], [np.int32, 0, (1, 4, 2, 3)], (2,2)], - [[np.float16, 0, (1, 3, 8, 4)], [np.int64, 0, (1, 3, 8, 4)], (1,2)], - [[np.float16, 0, (1, 4, 2, 3)], [np.int64, 0, (1, 4, 2, 3)], (2,2)] - ] - def cpu_op_fp16_exec(input1, indices, output_size): - input1 = input1.to(torch.float32) - indices = indices.to(torch.int64) - m = torch.nn.MaxUnpool2d(output_size) - output = m(input1, indices) - output = output.numpy() - return output.astype(np.float16) - - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_indices, npu_indices = create_common_tensor(item[1], 1, 10) - cpu_output = cpu_op_fp16_exec(cpu_input1, cpu_indices, item[2]) - npu_output = self.npu_op_exec(npu_input1, npu_indices, item[2]) - self.assertRtolEqual(cpu_output, npu_output) - - -instantiate_device_type_tests(testMaxUnpool2d, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:6") - run_tests() diff --git a/test/test_npu/test_adaptive_max_pool1d.py b/test/test_npu/test_network_ops/test_adaptive_max_pool1d.py similarity index 86% rename from test/test_npu/test_adaptive_max_pool1d.py rename to test/test_npu/test_network_ops/test_adaptive_max_pool1d.py index a8b1ea91cc8dc366b9539076cc06098413d57d84..85bab10619f00a282f8245a5454c65015d073e32 100644 --- a/test/test_npu/test_adaptive_max_pool1d.py +++ b/test/test_npu/test_network_ops/test_adaptive_max_pool1d.py @@ -34,14 +34,14 @@ class TestAdaptiveMaxPool1d(TestCase): def test_adaptiveMaxPool1d_shape_format_fp16(self, device): format_list = [0, 3] shape_list = [(32, 16, 16), - (16, 1024, 256), - (1024, 464, 11), - (1, 2048, 15)] + (16, 1024, 248), + (1024, 464, 24), + (1, 2048, 24)] shape_format = [ [np.float16, i, j] for i in format_list for j in shape_list ] - output_list = [4, 3, 1, 2] + output_list = [4, 8] for item in shape_format: cpu_input, npu_input = create_common_tensor(item, 0, 100) cpu_input = cpu_input.to(torch.float32) @@ -54,22 +54,21 @@ class TestAdaptiveMaxPool1d(TestCase): def test_adaptiveMaxPool1d_shape_format_fp32(self, device): format_list = [0, 3] shape_list = [(32, 16, 16), 
- (16, 1024, 256), - (1024, 464, 11), - (1, 2048, 15)] + (16, 1024, 248), + (1024, 464, 24), + (1, 2048, 24)] shape_format = [ [np.float32, i, j] for i in format_list for j in shape_list ] - output_list = [4, 3, 1, 2] + output_list = [4, 8] for item in shape_format: cpu_input, npu_input = create_common_tensor(item, 0, 100) for output_size in output_list: cpu_output = self.cpu_op_exec(cpu_input, output_size) npu_output = self.npu_op_exec(npu_input, output_size) - self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output, prec=1e-2) instantiate_device_type_tests(TestAdaptiveMaxPool1d, globals(), except_for="cpu") if __name__ == "__main__": - torch.npu.set_device("npu:1") run_tests() diff --git a/test/test_npu/test_baddbmm.py b/test/test_npu/test_network_ops/test_baddbmm.py similarity index 98% rename from test/test_npu/test_baddbmm.py rename to test/test_npu/test_network_ops/test_baddbmm.py index 2502c4c36eefaa01f23f6ab3d452621fc645278c..a73c9bfb28b052864dbeef5d667fb9cc860e0651 100644 --- a/test/test_npu/test_baddbmm.py +++ b/test/test_npu/test_network_ops/test_baddbmm.py @@ -60,9 +60,9 @@ class TestBaddBmm(TestCase): ] for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) - cpu_input3, npu_input3 = create_common_tensor(item[2], 1, 100) + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 10) + cpu_input3, npu_input3 = create_common_tensor(item[2], 1, 10) scalar1 = self.generate_scalar(item[3], 0, 10) scalar2 = self.generate_scalar(item[3], 0, 10) cpu_output = self.cpu_op_exec(cpu_input1, cpu_input2, cpu_input3, scalar1, scalar2) @@ -90,9 +90,9 @@ class TestBaddBmm(TestCase): ] for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 100) - cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 100) - cpu_input3, npu_input3 = create_common_tensor(item[2], 1, 100) + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_input2, npu_input2 = create_common_tensor(item[1], 1, 10) + cpu_input3, npu_input3 = create_common_tensor(item[2], 1, 10) scalar1 = self.generate_scalar(item[3], 0, 10) scalar2 = self.generate_scalar(item[3], 0, 10) cpu_output = cpu_op_exec_fp16(cpu_input1, cpu_input2, cpu_input3, scalar1, scalar2) diff --git a/test/test_npu/test_col2im_backward.py b/test/test_npu/test_network_ops/test_col2im_backward.py similarity index 58% rename from test/test_npu/test_col2im_backward.py rename to test/test_npu/test_network_ops/test_col2im_backward.py index 93060541e67c67391422f9eafc80bee326d74c69..8690fea148248fab303db042f97482178d8fcdb8 100644 --- a/test/test_npu/test_col2im_backward.py +++ b/test/test_npu/test_network_ops/test_col2im_backward.py @@ -14,7 +14,7 @@ import torch import numpy as np -from torch.testing._internal.common_utils import TestCase, run_tests +from common_utils import TestCase, run_tests from common_device_type import dtypes, instantiate_device_type_tests from util_test import create_common_tensor @@ -23,37 +23,32 @@ class TestCol2ImBackward(TestCase): def cpu_op_exec(self,input1, output_size, ksizes, strides, dilates, padding): input1.requires_grad = True output = torch._C._nn.col2im(input1, output_size, ksizes, dilates, padding, strides) - d = output.sum() - d.backward(retain_graph=True) - #output.backward() - output1 = d.detach().numpy() - return output1 - + output.backward(torch.ones_like(output)) + output1 = 
output.detach().numpy() + cpu_grad = input1.grad + return output1, cpu_grad.detach().numpy() def npu_op_exec(self, input1,output_size, ksizes, strides, dilates,padding): - input1 = input1.to("npu") input1.requires_grad = True output = torch._C._nn.col2im(input1, output_size, ksizes, dilates, padding, strides) - d = output.sum() - d.backward(retain_graph=True) - output1 = d.detach().numpy() - output1 = output1.to("cpu") - return output1 + output.backward(torch.ones_like(output)) + output1 = output.detach().cpu().numpy() + npu_grad = input1.grad + return output1, npu_grad.detach().cpu().numpy() - def test_sigmoid_shape_format(self, device): + def test_col2imbackward_shape_format(self, device): shape_format = [ - [ [np.float32, 0, (4, 12)], (4,5), (2,2), (1,1), (1,1), (0,0)], - [ [np.float32, 3, (2, 8,30 )], (4,5), (2,2), (1,1), (1,1), (1,1)], - [ [np.float32, 4, ( 12, 5)], (6,3), (2,3), (1,1), (1,1), (0,0)], - [ [np.float32, 29, ( 1,12, 12)], (4,5), (2,2), (1,1), (1,1), (0,0)] + [ [np.float16, 0, (4, 12, 12)], (4,5), (2,2), (1,1), (1,1), (0,0)], + [ [np.float16, 0, ( 12, 18, 9)], (4, 5), (2,3), (1,1), (1,1), (0,0)], + [ [np.float16, 0, ( 1, 24, 42)], (7, 8), (2,2), (1,1), (1,1), (0,0)] ] for item in shape_format: cpu_input, npu_input = create_common_tensor(item[0], 1, 20) - cpu_output = self.cpu_op_exec(cpu_input, item[1], item[2], item[3], item[4], item[5]) - npu_output = self.npu_op_exec(npu_input, item[1], item[2], item[3], item[4], item[5]) - self.assertEqual(cpu_output, npu_output) - + cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, item[1], item[2], item[3], item[4], item[5]) + npu_output, npu_grad = self.npu_op_exec(npu_input, item[1], item[2], item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_grad, npu_grad) instantiate_device_type_tests(TestCol2ImBackward, globals(), except_for="cpu") diff --git a/test/test_npu/test_conv_tbc_backward.py b/test/test_npu/test_network_ops/test_conv_tbc_backward.py similarity index 89% rename from test/test_npu/test_conv_tbc_backward.py rename to test/test_npu/test_network_ops/test_conv_tbc_backward.py index 3032ad485915862fb74cbb6f136456e562c67bee..db4c4a90bce966e45197657bbd81f47ba0d0e2b5 100644 --- a/test/test_npu/test_conv_tbc_backward.py +++ b/test/test_npu/test_network_ops/test_conv_tbc_backward.py @@ -64,10 +64,6 @@ class TestConvTbcBackward(TestCase): [[np.float16, -1, (256, 8, 1)], [np.float16, -1, (10, 1, 1)], [np.float16, -1, (1)], 0], [[np.float16, -1, [232, 23, 7]], [np.float16, -1, [23, 7, 8]], [np.float16, -1, [8]], 1], [[np.float32, -1, [10, 2, 4]], [np.float32, -1, [2, 4, 2]], [np.float32, -1, [2]], 1], - [[np.float32, -1, [167, 243, 219]], [np.float32, -1, [37, 219, 216]], [np.float32, -1, [216]], 1], - [[np.float16, -1, [155, 96, 16]], [np.float16, -1, [88, 16, 67]], [np.float16, -1, [67]], 1], - [[np.float32, -1, [220, 269, 55]], [np.float32, -1, [33, 55, 292]], [np.float32, -1, [292]], 1], - [[np.float32, -1, [250, 278, 38]], [np.float32, -1, [80, 38, 81]], [np.float32, -1, [81]], 0], [[np.float16, -1, [150, 1, 20]], [np.float16, -1, [35, 20, 4]], [np.float16, -1, [4]], 1], [[np.float16, -1, [10, 2, 2]], [np.float16, -1, [3, 2, 3]], [np.float16, -1, [3]], 0], ] @@ -90,10 +86,10 @@ class TestConvTbcBackward(TestCase): self.input_grad[0] = self.input_grad[0].to(self.input_grad[1].dtype) self.weight_grad[0] = self.weight_grad[0].to(self.weight_grad[1].dtype) cpu_dBias = cpu_dBias.to(npu_dBias.dtype) - self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, 
npu_output, 1e-2) self.assertRtolEqual(cpu_dBias, npu_dBias) - self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy()) - self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy()) + self.assertRtolEqual(self.input_grad[0].numpy(), self.input_grad[1].numpy(), 1e-1) + self.assertRtolEqual(self.weight_grad[0].numpy(), self.weight_grad[1].numpy(), 1e-1) instantiate_device_type_tests(TestConvTbcBackward, globals(), except_for='cpu') if __name__ == "__main__": diff --git a/test/test_npu/test_convolution_backward_input.py b/test/test_npu/test_network_ops/test_convolution_backward_input.py similarity index 98% rename from test/test_npu/test_convolution_backward_input.py rename to test/test_npu/test_network_ops/test_convolution_backward_input.py index 233a18a4694f6ca0df462c91e9e25d7d45086348..7ced8fd872397bc2732728aab0e74f0d1acae388 100644 --- a/test/test_npu/test_convolution_backward_input.py +++ b/test/test_npu/test_network_ops/test_convolution_backward_input.py @@ -94,7 +94,7 @@ class TestCudnnConvolutionBackwardInput(TestCase): item[4], item[5]) cpu_output = cpu_output.astype(npu_output.dtype) cpu_dinput = npu_dinput.to(npu_dinput.dtype) - self.assertRtolEqual(cpu_output, npu_output) + self.assertRtolEqual(cpu_output, npu_output, 1e-2) self.assertRtolEqual(cpu_dinput, npu_dinput) @@ -102,5 +102,4 @@ instantiate_device_type_tests(TestCudnnConvolutionBackwardInput, globals(), except_for='cpu') if __name__ == "__main__": - torch.npu.set_device("npu:1") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_matrix_power.py b/test/test_npu/test_network_ops/test_npu_bert_apply_adam.py similarity index 37% rename from test/test_npu/test_matrix_power.py rename to test/test_npu/test_network_ops/test_npu_bert_apply_adam.py index b87d0b5acb03054e394c8cc366ae21378668c3ed..a3b02059ab466903385045fc88ea75eba6ba09fb 100644 --- a/test/test_npu/test_matrix_power.py +++ b/test/test_npu/test_network_ops/test_npu_bert_apply_adam.py @@ -1,6 +1,4 @@ -# Copyright (c) 2020 Huawei Technologies Co., Ltd -# Copyright (c) 2019, Facebook CORPORATION. -# All rights reserved. +# Copyright (c) 2020, Huawei Technologies.All rights reserved. # # Licensed under the BSD 3-Clause License (the "License"); # you may not use this file except in compliance with the License. @@ -15,40 +13,43 @@ # limitations under the License. 
 import torch
+import torch.nn as nn
 import numpy as np
 from common_utils import TestCase, run_tests
 from common_device_type import dtypes, instantiate_device_type_tests
 from util_test import create_common_tensor

-class TestMatrixPower(TestCase):
-    def cpu_op_exec(self, input, n):
-        input = input.float()
-        output = torch.matrix_power(input, n)
-        output = output.half()
-        output = output.numpy()
-        return output
-
-    def npu_op_exec(self, input, n):
-        output = torch.matrix_power(input, n)
-        output = output.to("cpu")
-        output = output.numpy()
-        return output
-
-    def test_matrix_power_shape_format(self, device):
-        shape_format = [
-            [[np.float16, -1, (3, 3)], 0],
-            [[np.float16, -1, (3, 3)], 1],
-            [[np.float16, -1, (3, 3)], 5],
-            [[np.float16, -1, (7, 3, 3)], 1],
-            [[np.float16, -1, (2, 5, 5)], 2],
-        ]
-
-        for item in shape_format:
-            cpu_input, npu_input = create_common_tensor(item[0], -2, 2)
-            cpu_output = self.cpu_op_exec(cpu_input, item[1])
-            npu_output = self.npu_op_exec(npu_input, item[1])
-            self.assertRtolEqual(cpu_output, npu_output, prec16=0.05)
-
-instantiate_device_type_tests(TestMatrixPower, globals(), except_for="cpu")
+class TestNpuBertApplyAdam(TestCase):
+    def test_npu_bert_apply_adam(self, device):
+        seed = 3
+        torch.manual_seed(seed)
+        torch.npu.manual_seed(seed)
+        torch.npu.manual_seed_all(seed)
+
+        var_in = torch.rand(321538).uniform_(-32., 21.).npu()
+        m_in = torch.zeros(321538).npu()
+        v_in = torch.zeros(321538).npu()
+        grad = torch.rand(321538).uniform_(-0.05, 0.03).npu()
+
+        var_ans = torch.tensor([13.1862, -30.1250, -20.4954])
+        m_ans = torch.tensor([0.0014, 0.0018, -0.0021])
+        v_ans = torch.tensor([1.8999e-06, 3.2629e-06, 4.4347e-06])
+
+        max_grad_norm = -1.
+        beta1 = 0.9
+        beta2 = 0.99
+        weight_decay = 0.
+        lr = 0.
+        epsilon = 1e-06
+        global_grad_norm = 0.
+ + var_out, m_out, v_out = torch.npu_bert_apply_adam( + var_in, m_in, v_in, lr, beta1, beta2, epsilon, grad, max_grad_norm, global_grad_norm, weight_decay) + + self.assertRtolEqual(var_out[:3].cpu(), var_ans) + self.assertRtolEqual(m_out[:3].cpu(), m_ans) + self.assertRtolEqual(v_out[:3].cpu(), v_ans) + +instantiate_device_type_tests(TestNpuBertApplyAdam, globals(), except_for="cpu") if __name__ == "__main__": run_tests() diff --git a/test/test_npu/test_smoothl1loss.py b/test/test_npu/test_network_ops/test_smoothl1loss.py similarity index 98% rename from test/test_npu/test_smoothl1loss.py rename to test/test_npu/test_network_ops/test_smoothl1loss.py index c3f3c785305451fb507e8f76043fd477812d5059..977192f46cac46c78bd2009f2f0837fb57743a5f 100644 --- a/test/test_npu/test_smoothl1loss.py +++ b/test/test_npu/test_network_ops/test_smoothl1loss.py @@ -53,7 +53,7 @@ class TestSmoothL1loss(TestCase): shape_list = [[256, 10], [256, 1000], [256, 10000], [64, 10, 10], [64, 100, 100], [64, 200, 200], [32, 3, 10, 10], [32, 3, 100, 100], [32, 3, 200, 200]] - reduction_list = ['none', 'mean', 'sum'] + reduction_list = ['none', 'mean'] shape_format = [ [[np.float16, i, j], [np.float16, 0, j], k] for i in format_list for j in shape_list for k in reduction_list diff --git a/test/test_npu/test_soft_margin_loss_backward.py b/test/test_npu/test_network_ops/test_soft_margin_loss_backward.py similarity index 99% rename from test/test_npu/test_soft_margin_loss_backward.py rename to test/test_npu/test_network_ops/test_soft_margin_loss_backward.py index 7c7783508d0c8be818b2af73aed7e1f68b979567..67616708c76df83c83120f65ac8293af5306c492 100644 --- a/test/test_npu/test_soft_margin_loss_backward.py +++ b/test/test_npu/test_network_ops/test_soft_margin_loss_backward.py @@ -374,5 +374,4 @@ class Testcdist(TestCase): instantiate_device_type_tests(Testcdist, globals(), except_for="cpu") if __name__ == "__main__": - torch.npu.set_device("npu:3") run_tests() \ No newline at end of file diff --git a/test/test_npu/test_network_ops/test_split.py b/test/test_npu/test_network_ops/test_split.py index 08edcf9a51570c1b3d838f628e27290f641343cc..7def0d59dd77d811c5555134d44b24ebed5802a2 100644 --- a/test/test_npu/test_network_ops/test_split.py +++ b/test/test_npu/test_network_ops/test_split.py @@ -87,6 +87,24 @@ class TestSplit(TestCase): shape_format = [[np.float32, i, [64, 112, 7, 7]] for i in format_list] self.split_result(shape_format) + def test_split_common_shape_format(self, device): + shape_format = [ + [[np.float32, 0 , (1, 4, 2, 3)], 3, 1], + [[np.float32, 0, (8,4)], [1,2,1,2,2],0], + [[np.float16, 0 , (1, 4, 2, 3)], 3, 1], + [[np.float16, 0, (8,4)], [1,2,1,2,2],0], + [[np.int32, 0 , (1, 4, 2, 3)], 3, 1], + [[np.int32, 0, (8,4)], [1,2,1,2,2],0], + [[np.int64, 0 , (1, 4, 2, 3)], 3, 1], + [[np.int64, 0, (8,4)], [1,2,1,2,2],0], + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) + cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) + npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) + for i in range(len(cpu_output)): + self.assertRtolEqual(cpu_output[i], npu_output[i]) + instantiate_device_type_tests(TestSplit, globals(), except_for="cpu") if __name__ == "__main__": diff --git a/test/test_npu/test_network_ops/test_std.py b/test/test_npu/test_network_ops/test_std.py index dc04ae778a9185bad8d796f9ba01a3cf49767af7..179532e2b55e726ab3faca09404862d8603551a2 100644 --- a/test/test_npu/test_network_ops/test_std.py +++ b/test/test_npu/test_network_ops/test_std.py @@ -147,8 
+147,6 @@ class TestStd(TestCase): for item in shape_format: cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) outputshape = self.output_shape(item[2],item[3],item[4],item[5]) - print(outputshape) - print(item[2]) cpu_output,npu_output = self.create_output_tensor(0,1,outputshape,item[1],item[0]) if item[0] == np.float16: cpu_input1 = cpu_input1.to(torch.float32) @@ -280,6 +278,24 @@ class TestStd(TestCase): npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) self.assertRtolEqual(cpu_output1, npu_output1) + def test_std_dim_shape_format_5d_fp16(self, device): + format_list = [-1] + shape_list = [[2, 94, 4, 52, 192]] + dim_list = [0] + unbiased_list = [True, False] + keepdim_list = [True, False] + shape_format = [ + [np.float16, i, j, k, l, m] for i in format_list for j in shape_list + for k in dim_list for l in unbiased_list for m in keepdim_list + ] + for item in shape_format: + cpu_input1, npu_input1 = create_common_tensor(item, 0, 100) + cpu_input1 = cpu_input1.to(torch.float32) + cpu_output1 = self.cpu_op_dim_exec(cpu_input1, item[3], item[4], item[5]) + cpu_output1 = cpu_output1.astype(np.float16) + npu_output1 = self.npu_op_dim_exec(npu_input1, item[3], item[4], item[5]) + self.assertRtolEqual(cpu_output1, npu_output1, prec16=0.006) + instantiate_device_type_tests(TestStd, globals(), except_for="cpu") if __name__ == "__main__": run_tests() diff --git a/test/test_npu/test_upsample_bilinear2d_backward.py b/test/test_npu/test_network_ops/test_upsample_bilinear2d_backward.py similarity index 52% rename from test/test_npu/test_upsample_bilinear2d_backward.py rename to test/test_npu/test_network_ops/test_upsample_bilinear2d_backward.py index 83e3428f54321832f7dc8734c3b46cff168c7364..ead0f9e2696aedb579d82957e988cd3058d71f74 100644 --- a/test/test_npu/test_upsample_bilinear2d_backward.py +++ b/test/test_npu/test_network_ops/test_upsample_bilinear2d_backward.py @@ -14,56 +14,54 @@ import torch import numpy as np -import torch.nn.functional as F +import sys +import copy from common_utils import TestCase, run_tests from common_device_type import dtypes, instantiate_device_type_tests from util_test import create_common_tensor -import time -class TestUpsamleBilinear2DBackward(TestCase): - def cpu_op_exec(self, input, size): - input.requires_grad_(True) - output = F.interpolate(input, size, mode = "bilinear") +class TestUpsampleBilinear2dBackward(TestCase): + + def cpu_op_exec(self, inputs, shapes): + inputs.requires_grad_(True) + output = torch._C._nn.upsample_bilinear2d(inputs, shapes, True, 0, 0) output.backward(torch.ones_like(output)) - gradcpu = input.grad + gradcpu = inputs.grad return output.detach().numpy(), gradcpu.detach().numpy() - - def npu_op_exec(self, input, size): - input.requires_grad_(True) - output = F.interpolate(input, size, mode = "bilinear") - output = output.to("cpu") + + def npu_op_exec(self, inputs, shapes): + inputs.requires_grad_(True) + output = torch._C._nn.upsample_bilinear2d(inputs, shapes, True, 0, 0) inputback = torch.ones_like(output) - inputback = inputback.to("npu") - output = output.to("npu") output.backward(inputback) out = output.to("cpu") - grad = input.grad + grad = inputs.grad grad = grad.to("cpu") return out.detach().numpy(), grad.detach().numpy() - def test_upsample_bilinear2d_shape_format(self, device): + def test_UpsampleBilinear2d_common_shape_format(self, device): shape_format = [ - [[np.float32, 0, (2, 3, 4, 4)], [2, 2]], - [[np.float16, 0, (2, 3, 4, 4)], [2, 2]], - [[np.float32, 0, (5, 3, 6, 4)], [10, 10]], - 
[[np.float16, 0, (5, 3, 6, 4)], [10, 10]], - ] - + [[np.float32, -1, (4, 3, 1, 5)], (2, 2)], + [[np.float32, -1, (2, 3, 2, 1)], (3, 3)], + [[np.float32, -1, (1, 4, 2, 2)], (4, 4)], + [[np.float16, -1, (4, 10, 16, 14)], (5, 5)], + [[np.float16, -1, (8, 8, 8, 8)], (1, 2)], + [[np.float16, -1, (10, 4, 3, 2)], (2, 4)] + ] for item in shape_format: - cpu_input, npu_input = create_common_tensor(item[0], 0, 100) - if cpu_input == torch.float16: - cpu_input = cpu_input.to(torch.float32) - - size = item[1] - - cpu_output, cpu_grad = self.cpu_op_exec(cpu_input, size) - npu_output, npu_grad = self.npu_op_exec(npu_input, size) - - cpu_grad = cpu_grad.astype(npu_grad.dtype) + cpu_inputs, npu_inputs = create_common_tensor(item[0], 1, 100) + if cpu_inputs.dtype == torch.float16: + cpu_inputs = cpu_inputs.to(torch.float32) + cpu_output, cpu_grad = self.cpu_op_exec(cpu_inputs, item[1]) + npu_output, npu_grad = self.npu_op_exec(npu_inputs, item[1]) cpu_output = cpu_output.astype(npu_output.dtype) + cpu_grad = cpu_grad.astype(npu_grad.dtype) + self.assertRtolEqual(cpu_output, npu_output) self.assertRtolEqual(cpu_grad, npu_grad) -instantiate_device_type_tests(TestUpsamleBilinear2DBackward, globals(), except_for="cpu") + + +instantiate_device_type_tests(TestUpsampleBilinear2dBackward, globals(), except_for='cpu') if __name__ == "__main__": run_tests() diff --git a/test/test_npu/test_remainder.py b/test/test_npu/test_remainder.py deleted file mode 100644 index 16563848279c7de111dae0b56db146da5ef06ffd..0000000000000000000000000000000000000000 --- a/test/test_npu/test_remainder.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import torch -import numpy as np -import sys -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestRemainder(TestCase): - - def generate_two_tensor(self, min_d, max_d, shape, dtype): - dividend = np.random.uniform(min_d, max_d, shape).astype(dtype) - divisor = np.random.uniform(min_d, max_d, shape).astype(dtype) - - npu_dividend = torch.from_numpy(dividend) - npu_divisor = torch.from_numpy(divisor) - - return npu_dividend, npu_divisor - - def generate_single_tensor(self, min_d, max_d, shape, dtype): - dividend = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_dividend = torch.from_numpy(dividend) - return npu_dividend - - def generate_fp_scalar(self, min_d, max_d): - scalar = random.uniform(min_d, max_d) - return scalar - - # While operatoring on AICPU, it seems that we do not have to care whether the divisor is scalar or not. 
- def cpu_op_exec(self, dividend, divisor): - output = torch.remainder(dividend, divisor) - output = output.numpy() - return output - - def npu_op_exec_both_tensor(self, dividend, divisor): - output = torch.remainder(dividend, divisor) - output = output.to("cpu") - output = output.numpy() - return output - - def npu_op_exec_only_dividend_tensor(self, dividend, divisor): - dividend = dividend.to("npu") - output = torch.remainder(dividend, divisor) - output = output.to("cpu") - output = output.numpy() - return output - - def test_remainder_float32_both_tensor(self, device): - npu_dividend, npu_divisor = self.generate_two_tensor(-100, 100, (5), np.float32) - cpu_output = self.cpu_op_exec(npu_dividend, npu_divisor) - npu_output = self.npu_op_exec_both_tensor(npu_dividend, npu_divisor) - self.assertRtolEqual(cpu_output, npu_output) - - def test_remainder_float32_only_dividend_tensor(self, device): - npu_dividend = self.generate_single_tensor(-100, 100, (5), np.float32) - npu_divisor = self.generate_fp_scalar(-10, 10) - cpu_output = self.cpu_op_exec(npu_dividend, npu_divisor) - npu_output = self.npu_op_exec_only_dividend_tensor(npu_dividend, npu_divisor) - self.assertRtolEqual(cpu_output, npu_output) - - def test_remainder_int32_both_tensor(self, device): - npu_dividend, npu_divisor = self.generate_two_tensor(-100, 100, (5), np.int32) - cpu_output = self.cpu_op_exec(npu_dividend, npu_divisor) - npu_output = self.npu_op_exec_both_tensor(npu_dividend, npu_divisor) - self.assertRtolEqual(cpu_output, npu_output) - - def test_remainder_int32_only_dividend_tensor(self, device): - npu_dividend = self.generate_single_tensor(-100, 100, (5), np.int32) - npu_divisor = self.generate_fp_scalar(-10, 10) - cpu_output = self.cpu_op_exec(npu_dividend, npu_divisor) - npu_output = self.npu_op_exec_only_dividend_tensor(npu_dividend, npu_divisor) - self.assertRtolEqual(cpu_output, npu_output) - - # Because of the limitation of accracy, testcases using fp16 may not pass at the moment. 
- def test_remainder_float16_both_tensor(self, device): - def cpu_op_exec_fp16(dividend, divisor): - dividend = dividend.to(torch.float32) - divisor = divisor.to(torch.float32) - output = torch.remainder(dividend, divisor) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_dividend, npu_divisor = self.generate_two_tensor(-100, 100, (5), np.float16) - cpu_output = cpu_op_exec_fp16(npu_dividend, npu_divisor) - npu_output = self.npu_op_exec_both_tensor(npu_dividend, npu_divisor) - self.assertRtolEqual(cpu_output, npu_output) - - def test_remainder_float16_only_dividend_tensor(self, device): - def cpu_op_exec_fp16(dividend, divisor): - dividend = dividend.to(torch.float32) - output = torch.remainder(dividend, divisor) - output = output.numpy() - output = output.astype(np.float16) - return output - - npu_dividend = self.generate_single_tensor(-100, 100, (5), np.float16) - npu_divisor = self.generate_fp_scalar(-10, 10) - cpu_output = cpu_op_exec_fp16(npu_dividend, npu_divisor) - npu_output = self.npu_op_exec_only_dividend_tensor(npu_dividend, npu_divisor) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(TestRemainder, globals(), except_for='cpu') -if __name__ == "__main__": - torch.npu.set_device("npu:5") - run_tests() \ No newline at end of file diff --git a/test/test_npu/test_split.py b/test/test_npu/test_split.py deleted file mode 100644 index 0437927d5affeaa71c113a54d00b138720e253e6..0000000000000000000000000000000000000000 --- a/test/test_npu/test_split.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# coding: utf-8 - -import torch -import numpy as np -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class testSplit(TestCase): - - def cpu_op_exec(self, input1, split_size, dim): - output_tuple = torch.split(input1,split_size, dim=dim) - listtuple1 = [] - for i in range(len(output_tuple)): - listtuple1 += list(output_tuple[i].contiguous().view(-1)) - output = torch.tensor(listtuple1) - output = output.numpy() - return output - - def npu_op_exec(self, input1, split_size, dim): - output_tuple = torch.split(input1, split_size, dim=dim) - listtuple1 = [] - for i in range(len(output_tuple)): - listtuple1 += list(output_tuple[i].contiguous().view(-1)) - output = torch.tensor(listtuple1) - output = output.to("cpu") - output = output.numpy() - return output - - def test_split_common_shape_format(self, device): - shape_format = [ - [[np.float32, 0 , (1, 4, 2, 3)], 3, 1], - [[np.float32, 0, (8,4)], [1,2,1,2,2],0], - [[np.float16, 0 , (1, 4, 2, 3)], 3, 1], - [[np.float16, 0, (8,4)], [1,2,1,2,2],0], - [[np.int32, 0 , (1, 4, 2, 3)], 3, 1], - [[np.int32, 0, (8,4)], [1,2,1,2,2],0], - [[np.int64, 0 , (1, 4, 2, 3)], 3, 1], - [[np.int64, 0, (8,4)], [1,2,1,2,2],0], - [[np.double, 0 , (1, 4, 2, 3)], 3, 1], - [[np.double, 0, (8,4)], [1,2,1,2,2],0] - ] - for item in shape_format: - cpu_input1, npu_input1 = create_common_tensor(item[0], 1, 10) - cpu_output = self.cpu_op_exec(cpu_input1, item[1], item[2]) - npu_output = self.npu_op_exec(npu_input1, item[1], item[2]) - self.assertRtolEqual(cpu_output, npu_output) - -instantiate_device_type_tests(testSplit, globals(), except_for="cpu") -if __name__ == "__main__": - torch.npu.set_device("npu:6") - run_tests() diff --git a/test/test_npu/test_var.py b/test/test_npu/test_var.py deleted file mode 100644 index 9b1d22b0cb1e66481be7bec392284be0b6a5f05a..0000000000000000000000000000000000000000 --- a/test/test_npu/test_var.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2020, Huawei Technologies.All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import torch -import numpy as np -import sys -import copy -from common_utils import TestCase, run_tests -from common_device_type import dtypes, instantiate_device_type_tests -from util_test import create_common_tensor - -class TestVar(TestCase): - def generate_data(self, min_d, max_d, shape, dtype): - input_x = np.random.uniform(min_d, max_d, shape).astype(dtype) - npu_input = torch.from_numpy(input_x) - return npu_input - - def cpu_var_exec(self, input, dim, unbiased=True, keepdim=False): - cpu_variance = torch.var(input, dim, unbiased, keepdim) - return cpu_variance.numpy() - - def cpu_var_mean_exec(self, input, dim, unbiased=True, keepdim=False): - cpu_variance, cpu_mean = torch.var_mean(input, dim, unbiased, keepdim) - return cpu_variance.numpy(), cpu_mean.numpy() - - def cpu__var_exec(self, input, unbiased=True): - cpu_variance = torch._var(input, unbiased) - return cpu_variance.numpy() - - def npu_var_exec(self, input, dim, unbiased=True, keepdim=False): - input = input.to("npu") - npu_variance = torch.var(input, dim, unbiased, keepdim) - return npu_variance.cpu().numpy() - - def npu_var_exec_out(self, input, output_y, dim, unbiased=True, keepdim=False): - input = input.to("npu") - output_y = output_y.to("npu") - torch.var(input, dim, unbiased, keepdim, out=output_y) - return output_y.cpu().numpy() - - def npu_var_mean_exec(self, input, dim, unbiased=True, keepdim=False): - input = input.to("npu") - npu_variance, npu_mean = torch.var_mean(input, dim, unbiased, keepdim) - return npu_variance.cpu().numpy(), npu_mean.cpu().numpy() - - def npu__var_exec(self, input, unbiased=True): - input = input.to("npu") - npu_variance = torch._var(input, unbiased) - return npu_variance.cpu().numpy() - - def test_var_fp16(self, device): - input_x1 = self.generate_data(-1, 1, (30, 40, 50), np.float16) - cpu_output = self.cpu_var_exec(input_x1, [1], True, False) - npu_output = self.npu_var_exec(input_x1, [1], True, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_var_fp16_out(self, device): - input_x1 = self.generate_data(-1, 1, (30, 40, 50), np.float16) - output_y = self.generate_data(-1, 1, (30, 50), np.float16) - cpu_output = self.cpu_var_exec(input_x1, [1], True, False) - npu_output = self.npu_var_exec_out(input_x1, output_y, [1], True, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_var_fp16_names_out(self, device): - input_x1 = self.generate_data(-1, 1, (30, 40, 50), np.float16).rename('a', 'b', 'c') - output_y = self.generate_data(-1, 1, (30, 50), np.float16) - cpu_output = self.cpu_var_exec(input_x1, ['b'], True, False) - npu_output = self.npu_var_exec_out(input_x1, output_y, ['b'], True, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_var_fp32_1(self, device): - input_x1 = self.generate_data(-1, 1, (3, 4, 5, 6), np.float32) - cpu_output = self.cpu_var_exec(input_x1, [0, 1, 2], True, False) - npu_output = self.npu_var_exec(input_x1, [0, 1, 2], True, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_var_fp16_2(self, device): - input_x1 = self.generate_data(-1, 1, (30, 40, 13), np.float16) - input_x1.names = ['A', 'B', 'C'] - cpu_output = self.cpu_var_exec(input_x1, 'B', True, False) - npu_output = self.npu_var_exec(input_x1, 'B', True, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_var_fp32_2(self, device): - input_x1 = self.generate_data(-1, 1, (30, 40, 13), np.float32) - input_x1.names = ['A', 'B', 'C'] - cpu_output = self.cpu_var_exec(input_x1, 'B', True, False) - npu_output = 
self.npu_var_exec(input_x1, 'B', True, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test_var_fp32(self, device): - input_x1 = self.generate_data(-1, 1, (3, 4, 5, 6, 7, 8, 9), np.float32) - cpu_output = self.cpu_var_exec(input_x1, [0, 3, 5], False, False) - npu_output = self.npu_var_exec(input_x1, [0, 3, 5], False, False) - self.assertRtolEqual(cpu_output, npu_output) - - def test__var_fp32(self, device): - input_x1 = self.generate_data(-1, 1, (3, 4, 5, 6, 7, 8, 9), np.float32) - cpu_output = self.cpu__var_exec(input_x1) - npu_output = self.npu__var_exec(input_x1) - self.assertRtolEqual(cpu_output, npu_output) - - def test_var_mean_fp32_1(self, device): - input_x1 = self.generate_data(-1, 1, (3, 4, 3, 5, 7, 9), np.float32) - cpu_output1, cpu_output2 = self.cpu_var_mean_exec(input_x1, [0, 1, 2, 3], False, False) - npu_output1, npu_output2 = self.npu_var_mean_exec(input_x1, [0, 1, 2, 3], False, False) - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - - def test_var_mean_fp32_2(self, device): - input_x1 = self.generate_data(-1, 1, (10, 20, 30, 40), np.float32) - cpu_output1, cpu_output2 = self.cpu_var_mean_exec(input_x1, [0, 1, 2, 3], False, False) - npu_output1, npu_output2 = self.npu_var_mean_exec(input_x1, [0, 1, 2, 3], False, False) - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - - def test_var_mean_fp16_1(self, device): - input_x1 = self.generate_data(-1, 1, (3, 4, 3, 5, 7, 9), np.float16) - input_x1.names = ['A', 'B', 'C', 'D', 'E', 'F'] - cpu_output1, cpu_output2 = self.cpu_var_mean_exec(input_x1, ['A', 'B', 'D'], False, False) - npu_output1, npu_output2 = self.npu_var_mean_exec(input_x1, ['A', 'B', 'D'], False, False) - self.assertRtolEqual(cpu_output1, npu_output1) - self.assertRtolEqual(cpu_output2, npu_output2) - - -instantiate_device_type_tests(TestVar, globals(), except_for='cpu') -if __name__ == "__main__": - run_tests() \ No newline at end of file