diff --git a/tf_adapter/kernels/aicore/fft1_d_ops.cc b/tf_adapter/kernels/aicore/fft1_d_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0375b6f3988c7cc225a7c194918cfa6b2b9e4475
--- /dev/null
+++ b/tf_adapter/kernels/aicore/fft1_d_ops.cc
@@ -0,0 +1,22 @@
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+
+namespace tensorflow {
+class Fft1DOp : public OpKernel {
+ public:
+  explicit Fft1DOp(OpKernelConstruction *context) : OpKernel(context) {
+    LOG(INFO) << "new Fft1DOp";
+  }
+  ~Fft1DOp() = default;
+  void Compute(OpKernelContext *context) override {
+    (void) context;
+    LOG(INFO) << "Fft1D Compute";
+  }
+  bool IsExpensive() override {
+    LOG(INFO) << "in Fft1D IsExpensive";
+    return false;
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("FFT1D").Device(DEVICE_CPU), Fft1DOp);
+} // namespace tensorflow
diff --git a/tf_adapter/ops/aicore/npu_aicore_ops.cc b/tf_adapter/ops/aicore/npu_aicore_ops.cc
index fbe6035bdb24c97067cf9517a9dd736a31070c3d..b53ccb20c2fabe41afa49b5b582193dbcde1a1db 100644
--- a/tf_adapter/ops/aicore/npu_aicore_ops.cc
+++ b/tf_adapter/ops/aicore/npu_aicore_ops.cc
@@ -1,782 +1,856 @@
-/*
- * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
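Note: the Fft1DOp CPU kernel above is a host-side placeholder (its Compute only logs), so the actual transform is expected to come from the AI Core implementation behind the FFT1D op registered further down in npu_aicore_ops.cc. As a minimal sketch of where that op's N / 2 + 1 (r2c) and 2 * (N - 1) (c2r) output lengths come from, assuming complex values are packed as a trailing [real, imag] axis of size 2, here is a NumPy illustration (numpy is used purely as an aid here, the adapter does not depend on it):

```python
import numpy as np

# Real-to-complex ("r2c", forward): a length-N real signal produces
# N // 2 + 1 complex bins, which FFT1D stores as [..., N // 2 + 1, 2].
x = np.random.rand(8).astype(np.float32)            # N = 8
spec = np.fft.rfft(x)                               # shape (5,) == 8 // 2 + 1
packed = np.stack([spec.real, spec.imag], axis=-1)  # shape (5, 2)

# Complex-to-real ("c2r", inverse): N bins recover 2 * (N - 1) samples
# when no explicit length attribute n is given.
recovered = np.fft.irfft(spec)                      # shape (8,) == 2 * (5 - 1)

print(packed.shape, recovered.shape)                # (5, 2) (8,)
```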
- */ - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -using shape_inference::DimensionHandle; -using shape_inference::InferenceContext; -using shape_inference::ShapeHandle; -using shape_inference::UnchangedShape; - -namespace { -REGISTER_OP("FastGelu") - .Input("features: T") - .Output("activations: T") - .Attr("T: realnumbertype") - .SetShapeFn(tensorflow::shape_inference::UnchangedShape); - -REGISTER_OP("FastGeluV2") - .Input("features: T") - .Output("activations: T") - .Attr("T: realnumbertype") - .SetShapeFn(tensorflow::shape_inference::UnchangedShape); - -REGISTER_OP("FastGeluGrad") - .Input("gradients: T") - .Input("features: T") - .Output("backprops: T") - .Attr("T: realnumbertype") - .SetShapeFn(tensorflow::shape_inference::MergeBothInputsShapeFn); - -REGISTER_OP("DynamicGruV2") - .Input("x: T") - .Input("weight_input: T") - .Input("weight_hidden: T") - .Input("bias_input: T") - .Input("bias_hidden: T") - .Input("seq_length: int32") - .Input("init_h: T") - .Output("y: T") - .Output("output_h: T") - .Output("update: T") - .Output("reset: T") - .Output("new: T") - .Output("hidden_new: T") - .Attr("T: {float16, float32}") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("activation: string") - .Attr("gate_order: string") - .Attr("reset_after: bool = true") - .Attr("is_training: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto weight_hidden_shape = c->input(2); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto hidden_size = c->Dim(weight_hidden_shape, 0); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, c->UnknownShape()); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - return Status::OK(); - }); - -REGISTER_OP("DynamicGruV2Grad") - .Input("x: T") - .Input("weight_input: T") - .Input("weight_hidden: T") - .Input("y: T") - .Input("init_h: T") - .Input("h: T") - .Input("dy: T") - .Input("dh: T") - .Input("update: T") - .Input("reset: T") - .Input("new: T") - .Input("hidden_new: T") - .Input("seq_length: int32") - .Output("dw_input: T") - .Output("dw_hidden: T") - .Output("db_input: T") - .Output("db_hidden: T") - .Output("dx: T") - .Output("dh_prev: T") - .Attr("T: {float16, float32}") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("gate_order: string") - .Attr("reset_after: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = 
c->input(0); - auto weight_hidden_shape = c->input(2); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto hidden_size = c->Dim(weight_hidden_shape, 0); - auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); - auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); - auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); - auto output_db_input_shape = c->MakeShape({hidden_size_1}); - auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); - auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); - auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); - c->set_output(0, output_dw_input_shape); - c->set_output(1, output_dw_hidden_shape); - c->set_output(2, output_db_input_shape); - c->set_output(3, output_db_hidden_shape); - c->set_output(4, output_dx_shape); - c->set_output(5, output_dh_prev_shape); - return Status::OK(); - }); - -REGISTER_OP("DynamicAUGRU") -.Input("x: T") -.Input("weight_input: T") -.Input("weight_hidden: T") -.Input("weight_att: T") -.Input("bias_input: T") -.Input("bias_hidden: T") -.Input("seq_length: int32") -.Input("init_h: T") -.Output("y: T") -.Output("output_h: T") -.Output("update: T") -.Output("update_att: T") -.Output("reset: T") -.Output("new: T") -.Output("hidden_new: T") -.Attr("T: {float16, float32}") -.Attr("direction: string") -.Attr("cell_depth: int = 1") -.Attr("keep_prob: float = 1.0") -.Attr("cell_clip: float = -1.0") -.Attr("num_proj: int = 0") -.Attr("time_major: bool = true") -.Attr("activation: string") -.Attr("gate_order: string") -.Attr("reset_after: bool = true") -.Attr("is_training: bool = true") -.SetIsStateful() -.SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto weight_hidden_shape = c->input(2); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto hidden_size = c->Dim(weight_hidden_shape, 0); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, c->UnknownShape()); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - c->set_output(6, c->UnknownShape()); - return Status::OK(); -}); - -REGISTER_OP("DynamicAUGRUGrad") -.Input("x: T") -.Input("weight_input: T") -.Input("weight_hidden: T") -.Input("weight_att: T") -.Input("y: T") -.Input("init_h: T") -.Input("h: T") -.Input("dy: T") -.Input("dh: T") -.Input("update: T") -.Input("update_att: T") -.Input("reset: T") -.Input("new: T") -.Input("hidden_new: T") -.Input("seq_length: int32") -.Output("dw_input: T") -.Output("dw_hidden: T") -.Output("db_input: T") -.Output("db_hidden: T") -.Output("dx: T") -.Output("dh_prev: T") -.Output("dw_att: T") -.Attr("T: {float16, float32}") -.Attr("direction: string") -.Attr("cell_depth: int = 1") -.Attr("keep_prob: float = 1.0") -.Attr("cell_clip: float = -1.0") 
-.Attr("num_proj: int = 0") -.Attr("time_major: bool = true") -.Attr("gate_order: string") -.Attr("reset_after: bool = true") -.SetIsStateful() -.SetShapeFn([](InferenceContext *c) { -auto input_shape = c->input(0); -auto weight_hidden_shape = c->input(2); -auto num_step = c->Dim(input_shape, 0); -auto batch_size = c->Dim(input_shape, 1); -auto input_size = c->Dim(input_shape, 2); -auto hidden_size = c->Dim(weight_hidden_shape, 0); -auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); -auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); -auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); -auto output_db_input_shape = c->MakeShape({hidden_size_1}); -auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); -auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); -auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); -auto output_dw_att_shape = c->MakeShape({num_step, batch_size}); -c->set_output(0, output_dw_input_shape); -c->set_output(1, output_dw_hidden_shape); -c->set_output(2, output_db_input_shape); -c->set_output(3, output_db_hidden_shape); -c->set_output(4, output_dx_shape); -c->set_output(5, output_dh_prev_shape); -c->set_output(6, output_dw_att_shape); -return Status::OK(); -}); - -REGISTER_OP("DynamicRnn") - .Input("x: T") - .Input("w: T") - .Input("b: T") - .Input("seq_length: int32") - .Input("init_h: T") - .Input("init_c: T") - .Output("y: T") - .Output("output_h: T") - .Output("output_c: T") - .Output("i: T") - .Output("j: T") - .Output("f: T") - .Output("o: T") - .Output("tanhc: T") - .Attr("T: {float16, float32}") - .Attr("cell_type: string") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("use_peephole: bool = false") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("activation: string") - .Attr("forget_bias: float = 0.0") - .Attr("is_training: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto w = c->input(1); - auto hidden_size_total = c->Dim(w, 0); - DimensionHandle hidden_size; - TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); - - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, output_c_shape); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - c->set_output(6, c->UnknownShape()); - c->set_output(7, c->UnknownShape()); - return Status::OK(); - }); - -REGISTER_OP("DynamicRnnV2") - .Input("x: T") - .Input("w: T") - .Input("b: T") - .Input("init_h: T") - .Input("init_c: T") - .Output("y: T") - .Output("output_h: T") - 
.Output("output_c: T") - .Output("i: T") - .Output("j: T") - .Output("f: T") - .Output("o: T") - .Output("tanhc: T") - .Attr("T: {float16, float32}") - .Attr("cell_type: string") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("use_peephole: bool = false") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("activation: string") - .Attr("forget_bias: float = 0.0") - .Attr("is_training: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto w = c->input(1); - auto hidden_size_total = c->Dim(w, 0); - DimensionHandle hidden_size; - TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); - - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, output_c_shape); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - c->set_output(6, c->UnknownShape()); - c->set_output(7, c->UnknownShape()); - return Status::OK(); - }); - -REGISTER_OP("DynamicRnnGrad") - .Input("x: T") - .Input("w: T") - .Input("b: T") - .Input("y: T") - .Input("init_h: T") - .Input("init_c: T") - .Input("h: T") - .Input("c: T") - .Input("dy: T") - .Input("dh: T") - .Input("dc: T") - .Input("i: T") - .Input("j: T") - .Input("f: T") - .Input("o: T") - .Input("tanhc: T") - .Output("dw: T") - .Output("db: T") - .Output("dx: T") - .Output("dh_prev: T") - .Output("dc_prev: T") - .Attr("T: {float16, float32}") - .Attr("cell_type: string") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("use_peephole: bool = false") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("forget_bias: float = 0.0") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto w = c->input(1); - auto hidden_size_total = c->Dim(w, 0); - auto hidden_size_4 = c->Dim(w, 1); - DimensionHandle hidden_size; - TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); - - auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); - auto output_dw_shape = c->MakeShape({hidden_size_total, hidden_size_4}); - auto output_db_shape = c->MakeShape({hidden_size_4}); - auto output_dh_prev_shape = c->MakeShape({1, batch_size, hidden_size}); - auto output_dc_prev_shape = c->MakeShape({1, batch_size, hidden_size}); - c->set_output(0, output_dw_shape); - c->set_output(1, output_db_shape); - c->set_output(2, output_dx_shape); - 
c->set_output(3, output_dh_prev_shape); - c->set_output(4, output_dc_prev_shape); - return Status::OK(); - }); - -REGISTER_OP("LRUCacheV2") - .Input("index_list: T") - .Input("data: Ref(dtype)") - .Input("cache: Ref(dtype)") - .Input("tag: Ref(T)") - .Input("is_last_call: T") - .Output("data1:Ref(dtype)") - .Output("cache1: Ref(dtype)") - .Output("tag1: Ref(dtype)") - .Output("index_offset_list: T") - .Output("not_in_cache_index_list: T") - .Output("not_in_cache_number: T") - .Attr("T: {int32, int64}") - .Attr("dtype: {float32}") - .Attr("pre_route_count: int") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(1)); - c->set_output(1, c->input(2)); - c->set_output(2, c->input(3)); - c->set_output(3, c->input(0)); - c->set_output(4, c->input(0)); - c->set_output(5, c->MakeShape({1})); - return Status::OK(); - }); - -REGISTER_OP("Centralization") - .Input("x: T") - .Output("y: T") - .Attr("T: {float16, float32}") - .Attr("axes: list(int)") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("PRelu") - .Input("x: T") - .Input("weight: T") - .Output("y: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("DropOutDoMaskV3") - .Input("x: T") - .Input("mask: uint8") - .Input("keep_prob: T") - .Output("y: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("PReluGrad") - .Input("grads: T") - .Input("features: T") - .Input("weights: T") - .Output("dx: T") - .Output("da: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - c->set_output(1, c->input(2)); - return Status::OK(); - }); - -REGISTER_OP("NonZero") - .Input("x:T") - .Output("y:output_type") - .Attr("transpose:bool = false") - .Attr("T:numbertype") - .Attr("output_type:{int32, int64} = DT_INT64") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto rank = InferenceContext::Rank(c->input(0)); - c->set_output(0, c->MakeShape({rank, -1})); - return Status::OK(); - }); - -REGISTER_OP("NonZeroWithValue") - .Input("x:T") - .Output("value:T") - .Output("index:output_type") - .Output("count:output_type") - .Attr("transpose:bool = false") - .Attr("T:numbertype") - .Attr("output_type:{int32, int64} = DT_INT32") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - int64_t dim1 = InferenceContext::Value(c->Dim(input_shape, 0)); - int64_t dim2 = InferenceContext::Value(c->Dim(input_shape, 1)); - int64_t value_num = dim1 * dim2; - int64_t index_dim = 2 * dim1 * dim2; - int64_t count_dim = 1; - - c->set_output(0, c->MakeShape({c->MakeDim(value_num)})); - c->set_output(1, c->MakeShape({c->MakeDim(index_dim)})); - c->set_output(2, c->MakeShape({c->MakeDim(count_dim)})); - return Status::OK(); - }); - -REGISTER_OP("FusedLayerNorm") - .Input("x: T") - .Input("gamma: T") - .Input("beta: T") - .Output("y: T") - .Output("mean: T") - .Output("variance: T") - .Attr("T: {float16, float32}") - .Attr("begin_norm_axis: int = 0") - .Attr("begin_params_axis: int = 0") - .Attr("epsilon: float = 0.0000001") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - int32_t real_dim_num 
= InferenceContext::Rank(c->input(0)); - int32_t begin_norm_axis = 0; - TF_RETURN_IF_ERROR(c->GetAttr("begin_norm_axis", &begin_norm_axis)); - if (begin_norm_axis < 0) { - begin_norm_axis += real_dim_num; - } - if (begin_norm_axis < 0 || begin_norm_axis >= real_dim_num) { - return errors::InvalidArgument("begin_norm_axis is invalid"); - } - ShapeHandle input_shape_handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), real_dim_num, &input_shape_handle)); - ShapeHandle out_shape_handle; - for (int32_t i = 0; i < real_dim_num; ++i) { - DimensionHandle tmp_dim_handle = c->Dim(input_shape_handle, i); - if (i >= begin_norm_axis) { - tmp_dim_handle = c->MakeDim(1); - TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_handle, i, tmp_dim_handle, &out_shape_handle)); - } - } - c->set_output(0, c->input(0)); - c->set_output(1, out_shape_handle); - c->set_output(2, out_shape_handle); - return Status::OK(); - }); - -REGISTER_OP("FusedLayerNormGrad") - .Input("dy: T") - .Input("x: T") - .Input("variance: T") - .Input("mean: T") - .Input("gamma: T") - .Output("pd_x: T") - .Output("pd_gamma: T") - .Output("pd_beta: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - c->set_output(1, c->input(4)); - c->set_output(2, c->input(4)); - return Status::OK(); - }); - -REGISTER_OP("GetShape") - .Input("x: T") - .Output("y: int32") - .Attr("N: int = 1") - .Attr("T: {float16, float32, uint8}") - .SetShapeFn([](InferenceContext* c) { - int64_t sumSize = 0; - for (int32_t i = 0; i < c->num_inputs(); i++) { - sumSize += InferenceContext::Rank(c->input(i)); - } - c->set_output(0, c->MakeShape({c->MakeDim(sumSize)})); - return Status::OK(); - }); - -REGISTER_OP("ProdEnvMatA") - .Input("coord: T") - .Input("type:int32") - .Input("natoms:int32") - .Input("box: T") - .Input("mesh:int32") - .Input("davg: T") - .Input("dstd: T") - .Output("descrpt: T") - .Output("descrpt_deriv: T") - .Output("rij: T") - .Output("nlist:int32") - .Attr("T: {float16, float32}") - .Attr("rcut_a: float = 0.0") - .Attr("rcut_r: float = 0.0") - .Attr("rcut_r_smth: float = 0.0") - .Attr("sel_a: list(int)") - .Attr("sel_r: list(int)") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto coord_shape = c->input(0); - int64_t nsample = InferenceContext::Value(c->Dim(coord_shape, 0)); - int64_t nloc = 12288; - int64_t nnei = 0; - std::vector sel_a; - TF_RETURN_IF_ERROR(c->GetAttr("sel_a", &sel_a)); - for (size_t i = 0; i < sel_a.size(); ++i) { - nnei = nnei + sel_a[i]; - } - int64_t des = nloc * nnei * 4; - int64_t des_a = des * 3; - int64_t rij = nloc * nnei * 3; - int64_t nlist = nloc * nnei; - c->set_output(0, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des)})); - c->set_output(1, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des_a)})); - c->set_output(2, c->MakeShape({c->MakeDim(nsample), c->MakeDim(rij)})); - c->set_output(3, c->MakeShape({c->MakeDim(nsample), c->MakeDim(nlist)})); - return Status::OK(); - }); - -REGISTER_OP("ProdVirialSeA") - .Input("net_deriv:T") - .Input("in_deriv:T") - .Input("rij:T") - .Input("nlist:int32") - .Input("natoms:int32") - .Output("virial:T") - .Output("atom_virial:T") - .Attr("n_a_sel:int = 0") - .Attr("n_r_sel:int = 0") - .Attr("T: {float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(0); - auto nframes = c->Dim(input_shape, 0); - ShapeHandle virial_shape = c->MakeShape({nframes, 9}); - c->set_output(0, virial_shape); - ShapeHandle atom_virial_shape = 
c->MakeShape({nframes, 254952}); - c->set_output(1, atom_virial_shape); - return Status::OK(); - }); - -REGISTER_OP("ProdForceSeA") - .Input("net_deriv:T") - .Input("in_deriv:T") - .Input("nlist:int32") - .Input("natoms:int32") - .Output("force:T") - .Attr("n_a_sel:int = 0") - .Attr("n_r_sel:int = 0") - .Attr("T: {float32}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(0); - auto nframes = c->Dim(input_shape, 0); - ShapeHandle force_shape = c->MakeShape({nframes, 84984}); - c->set_output(0, force_shape); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusionSeA") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Output("descriptor:T") - .Attr("last_layer_size:int") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(3); - auto nloc = c->Dim(input_shape, 0); - - int32_t last_layer_size; - TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); - ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); - c->set_output(0, out_shape); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusionSeAGrad") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Input("dy:T") - .Input("descriptor:T") - .Output("dy_dem_x:T") - .Output("dy_dem:T") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - c->set_output(0, c->input(2)); - c->set_output(1, c->input(3)); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusion") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Output("descriptor:T") - .Attr("last_layer_size:int") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(3); - auto nloc = c->Dim(input_shape, 0); - - int32_t last_layer_size; - TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); - ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); - c->set_output(0, out_shape); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusionGrad") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Input("dy:T") - .Input("descriptor:T") - .Output("dy_dem_x:T") - .Output("dy_dem:T") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - c->set_output(0, c->input(2)); - c->set_output(1, c->input(3)); - return Status::OK(); - }); -} // namespace -} // namespace tensorflow +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+using shape_inference::UnchangedShape;
+
+namespace {
+
+REGISTER_OP("FFT1D")
+    .Input("input: float32")
+    .Output("output: float32")
+    .Attr("n: int")
+    .Attr("norm: string")
+    .Attr("mode: string")
+    .Attr("forward: bool")
+    .SetShapeFn([](InferenceContext *c) {
+      string mode;
+      bool forward;
+      TF_RETURN_IF_ERROR(c->GetAttr("mode", &mode));
+      TF_RETURN_IF_ERROR(c->GetAttr("forward", &forward));
+      if (mode == "r2c" && forward == true)
+      {
+        auto input_shape = c->input(0);
+        auto batch = c->Dim(input_shape, 0);
+        auto inner_batch = c->Dim(input_shape, 1);
+        int32_t n;
+        TF_RETURN_IF_ERROR(c->GetAttr("n", &n));
+        ShapeHandle output_dim;
+        if (n == 0)
+        {
+          int64_t N = InferenceContext::Value(c->Dim(input_shape, 2));
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(N / 2 + 1), 2});
+        }
+        else
+        {
+          auto N = n / 2 + 1;
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(N), 2});
+        }
+        c->set_output(0, output_dim);
+      }
+      if (mode == "c2r" && forward == false)
+      {
+        auto input_shape = c->input(0);
+        auto batch = c->Dim(input_shape, 0);
+        auto inner_batch = c->Dim(input_shape, 1);
+        int32_t n;
+        TF_RETURN_IF_ERROR(c->GetAttr("n", &n));
+        ShapeHandle output_dim;
+        if (n == 0)
+        {
+          int64_t N = InferenceContext::Value(c->Dim(input_shape, 2));
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(2 * (N - 1))});
+        }
+        else
+        {
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(n)});
+        }
+        c->set_output(0, output_dim);
+      }
+      if (mode == "c2c")
+      {
+        auto input_shape = c->input(0);
+        auto batch = c->Dim(input_shape, 0);
+        auto inner_batch = c->Dim(input_shape, 1);
+        int32_t n;
+        TF_RETURN_IF_ERROR(c->GetAttr("n", &n));
+        ShapeHandle output_dim;
+        if (n == 0)
+        {
+          int64_t N = InferenceContext::Value(c->Dim(input_shape, 2));
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(N), 2});
+        }
+        else
+        {
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(n), 2});
+        }
+        c->set_output(0, output_dim);
+      }
+      return Status::OK();
+    });
+
+REGISTER_OP("FastGelu")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(tensorflow::shape_inference::UnchangedShape);
+
+REGISTER_OP("FastGeluV2")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(tensorflow::shape_inference::UnchangedShape);
+
+REGISTER_OP("FastGeluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Output("backprops: T")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(tensorflow::shape_inference::MergeBothInputsShapeFn);
+
+REGISTER_OP("DynamicGruV2")
+    .Input("x: T")
+    .Input("weight_input: T")
+    .Input("weight_hidden: T")
+    .Input("bias_input: T")
+    .Input("bias_hidden: T")
+    .Input("seq_length: int32")
+    .Input("init_h: T")
+    .Output("y: T")
+    .Output("output_h: T")
+    .Output("update: T")
+    .Output("reset: T")
+    .Output("new: T")
+    .Output("hidden_new: T")
+    .Attr("T: {float16, float32}")
+    .Attr("direction: string")
+    .Attr("cell_depth: int = 1")
+    .Attr("keep_prob: float = 1.0")
+    .Attr("cell_clip: float = -1.0")
+    .Attr("num_proj: int = 0")
+    .Attr("time_major: bool = true")
+    .Attr("activation: string")
+    .Attr("gate_order: string")
+    .Attr("reset_after: bool = true")
+    .Attr("is_training: bool = true")
+
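Note: the FFT1D shape function above only sets an output shape when one of its three mode branches matches. A minimal Python mirror of those rules, assuming a rank-3 real input for r2c and a trailing size-2 real/imag axis for complex inputs (the helper name and signature are illustrative only, not part of the adapter):

```python
def fft1d_output_shape(input_shape, mode, forward, n=0):
    """Illustrative mirror of the FFT1D shape function registered above.

    input_shape is (batch, inner_batch, N) for r2c and
    (batch, inner_batch, N, 2) for complex inputs; n == 0 means
    "use the input length", matching the C++ branches.
    """
    batch, inner_batch, big_n = input_shape[0], input_shape[1], input_shape[2]
    if mode == "r2c" and forward:
        length = big_n // 2 + 1 if n == 0 else n // 2 + 1
        return (batch, inner_batch, length, 2)
    if mode == "c2r" and not forward:
        return (batch, inner_batch, 2 * (big_n - 1) if n == 0 else n)
    if mode == "c2c":
        return (batch, inner_batch, big_n if n == 0 else n, 2)
    return None  # no branch matched; the C++ code leaves the output unset


# e.g. a (4, 3, 400) real input, r2c forward -> (4, 3, 201, 2)
assert fft1d_output_shape((4, 3, 400), "r2c", True) == (4, 3, 201, 2)
```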
.SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto weight_hidden_shape = c->input(2); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto hidden_size = c->Dim(weight_hidden_shape, 0); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); + c->set_output(2, c->UnknownShape()); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + return Status::OK(); + }); + +REGISTER_OP("DynamicGruV2Grad") + .Input("x: T") + .Input("weight_input: T") + .Input("weight_hidden: T") + .Input("y: T") + .Input("init_h: T") + .Input("h: T") + .Input("dy: T") + .Input("dh: T") + .Input("update: T") + .Input("reset: T") + .Input("new: T") + .Input("hidden_new: T") + .Input("seq_length: int32") + .Output("dw_input: T") + .Output("dw_hidden: T") + .Output("db_input: T") + .Output("db_hidden: T") + .Output("dx: T") + .Output("dh_prev: T") + .Attr("T: {float16, float32}") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("gate_order: string") + .Attr("reset_after: bool = true") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto weight_hidden_shape = c->input(2); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto hidden_size = c->Dim(weight_hidden_shape, 0); + auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); + auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); + auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); + auto output_db_input_shape = c->MakeShape({hidden_size_1}); + auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); + auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); + auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); + c->set_output(0, output_dw_input_shape); + c->set_output(1, output_dw_hidden_shape); + c->set_output(2, output_db_input_shape); + c->set_output(3, output_db_hidden_shape); + c->set_output(4, output_dx_shape); + c->set_output(5, output_dh_prev_shape); + return Status::OK(); + }); + +REGISTER_OP("DynamicAUGRU") +.Input("x: T") +.Input("weight_input: T") +.Input("weight_hidden: T") +.Input("weight_att: T") +.Input("bias_input: T") +.Input("bias_hidden: T") +.Input("seq_length: int32") +.Input("init_h: T") +.Output("y: T") +.Output("output_h: T") +.Output("update: T") +.Output("update_att: T") +.Output("reset: T") +.Output("new: T") +.Output("hidden_new: T") +.Attr("T: {float16, float32}") +.Attr("direction: string") +.Attr("cell_depth: int = 1") +.Attr("keep_prob: float = 1.0") +.Attr("cell_clip: float = -1.0") +.Attr("num_proj: int = 0") +.Attr("time_major: bool = true") +.Attr("activation: 
string") +.Attr("gate_order: string") +.Attr("reset_after: bool = true") +.Attr("is_training: bool = true") +.SetIsStateful() +.SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto weight_hidden_shape = c->input(2); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto hidden_size = c->Dim(weight_hidden_shape, 0); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); + c->set_output(2, c->UnknownShape()); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + c->set_output(6, c->UnknownShape()); + return Status::OK(); +}); + +REGISTER_OP("DynamicAUGRUGrad") +.Input("x: T") +.Input("weight_input: T") +.Input("weight_hidden: T") +.Input("weight_att: T") +.Input("y: T") +.Input("init_h: T") +.Input("h: T") +.Input("dy: T") +.Input("dh: T") +.Input("update: T") +.Input("update_att: T") +.Input("reset: T") +.Input("new: T") +.Input("hidden_new: T") +.Input("seq_length: int32") +.Output("dw_input: T") +.Output("dw_hidden: T") +.Output("db_input: T") +.Output("db_hidden: T") +.Output("dx: T") +.Output("dh_prev: T") +.Output("dw_att: T") +.Attr("T: {float16, float32}") +.Attr("direction: string") +.Attr("cell_depth: int = 1") +.Attr("keep_prob: float = 1.0") +.Attr("cell_clip: float = -1.0") +.Attr("num_proj: int = 0") +.Attr("time_major: bool = true") +.Attr("gate_order: string") +.Attr("reset_after: bool = true") +.SetIsStateful() +.SetShapeFn([](InferenceContext *c) { +auto input_shape = c->input(0); +auto weight_hidden_shape = c->input(2); +auto num_step = c->Dim(input_shape, 0); +auto batch_size = c->Dim(input_shape, 1); +auto input_size = c->Dim(input_shape, 2); +auto hidden_size = c->Dim(weight_hidden_shape, 0); +auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); +auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); +auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); +auto output_db_input_shape = c->MakeShape({hidden_size_1}); +auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); +auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); +auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); +auto output_dw_att_shape = c->MakeShape({num_step, batch_size}); +c->set_output(0, output_dw_input_shape); +c->set_output(1, output_dw_hidden_shape); +c->set_output(2, output_db_input_shape); +c->set_output(3, output_db_hidden_shape); +c->set_output(4, output_dx_shape); +c->set_output(5, output_dh_prev_shape); +c->set_output(6, output_dw_att_shape); +return Status::OK(); +}); + +REGISTER_OP("DynamicRnn") + .Input("x: T") + .Input("w: T") + .Input("b: T") + .Input("seq_length: int32") + .Input("init_h: T") + .Input("init_c: T") + .Output("y: T") + .Output("output_h: T") + .Output("output_c: T") + .Output("i: T") + .Output("j: T") + .Output("f: T") + .Output("o: T") + .Output("tanhc: T") + .Attr("T: {float16, 
float32}") + .Attr("cell_type: string") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("use_peephole: bool = false") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("activation: string") + .Attr("forget_bias: float = 0.0") + .Attr("is_training: bool = true") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto w = c->input(1); + auto hidden_size_total = c->Dim(w, 0); + DimensionHandle hidden_size; + TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); + + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); + c->set_output(2, output_c_shape); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + c->set_output(6, c->UnknownShape()); + c->set_output(7, c->UnknownShape()); + return Status::OK(); + }); + +REGISTER_OP("DynamicRnnV2") + .Input("x: T") + .Input("w: T") + .Input("b: T") + .Input("init_h: T") + .Input("init_c: T") + .Output("y: T") + .Output("output_h: T") + .Output("output_c: T") + .Output("i: T") + .Output("j: T") + .Output("f: T") + .Output("o: T") + .Output("tanhc: T") + .Attr("T: {float16, float32}") + .Attr("cell_type: string") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("use_peephole: bool = false") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("activation: string") + .Attr("forget_bias: float = 0.0") + .Attr("is_training: bool = true") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto w = c->input(1); + auto hidden_size_total = c->Dim(w, 0); + DimensionHandle hidden_size; + TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); + + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); 
+ c->set_output(2, output_c_shape); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + c->set_output(6, c->UnknownShape()); + c->set_output(7, c->UnknownShape()); + return Status::OK(); + }); + +REGISTER_OP("DynamicRnnGrad") + .Input("x: T") + .Input("w: T") + .Input("b: T") + .Input("y: T") + .Input("init_h: T") + .Input("init_c: T") + .Input("h: T") + .Input("c: T") + .Input("dy: T") + .Input("dh: T") + .Input("dc: T") + .Input("i: T") + .Input("j: T") + .Input("f: T") + .Input("o: T") + .Input("tanhc: T") + .Output("dw: T") + .Output("db: T") + .Output("dx: T") + .Output("dh_prev: T") + .Output("dc_prev: T") + .Attr("T: {float16, float32}") + .Attr("cell_type: string") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("use_peephole: bool = false") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("forget_bias: float = 0.0") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto w = c->input(1); + auto hidden_size_total = c->Dim(w, 0); + auto hidden_size_4 = c->Dim(w, 1); + DimensionHandle hidden_size; + TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); + + auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); + auto output_dw_shape = c->MakeShape({hidden_size_total, hidden_size_4}); + auto output_db_shape = c->MakeShape({hidden_size_4}); + auto output_dh_prev_shape = c->MakeShape({1, batch_size, hidden_size}); + auto output_dc_prev_shape = c->MakeShape({1, batch_size, hidden_size}); + c->set_output(0, output_dw_shape); + c->set_output(1, output_db_shape); + c->set_output(2, output_dx_shape); + c->set_output(3, output_dh_prev_shape); + c->set_output(4, output_dc_prev_shape); + return Status::OK(); + }); + +REGISTER_OP("LRUCacheV2") + .Input("index_list: T") + .Input("data: Ref(dtype)") + .Input("cache: Ref(dtype)") + .Input("tag: Ref(T)") + .Input("is_last_call: T") + .Output("data1:Ref(dtype)") + .Output("cache1: Ref(dtype)") + .Output("tag1: Ref(dtype)") + .Output("index_offset_list: T") + .Output("not_in_cache_index_list: T") + .Output("not_in_cache_number: T") + .Attr("T: {int32, int64}") + .Attr("dtype: {float32}") + .Attr("pre_route_count: int") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(1)); + c->set_output(1, c->input(2)); + c->set_output(2, c->input(3)); + c->set_output(3, c->input(0)); + c->set_output(4, c->input(0)); + c->set_output(5, c->MakeShape({1})); + return Status::OK(); + }); + +REGISTER_OP("Centralization") + .Input("x: T") + .Output("y: T") + .Attr("T: {float16, float32}") + .Attr("axes: list(int)") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +REGISTER_OP("PRelu") + .Input("x: T") + .Input("weight: T") + .Output("y: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +REGISTER_OP("DropOutDoMaskV3") + .Input("x: T") + .Input("mask: uint8") + .Input("keep_prob: T") + .Output("y: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + 
c->set_output(0, c->input(0)); + return Status::OK(); + }); + +REGISTER_OP("PReluGrad") + .Input("grads: T") + .Input("features: T") + .Input("weights: T") + .Output("dx: T") + .Output("da: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + c->set_output(1, c->input(2)); + return Status::OK(); + }); + +REGISTER_OP("NonZero") + .Input("x:T") + .Output("y:output_type") + .Attr("transpose:bool = false") + .Attr("T:numbertype") + .Attr("output_type:{int32, int64} = DT_INT64") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto rank = InferenceContext::Rank(c->input(0)); + c->set_output(0, c->MakeShape({rank, -1})); + return Status::OK(); + }); + +REGISTER_OP("NonZeroWithValue") + .Input("x:T") + .Output("value:T") + .Output("index:output_type") + .Output("count:output_type") + .Attr("transpose:bool = false") + .Attr("T:numbertype") + .Attr("output_type:{int32, int64} = DT_INT32") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + int64_t dim1 = InferenceContext::Value(c->Dim(input_shape, 0)); + int64_t dim2 = InferenceContext::Value(c->Dim(input_shape, 1)); + int64_t value_num = dim1 * dim2; + int64_t index_dim = 2 * dim1 * dim2; + int64_t count_dim = 1; + + c->set_output(0, c->MakeShape({c->MakeDim(value_num)})); + c->set_output(1, c->MakeShape({c->MakeDim(index_dim)})); + c->set_output(2, c->MakeShape({c->MakeDim(count_dim)})); + return Status::OK(); + }); + +REGISTER_OP("FusedLayerNorm") + .Input("x: T") + .Input("gamma: T") + .Input("beta: T") + .Output("y: T") + .Output("mean: T") + .Output("variance: T") + .Attr("T: {float16, float32}") + .Attr("begin_norm_axis: int = 0") + .Attr("begin_params_axis: int = 0") + .Attr("epsilon: float = 0.0000001") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + int32_t real_dim_num = InferenceContext::Rank(c->input(0)); + int32_t begin_norm_axis = 0; + TF_RETURN_IF_ERROR(c->GetAttr("begin_norm_axis", &begin_norm_axis)); + if (begin_norm_axis < 0) { + begin_norm_axis += real_dim_num; + } + if (begin_norm_axis < 0 || begin_norm_axis >= real_dim_num) { + return errors::InvalidArgument("begin_norm_axis is invalid"); + } + ShapeHandle input_shape_handle; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), real_dim_num, &input_shape_handle)); + ShapeHandle out_shape_handle; + for (int32_t i = 0; i < real_dim_num; ++i) { + DimensionHandle tmp_dim_handle = c->Dim(input_shape_handle, i); + if (i >= begin_norm_axis) { + tmp_dim_handle = c->MakeDim(1); + TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_handle, i, tmp_dim_handle, &out_shape_handle)); + } + } + c->set_output(0, c->input(0)); + c->set_output(1, out_shape_handle); + c->set_output(2, out_shape_handle); + return Status::OK(); + }); + +REGISTER_OP("FusedLayerNormGrad") + .Input("dy: T") + .Input("x: T") + .Input("variance: T") + .Input("mean: T") + .Input("gamma: T") + .Output("pd_x: T") + .Output("pd_gamma: T") + .Output("pd_beta: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + c->set_output(1, c->input(4)); + c->set_output(2, c->input(4)); + return Status::OK(); + }); + +REGISTER_OP("GetShape") + .Input("x: T") + .Output("y: int32") + .Attr("N: int = 1") + .Attr("T: {float16, float32, uint8}") + .SetShapeFn([](InferenceContext* c) { + int64_t sumSize = 0; + for (int32_t i = 0; i < c->num_inputs(); i++) { + sumSize += 
InferenceContext::Rank(c->input(i)); + } + c->set_output(0, c->MakeShape({c->MakeDim(sumSize)})); + return Status::OK(); + }); + +REGISTER_OP("ProdEnvMatA") + .Input("coord: T") + .Input("type:int32") + .Input("natoms:int32") + .Input("box: T") + .Input("mesh:int32") + .Input("davg: T") + .Input("dstd: T") + .Output("descrpt: T") + .Output("descrpt_deriv: T") + .Output("rij: T") + .Output("nlist:int32") + .Attr("T: {float16, float32}") + .Attr("rcut_a: float = 0.0") + .Attr("rcut_r: float = 0.0") + .Attr("rcut_r_smth: float = 0.0") + .Attr("sel_a: list(int)") + .Attr("sel_r: list(int)") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto coord_shape = c->input(0); + int64_t nsample = InferenceContext::Value(c->Dim(coord_shape, 0)); + int64_t nloc = 12288; + int64_t nnei = 0; + std::vector sel_a; + TF_RETURN_IF_ERROR(c->GetAttr("sel_a", &sel_a)); + for (size_t i = 0; i < sel_a.size(); ++i) { + nnei = nnei + sel_a[i]; + } + int64_t des = nloc * nnei * 4; + int64_t des_a = des * 3; + int64_t rij = nloc * nnei * 3; + int64_t nlist = nloc * nnei; + c->set_output(0, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des)})); + c->set_output(1, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des_a)})); + c->set_output(2, c->MakeShape({c->MakeDim(nsample), c->MakeDim(rij)})); + c->set_output(3, c->MakeShape({c->MakeDim(nsample), c->MakeDim(nlist)})); + return Status::OK(); + }); + +REGISTER_OP("ProdVirialSeA") + .Input("net_deriv:T") + .Input("in_deriv:T") + .Input("rij:T") + .Input("nlist:int32") + .Input("natoms:int32") + .Output("virial:T") + .Output("atom_virial:T") + .Attr("n_a_sel:int = 0") + .Attr("n_r_sel:int = 0") + .Attr("T: {float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(0); + auto nframes = c->Dim(input_shape, 0); + ShapeHandle virial_shape = c->MakeShape({nframes, 9}); + c->set_output(0, virial_shape); + ShapeHandle atom_virial_shape = c->MakeShape({nframes, 254952}); + c->set_output(1, atom_virial_shape); + return Status::OK(); + }); + +REGISTER_OP("ProdForceSeA") + .Input("net_deriv:T") + .Input("in_deriv:T") + .Input("nlist:int32") + .Input("natoms:int32") + .Output("force:T") + .Attr("n_a_sel:int = 0") + .Attr("n_r_sel:int = 0") + .Attr("T: {float32}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(0); + auto nframes = c->Dim(input_shape, 0); + ShapeHandle force_shape = c->MakeShape({nframes, 84984}); + c->set_output(0, force_shape); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusionSeA") + .Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Output("descriptor:T") + .Attr("last_layer_size:int") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(3); + auto nloc = c->Dim(input_shape, 0); + + int32_t last_layer_size; + TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); + ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); + c->set_output(0, out_shape); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusionSeAGrad") + .Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Input("dy:T") + .Input("descriptor:T") + .Output("dy_dem_x:T") + .Output("dy_dem:T") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->input(2)); + c->set_output(1, c->input(3)); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusion") + 
.Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Output("descriptor:T") + .Attr("last_layer_size:int") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(3); + auto nloc = c->Dim(input_shape, 0); + + int32_t last_layer_size; + TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); + ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); + c->set_output(0, out_shape); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusionGrad") + .Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Input("dy:T") + .Input("descriptor:T") + .Output("dy_dem_x:T") + .Output("dy_dem:T") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->input(2)); + c->set_output(1, c->input(3)); + return Status::OK(); + }); +} // namespace +} // namespace tensorflow diff --git a/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py b/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py index 9968ec052e2832b6d7d06a6203b8576b81c6ebea..06ada22d6248b7cabc6d75ffc8beb7354f0c0470 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py +++ b/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py @@ -1,328 +1,358 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""All bert ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numbers -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops.nn_ops import _get_noise_shape -from tensorflow.python.framework import dtypes - -from npu_bridge.helper import helper -from npu_bridge.estimator.npu.npu_common import NPUBasics - -npu_aicore_ops = helper.get_gen_ops() - -DEFAULT_GRAPH_SEED = 87654321 -_MAXINT32 = 2 ** 31 - 1 - - -@ops.RegisterGradient("FastGelu") -def _fast_gelu_grad(op, grad): - """The gradient for `fast_gelu`. - - Args: - op: The `fast_gelu` `Operation` that we are differentiating, which we can use - to find the inputs and outputs of the original op. - grad: Gradient with respect to the output of the `fast_gelu` op. - - Returns: - Gradients with respect to the input of `fast_gelu`. - """ - return [npu_aicore_ops.fast_gelu_grad(grad, op.inputs[0])] # List of one Tensor, since we have one input - - -def fast_gelu_v2(x, name=None): - """ fast_gelu_v2 operator interface implementation - - Args: - x: A input tensor with type is float16 or float32. - - Returns: - A tensor. 
- """ - return npu_aicore_ops.fast_gelu_v2(x, name) - - -def centralization(x, axes, name=None): - """ - centralization op - return x - reduce_mean(x, axes) - """ - x = ops.convert_to_tensor(x, name="x") - result = npu_aicore_ops.centralization(x, axes, name=name) - return result - - -@ops.RegisterGradient("PRelu") -def prelu_grad(op, grad): - """Gradient for prelu""" - dx, da = npu_aicore_ops.p_relu_grad(grad, op.inputs[0], op.inputs[1]) - return [dx, da] - - -def prelu(x, weight): - """prelu op""" - return npu_aicore_ops.p_relu(x, weight) - - -def _truncate_seed(seed): - return seed % _MAXINT32 # Truncate to fit into 32-bit integer - - -def dropout_v3(x, keep_prob, noise_shape=None, seed=None, name=None): - """The gradient for `gelu`. - - Args: - x: A tensor with type is float. - keep_prob: A tensor, float, rate of every element reserved. - noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random - generated. - seed: Random seed. - name: Layer name. - - Returns: - A tensor. - """ - x = ops.convert_to_tensor(x, name="x") - if not x.dtype.is_floating: - raise ValueError("x must be a floating point tensor." - " Got a %s tensor instead." % x.dtype) - if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: - raise ValueError("keep_prob must be a float value or a scalar tensor in the " - "range (0, 1], got %g" % keep_prob) - if isinstance(keep_prob, float) and keep_prob == 1.0: - return x - seed, seed2 = random_seed.get_seed(seed) - noise_shape = _get_noise_shape(x, noise_shape) - gen_out = npu_aicore_ops.drop_out_gen_mask_v3(noise_shape, keep_prob, seed, seed2, name) - result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) - return result - - -@ops.RegisterGradient("DropOutDoMaskV3") -def _DropOutDoMaskV3Grad(op, grad): - result = npu_aicore_ops.drop_out_do_mask_v3(grad, op.inputs[1], op.inputs[2]) - return [result, None, None] - - -def dropout_v4(x, keep_prob, noise_shape=None, seed=None, output_dtype=dtypes.bool, name=None): - """The gradient for `gelu`. - - Args: - x: A tensor with type is float. - keep_prob: A tensor, float, rate of every element reserved. - noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random - generated. - seed: Random seed. - output_dtype: dtype of output tensor, default is bool. - name: Layer name. - - Returns: - A tensor. - """ - x = ops.convert_to_tensor(x, name="x") - if not x.dtype.is_floating: - raise ValueError("x must be a floating point tensor." - " Got a %s tensor instead." 
% x.dtype) - if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: - raise ValueError("keep_prob must be a float value or a scalar tensor in the " - "range (0, 1], got %g" % keep_prob) - if isinstance(keep_prob, float) and keep_prob == 1.0: - return x - seed, seed2 = random_seed.get_seed(seed) - noise_shape = _get_noise_shape(x, noise_shape) - gen_out = npu_aicore_ops.drop_out_gen_mask_v4(noise_shape, keep_prob, seed, seed2, output_dtype, name) - result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) - return result - - -def lru_cache_v2(index_list, data, cache, tag, is_last_call, pre_route_count, name=None): - """ - LRUCacheV2 op - - """ - is_last_call = ops.convert_to_tensor(is_last_call, name="is_last_call") - data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number = npu_aicore_ops.lru_cache_v2( - index_list, data, cache, tag, is_last_call, pre_route_count, name=name) - return [data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number] - - -def nonzero(x, transpose=False, output_type=dtypes.int64, name=None): - """ - nonezero op - Return the indices of the elementes that are non-zero. - Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes - in that dimension. The values in a are always tested and returned in row-major ,C-style order. - - """ - x = ops.convert_to_tensor(x, name="x") - result = npu_aicore_ops.non_zero(x, transpose, output_type, name=name) - return result - - -def nonzerowithvalue(x, transpose=False, output_type=dtypes.int64, name=None): - """ - nonezero op - Return the indices of the elementes that are non-zero. - Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes - in that dimension. The values in a are always tested and returned in row-major ,C-style order. - - """ - x = ops.convert_to_tensor(x, name="x") - result = npu_aicore_ops.non_zero_with_value(x, transpose, output_type, name=name) - return result - - -# go/tf-wildcard-import - - -def layer_norm(x, gamma, beta, begin_norm_axis=0, begin_params_axis=0, epsilon=0.0000001, name=None): - """ LayerNorm operator interface implementation - - Args: - x: A input tensor with type is float16 or float32. - gamma: scaling operation to normalized tensor. - beta: add offset to normalized tensor. - begin_norm_axis: A optional attribute, the type is int32. Defaults to 0. - begin_params_axis: A optional attribute, the type is int32. Defaults to 0. - epsilon: A optional attribute, the type is int32. Defaults to 0.0000001. - name: Layer name. - - Returns: - A tensor. - """ - res, mean, variance = npu_aicore_ops.fused_layer_norm(x, gamma, beta, begin_norm_axis, - begin_params_axis, epsilon, name) - - return [res, mean, variance] - - -@ops.RegisterGradient("FusedLayerNorm") -def _layer_norm_grad(op, *grad): - pd_x, pd_gamma, pd_beta = npu_aicore_ops.fused_layer_norm_grad(grad[0], op.inputs[0], op.outputs[2], op.outputs[1], - op.inputs[1]) - - return [pd_x, pd_gamma, pd_beta] - - -def prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a=0.0, - rcut_r=0.0, rcut_r_smth=0.0, sel_a=None, sel_r=None, name=None): - """ - prod_env_mat_a op - Return the indices of the elementes that are non-zero. - Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes - in that dimension. The values in a are always tested and returned in row-major ,C-style order. 
- - """ - sel_a = [] if sel_a is None else sel_a - sel_r = [] if sel_r is None else sel_r - coord = ops.convert_to_tensor(coord, name="coord") - types = ops.convert_to_tensor(types, name="type") - natoms = ops.convert_to_tensor(natoms, name="natoms") - box = ops.convert_to_tensor(box, name="box") - mesh = ops.convert_to_tensor(mesh, name="mesh") - davg = ops.convert_to_tensor(davg, name="davg") - dstd = ops.convert_to_tensor(dstd, name="dstd") - result = npu_aicore_ops.prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a, rcut_r, - rcut_r_smth, sel_a, sel_r, name) - - return result - - -def prodvirialsea(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): - """ - ProdVirialSeA op - """ - net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") - in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") - rij = ops.convert_to_tensor(rij, name="rij") - nlist = ops.convert_to_tensor(nlist, name="nlist") - natoms = ops.convert_to_tensor(natoms, name="natoms") - result = npu_aicore_ops.prod_virial_se_a(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel, n_r_sel, - name=name) - return result - - -def prodforcesea(net_deriv, in_deriv, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): - """ - ProdForceSeA op - """ - net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") - in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") - nlist = ops.convert_to_tensor(nlist, name="nlist") - natoms = ops.convert_to_tensor(natoms, name="natoms") - result = npu_aicore_ops.prod_force_se_a(net_deriv, in_deriv, nlist, natoms, n_a_sel, n_r_sel, - name=name) - return result - - -def tabulatefusionsea(table, table_info, em_x, em, last_layer_size=128, name=None): - """ - TabulateFusionSeA op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - result = npu_aicore_ops.tabulate_fusion_se_a(table, table_info, em_x, em, last_layer_size, name=name) - return result - - -def tabulatefusionseagrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): - """ - TabulateFusionSeAGrad op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") - dy_dem = ops.convert_to_tensor(dy_dem, name="dy_dem") - result = npu_aicore_ops.tabulate_fusion_se_a_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) - return result - - -def tabulatefusion(table, table_info, em_x, em, last_layer_size=128, name=None): - """ - TabulateFusion op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - result = npu_aicore_ops.tabulate_fusion(table, table_info, em_x, em, last_layer_size, name=name) - return result - - -def tabulatefusiongrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): - """ - TabulateFusionGrad op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") - dy_dem = 
ops.convert_to_tensor(dy_dem, name="dy_dem") - result = npu_aicore_ops.tabulate_fusion_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) - return result +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""All bert ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numbers +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.ops.nn_ops import _get_noise_shape +from tensorflow.python.framework import dtypes + +from npu_bridge.helper import helper +from npu_bridge.estimator.npu.npu_common import NPUBasics + +npu_aicore_ops = helper.get_gen_ops() + +DEFAULT_GRAPH_SEED = 87654321 +_MAXINT32 = 2 ** 31 - 1 + +def rfft_npu(input, n = 0, norm = "backward"): + result = npu_aicore_ops.fft1d(input, n, norm, mode = "r2c", forward = True) + result = tf.complex(result[:,:,:,0],result[:,:,:,1]) + return result + +def irfft_npu(input, n = 0, norm = "backward"): + re = tf.real(input) + im = tf.imag(input) + input = [re,im] + input = tf.transpose(input, [1,2,3,0]) + result = npu_aicore_ops.fft1d(input, n, norm, mode = "c2r", forward = False) + return result + +def cfft_npu(input, n = 0, norm = "backward"): + re = tf.real(input) + im = tf.imag(input) + input = [re,im] + input = tf.transpose(input, [1,2,3,0]) + result = npu_aicore_ops.fft1d(input, n, norm, mode = "c2c", forward = True) + result = tf.complex(result[:,:,:,0],result[:,:,:,1]) + return result + +def icfft_npu(input, n = 0, norm = "backward"): + re = tf.real(input) + im = tf.imag(input) + input = [re,im] + input = tf.transpose(input, [1,2,3,0]) + result = npu_aicore_ops.fft1d(input, n, norm, mode = "c2c", forward = False) + result = tf.complex(result[:,:,:,0],result[:,:,:,1]) + return result + +@ops.RegisterGradient("FastGelu") +def _fast_gelu_grad(op, grad): + """The gradient for `fast_gelu`. + + Args: + op: The `fast_gelu` `Operation` that we are differentiating, which we can use + to find the inputs and outputs of the original op. + grad: Gradient with respect to the output of the `fast_gelu` op. + + Returns: + Gradients with respect to the input of `fast_gelu`. + """ + return [npu_aicore_ops.fast_gelu_grad(grad, op.inputs[0])] # List of one Tensor, since we have one input + + +def fast_gelu_v2(x, name=None): + """ fast_gelu_v2 operator interface implementation + + Args: + x: A input tensor with type is float16 or float32. + + Returns: + A tensor. 
+ """ + return npu_aicore_ops.fast_gelu_v2(x, name) + + +def centralization(x, axes, name=None): + """ + centralization op + return x - reduce_mean(x, axes) + """ + x = ops.convert_to_tensor(x, name="x") + result = npu_aicore_ops.centralization(x, axes, name=name) + return result + + +@ops.RegisterGradient("PRelu") +def prelu_grad(op, grad): + """Gradient for prelu""" + dx, da = npu_aicore_ops.p_relu_grad(grad, op.inputs[0], op.inputs[1]) + return [dx, da] + + +def prelu(x, weight): + """prelu op""" + return npu_aicore_ops.p_relu(x, weight) + + +def _truncate_seed(seed): + return seed % _MAXINT32 # Truncate to fit into 32-bit integer + + +def dropout_v3(x, keep_prob, noise_shape=None, seed=None, name=None): + """The gradient for `gelu`. + + Args: + x: A tensor with type is float. + keep_prob: A tensor, float, rate of every element reserved. + noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random + generated. + seed: Random seed. + name: Layer name. + + Returns: + A tensor. + """ + x = ops.convert_to_tensor(x, name="x") + if not x.dtype.is_floating: + raise ValueError("x must be a floating point tensor." + " Got a %s tensor instead." % x.dtype) + if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: + raise ValueError("keep_prob must be a float value or a scalar tensor in the " + "range (0, 1], got %g" % keep_prob) + if isinstance(keep_prob, float) and keep_prob == 1.0: + return x + seed, seed2 = random_seed.get_seed(seed) + noise_shape = _get_noise_shape(x, noise_shape) + gen_out = npu_aicore_ops.drop_out_gen_mask_v3(noise_shape, keep_prob, seed, seed2, name) + result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) + return result + + +@ops.RegisterGradient("DropOutDoMaskV3") +def _DropOutDoMaskV3Grad(op, grad): + result = npu_aicore_ops.drop_out_do_mask_v3(grad, op.inputs[1], op.inputs[2]) + return [result, None, None] + + +def dropout_v4(x, keep_prob, noise_shape=None, seed=None, output_dtype=dtypes.bool, name=None): + """The gradient for `gelu`. + + Args: + x: A tensor with type is float. + keep_prob: A tensor, float, rate of every element reserved. + noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random + generated. + seed: Random seed. + output_dtype: dtype of output tensor, default is bool. + name: Layer name. + + Returns: + A tensor. + """ + x = ops.convert_to_tensor(x, name="x") + if not x.dtype.is_floating: + raise ValueError("x must be a floating point tensor." + " Got a %s tensor instead." 
% x.dtype) + if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: + raise ValueError("keep_prob must be a float value or a scalar tensor in the " + "range (0, 1], got %g" % keep_prob) + if isinstance(keep_prob, float) and keep_prob == 1.0: + return x + seed, seed2 = random_seed.get_seed(seed) + noise_shape = _get_noise_shape(x, noise_shape) + gen_out = npu_aicore_ops.drop_out_gen_mask_v4(noise_shape, keep_prob, seed, seed2, output_dtype, name) + result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) + return result + + +def lru_cache_v2(index_list, data, cache, tag, is_last_call, pre_route_count, name=None): + """ + LRUCacheV2 op + + """ + is_last_call = ops.convert_to_tensor(is_last_call, name="is_last_call") + data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number = npu_aicore_ops.lru_cache_v2( + index_list, data, cache, tag, is_last_call, pre_route_count, name=name) + return [data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number] + + +def nonzero(x, transpose=False, output_type=dtypes.int64, name=None): + """ + nonezero op + Return the indices of the elementes that are non-zero. + Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes + in that dimension. The values in a are always tested and returned in row-major ,C-style order. + + """ + x = ops.convert_to_tensor(x, name="x") + result = npu_aicore_ops.non_zero(x, transpose, output_type, name=name) + return result + + +def nonzerowithvalue(x, transpose=False, output_type=dtypes.int64, name=None): + """ + nonezero op + Return the indices of the elementes that are non-zero. + Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes + in that dimension. The values in a are always tested and returned in row-major ,C-style order. + + """ + x = ops.convert_to_tensor(x, name="x") + result = npu_aicore_ops.non_zero_with_value(x, transpose, output_type, name=name) + return result + + +# go/tf-wildcard-import + + +def layer_norm(x, gamma, beta, begin_norm_axis=0, begin_params_axis=0, epsilon=0.0000001, name=None): + """ LayerNorm operator interface implementation + + Args: + x: A input tensor with type is float16 or float32. + gamma: scaling operation to normalized tensor. + beta: add offset to normalized tensor. + begin_norm_axis: A optional attribute, the type is int32. Defaults to 0. + begin_params_axis: A optional attribute, the type is int32. Defaults to 0. + epsilon: A optional attribute, the type is int32. Defaults to 0.0000001. + name: Layer name. + + Returns: + A tensor. + """ + res, mean, variance = npu_aicore_ops.fused_layer_norm(x, gamma, beta, begin_norm_axis, + begin_params_axis, epsilon, name) + + return [res, mean, variance] + + +@ops.RegisterGradient("FusedLayerNorm") +def _layer_norm_grad(op, *grad): + pd_x, pd_gamma, pd_beta = npu_aicore_ops.fused_layer_norm_grad(grad[0], op.inputs[0], op.outputs[2], op.outputs[1], + op.inputs[1]) + + return [pd_x, pd_gamma, pd_beta] + + +def prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a=0.0, + rcut_r=0.0, rcut_r_smth=0.0, sel_a=None, sel_r=None, name=None): + """ + prod_env_mat_a op + Return the indices of the elementes that are non-zero. + Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes + in that dimension. The values in a are always tested and returned in row-major ,C-style order. 
+ + """ + sel_a = [] if sel_a is None else sel_a + sel_r = [] if sel_r is None else sel_r + coord = ops.convert_to_tensor(coord, name="coord") + types = ops.convert_to_tensor(types, name="type") + natoms = ops.convert_to_tensor(natoms, name="natoms") + box = ops.convert_to_tensor(box, name="box") + mesh = ops.convert_to_tensor(mesh, name="mesh") + davg = ops.convert_to_tensor(davg, name="davg") + dstd = ops.convert_to_tensor(dstd, name="dstd") + result = npu_aicore_ops.prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a, rcut_r, + rcut_r_smth, sel_a, sel_r, name) + + return result + + +def prodvirialsea(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): + """ + ProdVirialSeA op + """ + net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") + in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") + rij = ops.convert_to_tensor(rij, name="rij") + nlist = ops.convert_to_tensor(nlist, name="nlist") + natoms = ops.convert_to_tensor(natoms, name="natoms") + result = npu_aicore_ops.prod_virial_se_a(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel, n_r_sel, + name=name) + return result + + +def prodforcesea(net_deriv, in_deriv, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): + """ + ProdForceSeA op + """ + net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") + in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") + nlist = ops.convert_to_tensor(nlist, name="nlist") + natoms = ops.convert_to_tensor(natoms, name="natoms") + result = npu_aicore_ops.prod_force_se_a(net_deriv, in_deriv, nlist, natoms, n_a_sel, n_r_sel, + name=name) + return result + + +def tabulatefusionsea(table, table_info, em_x, em, last_layer_size=128, name=None): + """ + TabulateFusionSeA op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + result = npu_aicore_ops.tabulate_fusion_se_a(table, table_info, em_x, em, last_layer_size, name=name) + return result + + +def tabulatefusionseagrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): + """ + TabulateFusionSeAGrad op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") + dy_dem = ops.convert_to_tensor(dy_dem, name="dy_dem") + result = npu_aicore_ops.tabulate_fusion_se_a_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) + return result + + +def tabulatefusion(table, table_info, em_x, em, last_layer_size=128, name=None): + """ + TabulateFusion op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + result = npu_aicore_ops.tabulate_fusion(table, table_info, em_x, em, last_layer_size, name=name) + return result + + +def tabulatefusiongrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): + """ + TabulateFusionGrad op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") + dy_dem = 
ops.convert_to_tensor(dy_dem, name="dy_dem") + result = npu_aicore_ops.tabulate_fusion_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) + return result