diff --git a/tf_adapter/kernels/aicore/fft1_d_ops.cc b/tf_adapter/kernels/aicore/fft1_d_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0375b6f3988c7cc225a7c194918cfa6b2b9e4475
--- /dev/null
+++ b/tf_adapter/kernels/aicore/fft1_d_ops.cc
@@ -0,0 +1,22 @@
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+
+namespace tensorflow {
+class Fft1DOp : public OpKernel {
+ public:
+  explicit Fft1DOp(OpKernelConstruction *context) : OpKernel(context) {
+    LOG(INFO) << "new Fft1DOp";
+  }
+  ~Fft1DOp() = default;
+  void Compute(OpKernelContext *context) override {
+    (void) context;
+    LOG(INFO) << "Fft1D Compute";
+  }
+  bool IsExpensive() override {
+    LOG(INFO) << "in Fft1D IsExpensive";
+    return false;
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("FFT1D").Device(DEVICE_CPU), Fft1DOp);
+} // namespace tensorflow
diff --git a/tf_adapter/ops/aicore/npu_aicore_ops.cc b/tf_adapter/ops/aicore/npu_aicore_ops.cc
index fbe6035bdb24c97067cf9517a9dd736a31070c3d..b53ccb20c2fabe41afa49b5b582193dbcde1a1db 100644
--- a/tf_adapter/ops/aicore/npu_aicore_ops.cc
+++ b/tf_adapter/ops/aicore/npu_aicore_ops.cc
@@ -1,782 +1,856 @@
-/*
- * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
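Note: the Fft1DOp CPU kernel above is a host-side placeholder (its Compute only logs), so the actual transform is expected to come from the AI Core implementation behind the FFT1D op registered further down in npu_aicore_ops.cc. As a minimal sketch of where that op's N / 2 + 1 (r2c) and 2 * (N - 1) (c2r) output lengths come from, assuming complex values are packed as a trailing [real, imag] axis of size 2, here is a NumPy illustration (numpy is used purely as an aid here, the adapter does not depend on it):

```python
import numpy as np

# Real-to-complex ("r2c", forward): a length-N real signal produces
# N // 2 + 1 complex bins, which FFT1D stores as [..., N // 2 + 1, 2].
x = np.random.rand(8).astype(np.float32)            # N = 8
spec = np.fft.rfft(x)                               # shape (5,) == 8 // 2 + 1
packed = np.stack([spec.real, spec.imag], axis=-1)  # shape (5, 2)

# Complex-to-real ("c2r", inverse): N bins recover 2 * (N - 1) samples
# when no explicit length attribute n is given.
recovered = np.fft.irfft(spec)                      # shape (8,) == 2 * (5 - 1)

print(packed.shape, recovered.shape)                # (5, 2) (8,)
```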
- */ - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -using shape_inference::DimensionHandle; -using shape_inference::InferenceContext; -using shape_inference::ShapeHandle; -using shape_inference::UnchangedShape; - -namespace { -REGISTER_OP("FastGelu") - .Input("features: T") - .Output("activations: T") - .Attr("T: realnumbertype") - .SetShapeFn(tensorflow::shape_inference::UnchangedShape); - -REGISTER_OP("FastGeluV2") - .Input("features: T") - .Output("activations: T") - .Attr("T: realnumbertype") - .SetShapeFn(tensorflow::shape_inference::UnchangedShape); - -REGISTER_OP("FastGeluGrad") - .Input("gradients: T") - .Input("features: T") - .Output("backprops: T") - .Attr("T: realnumbertype") - .SetShapeFn(tensorflow::shape_inference::MergeBothInputsShapeFn); - -REGISTER_OP("DynamicGruV2") - .Input("x: T") - .Input("weight_input: T") - .Input("weight_hidden: T") - .Input("bias_input: T") - .Input("bias_hidden: T") - .Input("seq_length: int32") - .Input("init_h: T") - .Output("y: T") - .Output("output_h: T") - .Output("update: T") - .Output("reset: T") - .Output("new: T") - .Output("hidden_new: T") - .Attr("T: {float16, float32}") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("activation: string") - .Attr("gate_order: string") - .Attr("reset_after: bool = true") - .Attr("is_training: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto weight_hidden_shape = c->input(2); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto hidden_size = c->Dim(weight_hidden_shape, 0); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, c->UnknownShape()); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - return Status::OK(); - }); - -REGISTER_OP("DynamicGruV2Grad") - .Input("x: T") - .Input("weight_input: T") - .Input("weight_hidden: T") - .Input("y: T") - .Input("init_h: T") - .Input("h: T") - .Input("dy: T") - .Input("dh: T") - .Input("update: T") - .Input("reset: T") - .Input("new: T") - .Input("hidden_new: T") - .Input("seq_length: int32") - .Output("dw_input: T") - .Output("dw_hidden: T") - .Output("db_input: T") - .Output("db_hidden: T") - .Output("dx: T") - .Output("dh_prev: T") - .Attr("T: {float16, float32}") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("gate_order: string") - .Attr("reset_after: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = 
c->input(0); - auto weight_hidden_shape = c->input(2); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto hidden_size = c->Dim(weight_hidden_shape, 0); - auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); - auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); - auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); - auto output_db_input_shape = c->MakeShape({hidden_size_1}); - auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); - auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); - auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); - c->set_output(0, output_dw_input_shape); - c->set_output(1, output_dw_hidden_shape); - c->set_output(2, output_db_input_shape); - c->set_output(3, output_db_hidden_shape); - c->set_output(4, output_dx_shape); - c->set_output(5, output_dh_prev_shape); - return Status::OK(); - }); - -REGISTER_OP("DynamicAUGRU") -.Input("x: T") -.Input("weight_input: T") -.Input("weight_hidden: T") -.Input("weight_att: T") -.Input("bias_input: T") -.Input("bias_hidden: T") -.Input("seq_length: int32") -.Input("init_h: T") -.Output("y: T") -.Output("output_h: T") -.Output("update: T") -.Output("update_att: T") -.Output("reset: T") -.Output("new: T") -.Output("hidden_new: T") -.Attr("T: {float16, float32}") -.Attr("direction: string") -.Attr("cell_depth: int = 1") -.Attr("keep_prob: float = 1.0") -.Attr("cell_clip: float = -1.0") -.Attr("num_proj: int = 0") -.Attr("time_major: bool = true") -.Attr("activation: string") -.Attr("gate_order: string") -.Attr("reset_after: bool = true") -.Attr("is_training: bool = true") -.SetIsStateful() -.SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto weight_hidden_shape = c->input(2); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto hidden_size = c->Dim(weight_hidden_shape, 0); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, c->UnknownShape()); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - c->set_output(6, c->UnknownShape()); - return Status::OK(); -}); - -REGISTER_OP("DynamicAUGRUGrad") -.Input("x: T") -.Input("weight_input: T") -.Input("weight_hidden: T") -.Input("weight_att: T") -.Input("y: T") -.Input("init_h: T") -.Input("h: T") -.Input("dy: T") -.Input("dh: T") -.Input("update: T") -.Input("update_att: T") -.Input("reset: T") -.Input("new: T") -.Input("hidden_new: T") -.Input("seq_length: int32") -.Output("dw_input: T") -.Output("dw_hidden: T") -.Output("db_input: T") -.Output("db_hidden: T") -.Output("dx: T") -.Output("dh_prev: T") -.Output("dw_att: T") -.Attr("T: {float16, float32}") -.Attr("direction: string") -.Attr("cell_depth: int = 1") -.Attr("keep_prob: float = 1.0") -.Attr("cell_clip: float = -1.0") 
-.Attr("num_proj: int = 0") -.Attr("time_major: bool = true") -.Attr("gate_order: string") -.Attr("reset_after: bool = true") -.SetIsStateful() -.SetShapeFn([](InferenceContext *c) { -auto input_shape = c->input(0); -auto weight_hidden_shape = c->input(2); -auto num_step = c->Dim(input_shape, 0); -auto batch_size = c->Dim(input_shape, 1); -auto input_size = c->Dim(input_shape, 2); -auto hidden_size = c->Dim(weight_hidden_shape, 0); -auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); -auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); -auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); -auto output_db_input_shape = c->MakeShape({hidden_size_1}); -auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); -auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); -auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); -auto output_dw_att_shape = c->MakeShape({num_step, batch_size}); -c->set_output(0, output_dw_input_shape); -c->set_output(1, output_dw_hidden_shape); -c->set_output(2, output_db_input_shape); -c->set_output(3, output_db_hidden_shape); -c->set_output(4, output_dx_shape); -c->set_output(5, output_dh_prev_shape); -c->set_output(6, output_dw_att_shape); -return Status::OK(); -}); - -REGISTER_OP("DynamicRnn") - .Input("x: T") - .Input("w: T") - .Input("b: T") - .Input("seq_length: int32") - .Input("init_h: T") - .Input("init_c: T") - .Output("y: T") - .Output("output_h: T") - .Output("output_c: T") - .Output("i: T") - .Output("j: T") - .Output("f: T") - .Output("o: T") - .Output("tanhc: T") - .Attr("T: {float16, float32}") - .Attr("cell_type: string") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("use_peephole: bool = false") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("activation: string") - .Attr("forget_bias: float = 0.0") - .Attr("is_training: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto w = c->input(1); - auto hidden_size_total = c->Dim(w, 0); - DimensionHandle hidden_size; - TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); - - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, output_c_shape); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - c->set_output(6, c->UnknownShape()); - c->set_output(7, c->UnknownShape()); - return Status::OK(); - }); - -REGISTER_OP("DynamicRnnV2") - .Input("x: T") - .Input("w: T") - .Input("b: T") - .Input("init_h: T") - .Input("init_c: T") - .Output("y: T") - .Output("output_h: T") - 
.Output("output_c: T") - .Output("i: T") - .Output("j: T") - .Output("f: T") - .Output("o: T") - .Output("tanhc: T") - .Attr("T: {float16, float32}") - .Attr("cell_type: string") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("use_peephole: bool = false") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("activation: string") - .Attr("forget_bias: float = 0.0") - .Attr("is_training: bool = true") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto w = c->input(1); - auto hidden_size_total = c->Dim(w, 0); - DimensionHandle hidden_size; - TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); - int32_t num_proj = 0; - TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); - ShapeHandle output_y_shape; - if (num_proj == 0) { - output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); - } else { - auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); - DimensionHandle output_hidden_size; - TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); - output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); - } - auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); - auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); - - c->set_output(0, output_y_shape); - c->set_output(1, output_h_shape); - c->set_output(2, output_c_shape); - c->set_output(3, c->UnknownShape()); - c->set_output(4, c->UnknownShape()); - c->set_output(5, c->UnknownShape()); - c->set_output(6, c->UnknownShape()); - c->set_output(7, c->UnknownShape()); - return Status::OK(); - }); - -REGISTER_OP("DynamicRnnGrad") - .Input("x: T") - .Input("w: T") - .Input("b: T") - .Input("y: T") - .Input("init_h: T") - .Input("init_c: T") - .Input("h: T") - .Input("c: T") - .Input("dy: T") - .Input("dh: T") - .Input("dc: T") - .Input("i: T") - .Input("j: T") - .Input("f: T") - .Input("o: T") - .Input("tanhc: T") - .Output("dw: T") - .Output("db: T") - .Output("dx: T") - .Output("dh_prev: T") - .Output("dc_prev: T") - .Attr("T: {float16, float32}") - .Attr("cell_type: string") - .Attr("direction: string") - .Attr("cell_depth: int = 1") - .Attr("use_peephole: bool = false") - .Attr("keep_prob: float = 1.0") - .Attr("cell_clip: float = -1.0") - .Attr("num_proj: int = 0") - .Attr("time_major: bool = true") - .Attr("forget_bias: float = 0.0") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - auto num_step = c->Dim(input_shape, 0); - auto batch_size = c->Dim(input_shape, 1); - auto input_size = c->Dim(input_shape, 2); - auto w = c->input(1); - auto hidden_size_total = c->Dim(w, 0); - auto hidden_size_4 = c->Dim(w, 1); - DimensionHandle hidden_size; - TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); - - auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); - auto output_dw_shape = c->MakeShape({hidden_size_total, hidden_size_4}); - auto output_db_shape = c->MakeShape({hidden_size_4}); - auto output_dh_prev_shape = c->MakeShape({1, batch_size, hidden_size}); - auto output_dc_prev_shape = c->MakeShape({1, batch_size, hidden_size}); - c->set_output(0, output_dw_shape); - c->set_output(1, output_db_shape); - c->set_output(2, output_dx_shape); - 
c->set_output(3, output_dh_prev_shape); - c->set_output(4, output_dc_prev_shape); - return Status::OK(); - }); - -REGISTER_OP("LRUCacheV2") - .Input("index_list: T") - .Input("data: Ref(dtype)") - .Input("cache: Ref(dtype)") - .Input("tag: Ref(T)") - .Input("is_last_call: T") - .Output("data1:Ref(dtype)") - .Output("cache1: Ref(dtype)") - .Output("tag1: Ref(dtype)") - .Output("index_offset_list: T") - .Output("not_in_cache_index_list: T") - .Output("not_in_cache_number: T") - .Attr("T: {int32, int64}") - .Attr("dtype: {float32}") - .Attr("pre_route_count: int") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(1)); - c->set_output(1, c->input(2)); - c->set_output(2, c->input(3)); - c->set_output(3, c->input(0)); - c->set_output(4, c->input(0)); - c->set_output(5, c->MakeShape({1})); - return Status::OK(); - }); - -REGISTER_OP("Centralization") - .Input("x: T") - .Output("y: T") - .Attr("T: {float16, float32}") - .Attr("axes: list(int)") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("PRelu") - .Input("x: T") - .Input("weight: T") - .Output("y: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("DropOutDoMaskV3") - .Input("x: T") - .Input("mask: uint8") - .Input("keep_prob: T") - .Output("y: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - return Status::OK(); - }); - -REGISTER_OP("PReluGrad") - .Input("grads: T") - .Input("features: T") - .Input("weights: T") - .Output("dx: T") - .Output("da: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - c->set_output(1, c->input(2)); - return Status::OK(); - }); - -REGISTER_OP("NonZero") - .Input("x:T") - .Output("y:output_type") - .Attr("transpose:bool = false") - .Attr("T:numbertype") - .Attr("output_type:{int32, int64} = DT_INT64") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto rank = InferenceContext::Rank(c->input(0)); - c->set_output(0, c->MakeShape({rank, -1})); - return Status::OK(); - }); - -REGISTER_OP("NonZeroWithValue") - .Input("x:T") - .Output("value:T") - .Output("index:output_type") - .Output("count:output_type") - .Attr("transpose:bool = false") - .Attr("T:numbertype") - .Attr("output_type:{int32, int64} = DT_INT32") - .SetIsStateful() - .SetShapeFn([](InferenceContext *c) { - auto input_shape = c->input(0); - int64_t dim1 = InferenceContext::Value(c->Dim(input_shape, 0)); - int64_t dim2 = InferenceContext::Value(c->Dim(input_shape, 1)); - int64_t value_num = dim1 * dim2; - int64_t index_dim = 2 * dim1 * dim2; - int64_t count_dim = 1; - - c->set_output(0, c->MakeShape({c->MakeDim(value_num)})); - c->set_output(1, c->MakeShape({c->MakeDim(index_dim)})); - c->set_output(2, c->MakeShape({c->MakeDim(count_dim)})); - return Status::OK(); - }); - -REGISTER_OP("FusedLayerNorm") - .Input("x: T") - .Input("gamma: T") - .Input("beta: T") - .Output("y: T") - .Output("mean: T") - .Output("variance: T") - .Attr("T: {float16, float32}") - .Attr("begin_norm_axis: int = 0") - .Attr("begin_params_axis: int = 0") - .Attr("epsilon: float = 0.0000001") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - int32_t real_dim_num 
= InferenceContext::Rank(c->input(0)); - int32_t begin_norm_axis = 0; - TF_RETURN_IF_ERROR(c->GetAttr("begin_norm_axis", &begin_norm_axis)); - if (begin_norm_axis < 0) { - begin_norm_axis += real_dim_num; - } - if (begin_norm_axis < 0 || begin_norm_axis >= real_dim_num) { - return errors::InvalidArgument("begin_norm_axis is invalid"); - } - ShapeHandle input_shape_handle; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), real_dim_num, &input_shape_handle)); - ShapeHandle out_shape_handle; - for (int32_t i = 0; i < real_dim_num; ++i) { - DimensionHandle tmp_dim_handle = c->Dim(input_shape_handle, i); - if (i >= begin_norm_axis) { - tmp_dim_handle = c->MakeDim(1); - TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_handle, i, tmp_dim_handle, &out_shape_handle)); - } - } - c->set_output(0, c->input(0)); - c->set_output(1, out_shape_handle); - c->set_output(2, out_shape_handle); - return Status::OK(); - }); - -REGISTER_OP("FusedLayerNormGrad") - .Input("dy: T") - .Input("x: T") - .Input("variance: T") - .Input("mean: T") - .Input("gamma: T") - .Output("pd_x: T") - .Output("pd_gamma: T") - .Output("pd_beta: T") - .Attr("T: {float16, float32}") - .SetIsStateful() - .SetShapeFn([](shape_inference::InferenceContext *c) { - c->set_output(0, c->input(0)); - c->set_output(1, c->input(4)); - c->set_output(2, c->input(4)); - return Status::OK(); - }); - -REGISTER_OP("GetShape") - .Input("x: T") - .Output("y: int32") - .Attr("N: int = 1") - .Attr("T: {float16, float32, uint8}") - .SetShapeFn([](InferenceContext* c) { - int64_t sumSize = 0; - for (int32_t i = 0; i < c->num_inputs(); i++) { - sumSize += InferenceContext::Rank(c->input(i)); - } - c->set_output(0, c->MakeShape({c->MakeDim(sumSize)})); - return Status::OK(); - }); - -REGISTER_OP("ProdEnvMatA") - .Input("coord: T") - .Input("type:int32") - .Input("natoms:int32") - .Input("box: T") - .Input("mesh:int32") - .Input("davg: T") - .Input("dstd: T") - .Output("descrpt: T") - .Output("descrpt_deriv: T") - .Output("rij: T") - .Output("nlist:int32") - .Attr("T: {float16, float32}") - .Attr("rcut_a: float = 0.0") - .Attr("rcut_r: float = 0.0") - .Attr("rcut_r_smth: float = 0.0") - .Attr("sel_a: list(int)") - .Attr("sel_r: list(int)") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto coord_shape = c->input(0); - int64_t nsample = InferenceContext::Value(c->Dim(coord_shape, 0)); - int64_t nloc = 12288; - int64_t nnei = 0; - std::vector sel_a; - TF_RETURN_IF_ERROR(c->GetAttr("sel_a", &sel_a)); - for (size_t i = 0; i < sel_a.size(); ++i) { - nnei = nnei + sel_a[i]; - } - int64_t des = nloc * nnei * 4; - int64_t des_a = des * 3; - int64_t rij = nloc * nnei * 3; - int64_t nlist = nloc * nnei; - c->set_output(0, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des)})); - c->set_output(1, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des_a)})); - c->set_output(2, c->MakeShape({c->MakeDim(nsample), c->MakeDim(rij)})); - c->set_output(3, c->MakeShape({c->MakeDim(nsample), c->MakeDim(nlist)})); - return Status::OK(); - }); - -REGISTER_OP("ProdVirialSeA") - .Input("net_deriv:T") - .Input("in_deriv:T") - .Input("rij:T") - .Input("nlist:int32") - .Input("natoms:int32") - .Output("virial:T") - .Output("atom_virial:T") - .Attr("n_a_sel:int = 0") - .Attr("n_r_sel:int = 0") - .Attr("T: {float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(0); - auto nframes = c->Dim(input_shape, 0); - ShapeHandle virial_shape = c->MakeShape({nframes, 9}); - c->set_output(0, virial_shape); - ShapeHandle atom_virial_shape = 
c->MakeShape({nframes, 254952}); - c->set_output(1, atom_virial_shape); - return Status::OK(); - }); - -REGISTER_OP("ProdForceSeA") - .Input("net_deriv:T") - .Input("in_deriv:T") - .Input("nlist:int32") - .Input("natoms:int32") - .Output("force:T") - .Attr("n_a_sel:int = 0") - .Attr("n_r_sel:int = 0") - .Attr("T: {float32}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(0); - auto nframes = c->Dim(input_shape, 0); - ShapeHandle force_shape = c->MakeShape({nframes, 84984}); - c->set_output(0, force_shape); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusionSeA") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Output("descriptor:T") - .Attr("last_layer_size:int") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(3); - auto nloc = c->Dim(input_shape, 0); - - int32_t last_layer_size; - TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); - ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); - c->set_output(0, out_shape); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusionSeAGrad") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Input("dy:T") - .Input("descriptor:T") - .Output("dy_dem_x:T") - .Output("dy_dem:T") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - c->set_output(0, c->input(2)); - c->set_output(1, c->input(3)); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusion") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Output("descriptor:T") - .Attr("last_layer_size:int") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - auto input_shape = c->input(3); - auto nloc = c->Dim(input_shape, 0); - - int32_t last_layer_size; - TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); - ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); - c->set_output(0, out_shape); - return Status::OK(); - }); - -REGISTER_OP("TabulateFusionGrad") - .Input("table:T") - .Input("table_info:T") - .Input("em_x:T") - .Input("em:T") - .Input("dy:T") - .Input("descriptor:T") - .Output("dy_dem_x:T") - .Output("dy_dem:T") - .Attr("T: {float16, float32, float64}") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - c->set_output(0, c->input(2)); - c->set_output(1, c->input(3)); - return Status::OK(); - }); -} // namespace -} // namespace tensorflow +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+using shape_inference::UnchangedShape;
+
+namespace {
+
+REGISTER_OP("FFT1D")
+    .Input("input: float32")
+    .Output("output: float32")
+    .Attr("n: int")
+    .Attr("norm: string")
+    .Attr("mode: string")
+    .Attr("forward: bool")
+    .SetShapeFn([](InferenceContext *c) {
+      string mode;
+      bool forward;
+      TF_RETURN_IF_ERROR(c->GetAttr("mode", &mode));
+      TF_RETURN_IF_ERROR(c->GetAttr("forward", &forward));
+      if (mode == "r2c" && forward == true)
+      {
+        auto input_shape = c->input(0);
+        auto batch = c->Dim(input_shape, 0);
+        auto inner_batch = c->Dim(input_shape, 1);
+        int32_t n;
+        TF_RETURN_IF_ERROR(c->GetAttr("n", &n));
+        ShapeHandle output_dim;
+        if (n == 0)
+        {
+          int64_t N = InferenceContext::Value(c->Dim(input_shape, 2));
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(N / 2 + 1), 2});
+        }
+        else
+        {
+          auto N = n / 2 + 1;
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(N), 2});
+        }
+        c->set_output(0, output_dim);
+      }
+      if (mode == "c2r" && forward == false)
+      {
+        auto input_shape = c->input(0);
+        auto batch = c->Dim(input_shape, 0);
+        auto inner_batch = c->Dim(input_shape, 1);
+        int32_t n;
+        TF_RETURN_IF_ERROR(c->GetAttr("n", &n));
+        ShapeHandle output_dim;
+        if (n == 0)
+        {
+          int64_t N = InferenceContext::Value(c->Dim(input_shape, 2));
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(2 * (N - 1))});
+        }
+        else
+        {
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(n)});
+        }
+        c->set_output(0, output_dim);
+      }
+      if (mode == "c2c")
+      {
+        auto input_shape = c->input(0);
+        auto batch = c->Dim(input_shape, 0);
+        auto inner_batch = c->Dim(input_shape, 1);
+        int32_t n;
+        TF_RETURN_IF_ERROR(c->GetAttr("n", &n));
+        ShapeHandle output_dim;
+        if (n == 0)
+        {
+          int64_t N = InferenceContext::Value(c->Dim(input_shape, 2));
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(N), 2});
+        }
+        else
+        {
+          output_dim = c->MakeShape({batch, inner_batch, c->MakeDim(n), 2});
+        }
+        c->set_output(0, output_dim);
+      }
+      return Status::OK();
+    });
+
+REGISTER_OP("FastGelu")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(tensorflow::shape_inference::UnchangedShape);
+
+REGISTER_OP("FastGeluV2")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(tensorflow::shape_inference::UnchangedShape);
+
+REGISTER_OP("FastGeluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Output("backprops: T")
+    .Attr("T: realnumbertype")
+    .SetShapeFn(tensorflow::shape_inference::MergeBothInputsShapeFn);
+
+REGISTER_OP("DynamicGruV2")
+    .Input("x: T")
+    .Input("weight_input: T")
+    .Input("weight_hidden: T")
+    .Input("bias_input: T")
+    .Input("bias_hidden: T")
+    .Input("seq_length: int32")
+    .Input("init_h: T")
+    .Output("y: T")
+    .Output("output_h: T")
+    .Output("update: T")
+    .Output("reset: T")
+    .Output("new: T")
+    .Output("hidden_new: T")
+    .Attr("T: {float16, float32}")
+    .Attr("direction: string")
+    .Attr("cell_depth: int = 1")
+    .Attr("keep_prob: float = 1.0")
+    .Attr("cell_clip: float = -1.0")
+    .Attr("num_proj: int = 0")
+    .Attr("time_major: bool = true")
+    .Attr("activation: string")
+    .Attr("gate_order: string")
+    .Attr("reset_after: bool = true")
+    .Attr("is_training: bool = true")
+
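Note: the FFT1D shape function above only sets an output shape when one of its three mode branches matches. A minimal Python mirror of those rules, assuming a rank-3 real input for r2c and a trailing size-2 real/imag axis for complex inputs (the helper name and signature are illustrative only, not part of the adapter):

```python
def fft1d_output_shape(input_shape, mode, forward, n=0):
    """Illustrative mirror of the FFT1D shape function registered above.

    input_shape is (batch, inner_batch, N) for r2c and
    (batch, inner_batch, N, 2) for complex inputs; n == 0 means
    "use the input length", matching the C++ branches.
    """
    batch, inner_batch, big_n = input_shape[0], input_shape[1], input_shape[2]
    if mode == "r2c" and forward:
        length = big_n // 2 + 1 if n == 0 else n // 2 + 1
        return (batch, inner_batch, length, 2)
    if mode == "c2r" and not forward:
        return (batch, inner_batch, 2 * (big_n - 1) if n == 0 else n)
    if mode == "c2c":
        return (batch, inner_batch, big_n if n == 0 else n, 2)
    return None  # no branch matched; the C++ code leaves the output unset


# e.g. a (4, 3, 400) real input, r2c forward -> (4, 3, 201, 2)
assert fft1d_output_shape((4, 3, 400), "r2c", True) == (4, 3, 201, 2)
```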
.SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto weight_hidden_shape = c->input(2); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto hidden_size = c->Dim(weight_hidden_shape, 0); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); + c->set_output(2, c->UnknownShape()); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + return Status::OK(); + }); + +REGISTER_OP("DynamicGruV2Grad") + .Input("x: T") + .Input("weight_input: T") + .Input("weight_hidden: T") + .Input("y: T") + .Input("init_h: T") + .Input("h: T") + .Input("dy: T") + .Input("dh: T") + .Input("update: T") + .Input("reset: T") + .Input("new: T") + .Input("hidden_new: T") + .Input("seq_length: int32") + .Output("dw_input: T") + .Output("dw_hidden: T") + .Output("db_input: T") + .Output("db_hidden: T") + .Output("dx: T") + .Output("dh_prev: T") + .Attr("T: {float16, float32}") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("gate_order: string") + .Attr("reset_after: bool = true") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto weight_hidden_shape = c->input(2); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto hidden_size = c->Dim(weight_hidden_shape, 0); + auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); + auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); + auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); + auto output_db_input_shape = c->MakeShape({hidden_size_1}); + auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); + auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); + auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); + c->set_output(0, output_dw_input_shape); + c->set_output(1, output_dw_hidden_shape); + c->set_output(2, output_db_input_shape); + c->set_output(3, output_db_hidden_shape); + c->set_output(4, output_dx_shape); + c->set_output(5, output_dh_prev_shape); + return Status::OK(); + }); + +REGISTER_OP("DynamicAUGRU") +.Input("x: T") +.Input("weight_input: T") +.Input("weight_hidden: T") +.Input("weight_att: T") +.Input("bias_input: T") +.Input("bias_hidden: T") +.Input("seq_length: int32") +.Input("init_h: T") +.Output("y: T") +.Output("output_h: T") +.Output("update: T") +.Output("update_att: T") +.Output("reset: T") +.Output("new: T") +.Output("hidden_new: T") +.Attr("T: {float16, float32}") +.Attr("direction: string") +.Attr("cell_depth: int = 1") +.Attr("keep_prob: float = 1.0") +.Attr("cell_clip: float = -1.0") +.Attr("num_proj: int = 0") +.Attr("time_major: bool = true") +.Attr("activation: 
string") +.Attr("gate_order: string") +.Attr("reset_after: bool = true") +.Attr("is_training: bool = true") +.SetIsStateful() +.SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto weight_hidden_shape = c->input(2); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto hidden_size = c->Dim(weight_hidden_shape, 0); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); + c->set_output(2, c->UnknownShape()); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + c->set_output(6, c->UnknownShape()); + return Status::OK(); +}); + +REGISTER_OP("DynamicAUGRUGrad") +.Input("x: T") +.Input("weight_input: T") +.Input("weight_hidden: T") +.Input("weight_att: T") +.Input("y: T") +.Input("init_h: T") +.Input("h: T") +.Input("dy: T") +.Input("dh: T") +.Input("update: T") +.Input("update_att: T") +.Input("reset: T") +.Input("new: T") +.Input("hidden_new: T") +.Input("seq_length: int32") +.Output("dw_input: T") +.Output("dw_hidden: T") +.Output("db_input: T") +.Output("db_hidden: T") +.Output("dx: T") +.Output("dh_prev: T") +.Output("dw_att: T") +.Attr("T: {float16, float32}") +.Attr("direction: string") +.Attr("cell_depth: int = 1") +.Attr("keep_prob: float = 1.0") +.Attr("cell_clip: float = -1.0") +.Attr("num_proj: int = 0") +.Attr("time_major: bool = true") +.Attr("gate_order: string") +.Attr("reset_after: bool = true") +.SetIsStateful() +.SetShapeFn([](InferenceContext *c) { +auto input_shape = c->input(0); +auto weight_hidden_shape = c->input(2); +auto num_step = c->Dim(input_shape, 0); +auto batch_size = c->Dim(input_shape, 1); +auto input_size = c->Dim(input_shape, 2); +auto hidden_size = c->Dim(weight_hidden_shape, 0); +auto hidden_size_1 = c->Dim(weight_hidden_shape, 1); +auto output_dw_input_shape = c->MakeShape({input_size, hidden_size_1}); +auto output_dw_hidden_shape = c->MakeShape({hidden_size, hidden_size_1}); +auto output_db_input_shape = c->MakeShape({hidden_size_1}); +auto output_db_hidden_shape = c->MakeShape({hidden_size_1}); +auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); +auto output_dh_prev_shape = c->MakeShape({batch_size, hidden_size}); +auto output_dw_att_shape = c->MakeShape({num_step, batch_size}); +c->set_output(0, output_dw_input_shape); +c->set_output(1, output_dw_hidden_shape); +c->set_output(2, output_db_input_shape); +c->set_output(3, output_db_hidden_shape); +c->set_output(4, output_dx_shape); +c->set_output(5, output_dh_prev_shape); +c->set_output(6, output_dw_att_shape); +return Status::OK(); +}); + +REGISTER_OP("DynamicRnn") + .Input("x: T") + .Input("w: T") + .Input("b: T") + .Input("seq_length: int32") + .Input("init_h: T") + .Input("init_c: T") + .Output("y: T") + .Output("output_h: T") + .Output("output_c: T") + .Output("i: T") + .Output("j: T") + .Output("f: T") + .Output("o: T") + .Output("tanhc: T") + .Attr("T: {float16, 
float32}") + .Attr("cell_type: string") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("use_peephole: bool = false") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("activation: string") + .Attr("forget_bias: float = 0.0") + .Attr("is_training: bool = true") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto w = c->input(1); + auto hidden_size_total = c->Dim(w, 0); + DimensionHandle hidden_size; + TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); + + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); + c->set_output(2, output_c_shape); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + c->set_output(6, c->UnknownShape()); + c->set_output(7, c->UnknownShape()); + return Status::OK(); + }); + +REGISTER_OP("DynamicRnnV2") + .Input("x: T") + .Input("w: T") + .Input("b: T") + .Input("init_h: T") + .Input("init_c: T") + .Output("y: T") + .Output("output_h: T") + .Output("output_c: T") + .Output("i: T") + .Output("j: T") + .Output("f: T") + .Output("o: T") + .Output("tanhc: T") + .Attr("T: {float16, float32}") + .Attr("cell_type: string") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("use_peephole: bool = false") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("activation: string") + .Attr("forget_bias: float = 0.0") + .Attr("is_training: bool = true") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto w = c->input(1); + auto hidden_size_total = c->Dim(w, 0); + DimensionHandle hidden_size; + TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); + int32_t num_proj = 0; + TF_RETURN_IF_ERROR(c->GetAttr("num_proj", &num_proj)); + ShapeHandle output_y_shape; + if (num_proj == 0) { + output_y_shape = c->MakeShape({num_step, batch_size, hidden_size}); + } else { + auto num_proj_size = c->MakeDim(shape_inference::DimensionOrConstant(num_proj)); + DimensionHandle output_hidden_size; + TF_RETURN_IF_ERROR(c->Min(num_proj_size, hidden_size, &output_hidden_size)); + output_y_shape = c->MakeShape({num_step, batch_size, output_hidden_size}); + } + auto output_h_shape = c->MakeShape({num_step, batch_size, hidden_size}); + auto output_c_shape = c->MakeShape({num_step, batch_size, hidden_size}); + + c->set_output(0, output_y_shape); + c->set_output(1, output_h_shape); 
+ c->set_output(2, output_c_shape); + c->set_output(3, c->UnknownShape()); + c->set_output(4, c->UnknownShape()); + c->set_output(5, c->UnknownShape()); + c->set_output(6, c->UnknownShape()); + c->set_output(7, c->UnknownShape()); + return Status::OK(); + }); + +REGISTER_OP("DynamicRnnGrad") + .Input("x: T") + .Input("w: T") + .Input("b: T") + .Input("y: T") + .Input("init_h: T") + .Input("init_c: T") + .Input("h: T") + .Input("c: T") + .Input("dy: T") + .Input("dh: T") + .Input("dc: T") + .Input("i: T") + .Input("j: T") + .Input("f: T") + .Input("o: T") + .Input("tanhc: T") + .Output("dw: T") + .Output("db: T") + .Output("dx: T") + .Output("dh_prev: T") + .Output("dc_prev: T") + .Attr("T: {float16, float32}") + .Attr("cell_type: string") + .Attr("direction: string") + .Attr("cell_depth: int = 1") + .Attr("use_peephole: bool = false") + .Attr("keep_prob: float = 1.0") + .Attr("cell_clip: float = -1.0") + .Attr("num_proj: int = 0") + .Attr("time_major: bool = true") + .Attr("forget_bias: float = 0.0") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + auto num_step = c->Dim(input_shape, 0); + auto batch_size = c->Dim(input_shape, 1); + auto input_size = c->Dim(input_shape, 2); + auto w = c->input(1); + auto hidden_size_total = c->Dim(w, 0); + auto hidden_size_4 = c->Dim(w, 1); + DimensionHandle hidden_size; + TF_RETURN_IF_ERROR(c->Subtract(hidden_size_total, input_size, &hidden_size)); + + auto output_dx_shape = c->MakeShape({num_step, batch_size, input_size}); + auto output_dw_shape = c->MakeShape({hidden_size_total, hidden_size_4}); + auto output_db_shape = c->MakeShape({hidden_size_4}); + auto output_dh_prev_shape = c->MakeShape({1, batch_size, hidden_size}); + auto output_dc_prev_shape = c->MakeShape({1, batch_size, hidden_size}); + c->set_output(0, output_dw_shape); + c->set_output(1, output_db_shape); + c->set_output(2, output_dx_shape); + c->set_output(3, output_dh_prev_shape); + c->set_output(4, output_dc_prev_shape); + return Status::OK(); + }); + +REGISTER_OP("LRUCacheV2") + .Input("index_list: T") + .Input("data: Ref(dtype)") + .Input("cache: Ref(dtype)") + .Input("tag: Ref(T)") + .Input("is_last_call: T") + .Output("data1:Ref(dtype)") + .Output("cache1: Ref(dtype)") + .Output("tag1: Ref(dtype)") + .Output("index_offset_list: T") + .Output("not_in_cache_index_list: T") + .Output("not_in_cache_number: T") + .Attr("T: {int32, int64}") + .Attr("dtype: {float32}") + .Attr("pre_route_count: int") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(1)); + c->set_output(1, c->input(2)); + c->set_output(2, c->input(3)); + c->set_output(3, c->input(0)); + c->set_output(4, c->input(0)); + c->set_output(5, c->MakeShape({1})); + return Status::OK(); + }); + +REGISTER_OP("Centralization") + .Input("x: T") + .Output("y: T") + .Attr("T: {float16, float32}") + .Attr("axes: list(int)") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +REGISTER_OP("PRelu") + .Input("x: T") + .Input("weight: T") + .Output("y: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +REGISTER_OP("DropOutDoMaskV3") + .Input("x: T") + .Input("mask: uint8") + .Input("keep_prob: T") + .Output("y: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + 
c->set_output(0, c->input(0)); + return Status::OK(); + }); + +REGISTER_OP("PReluGrad") + .Input("grads: T") + .Input("features: T") + .Input("weights: T") + .Output("dx: T") + .Output("da: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + c->set_output(1, c->input(2)); + return Status::OK(); + }); + +REGISTER_OP("NonZero") + .Input("x:T") + .Output("y:output_type") + .Attr("transpose:bool = false") + .Attr("T:numbertype") + .Attr("output_type:{int32, int64} = DT_INT64") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto rank = InferenceContext::Rank(c->input(0)); + c->set_output(0, c->MakeShape({rank, -1})); + return Status::OK(); + }); + +REGISTER_OP("NonZeroWithValue") + .Input("x:T") + .Output("value:T") + .Output("index:output_type") + .Output("count:output_type") + .Attr("transpose:bool = false") + .Attr("T:numbertype") + .Attr("output_type:{int32, int64} = DT_INT32") + .SetIsStateful() + .SetShapeFn([](InferenceContext *c) { + auto input_shape = c->input(0); + int64_t dim1 = InferenceContext::Value(c->Dim(input_shape, 0)); + int64_t dim2 = InferenceContext::Value(c->Dim(input_shape, 1)); + int64_t value_num = dim1 * dim2; + int64_t index_dim = 2 * dim1 * dim2; + int64_t count_dim = 1; + + c->set_output(0, c->MakeShape({c->MakeDim(value_num)})); + c->set_output(1, c->MakeShape({c->MakeDim(index_dim)})); + c->set_output(2, c->MakeShape({c->MakeDim(count_dim)})); + return Status::OK(); + }); + +REGISTER_OP("FusedLayerNorm") + .Input("x: T") + .Input("gamma: T") + .Input("beta: T") + .Output("y: T") + .Output("mean: T") + .Output("variance: T") + .Attr("T: {float16, float32}") + .Attr("begin_norm_axis: int = 0") + .Attr("begin_params_axis: int = 0") + .Attr("epsilon: float = 0.0000001") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + int32_t real_dim_num = InferenceContext::Rank(c->input(0)); + int32_t begin_norm_axis = 0; + TF_RETURN_IF_ERROR(c->GetAttr("begin_norm_axis", &begin_norm_axis)); + if (begin_norm_axis < 0) { + begin_norm_axis += real_dim_num; + } + if (begin_norm_axis < 0 || begin_norm_axis >= real_dim_num) { + return errors::InvalidArgument("begin_norm_axis is invalid"); + } + ShapeHandle input_shape_handle; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), real_dim_num, &input_shape_handle)); + ShapeHandle out_shape_handle; + for (int32_t i = 0; i < real_dim_num; ++i) { + DimensionHandle tmp_dim_handle = c->Dim(input_shape_handle, i); + if (i >= begin_norm_axis) { + tmp_dim_handle = c->MakeDim(1); + TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_handle, i, tmp_dim_handle, &out_shape_handle)); + } + } + c->set_output(0, c->input(0)); + c->set_output(1, out_shape_handle); + c->set_output(2, out_shape_handle); + return Status::OK(); + }); + +REGISTER_OP("FusedLayerNormGrad") + .Input("dy: T") + .Input("x: T") + .Input("variance: T") + .Input("mean: T") + .Input("gamma: T") + .Output("pd_x: T") + .Output("pd_gamma: T") + .Output("pd_beta: T") + .Attr("T: {float16, float32}") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + c->set_output(1, c->input(4)); + c->set_output(2, c->input(4)); + return Status::OK(); + }); + +REGISTER_OP("GetShape") + .Input("x: T") + .Output("y: int32") + .Attr("N: int = 1") + .Attr("T: {float16, float32, uint8}") + .SetShapeFn([](InferenceContext* c) { + int64_t sumSize = 0; + for (int32_t i = 0; i < c->num_inputs(); i++) { + sumSize += 
InferenceContext::Rank(c->input(i)); + } + c->set_output(0, c->MakeShape({c->MakeDim(sumSize)})); + return Status::OK(); + }); + +REGISTER_OP("ProdEnvMatA") + .Input("coord: T") + .Input("type:int32") + .Input("natoms:int32") + .Input("box: T") + .Input("mesh:int32") + .Input("davg: T") + .Input("dstd: T") + .Output("descrpt: T") + .Output("descrpt_deriv: T") + .Output("rij: T") + .Output("nlist:int32") + .Attr("T: {float16, float32}") + .Attr("rcut_a: float = 0.0") + .Attr("rcut_r: float = 0.0") + .Attr("rcut_r_smth: float = 0.0") + .Attr("sel_a: list(int)") + .Attr("sel_r: list(int)") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto coord_shape = c->input(0); + int64_t nsample = InferenceContext::Value(c->Dim(coord_shape, 0)); + int64_t nloc = 12288; + int64_t nnei = 0; + std::vector sel_a; + TF_RETURN_IF_ERROR(c->GetAttr("sel_a", &sel_a)); + for (size_t i = 0; i < sel_a.size(); ++i) { + nnei = nnei + sel_a[i]; + } + int64_t des = nloc * nnei * 4; + int64_t des_a = des * 3; + int64_t rij = nloc * nnei * 3; + int64_t nlist = nloc * nnei; + c->set_output(0, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des)})); + c->set_output(1, c->MakeShape({c->MakeDim(nsample), c->MakeDim(des_a)})); + c->set_output(2, c->MakeShape({c->MakeDim(nsample), c->MakeDim(rij)})); + c->set_output(3, c->MakeShape({c->MakeDim(nsample), c->MakeDim(nlist)})); + return Status::OK(); + }); + +REGISTER_OP("ProdVirialSeA") + .Input("net_deriv:T") + .Input("in_deriv:T") + .Input("rij:T") + .Input("nlist:int32") + .Input("natoms:int32") + .Output("virial:T") + .Output("atom_virial:T") + .Attr("n_a_sel:int = 0") + .Attr("n_r_sel:int = 0") + .Attr("T: {float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(0); + auto nframes = c->Dim(input_shape, 0); + ShapeHandle virial_shape = c->MakeShape({nframes, 9}); + c->set_output(0, virial_shape); + ShapeHandle atom_virial_shape = c->MakeShape({nframes, 254952}); + c->set_output(1, atom_virial_shape); + return Status::OK(); + }); + +REGISTER_OP("ProdForceSeA") + .Input("net_deriv:T") + .Input("in_deriv:T") + .Input("nlist:int32") + .Input("natoms:int32") + .Output("force:T") + .Attr("n_a_sel:int = 0") + .Attr("n_r_sel:int = 0") + .Attr("T: {float32}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(0); + auto nframes = c->Dim(input_shape, 0); + ShapeHandle force_shape = c->MakeShape({nframes, 84984}); + c->set_output(0, force_shape); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusionSeA") + .Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Output("descriptor:T") + .Attr("last_layer_size:int") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(3); + auto nloc = c->Dim(input_shape, 0); + + int32_t last_layer_size; + TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); + ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); + c->set_output(0, out_shape); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusionSeAGrad") + .Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Input("dy:T") + .Input("descriptor:T") + .Output("dy_dem_x:T") + .Output("dy_dem:T") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->input(2)); + c->set_output(1, c->input(3)); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusion") + 
.Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Output("descriptor:T") + .Attr("last_layer_size:int") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + auto input_shape = c->input(3); + auto nloc = c->Dim(input_shape, 0); + + int32_t last_layer_size; + TF_RETURN_IF_ERROR(c->GetAttr("last_layer_size", &last_layer_size)); + ShapeHandle out_shape = c->MakeShape({nloc, 4, last_layer_size}); + c->set_output(0, out_shape); + return Status::OK(); + }); + +REGISTER_OP("TabulateFusionGrad") + .Input("table:T") + .Input("table_info:T") + .Input("em_x:T") + .Input("em:T") + .Input("dy:T") + .Input("descriptor:T") + .Output("dy_dem_x:T") + .Output("dy_dem:T") + .Attr("T: {float16, float32, float64}") + .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->input(2)); + c->set_output(1, c->input(3)); + return Status::OK(); + }); +} // namespace +} // namespace tensorflow diff --git a/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py b/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py index 9968ec052e2832b6d7d06a6203b8576b81c6ebea..06ada22d6248b7cabc6d75ffc8beb7354f0c0470 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py +++ b/tf_adapter/python/npu_bridge/estimator/npu_aicore_ops.py @@ -1,328 +1,358 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""All bert ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numbers -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops.nn_ops import _get_noise_shape -from tensorflow.python.framework import dtypes - -from npu_bridge.helper import helper -from npu_bridge.estimator.npu.npu_common import NPUBasics - -npu_aicore_ops = helper.get_gen_ops() - -DEFAULT_GRAPH_SEED = 87654321 -_MAXINT32 = 2 ** 31 - 1 - - -@ops.RegisterGradient("FastGelu") -def _fast_gelu_grad(op, grad): - """The gradient for `fast_gelu`. - - Args: - op: The `fast_gelu` `Operation` that we are differentiating, which we can use - to find the inputs and outputs of the original op. - grad: Gradient with respect to the output of the `fast_gelu` op. - - Returns: - Gradients with respect to the input of `fast_gelu`. - """ - return [npu_aicore_ops.fast_gelu_grad(grad, op.inputs[0])] # List of one Tensor, since we have one input - - -def fast_gelu_v2(x, name=None): - """ fast_gelu_v2 operator interface implementation - - Args: - x: A input tensor with type is float16 or float32. - - Returns: - A tensor. 
- """ - return npu_aicore_ops.fast_gelu_v2(x, name) - - -def centralization(x, axes, name=None): - """ - centralization op - return x - reduce_mean(x, axes) - """ - x = ops.convert_to_tensor(x, name="x") - result = npu_aicore_ops.centralization(x, axes, name=name) - return result - - -@ops.RegisterGradient("PRelu") -def prelu_grad(op, grad): - """Gradient for prelu""" - dx, da = npu_aicore_ops.p_relu_grad(grad, op.inputs[0], op.inputs[1]) - return [dx, da] - - -def prelu(x, weight): - """prelu op""" - return npu_aicore_ops.p_relu(x, weight) - - -def _truncate_seed(seed): - return seed % _MAXINT32 # Truncate to fit into 32-bit integer - - -def dropout_v3(x, keep_prob, noise_shape=None, seed=None, name=None): - """The gradient for `gelu`. - - Args: - x: A tensor with type is float. - keep_prob: A tensor, float, rate of every element reserved. - noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random - generated. - seed: Random seed. - name: Layer name. - - Returns: - A tensor. - """ - x = ops.convert_to_tensor(x, name="x") - if not x.dtype.is_floating: - raise ValueError("x must be a floating point tensor." - " Got a %s tensor instead." % x.dtype) - if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: - raise ValueError("keep_prob must be a float value or a scalar tensor in the " - "range (0, 1], got %g" % keep_prob) - if isinstance(keep_prob, float) and keep_prob == 1.0: - return x - seed, seed2 = random_seed.get_seed(seed) - noise_shape = _get_noise_shape(x, noise_shape) - gen_out = npu_aicore_ops.drop_out_gen_mask_v3(noise_shape, keep_prob, seed, seed2, name) - result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) - return result - - -@ops.RegisterGradient("DropOutDoMaskV3") -def _DropOutDoMaskV3Grad(op, grad): - result = npu_aicore_ops.drop_out_do_mask_v3(grad, op.inputs[1], op.inputs[2]) - return [result, None, None] - - -def dropout_v4(x, keep_prob, noise_shape=None, seed=None, output_dtype=dtypes.bool, name=None): - """The gradient for `gelu`. - - Args: - x: A tensor with type is float. - keep_prob: A tensor, float, rate of every element reserved. - noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random - generated. - seed: Random seed. - output_dtype: dtype of output tensor, default is bool. - name: Layer name. - - Returns: - A tensor. - """ - x = ops.convert_to_tensor(x, name="x") - if not x.dtype.is_floating: - raise ValueError("x must be a floating point tensor." - " Got a %s tensor instead." 
% x.dtype) - if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: - raise ValueError("keep_prob must be a float value or a scalar tensor in the " - "range (0, 1], got %g" % keep_prob) - if isinstance(keep_prob, float) and keep_prob == 1.0: - return x - seed, seed2 = random_seed.get_seed(seed) - noise_shape = _get_noise_shape(x, noise_shape) - gen_out = npu_aicore_ops.drop_out_gen_mask_v4(noise_shape, keep_prob, seed, seed2, output_dtype, name) - result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) - return result - - -def lru_cache_v2(index_list, data, cache, tag, is_last_call, pre_route_count, name=None): - """ - LRUCacheV2 op - - """ - is_last_call = ops.convert_to_tensor(is_last_call, name="is_last_call") - data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number = npu_aicore_ops.lru_cache_v2( - index_list, data, cache, tag, is_last_call, pre_route_count, name=name) - return [data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number] - - -def nonzero(x, transpose=False, output_type=dtypes.int64, name=None): - """ - nonezero op - Return the indices of the elementes that are non-zero. - Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes - in that dimension. The values in a are always tested and returned in row-major ,C-style order. - - """ - x = ops.convert_to_tensor(x, name="x") - result = npu_aicore_ops.non_zero(x, transpose, output_type, name=name) - return result - - -def nonzerowithvalue(x, transpose=False, output_type=dtypes.int64, name=None): - """ - nonezero op - Return the indices of the elementes that are non-zero. - Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes - in that dimension. The values in a are always tested and returned in row-major ,C-style order. - - """ - x = ops.convert_to_tensor(x, name="x") - result = npu_aicore_ops.non_zero_with_value(x, transpose, output_type, name=name) - return result - - -# go/tf-wildcard-import - - -def layer_norm(x, gamma, beta, begin_norm_axis=0, begin_params_axis=0, epsilon=0.0000001, name=None): - """ LayerNorm operator interface implementation - - Args: - x: A input tensor with type is float16 or float32. - gamma: scaling operation to normalized tensor. - beta: add offset to normalized tensor. - begin_norm_axis: A optional attribute, the type is int32. Defaults to 0. - begin_params_axis: A optional attribute, the type is int32. Defaults to 0. - epsilon: A optional attribute, the type is int32. Defaults to 0.0000001. - name: Layer name. - - Returns: - A tensor. - """ - res, mean, variance = npu_aicore_ops.fused_layer_norm(x, gamma, beta, begin_norm_axis, - begin_params_axis, epsilon, name) - - return [res, mean, variance] - - -@ops.RegisterGradient("FusedLayerNorm") -def _layer_norm_grad(op, *grad): - pd_x, pd_gamma, pd_beta = npu_aicore_ops.fused_layer_norm_grad(grad[0], op.inputs[0], op.outputs[2], op.outputs[1], - op.inputs[1]) - - return [pd_x, pd_gamma, pd_beta] - - -def prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a=0.0, - rcut_r=0.0, rcut_r_smth=0.0, sel_a=None, sel_r=None, name=None): - """ - prod_env_mat_a op - Return the indices of the elementes that are non-zero. - Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes - in that dimension. The values in a are always tested and returned in row-major ,C-style order. 
- - """ - sel_a = [] if sel_a is None else sel_a - sel_r = [] if sel_r is None else sel_r - coord = ops.convert_to_tensor(coord, name="coord") - types = ops.convert_to_tensor(types, name="type") - natoms = ops.convert_to_tensor(natoms, name="natoms") - box = ops.convert_to_tensor(box, name="box") - mesh = ops.convert_to_tensor(mesh, name="mesh") - davg = ops.convert_to_tensor(davg, name="davg") - dstd = ops.convert_to_tensor(dstd, name="dstd") - result = npu_aicore_ops.prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a, rcut_r, - rcut_r_smth, sel_a, sel_r, name) - - return result - - -def prodvirialsea(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): - """ - ProdVirialSeA op - """ - net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") - in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") - rij = ops.convert_to_tensor(rij, name="rij") - nlist = ops.convert_to_tensor(nlist, name="nlist") - natoms = ops.convert_to_tensor(natoms, name="natoms") - result = npu_aicore_ops.prod_virial_se_a(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel, n_r_sel, - name=name) - return result - - -def prodforcesea(net_deriv, in_deriv, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): - """ - ProdForceSeA op - """ - net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") - in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") - nlist = ops.convert_to_tensor(nlist, name="nlist") - natoms = ops.convert_to_tensor(natoms, name="natoms") - result = npu_aicore_ops.prod_force_se_a(net_deriv, in_deriv, nlist, natoms, n_a_sel, n_r_sel, - name=name) - return result - - -def tabulatefusionsea(table, table_info, em_x, em, last_layer_size=128, name=None): - """ - TabulateFusionSeA op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - result = npu_aicore_ops.tabulate_fusion_se_a(table, table_info, em_x, em, last_layer_size, name=name) - return result - - -def tabulatefusionseagrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): - """ - TabulateFusionSeAGrad op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") - dy_dem = ops.convert_to_tensor(dy_dem, name="dy_dem") - result = npu_aicore_ops.tabulate_fusion_se_a_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) - return result - - -def tabulatefusion(table, table_info, em_x, em, last_layer_size=128, name=None): - """ - TabulateFusion op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - result = npu_aicore_ops.tabulate_fusion(table, table_info, em_x, em, last_layer_size, name=name) - return result - - -def tabulatefusiongrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): - """ - TabulateFusionGrad op - """ - table = ops.convert_to_tensor(table, name="table") - table_info = ops.convert_to_tensor(table_info, name="table_info") - em_x = ops.convert_to_tensor(em_x, name="em_x") - em = ops.convert_to_tensor(em, name="em") - dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") - dy_dem = 
ops.convert_to_tensor(dy_dem, name="dy_dem") - result = npu_aicore_ops.tabulate_fusion_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) - return result +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""All bert ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numbers +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.ops.nn_ops import _get_noise_shape +from tensorflow.python.framework import dtypes + +from npu_bridge.helper import helper +from npu_bridge.estimator.npu.npu_common import NPUBasics + +npu_aicore_ops = helper.get_gen_ops() + +DEFAULT_GRAPH_SEED = 87654321 +_MAXINT32 = 2 ** 31 - 1 + +def rfft_npu(input, n = 0, norm = "backward"): + result = npu_aicore_ops.fft1d(input, n, norm, mode = "r2c", forward = True) + result = tf.complex(result[:,:,:,0],result[:,:,:,1]) + return result + +def irfft_npu(input, n = 0, norm = "backward"): + re = tf.real(input) + im = tf.imag(input) + input = [re,im] + input = tf.transpose(input, [1,2,3,0]) + result = npu_aicore_ops.fft1d(input, n, norm, mode = "c2r", forward = False) + return result + +def cfft_npu(input, n = 0, norm = "backward"): + re = tf.real(input) + im = tf.imag(input) + input = [re,im] + input = tf.transpose(input, [1,2,3,0]) + result = npu_aicore_ops.fft1d(input, n, norm, mode = "c2c", forward = True) + result = tf.complex(result[:,:,:,0],result[:,:,:,1]) + return result + +def icfft_npu(input, n = 0, norm = "backward"): + re = tf.real(input) + im = tf.imag(input) + input = [re,im] + input = tf.transpose(input, [1,2,3,0]) + result = npu_aicore_ops.fft1d(input, n, norm, mode = "c2c", forward = False) + result = tf.complex(result[:,:,:,0],result[:,:,:,1]) + return result + +@ops.RegisterGradient("FastGelu") +def _fast_gelu_grad(op, grad): + """The gradient for `fast_gelu`. + + Args: + op: The `fast_gelu` `Operation` that we are differentiating, which we can use + to find the inputs and outputs of the original op. + grad: Gradient with respect to the output of the `fast_gelu` op. + + Returns: + Gradients with respect to the input of `fast_gelu`. + """ + return [npu_aicore_ops.fast_gelu_grad(grad, op.inputs[0])] # List of one Tensor, since we have one input + + +def fast_gelu_v2(x, name=None): + """ fast_gelu_v2 operator interface implementation + + Args: + x: A input tensor with type is float16 or float32. + + Returns: + A tensor. 
+ """ + return npu_aicore_ops.fast_gelu_v2(x, name) + + +def centralization(x, axes, name=None): + """ + centralization op + return x - reduce_mean(x, axes) + """ + x = ops.convert_to_tensor(x, name="x") + result = npu_aicore_ops.centralization(x, axes, name=name) + return result + + +@ops.RegisterGradient("PRelu") +def prelu_grad(op, grad): + """Gradient for prelu""" + dx, da = npu_aicore_ops.p_relu_grad(grad, op.inputs[0], op.inputs[1]) + return [dx, da] + + +def prelu(x, weight): + """prelu op""" + return npu_aicore_ops.p_relu(x, weight) + + +def _truncate_seed(seed): + return seed % _MAXINT32 # Truncate to fit into 32-bit integer + + +def dropout_v3(x, keep_prob, noise_shape=None, seed=None, name=None): + """The gradient for `gelu`. + + Args: + x: A tensor with type is float. + keep_prob: A tensor, float, rate of every element reserved. + noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random + generated. + seed: Random seed. + name: Layer name. + + Returns: + A tensor. + """ + x = ops.convert_to_tensor(x, name="x") + if not x.dtype.is_floating: + raise ValueError("x must be a floating point tensor." + " Got a %s tensor instead." % x.dtype) + if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: + raise ValueError("keep_prob must be a float value or a scalar tensor in the " + "range (0, 1], got %g" % keep_prob) + if isinstance(keep_prob, float) and keep_prob == 1.0: + return x + seed, seed2 = random_seed.get_seed(seed) + noise_shape = _get_noise_shape(x, noise_shape) + gen_out = npu_aicore_ops.drop_out_gen_mask_v3(noise_shape, keep_prob, seed, seed2, name) + result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) + return result + + +@ops.RegisterGradient("DropOutDoMaskV3") +def _DropOutDoMaskV3Grad(op, grad): + result = npu_aicore_ops.drop_out_do_mask_v3(grad, op.inputs[1], op.inputs[2]) + return [result, None, None] + + +def dropout_v4(x, keep_prob, noise_shape=None, seed=None, output_dtype=dtypes.bool, name=None): + """The gradient for `gelu`. + + Args: + x: A tensor with type is float. + keep_prob: A tensor, float, rate of every element reserved. + noise_shape: A 1-D tensor, with type int32, shape of keep/drop what random + generated. + seed: Random seed. + output_dtype: dtype of output tensor, default is bool. + name: Layer name. + + Returns: + A tensor. + """ + x = ops.convert_to_tensor(x, name="x") + if not x.dtype.is_floating: + raise ValueError("x must be a floating point tensor." + " Got a %s tensor instead." 
% x.dtype) + if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1.0: + raise ValueError("keep_prob must be a float value or a scalar tensor in the " + "range (0, 1], got %g" % keep_prob) + if isinstance(keep_prob, float) and keep_prob == 1.0: + return x + seed, seed2 = random_seed.get_seed(seed) + noise_shape = _get_noise_shape(x, noise_shape) + gen_out = npu_aicore_ops.drop_out_gen_mask_v4(noise_shape, keep_prob, seed, seed2, output_dtype, name) + result = npu_aicore_ops.drop_out_do_mask_v3(x, gen_out, keep_prob, name) + return result + + +def lru_cache_v2(index_list, data, cache, tag, is_last_call, pre_route_count, name=None): + """ + LRUCacheV2 op + + """ + is_last_call = ops.convert_to_tensor(is_last_call, name="is_last_call") + data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number = npu_aicore_ops.lru_cache_v2( + index_list, data, cache, tag, is_last_call, pre_route_count, name=name) + return [data, cache, tag, index_offset_list, not_in_cache_index_list, not_in_cache_number] + + +def nonzero(x, transpose=False, output_type=dtypes.int64, name=None): + """ + nonezero op + Return the indices of the elementes that are non-zero. + Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes + in that dimension. The values in a are always tested and returned in row-major ,C-style order. + + """ + x = ops.convert_to_tensor(x, name="x") + result = npu_aicore_ops.non_zero(x, transpose, output_type, name=name) + return result + + +def nonzerowithvalue(x, transpose=False, output_type=dtypes.int64, name=None): + """ + nonezero op + Return the indices of the elementes that are non-zero. + Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes + in that dimension. The values in a are always tested and returned in row-major ,C-style order. + + """ + x = ops.convert_to_tensor(x, name="x") + result = npu_aicore_ops.non_zero_with_value(x, transpose, output_type, name=name) + return result + + +# go/tf-wildcard-import + + +def layer_norm(x, gamma, beta, begin_norm_axis=0, begin_params_axis=0, epsilon=0.0000001, name=None): + """ LayerNorm operator interface implementation + + Args: + x: A input tensor with type is float16 or float32. + gamma: scaling operation to normalized tensor. + beta: add offset to normalized tensor. + begin_norm_axis: A optional attribute, the type is int32. Defaults to 0. + begin_params_axis: A optional attribute, the type is int32. Defaults to 0. + epsilon: A optional attribute, the type is int32. Defaults to 0.0000001. + name: Layer name. + + Returns: + A tensor. + """ + res, mean, variance = npu_aicore_ops.fused_layer_norm(x, gamma, beta, begin_norm_axis, + begin_params_axis, epsilon, name) + + return [res, mean, variance] + + +@ops.RegisterGradient("FusedLayerNorm") +def _layer_norm_grad(op, *grad): + pd_x, pd_gamma, pd_beta = npu_aicore_ops.fused_layer_norm_grad(grad[0], op.inputs[0], op.outputs[2], op.outputs[1], + op.inputs[1]) + + return [pd_x, pd_gamma, pd_beta] + + +def prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a=0.0, + rcut_r=0.0, rcut_r_smth=0.0, sel_a=None, sel_r=None, name=None): + """ + prod_env_mat_a op + Return the indices of the elementes that are non-zero. + Return a tuple of arrays,one for each dimension of a ,containing the indices of the non-zero elementes + in that dimension. The values in a are always tested and returned in row-major ,C-style order. 
+ + """ + sel_a = [] if sel_a is None else sel_a + sel_r = [] if sel_r is None else sel_r + coord = ops.convert_to_tensor(coord, name="coord") + types = ops.convert_to_tensor(types, name="type") + natoms = ops.convert_to_tensor(natoms, name="natoms") + box = ops.convert_to_tensor(box, name="box") + mesh = ops.convert_to_tensor(mesh, name="mesh") + davg = ops.convert_to_tensor(davg, name="davg") + dstd = ops.convert_to_tensor(dstd, name="dstd") + result = npu_aicore_ops.prod_env_mat_a(coord, types, natoms, box, mesh, davg, dstd, rcut_a, rcut_r, + rcut_r_smth, sel_a, sel_r, name) + + return result + + +def prodvirialsea(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): + """ + ProdVirialSeA op + """ + net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") + in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") + rij = ops.convert_to_tensor(rij, name="rij") + nlist = ops.convert_to_tensor(nlist, name="nlist") + natoms = ops.convert_to_tensor(natoms, name="natoms") + result = npu_aicore_ops.prod_virial_se_a(net_deriv, in_deriv, rij, nlist, natoms, n_a_sel, n_r_sel, + name=name) + return result + + +def prodforcesea(net_deriv, in_deriv, nlist, natoms, n_a_sel=0, n_r_sel=0, name=None): + """ + ProdForceSeA op + """ + net_deriv = ops.convert_to_tensor(net_deriv, name="net_deriv") + in_deriv = ops.convert_to_tensor(in_deriv, name="in_deriv") + nlist = ops.convert_to_tensor(nlist, name="nlist") + natoms = ops.convert_to_tensor(natoms, name="natoms") + result = npu_aicore_ops.prod_force_se_a(net_deriv, in_deriv, nlist, natoms, n_a_sel, n_r_sel, + name=name) + return result + + +def tabulatefusionsea(table, table_info, em_x, em, last_layer_size=128, name=None): + """ + TabulateFusionSeA op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + result = npu_aicore_ops.tabulate_fusion_se_a(table, table_info, em_x, em, last_layer_size, name=name) + return result + + +def tabulatefusionseagrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): + """ + TabulateFusionSeAGrad op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") + dy_dem = ops.convert_to_tensor(dy_dem, name="dy_dem") + result = npu_aicore_ops.tabulate_fusion_se_a_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) + return result + + +def tabulatefusion(table, table_info, em_x, em, last_layer_size=128, name=None): + """ + TabulateFusion op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + result = npu_aicore_ops.tabulate_fusion(table, table_info, em_x, em, last_layer_size, name=name) + return result + + +def tabulatefusiongrad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=None): + """ + TabulateFusionGrad op + """ + table = ops.convert_to_tensor(table, name="table") + table_info = ops.convert_to_tensor(table_info, name="table_info") + em_x = ops.convert_to_tensor(em_x, name="em_x") + em = ops.convert_to_tensor(em, name="em") + dy_dem_x = ops.convert_to_tensor(dy_dem_x, name="dy_dem_x") + dy_dem = 
ops.convert_to_tensor(dy_dem, name="dy_dem") + result = npu_aicore_ops.tabulate_fusion_grad(table, table_info, em_x, em, dy_dem_x, dy_dem, name=name) + return result