From 2ba4b0a6ec1854584eabfb46a9597b2e621f5245 Mon Sep 17 00:00:00 2001
From: zhangzhongzt
Date: Tue, 10 Nov 2020 19:58:23 +0800
Subject: [PATCH 1/3] add lamb optimize ops

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message and is skipped when the patch is applied):
  * Adds two new kernel sources, lamb_apply_optimizer_assign.cc and
    lamb_apply_weight_assign.cc, and touches npu_ops.cc and npu_ops.py
    (see the diffstat below).
  * LambApplyOptimizerAssignOp is registered for DEVICE_CPU only and its
    Compute() body consists of a single LOG(INFO) statement.

 .../kernels/lamb_apply_optimizer_assign.cc    | 42 +++++++++++++++++++
 .../kernels/lamb_apply_weight_assign.cc       | 42 +++++++++++++++++++
 tf_adapter/ops/npu_ops.cc                     | 39 +++++++++++++++++
 .../python/npu_bridge/estimator/npu_ops.py    | 16 +++++++
 4 files changed, 139 insertions(+)
 create mode 100644 tf_adapter/kernels/lamb_apply_optimizer_assign.cc
 create mode 100644 tf_adapter/kernels/lamb_apply_weight_assign.cc

diff --git a/tf_adapter/kernels/lamb_apply_optimizer_assign.cc b/tf_adapter/kernels/lamb_apply_optimizer_assign.cc
new file mode 100644
index 000000000..08ffd7375
--- /dev/null
+++ b/tf_adapter/kernels/lamb_apply_optimizer_assign.cc
@@ -0,0 +1,42 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+class LambApplyOptimizerAssignOp : public OpKernel {
+ public:
+  explicit LambApplyOptimizerAssignOp(OpKernelConstruction *context) : OpKernel(context) {}
+  ~LambApplyOptimizerAssignOp() override = default;
+  void Compute(OpKernelContext *context) override {  // only logs the input count; writes no outputs here
+    LOG(INFO) << "LambApplyOptimizerAssignOp Compute, num_inputs: " << context->num_inputs();
+  }
+  bool IsExpensive() override { return false; }
+};
+
+REGISTER_KERNEL_BUILDER(Name("LambApplyOptimizerAssign").Device(DEVICE_CPU), LambApplyOptimizerAssignOp);
+} // namespace tensorflow
diff --git a/tf_adapter/kernels/lamb_apply_weight_assign.cc b/tf_adapter/kernels/lamb_apply_weight_assign.cc
new file mode 100644
index 000000000..f3ce002da
--- /dev/null
+++ b/tf_adapter/kernels/lamb_apply_weight_assign.cc
@@ -0,0 +1,42 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+class LambApplyWeightAssignOp : public OpKernel {
+ public:
+  explicit LambApplyWeightAssignOp(OpKernelConstruction *context) : OpKernel(context) {}
+  ~LambApplyWeightAssignOp() override = default;
+  void Compute(OpKernelContext *context) override {  // only logs the input count; writes no outputs here
+    LOG(INFO) << "LambApplyWeightAssignOp Compute, num_inputs: " << context->num_inputs();
+  }
+  bool IsExpensive() override { return false; }
+};
+
+REGISTER_KERNEL_BUILDER(Name("LambApplyWeightAssign").Device(DEVICE_CPU), LambApplyWeightAssignOp);
+} // namespace tensorflow
diff --git a/tf_adapter/ops/npu_ops.cc b/tf_adapter/ops/npu_ops.cc
index 2592860cf..234330c33 100644
--- a/tf_adapter/ops/npu_ops.cc
+++ b/tf_adapter/ops/npu_ops.cc
@@ -369,6 +369,45 @@ REGISTER_OP("AdamApplyOneAssign")
     .Attr("T: {float16, float32}")
     .SetShapeFn(shape_inference::NoOutputs);
 
+REGISTER_OP("LambApplyOptimizerAssign")
+    .Input("input0: T")
+    .Input("input1: T")
+    .Input("input2: T")
+    .Input("input3: T")
+    .Input("mul0_x: T")
+    .Input("mul1_x: T")
+    .Input("mul2_x: T")
+    .Input("mul3_x: T")
+    .Input("add2_y: T")
+    .Input("steps: T")
+    .Input("do_use_weight: T")
+    .Input("weight_decay_rate: T")
+    .Output("update: T")
+    .Output("output1: T")
+    .Output("output2: T")
+    .Attr("T: {float16, float32}")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext *c) {
+      c->set_output(0, c->input(0));  // update  takes the shape of input0
+      c->set_output(1, c->input(1));  // output1 takes the shape of input1
+      c->set_output(2, c->input(2));  // output2 takes the shape of input2
+      return Status::OK();
+    });
+
+REGISTER_OP("LambApplyWeightAssign")
+    .Input("input0: T")
+    .Input("input1: T")
+    .Input("input2: T")
+    .Input("input3: T")
+    .Input("input4: T")
+    .Output("output0: T")
+    .Attr("T: {float16, float32}")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext *c) {
+      c->set_output(0, c->input(4));  // output0 takes the shape of input4
+      return Status::OK();
+    });
+
 REGISTER_OP("AdamApplyOneWithDecayAssign")
     .Input("input0: T")
     .Input("input1: T")
diff --git a/tf_adapter/python/npu_bridge/estimator/npu_ops.py b/tf_adapter/python/npu_bridge/estimator/npu_ops.py
index ab88bf56c..992b4cb30 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu_ops.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu_ops.py
@@ -248,3 +248,19 @@ def adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4,
     result = gen_npu_ops.adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4, mul0_x,
                                                           mul1_x, mul2_x, mul3_x, mul4_x, add2_y, name)
     return result
+
+def lamb_apply_optimizer_assign(input0, input1, input2, input3, mul0_x, mul1_x,
+                                mul2_x, mul3_x, add2_y, steps, do_use_weight, weight_decay_rate, name=None):
+    if context.executing_eagerly():  # wrapper rejects eager execution
+        raise RuntimeError("tf.lamb_apply_optimizer_assign() is not compatible with "
+                           "eager execution.")
+    update, nextv, nextm = gen_npu_ops.lamb_apply_optimizer_assign(input0, input1, input2, input3, mul0_x, mul1_x, mul2_x,
+                                                                   mul3_x, add2_y, steps, do_use_weight, weight_decay_rate, name)
+    return update, nextv, nextm  # the op's three outputs, in registration order
+
+def lamb_apply_weight_assign(input0, input1, input2, input3, input4, name=None):
+    if context.executing_eagerly():  # wrapper rejects eager execution
+        raise RuntimeError("tf.lamb_apply_weight_assign() is not compatible with "
+                           "eager execution.")
+    result = gen_npu_ops.lamb_apply_weight_assign(input0, input1, input2, input3, input4, name)
+    return result
\ No newline at end of file
-- 
Gitee

From e3a389e8c54a7192f7a63ad986c722f65783bdb4 Mon Sep 17 00:00:00 2001
From: zhangzhongzt
Date: Tue, 10 Nov 2020 20:34:21 +0800
Subject: [PATCH 2/3]
lamb ops

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message and is skipped when the patch is applied):
  * The diffstat and both kernel-file diffs in this patch match PATCH 1/3:
    the same two paths are created with "new file mode 100644" and carry
    the same blob ids (08ffd7375 and f3ce002da).
  * NOTE(review): presumably this is a re-export of the same change against
    the base tree rather than an increment on top of PATCH 1/3 -- confirm
    before applying the series in order, since a file cannot be created twice.

 .../kernels/lamb_apply_optimizer_assign.cc    | 42 +++++++++++++++++++
 .../kernels/lamb_apply_weight_assign.cc       | 42 +++++++++++++++++++
 tf_adapter/ops/npu_ops.cc                     | 39 +++++++++++++++++
 .../python/npu_bridge/estimator/npu_ops.py    | 16 +++++++
 4 files changed, 139 insertions(+)
 create mode 100644 tf_adapter/kernels/lamb_apply_optimizer_assign.cc
 create mode 100644 tf_adapter/kernels/lamb_apply_weight_assign.cc

diff --git a/tf_adapter/kernels/lamb_apply_optimizer_assign.cc b/tf_adapter/kernels/lamb_apply_optimizer_assign.cc
new file mode 100644
index 000000000..08ffd7375
--- /dev/null
+++ b/tf_adapter/kernels/lamb_apply_optimizer_assign.cc
@@ -0,0 +1,42 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+class LambApplyOptimizerAssignOp : public OpKernel {
+ public:
+  explicit LambApplyOptimizerAssignOp(OpKernelConstruction *context) : OpKernel(context) {}
+  ~LambApplyOptimizerAssignOp() override = default;
+  void Compute(OpKernelContext *context) override {  // only logs the input count; writes no outputs here
+    LOG(INFO) << "LambApplyOptimizerAssignOp Compute, num_inputs: " << context->num_inputs();
+  }
+  bool IsExpensive() override { return false; }
+};
+
+REGISTER_KERNEL_BUILDER(Name("LambApplyOptimizerAssign").Device(DEVICE_CPU), LambApplyOptimizerAssignOp);
+} // namespace tensorflow
diff --git a/tf_adapter/kernels/lamb_apply_weight_assign.cc b/tf_adapter/kernels/lamb_apply_weight_assign.cc
new file mode 100644
index 000000000..f3ce002da
--- /dev/null
+++ b/tf_adapter/kernels/lamb_apply_weight_assign.cc
@@ -0,0 +1,42 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+class LambApplyWeightAssignOp : public OpKernel {
+ public:
+  explicit LambApplyWeightAssignOp(OpKernelConstruction *context) : OpKernel(context) {}
+  ~LambApplyWeightAssignOp() override = default;
+  void Compute(OpKernelContext *context) override {  // only logs the input count; writes no outputs here
+    LOG(INFO) << "LambApplyWeightAssignOp Compute, num_inputs: " << context->num_inputs();
+  }
+  bool IsExpensive() override { return false; }
+};
+
+REGISTER_KERNEL_BUILDER(Name("LambApplyWeightAssign").Device(DEVICE_CPU), LambApplyWeightAssignOp);
+} // namespace tensorflow
diff --git a/tf_adapter/ops/npu_ops.cc b/tf_adapter/ops/npu_ops.cc
index 2592860cf..234330c33 100644
--- a/tf_adapter/ops/npu_ops.cc
+++ b/tf_adapter/ops/npu_ops.cc
@@ -369,6 +369,45 @@ REGISTER_OP("AdamApplyOneAssign")
     .Attr("T: {float16, float32}")
     .SetShapeFn(shape_inference::NoOutputs);
 
+REGISTER_OP("LambApplyOptimizerAssign")
+    .Input("input0: T")
+    .Input("input1: T")
+    .Input("input2: T")
+    .Input("input3: T")
+    .Input("mul0_x: T")
+    .Input("mul1_x: T")
+    .Input("mul2_x: T")
+    .Input("mul3_x: T")
+    .Input("add2_y: T")
+    .Input("steps: T")
+    .Input("do_use_weight: T")
+    .Input("weight_decay_rate: T")
+    .Output("update: T")
+    .Output("output1: T")
+    .Output("output2: T")
+    .Attr("T: {float16, float32}")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext *c) {
+      c->set_output(0, c->input(0));  // update  takes the shape of input0
+      c->set_output(1, c->input(1));  // output1 takes the shape of input1
+      c->set_output(2, c->input(2));  // output2 takes the shape of input2
+      return Status::OK();
+    });
+
+REGISTER_OP("LambApplyWeightAssign")
+    .Input("input0: T")
+    .Input("input1: T")
+    .Input("input2: T")
+    .Input("input3: T")
+    .Input("input4: T")
+    .Output("output0: T")
+    .Attr("T: {float16, float32}")
+    .SetIsStateful()
+    .SetShapeFn([](shape_inference::InferenceContext *c) {
+      c->set_output(0, c->input(4));  // output0 takes the shape of input4
+      return Status::OK();
+    });
+
 REGISTER_OP("AdamApplyOneWithDecayAssign")
     .Input("input0: T")
     .Input("input1: T")
diff --git a/tf_adapter/python/npu_bridge/estimator/npu_ops.py b/tf_adapter/python/npu_bridge/estimator/npu_ops.py
index ab88bf56c..c39033747 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu_ops.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu_ops.py
@@ -248,3 +248,19 @@ def adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4,
     result = gen_npu_ops.adam_apply_one_with_decay_assign(input0, input1, input2, input3, input4, mul0_x,
                                                           mul1_x, mul2_x, mul3_x, mul4_x, add2_y, name)
     return result
+
+def lamb_apply_optimizer_assign(input0, input1, input2, input3, mul0_x, mul1_x,
+                                mul2_x, mul3_x, add2_y, steps, do_use_weight, weight_decay_rate, name=None):
+    if context.executing_eagerly():  # wrapper rejects eager execution
+        raise RuntimeError("tf.lamb_apply_optimizer_assign() is not compatible with "
+                           "eager execution.")
+    update, nextv, nextm = gen_npu_ops.lamb_apply_optimizer_assign(input0, input1, input2, input3, mul0_x, mul1_x, mul2_x,
+                                                                   mul3_x, add2_y, steps, do_use_weight, weight_decay_rate, name)
+    return update, nextv, nextm  # the op's three outputs, in registration order
+
+def lamb_apply_weight_assign(input0, input1, input2, input3, input4, name=None):
+    if context.executing_eagerly():  # wrapper rejects eager execution
+        raise RuntimeError("tf.lamb_apply_weight_assign() is not compatible with "
+                           "eager execution.")
+    result = gen_npu_ops.lamb_apply_weight_assign(input0, input1, input2, input3, input4, name)
+    return result
\ No newline at end of file
-- 
Gitee

From 8ebf23b127c2bde7e139f689841b5034bf796fd5 Mon Sep 17 00:00:00 2001
From: zhangzhongzt
Date: Fri, 13 Nov 2020 14:46:07 +0800
Subject: [PATCH 3/3]
lamb ops

---
Review note (text between "---" and the first "diff --git" is not part of
the commit message and is skipped when the patch is applied):
  * Whitespace-only change: inserts one blank line before the
    REGISTER_KERNEL_BUILDER line of lamb_apply_optimizer_assign.cc.

 tf_adapter/kernels/lamb_apply_optimizer_assign.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tf_adapter/kernels/lamb_apply_optimizer_assign.cc b/tf_adapter/kernels/lamb_apply_optimizer_assign.cc
index 08ffd7375..ad4686bb2 100644
--- a/tf_adapter/kernels/lamb_apply_optimizer_assign.cc
+++ b/tf_adapter/kernels/lamb_apply_optimizer_assign.cc
@@ -38,5 +38,6 @@ class LambApplyOptimizerAssignOp : public OpKernel {
   bool IsExpensive() override { return false; }
 };
 
+
 REGISTER_KERNEL_BUILDER(Name("LambApplyOptimizerAssign").Device(DEVICE_CPU), LambApplyOptimizerAssignOp);
 } // namespace tensorflow
-- 
Gitee