liuyang / mindspore (forked from MindSpore / mindspore)

Issue: tensor
Created 2022-03-09 10:22
apply_ada_max_cpu_kernel.h
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_ADA_MAX_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_ADA_MAX_CPU_KERNEL_H_

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"

namespace mindspore {
namespace kernel {
class ApplyAdaMaxCpuKernelMod : public NativeCpuKernelMod {
 public:
  ApplyAdaMaxCpuKernelMod() = default;
  ~ApplyAdaMaxCpuKernelMod() override = default;

  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
            const std::vector<KernelTensorPtr> &outputs) override;

  int Resize(
    const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
    const std::vector<KernelTensorPtr> &outputs,
    const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost = std::map<uint32_t, tensor::TensorPtr>()) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override {
    static std::vector<KernelAttr> support_list = {KernelAttr()
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddInputAttr(kNumberTypeFloat32)
                                                     .AddOutputAttr(kNumberTypeFloat32)
                                                     .AddOutputAttr(kNumberTypeFloat32)
                                                     .AddOutputAttr(kNumberTypeFloat32),
                                                   KernelAttr()
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddInputAttr(kNumberTypeFloat16)
                                                     .AddOutputAttr(kNumberTypeFloat16)
                                                     .AddOutputAttr(kNumberTypeFloat16)
                                                     .AddOutputAttr(kNumberTypeFloat16)};
    return support_list;
  }

 private:
  TypeId dtype_{kTypeUnknown};

  template <typename T>
  void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_ADA_MAX_CPU_KERNEL_H_
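
For reference, the per-element update implemented in apply_ada_max_cpu_kernel.cc below is the AdaMax rule (written here in standard notation; g is the gradient and beta1_power is $\beta_1^t$ at step t):

$m_t = \beta_1 m_{t-1} + (1 - \beta_1)\, g$
$v_t = \max(\beta_2 v_{t-1}, |g|)$
$\mathrm{var} \leftarrow \mathrm{var} - \dfrac{lr}{1 - \beta_1^t} \cdot \dfrac{m_t}{v_t + \epsilon}$

This is also why the kernel rejects beta1_power equal to 1: the bias-correction denominator $1 - \beta_1^t$ would be zero.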

apply_ada_max_cpu_kernel.cc
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include "plugin/device/cpu/kernel/apply_ada_max_cpu_kernel.h"
#include "plugin/device/cpu/kernel/nnacl/errorcode.h"
#include "plugin/device/cpu/kernel/nnacl/fp32/adam_fp32.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "utils/ms_utils.h"

namespace {
const size_t kZero = 0;
const size_t kOne = 1;
const size_t kTwo = 2;

constexpr size_t kScalarIndex = 0;
constexpr size_t kIndexVar = 0;
constexpr size_t kIndexM = 1;
constexpr size_t kIndexV = 2;
constexpr size_t kIndexBeta1Power = 3;
constexpr size_t kIndexLr = 4;
constexpr size_t kIndexBeta1 = 5;
constexpr size_t kIndexBeta2 = 6;
constexpr size_t kIndexEpsilon = 7;
constexpr size_t kIndexGrad = 8;

constexpr size_t kApplyAdaMaxInputsNum = 9;
constexpr size_t kApplyAdaMaxOutputsNum = 3;
} // namespace

namespace mindspore {
namespace kernel {
bool ApplyAdaMaxCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                                   const std::vector<KernelTensorPtr> &outputs) {
  kernel_name_ = base_operator->name();
  dtype_ = inputs[kIndexVar]->GetDtype();
  return true;
}

int ApplyAdaMaxCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                                    const std::vector<KernelTensorPtr> &outputs,
                                    const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
  // Nothing extra to resize here; delegate to the base class.
  return KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost);
}

bool ApplyAdaMaxCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                     const std::vector<kernel::AddressPtr> &,
                                     const std::vector<kernel::AddressPtr> &outputs) {
  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kApplyAdaMaxInputsNum, kernel_name_);
  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kApplyAdaMaxOutputsNum, kernel_name_);
  if (inputs[kIndexVar]->size != inputs[kIndexM]->size) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_
                      << "', the dtype and shape of 'm' and 'var' must be the same, but got the memory size of 'm': "
                      << inputs[kIndexM]->size << " and 'var': " << inputs[kIndexVar]->size;
  }
  if (inputs[kIndexVar]->size != inputs[kIndexV]->size) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_
                      << "', the dtype and shape of 'v' and 'var' must be the same, but got the memory size of 'v': "
                      << inputs[kIndexV]->size << " and 'var': " << inputs[kIndexVar]->size;
  }
  if (inputs[kIndexVar]->size != inputs[kIndexGrad]->size) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_
                      << "', the dtype and shape of 'grad' and 'var' must be the same, "
                         "but got the memory size of 'grad': "
                      << inputs[kIndexGrad]->size << " and 'var': " << inputs[kIndexVar]->size;
  }
  if (dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float16>(inputs, outputs);
  } else if (dtype_ == kNumberTypeFloat32) {
    LaunchKernel<float>(inputs, outputs);
  } else {
    MS_EXCEPTION(TypeError) << "For '" << kernel_name_ << "', the input dtype only supports float16 and float32, "
                            << "but got [" << dtype_ << "].";
  }
  return true;
}

template <typename T>
void ApplyAdaMaxCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
                                           const std::vector<AddressPtr> &outputs) {
  T *var = reinterpret_cast<T *>(inputs[kIndexVar]->addr);
  T *m = reinterpret_cast<T *>(inputs[kIndexM]->addr);
  T *v = reinterpret_cast<T *>(inputs[kIndexV]->addr);
  T beta1_power = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexBeta1Power]->addr)[kScalarIndex]);
  T lr = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexLr]->addr)[kScalarIndex]);
  T beta1 = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexBeta1]->addr)[kScalarIndex]);
  T beta2 = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexBeta2]->addr)[kScalarIndex]);
  T epsilon = static_cast<T>(reinterpret_cast<float *>(inputs[kIndexEpsilon]->addr)[kScalarIndex]);
  T *grad = reinterpret_cast<T *>(inputs[kIndexGrad]->addr);

  auto one = static_cast<T>(1);
  if (beta1_power == one) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'beta1_power' can't be set to 1.";
  }

  // Split the elementwise update across threads.
  size_t length = inputs[kZero]->size / sizeof(T);
  auto task = [this, &var, &m, &v, &beta1_power, &lr, &beta1, &beta2, &epsilon, &grad](size_t start, size_t end) {
    T one = static_cast<T>(1.0);
    for (size_t i = start; i < end; i++) {
      // m <- beta1 * m + (1 - beta1) * grad
      m[i] = static_cast<T>(beta1 * m[i] + (one - beta1) * grad[i]);
      auto zero = static_cast<T>(0);
      auto grad_abs = (grad[i] > zero) ? grad[i] : -grad[i];
      // v <- max(beta2 * v, |grad|)
      v[i] = std::max(beta2 * v[i], grad_abs);
      // var <- var - (lr / (1 - beta1_power)) * m / (v + epsilon)
      var[i] = var[i] - (lr / (one - beta1_power)) * (m[i] / (v[i] + epsilon));
    }
  };
  CPUKernelUtils::ParallelForAutoSearch(task, length, &parallel_search_info_);

  // Copy the updated buffers to the output tensors.
  auto output_var = reinterpret_cast<T *>(outputs[kZero]->addr);
  auto output_m = reinterpret_cast<T *>(outputs[kOne]->addr);
  auto output_v = reinterpret_cast<T *>(outputs[kTwo]->addr);
  auto ret = memcpy_s(output_var, outputs[kZero]->size, var, inputs[kZero]->size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
  }
  ret = memcpy_s(output_m, outputs[kOne]->size, m, inputs[kOne]->size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
  }
  ret = memcpy_s(output_v, outputs[kTwo]->size, v, inputs[kTwo]->size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
  }
}

MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, ApplyAdaMax, ApplyAdaMaxCpuKernelMod);
} // namespace kernel
} // namespace mindspore
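
As a quick cross-check of the kernel loop above, here is a minimal NumPy sketch of one AdaMax step. The helper name ada_max_step is ours, not part of MindSpore; the test below performs the same reference computation inline.

import numpy as np

def ada_max_step(var, m, v, beta1_power, lr, beta1, beta2, epsilon, grad):
    """One AdaMax update, mirroring the C++ kernel's per-element loop."""
    m = beta1 * m + (1.0 - beta1) * grad                # m <- beta1*m + (1-beta1)*grad
    v = np.maximum(beta2 * v, np.abs(grad))             # v <- max(beta2*v, |grad|)
    var = var - (lr / (1.0 - beta1_power)) * (m / (v + epsilon))
    return var, m, v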

import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P
import mindspore.common.dtype as mstype

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.apply_ada_max = P.ApplyAdaMax()
        self.var = Parameter(Tensor(np.array([[0.6, 0.4],
                                              [0.1, 0.5]]).astype(np.float32)), name="var")
        self.m = Parameter(Tensor(np.array([[0.6, 0.5],
                                            [0.2, 0.6]]).astype(np.float32)), name="m")
        self.v = Parameter(Tensor(np.array([[0.9, 0.1],
                                            [0.7, 0.8]]).astype(np.float32)), name="v")

    def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
        out = self.apply_ada_max(self.var, self.m, self.v, beta1_power, lr, beta1, beta2, epsilon, grad)
        return out

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_arm_cpu
@pytest.mark.env_onecard
def test_apply_ada_max():
    """
    Feature: ApplyAdaMax Operator on CPU
    Description: Test ApplyAdaMax Operator
    Expectation: Consistent with the results calculated using numpy
    """
    # Assumed setup: run in graph mode on CPU (the context import above suggests this).
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

    # MindSpore result
    net = Net()
    beta1_power = Tensor(0.9, mstype.float32)
    lr = Tensor(0.001, mstype.float32)
    beta1 = Tensor(0.9, mstype.float32)
    beta2 = Tensor(0.99, mstype.float32)
    epsilon = Tensor(1e-10, mstype.float32)
    grad = Tensor(np.array([[0.3, 0.7], [0.1, 0.8]]).astype(np.float32))
    output = net(beta1_power, lr, beta1, beta2, epsilon, grad)

    # numpy reference
    np_var = np.array([[0.6, 0.4], [0.1, 0.5]])
    np_m = np.array([[0.6, 0.5], [0.2, 0.6]])
    np_v = np.array([[0.9, 0.1], [0.7, 0.8]])
    np_beta1_power = 0.9
    np_lr = 0.001
    np_beta1 = 0.9
    np_beta2 = 0.99
    np_epsilon = 1e-10
    np_grad = np.array([[0.3, 0.7], [0.1, 0.8]])

    np_m = np_beta1 * np_m + (1.0 - np_beta1) * np_grad
    np_v = np.maximum(np_beta2 * np_v, np.abs(np_grad))
    np_var = np_var - (np_lr / (1 - np_beta1_power)) * (np_m / (np_v + np_epsilon))

    ms_m = output[1].asnumpy()
    ms_v = output[2].asnumpy()
    ms_var = output[0].asnumpy()
    eps = np.full((2, 2), 1e-6)
    assert np.all(np.abs(np_m - ms_m) < eps)
    assert np.all(np.abs(np_v - ms_v) < eps)
    assert np.all(np.abs(np_var - ms_var) < eps)
