!9168 Add ApplyAdagrad for cpu

From: @yang_chun Reviewed-by: @wuxuejian,@c_34 Signed-off-by: @c_34
4 years ago · 886c551a0b
parent 95573571f0 52637531cd
commit 886c551a0b
3 changed files with 239 additions and 0 deletions
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc
@ -0,0 +1,111 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.h"
 #include <thread>
 #include <vector>
 namespace mindspore {
 namespace kernel {
 namespace {
 // Byte sizes CheckParam accepts for the lr input (inputs[2]->size), i.e. one
 // float16 or one float32 value.
 constexpr size_t kSizeFloat16 = 2;
 constexpr size_t kSizeFloat32 = 4;
 // Number of entries CheckParam requires in `inputs` (var, accum, lr, gradient).
 constexpr size_t kInputSize = 4;
 // Number of entries CheckParam requires in `outputs` (var, accum).
 constexpr size_t kOutputSize = 2;
 }  // namespace
 // Caches the per-node configuration that Launch needs: the "update_slots"
 // attribute and the inferred data type of the node's first input.
 void ApplyAdagradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  // When false, LaunchApplyAdagrad leaves accum untouched and only updates var.
  update_slots_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "update_slots");
  // Input 0 is var; its dtype selects the LaunchKernel<T> instantiation in Launch.
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
 }
 bool ApplyAdagradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                                   const std::vector<AddressPtr> &outputs) {
  CheckParam(inputs, outputs);
  if (dtype_ == kNumberTypeFloat16) {
    LaunchKernel<float16>(inputs);
  } else if (dtype_ == kNumberTypeFloat32) {
    LaunchKernel<float>(inputs);
  }
  return true;
 }
 void ApplyAdagradCPUKernel::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
  // inputs: var, accum, lr, gradient
  if (inputs.size() != kInputSize) {
    MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but ApplyAdagrad needs 4 inputs.";
  }
  // outputs: var, accum
  if (outputs.size() != kOutputSize) {
    MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but ApplyAdagrad needs 2 outputs.";
  }
  if (inputs[0]->size != inputs[1]->size || inputs[0]->size != inputs[3]->size) {
    MS_LOG(EXCEPTION) << "Error input data size!";
  }
  if (inputs[2]->size != kSizeFloat16 && inputs[2]->size != kSizeFloat32) {
    MS_LOG(EXCEPTION) << "The attribute lr and grad must be float16 or float32!";
  }
 }
 template <typename T>
 void ApplyAdagradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs) {
  auto var = reinterpret_cast<T *>(inputs[0]->addr);
  auto accum = reinterpret_cast<T *>(inputs[1]->addr);
  auto lr = reinterpret_cast<T *>(inputs[2]->addr);
  auto gradient = reinterpret_cast<T *>(inputs[3]->addr);
  // multithreading
  size_t length = inputs[0]->size / sizeof(T);
  size_t max_thread_num = std::thread::hardware_concurrency();
  size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num;
  std::vector<std::thread> threads;
  threads.reserve(use_thread_num);
  size_t start = 0;
  size_t batch_size = (length + use_thread_num - 1) / use_thread_num;
  while (start < length) {
    size_t end = (start + batch_size) > length ? length : (start + batch_size);
    threads.emplace_back(
      std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad<T>, this, var, accum, *lr, gradient, start, end));
    start += batch_size;
  }
  for (auto &it : threads) {
    it.join();
  }
 }
 // Applies the Adagrad step to the elements in [start, end):
 //   accum[i] += gradient[i] * gradient[i]                (only if update_slots_)
 //   var[i]   -= lr * gradient[i] / sqrt(accum[i] + 1e-6)
 // The arrays are modified in place; elements outside the range are not touched.
 template <typename T>
 void ApplyAdagradCPUKernel::LaunchApplyAdagrad(T *var, T *accum, T lr, T *gradient, size_t start, size_t end) {
  const T unit = T(1);
  const T epsilon = T(1e-6);
  size_t idx = start;
  while (idx < end) {
    if (update_slots_) {
      // Accumulate the squared gradient before it is used to scale the step.
      accum[idx] += gradient[idx] * gradient[idx];
    }
    // epsilon keeps the denominator away from zero for a zero accumulator.
    const T inv_root = unit / sqrt(accum[idx] + epsilon);
    var[idx] -= lr * gradient[idx] * inv_root;
    ++idx;
  }
 }
 }  // namespace kernel
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.h
@ -0,0 +1,67 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_ADAGRAD_CPU_KERNEL_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_ADAGRAD_CPU_KERNEL_H_
 #include <thread>
 #include <vector>
 #include "backend/kernel_compiler/cpu/cpu_kernel.h"
 #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
 namespace mindspore {
 namespace kernel {
 // CPU kernel for the ApplyAdagrad operator. Updates var and accum in place
 // from a scalar learning rate and a gradient, for float16 or float32 data.
 class ApplyAdagradCPUKernel : public CPUKernel {
 public:
  ApplyAdagradCPUKernel() = default;
  ~ApplyAdagradCPUKernel() override = default;
  // Reads the "update_slots" attribute and the dtype of input 0 from the node.
  void InitKernel(const CNodePtr &kernel_node) override;
  // Validates inputs/outputs, then dispatches on the cached dtype.
  // inputs: var, accum, lr, gradient; outputs: var, accum; workspace is unused.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
              const std::vector<AddressPtr> &outputs) override;
 private:
  // Raises via MS_LOG(EXCEPTION) if the input/output counts or byte sizes are wrong.
  static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
  // Splits the element range across threads and runs LaunchApplyAdagrad<T> on each slice.
  template <typename T>
  void LaunchKernel(const std::vector<AddressPtr> &inputs);
  // Applies the update to elements [start, end) of var/accum.
  template <typename T>
  void LaunchApplyAdagrad(T *var, T *accum, T lr, T *gradient, size_t start, size_t end);
  // Whether accum is updated with the squared gradient; set from the node attribute.
  bool update_slots_{true};
  // Data type of input 0; stays kTypeUnknown until InitKernel runs.
  TypeId dtype_{kTypeUnknown};
 };
 // Register ApplyAdagrad with four float32 inputs and two float32 outputs.
 MS_REG_CPU_KERNEL(ApplyAdagrad,
                  KernelAttr()
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddOutputAttr(kNumberTypeFloat32)
                    .AddOutputAttr(kNumberTypeFloat32),
                  ApplyAdagradCPUKernel);
 // Register the same kernel class with four float16 inputs and two float16 outputs.
 MS_REG_CPU_KERNEL(ApplyAdagrad,
                  KernelAttr()
                    .AddInputAttr(kNumberTypeFloat16)
                    .AddInputAttr(kNumberTypeFloat16)
                    .AddInputAttr(kNumberTypeFloat16)
                    .AddInputAttr(kNumberTypeFloat16)
                    .AddOutputAttr(kNumberTypeFloat16)
                    .AddOutputAttr(kNumberTypeFloat16),
                  ApplyAdagradCPUKernel);
 }  // namespace kernel
 }  // namespace mindspore
 #endif
--- a/tests/st/ops/cpu/test_apply_adagrad_op.py
+++ b/tests/st/ops/cpu/test_apply_adagrad_op.py
@ -0,0 +1,61 @@
 # Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 import numpy as np
 import pytest
 import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor, Parameter
 from mindspore.ops import operations as P
 import mindspore.common.dtype as mstype
 # Run every test in this module in graph mode on the CPU backend.
 context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
 # Random 3x3 float32 initial values shared by Net's parameters and by the
 # numpy reference computation in test_apply_adagrad.
 var_np = np.random.rand(3, 3).astype(np.float32)
 accum_np = np.random.rand(3, 3).astype(np.float32)
 class Net(nn.Cell):
    """Cell that applies P.ApplyAdagrad to its own `var` and `accum` parameters.

    The parameters are initialised from the module-level `var_np` and `accum_np`.
    """
    def __init__(self):
        super(Net, self).__init__()
        self.apply_adagrad = P.ApplyAdagrad()
        self.var = Parameter(Tensor(var_np), name="var")
        self.accum = Parameter(Tensor(accum_np), name="accum")
    def construct(self, lr, grad):
        # The op's return value is discarded; the parameters themselves are
        # returned, so the test observes the in-place update.
        self.apply_adagrad(self.var, self.accum, lr, grad)
        return self.var, self.accum
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
 def test_apply_adagrad():
    """Compare ApplyAdagrad on CPU with a numpy reference for one update step.

    The reference is accum += grad * grad, then
    var -= lr * grad / sqrt(accum + 1e-6), with lr = 0.001.
    """
    # numpy reference
    gradient_np = np.random.rand(3, 3).astype(np.float32)
    expect_accum_np = accum_np + gradient_np * gradient_np
    expect_var_np = var_np - (0.001 * gradient_np * (1 / np.sqrt(expect_accum_np + 1e-6)))
    # mindspore op
    net = Net()
    lr = Tensor(0.001, mstype.float32)
    grad = Tensor(gradient_np)
    out = net(lr, grad)
    res_var_mindspore = out[0].asnumpy()
    res_accum_mindspore = out[1].asnumpy()
    eps = 1e-6
    # Compare absolute differences: a signed difference would let any result
    # that is too large (negative difference) pass unnoticed.
    assert np.all(np.abs(expect_var_np - res_var_mindspore) < eps)
    assert np.all(np.abs(expect_accum_np - res_accum_mindspore) < eps)