diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc
new file mode 100644
index 0000000000..657cec2781
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc
@@ -0,0 +1,192 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h"
+#include <cmath>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+#include "backend/kernel_compiler/common_utils.h"
+#include "runtime/device/cpu/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+constexpr size_t kAdamDeltaInputSize = 9;
+constexpr size_t kUsedThreadNum = 23;
+namespace {
+// Arguments shared (read-only) by every worker thread of one Launch call.
+struct ComputeParam {
+  float *delta_{nullptr};  // output buffer receiving the weight delta
+  float *m_{nullptr};      // first moment, updated in place
+  float *v_{nullptr};      // second moment, updated in place
+  float *grad_{nullptr};   // gradient, only read
+  float beta1_{0};
+  float beta2_{0};
+  float epsilon_{0};
+  float lr_{0};  // learning rate after Launch applied the beta-power correction
+  bool use_nesterov_{false};
+};
+
+// Updates m and v in place and writes delta for the elements in [start, end).
+// Runs on worker threads, where an escaping exception would terminate the process, so it performs no
+// throwing checks itself: Launch validates every buffer (via CheckParams) before any thread is started.
+void ComputeWeightDelta(const std::shared_ptr<ComputeParam> &input_params, size_t start, size_t end) {
+  auto delta = input_params->delta_;
+  auto m = input_params->m_;
+  auto v = input_params->v_;
+  auto grad = input_params->grad_;
+  auto lr = input_params->lr_;
+  auto beta1 = input_params->beta1_;
+  auto beta2 = input_params->beta2_;
+  auto epsilon = input_params->epsilon_;
+  auto use_nesterov = input_params->use_nesterov_;
+  for (size_t i = start; i < end; ++i) {
+    m[i] *= beta1;
+    v[i] *= beta2;
+    m[i] += (1 - beta1) * grad[i];
+    v[i] += (1 - beta2) * grad[i] * grad[i];
+    if (use_nesterov) {
+      delta[i] = -lr * (m[i] * beta1 + (1 - beta1) * grad[i]) / (std::sqrt(v[i]) + epsilon);
+    } else {
+      delta[i] = -lr * m[i] / (std::sqrt(v[i]) + epsilon);
+    }
+  }
+}
+}  // namespace
+
+// Reads the tensor shapes and the optional nesterov attribute from the node and caches the element count.
+// Raises an exception when m, v or grad differ in shape from the delta output, or when delta has no elements.
+void AdamDeltaCPUKernel::InitKernel(const CNodePtr &kernel_node) {
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  const auto delta_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
+  const auto m_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  const auto v_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
+  const auto grad_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 8);
+  if (!IsSameShape(delta_shape, m_shape)) {
+    MS_LOG(EXCEPTION) << "Delta and m should have the same shape";
+  }
+  if (!IsSameShape(delta_shape, v_shape)) {
+    MS_LOG(EXCEPTION) << "Delta and v should have the same shape";
+  }
+  if (!IsSameShape(delta_shape, grad_shape)) {
+    MS_LOG(EXCEPTION) << "Delta and grad should have the same shape";
+  }
+  if (delta_shape.empty()) {
+    MS_LOG(EXCEPTION) << "Delta must be at least 1D";
+  }
+  elem_num_ = 1;
+  for (const auto dim : delta_shape) {
+    elem_num_ *= dim;
+  }
+  if (elem_num_ < 1) {
+    MS_LOG(EXCEPTION) << "Invalid delta shape";
+  }
+  if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
+    use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov");
+  }
+}
+
+// Checks that Launch received exactly the nine inputs (m, v, beta1_power, beta2_power, lr, beta1, beta2,
+// epsilon, grad) and the delta output, each non-null and with the byte size implied by elem_num_.
+// Raises an exception on the first violation. workspace is not inspected.
+void AdamDeltaCPUKernel::CheckParams(const std::vector<kernel::AddressPtr> &inputs,
+                                     const std::vector<kernel::AddressPtr> &workspace,
+                                     const std::vector<kernel::AddressPtr> &outputs) const {
+  if (inputs.size() != kAdamDeltaInputSize) {
+    MS_LOG(EXCEPTION) << "Error input size!";
+  }
+  const size_t scalar_size = sizeof(float);
+  const size_t elem_size = elem_num_ * scalar_size;
+  std::vector<size_t> expect_sizes = {elem_size,   elem_size,   scalar_size, scalar_size, scalar_size,
+                                      scalar_size, scalar_size, scalar_size, elem_size};
+  std::vector<std::string> input_names = {"m",     "v",     "beta1_power", "beta2_power", "lr",
+                                          "beta1", "beta2", "epsilon",     "grad"};
+  for (size_t i = 0; i < kAdamDeltaInputSize; ++i) {
+    MS_EXCEPTION_IF_NULL(inputs[i]);
+    MS_EXCEPTION_IF_NULL(inputs[i]->addr);
+    if (inputs[i]->size != expect_sizes[i]) {
+      MS_LOG(EXCEPTION) << "Error input " << input_names[i] << " size!";
+    }
+  }
+  if (outputs.empty()) {
+    MS_LOG(EXCEPTION) << "Error output delta size!";
+  }
+  MS_EXCEPTION_IF_NULL(outputs[0]);
+  MS_EXCEPTION_IF_NULL(outputs[0]->addr);
+  if (outputs[0]->size != elem_size) {
+    MS_LOG(EXCEPTION) << "Error output delta size!";
+  }
+}
+
+// Computes the Adam weight delta for every element, updating m and v in place.
+// The learning rate is first rescaled by sqrt(1 - beta2_power) / (1 - beta1_power); the elements are then split
+// into contiguous ranges processed by at most kUsedThreadNum threads. Returns true; invalid arguments raise.
+bool AdamDeltaCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
+                                const std::vector<kernel::AddressPtr> &workspace,
+                                const std::vector<kernel::AddressPtr> &outputs) {
+  CheckParams(inputs, workspace, outputs);
+  if (elem_num_ == 0) {
+    // Without this guard thread_num would be 0 and the range split below would divide by zero.
+    MS_LOG(EXCEPTION) << "The element number of delta should not be 0";
+  }
+  auto beta1_power = reinterpret_cast<float *>(inputs[2]->addr)[0];
+  if (beta1_power == 1) {
+    MS_LOG(EXCEPTION) << "The beta1_power should not be 1";
+  }
+  auto beta2_power = reinterpret_cast<float *>(inputs[3]->addr)[0];
+  auto lr = reinterpret_cast<float *>(inputs[4]->addr)[0];
+  auto params = std::make_shared<ComputeParam>();
+  params->delta_ = reinterpret_cast<float *>(outputs[0]->addr);
+  params->m_ = reinterpret_cast<float *>(inputs[0]->addr);
+  params->v_ = reinterpret_cast<float *>(inputs[1]->addr);
+  params->grad_ = reinterpret_cast<float *>(inputs[8]->addr);
+  params->beta1_ = reinterpret_cast<float *>(inputs[5]->addr)[0];
+  params->beta2_ = reinterpret_cast<float *>(inputs[6]->addr)[0];
+  params->epsilon_ = reinterpret_cast<float *>(inputs[7]->addr)[0];
+  params->lr_ = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);
+  params->use_nesterov_ = use_nesterov_;
+
+  size_t thread_num = kUsedThreadNum;
+  if (elem_num_ < thread_num) {
+    thread_num = elem_num_;
+  }
+  std::vector<std::thread> threads;
+  threads.reserve(thread_num);
+  // The first `left` ranges take one extra element so that every element is covered exactly once.
+  const size_t offset = elem_num_ / thread_num;
+  const size_t left = elem_num_ % thread_num;
+  size_t end = 0;
+  try {
+    for (size_t i = 0; i < thread_num; ++i) {
+      const size_t start = end;
+      end = start + offset + (i < left ? 1 : 0);
+      threads.emplace_back(ComputeWeightDelta, params, start, end);
+    }
+  } catch (...) {
+    // Thread creation failed: join the workers already running, otherwise destroying a joinable
+    // std::thread during unwinding would call std::terminate.
+    for (auto &worker : threads) {
+      worker.join();
+    }
+    throw;
+  }
+  for (auto &worker : threads) {
+    worker.join();
+  }
+  return true;
+}
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h
new file mode 100644
index 0000000000..4ac7df24ca
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
+#include <vector>
+#include "backend/kernel_compiler/cpu/cpu_kernel.h"
+#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
+
+namespace mindspore {
+namespace kernel {
+// CPU kernel for the AdamDelta op: takes nine float32 inputs and writes one float32 delta output.
+class AdamDeltaCPUKernel : public CPUKernel {
+ public:
+  AdamDeltaCPUKernel() = default;
+  ~AdamDeltaCPUKernel() override = default;
+  void InitKernel(const CNodePtr &kernel_node) override;  // validates shapes, caches elem_num_ and use_nesterov_
+  bool Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &workspace,
+              const std::vector<kernel::AddressPtr> &outputs) override;
+
+ protected:
+  void CheckParams(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &workspace,
+                   const std::vector<kernel::AddressPtr> &outputs) const;
+  bool use_nesterov_{false};  // set by InitKernel from the "use_nesterov" node attribute when present
+  size_t elem_num_{0};        // number of elements in delta, set by InitKernel
+};
+
+MS_REG_CPU_KERNEL(AdamDelta,
+                  KernelAttr()
+                    .AddInputAttr(kNumberTypeFloat32)    // m
+                    .AddInputAttr(kNumberTypeFloat32)    // v
+                    .AddInputAttr(kNumberTypeFloat32)    // beta1_power
+                    .AddInputAttr(kNumberTypeFloat32)    // beta2_power
+                    .AddInputAttr(kNumberTypeFloat32)    // lr
+                    .AddInputAttr(kNumberTypeFloat32)    // beta1
+                    .AddInputAttr(kNumberTypeFloat32)    // beta2
+                    .AddInputAttr(kNumberTypeFloat32)    // epsilon
+                    .AddInputAttr(kNumberTypeFloat32)    // grad
+                    .AddOutputAttr(kNumberTypeFloat32),  // delta
+                  AdamDeltaCPUKernel);
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt
index 2648edcf6f..6519f44f1d 100644
--- a/tests/ut/cpp/CMakeLists.txt
+++ b/tests/ut/cpp/CMakeLists.txt
@@ -110,6 +110,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc"
         "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc"
         "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc"
+        "../../../mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc"
         "../../../mindspore/ccsrc/backend/kernel_compiler/akg/*.cc"
         "../../../mindspore/ccsrc/backend/kernel_compiler/rts/*.cc"
         "../../../mindspore/ccsrc/backend/kernel_compiler/hccl/*.cc"
diff --git a/tests/ut/cpp/kernel/cpu/adam_delta_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/adam_delta_cpu_kernel_test.cc
new file mode 100644
index 0000000000..03848b1baf
--- /dev/null
+++ b/tests/ut/cpp/kernel/cpu/adam_delta_cpu_kernel_test.cc
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+#include "common/common_test.h"
+#define private public
+#define protected public
+#include "backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h"
+#undef private
+#undef protected
+
+namespace mindspore {
+namespace kernel {
+// Fixture owning the buffers and hyper-parameters handed to one AdamDeltaCPUKernel launch.
+class AdamDeltaCpuKernelTest : public UT::Common {
+ public:
+  AdamDeltaCpuKernelTest() : adam_delta_(std::make_shared<AdamDeltaCPUKernel>()) {}
+
+  void SetUp() override {
+    delta_.clear();
+    m_.clear();
+    v_.clear();
+    grad_.clear();
+    inputs_.clear();
+    workspace_.clear();
+    outputs_.clear();
+  }
+
+  // Wraps a buffer holding elem_num float values in a kernel address record.
+  AddressPtr CreateKernelAddress(void *addr, size_t elem_num) {
+    auto kernel_addr = std::make_shared<Address>();
+    kernel_addr->addr = addr;
+    kernel_addr->size = elem_num * sizeof(float);
+    return kernel_addr;
+  }
+
+  // Registers the buffers in the order the kernel reads them. Call only after the vectors are filled,
+  // because the addresses captured here are the vectors' current data pointers.
+  void CreateAddress() {
+    inputs_.push_back(CreateKernelAddress(m_.data(), elem_num_));
+    inputs_.push_back(CreateKernelAddress(v_.data(), elem_num_));
+    inputs_.push_back(CreateKernelAddress(&beta1_power_, 1));
+    inputs_.push_back(CreateKernelAddress(&beta2_power_, 1));
+    inputs_.push_back(CreateKernelAddress(&lr_, 1));
+    inputs_.push_back(CreateKernelAddress(&beta1_, 1));
+    inputs_.push_back(CreateKernelAddress(&beta2_, 1));
+    inputs_.push_back(CreateKernelAddress(&epsilon_, 1));
+    inputs_.push_back(CreateKernelAddress(grad_.data(), elem_num_));
+    outputs_.push_back(CreateKernelAddress(delta_.data(), elem_num_));
+  }
+
+  std::vector<float> delta_;
+  std::vector<float> m_;
+  std::vector<float> v_;
+  std::vector<float> grad_;
+  std::vector<AddressPtr> inputs_;
+  std::vector<AddressPtr> workspace_;
+  std::vector<AddressPtr> outputs_;
+  std::shared_ptr<AdamDeltaCPUKernel> adam_delta_;
+  float beta1_power_ = 0.9;
+  float beta2_power_ = 0.999;
+  float lr_ = 0.001;
+  float beta1_ = 0.9;
+  float beta2_ = 0.999;
+  float epsilon_ = 1e-8;
+  size_t elem_num_ = 27;
+};
+
+TEST_F(AdamDeltaCpuKernelTest, compute_test) {
+  for (size_t i = 0; i < elem_num_; ++i) {
+    delta_.push_back(1.0);
+    m_.push_back(1.0);
+    v_.push_back(1.0);
+    grad_.push_back(1.0);
+  }
+  adam_delta_->elem_num_ = elem_num_;
+  CreateAddress();
+  EXPECT_TRUE(adam_delta_->Launch(inputs_, workspace_, outputs_));
+  // All-ones inputs keep m and v at 1, so delta ~= -0.001 * sqrt(1 - 0.999) / (1 - 0.9) ~= -0.000316.
+  for (size_t i = 0; i < elem_num_; ++i) {
+    EXPECT_NEAR(delta_[i], -0.000316, 1e-6);
+  }
+}
+}  // namespace kernel
+}  // namespace mindspore