diff --git a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc index 76e91e059a..9d72bcab89 100644 --- a/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc +++ b/mindspore/ccsrc/device/cpu/kernel_select_cpu.cc @@ -71,9 +71,6 @@ void GetInputFormatsAndDtypes(const CNodePtr &kernel_node, std::vector *output_formats, std::vector *output_types) { size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); - if (kernel_attr.GetOutputSize() != output_num) { - MS_LOG(EXCEPTION) << "Output num is not equal!"; - } for (size_t output_index = 0; output_index < output_num; ++output_index) { output_formats->emplace_back(kernel_attr.GetOutputAttr(output_index).second); auto dtype = kernel_attr.GetOutputAttr(output_index).first; @@ -145,6 +142,11 @@ void SetKernelInfo(const CNodePtr &kernel_node) { ExpandKernelAttr(kernel_node, &kernel_attr); } if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) { + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (kernel_attr.GetOutputSize() != output_num) { + MS_LOG(DEBUG) << "Output num is not equal!"; + continue; + } MS_LOG(INFO) << "Input format and dtype is matched, index: " << index; GetOutputFormatsAndDtypes(kernel_node, kernel_attr, &output_formats, &output_types); UpdatePrevNotCNodeFormatDtype(kernel_attr, input_not_cnode_indexes, kernel_node); diff --git a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc index d0db0c7685..5b3194608e 100644 --- a/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/addn_cpu_kernel.cc @@ -32,17 +32,17 @@ bool AddNCPUKernel::Launch(const std::vector &inputs, const std::vector &outputs) { auto output_addr = reinterpret_cast(outputs[0]->addr); + size_t offset = 0; for (size_t i = 0; i < output_shape_[0]; ++i) { for (size_t j = 0; j < output_shape_[1]; ++j) { for (size_t k = 0; k < output_shape_[2]; ++k) { for (size_t m = 0; m < output_shape_[3]; ++m) { - auto offset = CPUKernelUtils::CalcOffset(output_shape_, i, j, k, m); float sum = 0; for (size_t index = 0; index < input_num_; ++index) { auto input_addr = reinterpret_cast(inputs[index]->addr); sum += input_addr[offset]; } - output_addr[offset] = sum; + output_addr[offset++] = sum; } } } diff --git a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h index 0ce671f4f5..c0ca581974 100644 --- a/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h @@ -42,6 +42,16 @@ MS_REG_CPU_KERNEL(ApplyMomentum, .AddInputAttr(kNumberTypeFloat32) .AddOutputAttr(kNumberTypeFloat32), ApplyMomentumCPUKernel); +MS_REG_CPU_KERNEL(ApplyMomentum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ApplyMomentumCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc index d6896222b1..d2530430e9 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.cc @@ -23,7 +23,6 @@ void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); - CPUKernelUtils::ExpandDimsTo4(&output_shape_); begin_ = AnfAlgo::GetNodeAttr>(kernel_node, BEGIN); for (size_t i = 0; i < begin_.size(); i++) { @@ -61,6 +60,15 @@ void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { end_.emplace_back(begin_[i] + sizes[i]); } } + + ExpandAllMemberDims(); + CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_); + CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); +} + +void SliceCPUKernel::ExpandAllMemberDims() { + CPUKernelUtils::ExpandDimsTo4(&output_shape_); + auto input_len = input_shape_.size(); if (input_len < 4) { for (size_t i = 0; i < 4 - input_len; ++i) { @@ -70,8 +78,6 @@ void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { end_.insert(end_.begin(), 1); } } - CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_); - CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); } bool SliceCPUKernel::Launch(const std::vector &inputs, diff --git a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h index c39ec12f59..913c993d7a 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/slice_cpu_kernel.h @@ -33,6 +33,7 @@ class SliceCPUKernel : public CPUKernel { const std::vector &outputs) override; private: + void ExpandAllMemberDims(); bool CanCopyMemoryOnAxis(size_t dim) const; void CopyDataToOutput(const std::vector &inputs, size_t in_offset, const std::vector &outputs, size_t out_offset, size_t copy_num) const; diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc index 778d214f4f..92eaffe8c6 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.cc @@ -23,7 +23,6 @@ void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - CPUKernelUtils::ExpandDimsTo4(&input_shape_); begin_ = AnfAlgo::GetNodeAttr>(kernel_node, BEGIN); for (size_t i = 0; i < begin_.size(); i++) { @@ -63,6 +62,14 @@ void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { } } + ExpandAllMemberDims(); + CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_); + CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); +} + +void SliceGradCPUKernel::ExpandAllMemberDims() { + CPUKernelUtils::ExpandDimsTo4(&input_shape_); + auto output_len = output_shape_.size(); if (output_len < 4) { for (size_t i = 0; i < 4 - output_len; ++i) { @@ -72,8 +79,6 @@ void SliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { end_.insert(end_.begin(), 1); } } - CPUKernelUtils::GetElementNumEveryDim(input_shape_, &input_element_num_); - CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); } bool SliceGradCPUKernel::Launch(const std::vector &inputs, diff --git a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h index a256543ae0..1e42c8ac68 100644 --- a/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/slice_grad_cpu_kernel.h @@ -33,6 +33,7 @@ class SliceGradCPUKernel : public CPUKernel { const std::vector &outputs) override; private: + void ExpandAllMemberDims(); bool CanCopyMemoryOnAxis(size_t dim) const; void CopyDataToOutput(const std::vector &inputs, size_t in_offset, const std::vector &outputs, size_t out_offset, size_t copy_num) const;