diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc
new file mode 100644
index 0000000000..6d7cb494f7
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc
@@ -0,0 +1,121 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+
+template <typename T>
+void BroadcastToCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+
+  // Left-pad the input shape with 1s so both shapes have the same rank.
+  size_t offset = output_shape_.size() - input_shape_.size();
+  for (size_t i = 0; i < offset; ++i) {
+    input_shape_.insert(input_shape_.begin(), 1);
+  }
+
+  for (size_t i = 0; i < input_shape_.size(); ++i) {
+    if (output_shape_[i] < input_shape_[i] || output_shape_[i] % input_shape_[i] != 0) {
+      MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to "
+                        << "output tensor with shape " << output_shape_
+                        << ". The output shape must be an integer multiple of the input shape at dim " << i << "!";
+    }
+  }
+  for (size_t j = 0; j < output_shape_.size(); j++) {
+    nums_ *= output_shape_[j];
+  }
+
+  tmp_ptr_ = reinterpret_cast<T *>(malloc(nums_ * sizeof(T)));
+}
+
+// BroadcastTo: expand the input along each axis, starting from the innermost
+// dimension and working outward; tmp_ptr_ is used as a staging buffer.
+template <typename T>
+void BroadcastToCPUKernel<T>::BroadcastToImpl(size_t dim) {
+  if (dim == output_shape_.size() - 1) {
+    size_t input_nums = 1;
+    for (size_t j = 0; j < input_shape_.size() - 1; ++j) {
+      input_nums *= input_shape_[j];
+    }
+    size_t rate = output_shape_[dim] / input_shape_[dim];
+
+    for (size_t j = 0; j < input_nums; ++j) {
+      T *in_ptr = input_ptr_ + input_shape_[dim] * j;
+      for (size_t i = 0; i < rate; ++i) {
+        T *out_ptr = tmp_ptr_ + (j * rate + i) * input_shape_[dim];
+        memcpy_s(out_ptr, input_shape_[dim] * sizeof(T), in_ptr, input_shape_[dim] * sizeof(T));
+      }
+    }
+    size_t elems = input_shape_[dim] * rate * input_nums;
+    memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T));
+    return;
+  }
+
+  BroadcastToImpl(dim + 1);
+
+  size_t rate = output_shape_[dim] / input_shape_[dim];
+  if (rate > 1) {
+    size_t elems_nums = 1;
+    for (size_t j = output_shape_.size() - 1; j > dim; --j) {
+      elems_nums *= output_shape_[j];
+    }
+    size_t input_nums = 1;
+    for (size_t j = 0; j < dim; ++j) {
+      input_nums *= input_shape_[j];
+    }
+
+    for (size_t j = 0; j < input_nums; ++j) {
+      T *in_ptr = output_ptr_ + elems_nums * j;
+      for (size_t i = 0; i < rate; ++i) {
+        T *out_ptr = tmp_ptr_ + (j * rate + i) * elems_nums;
+        memcpy_s(out_ptr, elems_nums * sizeof(T), in_ptr, elems_nums * sizeof(T));
+      }
+    }
+    size_t elems = elems_nums * rate * input_nums;
+    memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T));
+  }
+}
+
+template <typename T>
+bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+                                     const std::vector<AddressPtr> &outputs) {
+  if (inputs.size() != 1 || outputs.size() != 1) {
+    MS_LOG(EXCEPTION) << "BroadcastTo expects exactly 1 input and 1 output!";
+    return false;
+  }
+
+  if ((inputs[0] == nullptr) || (inputs[0]->size == 0)) {
+    MS_LOG(EXCEPTION) << "Input data is NULL!";
+    return false;
+  }
+
+  if ((outputs[0] == nullptr) || (outputs[0]->size == 0)) {
+    MS_LOG(EXCEPTION) << "Output data is NULL!";
+    return false;
+  }
+
+  input_ptr_ = reinterpret_cast<T *>(inputs[0]->addr);
+  output_ptr_ = reinterpret_cast<T *>(outputs[0]->addr);
+
+  BroadcastToImpl(0);
+
+  return true;
+}
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h
new file mode 100644
index 0000000000..ade559b3e4
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h
@@ -0,0 +1,65 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_BROADCAST_TO_CPU_KERNEL_H
+#define MINDSPORE_BROADCAST_TO_CPU_KERNEL_H
+
+#include <cstdlib>
+#include <vector>
+#include "backend/kernel_compiler/cpu/cpu_kernel.h"
+#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
+
+namespace mindspore {
+namespace kernel {
+
+template <typename T>
+class BroadcastToCPUKernel : public CPUKernel {
+ public:
+  BroadcastToCPUKernel() = default;
+  ~BroadcastToCPUKernel() override {
+    if (tmp_ptr_ != nullptr) {
+      free(tmp_ptr_);
+      tmp_ptr_ = nullptr;
+    }
+  }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+              const std::vector<AddressPtr> &outputs) override;
+  void InitKernel(const CNodePtr &kernel_node) override;
+
+  void BroadcastToImpl(size_t dim);
+
+  size_t Index(const size_t &index, const size_t &dim) { return dim == 1 ? 0 : index; }
+
+ private:
+  std::vector<size_t> input_shape_;
+  std::vector<size_t> output_shape_;
+  size_t nums_{1};
+  T *input_ptr_{nullptr};
+  T *output_ptr_{nullptr};
+  T *tmp_ptr_{nullptr};
+};
+
+MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                  BroadcastToCPUKernel<float>);
+MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+                  BroadcastToCPUKernel<int>);
+MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
+                  BroadcastToCPUKernel<bool>);
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_BROADCAST_TO_CPU_KERNEL_H
diff --git a/mindspore/nn/layer/container.py b/mindspore/nn/layer/container.py
index 53de13879e..f588100e80 100644
--- a/mindspore/nn/layer/container.py
+++ b/mindspore/nn/layer/container.py
@@ -118,7 +118,7 @@ class SequentialCell(Cell):
         TypeError: If the type of the `args` is not list or OrderedDict.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> conv = nn.Conv2d(3, 2, 3, pad_mode='valid', weight_init="ones")
diff --git a/mindspore/nn/layer/conv.py b/mindspore/nn/layer/conv.py
index 855a4eaba9..69edd06132 100644
--- a/mindspore/nn/layer/conv.py
+++ b/mindspore/nn/layer/conv.py
@@ -555,7 +555,7 @@ class Conv2dTranspose(_Conv):
         ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
@@ -740,7 +740,7 @@ class Conv1dTranspose(_Conv):
         ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
diff --git a/mindspore/nn/layer/embedding.py b/mindspore/nn/layer/embedding.py
index d356cce714..1b7efd30a3 100755
--- a/mindspore/nn/layer/embedding.py
+++ b/mindspore/nn/layer/embedding.py
@@ -81,7 +81,7 @@ class Embedding(Cell):
         ValueError: If `padding_idx` is an int which not in range [0, `vocab_size`].
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> net = nn.Embedding(20000, 768, True)
diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py
index 100e4882bf..a2e0fccba7 100644
--- a/mindspore/nn/layer/image.py
+++ b/mindspore/nn/layer/image.py
@@ -226,7 +226,7 @@ class SSIM(Cell):
         ValueError: If `filter_size` is less than 0.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> net = nn.SSIM()
@@ -417,7 +417,7 @@ class PSNR(Cell):
         ValueError: If length of shape of `img1` or `img2` is not equal to 4.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> net = nn.PSNR()
diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py
index 0044360e79..04d0800f58 100644
--- a/mindspore/nn/layer/math.py
+++ b/mindspore/nn/layer/math.py
@@ -78,7 +78,7 @@ class ReduceLogSumExp(Cell):
         TypeError: If dtype of `x` is neither float16 nor float32.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32))
@@ -926,7 +926,7 @@ class Moments(Cell):
         TypeError: If dtype of `input_x` is neither float16 nor float32.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> net = nn.Moments(axis=3, keep_dims=True)
diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py
index ab63be595b..d5c98462c2 100644
--- a/mindspore/nn/layer/quant.py
+++ b/mindspore/nn/layer/quant.py
@@ -293,7 +293,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
         TypeError: If `quant_delay` is not greater than or equal to 0.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> fake_quant = nn.FakeQuantWithMinMaxObserver()
@@ -448,7 +448,7 @@ class Conv2dBnFoldQuantOneConv(Cell):
         ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> qconfig = compression.quant.create_quant_config()
diff --git a/tests/st/ops/cpu/test_broadcast_to_op.py b/tests/st/ops/cpu/test_broadcast_to_op.py
new file mode 100644
index 0000000000..05b0969c0d
--- /dev/null
+++ b/tests/st/ops/cpu/test_broadcast_to_op.py
@@ -0,0 +1,95 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+import pytest
+
+import mindspore.context as context
+from mindspore.common.tensor import Tensor
+from mindspore.ops import operations as P
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_broadcast():
+    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
+
+    shape = (4, 5, 2, 3, 4, 5, 6)
+    x_np = np.random.rand(2, 3, 1, 5, 1).astype(np.float32)
+    output = P.BroadcastTo(shape)(Tensor(x_np))
+    expect = np.broadcast_to(x_np, shape)
+    assert np.allclose(output.asnumpy(), expect)
+
+    shape = (3, 4, 5, 6)
+    x_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
+    output = P.BroadcastTo(shape)(Tensor(x_np))
+    expect = np.broadcast_to(x_np, shape)
+    assert np.allclose(output.asnumpy(), expect)
+
+    x1_np = np.random.rand(3, 1, 5, 1).astype(np.float16)
+    output = P.BroadcastTo(shape)(Tensor(x1_np))
+    expect = np.broadcast_to(x1_np, shape)
+    assert np.allclose(output.asnumpy(), expect)
+
+    shape = (2, 3, 4, 5)
+    x1_np = np.random.rand(4, 5).astype(np.float32)
+    output = P.BroadcastTo(shape)(Tensor(x1_np))
+    expect = np.broadcast_to(x1_np, shape)
+    assert np.allclose(output.asnumpy(), expect)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_broadcast_dyn_init():
+    """
+    Test running the op with -1's in the init shape to support varied inputs.
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
+
+    ms_shape = (-1, 4, 5, 6)
+    np_shape = (3, 4, 5, 6)
+    x_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
+    output = P.BroadcastTo(ms_shape)(Tensor(x_np))
+    expect = np.broadcast_to(x_np, np_shape)
+    assert np.allclose(output.asnumpy(), expect)
+
+    x1_np = np.random.rand(3, 1, 5, 1).astype(np.float16)
+    output = P.BroadcastTo(ms_shape)(Tensor(x1_np))
+    expect = np.broadcast_to(x1_np, np_shape)
+    assert np.allclose(output.asnumpy(), expect)
+
+    ms_shape = (2, 3, -1, 5)
+    np_shape = (2, 3, 4, 5)
+    x1_np = np.random.rand(4, 5).astype(np.float32)
+    output = P.BroadcastTo(ms_shape)(Tensor(x1_np))
+    expect = np.broadcast_to(x1_np, np_shape)
+    assert np.allclose(output.asnumpy(), expect)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_broadcast_dyn_invalid_init():
+    """
+    Test running the op with -1's in incorrect positions of the init shape.
+    Expected to raise ValueError.
+ """ + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + ms_shape = (2, -1, 4, 5) + x_np = np.random.rand(4, 5).astype(np.float32) + with pytest.raises(ValueError): + P.BroadcastTo(ms_shape)(Tensor(x_np))