!11120 [cpu] add P.FloorDiv to cpu

From: @yanglf1121 Reviewed-by: @kisnwang Signed-off-by:
4 years ago · a1cb402763
parent 59551879a0 4a0fe13430
commit a1cb402763
5 changed files with 117 additions and 0 deletions
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc
@ -102,6 +102,29 @@ void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out, size_t s
  }
 }

+// Element-wise floor division: out[i] = floor(input1 / input2) for every i in [start, end).
+// GenIndex(i, &idx) supplies the operand offsets: idx[0] indexes input1 and idx[1] indexes input2.
+//
+// Division by zero never traps:
+//   * 0 / 0 stores numeric_limits<T>::quiet_NaN(), which is 0 for integer T;
+//   * x / 0 stores +/-infinity when T has one, otherwise max() / min() picked by the sign of x.
+//
+// Integer quotients are rounded towards negative infinity. C++ integer division truncates towards
+// zero, so applying floor() to an already-truncated quotient would return -3 for -7 / 2 instead of -4.
+// The integer path also stays in T, avoiding a round trip through double that could lose precision
+// for large 64-bit values.
+template <typename T>
+void ArithmeticCPUKernel::FloorDiv(const T *input1, const T *input2, T *out, size_t start, size_t end) {
+  for (size_t i = start; i < end; i++) {
+    std::vector<size_t> idx;
+    GenIndex(i, &idx);
+    const T dividend = input1[idx[0]];
+    const T divisor = input2[idx[1]];
+    if (divisor == 0) {
+      if (dividend == 0) {
+        out[i] = std::numeric_limits<T>::quiet_NaN();
+        continue;
+      }
+      if (std::numeric_limits<T>::has_infinity) {
+        out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
+      } else {
+        out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
+      }
+      continue;
+    }
+    if (!std::numeric_limits<T>::is_integer) {
+      // Floating point: the quotient is exact enough for floor() to round it correctly.
+      out[i] = floor(dividend / divisor);
+      continue;
+    }
+    // Signed min() / -1 is not representable in T; the division itself is undefined behaviour
+    // (and traps on x86). Saturate to max(), mirroring the clamping used for x / 0 above.
+    if (std::numeric_limits<T>::is_signed && divisor == static_cast<T>(-1) &&
+        dividend == std::numeric_limits<T>::min()) {
+      out[i] = std::numeric_limits<T>::max();
+      continue;
+    }
+    T quotient = dividend / divisor;
+    // Truncation rounded towards zero; step down by one when the division was inexact and the
+    // operands have opposite signs, i.e. when the true quotient is negative and non-integral.
+    const bool inexact = (quotient * divisor != dividend);
+    const bool opposite_signs = ((dividend < 0) != (divisor < 0));
+    if (inexact && opposite_signs) {
+      quotient -= 1;
+    }
+    out[i] = quotient;
+  }
+}
+
 template <typename T>
 void ArithmeticCPUKernel::Mod(const T *input1, const T *input2, T *out, size_t start, size_t end) {
  for (size_t i = start; i < end; i++) {
@ -207,6 +230,8 @@ void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) {
    operate_type_ = REALDIV;
  } else if (kernel_name == prim::kPrimDiv->name()) {
    operate_type_ = DIV;
+  } else if (kernel_name == prim::kPrimFloorDiv->name()) {
+    operate_type_ = FLOORDIV;
  } else if (kernel_name == prim::kPrimMod->name()) {
    operate_type_ = MOD;
  } else if (kernel_name == prim::kPrimPow->name()) {
@ -389,6 +414,8 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, co
      threads.emplace_back(std::thread(&ArithmeticCPUKernel::RealDiv<T>, this, input1, input2, output, start, end));
    } else if (operate_type_ == DIV) {
      threads.emplace_back(std::thread(&ArithmeticCPUKernel::Div<T>, this, input1, input2, output, start, end));
+    } else if (operate_type_ == FLOORDIV) {
+      threads.emplace_back(std::thread(&ArithmeticCPUKernel::FloorDiv<T>, this, input1, input2, output, start, end));
    } else if (operate_type_ == MOD) {
      threads.emplace_back(std::thread(&ArithmeticCPUKernel::Mod<T>, this, input1, input2, output, start, end));
    } else if (operate_type_ == POW) {
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h
@ -50,6 +50,8 @@ class ArithmeticCPUKernel : public CPUKernel {
  template <typename T>
  void Div(const T *input1, const T *input2, T *out, size_t start, size_t end);
  template <typename T>
+  void FloorDiv(const T *input1, const T *input2, T *out, size_t start, size_t end);
+  template <typename T>
  void Mod(const T *input1, const T *input2, T *out, size_t start, size_t end);
  template <typename T>
  void Pow(const T *input1, const T *input2, T *out, size_t start, size_t end);
@ -117,6 +119,16 @@ MS_REG_CPU_KERNEL(
 MS_REG_CPU_KERNEL(
  Div, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ArithmeticCPUKernel);
+MS_REG_CPU_KERNEL(  // FloorDiv: int64 inputs, int64 output
+  FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  ArithmeticCPUKernel);
+MS_REG_CPU_KERNEL(  // FloorDiv: int32 inputs, int32 output
+  FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  ArithmeticCPUKernel);
+MS_REG_CPU_KERNEL(  // FloorDiv: float32 inputs, float32 output
+  FloorDiv,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ArithmeticCPUKernel);
 MS_REG_CPU_KERNEL(
  Mod, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
  ArithmeticCPUKernel);
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h
@ -67,6 +67,7 @@ enum OperateType {
  SQRT,
  POW,
  REALDIV,
+  FLOORDIV,
  MOD,
  NEG,
  LESS,
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@ -261,6 +261,7 @@ inline const PrimitivePtr kPrimInplaceAdd = std::make_shared<Primitive>("Inplace
 inline const PrimitivePtr kPrimInplaceSub = std::make_shared<Primitive>("InplaceSub");
 inline const PrimitivePtr kPrimPow = std::make_shared<Primitive>("Pow");
 inline const PrimitivePtr kPrimRealDiv = std::make_shared<Primitive>("RealDiv");
+inline const PrimitivePtr kPrimFloorDiv = std::make_shared<Primitive>("FloorDiv");
 inline const PrimitivePtr kPrimSqrt = std::make_shared<Primitive>("Sqrt");
 inline const PrimitivePtr kPrimSqrtGrad = std::make_shared<Primitive>("SqrtGrad");
 inline const PrimitivePtr kPrimReciprocal = std::make_shared<Primitive>("Reciprocal");
--- a/tests/st/ops/cpu/test_arithmetic_op.py
+++ b/tests/st/ops/cpu/test_arithmetic_op.py
@ -42,6 +42,15 @@ class DivNet(nn.Cell):
        return self.div(x, y)


+class FloorDivNet(nn.Cell):
+    def __init__(self):
+        super(FloorDivNet, self).__init__()
+        self.floor_div = P.FloorDiv()
+
+    def construct(self, x, y):
+        return self.floor_div(x, y)
+
+
 class ModNet(nn.Cell):
    def __init__(self):
        super(ModNet, self).__init__()
@ -156,6 +165,71 @@ def test_div():
    assert output7.shape == expect7.shape


+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu_training
+@pytest.mark.env_onecard
+def test_floor_div():
+    prop = 1 if np.random.random() < 0.5 else -1
+    x0_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float32) * prop
+    y0_np = np.random.randint(1, 100, (2, 1, 4, 4)).astype(np.float32) * prop
+    x1_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.float16) * prop
+    y1_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float16) * prop
+    x2_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.int32) * prop
+    y2_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int32) * prop
+    x3_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int32) * prop
+    y3_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.float32) * prop
+    x4_np = np.random.randint(1, 100, (2, 1, 1, 4)).astype(np.int64) * prop
+    y4_np = np.random.randint(1, 100, (2, 3, 4, 4)).astype(np.int64) * prop
+
+    x0 = Tensor(x0_np)
+    y0 = Tensor(y0_np)
+    x1 = Tensor(x1_np)
+    y1 = Tensor(y1_np)
+    x2 = Tensor(x2_np)
+    y2 = Tensor(y2_np)
+    x3 = Tensor(x3_np)
+    y3 = Tensor(y3_np)
+    x4 = Tensor(x4_np)
+    y4 = Tensor(y4_np)
+
+    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
+    floor_div = FloorDivNet()
+    output0 = floor_div(x0, y0)
+    expect0 = np.floor_divide(x0_np, y0_np)
+    diff0 = output0.asnumpy() - expect0
+    error0 = np.ones(shape=expect0.shape) * 1.0e-5
+    assert np.all(diff0 < error0)
+    assert output0.shape == expect0.shape
+
+    output1 = floor_div(x1, y1)
+    expect1 = np.floor_divide(x1_np, y1_np)
+    diff1 = output1.asnumpy() - expect1
+    error1 = np.ones(shape=expect1.shape) * 1.0e-5
+    assert np.all(diff1 < error1)
+    assert output1.shape == expect1.shape
+
+    output2 = floor_div(x2, y2)
+    expect2 = np.floor_divide(x2_np, y2_np).astype(np.float16)
+    diff2 = output2.asnumpy() - expect2
+    error2 = np.ones(shape=expect2.shape) * 1.0e-5
+    assert np.all(diff2 < error2)
+    assert output2.shape == expect2.shape
+
+    output3 = floor_div(x3, y3)
+    expect3 = np.floor_divide(x3_np, y3_np)
+    diff3 = output3.asnumpy() - expect3
+    error3 = np.ones(shape=expect3.shape) * 1.0e-5
+    assert np.all(diff3 < error3)
+    assert output3.shape == expect3.shape
+
+    output4 = floor_div(x4, y4)
+    expect4 = np.floor_divide(x4_np, y4_np)
+    diff4 = output4.asnumpy() - expect4
+    error4 = np.ones(shape=expect4.shape) * 1.0e-5
+    assert np.all(diff4 < error4)
+    assert output4.shape == expect4.shape
+
+
@pytest.mark.level0
@pytest.mark.platform_x86_cpu_training
@pytest.mark.env_onecard
@ -249,6 +323,8 @@ def test_mod():
    assert np.all(output7.asnumpy() == expect7)
    assert output6.shape == expect6.shape

+
 test_sub()
 test_div()
+test_floor_div()
 test_mod()