[NPU] Support npu kernel for reduceany op (#31422)

* add reduce any npu op * add gather python unittest * update c_plus unittest * update python unittest * del c++ unittest * update c++ unittest * update c++ unittest
5 years ago · f400ce9f51
parent 7524ac9345
commit f400ce9f51
4 changed files with 275 additions and 0 deletions
--- a/paddle/fluid/operators/reduce_ops/CMakeLists.txt
+++ b/paddle/fluid/operators/reduce_ops/CMakeLists.txt
@ -38,3 +38,7 @@ if(WITH_GPU)
 	nv_test(check_reduce_rank_test SRCS check_reduce_rank_test.cu DEPS tensor)
    endif()
 endif()
 # Register the C++ unit test for the reduce_any NPU kernel; it is only added
 # when the build is configured with WITH_ASCEND_CL.
 if(WITH_ASCEND_CL)
    cc_test(reduce_any_op_npu_test SRCS reduce_any_op_npu_test.cc DEPS op_registry reduce_any_op scope device_context enforce executor)
 endif()
--- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc
@ -0,0 +1,55 @@
 /* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <memory>
 #include <string>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/npu_op_runner.h"
 namespace paddle {
 namespace operators {
 using Tensor = framework::Tensor;
 // NPU kernel for the `reduce_any` op.
 //
 // Reads input "X", and writes to output "Out" the result of running the
 // "ReduceAnyD" NPU operator on it. The op attributes `dim` and `keep_dim` are
 // forwarded to the NPU operator as `axes` and `keep_dims`. When the op's
 // `reduce_all` attribute is true, `dim` is ignored and every axis of X is
 // reduced.
 template <typename T>
 class ReduceAnyNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");

    const bool keep_dim = ctx.Attr<bool>("keep_dim");
    const bool reduce_all = ctx.Attr<bool>("reduce_all");
    auto dims = ctx.Attr<std::vector<int>>("dim");

    // A reduce-all request must cover every axis of X regardless of what
    // `dim` holds; otherwise only the listed axes would be reduced while the
    // output has already been shaped for a full reduction.
    if (reduce_all) {
      const int rank = x->dims().size();
      dims.clear();
      dims.reserve(rank);
      for (int axis = 0; axis < rank; ++axis) {
        dims.push_back(axis);
      }
    }

    out->mutable_data<T>(ctx.GetPlace());

    // The NPU operator names its attributes `keep_dims` / `axes`.
    NPUAttributeMap attr = {{"keep_dims", keep_dim}, {"axes", dims}};

    auto runner = NpuOpRunner("ReduceAnyD", {*x}, {*out}, attr);
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
 };
 }  // namespace operators
 }  // namespace paddle
 // Namespace aliases; only `ops` is referenced in the lines visible here.
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 // Register the NPU kernel for the `reduce_any` op. Only the bool
 // instantiation of ReduceAnyNPUKernel is registered.
 REGISTER_OP_NPU_KERNEL(reduce_any, ops::ReduceAnyNPUKernel<bool>);
--- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc
@ -0,0 +1,83 @@
 /* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #ifndef _WIN32
 #include <unistd.h>
 #endif
 #include <memory>
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
 // Short namespace aliases used throughout this test file.
 namespace f = paddle::framework;
 namespace p = paddle::platform;
 namespace m = paddle::operators::math;
 using Tensor = paddle::framework::Tensor;
 // Reference the reduce_any op and its NPU kernel from this translation unit
 // (presumably so their registrations are linked into the test binary --
 // confirm against op_registry.h).
 USE_OP(reduce_any);
 USE_OP_DEVICE_KERNEL(reduce_any, NPU);
 // Runs the `reduce_any` op on a small boolean tensor and checks the result.
 //
 // Creates variables "X" and "Out" in `scope`, fills X with
 // {true, false, false, false}, reduces it along axis 0 with keep_dim enabled
 // on the place of `ctx`, and expects a single `true` element in Out.
 //
 // scope: scope in which the input/output variables are created.
 // ctx:   device context used for the host<->device copies and for the place
 //        the op runs on.
 template <typename T>
 void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto x = scope->Var("X");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  std::vector<bool> init_x = {true, false, false, false};
  f::TensorFromVector<bool>(init_x, ctx, tensor_x);
  // Keep the shape in sync with the four elements copied above.
  tensor_x->Resize(paddle::framework::make_ddim({4}));

  ctx.Wait();

  auto place = ctx.GetPlace();
  auto out = scope->Var("Out");
  auto tensor_out = out->GetMutable<f::LoDTensor>();

  // run
  // Use the attribute names the reduce_any kernel actually reads ("dim" and
  // "keep_dim"); other names would be ignored by the kernel.
  std::vector<int> dims = {0};
  f::AttributeMap attrs = {{"dim", dims}, {"keep_dim", true}};

  auto op = f::OpRegistry::CreateOp("reduce_any", {{"X", {"X"}}},
                                    {{"Out", {"Out"}}}, attrs);

  op->Run(*scope, place);

  ctx.Wait();

  std::vector<bool> out_vec;
  f::TensorToVector<bool>(*tensor_out, ctx, &out_vec);

  ctx.Wait();

  // any({true, false, false, false}) == true, kept as a one-element tensor.
  std::vector<bool> expected_vec = {true};
  EXPECT_EQ(out_vec.size(), expected_vec.size());
  for (uint32_t i = 0; i < out_vec.size(); i++) {
    EXPECT_EQ(out_vec[i], expected_vec[i]);
  }
 }
 // Drives Compare<bool> with a fresh scope and an NPU device context built
 // from NPUPlace(0).
 TEST(reduce_any, NPU) {
  f::Scope test_scope;
  p::NPUDeviceContext npu_ctx(p::NPUPlace(0));
  Compare<bool>(&test_scope, npu_ctx);
 }
--- a/python/paddle/fluid/tests/unittests/npu/test_reduce_any_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_any_op_npu.py
@ -0,0 +1,133 @@
 #   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest, skip_check_grad_ci
 import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
 from paddle.fluid import compiler, Program, program_guard
 from paddle.fluid.framework import convert_np_dtype_to_dtype_
 paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestAny8DOp(OpTest):
    """reduce_any on NPU: random 8-D bool input reduced over axes (3, 5, 4)."""

    def set_npu(self):
        # Class-level flag; presumably consumed by the OpTest machinery.
        self.__class__.use_npu = True

    def setUp(self):
        self.set_npu()
        self.op_type = "reduce_any"
        self.place = paddle.NPUPlace(0)

        x = np.random.randint(0, 2, (2, 5, 3, 2, 2, 3, 4, 2)).astype("bool")
        reduce_axes = (3, 5, 4)

        self.inputs = {'X': x}
        self.attrs = {'dim': reduce_axes}
        # Reference result computed with numpy over the same axes.
        self.outputs = {'Out': x.any(axis=reduce_axes)}

    def test_check_output(self):
        self.check_output_with_place(self.place, check_dygraph=False)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestAnyOpWithDim(OpTest):
    """reduce_any on NPU: random (5, 6, 10) bool input reduced over axis 1."""

    def set_npu(self):
        # Class-level flag; presumably consumed by the OpTest machinery.
        self.__class__.use_npu = True

    def setUp(self):
        self.set_npu()
        self.op_type = "reduce_any"
        self.place = paddle.NPUPlace(0)

        x = np.random.randint(0, 2, (5, 6, 10)).astype("bool")

        self.inputs = {'X': x}
        self.attrs = {'dim': [1]}
        # Reference result computed with numpy along the same axis.
        self.outputs = {'Out': x.any(axis=1)}

    def test_check_output(self):
        self.check_output_with_place(self.place, check_dygraph=False)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestAny8DOpWithDim(OpTest):
    """reduce_any on NPU: random 8-D bool input reduced over axes (3, 6)."""

    def set_npu(self):
        # Class-level flag; presumably consumed by the OpTest machinery.
        self.__class__.use_npu = True

    def setUp(self):
        self.set_npu()
        self.op_type = "reduce_any"
        self.place = paddle.NPUPlace(0)

        x = np.random.randint(0, 2, (2, 5, 3, 2, 2, 3, 4, 2)).astype("bool")
        reduce_axes = (3, 6)

        self.inputs = {'X': x}
        self.attrs = {'dim': reduce_axes}
        # Reference result computed with numpy over the same axes.
        self.outputs = {'Out': x.any(axis=reduce_axes)}

    def test_check_output(self):
        self.check_output_with_place(self.place, check_dygraph=False)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestAnyOpWithKeepDim(OpTest):
    """reduce_any on NPU with keep_dim: random (5, 6, 10) bool input, axis 1."""

    def set_npu(self):
        # Class-level flag; presumably consumed by the OpTest machinery.
        self.__class__.use_npu = True

    def setUp(self):
        self.set_npu()
        self.op_type = "reduce_any"
        self.place = paddle.NPUPlace(0)

        x = np.random.randint(0, 2, (5, 6, 10)).astype("bool")
        reduce_axes = (1, )

        self.inputs = {'X': x}
        self.attrs = {'dim': reduce_axes, 'keep_dim': True}
        # Reduce with numpy, then re-insert the reduced axis as size 1 so the
        # reference has the keep_dim shape.
        reduced = x.any(axis=reduce_axes)
        self.outputs = {'Out': np.expand_dims(reduced, axis=1)}

    def test_check_output(self):
        self.check_output_with_place(self.place, check_dygraph=False)
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                  "core is not compiled with NPU")
 class TestAny8DOpWithKeepDim(OpTest):
     """reduce_any on NPU with keep_dim: random 8-D bool input, axis 1.

     Guarded by the same skipIf as the other NPU test classes in this file so
     it is skipped when paddle is not compiled with NPU.
     """

     def setUp(self):
         self.set_npu()
         self.op_type = "reduce_any"
         self.place = paddle.NPUPlace(0)
         self.inputs = {
             'X': np.random.randint(0, 2,
                                    (2, 5, 3, 2, 2, 3, 4, 2)).astype("bool")
         }
         self.attrs = {'dim': (1, ), 'keep_dim': True}
         # Reduce with numpy, then re-insert the reduced axis as size 1 so the
         # reference has the keep_dim shape.
         self.outputs = {
             'Out': np.expand_dims(
                 self.inputs['X'].any(axis=self.attrs['dim']), axis=1)
         }

     def set_npu(self):
         # Class-level flag; presumably consumed by the OpTest machinery.
         self.__class__.use_npu = True

     def test_check_output(self):
         self.check_output_with_place(self.place, check_dygraph=False)
 # Run the unittest runner when this file is executed as a script.
 if __name__ == '__main__':
    unittest.main()