!2166 GPU update argmaxwithvalue

Merge pull request !2166 from VectorSL/update-argmaxwithvalue
5 years ago · b6209eb841
parent fc3b0b95e9 46afb18e25
commit b6209eb841
3 changed files with 17 additions and 30 deletions
--- a/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/arrays/argmaxwithvalue_gpu_kernel.h
@@ -26,15 +26,7 @@ namespace kernel {
 template <typename T, typename S>
 class ArgmaxWithValueGpuKernel : public GpuKernel {
 public:
-  ArgmaxWithValueGpuKernel()
+  ArgmaxWithValueGpuKernel() : input_size_(0), output_size_(0), bound_(0), outerSize_(0), innerSize_(0) {}
-      : input_size_(0),
-        output_size_(0),
-        workspace_size_(0),
-        axis_(0),
-        dims_(1),
-        bound_(0),
-        outerSize_(0),
-        innerSize_(0) {}
  ~ArgmaxWithValueGpuKernel() override = default;
  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
@@ -46,37 +38,36 @@ class ArgmaxWithValueGpuKernel : public GpuKernel {
    T *input = GetDeviceAddress<T>(inputs, 0);
    T *output = GetDeviceAddress<T>(outputs, 1);
    S *index = GetDeviceAddress<S>(outputs, 0);
-    CalArgmaxWithValue(input_size_ / sizeof(T), input, bound_, outerSize_, innerSize_, axis_, dims_, index, output,
+    CalArgmaxWithValue(input_size_ / sizeof(T), input, bound_, outerSize_, innerSize_, index, output,
                       reinterpret_cast<cudaStream_t>(stream_ptr));
    return true;
  }
  bool Init(const CNodePtr &kernel_node) override {
-    shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+    std::vector<size_t> shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 1);
-    dims_ = shape_.size();
+    int dims = shape.size();
-
+    int axis = GetAttr<int>(kernel_node, "axis");
-    axis_ = GetAttr<int>(kernel_node, "axis");
+    if (axis < 0) {
-    if (axis_ < 0) {
+      axis += dims;
-      axis_ += dims_;
    }
    input_size_ = sizeof(T);
-    for (auto x : shape_) {
+    for (auto x : shape) {
      input_size_ *= x;
    }
    output_size_ = sizeof(S);
    for (auto x : output_shape) {
      output_size_ *= x;
    }
-    bound_ = shape_[axis_];
+    bound_ = shape[axis];
    outerSize_ = 1;
-    for (int i = axis_ - 1; i >= 0; i--) {
+    for (int i = axis - 1; i >= 0; i--) {
-      outerSize_ *= shape_[i];
+      outerSize_ *= shape[i];
    }
    innerSize_ = 1;
-    for (int i = axis_ + 1; i < dims_; i++) {
+    for (int i = axis + 1; i < dims; i++) {
-      innerSize_ *= shape_[i];
+      innerSize_ *= shape[i];
    }
    InitSizeLists();
    return true;
@@ -92,13 +83,9 @@ class ArgmaxWithValueGpuKernel : public GpuKernel {
 private:
  size_t input_size_;
  size_t output_size_;
-  size_t workspace_size_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
-  std::vector<size_t> shape_;
-  int axis_;
-  int dims_;
  int bound_;
  int outerSize_;
  int innerSize_;
--- a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cu
@@ -44,15 +44,15 @@ __global__ void ArgmaxWithValue(size_t size, const T* input, const int bound, in
 template <typename T, typename S>
 void CalArgmaxWithValue(size_t size, const T* input, const int bound_, const int outerSize_, const int innerSize_,
-                        int axis_, int dims_, S* index, T* output, cudaStream_t cuda_stream) {
+                        S* index, T* output, cudaStream_t cuda_stream) {
  ArgmaxWithValue<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, input, bound_, outerSize_, innerSize_,
                                                                     index, output);
  return;
 }
 template void CalArgmaxWithValue<float, int>(size_t size, const float* input, const int bound_, const int outerSize_,
-                                  const int innerSize_, int axis_, int dims_, int* index, float* output,
+                                  const int innerSize_, int* index, float* output,
                                  cudaStream_t cuda_stream);
 template void CalArgmaxWithValue<half, int>(size_t size, const half* input, const int bound_, const int outerSize_,
-                                  const int innerSize_, int axis_, int dims_, int* index, half* output,
+                                  const int innerSize_, int* index, half* output,
                                  cudaStream_t cuda_stream);
--- a/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/argmaxwithvalue_impl.cuh
@@ -18,5 +18,5 @@
 #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_
 template <typename T, typename S>
 void CalArgmaxWithValue(size_t size, const T* input, const int bound_, const int outerSize_, const int innerSize_,
-                        int axis_, int dims_, S* index, T* output, cudaStream_t cuda_stream);
+                        S* index, T* output, cudaStream_t cuda_stream);
 #endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ARGMAXWITHVALUE_H_