Paddle/paddle/fluid/operators/softmax_op.h

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/transpose_op.h"

namespace paddle {
namespace operators {

// Shorthand so the helpers and kernels in this header can write `Tensor`
// instead of `framework::Tensor`.
using Tensor = framework::Tensor;

// Computes the permutation that swaps dimension `axis` of `x` with its last
// dimension, together with the shape `x` has after that permutation.
//
// Parameters:
//   x     - tensor whose dims() are inspected; its data is not touched.
//   axis  - dimension to move to the end. Negative values count from the
//           back (-1 is the last dimension). After adding the rank to a
//           negative value the axis is expected to lie in [0, rank); values
//           outside that range are not validated here.
//   perm  - out: receives `rank` entries appended with push_back.
//   shape - out: receives the permuted dimension sizes, appended likewise.
//
// When `axis` already designates the last dimension nothing is appended and
// both vectors are left exactly as the caller passed them.
static inline void CalcTransPermAndShapeByAxis(const Tensor& x, const int axis,
                                               std::vector<int>* perm,
                                               std::vector<int>* shape) {
  const auto dim_x = x.dims();
  const int rank = dim_x.size();

  // Fold any negative axis (not only -1) onto its non-negative equivalent so
  // it can be compared against loop indices and used to index dim_x safely.
  const int canonical_axis = axis < 0 ? axis + rank : axis;

  if (canonical_axis == rank - 1) {
    return;
  }

  // Every leading position keeps its own dimension, except the slot at
  // `canonical_axis`, which takes the last dimension instead.
  for (int i = 0; i < rank - 1; ++i) {
    const int src = (i == canonical_axis) ? rank - 1 : i;
    perm->push_back(src);
    shape->push_back(dim_x[src]);
  }

  // The final slot receives the dimension that was swapped out.
  perm->push_back(canonical_axis);
  shape->push_back(dim_x[canonical_axis]);
}

// Forward softmax kernel.
//
// Reads input "X" and the integer attribute "axis", and writes output "Out".
// The computation itself is delegated to math::SoftmaxFunctor, which is fed
// matrices produced by framework::ReshapeToMatrix(t, rank - 1). When `axis`
// is not the last dimension, X is first transposed so that `axis` becomes the
// last dimension (permutation from CalcTransPermAndShapeByAxis), the functor
// runs on the transposed copy, and the result is transposed back into Out
// with the same permutation (a two-element swap undoes itself).
template <typename DeviceContext, typename T>
class SoftmaxKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto& dev_ctx = context.template device_context<DeviceContext>();
    auto* X = context.Input<Tensor>("X");
    auto* Out = context.Output<Tensor>("Out");
    const int rank = X->dims().size();

    // Fold a negative axis onto [0, rank) once, so that a single comparison
    // decides whether a transpose is needed. -1 maps to rank - 1, which keeps
    // the no-transpose fast path for the default "last dimension" case.
    const int raw_axis = context.Attr<int>("axis");
    const int axis = raw_axis < 0 ? raw_axis + rank : raw_axis;
    const bool need_transpose = axis != rank - 1;

    // allocate memory on device.
    Out->mutable_data<T>(context.GetPlace());

    Tensor X_2d, Out_2d;
    Tensor X_trans, Out_trans;
    std::vector<int> perm, shape;
    if (need_transpose) {
      CalcTransPermAndShapeByAxis(*X, axis, &perm, &shape);
      const auto trans_dims = framework::make_ddim(shape);
      X_trans.mutable_data<T>(trans_dims, context.GetPlace());
      Out_trans.mutable_data<T>(trans_dims, context.GetPlace());
      TransCompute<DeviceContext, T>(rank, dev_ctx, *X, &X_trans, perm);
      // Out has only just been allocated and holds no meaningful values, so
      // it is deliberately NOT transposed into Out_trans here.
      // NOTE(review): this assumes SoftmaxFunctor writes every element of its
      // output matrix - confirm against math/softmax.h.
      X_2d = framework::ReshapeToMatrix(X_trans, rank - 1);
      Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1);
    } else {
      X_2d = framework::ReshapeToMatrix(*X, rank - 1);
      Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
    }

    // The third template argument selects the inference variant of the
    // functor when the build defines PADDLE_ON_INFERENCE.
#ifdef PADDLE_ON_INFERENCE
    math::SoftmaxFunctor<DeviceContext, T, true>()(dev_ctx, &X_2d, &Out_2d);
#else
    math::SoftmaxFunctor<DeviceContext, T, false>()(dev_ctx, &X_2d, &Out_2d);
#endif

    if (need_transpose) {
      // The functor's result is read back through Out_trans, i.e. this code
      // relies on Out_2d referring to Out_trans's storage.
      TransCompute<DeviceContext, T>(rank, dev_ctx, Out_trans, Out, perm);
    }
  }
};

// Backward softmax kernel.
//
// Reads the forward output "Out", its gradient GradVarName("Out") and the
// integer attribute "axis", and writes the input gradient GradVarName("X").
// The computation itself is delegated to math::SoftmaxGradFunctor, which is
// fed matrices produced by framework::ReshapeToMatrix(t, rank - 1). When
// `axis` is not the last dimension, Out and dOut are first transposed so that
// `axis` becomes the last dimension, the functor runs on the transposed
// copies, and the result is transposed back into dX with the same
// permutation (a two-element swap undoes itself).
template <typename DeviceContext, typename T>
class SoftmaxGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto& dev_ctx = context.template device_context<DeviceContext>();
    auto* Out = context.Input<Tensor>("Out");
    auto* dOut = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* dX = context.Output<Tensor>(framework::GradVarName("X"));
    const int rank = Out->dims().size();

    // Fold a negative axis onto [0, rank) once, so that a single comparison
    // decides whether a transpose is needed. -1 maps to rank - 1, which keeps
    // the no-transpose fast path for the default "last dimension" case.
    const int raw_axis = context.Attr<int>("axis");
    const int axis = raw_axis < 0 ? raw_axis + rank : raw_axis;
    const bool need_transpose = axis != rank - 1;

    // allocate memory on device.
    dX->mutable_data<T>(context.GetPlace());

    Tensor dX_2d, Out_2d, dOut_2d;
    Tensor dX_trans, Out_trans, dOut_trans;
    std::vector<int> perm, shape;
    if (need_transpose) {
      CalcTransPermAndShapeByAxis(*dX, axis, &perm, &shape);
      const auto trans_dims = framework::make_ddim(shape);
      dX_trans.mutable_data<T>(trans_dims, context.GetPlace());
      Out_trans.mutable_data<T>(trans_dims, context.GetPlace());
      dOut_trans.mutable_data<T>(trans_dims, context.GetPlace());
      // dX has only just been allocated and holds no meaningful values, so
      // it is deliberately NOT transposed into dX_trans here.
      // NOTE(review): this assumes SoftmaxGradFunctor writes every element
      // of its output matrix - confirm against math/softmax.h.
      TransCompute<DeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);
      TransCompute<DeviceContext, T>(rank, dev_ctx, *dOut, &dOut_trans, perm);
      dX_2d = framework::ReshapeToMatrix(dX_trans, rank - 1);
      Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1);
      dOut_2d = framework::ReshapeToMatrix(dOut_trans, rank - 1);
    } else {
      dX_2d = framework::ReshapeToMatrix(*dX, rank - 1);
      Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
      dOut_2d = framework::ReshapeToMatrix(*dOut, rank - 1);
    }

    math::SoftmaxGradFunctor<DeviceContext, T>()(dev_ctx, &Out_2d, &dOut_2d,
                                                 &dX_2d);

    if (need_transpose) {
      // The functor's result is read back through dX_trans, i.e. this code
      // relies on dX_2d referring to dX_trans's storage.
      TransCompute<DeviceContext, T>(rank, dev_ctx, dX_trans, dX, perm);
    }
  }
};

}  // namespace operators
}  // namespace paddle
Fix the grammar in copyright. (#8403) 7 years ago			`/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.`
Add skeletons of `mul`, `rowwise_add`, `sigmoid`, `softmax` ops * Implement InferShape and register them, give a stub Kernel method by LOG(INFO) 8 years ago
Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments 8 years ago			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`
Add skeletons of `mul`, `rowwise_add`, `sigmoid`, `softmax` ops * Implement InferShape and register them, give a stub Kernel method by LOG(INFO) 8 years ago
Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments 8 years ago			`http://www.apache.org/licenses/LICENSE-2.0`
Add skeletons of `mul`, `rowwise_add`, `sigmoid`, `softmax` ops * Implement InferShape and register them, give a stub Kernel method by LOG(INFO) 8 years ago
Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments 8 years ago			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License. */`
Add skeletons of `mul`, `rowwise_add`, `sigmoid`, `softmax` ops * Implement InferShape and register them, give a stub Kernel method by LOG(INFO) 8 years ago
			`#pragma once`
add softmax_axis CPU kernel. test=develop 6 years ago			`#include <vector>`
Correct #include path 7 years ago			`#include "paddle/fluid/framework/op_registry.h"`
			`#include "paddle/fluid/operators/math/softmax.h"`
add softmax_axis CPU kernel. test=develop 6 years ago			`#include "paddle/fluid/operators/transpose_op.h"`
Add skeletons of `mul`, `rowwise_add`, `sigmoid`, `softmax` ops * Implement InferShape and register them, give a stub Kernel method by LOG(INFO) 8 years ago
			`namespace paddle {`
			`namespace operators {`

"remove a lot alias" 8 years ago			`using Tensor = framework::Tensor;`

add cudnn support. test=develop 6 years ago			`static inline void CalcTransPermAndShapeByAxis(const Tensor& x, const int axis,`
fix format. test=develop 6 years ago			`std::vector<int>* perm,`
			`std::vector<int>* shape) {`
add softmax_axis CPU kernel. test=develop 6 years ago			`auto dim_x = x.dims();`
			`int rank = dim_x.size();`

			`if (axis == -1 \|\| axis == rank - 1) {`
			`return;`
			`}`

			`for (int i = 0; i < rank - 1; i++) {`
			`if (i == axis) {`
add cudnn support. test=develop 6 years ago			`perm->push_back(rank - 1);`
			`shape->push_back(dim_x[rank - 1]);`
add softmax_axis CPU kernel. test=develop 6 years ago			`} else {`
add cudnn support. test=develop 6 years ago			`perm->push_back(i);`
			`shape->push_back(dim_x[i]);`
add softmax_axis CPU kernel. test=develop 6 years ago			`}`
			`}`
add cudnn support. test=develop 6 years ago			`perm->push_back(axis);`
			`shape->push_back(dim_x[axis]);`
add softmax_axis CPU kernel. test=develop 6 years ago			`}`

Refine device context (#6433) There are mainly following fixes: - take `DeviceContext` as the template parameter of math functors and OpKernel instead of `Place` - remove `eigen_device` interface in base class `DeviceContext` - remove `GetEigenDevice` interface in `ExecutionContext` and base class `DeviceContext` - remove unused `platform::EigenDeviceConverter` - rename `REGISTER_OP_GPU_KERNEL` to `REGISTER_OP_CUDA_KERNEL` - rename `USE_GPU_ONLY_OP` to `USE_CUDA_ONLY_OP` 7 years ago			`template <typename DeviceContext, typename T>`
Add Skeleton of Double support 7 years ago			`class SoftmaxKernel : public framework::OpKernel<T> {`
Reformat paddle/operators/* strictly following Google Style Guide 8 years ago			`public:`
"remove a lot alias" 8 years ago			`void Compute(const framework::ExecutionContext& context) const override {`
add cudnn support. test=develop 6 years ago			`auto& dev_ctx = context.template device_context<DeviceContext>();`
Add SoftmaxGradFunctor, and use SoftmaxGradFunctor in softmax_op instead. 7 years ago			`auto* X = context.Input<Tensor>("X");`
Change softmax 7 years ago			`auto* Out = context.Output<Tensor>("Out");`
add softmax_axis CPU kernel. test=develop 6 years ago			`const int axis = context.Attr<int>("axis");`
add cudnn support. test=develop 6 years ago			`int rank = X->dims().size();`
implement some basic OpKernel 8 years ago
softmax as function. 8 years ago			`// allocate memory on device.`
Change softmax 7 years ago			`Out->mutable_data<T>(context.GetPlace());`
implement some basic OpKernel 8 years ago
add cudnn support. test=develop 6 years ago			`std::vector<int> perm, shape;`
			`CalcTransPermAndShapeByAxis(*X, axis, &perm, &shape);`

			`Tensor X_2d, Out_2d;`
add softmax_axis CPU kernel. test=develop 6 years ago			`Tensor X_trans, Out_trans;`
add cudnn support. test=develop 6 years ago			`if (axis != -1 && axis != rank - 1) {`
			`X_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());`
fix format. test=develop 6 years ago			`Out_trans.mutable_data<T>(framework::make_ddim(shape),`
			`context.GetPlace());`
add cudnn support. test=develop 6 years ago			`TransCompute<DeviceContext, T>(rank, dev_ctx, *X, &X_trans, perm);`
			`TransCompute<DeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);`
			`X_2d = framework::ReshapeToMatrix(X_trans, rank - 1);`
			`Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1);`
			`} else {`
			`X_2d = framework::ReshapeToMatrix(*X, rank - 1);`
			`Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);`
			`}`
add softmax_axis CPU kernel. test=develop 6 years ago
Squashing MKL based softmax for inference test=develop - Added profiling to softmax functors - MKL based softmax inference op - Fix to softmax compuation via MKL - cleaning - Cosmetic fixes to softmax MKL - Fix to ON_INFER lack of propagation 6 years ago			`#ifdef PADDLE_ON_INFERENCE`
- ASUM MKL integration 6 years ago			`math::SoftmaxFunctor<DeviceContext, T, true>()(`
Revert "Softmax op optimization for inference " 6 years ago			`context.template device_context<DeviceContext>(), &X_2d, &Out_2d);`
- Softmax for Inference is enabled when ON_INFER is set test=develop 6 years ago			`#else`
			`math::SoftmaxFunctor<DeviceContext, T, false>()(`
			`context.template device_context<DeviceContext>(), &X_2d, &Out_2d);`
			`#endif`
add softmax_axis CPU kernel. test=develop 6 years ago
			`if (axis != -1 && axis != rank - 1) {`
			`TransCompute<DeviceContext, T>(rank, dev_ctx, Out_trans, Out, perm);`
			`}`
Add skeletons of `mul`, `rowwise_add`, `sigmoid`, `softmax` ops * Implement InferShape and register them, give a stub Kernel method by LOG(INFO) 8 years ago			`}`
			`};`
Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments 8 years ago
Refine device context (#6433) There are mainly following fixes: - take `DeviceContext` as the template parameter of math functors and OpKernel instead of `Place` - remove `eigen_device` interface in base class `DeviceContext` - remove `GetEigenDevice` interface in `ExecutionContext` and base class `DeviceContext` - remove unused `platform::EigenDeviceConverter` - rename `REGISTER_OP_GPU_KERNEL` to `REGISTER_OP_CUDA_KERNEL` - rename `USE_GPU_ONLY_OP` to `USE_CUDA_ONLY_OP` 7 years ago			`template <typename DeviceContext, typename T>`
Add Skeleton of Double support 7 years ago			`class SoftmaxGradKernel : public framework::OpKernel<T> {`
Reformat paddle/operators/* strictly following Google Style Guide 8 years ago			`public:`
"remove type alias header file" 8 years ago			`void Compute(const framework::ExecutionContext& context) const override {`
add cudnn support. test=develop 6 years ago			`auto& dev_ctx = context.template device_context<DeviceContext>();`
Change softmax 7 years ago			`auto* Out = context.Input<Tensor>("Out");`
			`auto* dOut = context.Input<Tensor>(framework::GradVarName("Out"));`
Add SoftmaxGradFunctor, and use SoftmaxGradFunctor in softmax_op instead. 7 years ago			`auto* dX = context.Output<Tensor>(framework::GradVarName("X"));`
add cudnn support. test=develop 6 years ago			`const int axis = context.Attr<int>("axis");`
			`int rank = Out->dims().size();`
Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments 8 years ago
Add SoftmaxGradFunctor, and use SoftmaxGradFunctor in softmax_op instead. 7 years ago			`// allocate memory on device.`
			`dX->mutable_data<T>(context.GetPlace());`
Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments 8 years ago
add cudnn support. test=develop 6 years ago			`std::vector<int> perm, shape;`
			`CalcTransPermAndShapeByAxis(*dX, axis, &perm, &shape);`

			`Tensor dX_2d, Out_2d, dOut_2d;`
			`Tensor dX_trans, Out_trans, dOut_trans;`
			`if (axis != -1 && axis != rank - 1) {`
			`dX_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());`
fix format. test=develop 6 years ago			`Out_trans.mutable_data<T>(framework::make_ddim(shape),`
			`context.GetPlace());`
			`dOut_trans.mutable_data<T>(framework::make_ddim(shape),`
			`context.GetPlace());`
add cudnn support. test=develop 6 years ago			`TransCompute<DeviceContext, T>(rank, dev_ctx, *dX, &dX_trans, perm);`
			`TransCompute<DeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);`
			`TransCompute<DeviceContext, T>(rank, dev_ctx, *dOut, &dOut_trans, perm);`
			`dX_2d = framework::ReshapeToMatrix(dX_trans, rank - 1);`
			`Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1);`
			`dOut_2d = framework::ReshapeToMatrix(dOut_trans, rank - 1);`
			`} else {`
			`dX_2d = framework::ReshapeToMatrix(*dX, rank - 1);`
			`Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);`
			`dOut_2d = framework::ReshapeToMatrix(*dOut, rank - 1);`
			`}`
make softmax supporting tensors 7 years ago
Refine device context (#6433) There are mainly following fixes: - take `DeviceContext` as the template parameter of math functors and OpKernel instead of `Place` - remove `eigen_device` interface in base class `DeviceContext` - remove `GetEigenDevice` interface in `ExecutionContext` and base class `DeviceContext` - remove unused `platform::EigenDeviceConverter` - rename `REGISTER_OP_GPU_KERNEL` to `REGISTER_OP_CUDA_KERNEL` - rename `USE_GPU_ONLY_OP` to `USE_CUDA_ONLY_OP` 7 years ago			`math::SoftmaxGradFunctor<DeviceContext, T>()(`
clean softmax_op code 7 years ago			`context.template device_context<DeviceContext>(), &Out_2d, &dOut_2d,`
			`&dX_2d);`
add cudnn support. test=develop 6 years ago
			`if (axis != -1 && axis != rank - 1) {`
			`TransCompute<DeviceContext, T>(rank, dev_ctx, dX_trans, dX, perm);`
			`}`
Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments 8 years ago			`}`
			`};`

Add skeletons of `mul`, `rowwise_add`, `sigmoid`, `softmax` ops * Implement InferShape and register them, give a stub Kernel method by LOG(INFO) 8 years ago			`} // namespace operators`
			`} // namespace paddle`