Paddle/paddle/gserver/activations/ActivationFunction.cpp

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "ActivationFunction.h"

#include <algorithm>
#include <memory>
#include <iostream>
#include <type_traits>
#include <string>
#include <thread>
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/parameter/Argument.h"

#include "paddle/utils/Logging.h"

namespace paddle {

static ClassRegistrar<ActivationFunction> gActivationRegistrar;
#define ACTIVATION_CLASS_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Activation

#define BEGIN_DEFINE_ACTIVATION(ACTIVATION_NAME)                             \
  class ACTIVATION_CLASS_NAME(ACTIVATION_NAME) : public ActivationFunction { \
  private:                                                                   \
    static const std::string name;                                           \
                                                                             \
  public:                                                                    \
    const std::string& getName() const { return name; }

#define END_DEFINE_ACTIVATION(ACTIVATION_NAME)                     \
  };                                                               \
  const std::string ACTIVATION_CLASS_NAME(ACTIVATION_NAME)::name = \
      #ACTIVATION_NAME;                                            \
  static InitFunction __reg_activation__##ACTIVATION_NAME([] {     \
    gActivationRegistrar.registerClass<                            \
        ACTIVATION_CLASS_NAME(ACTIVATION_NAME)>(#ACTIVATION_NAME); \
  });

/**
 * @brief The IdentityActivation class
 *
 * Do nothing when forward/backward.
 */
class IdentityActivation : public ActivationFunction {
public:
  static const std::string name;
  void forward(Argument& act) { (void)act; }
  void backward(Argument& act) { (void)act; }
  const std::string& getName() const { return name; }
};
const std::string IdentityActivation::name = "";
static InitFunction __reg_activation__identity([] {
  gActivationRegistrar.registerClass<IdentityActivation>("");
  gActivationRegistrar.registerClass<IdentityActivation>("linear");
});

/**
 * SigmoidActivation
 *
 * f(z) = \frac{1}{1+exp(-z)}
 */
BEGIN_DEFINE_ACTIVATION(sigmoid)
void forward(Argument& act) { act.value->sigmoid(*act.value); }
void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
END_DEFINE_ACTIVATION(sigmoid)

/**
 * Do Softmax activation for all sample.
 * P(y=j|x) = \frac{e^{x^Tw_j}}{\sum^K_{k=1}e^{x^Tw_k}}
 */
BEGIN_DEFINE_ACTIVATION(softmax)
private:
MatrixPtr sftMaxSum_;
MatrixPtr sftMaxDot_;
MatrixPtr one_;

public:
void forward(Argument& act) { act.value->softmax(*act.value); }

void backward(Argument& act) {
  MatrixPtr outputV = act.value;
  MatrixPtr outputG = act.grad;

  if (outputG->useGpu()) {
    outputG->softmaxBackward(*outputV);
  } else {
    SetDevice device(act.deviceId);
    Matrix::resizeOrCreate(sftMaxDot_, outputG->getHeight(),
                           outputG->getWidth(),
                           /* trans */ false, useGpu(act.deviceId));
    Matrix::resizeOrCreate(sftMaxSum_, outputG->getHeight(), 1,
                           /* trans */ false, useGpu(act.deviceId));
    if (!one_ || one_->getWidth() != outputG->getWidth()) {
      Matrix::resizeOrCreate(one_, 1, outputG->getWidth(),
                             /* trans */ false, useGpu(act.deviceId));
      one_->one();
    }

    sftMaxDot_->dotMul(*outputG, *outputV);
    sftMaxSum_->colMerge(*sftMaxDot_);

    act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
  }
}
END_DEFINE_ACTIVATION(softmax)

/// Softmax on all frames of one sequence.
/// Width of frame must be one.
BEGIN_DEFINE_ACTIVATION(sequence_softmax)
private:
ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;

public:
void forward(Argument& act) {
  CHECK_EQ(act.value->getWidth(), 1UL);

  if (!argument_.value) {
    argument_.value = Matrix::create(nullptr, /* height= */ 1, 1,
                                     /* trans= */ false, useGpu(act.deviceId));
    argument_.grad = Matrix::create(nullptr, /* height= */ 1, 1,
                                    /* trans= */ false, useGpu(act.deviceId));
  }

  auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
  act.value->sequenceSoftmax(*act.value, *starts);
}

void backward(Argument& act) {
  CHECK_EQ(act.grad->getWidth(), 1UL);

  size_t numSequences = act.getNumSequences();
  const int* starts = act.sequenceStartPositions->getData(false);

  for (size_t i = 0; i < numSequences; ++i) {
    // TODO(Dangqingqing) optimization for GPU
    size_t offset = starts[i];
    size_t size = starts[i + 1] - starts[i];
    argument_.value->setData(act.value->getData() + offset, 1UL, size);
    argument_.grad->setData(act.grad->getData() + offset, 1UL, size);

    softmax_.backward(argument_);
  }
}
END_DEFINE_ACTIVATION(sequence_softmax)

/**
 * Relu Activation.
 *
 * forward. y = max(0, z)
 *
 * derivative of relu is:
 *
 *    1 if z > 0
 *
 *    0 otherwise.
 */
BEGIN_DEFINE_ACTIVATION(relu)
void forward(Argument& act) { act.value->relu(*act.value); }

void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
END_DEFINE_ACTIVATION(relu)

/**
 * BRelu Activation.
 *
 * forward. y = min(24, max(0, z))
 *
 * derivative of brelu is:
 *
 *    1 if 0 < z < 24
 *
 *    0 otherwise.
 *
 * TODO(yuyang18): Remove magic number 24 or make it configuable.
 */
BEGIN_DEFINE_ACTIVATION(brelu)
void forward(Argument& act) { act.value->brelu(*act.value); }

void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
END_DEFINE_ACTIVATION(brelu)

/**
 * tanh activation.
 *
 * f(z) = tanh(z)=\frac{e^z-e^{-z}}{e^z+e^{-z}}
 */
BEGIN_DEFINE_ACTIVATION(tanh)
void forward(Argument& act) { act.value->tanh(*act.value); }

void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
END_DEFINE_ACTIVATION(tanh)

/**
 * Scaled Tanh Activation
 *
 * f(z) = 1.7159 * tanh(2/3*z)
 */
BEGIN_DEFINE_ACTIVATION(stanh)
private:
real a, b;

public:
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); }

void backward(Argument& act) {
  act.grad->scaledTanhDerivative(*act.value, a, b);
}
END_DEFINE_ACTIVATION(stanh)

/**
 * Soft relu activation.
 *
 * f(z) = ln(1+e^z)
 */
BEGIN_DEFINE_ACTIVATION(softrelu)
void forward(Argument& act) { act.value->softrelu(*act.value); }

void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
END_DEFINE_ACTIVATION(softrelu)

/**
 * Abs Activation.
 *
 * Forward: f(z) = abs(z)
 *
 * Derivative:
 *
 *     1   if z>0
 *
 *    -1   if z<0
 *
 *     0   if z=0
 */
BEGIN_DEFINE_ACTIVATION(abs)
void forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(),
                         /* trans */ false, useGpu(act.deviceId));

  act.in->copyFrom(*act.value);
  act.value->abs(*act.value);
}

void backward(Argument& act) { act.grad->absDerivative(*act.in); }
END_DEFINE_ACTIVATION(abs)

/**
 * Square Activation.
 *
 * f(z) = z^2.
 */
BEGIN_DEFINE_ACTIVATION(square)
void forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(),
                         /* trans */ false, useGpu(act.deviceId));

  act.in->copyFrom(*act.value);
  act.value->square(*act.value);
}

void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
END_DEFINE_ACTIVATION(square)

BEGIN_DEFINE_ACTIVATION(exponential)
void forward(Argument& act) { act.value->exp(*act.value); }

void backward(Argument& act) { act.grad->expDerivative(*act.value); }
END_DEFINE_ACTIVATION(exponential)

ActivationFunction* ActivationFunction::create(const std::string& type) {
  return gActivationRegistrar.createByType(type);
}

}  // namespace paddle