refine code and remove min clip

8 years ago · 2c6ac6297d
parent 799f80ad33
commit 2c6ac6297d
3 changed files with 198 additions and 172 deletions
--- a/paddle/gserver/activations/MKLDNNActivation.cpp
+++ b/paddle/gserver/activations/MKLDNNActivation.cpp
@ -27,16 +27,21 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
 #define MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) mkldnn_##ACT_TYPE##Activation
 /**
- * @def DEFINE_MKLDNN_ACTIVATION
+ * @def BEGIN_MKLDNN_ACTIVATION
 */
-#define DEFINE_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS)               \
+#define BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \
-  class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) : public BASE_CLASS { \
+  class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) : public BASE_CLASS {
-  private:                                                           \
+/**
 * @def END_MKLDNN_ACTIVATION
 */
 #define END_MKLDNN_ACTIVATION(ACT_TYPE)                            \
 private:                                                           \
  static const std::string name;                                   \
                                                                   \
-  public:                                                            \
+public:                                                            \
  const std::string& getName() const { return name; }              \
-  };                                                                 \
+  }                                                                \
  ;                                                                \
  const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \
      "mkldnn_" #ACT_TYPE;                                         \
  static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] {     \
@ -45,31 +50,29 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
            "mkldnn_" #ACT_TYPE);                                  \
  });
 /**
 * @def DEFINE_MKLDNN_ACTIVATION
 */
 #define DEFINE_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \
  BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS)        \
  END_MKLDNN_ACTIVATION(ACT_TYPE)
 /**
 * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION
 */
-#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA)        \
+#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(                            \
-  class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)                              \
+    ACT_TYPE, BASE_CLASS, ALPHA, BWD_ALPHA)                          \
-      : public MKLDNNEltwiseActivation {                                    \
+  BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS)                      \
-  private:                                                                  \
+private:                                                             \
    static const std::string name;                                          \
  static const float alpha;                                          \
  static const float bwdAlpha;                                       \
                                                                     \
-  public:                                                                   \
+public:                                                              \
    const std::string& getName() const { return name; }                     \
  float getAlpha() const { return alpha; }                           \
  float getBwdAlpha() const { return bwdAlpha; }                     \
-  };                                                                        \
+  END_MKLDNN_ACTIVATION(ACT_TYPE)                                    \
  const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name =          \
      "mkldnn_" #ACT_TYPE;                                                  \
  const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \
-  const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \
+  const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA;
  static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] {              \
    gMKLDNNActivationRegistrar                                              \
        .registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>(             \
            "mkldnn_" #ACT_TYPE);                                           \
  });
 /**
 * @brief MKLDNN Relu Activation.
@ -78,25 +81,138 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
 *  f(x) = negative_slope * x  (x <  0)
 * @note the negative_slope should be -0.f in forward
 */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, MKLDNNEltwiseActivation, -0.f, 0.f)
 /**
 * @brief MKLDNN Tanh Activation.
 */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, MKLDNNEltwiseActivation, 0.f, 0.f)
 /**
 * @brief MKLDNN ELU(Exponential Linear Unit) Activation.
 *  f(x) = x                              (x >= 0)
 *  f(x) = negative_slope * (exp(x) - 1)  (x <  0)
 */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, MKLDNNEltwiseActivation, 0.f, 0.f)
 /**
 * @brief Translate a registered activation name into its MKLDNN eltwise
 *        algorithm.
 * @param type registered name; must be "mkldnn_" followed by one of
 *             relu, tanh or elu.
 * @return the matching mkldnn::algorithm. An unsupported name is treated as
 *         a programming error and aborts through CHECK instead of silently
 *         yielding the invalid algorithm value 0.
 */
 mkldnn::algorithm MKLDNNEltwiseActivation::getAlgo(std::string type) const {
  // the table never changes, so build it once instead of on every call
  static const std::map<std::string, mkldnn::algorithm> algoMap = {
      {"relu", algorithm::eltwise_relu},
      {"tanh", algorithm::eltwise_tanh},
      {"elu", algorithm::eltwise_elu}};
  static const std::string prefix = "mkldnn_";
  // strip the prefix only when it is really there; blindly erasing the
  // first 7 characters could turn an unrelated name into a valid key
  auto it = algoMap.end();
  if (type.compare(0, prefix.size(), prefix) == 0) {
    it = algoMap.find(type.substr(prefix.size()));
  }
  CHECK(it != algoMap.end()) << "Unknown eltwise activation type: " << type;
  return it->second;
 }
 /**
 * Rebuild the forward eltwise primitive and the forward pipeline.
 *
 * Does nothing while the element count of act.value equals the cached cnt_.
 * Otherwise it runs the base-class reset, creates the forward primitive
 * descriptor (kept in fwdPD_ because resetBwd() needs it), queues the
 * primitives into pipelineFwd_ and flags that the backward primitive has to
 * be rebuilt as well.
 *
 * @param act activation argument; act.value is the tensor transformed in
 *            place, act.grad decides whether the tanh input must be saved.
 */
 void MKLDNNEltwiseActivation::resetFwd(Argument& act) {
  // same element count as last time: keep the existing primitives
  if (cnt_ == act.value->getElementCnt()) {
    return;
  }
  // refreshes cnt_, stream_, engine_, val_ and clears pipelineFwd_
  MKLDNNActivation::resetFwd(act);
  // note: for relu, alpha is the negative slope.
  float alpha = getAlpha();
  float beta = getBeta();
  algorithm algo = getAlgo(this->getName());
  auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training,
                                   algo,
                                   val_->getMemoryDesc(),
                                   alpha,
                                   beta);
  fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, *engine_));
  // The forward primitive runs in place on val_. By default the backward
  // pass reads its input from that same buffer.
  inVal_ = val_;
  copyInVal_ = nullptr;
  if (act.grad && algo == algorithm::eltwise_tanh) {
    // tanh backward needs the original input, which the in-place forward
    // overwrites: copy val_ into a separate matrix first. The reorder is
    // queued before the eltwise primitive so that it sees the input.
    // (passing nullptr presumably makes create() allocate its own buffer —
    // TODO confirm against MKLDNNMatrix::create)
    inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
    copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
    CHECK(copyInVal_) << "should not be emptry";
    pipelineFwd_.push_back(*copyInVal_);
  }
  // source and destination are both val_, i.e. an in-place forward
  fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_));
  pipelineFwd_.push_back(*fwd_);
  // fwdPD_ and inVal_ changed, so the backward primitive is stale
  needResetBwd_ = true;
 }
 /**
 * Rebuild the backward eltwise primitive and the backward pipeline.
 *
 * Only does work when needResetBwd_ is set, which resetFwd() does after it
 * has rebuilt the forward primitive. The gradient in act.grad is wrapped
 * with the primitive descriptor of the forward value, and pipelineBwd_ ends
 * up holding exactly one primitive.
 *
 * @param act activation argument; act.grad is the gradient handed to the
 *            backward primitive.
 */
 void MKLDNNEltwiseActivation::resetBwd(Argument& act) {
  // nothing changed since the last backward reset
  if (!needResetBwd_) {
    return;
  }
  VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
  needResetBwd_ = false;
  algorithm algo = getAlgo(this->getName());
  // backward uses its own alpha, which may differ from the forward one
  float alpha = getBwdAlpha();
  float beta = getBeta();
  // wrap the gradient using the same primitive desc as the forward value
  grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
  // NOTE(review): the engine comes from the CPUEngine singleton here, while
  // resetFwd() built fwdPD_ on *engine_ — presumably equivalent CPU engines;
  // confirm.
  auto eng = CPUEngine::Instance().getEngine();
  auto bwdDesc = eltwise_bwd::desc(
      algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
  // fwdPD_ (saved by resetFwd) is passed along with the backward desc
  auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_);
  // inVal_ is assigned in resetFwd(); it must exist before it is dereferenced
  CHECK(inVal_);
  // grad_ is passed for both remaining arguments: the gradient is in place
  bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_));
  pipelineBwd_.clear();
  pipelineBwd_.push_back(*bwd_);
 }
 /**
 * @brief MKLDNN Softmax Activation
 */
 DEFINE_MKLDNN_ACTIVATION(softmax, MKLDNNSoftmaxActivation)
 /**
 * Rebuild the forward softmax primitive when the input size has changed.
 *
 * The primitive is created for forward_scoring along axis 1 and works in
 * place on val_; it becomes the only softmax entry in pipelineFwd_.
 *
 * @param act activation argument whose value is transformed.
 */
 void MKLDNNSoftmaxActivation::resetFwd(Argument& act) {
  const bool sizeChanged = cnt_ != act.value->getElementCnt();
  if (!sizeChanged) {
    // the primitives built for this element count are still valid
    return;
  }
  MKLDNNActivation::resetFwd(act);

  const int softmaxAxis = 1;
  softmax_fwd::desc desc(mkldnn::prop_kind::forward_scoring,
                         val_->getMemoryDesc(),
                         softmaxAxis);
  softmax_fwd::primitive_desc primDesc(desc, *engine_);

  // source and destination are the same memory: in-place softmax
  fwd_.reset(new softmax_fwd(primDesc, *val_, *val_));
  pipelineFwd_.push_back(*fwd_);
 }
 /**
 * Run the softmax forward pipeline, then clamp the output from below.
 *
 * After the primitives have been submitted every element of act.value that
 * is smaller than exp(-64) is raised to exp(-64) — presumably so that a
 * later log() of the output stays finite; confirm with the loss code.
 *
 * @param act activation argument; act.value is updated in place.
 * @return a default-constructed Error.
 */
 Error __must_check MKLDNNSoftmaxActivation::forward(Argument& act) {
  resetFwd(act);
  stream_->submit(pipelineFwd_);

  real* out = act.value->getData();
  const real lowerBound = exp(-64);
  const size_t total = act.value->getElementCnt();
 #pragma omp parallel for
  for (size_t idx = 0; idx < total; ++idx) {
    if (out[idx] < lowerBound) {
      out[idx] = lowerBound;
    }
  }
  return Error();
 }
 /**
 * Backward pass of softmax, done with the generic Matrix routines rather
 * than an MKLDNN primitive.
 *
 * When the gradient matrix reports useGpu(), softmaxBackward() is called on
 * it. Otherwise two scratch matrices are (re)sized — sftMaxDot_ with the
 * shape of the gradient and sftMaxSum_ with one column per row — and the
 * gradient is updated through dotMul, colMerge and softmaxDerivative.
 *
 * @param act activation argument; act.value is the softmax output and
 *            act.grad is updated in place.
 * @return a default-constructed Error.
 */
 Error __must_check MKLDNNSoftmaxActivation::backward(Argument& act) {
  MatrixPtr value = act.value;
  MatrixPtr grad = act.grad;
  if (!grad->useGpu()) {
    SetDevice device(act.deviceId);
    const auto rows = grad->getHeight();
    const auto cols = grad->getWidth();
    const auto onGpu = useGpu(act.deviceId);
    Matrix::resizeOrCreate(sftMaxDot_, rows, cols, /* trans */ false, onGpu);
    Matrix::resizeOrCreate(sftMaxSum_, rows, 1, /* trans */ false, onGpu);
    // element-wise grad * value, merged per row into sftMaxSum_
    sftMaxDot_->dotMul(*grad, *value);
    sftMaxSum_->colMerge(*sftMaxDot_);
    grad->softmaxDerivative(*value, *sftMaxSum_);
  } else {
    grad->softmaxBackward(*value);
  }
  return Error();
 }
 /**
 * Instantiate the MKLDNN activation registered under the given name.
 * @param type registered name, e.g. "mkldnn_relu".
 * @return whatever the registrar's createByType() yields for that name.
 */
 ActivationFunction* MKLDNNActivation::create(const std::string& type) {
  ActivationFunction* activation =
      gMKLDNNActivationRegistrar.createByType(type);
  return activation;
 }
@ -108,4 +224,34 @@ std::vector<std::string> MKLDNNActivation::getAllRegisteredTypes() {
  return types;
 }
 /**
 * Common part of every forward reset.
 *
 * Caches the element count, empties the forward pipeline, creates a fresh
 * stream and CPU engine and makes val_ refer to the input as an
 * MKLDNNMatrix. An input that is not already an MKLDNNMatrix is wrapped as
 * an nchw tensor whose channel count is derived from the element count.
 *
 * @param act activation argument providing the value, the batch size and
 *            the frame height/width.
 */
 void MKLDNNActivation::resetFwd(Argument& act) {
  VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
  cnt_ = act.value->getElementCnt();
  pipelineFwd_.clear();
  stream_.reset(new MKLDNNStream());
  engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0));

  val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
  if (val_ != nullptr) {
    // the input already is an MKLDNNMatrix, use it as it is
    return;
  }

  // plain matrix: a frame height/width that is not positive counts as 1
  const int batch = act.getBatchSize();
  const int height = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1;
  const int width = act.getFrameWidth() > 0 ? act.getFrameWidth() : 1;
  const int channels = cnt_ / batch / height / width;
  // the dimensions have to multiply back to the element count exactly
  CHECK_EQ(cnt_, (size_t)batch * channels * height * width);
  val_ = MKLDNNMatrix::create(act.value,
                              {batch, channels, height, width},
                              mkldnn::memory::format::nchw,
                              *engine_);
  CHECK(val_);
  val_->downSpatial();
 }
 /**
 * Default forward pass: refresh the forward primitives through the virtual
 * resetFwd() and submit the forward pipeline to the stream.
 * @param act activation argument passed on to resetFwd().
 * @return a default-constructed Error.
 */
 Error __must_check MKLDNNActivation::forward(Argument& act) {
  resetFwd(act);
  stream_->submit(pipelineFwd_);
  return Error();
 }
 /**
 * Default backward pass: refresh the backward primitives through the
 * virtual resetBwd() and submit the backward pipeline to the stream.
 * @param act activation argument passed on to resetBwd().
 * @return a default-constructed Error.
 */
 Error __must_check MKLDNNActivation::backward(Argument& act) {
  resetBwd(act);
  // NOTE(review): the only visible place that creates stream_ is
  // MKLDNNActivation::resetFwd(), so this presumably relies on a forward
  // pass having run first — confirm.
  stream_->submit(pipelineBwd_);
  return Error();
 }
 }  // namespace paddle
--- a/paddle/gserver/activations/MKLDNNActivation.h
+++ b/paddle/gserver/activations/MKLDNNActivation.h
@ -52,41 +52,15 @@ public:
  /**
   * reset the forward primitives
   */
-  virtual void resetFwd(Argument& act) {
+  virtual void resetFwd(Argument& act);
    VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
    cnt_ = act.value->getElementCnt();
    pipelineFwd_.clear();
    stream_.reset(new MKLDNNStream());
    engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0));
    val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
    if (val_ == nullptr) {
      int bs = act.getBatchSize();
      int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1;
      int iw = act.getFrameWidth() > 0 ? act.getFrameWidth() : 1;
      int ic = cnt_ / bs / ih / iw;
      CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw);
      val_ = MKLDNNMatrix::create(
          act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_);
      CHECK(val_);
      val_->downSpatial();
    }
  }
  /**
   * reset the backward primitives,
   * can not merge this functions into resetFwd as the grad data
   * would be changing before backward.
   */
  virtual void resetBwd(Argument& act) {}
-  virtual Error __must_check forward(Argument& act) {
+  virtual Error __must_check forward(Argument& act);
-    resetFwd(act);
+  virtual Error __must_check backward(Argument& act);
    stream_->submit(pipelineFwd_);
    return Error();
  }
  virtual Error __must_check backward(Argument& act) {
    resetBwd(act);
    stream_->submit(pipelineBwd_);
    return Error();
  }
 };
 /**
@ -96,6 +70,7 @@ public:
 class MKLDNNEltwiseActivation : public MKLDNNActivation {
  typedef mkldnn::eltwise_forward eltwise_fwd;
  typedef mkldnn::eltwise_backward eltwise_bwd;
  typedef mkldnn::algorithm algorithm;
 protected:
  // save the forward primitive desc, which can be used backward
@ -115,68 +90,9 @@ public:
  virtual float getAlpha() const = 0;
  virtual float getBwdAlpha() const = 0;
  virtual float getBeta() const { return 0.f; }
-  virtual mkldnn::algorithm getAlgo(const std::string& type) const {
+  virtual algorithm getAlgo(std::string type) const;
-    if (type == "mkldnn_relu") {
+  void resetFwd(Argument& act) override;
-      return mkldnn::algorithm::eltwise_relu;
+  void resetBwd(Argument& act) override;
    } else if (type == "mkldnn_tanh") {
      return mkldnn::algorithm::eltwise_tanh;
    } else if (type == "mkldnn_elu") {
      return mkldnn::algorithm::eltwise_elu;
    } else {
      LOG(FATAL) << "Unkown eltwise activation type: " << type;
    }
    return (mkldnn::algorithm)0;
  }
  void resetFwd(Argument& act) override {
    if (cnt_ == act.value->getElementCnt()) {
      return;
    }
    MKLDNNActivation::resetFwd(act);
    // note: alpha represents the NegativeSlope when used in relu.
    float alpha = getAlpha();
    float beta = getBeta();
    mkldnn::algorithm algo = getAlgo(this->getName());
    auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training,
                                     algo,
                                     val_->getMemoryDesc(),
                                     alpha,
                                     beta);
    fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, *engine_));
    // use inplace for forward but save input value before submit
    inVal_ = val_;
    copyInVal_ = nullptr;
    if (act.grad && algo == mkldnn::algorithm::eltwise_tanh) {
      // tanh need save src input for backward
      inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
      copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
      CHECK(copyInVal_) << "should not be emptry";
      pipelineFwd_.push_back(*copyInVal_);
    }
    fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_));
    pipelineFwd_.push_back(*fwd_);
    needResetBwd_ = true;
  }
  void resetBwd(Argument& act) override {
    if (!needResetBwd_) {
      return;
    }
    VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
    needResetBwd_ = false;
    mkldnn::algorithm algo = getAlgo(this->getName());
    float alpha = getBwdAlpha();
    float beta = getBeta();
    grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
    auto eng = CPUEngine::Instance().getEngine();
    auto bwdDesc = eltwise_bwd::desc(
        algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
    auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_);
    CHECK(inVal_);
    bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_));
    pipelineBwd_.clear();
    pipelineBwd_.push_back(*bwd_);
  }
 };
 /**
@ -195,45 +111,9 @@ public:
  MKLDNNSoftmaxActivation() {}
  ~MKLDNNSoftmaxActivation() {}
  virtual const std::string& getName() const = 0;
-  void resetFwd(Argument& act) override {
+  void resetFwd(Argument& act) override;
-    if (cnt_ == act.value->getElementCnt()) {
+  Error __must_check forward(Argument& act) override;
-      return;
+  Error __must_check backward(Argument& act) override;
    }
    MKLDNNActivation::resetFwd(act);
    int axis = 1;
    auto fwdDesc = softmax_fwd::desc(
        mkldnn::prop_kind::forward_scoring, val_->getMemoryDesc(), axis);
    auto fwdPD = softmax_fwd::primitive_desc(fwdDesc, *engine_);
    fwd_.reset(new softmax_fwd(fwdPD, *val_, *val_));
    pipelineFwd_.push_back(*fwd_);
  }
  Error __must_check backward(Argument& act) override {
    MatrixPtr outputV = act.value;
    MatrixPtr outputG = act.grad;
    if (outputG->useGpu()) {
      outputG->softmaxBackward(*outputV);
    } else {
      SetDevice device(act.deviceId);
      Matrix::resizeOrCreate(sftMaxDot_,
                             outputG->getHeight(),
                             outputG->getWidth(),
                             /* trans */ false,
                             useGpu(act.deviceId));
      Matrix::resizeOrCreate(sftMaxSum_,
                             outputG->getHeight(),
                             1,
                             /* trans */ false,
                             useGpu(act.deviceId));
      sftMaxDot_->dotMul(*outputG, *outputV);
      sftMaxSum_->colMerge(*sftMaxDot_);
      act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
    }
    return Error();
  }
 };
 }  // namespace paddle
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@ -3637,7 +3637,7 @@ void CpuMatrix::oneHotCrossEntropy(Matrix& output, IVector& label) {
  for (size_t i = 0; i < numSamples; ++i, out += dim) {
    CHECK_GE(lbl[i], 0);
    CHECK_LT((size_t)lbl[i], dim);
-    cost[i] = -std::log(std::max(out[lbl[i]], real(FLT_MIN)));
+    cost[i] = -std::log(out[lbl[i]]);
  }
 }
@ -3652,7 +3652,7 @@ void CpuMatrix::oneHotCrossEntropyBp(Matrix& output, IVector& label) {
  real* grad = getData();
  int* lbl = label.getData();
  for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim) {
-    grad[lbl[i]] -= 1 / std::max(out[lbl[i]], real(FLT_MIN));
+    grad[lbl[i]] -= 1 / out[lbl[i]];
  }
 }