Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into clip_op
commit
a3c3b7866e
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,138 @@
|
||||
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "MKLDNNLayer.h"
|
||||
#include "mkldnn.hpp"
|
||||
|
||||
namespace paddle {
|
||||
// Short aliases for the MKL-DNN pooling primitives used by MKLDNNPoolLayer.
using pool_fwd = mkldnn::pooling_forward;
using pool_bwd = mkldnn::pooling_backward;
|
||||
|
||||
/**
|
||||
* @brief A subclass of MKLDNNLayer pool layer.
|
||||
*
|
||||
* The config file api is mkldnn_pool
|
||||
*/
|
||||
class MKLDNNPoolLayer : public MKLDNNLayer {
|
||||
protected:
|
||||
// padding height and width
|
||||
int ph_, pw_;
|
||||
// stride height and width
|
||||
int sh_, sw_;
|
||||
// filter(kenerl) height and width
|
||||
int fh_, fw_;
|
||||
|
||||
// pooling_avg or pooling_max
|
||||
mkldnn::algorithm poolAlgo_;
|
||||
|
||||
// MKLDNNMatrixPtr which should be created from CPU Device
|
||||
MKLDNNMatrixPtr cpuOutVal_;
|
||||
MKLDNNMatrixPtr cpuOutGrad_;
|
||||
// convert handle between CPU device and MKLDNN device
|
||||
std::shared_ptr<mkldnn::reorder> cvtOutVal_;
|
||||
std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
|
||||
|
||||
// save forward primitive_desc, which can be used backward
|
||||
std::shared_ptr<pool_fwd::primitive_desc> fwdPD_;
|
||||
// according to https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
|
||||
// test_pooling_forward.cpp, pool need workspace for backward
|
||||
std::shared_ptr<mkldnn::memory> workspace_;
|
||||
|
||||
public:
|
||||
explicit MKLDNNPoolLayer(const LayerConfig& config) : MKLDNNLayer(config) {}
|
||||
|
||||
~MKLDNNPoolLayer() {}
|
||||
|
||||
bool init(const LayerMap& layerMap,
|
||||
const ParameterMap& parameterMap) override;
|
||||
|
||||
void reshape(
|
||||
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
|
||||
|
||||
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
|
||||
MKLDNNMatrixPtr& in,
|
||||
MKLDNNMatrixPtr& wgt,
|
||||
MKLDNNMatrixPtr& bias,
|
||||
MKLDNNMatrixPtr& out) override;
|
||||
|
||||
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
|
||||
MKLDNNMatrixPtr& in,
|
||||
MKLDNNMatrixPtr& wgt,
|
||||
MKLDNNMatrixPtr& bias,
|
||||
MKLDNNMatrixPtr& out) override;
|
||||
|
||||
void updateInputData() override;
|
||||
|
||||
void printSizeInfo() override {
|
||||
MKLDNNLayer::printSizeInfo();
|
||||
VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
|
||||
<< ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
|
||||
<< ", sw: " << sw_;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Forward functions: reset buffers(input, output),
|
||||
* reset primitive descriptor,
|
||||
* reset pipeline.
|
||||
*/
|
||||
void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
|
||||
void resetInValue(MKLDNNMatrixPtr& in);
|
||||
void resetOutValue(MKLDNNMatrixPtr& out);
|
||||
void resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
|
||||
MKLDNNMatrixPtr in,
|
||||
MKLDNNMatrixPtr out);
|
||||
void resetFwdPipeline(std::vector<mkldnn::primitive>& pipeline,
|
||||
std::shared_ptr<pool_fwd::primitive_desc>& pd,
|
||||
MKLDNNMatrixPtr& in,
|
||||
MKLDNNMatrixPtr& out);
|
||||
|
||||
/**
|
||||
* Backward functions: reset buffers(input, output),
|
||||
* reset primitive descriptor,
|
||||
* reset pipeline.
|
||||
*/
|
||||
void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
|
||||
void resetOutGrad(MKLDNNMatrixPtr& out);
|
||||
void resetInGrad(MKLDNNMatrixPtr& in);
|
||||
void resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
|
||||
MKLDNNMatrixPtr& in,
|
||||
MKLDNNMatrixPtr& out);
|
||||
void resetBwdPipeline(std::vector<mkldnn::primitive>& pipeline,
|
||||
std::shared_ptr<pool_bwd::primitive_desc>& pd,
|
||||
MKLDNNMatrixPtr& in,
|
||||
MKLDNNMatrixPtr& out);
|
||||
|
||||
/**
|
||||
* get padding_r according to
|
||||
* https://github.com/01org/mkl-dnn/blob/master/tests/gtests/
|
||||
* test_pooling_forward.cpp
|
||||
*/
|
||||
mkldnn::memory::dims getPaddingR() const {
|
||||
mkldnn::memory::dims padR = {ph_, pw_};
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
if ((ih_ + ph_ + padR[0] - fh_) / sh_ + 1 < oh_) {
|
||||
++padR[0];
|
||||
}
|
||||
if ((iw_ + pw_ + padR[1] - fw_) / sw_ + 1 < ow_) {
|
||||
++padR[1];
|
||||
}
|
||||
}
|
||||
return padR;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace paddle
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,55 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
|
||||
#include "NEONFunctions.h"
|
||||
#include <arm_neon.h>
|
||||
|
||||
namespace paddle {
|
||||
namespace neon {
|
||||
|
||||
// Element-wise ReLU over `len` floats: b[i] = a[i] > 0.0f ? a[i] : 0.0f
//
// The bulk of the buffer is processed 16 floats (four 4-lane vectors) at a
// time with vmaxq_f32 against zero; the remaining len % 16 elements are
// handled by a scalar loop.
void relu(const float* a, float* b, int len) {
  const float32x4_t zeros = vdupq_n_f32(0.f);
  const int blocks = len / 16;
  const int tail = len % 16;

  for (int blk = 0; blk < blocks; ++blk) {
    // Load the whole 16-float block before storing anything, exactly as the
    // loads and stores were ordered before.
    const float32x4_t in0 = vld1q_f32(a);
    const float32x4_t in1 = vld1q_f32(a + 4);
    const float32x4_t in2 = vld1q_f32(a + 8);
    const float32x4_t in3 = vld1q_f32(a + 12);

    const float32x4_t out0 = vmaxq_f32(in0, zeros);
    const float32x4_t out1 = vmaxq_f32(in1, zeros);
    const float32x4_t out2 = vmaxq_f32(in2, zeros);
    const float32x4_t out3 = vmaxq_f32(in3, zeros);

    vst1q_f32(b, out0);
    vst1q_f32(b + 4, out1);
    vst1q_f32(b + 8, out2);
    vst1q_f32(b + 12, out3);

    a += 16;
    b += 16;
  }

  // a and b now point just past the vectorized part.
  for (int idx = 0; idx < tail; ++idx) {
    b[idx] = a[idx] > 0.0f ? a[idx] : 0.0f;
  }
}
|
||||
|
||||
} // namespace neon
|
||||
} // namespace paddle
|
||||
|
||||
#endif
|
@ -0,0 +1,23 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace paddle {
|
||||
namespace neon {
|
||||
|
||||
// Element-wise ReLU: for each of the `len` floats, b[i] = a[i] > 0.0f ? a[i] : 0.0f.
// `a` is the input buffer and `b` the output buffer.
void relu(const float* a, float* b, int len);
|
||||
|
||||
} // namespace neon
|
||||
} // namespace paddle
|
@ -0,0 +1,147 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/cross_entropy_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
using framework::LoDTensor;
|
||||
|
||||
class CrossEntropyOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(const framework::InferShapeContext &ctx) const override {
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
|
||||
"Input(Label) must not be null.");
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"), "Output(Y) must not be null.");
|
||||
|
||||
auto x = ctx.Input<Tensor>("X");
|
||||
auto label = ctx.Input<Tensor>("Label");
|
||||
PADDLE_ENFORCE_EQ(x->dims().size(), 2, "Input(X)'s rank must be 2.");
|
||||
PADDLE_ENFORCE_EQ(label->dims().size(), 2,
|
||||
"Input(Label)'s rank must be 2.");
|
||||
// TODO(xinghai-sun): remove this check after swtiching to bool
|
||||
PADDLE_ENFORCE(ctx.Attr<int>("soft_label") == 0 ||
|
||||
ctx.Attr<int>("soft_label") == 1);
|
||||
PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0],
|
||||
"The 1st dimension of Input(X) and Input(Label) must "
|
||||
"be equal.");
|
||||
if (ctx.Attr<int>("soft_label") == 1) {
|
||||
PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
|
||||
"If Attr(soft_label) == 1, The 2nd dimension of "
|
||||
"Input(X) and Input(Label) must be equal.");
|
||||
} else {
|
||||
PADDLE_ENFORCE_EQ(label->dims()[1], 1,
|
||||
"If Attr(soft_label) == 0, The 2nd dimension of "
|
||||
"Input(Label) must be 1.");
|
||||
}
|
||||
|
||||
ctx.Output<LoDTensor>("Y")->Resize({x->dims()[0], 1});
|
||||
}
|
||||
};
|
||||
|
||||
class CrossEntropyGradientOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(const framework::InferShapeContext &ctx) const override {
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
|
||||
"Input(Label) must not be null.");
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
|
||||
"Input(Y@GRAD) must not be null.");
|
||||
|
||||
auto x = ctx.Input<Tensor>("X");
|
||||
auto label = ctx.Input<Tensor>("Label");
|
||||
auto dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
|
||||
PADDLE_ENFORCE_EQ(x->dims().size(), 2, "Input(X)'s rank must be 2.");
|
||||
PADDLE_ENFORCE_EQ(dy->dims().size(), 2, "Input(Y@Grad)'s rank must be 2.");
|
||||
PADDLE_ENFORCE_EQ(label->dims().size(), 2,
|
||||
"Input(Label)'s rank must be 2.");
|
||||
// TODO(xinghai-sun): remove this check after swtiching to bool
|
||||
PADDLE_ENFORCE(ctx.Attr<int>("soft_label") == 0 ||
|
||||
ctx.Attr<int>("soft_label") == 1);
|
||||
PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0],
|
||||
"The 1st dimension of Input(X) and Input(Label) must "
|
||||
"be equal.");
|
||||
PADDLE_ENFORCE_EQ(x->dims()[0], dy->dims()[0],
|
||||
"The 1st dimension of Input(X) and Input(Y@Grad) must "
|
||||
"be equal.");
|
||||
PADDLE_ENFORCE_EQ(dy->dims()[1], 1,
|
||||
"The 2nd dimension of Input(Y@Grad) must be 1.");
|
||||
if (ctx.Attr<int>("soft_label") == 1) {
|
||||
PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
|
||||
"If Attr(soft_label) == 1, The 2nd dimension of "
|
||||
"Input(X) and Input(Label) must be equal.");
|
||||
} else {
|
||||
PADDLE_ENFORCE_EQ(label->dims()[1], 1,
|
||||
"If Attr(soft_label) == 0, The 2nd dimension of "
|
||||
"Input(Label) must be 1.");
|
||||
}
|
||||
|
||||
auto dx = ctx.Output<LoDTensor>(framework::GradVarName("X"));
|
||||
dx->Resize(x->dims());
|
||||
}
|
||||
};
|
||||
|
||||
// Declares the proto of the cross_entropy operator: inputs X and Label,
// output Y, the integer attribute soft_label (default 0) and the operator's
// documentation string.
class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  CrossEntropyOpMaker(framework::OpProto *proto,
                      framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The first input of CrossEntropyOp");
    AddInput("Label", "The second input of CrossEntropyOp");
    AddOutput("Y", "The output of CrossEntropyOp");
    AddAttr<int>("soft_label", "Is soft label. Default zero.").SetDefault(0);

    // Spelling fixed in the published text: "summation", "vectorized".
    AddComment(R"DOC(
CrossEntropy Operator.

It supports both standard cross-entropy and soft-label cross-entropy loss
computation.
1) One-hot cross-entropy:
    soft_label = 0, Label[i, 0] indicates the class index for sample i:

                Y[i] = -log(X[i, Label[i]])

2) Soft-label cross-entropy:
    soft_label = 1, Label[i, j] indicates the soft label of class j
    for sample i:

                Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}

   Please make sure that in this case the summation of each row of Label
   equals one.

3) One-hot cross-entropy with vectorized Input(Label):
     As a special case of 2), when each row of Input(Label) has only one
     non-zero element (equals 1), soft-label cross-entropy degenerates to a
     one-hot cross-entropy with one-hot label representation.
)DOC");
  }
};
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker,
|
||||
cross_entropy_grad, ops::CrossEntropyGradientOp);
|
||||
REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<float>);
|
||||
REGISTER_OP_CPU_KERNEL(cross_entropy_grad,
|
||||
ops::CrossEntropyGradientOpKernel<float>);
|
@ -0,0 +1,158 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/operators/cross_entropy_op.h"
|
||||
#include "paddle/platform/assert.h"
|
||||
#include "paddle/platform/hostdevice.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
template <typename T>
|
||||
__global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
|
||||
const int N, const int D) {
|
||||
// TOOD(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file.
|
||||
// CUDA_1D_KERNEL_LOOP(i, N) {
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
PADDLE_ASSERT(label[i] >= 0 && label[i] < D);
|
||||
Y[i] = -tolerable_value(log(X[i * D + label[i]]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label,
|
||||
const int N, const int D) {
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
T sum = static_cast<T>(0);
|
||||
for (int j = 0; j < D; j++) {
|
||||
sum += label[i * D + j] * tolerable_value(log(X[i * D + j]));
|
||||
}
|
||||
Y[i] = -sum;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(qingqing): make zero setting an common function.
|
||||
template <typename T>
|
||||
__global__ void zero(T* X, const int N) {
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
X[i] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
|
||||
const int* label, const int N,
|
||||
const int D) {
|
||||
// TOOD(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file.
|
||||
// CUDA_1D_KERNEL_LOOP(i, N) {
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
int idx = i * D + label[i];
|
||||
dX[idx] = -dY[i] / X[idx];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void SoftCrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
|
||||
const T* label, const int N,
|
||||
const int D) {
|
||||
// TOOD(qingqing): optimize for this kernel
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
||||
i += blockDim.x * gridDim.x) {
|
||||
for (int j = 0; j < D; ++j) {
|
||||
int idx = i * D + j;
|
||||
dX[idx] = -label[idx] * dY[i] / X[idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class CrossEntropyOpCUDAKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
|
||||
"It must use GPUPlace.");
|
||||
|
||||
auto x = ctx.Input<Tensor>("X");
|
||||
auto y = ctx.Output<Tensor>("Y");
|
||||
auto label = ctx.Input<Tensor>("Label");
|
||||
|
||||
auto* x_data = x->data<T>();
|
||||
y->mutable_data<T>(ctx.GetPlace());
|
||||
auto* y_data = y->data<T>();
|
||||
|
||||
int n = x->dims()[0];
|
||||
int d = x->dims()[1];
|
||||
int block = 512;
|
||||
int grid = (n + block - 1) / block;
|
||||
// TODO(qingqing) launch kernel on specified stream
|
||||
// base on ExecutionContext.
|
||||
if (ctx.Attr<int>("soft_label") == 1) {
|
||||
auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
|
||||
SoftCrossEntropyKernel<T><<<grid, block>>>(y_data, x_data, label_data, n,
|
||||
d);
|
||||
} else {
|
||||
auto* label_data = ctx.Input<Tensor>("Label")->data<int>();
|
||||
CrossEntropyKernel<T><<<grid, block>>>(y_data, x_data, label_data, n, d);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
|
||||
"It must use GPUPlace.");
|
||||
|
||||
auto x = ctx.Input<Tensor>("X");
|
||||
auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
|
||||
auto dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
|
||||
auto label = ctx.Input<Tensor>("Label");
|
||||
|
||||
auto* dx_data = dx->mutable_data<T>(ctx.GetPlace());
|
||||
auto* dy_data = dy->data<T>();
|
||||
auto* x_data = x->data<T>();
|
||||
|
||||
int n = x->dims()[0];
|
||||
int d = x->dims()[1];
|
||||
int block = 512;
|
||||
int grid = (n * d + block - 1) / block;
|
||||
zero<T><<<grid, block>>>(dx_data, n * d);
|
||||
grid = (n + block - 1) / block;
|
||||
// TODO(qingqing): launch kernel on specified stream
|
||||
// base on ExecutionContext.
|
||||
if (ctx.Attr<int>("soft_label") == 1) {
|
||||
auto* label_data = label->data<T>();
|
||||
SoftCrossEntropyGradientKernel<T><<<grid, block>>>(
|
||||
dx_data, dy_data, x_data, label_data, n, d);
|
||||
} else {
|
||||
auto* label_data = label->data<int>();
|
||||
CrossEntropyGradientKernel<T><<<grid, block>>>(dx_data, dy_data, x_data,
|
||||
label_data, n, d);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP_GPU_KERNEL(cross_entropy, ops::CrossEntropyOpCUDAKernel<float>);
|
||||
REGISTER_OP_GPU_KERNEL(cross_entropy_grad,
|
||||
ops::CrossEntropyGradientOpCUDAKernel<float>);
|
@ -0,0 +1,117 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/platform/hostdevice.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
// Shorthand for framework::Tensor, used by the cross-entropy kernels below.
using Tensor = framework::Tensor;
|
||||
|
||||
template <typename T>
|
||||
HOSTDEVICE T tolerable_value(const T x) {
|
||||
PADDLE_ASSERT(std::is_floating_point<T>::value);
|
||||
const T kApproInf = 1e20;
|
||||
if (x == INFINITY) {
|
||||
return kApproInf;
|
||||
}
|
||||
if (x == -INFINITY) {
|
||||
return -kApproInf;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class CrossEntropyOpKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
|
||||
"It must use CPUPlace.");
|
||||
|
||||
auto x = ctx.Input<Tensor>("X");
|
||||
auto y = ctx.Output<Tensor>("Y");
|
||||
|
||||
auto* x_data = x->data<T>();
|
||||
y->mutable_data<T>(ctx.GetPlace());
|
||||
auto* y_data = y->data<T>();
|
||||
|
||||
int batch_size = x->dims()[0];
|
||||
int class_num = x->dims()[1];
|
||||
|
||||
if (ctx.Attr<int>("soft_label") == 1) {
|
||||
auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
|
||||
int index = 0;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
T sum = static_cast<T>(0);
|
||||
for (int j = 0; j < class_num; ++j) {
|
||||
sum += label_data[index] * tolerable_value(std::log(x_data[index]));
|
||||
y_data[i] = -sum;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto* label_data = ctx.Input<Tensor>("Label")->data<int>();
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
int index = i * class_num + label_data[i];
|
||||
y_data[i] = -tolerable_value(std::log(x_data[index]));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class CrossEntropyGradientOpKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
|
||||
"It must use CPUPlace.");
|
||||
|
||||
auto x = ctx.Input<Tensor>("X");
|
||||
auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
|
||||
auto dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
|
||||
auto label = ctx.Input<Tensor>("Label");
|
||||
|
||||
auto* dx_data = dx->mutable_data<T>(ctx.GetPlace());
|
||||
auto* dy_data = dy->data<T>();
|
||||
auto* x_data = x->data<T>();
|
||||
|
||||
int batch_size = x->dims()[0];
|
||||
int class_num = x->dims()[1];
|
||||
|
||||
// TODO(qingqing): make zero setting an common function.
|
||||
if (ctx.Attr<int>("soft_label") == 1) {
|
||||
auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
|
||||
int index = 0;
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
for (int j = 0; j < class_num; ++j) {
|
||||
dx_data[index] = -label_data[index] * dy_data[i] / x_data[index];
|
||||
index++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto* label_data = label->data<int>();
|
||||
memset(dx_data, 0, sizeof(T) * batch_size * class_num);
|
||||
for (int i = 0; i < batch_size; ++i) {
|
||||
PADDLE_ASSERT(label_data[i] >= 0 || label_data[i] < class_num);
|
||||
int index = i * class_num + label_data[i];
|
||||
dx_data[index] = -dy_data[i] / x_data[index];
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
@ -0,0 +1,113 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/dropout_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
using framework::Tensor;
|
||||
using framework::LoDTensor;
|
||||
|
||||
class DropoutOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(const framework::InferShapeContext &ctx) const override {
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
|
||||
PADDLE_ENFORCE_GE(ctx.Attr<float>("dropout_prob"), 0);
|
||||
PADDLE_ENFORCE_LE(ctx.Attr<float>("dropout_prob"), 1);
|
||||
// TODO(xinghai-sun): remove this check after swtiching to bool
|
||||
PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
|
||||
ctx.Attr<int>("is_training") == 1);
|
||||
|
||||
auto dims = ctx.Input<Tensor>("X")->dims();
|
||||
ctx.Output<LoDTensor>("Out")->Resize(dims);
|
||||
if (ctx.Attr<int>("is_training") == 1) {
|
||||
ctx.Output<LoDTensor>("Mask")->Resize(dims);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename AttrType>
|
||||
class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
|
||||
public:
|
||||
DropoutOpMaker(framework::OpProto *proto,
|
||||
framework::OpAttrChecker *op_checker)
|
||||
: OpProtoAndCheckerMaker(proto, op_checker) {
|
||||
AddAttr<AttrType>("dropout_prob", "Probability of setting units to zero.")
|
||||
.SetDefault(.5f);
|
||||
// TODO(xinghai-sun): use bool for is_training after bool is supported.
|
||||
AddAttr<int>("is_training", "Whether in training phase.").SetDefault(1);
|
||||
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
|
||||
AddInput("X", "The input of dropout op.");
|
||||
AddOutput("Out", "The output of dropout op.");
|
||||
AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
|
||||
|
||||
AddComment(R"DOC(
|
||||
Dropout Operator.
|
||||
|
||||
"Dropout" refers to randomly dropping out units in a nerual network. It is a
|
||||
regularization technique for reducing overfitting by preventing neuron
|
||||
co-adaption during training. The dropout operator randomly set (according to
|
||||
the given dropout probability) the outputs of some units to zero, while others
|
||||
being set to their inputs.
|
||||
)DOC");
|
||||
}
|
||||
};
|
||||
|
||||
template <typename AttrType>
|
||||
class DropoutOpGrad : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(const framework::InferShapeContext &ctx) const override {
|
||||
PADDLE_ENFORCE_EQ(ctx.Attr<int>("is_training"), 1,
|
||||
"GradOp is only callable when is_training is true");
|
||||
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Mask"), "Mask must not be null.");
|
||||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
|
||||
"Input(Out@GRAD) must not be null.");
|
||||
|
||||
PADDLE_ENFORCE_GE(ctx.Attr<AttrType>("dropout_prob"), 0);
|
||||
PADDLE_ENFORCE_LE(ctx.Attr<AttrType>("dropout_prob"), 1);
|
||||
// TODO(xinghai-sun): remove this check after swtiching to bool
|
||||
PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
|
||||
ctx.Attr<int>("is_training") == 1);
|
||||
auto x_dims = ctx.Input<Tensor>("X")->dims();
|
||||
auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
|
||||
PADDLE_ENFORCE_EQ(x_dims, out_dims,
|
||||
"Dimensions of Input(X) and Out@Grad must be the same.");
|
||||
auto mask_dims = ctx.Input<Tensor>("Mask")->dims();
|
||||
PADDLE_ENFORCE_EQ(x_dims, mask_dims,
|
||||
"Dimensions of Input(X) and Mask must be the same.");
|
||||
|
||||
auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
|
||||
x_grad->Resize(x_dims);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP(dropout, ops::DropoutOp, ops::DropoutOpMaker<float>, dropout_grad,
|
||||
ops::DropoutOpGrad<float>);
|
||||
REGISTER_OP_CPU_KERNEL(
|
||||
dropout, ops::CPUDropoutKernel<paddle::platform::CPUPlace, float, float>);
|
||||
REGISTER_OP_CPU_KERNEL(
|
||||
dropout_grad, ops::DropoutGradKernel<paddle::platform::CPUPlace, float>);
|
@ -0,0 +1,86 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
#include <thrust/random.h>
|
||||
#include <thrust/transform.h>
|
||||
#include "paddle/operators/dropout_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
template <typename T, typename AttrType>
|
||||
struct MaskGenerator {
|
||||
AttrType dropout_prob;
|
||||
int seed;
|
||||
|
||||
__host__ __device__ MaskGenerator(AttrType dropout_prob, int seed)
|
||||
: dropout_prob(dropout_prob), seed(seed) {}
|
||||
|
||||
__host__ __device__ T operator()(const unsigned int n) const {
|
||||
thrust::minstd_rand rng;
|
||||
rng.seed(seed);
|
||||
thrust::uniform_real_distribution<AttrType> dist(0, 1);
|
||||
rng.discard(n);
|
||||
if (dist(rng) < dropout_prob) {
|
||||
return static_cast<T>(0);
|
||||
} else {
|
||||
return static_cast<T>(1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT.
|
||||
// Use std::random and thrust::random(thrust is a std library in CUDA) to
|
||||
// implement uniform random.
|
||||
template <typename Place, typename T, typename AttrType>
|
||||
class GPUDropoutKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& context) const override {
|
||||
auto* x = context.Input<Tensor>("X");
|
||||
auto* y = context.Output<Tensor>("Out");
|
||||
y->mutable_data<T>(context.GetPlace());
|
||||
AttrType dropout_prob = context.Attr<AttrType>("dropout_prob");
|
||||
|
||||
auto X = EigenMatrix<T>::Reshape(*x, 1);
|
||||
auto Y = EigenMatrix<T>::Reshape(*y, 1);
|
||||
|
||||
auto place = context.GetEigenDevice<Place>();
|
||||
if (context.Attr<int>("is_training") == 1) {
|
||||
auto* mask = context.Output<Tensor>("Mask");
|
||||
auto* mask_data = mask->mutable_data<T>(context.GetPlace());
|
||||
int size = framework::product(mask->dims());
|
||||
int seed = context.Attr<int>("seed");
|
||||
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
|
||||
thrust::transform(index_sequence_begin, index_sequence_begin + size,
|
||||
thrust::device_ptr<T>(mask_data),
|
||||
MaskGenerator<T, AttrType>(dropout_prob, seed));
|
||||
auto M = EigenMatrix<T>::Reshape(*mask, 1);
|
||||
Y.device(place) = X * M;
|
||||
} else {
|
||||
Y.device(place) = X * dropout_prob;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP_GPU_KERNEL(
|
||||
dropout, ops::GPUDropoutKernel<paddle::platform::GPUPlace, float, float>);
|
||||
REGISTER_OP_GPU_KERNEL(
|
||||
dropout_grad, ops::DropoutGradKernel<paddle::platform::GPUPlace, float>);
|
@ -0,0 +1,86 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
#include <random>
|
||||
#include "paddle/framework/eigen.h"
|
||||
#include "paddle/framework/op_registry.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
using Tensor = framework::Tensor;
|
||||
template <typename T, int MajorType = Eigen::RowMajor,
|
||||
typename IndexType = Eigen::DenseIndex>
|
||||
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
|
||||
|
||||
template <typename Place, typename T, typename AttrType>
|
||||
class CPUDropoutKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& context) const override {
|
||||
auto* x = context.Input<Tensor>("X");
|
||||
auto* y = context.Output<Tensor>("Out");
|
||||
const auto* x_data = x->data<T>();
|
||||
auto* y_data = y->mutable_data<T>(context.GetPlace());
|
||||
AttrType dropout_prob = context.Attr<AttrType>("dropout_prob");
|
||||
|
||||
if (context.Attr<int>("is_training") == 1) {
|
||||
auto* mask = context.Output<Tensor>("Mask");
|
||||
auto* mask_data = mask->mutable_data<T>(context.GetPlace());
|
||||
int seed = context.Attr<int>("seed");
|
||||
std::minstd_rand engine;
|
||||
engine.seed(seed);
|
||||
std::uniform_real_distribution<AttrType> dist(0, 1);
|
||||
size_t size = framework::product(mask->dims());
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (dist(engine) < dropout_prob) {
|
||||
mask_data[i] = 0;
|
||||
y_data[i] = 0;
|
||||
} else {
|
||||
mask_data[i] = 1;
|
||||
y_data[i] = x_data[i];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto X = EigenMatrix<T>::Reshape(*x, 1);
|
||||
auto Y = EigenMatrix<T>::Reshape(*y, 1);
|
||||
auto place = context.GetEigenDevice<Place>();
|
||||
Y.device(place) = X * dropout_prob;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Place, typename T>
|
||||
class DropoutGradKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& context) const override {
|
||||
PADDLE_ENFORCE_EQ(context.Attr<int>("is_training"), 1,
|
||||
"GradOp is only callable when is_training is true");
|
||||
|
||||
auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
|
||||
auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
|
||||
auto* mask = context.Input<Tensor>("Mask");
|
||||
grad_x->mutable_data<T>(context.GetPlace());
|
||||
|
||||
auto M = EigenMatrix<T>::Reshape(*mask, 1);
|
||||
auto dX = EigenMatrix<T>::Reshape(*grad_x, 1);
|
||||
auto dY = EigenMatrix<T>::Reshape(*grad_y, 1);
|
||||
|
||||
auto place = context.GetEigenDevice<Place>();
|
||||
dX.device(place) = dY * M;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue