format some files

avx_docs
hedaoyuan 8 years ago
parent d04c206f30
commit abdcb8e128

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef HL_MATRIX_TYPE_CUH_ #ifndef HL_MATRIX_TYPE_CUH_
#define HL_MATRIX_TYPE_CUH_ #define HL_MATRIX_TYPE_CUH_

File diff suppressed because it is too large Load Diff

@ -19,25 +19,20 @@ namespace paddle {
/** /**
* \brief The tensor evaluator classes. * \brief The tensor evaluator classes.
*/ */
template<typename Derived, class T> template <typename Derived, class T>
class TensorApply { class TensorApply {
public: public:
explicit INLINE TensorApply(const Derived& p) explicit INLINE TensorApply(const Derived& p)
: data_(p.data_), stride_(p.stride_), : data_(p.data_),
height_(p.height_), width_(p.width_), useGpu_(p.useGpu_) {} stride_(p.stride_),
height_(p.height_),
width_(p.width_),
useGpu_(p.useGpu_) {}
INLINE T apply(int i, int j) const { INLINE T apply(int i, int j) const { return data_[i * stride_ + j]; }
return data_[i * stride_ + j]; INLINE T apply(int index) const { return data_[index]; }
} INLINE T& applyRef(int i, int j) { return data_[i * stride_ + j]; }
INLINE T apply(int index) const { INLINE T& applyRef(int index) { return data_[index]; }
return data_[index];
}
INLINE T& applyRef(int i, int j) {
return data_[i * stride_ + j];
}
INLINE T& applyRef(int index) {
return data_[index];
}
INLINE size_t getWidth() const { return width_; } INLINE size_t getWidth() const { return width_; }
INLINE size_t getHeight() const { return height_; } INLINE size_t getHeight() const { return height_; }
@ -53,22 +48,20 @@ public:
/** /**
* \brief The tensor evaluator classes. * \brief The tensor evaluator classes.
*
* evaluator for rvalues * evaluator for rvalues
*/ */
template<typename Derived, class T> template <typename Derived, class T>
class TensorApply<const Derived, T> { class TensorApply<const Derived, T> {
public: public:
explicit INLINE TensorApply(const Derived& p) explicit INLINE TensorApply(const Derived& p)
: data_(p.data_), stride_(p.stride_), : data_(p.data_),
height_(p.height_), width_(p.width_), useGpu_(p.useGpu_) {} stride_(p.stride_),
height_(p.height_),
width_(p.width_),
useGpu_(p.useGpu_) {}
INLINE T apply(int i, int j) const { INLINE T apply(int i, int j) const { return data_[i * stride_ + j]; }
return data_[i * stride_ + j]; INLINE T apply(int index) const { return data_[index]; }
}
INLINE T apply(int index) const {
return data_[index];
}
INLINE size_t getWidth() const { return width_; } INLINE size_t getWidth() const { return width_; }
INLINE size_t getHeight() const { return height_; } INLINE size_t getHeight() const { return height_; }
@ -82,18 +75,14 @@ public:
bool useGpu_; bool useGpu_;
}; };
template<typename Derived, class T> template <typename Derived, class T>
class TensorApply<const TensorExpression<Derived, T>, T> { class TensorApply<const TensorExpression<Derived, T>, T> {
public: public:
explicit TensorApply(const TensorExpression<Derived, T>& expr) explicit TensorApply(const TensorExpression<Derived, T>& expr)
: expr_(expr.derived()) {} : expr_(expr.derived()) {}
INLINE T apply(int i, int j) const { INLINE T apply(int i, int j) const { return expr_.apply(i, j); }
return expr_.apply(i, j); INLINE T apply(int index) const { return expr_.apply(index); }
}
INLINE T apply(int index) const {
return expr_.apply(index);
}
INLINE size_t getWidth() const { return expr_.getWidth(); } INLINE size_t getWidth() const { return expr_.getWidth(); }
INLINE size_t getHeight() const { return expr_.getHeight(); } INLINE size_t getHeight() const { return expr_.getHeight(); }
@ -106,18 +95,14 @@ public:
/** /**
* \brief The unary expression evaluator classes. * \brief The unary expression evaluator classes.
*/ */
template<class OP, typename ArgType, class T> template <class OP, typename ArgType, class T>
class TensorApply<const TensorUnaryOp<OP, ArgType, T>, T> { class TensorApply<const TensorUnaryOp<OP, ArgType, T>, T> {
public: public:
explicit INLINE TensorApply(const TensorUnaryOp<OP, ArgType, T>& expr) explicit INLINE TensorApply(const TensorUnaryOp<OP, ArgType, T>& expr)
: op_(expr.op_), expr_(expr.expr_) {} : op_(expr.op_), expr_(expr.expr_) {}
INLINE T apply(int i, int j) const { INLINE T apply(int i, int j) const { return op_(expr_.apply(i, j)); }
return op_(expr_.apply(i, j)); INLINE T apply(int index) const { return op_(expr_.apply(index)); }
}
INLINE T apply(int index) const {
return op_(expr_.apply(index));
}
INLINE size_t getWidth() const { return expr_.getWidth(); } INLINE size_t getWidth() const { return expr_.getWidth(); }
INLINE size_t getHeight() const { return expr_.getHeight(); } INLINE size_t getHeight() const { return expr_.getHeight(); }
@ -131,17 +116,17 @@ public:
/** /**
* \brief The binary expression evaluator classes. * \brief The binary expression evaluator classes.
*/ */
template<class OP, typename LhsType, typename RhsType, class T> template <class OP, typename LhsType, typename RhsType, class T>
class TensorApply<const TensorBinaryOp<OP, LhsType, RhsType, T>, T> { class TensorApply<const TensorBinaryOp<OP, LhsType, RhsType, T>, T> {
public: public:
explicit INLINE TensorApply( explicit INLINE TensorApply(
const TensorBinaryOp<OP, LhsType, RhsType, T>& expr) const TensorBinaryOp<OP, LhsType, RhsType, T>& expr)
: op_(expr.op_), lhs_(expr.lhs_), rhs_(expr.rhs_) { : op_(expr.op_), lhs_(expr.lhs_), rhs_(expr.rhs_) {
#ifndef __CUDA_ARCH__ #ifndef __CUDA_ARCH__
CHECK_EQ(lhs_.getWidth(), rhs_.getWidth()); CHECK_EQ(lhs_.getWidth(), rhs_.getWidth());
CHECK_EQ(lhs_.getHeight(), rhs_.getHeight()); CHECK_EQ(lhs_.getHeight(), rhs_.getHeight());
CHECK_EQ(lhs_.useGpu(), rhs_.useGpu()); CHECK_EQ(lhs_.useGpu(), rhs_.useGpu());
#endif #endif
} }
INLINE T apply(int i, int j) const { INLINE T apply(int i, int j) const {
@ -166,20 +151,20 @@ public:
/** /**
* \brief The ternary expression evaluator classes. * \brief The ternary expression evaluator classes.
*/ */
template<typename ArgType1, typename ArgType2, typename ArgType3, class T> template <typename ArgType1, typename ArgType2, typename ArgType3, class T>
class TensorApply<const TensorTernaryOp<ArgType1, ArgType2, ArgType3, T>, T> { class TensorApply<const TensorTernaryOp<ArgType1, ArgType2, ArgType3, T>, T> {
public: public:
explicit INLINE TensorApply( explicit INLINE TensorApply(
const TensorTernaryOp<ArgType1, ArgType2, ArgType3, T>& expr) const TensorTernaryOp<ArgType1, ArgType2, ArgType3, T>& expr)
: expr1_(expr.expr1_), expr2_(expr.expr2_), expr3_(expr.expr3_) { : expr1_(expr.expr1_), expr2_(expr.expr2_), expr3_(expr.expr3_) {
#ifndef __CUDA_ARCH__ #ifndef __CUDA_ARCH__
CHECK_EQ(expr1_.getWidth(), expr2_.getWidth()); CHECK_EQ(expr1_.getWidth(), expr2_.getWidth());
CHECK_EQ(expr1_.getWidth(), expr3_.getWidth()); CHECK_EQ(expr1_.getWidth(), expr3_.getWidth());
CHECK_EQ(expr1_.getHeight(), expr2_.getHeight()); CHECK_EQ(expr1_.getHeight(), expr2_.getHeight());
CHECK_EQ(expr1_.getHeight(), expr3_.getHeight()); CHECK_EQ(expr1_.getHeight(), expr3_.getHeight());
CHECK_EQ(expr1_.useGpu(), expr2_.useGpu()); CHECK_EQ(expr1_.useGpu(), expr2_.useGpu());
CHECK_EQ(expr1_.useGpu(), expr3_.useGpu()); CHECK_EQ(expr1_.useGpu(), expr3_.useGpu());
#endif #endif
} }
INLINE T apply(int i, int j) const { INLINE T apply(int i, int j) const {
@ -192,8 +177,8 @@ public:
INLINE size_t getWidth() const { return expr1_.getWidth(); } INLINE size_t getWidth() const { return expr1_.getWidth(); }
INLINE size_t getHeight() const { return expr1_.getHeight(); } INLINE size_t getHeight() const { return expr1_.getHeight(); }
INLINE bool isContiguous() const { INLINE bool isContiguous() const {
return expr1_.isContiguous() && return expr1_.isContiguous() && expr2_.isContiguous() &&
expr2_.isContiguous() && expr3_.isContiguous(); expr3_.isContiguous();
} }
INLINE bool useGpu() const { return expr1_.useGpu(); } INLINE bool useGpu() const { return expr1_.useGpu(); }
@ -205,18 +190,14 @@ public:
/** /**
* \brief The const expression evaluator classes. * \brief The const expression evaluator classes.
*/ */
template<class OP, typename ArgType, class T> template <class OP, typename ArgType, class T>
class TensorApply<const TensorConstant<OP, ArgType, T>, T> { class TensorApply<const TensorConstant<OP, ArgType, T>, T> {
public: public:
explicit INLINE TensorApply(const TensorConstant<OP, ArgType, T>& expr) explicit INLINE TensorApply(const TensorConstant<OP, ArgType, T>& expr)
: op_(expr.op_), expr_(expr.expr_) {} : op_(expr.op_), expr_(expr.expr_) {}
INLINE T apply(int i, int j) const { INLINE T apply(int i, int j) const { return op_(i, j); }
return op_(i, j); INLINE T apply(int index) const { return op_(index); }
}
INLINE T apply(int index) const {
return op_(index);
}
INLINE size_t getWidth() const { return expr_.getWidth(); } INLINE size_t getWidth() const { return expr_.getWidth(); }
INLINE size_t getHeight() const { return expr_.getHeight(); } INLINE size_t getHeight() const { return expr_.getHeight(); }

@ -21,18 +21,18 @@ namespace paddle {
/** /**
* \brief Tensor Assign Expression(return by lazyAssign, * \brief Tensor Assign Expression(return by lazyAssign,
* and evaluated by AssignEvaluate) * and evaluated by AssignEvaluate)
*/ */
template<typename LhsType, typename RhsType, class T> template <typename LhsType, typename RhsType, class T>
class TensorAssignOp { class TensorAssignOp {
public: public:
explicit TensorAssignOp(const LhsType& lhs, const RhsType& rhs) explicit TensorAssignOp(const LhsType& lhs, const RhsType& rhs)
: lhs_(lhs), rhs_(rhs) { : lhs_(lhs), rhs_(rhs) {
#ifndef __CUDA_ARCH__ #ifndef __CUDA_ARCH__
CHECK_EQ(lhs_.getWidth(), rhs_.getWidth()); CHECK_EQ(lhs_.getWidth(), rhs_.getWidth());
CHECK_EQ(lhs_.getHeight(), rhs_.getHeight()); CHECK_EQ(lhs_.getHeight(), rhs_.getHeight());
CHECK_EQ(lhs_.useGpu(), rhs_.useGpu()); CHECK_EQ(lhs_.useGpu(), rhs_.useGpu());
#endif #endif
} }
INLINE void apply(const int i, const int j) { INLINE void apply(const int i, const int j) {
@ -55,19 +55,22 @@ private:
}; };
template <typename Assign, typename... AssignOp> template <typename Assign, typename... AssignOp>
void AssignCpuEvaluate(int height, int width, bool isContiguous, void AssignCpuEvaluate(int height,
Assign&& assign, AssignOp&& ... args) { int width,
bool isContiguous,
Assign&& assign,
AssignOp&&... args) {
if (isContiguous) { if (isContiguous) {
int size = height * width; int size = height * width;
for (int index = 0; index < size; index++) { for (int index = 0; index < size; index++) {
assign.apply(index); assign.apply(index);
__attribute__((unused)) int dummy[] = { (((args)).apply(index), 0)... }; __attribute__((unused)) int dummy[] = {(((args)).apply(index), 0)...};
} }
} else { } else {
for (int i = 0; i < height; i++) { for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) { for (int j = 0; j < width; j++) {
assign.apply(i, j); assign.apply(i, j);
__attribute__((unused)) int dummy[] = { (((args)).apply(i, j), 0)... }; __attribute__((unused)) int dummy[] = {(((args)).apply(i, j), 0)...};
} }
} }
} }
@ -75,25 +78,27 @@ void AssignCpuEvaluate(int height, int width, bool isContiguous,
#ifdef __NVCC__ #ifdef __NVCC__
template <typename Assign, typename... AssignOp> template <typename Assign, typename... AssignOp>
__global__ __global__ void AssignGpuEvaluate1(const int border,
void AssignGpuEvaluate1(const int border, Assign assign, AssignOp ... args) { Assign assign,
AssignOp... args) {
const int idx = blockIdx.x * blockDim.x + threadIdx.x; const int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < border) { if (idx < border) {
assign.apply(idx); assign.apply(idx);
__attribute__((unused)) int dummy[] = { (((args)).apply(idx), 0)... }; __attribute__((unused)) int dummy[] = {(((args)).apply(idx), 0)...};
} }
} }
template <typename Assign, typename... AssignOp> template <typename Assign, typename... AssignOp>
__global__ __global__ void AssignGpuEvaluate2(const int height,
void AssignGpuEvaluate2(const int height, const int width, const int width,
Assign assign, AssignOp ... args) { Assign assign,
AssignOp... args) {
const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; const int colIdx = blockIdx.x * blockDim.x + threadIdx.x;
const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y;
for (int i = rowIdx; i < height; i += gridDim.y * blockDim.y) { for (int i = rowIdx; i < height; i += gridDim.y * blockDim.y) {
for (int j = colIdx; j < width; j += gridDim.x * blockDim.x) { for (int j = colIdx; j < width; j += gridDim.x * blockDim.x) {
assign.apply(i, j); assign.apply(i, j);
__attribute__((unused)) int dummy[] = { (((args)).apply(i, j), 0)... }; __attribute__((unused)) int dummy[] = {(((args)).apply(i, j), 0)...};
} }
} }
} }
@ -105,23 +110,23 @@ void AssignGpuEvaluate2(const int height, const int width,
* \note At least one assignment expression is required * \note At least one assignment expression is required
*/ */
template <typename Assign, typename... AssignOp> template <typename Assign, typename... AssignOp>
void AssignEvaluate(Assign&& assign, AssignOp&& ... args) { void AssignEvaluate(Assign&& assign, AssignOp&&... args) {
const bool useGpu_ = assign.useGpu(); const bool useGpu_ = assign.useGpu();
bool isContiguous_ = assign.isContiguous(); bool isContiguous_ = assign.isContiguous();
const size_t height = assign.getHeight(); const size_t height = assign.getHeight();
const size_t width = assign.getWidth(); const size_t width = assign.getWidth();
const int packSize = sizeof...(args); const int packSize = sizeof...(args);
const bool packUseGpu[] = { ((args)).useGpu()... }; const bool packUseGpu[] = {((args)).useGpu()...};
const bool packIsContiguous[] = { ((args)).isContiguous()... }; const bool packIsContiguous[] = {((args)).isContiguous()...};
const size_t packHeight[] = { ((args)).getHeight()... }; const size_t packHeight[] = {((args)).getHeight()...};
const size_t packWidth[] = { ((args)).getWidth()... }; const size_t packWidth[] = {((args)).getWidth()...};
for (int i = 0; i < packSize; i++) { for (int i = 0; i < packSize; i++) {
CHECK_EQ(useGpu_, packUseGpu[i]); CHECK_EQ(useGpu_, packUseGpu[i]);
CHECK_EQ(height, packHeight[i]); CHECK_EQ(height, packHeight[i]);
CHECK_EQ(width, packWidth[i]); CHECK_EQ(width, packWidth[i]);
isContiguous_ = isContiguous_ && packIsContiguous[i]; isContiguous_ = isContiguous_ && packIsContiguous[i];
} }
if (useGpu_) { if (useGpu_) {
@ -130,8 +135,8 @@ void AssignEvaluate(Assign&& assign, AssignOp&& ... args) {
int size = height * width; int size = height * width;
int blockSize = size <= 1024 ? size : 1024; int blockSize = size <= 1024 ? size : 1024;
int gridSize = (size + 1024 - 1) / 1024; int gridSize = (size + 1024 - 1) / 1024;
AssignGpuEvaluate1 AssignGpuEvaluate1<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(size, assign, args...); size, assign, args...);
} else { } else {
int blockSizeY = std::min(32, (int)height); int blockSizeY = std::min(32, (int)height);
int blockSizeX = (32 / blockSizeY) * 32; int blockSizeX = (32 / blockSizeY) * 32;
@ -139,8 +144,8 @@ void AssignEvaluate(Assign&& assign, AssignOp&& ... args) {
int gridSizeY = std::min(32, (int)(height + blockSizeY - 1) / blockSizeY); int gridSizeY = std::min(32, (int)(height + blockSizeY - 1) / blockSizeY);
dim3 threads(blockSizeX, blockSizeY); dim3 threads(blockSizeX, blockSizeY);
dim3 grid(gridSizeX, gridSizeY); dim3 grid(gridSizeX, gridSizeY);
AssignGpuEvaluate2 AssignGpuEvaluate2<<<grid, threads, 0, STREAM_DEFAULT>>>(
<<<grid, threads, 0, STREAM_DEFAULT>>>(height, width, assign, args...); height, width, assign, args...);
} }
CHECK_SYNC("AssignEvaluate failed"); CHECK_SYNC("AssignEvaluate failed");
@ -151,4 +156,3 @@ void AssignEvaluate(Assign&& assign, AssignOp&& ... args) {
} }
} // namespace paddle } // namespace paddle

@ -23,7 +23,7 @@ namespace paddle {
/** /**
* \brief The tensor cpu evaluate api. * \brief The tensor cpu evaluate api.
*/ */
template<class T, typename LeftType, typename RightType> template <class T, typename LeftType, typename RightType>
inline void TensorCpuApply(LeftType& lhs, const RightType& rhs) { inline void TensorCpuApply(LeftType& lhs, const RightType& rhs) {
TensorApply<LeftType, T> lhs_(lhs); TensorApply<LeftType, T> lhs_(lhs);
TensorApply<const RightType, T> rhs_(rhs); TensorApply<const RightType, T> rhs_(rhs);
@ -48,16 +48,17 @@ inline void TensorCpuApply(LeftType& lhs, const RightType& rhs) {
} }
#ifdef __NVCC__ #ifdef __NVCC__
template<typename LeftType, typename RightType> template <typename LeftType, typename RightType>
__global__ __global__ void TensorElementWiseOp(LeftType lhs,
void TensorElementWiseOp(LeftType lhs, RightType rhs, const int border) { RightType rhs,
const int border) {
const int idx = blockIdx.x * blockDim.x + threadIdx.x; const int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < border) { if (idx < border) {
lhs.applyRef(idx) = rhs.apply(idx); lhs.applyRef(idx) = rhs.apply(idx);
} }
} }
template<typename LeftType, typename RightType> template <typename LeftType, typename RightType>
__global__ void TensorElementWiseOp(LeftType lhs, RightType rhs) { __global__ void TensorElementWiseOp(LeftType lhs, RightType rhs) {
const int colIdx = blockIdx.x * blockDim.x + threadIdx.x; const int colIdx = blockIdx.x * blockDim.x + threadIdx.x;
const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y; const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y;
@ -71,7 +72,7 @@ __global__ void TensorElementWiseOp(LeftType lhs, RightType rhs) {
/** /**
* \brief The tensor gpu evaluate api. * \brief The tensor gpu evaluate api.
*/ */
template<class T, typename LeftType, typename RightType> template <class T, typename LeftType, typename RightType>
inline void TensorGpuApply(LeftType& lhs, const RightType& rhs) { inline void TensorGpuApply(LeftType& lhs, const RightType& rhs) {
TensorApply<LeftType, T> lhs_(lhs); TensorApply<LeftType, T> lhs_(lhs);
TensorApply<const RightType, T> rhs_(rhs); TensorApply<const RightType, T> rhs_(rhs);
@ -86,8 +87,8 @@ inline void TensorGpuApply(LeftType& lhs, const RightType& rhs) {
int size = dimM * dimN; int size = dimM * dimN;
int blockSize = size <= 1024 ? size : 1024; int blockSize = size <= 1024 ? size : 1024;
int gridSize = (size + 1024 - 1) / 1024; int gridSize = (size + 1024 - 1) / 1024;
TensorElementWiseOp TensorElementWiseOp<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(lhs_, rhs_, size); lhs_, rhs_, size);
} else { } else {
int blockSizeY = std::min(32, dimM); int blockSizeY = std::min(32, dimM);
int blockSizeX = (32 / blockSizeY) * 32; int blockSizeX = (32 / blockSizeY) * 32;
@ -95,16 +96,14 @@ inline void TensorGpuApply(LeftType& lhs, const RightType& rhs) {
int gridSizeY = std::min(32, (dimM + blockSizeY - 1) / blockSizeY); int gridSizeY = std::min(32, (dimM + blockSizeY - 1) / blockSizeY);
dim3 threads(blockSizeX, blockSizeY); dim3 threads(blockSizeX, blockSizeY);
dim3 grid(gridSizeX, gridSizeY); dim3 grid(gridSizeX, gridSizeY);
TensorElementWiseOp TensorElementWiseOp<<<grid, threads, 0, STREAM_DEFAULT>>>(lhs_, rhs_);
<<<grid, threads, 0, STREAM_DEFAULT>>>(lhs_, rhs_);
} }
CHECK_SYNC("TensorGpuApply failed"); CHECK_SYNC("TensorGpuApply failed");
} }
#else #else
template<class T, typename LeftType, typename RightType> template <class T, typename LeftType, typename RightType>
inline void TensorGpuApply(LeftType& lhs, RightType& rhs) { inline void TensorGpuApply(LeftType& lhs, RightType& rhs) {}
}
#endif #endif
} // namespace paddle } // namespace paddle

File diff suppressed because it is too large Load Diff

@ -355,4 +355,3 @@ void adamaxApply(BaseMatrix& value,
} // namespace paddle } // namespace paddle
#endif #endif

@ -119,5 +119,4 @@ extern void adamaxApply(BaseMatrix& value,
real beta2, real beta2,
int64_t step, int64_t step,
real alpha); real alpha);
} // namespace paddle } // namespace paddle

@ -31,7 +31,8 @@ void SparseMomentumParameterOptimizer(const VectorPtr vecs[],
tau * alpha * gamma * learningRate); tau * alpha * gamma * learningRate);
vecs[PARAMETER_VALUE]->add(*vecs[PARAMETER_MOMENTUM_UT], vecs[PARAMETER_VALUE]->add(*vecs[PARAMETER_MOMENTUM_UT],
tau / beta + 1.0 / alpha, tau / beta + 1.0 / alpha,
*vecs[PARAMETER_MOMENTUM_VT], 1.0 / beta); *vecs[PARAMETER_MOMENTUM_VT],
1.0 / beta);
} }
void AdagradParameterOptimizer(const VectorPtr vecs[], void AdagradParameterOptimizer(const VectorPtr vecs[],
@ -46,10 +47,12 @@ void AdagradParameterOptimizer(const VectorPtr vecs[],
vecs[PARAMETER_LEARNING_RATE]->add(epsilon); vecs[PARAMETER_LEARNING_RATE]->add(epsilon);
vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]); vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]);
vecs[PARAMETER_VALUE]->sgdUpdate( vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT],
*vecs[PARAMETER_GRADIENT], *vecs[PARAMETER_MOMENTUM], *vecs[PARAMETER_MOMENTUM],
*vecs[PARAMETER_LEARNING_RATE], learningRate, *vecs[PARAMETER_LEARNING_RATE],
momentum, decayRate); learningRate,
momentum,
decayRate);
} }
void AdaDeltaParameterOptimizer(const VectorPtr vecs[], void AdaDeltaParameterOptimizer(const VectorPtr vecs[],
@ -59,24 +62,29 @@ void AdaDeltaParameterOptimizer(const VectorPtr vecs[],
real momentum, real momentum,
real decayRate) { real decayRate) {
// E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2 // E(g_t^2) = \rou * E(g_{t-1}^2) + (1-\rou) * g^2
vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare(*vecs[PARAMETER_GRADIENT], vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare(
rou, 1.0f - rou); *vecs[PARAMETER_GRADIENT], rou, 1.0f - rou);
// learn_rate = sqrt( ( E(dx_{t-1}^2) + epsilon ) / ( E(g_t^2) + epsilon ) ) // learn_rate = sqrt( ( E(dx_{t-1}^2) + epsilon ) / ( E(g_t^2) + epsilon ) )
vecs[PARAMETER_LEARNING_RATE]->dotDiv(*vecs[PARAMETER_GRADIENT_SQURESUM1], vecs[PARAMETER_LEARNING_RATE]->dotDiv(*vecs[PARAMETER_GRADIENT_SQURESUM1],
*vecs[PARAMETER_GRADIENT_SQURESUM], *vecs[PARAMETER_GRADIENT_SQURESUM],
epsilon, epsilon); epsilon,
epsilon);
vecs[PARAMETER_LEARNING_RATE]->sqrt2(); vecs[PARAMETER_LEARNING_RATE]->sqrt2();
// E(dx_t^2) = \rou * E(dx_{t-1}^2) + (1-\rou) * (-g*learn_rate)^2 // E(dx_t^2) = \rou * E(dx_{t-1}^2) + (1-\rou) * (-g*learn_rate)^2
vecs[PARAMETER_GRADIENT_SQURESUM1]->decayAddSquareMul( vecs[PARAMETER_GRADIENT_SQURESUM1]->decayAddSquareMul(
*vecs[PARAMETER_GRADIENT], *vecs[PARAMETER_LEARNING_RATE], rou, *vecs[PARAMETER_GRADIENT],
*vecs[PARAMETER_LEARNING_RATE],
rou,
1.0f - rou); 1.0f - rou);
vecs[PARAMETER_VALUE]->sgdUpdate( vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT],
*vecs[PARAMETER_GRADIENT], *vecs[PARAMETER_MOMENTUM], *vecs[PARAMETER_MOMENTUM],
*vecs[PARAMETER_LEARNING_RATE], learningRate, *vecs[PARAMETER_LEARNING_RATE],
momentum, decayRate); learningRate,
momentum,
decayRate);
} }
void RMSPropParameterOptimizer(const VectorPtr vecs[], void RMSPropParameterOptimizer(const VectorPtr vecs[],
@ -91,12 +99,11 @@ void RMSPropParameterOptimizer(const VectorPtr vecs[],
// For the first time update, make the sum be the current square // For the first time update, make the sum be the current square
// so that the initial estimation of E(g_t^2) will not be too small. // so that the initial estimation of E(g_t^2) will not be too small.
vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare( vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare(
*vecs[PARAMETER_GRADIENT], accumulatedRou, *vecs[PARAMETER_GRADIENT], accumulatedRou, firstTime ? 1.0f : 1.0f - rou);
firstTime ? 1.0f : 1.0f - rou);
// E(g_t) = \rou * E(g_{t-1}) + (1-\rou) * g // E(g_t) = \rou * E(g_{t-1}) + (1-\rou) * g
vecs[PARAMETER_GRADIENT_SQURESUM1]->add(*vecs[PARAMETER_GRADIENT], vecs[PARAMETER_GRADIENT_SQURESUM1]->add(
accumulatedRou, 1.0f - rou); *vecs[PARAMETER_GRADIENT], accumulatedRou, 1.0f - rou);
// learn_rate = 1/sqrt( ( E(g_t^2) - (E(g_t))^2 + epsilon ) // learn_rate = 1/sqrt( ( E(g_t^2) - (E(g_t))^2 + epsilon )
// Basiclly if the sign of the gradient changes more often, // Basiclly if the sign of the gradient changes more often,
@ -107,10 +114,12 @@ void RMSPropParameterOptimizer(const VectorPtr vecs[],
vecs[PARAMETER_LEARNING_RATE]->add(epsilon); vecs[PARAMETER_LEARNING_RATE]->add(epsilon);
vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]); vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]);
vecs[PARAMETER_VALUE]->sgdUpdate( vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT],
*vecs[PARAMETER_GRADIENT], *vecs[PARAMETER_MOMENTUM], *vecs[PARAMETER_MOMENTUM],
*vecs[PARAMETER_LEARNING_RATE], learningRate, *vecs[PARAMETER_LEARNING_RATE],
momentum, decayRate); learningRate,
momentum,
decayRate);
} }
void DecayedAdagradParameterOptimizer(const VectorPtr vecs[], void DecayedAdagradParameterOptimizer(const VectorPtr vecs[],
@ -125,8 +134,7 @@ void DecayedAdagradParameterOptimizer(const VectorPtr vecs[],
// For the first time update, make the sum be the current square // For the first time update, make the sum be the current square
// so that the initial estimation of E(g_t^2) will not be too small. // so that the initial estimation of E(g_t^2) will not be too small.
vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare( vecs[PARAMETER_GRADIENT_SQURESUM]->decayAddSquare(
*vecs[PARAMETER_GRADIENT], accumulatedRou, *vecs[PARAMETER_GRADIENT], accumulatedRou, firstTime ? 1.0f : 1.0f - rou);
firstTime ? 1.0f : 1.0f - rou);
// learn_rate = 1/sqrt( ( E(g_t^2) + epsilon ) // learn_rate = 1/sqrt( ( E(g_t^2) + epsilon )
// Basiclly if the bigger the magnitude gradient is, // Basiclly if the bigger the magnitude gradient is,
@ -135,10 +143,12 @@ void DecayedAdagradParameterOptimizer(const VectorPtr vecs[],
vecs[PARAMETER_LEARNING_RATE]->add(*vecs[PARAMETER_GRADIENT_SQURESUM]); vecs[PARAMETER_LEARNING_RATE]->add(*vecs[PARAMETER_GRADIENT_SQURESUM]);
vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]); vecs[PARAMETER_LEARNING_RATE]->invSqrt(*vecs[PARAMETER_LEARNING_RATE]);
vecs[PARAMETER_VALUE]->sgdUpdate( vecs[PARAMETER_VALUE]->sgdUpdate(*vecs[PARAMETER_GRADIENT],
*vecs[PARAMETER_GRADIENT], *vecs[PARAMETER_MOMENTUM], *vecs[PARAMETER_MOMENTUM],
*vecs[PARAMETER_LEARNING_RATE], learningRate, *vecs[PARAMETER_LEARNING_RATE],
momentum, decayRate); learningRate,
momentum,
decayRate);
} }
void AdamParameterOptimizer(const VectorPtr vecs[], void AdamParameterOptimizer(const VectorPtr vecs[],
@ -164,16 +174,13 @@ void AdamParameterOptimizer(const VectorPtr vecs[],
// \theta_t = \theta_{t-1} - \alpha * \sqrt(1-\beta_2^t) / (1-\beta_1^t) * tmp // \theta_t = \theta_{t-1} - \alpha * \sqrt(1-\beta_2^t) / (1-\beta_1^t) * tmp
g->sqrt2(*v); g->sqrt2(*v);
g->dotDiv(*m, *g, 0., epsilon); g->dotDiv(*m, *g, 0., epsilon);
real alpha = learningRate * real alpha =
std::sqrt((real)1 - beta2_power) / ((real)1 - beta1_power); learningRate * std::sqrt((real)1 - beta2_power) / ((real)1 - beta1_power);
theta->add(*theta, 1.0, *g, -alpha); theta->add(*theta, 1.0, *g, -alpha);
} }
void AdamaxParameterOptimizer(const VectorPtr vecs[], void AdamaxParameterOptimizer(
real beta1, const VectorPtr vecs[], real beta1, real beta2, int64_t step, real alpha) {
real beta2,
int64_t step,
real alpha) {
Vector* m = vecs[PARAMETER_MOMENTUM].get(); Vector* m = vecs[PARAMETER_MOMENTUM].get();
Vector* g = vecs[PARAMETER_GRADIENT].get(); Vector* g = vecs[PARAMETER_GRADIENT].get();
Vector* u = vecs[PARAMETER_WEIGHTED_INFINITY_NORM].get(); Vector* u = vecs[PARAMETER_WEIGHTED_INFINITY_NORM].get();
@ -192,4 +199,3 @@ void AdamaxParameterOptimizer(const VectorPtr vecs[],
real learningRate = alpha / (1 - std::pow(beta1, step)); real learningRate = alpha / (1 - std::pow(beta1, step));
theta->add(*theta, 1.0, *g, -learningRate); theta->add(*theta, 1.0, *g, -learningRate);
} }

Loading…
Cancel
Save