You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							1954 lines
						
					
					
						
							58 KiB
						
					
					
				
			
		
		
	
	
							1954 lines
						
					
					
						
							58 KiB
						
					
					
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License. */
 | |
| 
 | |
| #include <paddle/utils/Logging.h>
 | |
| #include <string.h>
 | |
| #include <cmath>
 | |
| #include "BaseMatrix.h"
 | |
| #include "MathFunctions.h"
 | |
| #include "NEONFunctions.h"
 | |
| #include "SIMDFunctions.h"
 | |
| #include "hl_matrix_apply.cuh"
 | |
| #include "hl_matrix_base.cuh"
 | |
| #include "hl_matrix_ops.cuh"
 | |
| 
 | |
namespace paddle {

// Shared error message for the apply*/aggregate helpers below, which only
// operate on dense storage.
const char* SPARSE_SUPPORT_ERROR = "Sparse Matrix/Vector is not supported.";
 | |
/// Apply the elementwise operator @p op, in place, to the whole matrix.
/// Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyUnary(Op op) {
  MatrixOffset offset(0, 0);  // start at the top-left corner
  applyUnary(op, height_, width_, offset);
  return 0;
}
 | |
| 
 | |
/// Apply @p op in place to the numRows x numCols sub-matrix whose top-left
/// corner is (offset.aRow_, offset.aCol_). Dispatches to the GPU or CPU
/// kernel depending on useGpu_. Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyUnary(Op op,
                               int numRows,
                               int numCols,
                               MatrixOffset& offset) {
  CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
  int dimM = numRows;
  int dimN = numCols;
  int lda = stride_;

  T* A = data_;
  // Advances A to the element at (offset.aRow_, offset.aCol_).
  CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_);

  // The requested sub-matrix must lie entirely inside this matrix.
  CHECK_LE(dimM + offset.aRow_, this->height_);
  CHECK_LE(dimN + offset.aCol_, this->width_);
  if (true == useGpu_) {
    hl_gpu_apply_unary_op(op, A, dimM, dimN, lda);
  } else {
    hl_cpu_apply_unary_op(op, A, dimM, dimN, lda);
  }
  return 0;
}
 | |
| 
 | |
/// Apply the elementwise binary operator @p op over this matrix and @p b,
/// which must have identical dimensions. Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyBinary(Op op, BaseMatrixT& b) {
  CHECK(height_ == b.height_ && width_ == b.width_)
      << "Matrix dimensions are not equal";

  MatrixOffset offset(0, 0, 0, 0);
  applyBinary(op, b, height_, width_, offset);
  return 0;
}
 | |
| 
 | |
/// Sub-matrix binary apply with @p b treated as a full matrix
/// (no row- or column-vector broadcasting). Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyBinary(
    Op op, BaseMatrixT& b, int numRows, int numCols, MatrixOffset& offset) {
  applyBinary(op, b, numRows, numCols, offset, false_type(), false_type());
  return 0;
}
 | |
| 
 | |
/// Sub-matrix binary apply with optional broadcasting of @p b:
/// bAsRowVector/bAsColVector are compile-time bool tags (true_type /
/// false_type) selecting whether b is broadcast along rows and/or columns.
/// Bounds are only checked on the dimensions in which b is not broadcast.
/// Always returns 0.
template <class T>
template <class Op, class bAsRowVector, class bAsColVector>
int BaseMatrixT<T>::applyBinary(Op op,
                                BaseMatrixT& b,
                                int numRows,
                                int numCols,
                                MatrixOffset& offset,
                                bAsRowVector,
                                bAsColVector) {
  CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK(useGpu_ == b.useGpu_) << "Matrix type mismatch";

  int dimM = numRows;
  int dimN = numCols;
  int lda = stride_;
  int ldb = b.stride_;

  T* A = data_;
  T* B = b.data_;
  // Advance A and B to their respective (row, col) offsets.
  CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_);
  CAL_MATRIX_START_ADDRESS(
      B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_);
  CHECK_LE(dimM + offset.aRow_, this->height_);
  CHECK_LE(dimN + offset.aCol_, this->width_);
  if (!bAsRowVector::value && !bAsColVector::value) {
    // b used as a full matrix: both dimensions must fit.
    CHECK_LE(dimM + offset.bRow_, b.height_);
    CHECK_LE(dimN + offset.bCol_, b.width_);
  } else if (bAsRowVector::value && !bAsColVector::value) {
    // b broadcast down the rows: only its width matters.
    CHECK_LE(dimN + offset.bCol_, b.width_);
  } else if (!bAsRowVector::value && bAsColVector::value) {
    // b broadcast across the columns: only its height matters.
    CHECK_LE(dimM + offset.bRow_, b.height_);
  } else {
    // b broadcast in both dimensions: no size check applies.
  }
  if (true == useGpu_) {
    hl_gpu_apply_binary_op<T, Op, bAsRowVector::value, bAsColVector::value>(
        op, A, B, dimM, dimN, lda, ldb);
  } else {
    hl_cpu_apply_binary_op<T, Op, bAsRowVector::value, bAsColVector::value>(
        op, A, B, dimM, dimN, lda, ldb);
  }

  return 0;
}
 | |
| 
 | |
/// Apply the elementwise ternary operator @p op over this matrix, @p b and
/// @p c; all three must have identical dimensions. Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyTernary(Op op, BaseMatrixT& b, BaseMatrixT& c) {
  CHECK_EQ(height_, b.height_);
  CHECK_EQ(width_, b.width_);
  CHECK_EQ(height_, c.height_);
  CHECK_EQ(width_, c.width_);

  MatrixOffset offset(0, 0, 0, 0, 0, 0);
  applyTernary(op, b, c, height_, width_, offset);

  return 0;
}
 | |
| 
 | |
/// Sub-matrix ternary apply with @p c treated as a full matrix
/// (no broadcasting). Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyTernary(Op op,
                                 BaseMatrixT& b,
                                 BaseMatrixT& c,
                                 int numRows,
                                 int numCols,
                                 MatrixOffset& offset) {
  applyTernary(op, b, c, numRows, numCols, offset, false_type(), false_type());

  return 0;
}
 | |
| 
 | |
/// Sub-matrix ternary apply with optional broadcasting of @p c:
/// cAsRowVector/cAsColVector are compile-time bool tags selecting whether
/// c is broadcast along rows and/or columns (a and b are always full
/// matrices). Bounds are only checked on the non-broadcast dimensions of c.
/// Always returns 0.
template <class T>
template <class Op, class cAsRowVector, class cAsColVector>
int BaseMatrixT<T>::applyTernary(Op op,
                                 BaseMatrixT& b,
                                 BaseMatrixT& c,
                                 int numRows,
                                 int numCols,
                                 MatrixOffset& offset,
                                 cAsRowVector,
                                 cAsColVector) {
  CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK_EQ(useGpu_, b.useGpu_);
  CHECK_EQ(useGpu_, c.useGpu_);

  int dimM = numRows;
  int dimN = numCols;
  int lda = stride_;
  int ldb = b.stride_;
  int ldc = c.stride_;

  T* A = data_;
  T* B = b.data_;
  T* C = c.data_;
  // Advance each pointer to its (row, col) offset.
  CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_);
  CAL_MATRIX_START_ADDRESS(
      B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_);
  CAL_MATRIX_START_ADDRESS(
      C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_);

  CHECK_LE(dimM + offset.aRow_, this->height_);
  CHECK_LE(dimN + offset.aCol_, this->width_);
  CHECK_LE(dimM + offset.bRow_, b.height_);
  CHECK_LE(dimN + offset.bCol_, b.width_);
  if (!cAsRowVector::value && !cAsColVector::value) {
    // c used as a full matrix.
    CHECK_LE(dimM + offset.cRow_, c.height_);
    CHECK_LE(dimN + offset.cCol_, c.width_);
  } else if (cAsRowVector::value && !cAsColVector::value) {
    // c broadcast down the rows: only its width matters.
    CHECK_LE(dimN + offset.cCol_, c.width_);
  } else if (!cAsRowVector::value && cAsColVector::value) {
    // c broadcast across the columns: only its height matters.
    CHECK_LE(dimM + offset.cRow_, c.height_);
  } else {
    // c broadcast in both dimensions: no size check applies.
  }

  if (true == useGpu_) {
    hl_gpu_apply_ternary_op<T, Op, cAsRowVector::value, cAsColVector::value>(
        op, A, B, C, dimM, dimN, lda, ldb, ldc);
  } else {
    hl_cpu_apply_ternary_op<T, Op, cAsRowVector::value, cAsColVector::value>(
        op, A, B, C, dimM, dimN, lda, ldb, ldc);
  }

  return 0;
}
 | |
| 
 | |
/// Apply the elementwise quaternary operator @p op over this matrix, @p b,
/// @p c and @p d; all four must have identical dimensions. Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyQuaternary(Op op,
                                    BaseMatrixT& b,
                                    BaseMatrixT& c,
                                    BaseMatrixT& d) {
  CHECK_EQ(height_, b.height_);
  CHECK_EQ(width_, b.width_);
  CHECK_EQ(height_, c.height_);
  CHECK_EQ(width_, c.width_);
  CHECK_EQ(height_, d.height_);
  CHECK_EQ(width_, d.width_);

  MatrixOffset offset(0, 0, 0, 0, 0, 0, 0, 0);
  applyQuaternary(op, b, c, d, height_, width_, offset);

  return 0;
}
 | |
| 
 | |
/// Sub-matrix quaternary apply: all four operands are full matrices (no
/// broadcasting), each addressed through its own offset. Dispatches to the
/// GPU or CPU kernel depending on useGpu_. Always returns 0.
template <class T>
template <class Op>
int BaseMatrixT<T>::applyQuaternary(Op op,
                                    BaseMatrixT& b,
                                    BaseMatrixT& c,
                                    BaseMatrixT& d,
                                    int numRows,
                                    int numCols,
                                    MatrixOffset& offset) {
  CHECK(!this->isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK(!b.isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK(!c.isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK(!d.isSparse()) << SPARSE_SUPPORT_ERROR;
  CHECK_EQ(useGpu_, b.useGpu_);
  CHECK_EQ(useGpu_, c.useGpu_);
  CHECK_EQ(useGpu_, d.useGpu_);

  int dimM = numRows;
  int dimN = numCols;
  int lda = stride_;
  int ldb = b.stride_;
  int ldc = c.stride_;
  int ldd = d.stride_;

  T* A = data_;
  T* B = b.data_;
  T* C = c.data_;
  T* D = d.data_;
  // Advance each pointer to its (row, col) offset.
  CAL_MATRIX_START_ADDRESS(A, height_, width_, lda, offset.aCol_, offset.aRow_);
  CAL_MATRIX_START_ADDRESS(
      B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_);
  CAL_MATRIX_START_ADDRESS(
      C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_);
  CAL_MATRIX_START_ADDRESS(
      D, d.height_, d.width_, ldd, offset.dCol_, offset.dRow_);

  // Every operand's sub-matrix must fit inside its matrix.
  CHECK_LE(dimM + offset.aRow_, this->height_);
  CHECK_LE(dimN + offset.aCol_, this->width_);
  CHECK_LE(dimM + offset.bRow_, b.height_);
  CHECK_LE(dimN + offset.bCol_, b.width_);
  CHECK_LE(dimM + offset.cRow_, c.height_);
  CHECK_LE(dimN + offset.cCol_, c.width_);
  CHECK_LE(dimM + offset.dRow_, d.height_);
  CHECK_LE(dimN + offset.dCol_, d.width_);
  if (true == useGpu_) {
    hl_gpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd);
  } else {
    hl_cpu_apply_quaternary_op(op, A, B, C, D, dimM, dimN, lda, ldb, ldc, ldd);
  }

  return 0;
}
 | |
| 
 | |
/// Aggregate matrix @p b into this matrix (the destination vector):
/// - aAsRowVector  : dispatches to the column-op kernels (one result per
///                   column of b, written into a row-vector destination);
/// - aAsColVector  : dispatches to the row-op kernels (one result per row
///                   of b, written into a column-vector destination).
/// Any other tag combination is a fatal error. @p agg combines elements,
/// @p op transforms them, @p sv stores the result. Always returns 0.
template <class T>
template <class Agg,
          class Op,
          class Saver,
          class aAsRowVector,
          class aAsColVector>
int BaseMatrixT<T>::aggregate(Agg agg,
                              Op op,
                              Saver sv,
                              BaseMatrixT& b,
                              int numRows,
                              int numCols,
                              MatrixOffset& offset,
                              aAsRowVector,
                              aAsColVector) {
  CHECK_EQ(useGpu_, b.useGpu_);

  int ld = stride_;
  int ldb = b.stride_;

  T* dst = data_;
  T* B = b.data_;
  // Advance both pointers to their (row, col) offsets.
  CAL_MATRIX_START_ADDRESS(
      dst, height_, width_, ld, offset.aCol_, offset.aRow_);
  CAL_MATRIX_START_ADDRESS(
      B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_);

  if (aAsRowVector::value && !aAsColVector::value) {
    if (useGpu_) {
      hl_gpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, ldb);
    } else {
      hl_cpu_matrix_column_op(agg, op, sv, numRows, numCols, dst, B, ldb);
    }
  } else if (!aAsRowVector::value && aAsColVector::value) {
    if (useGpu_) {
      hl_gpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, ldb);
    } else {
      hl_cpu_matrix_row_op(agg, op, sv, numRows, numCols, dst, ld, B, ldb);
    }
  } else {
    LOG(FATAL) << "not supported";
  }

  return 0;
}
 | |
| 
 | |
/// Two-operand variant of aggregate(): combines matrices @p b and @p c into
/// this destination vector. Tag semantics match the single-operand overload:
/// aAsRowVector selects the column-op kernels, aAsColVector the row-op
/// kernels; any other combination is a fatal error. Always returns 0.
template <class T>
template <class Agg,
          class Op,
          class Saver,
          class aAsRowVector,
          class aAsColVector>
int BaseMatrixT<T>::aggregate(Agg agg,
                              Op op,
                              Saver sv,
                              BaseMatrixT& b,
                              BaseMatrixT& c,
                              int numRows,
                              int numCols,
                              MatrixOffset& offset,
                              aAsRowVector,
                              aAsColVector) {
  CHECK_EQ(useGpu_, b.useGpu_);
  CHECK_EQ(useGpu_, c.useGpu_);

  int ld = stride_;
  int ldb = b.stride_;
  int ldc = c.stride_;

  T* dst = data_;
  T* B = b.data_;
  T* C = c.data_;
  // Advance all three pointers to their (row, col) offsets.
  CAL_MATRIX_START_ADDRESS(
      dst, height_, width_, ld, offset.aCol_, offset.aRow_);
  CAL_MATRIX_START_ADDRESS(
      B, b.height_, b.width_, ldb, offset.bCol_, offset.bRow_);
  CAL_MATRIX_START_ADDRESS(
      C, c.height_, c.width_, ldc, offset.cCol_, offset.cRow_);

  if (aAsRowVector::value && !aAsColVector::value) {
    if (useGpu_) {
      hl_gpu_matrix_column_op(
          agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc);
    } else {
      hl_cpu_matrix_column_op(
          agg, op, sv, numRows, numCols, dst, B, ldb, C, ldc);
    }
  } else if (!aAsRowVector::value && aAsColVector::value) {
    if (useGpu_) {
      hl_gpu_matrix_row_op(
          agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc);
    } else {
      hl_cpu_matrix_row_op(
          agg, op, sv, numRows, numCols, dst, ld, B, ldb, C, ldc);
    }
  } else {
    LOG(FATAL) << "not supported";
  }

  return 0;
}
 | |
| 
 | |
| /**
 | |
|  * @brief   unary operator.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Neg, a = -a);
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::neg() {
 | |
|   applyUnary(unary::Neg<T>());
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Exp, a = exp(a));
 | |
| template <>
 | |
| void BaseMatrixT<real>::exp2() {
 | |
|   applyUnary(unary::Exp<real>());
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Log, a = log(a));
 | |
| template <>
 | |
| void BaseMatrixT<real>::log2() {
 | |
|   if (useGpu_) {
 | |
|     applyUnary(unary::Log<real>());
 | |
|   } else {
 | |
|     vLog(height_ * width_, data_, data_);
 | |
|   }
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Sqrt, a = sqrt(a));
 | |
| template <>
 | |
| void BaseMatrixT<real>::sqrt2() {
 | |
|   applyUnary(unary::Sqrt<real>());
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Square, a = a * a);
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::square2() {
 | |
|   applyUnary(unary::Square<T>());
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Reciprocal, a = 1.0f / a);
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::reciprocal2() {
 | |
|   applyUnary(unary::Reciprocal<T>());
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Abs, a = a > 0 ? a : -a);
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::abs2() {
 | |
|   applyUnary(unary::Abs<T>());
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Sign, a = (a > 0) - (a < 0));
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::sign2() {
 | |
|   applyUnary(unary::Sign<T>());
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::zero() {
 | |
|   applyUnary(unary::Zero<T>());
 | |
| }
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::zeroAtOffset(int64_t columnOffset, int64_t numColumns) {
 | |
|   int numRows = height_;
 | |
|   int numCols = numColumns;
 | |
|   MatrixOffset offset(columnOffset, 0);
 | |
|   applyUnary(unary::Zero<T>(), numRows, numCols, offset);
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_UNARY_OP(One, a = 1);
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::one() {
 | |
|   applyUnary(unary::One<T>());
 | |
| }
 | |
| 
 | |
// In-place power: a = a^p.
DEFINE_MATRIX_UNARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(a, p));
template <>
void BaseMatrixT<real>::pow2(real p) {
  if (useGpu_) {
    applyUnary(unary::Pow<real>(p));
  } else {
    // Vectorized CPU path (see MathFunctions).
    vPow(height_ * width_, data_, p, data_);
  }
}

// In-place scalar subtraction: a -= p.
DEFINE_MATRIX_UNARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a -= p);
template <class T>
void BaseMatrixT<T>::subScalar(T p) {
  applyUnary(unary::SubScalar<T>(p));
}

// In-place scalar multiplication: a *= p.
DEFINE_MATRIX_UNARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a *= p);
template <class T>
void BaseMatrixT<T>::mulScalar(T p) {
  applyUnary(unary::MulScalar<T>(p));
}

// In-place scalar division: a /= p.
DEFINE_MATRIX_UNARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a /= p);
template <class T>
void BaseMatrixT<T>::divScalar(T p) {
  applyUnary(unary::DivScalar<T>(p));
}

// Fill every element with the scalar p.
DEFINE_MATRIX_UNARY_PARAMETER_OP(Assign, ONE_PARAMETER, a = p);
template <class T>
void BaseMatrixT<T>::assign(T p) {
  applyUnary(unary::Assign<T>(p));
}

// In-place scalar addition: a += p.
DEFINE_MATRIX_UNARY_PARAMETER_OP(Add, ONE_PARAMETER, a += p);
template <class T>
void BaseMatrixT<T>::add(T p) {
  applyUnary(unary::Add<T>(p));
}

// Affine update: a = a * p1 + p2.
DEFINE_MATRIX_UNARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = a * p1 + p2);
template <class T>
void BaseMatrixT<T>::add(T p1, T p2) {
  applyUnary(unary::Add2<T>(p1, p2));
}

// Clamp every element into [p1, p2].
DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip,
                                 TWO_PARAMETER,
                                 a = a < p1 ? p1 : (a > p2 ? p2 : a));
template <class T>
void BaseMatrixT<T>::clip(T p1, T p2) {
  applyUnary(unary::Clip<T>(p1, p2));
}

// Derivative of clip w.r.t. its input b: 1 inside [p1, p2], else 0.
DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative,
                                  TWO_PARAMETER,
                                  a = b < p1 ? 0 : (b > p2 ? 0 : 1));
template <class T>
void BaseMatrixT<T>::clipDerivative(BaseMatrixT& b, T p1, T p2) {
  applyBinary(binary::ClipDerivative<T>(p1, p2), b);
}

// Threshold indicator: a = 1 if a > p, else 0.
DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar,
                                 ONE_PARAMETER,
                                 a = a > p ? 1.0f : 0.0f);
template <class T>
void BaseMatrixT<T>::biggerThanScalar(T p) {
  applyUnary(unary::BiggerThanScalar<T>(p));
}

// Lower-bound clip: a = max(a, p).
DEFINE_MATRIX_UNARY_PARAMETER_OP(DownClip, ONE_PARAMETER, a = a > p ? a : p);
template <class T>
void BaseMatrixT<T>::downClip(T p) {
  applyUnary(unary::DownClip<T>(p));
}
 | |
| 
 | |
| /**
 | |
|  * @brief   binary operator.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| DEFINE_MATRIX_BINARY_OP(Add, a += b);
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::add(BaseMatrixT& b) {
 | |
|   applyBinary(binary::Add<T>(), b);
 | |
| }
 | |
| 
 | |
| template <>
 | |
| void BaseMatrixT<real>::add(BaseMatrixT& b) {
 | |
|   if (useGpu_) {
 | |
|     applyBinary(binary::Add<real>(), b);
 | |
|   } else {  // cpu branch
 | |
|     CHECK_EQ(height_, b.height_);
 | |
|     CHECK_EQ(width_, b.width_);
 | |
|     vAdd(height_ * width_, data_, b.data_, data_);
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::addAtOffset(BaseMatrixT& b, int64_t columnOffset) {
 | |
|   if (columnOffset + b.width_ <= width_) {
 | |
|     int numRows = height_;
 | |
|     int numCols = b.width_;
 | |
|     MatrixOffset offset(columnOffset, 0, 0, 0);
 | |
|     applyBinary(binary::Add<T>(), b, numRows, numCols, offset);
 | |
|   } else if (columnOffset + width_ <= b.width_) {
 | |
|     int numRows = height_;
 | |
|     int numCols = width_;
 | |
|     MatrixOffset offset(0, 0, columnOffset, 0);
 | |
|     applyBinary(binary::Add<T>(), b, numRows, numCols, offset);
 | |
|   } else {
 | |
|     LOG(FATAL) << "Wrong argument "
 | |
|                << " a.width=" << width_ << " b.width=" << b.width_
 | |
|                << " columnOffset=" << columnOffset;
 | |
|   }
 | |
| }
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::addP2P(BaseMatrixT& b) {
 | |
|   T* A = data_;
 | |
|   T* B = b.data_;
 | |
|   int dimM = height_;
 | |
|   int dimN = width_;
 | |
| 
 | |
|   hl_gpu_apply_binary_op<T, binary::Add<T>, 0, 0>(
 | |
|       binary::Add<T>(), A, B, dimM, dimN, dimN, dimN);
 | |
| }
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::addColVector(BaseMatrixT& b) {
 | |
|   MatrixOffset offset(0, 0, 0, 0);
 | |
|   int numRows = height_;
 | |
|   int numCols = width_;
 | |
|   applyBinary(binary::Add<T>(),
 | |
|               b,
 | |
|               numRows,
 | |
|               numCols,
 | |
|               offset,
 | |
|               false_type(),
 | |
|               true_type() /* bAsColVector */);
 | |
| }
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::addRowVector(BaseMatrixT& b) {
 | |
|   MatrixOffset offset(0, 0, 0, 0);
 | |
|   int numRows = height_;
 | |
|   int numCols = width_;
 | |
|   applyBinary(binary::Add<T>(),
 | |
|               b,
 | |
|               numRows,
 | |
|               numCols,
 | |
|               offset,
 | |
|               true_type() /* bAsRowVector */,
 | |
|               false_type());
 | |
| }
 | |
| 
 | |
// Scaled accumulation (axpy-style): a += b * p.
DEFINE_MATRIX_BINARY_PARAMETER_OP(Add1, ONE_PARAMETER, a += b * p);
template <class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, T p) {
  applyBinary(binary::Add1<T>(p), b);
}

// Out-of-place power: a = b^p.
DEFINE_MATRIX_BINARY_PARAMETER_OP(Pow, ONE_PARAMETER, a = pow(b, p));
template <>
void BaseMatrixT<real>::pow2(BaseMatrixT& b, real p) {
  if (useGpu_) {
    applyBinary(binary::Pow<real>(p), b);
  } else {
    // Vectorized CPU path (see MathFunctions).
    vPow(height_ * width_, b.data_, p, data_);
  }
}

// Weighted blend: a = p1 * a + p2 * b.
DEFINE_MATRIX_BINARY_PARAMETER_OP(Add2, TWO_PARAMETER, a = p1 * a + p2 * b);
template <class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, T p1, T p2) {
  applyBinary(binary::Add2<T>(p1, p2), b);
}

/// Add the bias row vector b (scaled by `scale`) to every row.
template <class T>
void BaseMatrixT<T>::addBias(BaseMatrixT& b, T scale) {
  MatrixOffset offset(0, 0, 0, 0);
  int numRows = height_;
  int numCols = width_;
  applyBinary(binary::Add1<T>(scale),
              b,
              numRows,
              numCols,
              offset,
              true_type() /* bAsRowVector */,
              false_type());
}

// Elementwise subtraction: a -= b.
DEFINE_MATRIX_BINARY_OP(Sub, a -= b);
template <class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b) {
  applyBinary(binary::Sub<T>(), b);
}

// Scaled subtraction: a -= b * p.
DEFINE_MATRIX_BINARY_PARAMETER_OP(Sub1, ONE_PARAMETER, a -= b * p);
template <class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b, T p) {
  applyBinary(binary::Sub1<T>(p), b);
}
 | |
| 
 | |
// Forward ReLU: b = max(a, 0). Note: writes into b, reads from a (this).
DEFINE_MATRIX_BINARY_OP(Relu, b = a > 0.0f ? a : 0.0f);
template <class T>
void BaseMatrixT<T>::relu(BaseMatrixT& b) {
  applyBinary(binary::Relu<T>(), b);
}

#if defined(__ARM_NEON__) || defined(__ARM_NEON)
// ARM NEON specialization of the float forward ReLU.
template <>
void BaseMatrixT<float>::relu(BaseMatrixT& b) {
  neon::relu(data_, b.data_, height_ * width_);
}
#endif

// ReLU backward: gradient a is zeroed where the forward input b was <= 0.
DEFINE_MATRIX_BINARY_OP(ReluDerivative, a *= (b > 0.0f ? 1.0f : 0.0f));
template <class T>
void BaseMatrixT<T>::reluDerivative(BaseMatrixT& b) {
  applyBinary(binary::ReluDerivative<T>(), b);
}

// Softplus ("soft relu"): b = log(1 + exp(a)), with a clamped to
// [-THRESHOLD, THRESHOLD] to keep exp() in range.
DEFINE_MATRIX_BINARY_OP(Softrelu, const T THRESHOLD = 40.0;
                        b = log(1.0 + exp((a > THRESHOLD)
                                              ? THRESHOLD
                                              : ((a < -THRESHOLD) ? (-THRESHOLD)
                                                                  : a))));
template <>
void BaseMatrixT<real>::softrelu(BaseMatrixT& b) {
  applyBinary(binary::Softrelu<real>(), b);
}

// Softplus backward: a *= sigmoid(b), expressed as 1 - exp(-clamped b).
DEFINE_MATRIX_BINARY_OP(
    SoftreluDerivative, const T THRESHOLD = 40.0;
    a *= (1.0 - exp(-1.0 * ((b > THRESHOLD)
                                ? THRESHOLD
                                : ((b < -THRESHOLD) ? (-THRESHOLD) : b)))));
template <>
void BaseMatrixT<real>::softreluDerivative(BaseMatrixT& b) {
  applyBinary(binary::SoftreluDerivative<real>(), b);
}

// Bounded ReLU forward: b = clamp(a, p1, p2).
DEFINE_MATRIX_BINARY_PARAMETER_OP(Brelu, TWO_PARAMETER, b = a > p1 ? a : p1;
                                  b = b < p2 ? b : p2);
template <class T>
void BaseMatrixT<T>::brelu(BaseMatrixT& b) {
  int p1 = 0, p2 = 24;  //! TODO(yuyang18): Make p1,p2 configuable.
  applyBinary(binary::Brelu<T>(p1, p2), b);
}

// Bounded ReLU backward: gradient passes only where p1 < b < p2.
DEFINE_MATRIX_BINARY_PARAMETER_OP(BreluDerivative,
                                  TWO_PARAMETER,
                                  a *= (b > p1 && b < p2) ? 1.0 : 0.0);
template <class T>
void BaseMatrixT<T>::breluDerivative(BaseMatrixT& b) {
  // Bounds hard-coded to match brelu() above.
  int p1 = 0, p2 = 24;
  applyBinary(binary::BreluDerivative<T>(p1, p2), b);
}
 | |
| 
 | |
// Out-of-place square: b = a * a.
DEFINE_MATRIX_BINARY_OP(Square, b = a * a);
template <class T>
void BaseMatrixT<T>::square2(BaseMatrixT& b) {
  applyBinary(binary::Square<T>(), b);
}

// Square backward: a *= 2 * b (chain rule of x^2 at input b).
DEFINE_MATRIX_BINARY_OP(SquareDerivative, a *= 2.0 * b);
template <class T>
void BaseMatrixT<T>::squareDerivative(BaseMatrixT& b) {
  applyBinary(binary::SquareDerivative<T>(), b);
}

// tanh via b = 2 / (1 + exp(-2a)) - 1, with the exponent clamped to
// EXP_MAX_INPUT to avoid overflow.
DEFINE_MATRIX_BINARY_OP(Tanh, T tmp = -2.0 * a;
                        tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
                        b = 2.0 / (1.0 + std::exp(tmp)) - 1.0);
template <>
void BaseMatrixT<real>::tanh(BaseMatrixT& b) {
  applyBinary(binary::Tanh<real>(), b);
}

// tanh backward: a *= 1 - b^2 where b is the forward output.
DEFINE_MATRIX_BINARY_OP(TanhDerivative, a *= 1 - b * b);
template <class T>
void BaseMatrixT<T>::tanhDerivative(BaseMatrixT& b) {
  applyBinary(binary::TanhDerivative<T>(), b);
}

// Scaled tanh forward: b = p1 * tanh(p2 * a).
DEFINE_MATRIX_BINARY_PARAMETER_OP(
    ScaledTanh, TWO_PARAMETER, b = p1 * (2.0 / (1.0 + exp(-2 * p2 * a)) - 1.0));
template <>
void BaseMatrixT<real>::scaledTanh(BaseMatrixT& b, real p1, real p2) {
  applyBinary(binary::ScaledTanh<real>(p1, p2), b);
}

// Scaled tanh backward: with forward output b = p1*tanh(p2*x), the
// derivative is (p2/p1) * (p1^2 - b^2), hence the (p1*p1, p2/p1) params.
DEFINE_MATRIX_BINARY_PARAMETER_OP(ScaledTanhDerivative,
                                  TWO_PARAMETER,
                                  a *= p2 * (p1 - b * b));
template <class T>
void BaseMatrixT<T>::scaledTanhDerivative(BaseMatrixT& b, T p1, T p2) {
  applyBinary(binary::ScaledTanhDerivative<T>(p1 * p1, p2 / p1), b);
}

// Out-of-place reciprocal: b = 1 / a.
DEFINE_MATRIX_BINARY_OP(Reciprocal, b = 1.0f / a);
template <class T>
void BaseMatrixT<T>::reciprocal2(BaseMatrixT& b) {
  applyBinary(binary::Reciprocal<T>(), b);
}

// Reciprocal backward: a *= -b^2 where b is the forward output 1/x.
DEFINE_MATRIX_BINARY_OP(ReciprocalDerivative, a *= -b * b);
template <class T>
void BaseMatrixT<T>::reciprocalDerivative(BaseMatrixT& b) {
  applyBinary(binary::ReciprocalDerivative<T>(), b);
}

// Out-of-place absolute value: b = |a|.
DEFINE_MATRIX_BINARY_OP(Abs, b = a > 0.0f ? a : -a);
template <class T>
void BaseMatrixT<T>::abs2(BaseMatrixT& b) {
  applyBinary(binary::Abs<T>(), b);
}

// Abs backward: gradient keeps sign of input b, zero at b == 0.
DEFINE_MATRIX_BINARY_OP(AbsDerivative, a = (b > 0) ? a : (b < 0) ? -a : 0);
template <class T>
void BaseMatrixT<T>::absDerivative(BaseMatrixT& b) {
  applyBinary(binary::AbsDerivative<T>(), b);
}
 | |
| 
 | |
// Sigmoid forward: b = 1 / (1 + exp(-a)), with a clamped to
// [THRESHOLD_MIN, THRESHOLD_MAX] so the result stays strictly in (0, 1).
DEFINE_MATRIX_BINARY_OP(Sigmoid, const T THRESHOLD_MIN = -40.0;
                        const T THRESHOLD_MAX = 13.0;
                        T tmp = (a < THRESHOLD_MIN)
                                    ? THRESHOLD_MIN
                                    : ((a > THRESHOLD_MAX) ? THRESHOLD_MAX : a);
                        b = 1.0f / (1.0f + exp(-tmp)));
template <>
void BaseMatrixT<real>::sigmoid(BaseMatrixT& b) {
  if (useGpu_) {
    applyBinary(binary::Sigmoid<real>(), b);
  } else {  // cpu version
    size_t numSamples = this->height_;
    size_t dim = this->width_;
    CHECK_EQ(b.height_, numSamples);
    CHECK_EQ(b.width_, dim);
    const real* in = this->data_;
    real* out = b.data_;

    // out = -clamp(in): clamp first so the later exp() cannot overflow.
    const float THRESHOLD_MIN = -40.0;  // make sure sigmoid(x) > 0
    const float THRESHOLD_MAX = 13.0;   // make sure sigmoid(x) < 1
    for (size_t i = 0; i < numSamples * dim; ++i) {
      real tmp = in[i];
      tmp = (tmp < THRESHOLD_MIN)
                ? THRESHOLD_MIN
                : ((tmp > THRESHOLD_MAX) ? THRESHOLD_MAX : tmp);
      out[i] = -tmp;
    }

    // out = exp(out), vectorized (see MathFunctions).
    vExp(numSamples * dim, out, out);

    // out = 1 / (1 + out)
    for (size_t i = 0; i < numSamples * dim; ++i) {
      out[i] = 1 / (1 + out[i]);
    }
  }
}

// Sigmoid backward: a *= b * (1 - b) where b is the forward output.
DEFINE_MATRIX_BINARY_OP(SigmoidDerivative, a *= b * (1 - b));
template <class T>
void BaseMatrixT<T>::sigmoidDerivative(BaseMatrixT& b) {
  applyBinary(binary::SigmoidDerivative<T>(), b);
}

// Exp backward: a *= b (derivative of exp is the forward output itself).
DEFINE_MATRIX_BINARY_OP(ExpDerivative, a *= b);
template <class T>
void BaseMatrixT<T>::expDerivative(BaseMatrixT& b) {
  applyBinary(binary::ExpDerivative<T>(), b);
}

// Binary sign: b = +1 for a > 0, else -1 (note: zero maps to -1 here,
// unlike the three-valued unary Sign above).
DEFINE_MATRIX_BINARY_OP(Sign, b = a > 0.0f ? 1.0f : -1.0f);
template <class T>
void BaseMatrixT<T>::sign2(BaseMatrixT& b) {
  applyBinary(binary::Sign<T>(), b);
}
 | |
| 
 | |
| DEFINE_MATRIX_BINARY_OP(Exp, a = exp(b));
 | |
| template <>
 | |
| void BaseMatrixT<real>::exp2(BaseMatrixT& b) {
 | |
|   applyBinary(binary::Exp<real>(), b);
 | |
| }
 | |
| 
 | |
| DEFINE_MATRIX_BINARY_OP(Log, a = log(b));
 | |
| template <>
 | |
| void BaseMatrixT<real>::log2(BaseMatrixT& b) {
 | |
|   if (useGpu_) {
 | |
|     applyBinary(binary::Log<real>(), b);
 | |
|   } else {
 | |
|     vLog(height_ * width_, b.data_, data_);
 | |
|   }
 | |
| }
 | |
| 
 | |
// Elementwise square root: a = sqrt(b). Defined for `real` only.
DEFINE_MATRIX_BINARY_OP(Sqrt, a = sqrt(b));
template <>
void BaseMatrixT<real>::sqrt2(BaseMatrixT& b) {
  applyBinary(binary::Sqrt<real>(), b);
}

// Elementwise inverse square root: a = 1 / sqrt(b).
DEFINE_MATRIX_BINARY_OP(InvSqrt, a = 1.0f / sqrt(b));
template <>
void BaseMatrixT<real>::invSqrt(BaseMatrixT& b) {
  if (useGpu_) {
    applyBinary(binary::InvSqrt<real>(), b);
  } else {  // cpu branch
    // vInvSqrt walks height_ * width_ contiguous elements of both buffers,
    // so the shapes must match exactly.
    CHECK_EQ(height_, b.height_);
    CHECK_EQ(width_, b.width_);
    vInvSqrt(height_ * width_, b.data_, data_);
  }
}
 | |
| 
 | |
// Elementwise comparison against a scalar: a = (b == p) ? 1 : 0.
DEFINE_MATRIX_BINARY_PARAMETER_OP(IsEqual, ONE_PARAMETER, a = (b == p));
template <class T>
void BaseMatrixT<T>::isEqualTo(BaseMatrixT& b, T value) {
  applyBinary(binary::IsEqual<T>(value), b);
}

// a = b + p (scalar broadcast add).
DEFINE_MATRIX_BINARY_PARAMETER_OP(AddScalar, ONE_PARAMETER, a = b + p);
template <class T>
void BaseMatrixT<T>::addScalar(BaseMatrixT& b, T p) {
  applyBinary(binary::AddScalar<T>(p), b);
}

// a = b - p.
DEFINE_MATRIX_BINARY_PARAMETER_OP(SubScalar, ONE_PARAMETER, a = b - p);
template <class T>
void BaseMatrixT<T>::subScalar(BaseMatrixT& b, T p) {
  applyBinary(binary::SubScalar<T>(p), b);
}

// a = b * p.
DEFINE_MATRIX_BINARY_PARAMETER_OP(MulScalar, ONE_PARAMETER, a = b * p);
template <class T>
void BaseMatrixT<T>::mulScalar(BaseMatrixT& b, T p) {
  applyBinary(binary::MulScalar<T>(p), b);
}

// a = b / p (no p == 0 guard; caller's responsibility).
DEFINE_MATRIX_BINARY_PARAMETER_OP(DivScalar, ONE_PARAMETER, a = b / p);
template <class T>
void BaseMatrixT<T>::divScalar(BaseMatrixT& b, T p) {
  applyBinary(binary::DivScalar<T>(p), b);
}

// a = p / b (no b == 0 guard; caller's responsibility).
DEFINE_MATRIX_BINARY_PARAMETER_OP(ScalarDiv, ONE_PARAMETER, a = p / b);
template <class T>
void BaseMatrixT<T>::scalarDiv(BaseMatrixT& b, T p) {
  applyBinary(binary::ScalarDiv<T>(p), b);
}
 | |
| 
 | |
/**
 * @brief   ternary operators (two source operands plus the destination).
 *
 */

// Cross entropy with soft labels c against predictions b:
//   a = -c * log(b) - (1 - c) * log(1 - b).
DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropy,
                         a = -c * log(b) - (1 - c) * log(1 - b));
template <>
void BaseMatrixT<real>::softCrossEntropy(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::SoftCrossEntropy<real>(), b, c);
}

// Gradient of the above w.r.t. b, accumulated: a += (b - c) / (b * (1 - b)).
DEFINE_MATRIX_TERNARY_OP(SoftCrossEntropyBp, a += (b - c) / (b * (1 - b)));
template <class T>
void BaseMatrixT<T>::softCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::SoftCrossEntropyBp<T>(), b, c);
}
 | |
| 
 | |
// Cross entropy with hard binary labels (c thresholded at 0.5):
//   a = -log(b) when c > 0.5, else -log(1 - b).
DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropy,
                         a = c > 0.5 ? -log(b) : -log(1.0 - b));
template <>
void BaseMatrixT<real>::binaryLabelCrossEntropy(BaseMatrixT& b,
                                                BaseMatrixT& c) {
  if (useGpu_) {
    applyTernary(ternary::BinaryCrossEntropy<real>(), b, c);
  } else {
    // CPU path dereferences raw buffers, so validate shapes up front.
    CHECK_EQ(height_, b.height_);
    CHECK_EQ(height_, c.height_);
    CHECK_EQ(width_, b.width_);
    CHECK_EQ(width_, c.width_);

    size_t size = height_ * width_;
    real* out = b.data_;    // predictions
    real* label = c.data_;  // labels
    real* cost = data_;     // destination

    // Pass 1: cost = probability assigned to the labeled class.
    for (size_t i = 0; i < size; ++i) {
      cost[i] = label[i] > 0.5 ? out[i] : 1.0 - out[i];
    }
    // Pass 2+3: cost = -log(cost), split so vLog runs vectorized in bulk.
    vLog(size, cost, cost);
    for (size_t i = 0; i < size; ++i) {
      cost[i] *= -1.0;
    }
  }
}
 | |
| 
 | |
// Gradient of binary-label cross entropy w.r.t. b, accumulated:
//   a += -1/b when c > 0.5, else 1/(1 - b).
DEFINE_MATRIX_TERNARY_OP(BinaryCrossEntropyBp,
                         a += c > 0.5 ? -1.0 / b : 1.0 / (1.0 - b));
template <class T>
void BaseMatrixT<T>::binaryLabelCrossEntropyBp(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::BinaryCrossEntropyBp<T>(), b, c);
}

// Elementwise sum: a = b + c.
DEFINE_MATRIX_TERNARY_OP(Add, a = b + c);
template <class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::Add<T>(), b, c);
}

// Weighted sum: a = p1 * b + p2 * c.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add1, TWO_PARAMETER, a = p1 * b + p2 * c);
template <class T>
void BaseMatrixT<T>::add(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) {
  applyTernary(ternary::Add1<T>(p1, p2), b, c);
}

// Elementwise difference: a = b - c.
DEFINE_MATRIX_TERNARY_OP(Sub, a = b - c);
template <class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::Sub<T>(), b, c);
}

// Weighted difference: a = p1 * b - p2 * c.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(Sub1, TWO_PARAMETER, a = p1 * b - p2 * c);
template <class T>
void BaseMatrixT<T>::sub(BaseMatrixT& b, T p1, BaseMatrixT& c, T p2) {
  applyTernary(ternary::Sub1<T>(p1, p2), b, c);
}

// In-place accumulate of two matrices: a = a + b + c.
DEFINE_MATRIX_TERNARY_OP(Add2, a = a + b + c);
template <class T>
void BaseMatrixT<T>::add2(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::Add2<T>(), b, c);
}
 | |
| 
 | |
// Three-way weighted sum including the destination: a = p1*a + p2*b + p3*c.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(Add3,
                                   THREE_PARAMETER,
                                   a = p1 * a + p2 * b + p3 * c);
template <class T>
void BaseMatrixT<T>::add2(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) {
  applyTernary(ternary::Add3<T>(p1, p2, p3), b, c);
}

// SGD with momentum and weight decay, elementwise:
//   mom  c = p2 * c - p1 * (b + p3 * a)   (b = grad, a = param)
//   then a = a + c.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(SgdUpdate,
                                   THREE_PARAMETER,
                                   c = p2 * c - p1 * (b + p3 * a);
                                   a = a + c);
template <class T>
void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b,  // grad
                               BaseMatrixT& c,  // mom
                               T p1,            // learningRate,
                               T p2,            // momentum,
                               T p3) {          // decayRate
  applyTernary(ternary::SgdUpdate<T>(p1, p2, p3), b, c);
}

// Same update with a per-element learning rate d scaling the step.
DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(SgdUpdate,
                                      THREE_PARAMETER,
                                      c = p2 * c - p1 * d * (b + p3 * a);
                                      a += c);
template <class T>
void BaseMatrixT<T>::sgdUpdate(BaseMatrixT& b,  // grad,
                               BaseMatrixT& c,  // mom,
                               BaseMatrixT& d,  // lr,
                               T p1,            // learningRate,
                               T p2,            // momentum,
                               T p3) {          // decayRate
  applyQuaternary(quaternary::SgdUpdate<T>(p1, p2, p3), b, c, d);
}
 | |
| 
 | |
// L1 regularization (soft-threshold / shrinkage) with per-element learning
// rate b: lambda = p * b; a is moved toward zero by lambda, clamping at 0.
DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p * b;
                                  a = (a > lambda)
                                          ? (a - lambda)
                                          : (a < -lambda) ? (a + lambda) : 0);
template <class T>
void BaseMatrixT<T>::applyL1(BaseMatrixT& lr, T learningRate, T decayRate) {
  applyBinary(binary::ApplyL1<T>(learningRate * decayRate), lr);
}

// `real` specialization: CPU path dispatches to the SIMD decayL1 kernel.
template <>
void BaseMatrixT<real>::applyL1(BaseMatrixT& lr,
                                real learningRate,
                                real decayRate) {
  if (useGpu_) {
    applyBinary(binary::ApplyL1<real>(learningRate * decayRate), lr);
  } else {
    simd::decayL1(this->data_,
                  this->data_,
                  lr.data_,
                  learningRate * decayRate,
                  height_ * width_);
  }
}

// Same shrinkage with a uniform lambda = learningRate * decayRate.
DEFINE_MATRIX_UNARY_PARAMETER_OP(ApplyL1, ONE_PARAMETER, T lambda = p;
                                 a = (a > lambda)
                                         ? (a - lambda)
                                         : (a < -lambda) ? (a + lambda) : 0);
template <class T>
void BaseMatrixT<T>::applyL1(T learningRate, T decayRate) {
  applyUnary(unary::ApplyL1<T>(learningRate * decayRate));
}

// `real` specialization of the uniform-lambda variant (SIMD on CPU).
template <>
void BaseMatrixT<real>::applyL1(real learningRate, real decayRate) {
  if (useGpu_) {
    applyUnary(unary::ApplyL1<real>(learningRate * decayRate));
  } else {
    simd::decayL1(
        this->data_, this->data_, learningRate * decayRate, height_ * width_);
  }
}
 | |
| 
 | |
| DEFINE_MATRIX_BINARY_PARAMETER_OP(ApplyL2,
 | |
|                                   ONE_PARAMETER,
 | |
|                                   a *= (1.0f / (1.0f + p * b)));
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::applyL2(BaseMatrixT& lr, T learningRate, T decayRate) {
 | |
|   if (useGpu_) {
 | |
|     applyBinary(binary::ApplyL2<T>(learningRate * decayRate), lr);
 | |
|   } else {
 | |
|     size_t size = this->height_ * this->width_;
 | |
|     T decay = learningRate * decayRate;
 | |
|     for (size_t j = 0; j < size; ++j) {
 | |
|       this->data_[j] *= 1.0f / (1.0f + decay * lr.data_[j]);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
// Uniform L2 decay: scale the whole matrix by 1 / (1 + lr * decay).
template <class T>
void BaseMatrixT<T>::applyL2(T learningRate, T decayRate) {
  BaseMatrixT<T>::mulScalar(1.0f / (1.0f + learningRate * decayRate));
}

// In-place elementwise product: a *= b.
DEFINE_MATRIX_BINARY_OP(DotMul, a *= b);
template <class T>
void BaseMatrixT<T>::dotMul(BaseMatrixT& b) {
  applyBinary(binary::DotMul<T>(), b);
}

// Elementwise product into the destination: a = b * c.
DEFINE_MATRIX_TERNARY_OP(DotMul, a = b * c);
template <class T>
void BaseMatrixT<T>::dotMul(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::DotMul<T>(), b, c);
}

// Elementwise division with a 0/x == 0 convention: a = (b == 0) ? 0 : b / c.
DEFINE_MATRIX_TERNARY_OP(DotDiv, a = (b == 0.0) ? 0.0 : b / c);
template <class T>
void BaseMatrixT<T>::dotDiv(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::DotDiv<T>(), b, c);
}

// Smoothed division: a = (b + p1) / (c + p2).
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotDiv2P,
                                   TWO_PARAMETER,
                                   a = (b + p1) / (c + p2));
template <class T>
void BaseMatrixT<T>::dotDiv(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
  applyTernary(ternary::DotDiv2P<T>(p1, p2), b, c);
}
 | |
| 
 | |
// Pairwise rank loss: x = clamp(b - c, [-40, 40]);
//   a = log(1 + exp(x)) - x * d   (d is the target preference).
// The clamp keeps exp() from overflowing.
DEFINE_MATRIX_QUATERNARY_OP(RankLoss, const T THRESHOLD = 40.0; a = b - c;
                            a = (a > THRESHOLD)
                                    ? THRESHOLD
                                    : ((a < -THRESHOLD) ? (-THRESHOLD) : a);
                            a = log(1 + exp(a)) - a * d);
template <>
void BaseMatrixT<real>::rankLoss(BaseMatrixT& b,
                                 BaseMatrixT& c,
                                 BaseMatrixT& d) {
  applyQuaternary(quaternary::RankLoss<real>(), b, c, d);
}

// Gradient of the rank loss: with x = clamp(b - c), a = sigmoid(x) - d,
// computed as exp(x) / (1 + exp(x)) - d.
DEFINE_MATRIX_QUATERNARY_OP(RankLossBp, const T THRESHOLD = 40.0; a = b - c;
                            a = (a > THRESHOLD)
                                    ? THRESHOLD
                                    : ((a < -THRESHOLD) ? (-THRESHOLD) : a);
                            a = exp(a);
                            a = (a / (1 + a) - d));
template <>
void BaseMatrixT<real>::rankLossBp(BaseMatrixT& b,
                                   BaseMatrixT& c,
                                   BaseMatrixT& d) {
  applyQuaternary(quaternary::RankLossBp<real>(), b, c, d);
}
 | |
| 
 | |
/* this = log(1 + exp(b)) - c * b, with b clamped to [-40, 40] to keep exp()
 * from overflowing. */
DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLoss, const T THRESHOLD = 40.0;
                         T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD)
                                                                 ? -THRESHOLD
                                                                 : b;
                         a = log(1 + exp(x)) - c * x);
template <>
void BaseMatrixT<real>::logisticRegressionLoss(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::LogisticRegressionLoss<real>(), b, c);
}

/* this = exp(b)/(1+exp(b)) - c, i.e. sigmoid(b) - c, with the same clamp. */
DEFINE_MATRIX_TERNARY_OP(LogisticRegressionLossBp, const T THRESHOLD = 40.0;
                         T x = (b > THRESHOLD) ? THRESHOLD : (b < -THRESHOLD)
                                                                 ? -THRESHOLD
                                                                 : b;
                         x = exp(x);
                         a = x / (1 + x) - c);
template <>
void BaseMatrixT<real>::logisticRegressionLossBp(BaseMatrixT& b,
                                                 BaseMatrixT& c) {
  applyTernary(ternary::LogisticRegressionLossBp<real>(), b, c);
}
 | |
| 
 | |
// Elementwise comparison: a = (b > c) ? 1 : 0.
DEFINE_MATRIX_TERNARY_OP(BiggerThan, a = (b > c) ? 1.0f : 0.0f);
template <class T>
void BaseMatrixT<T>::biggerThan(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::BiggerThan<T>(), b, c);
}

// Label-aware comparison: a = 1 when (b > c) agrees with the binary label d
// (d thresholded at 0.5), else 0.
DEFINE_MATRIX_QUATERNARY_OP(
    BiggerThan, a = ((b > c && d > 0.5f) || (b < c && d < 0.5f)) ? 1.0f : 0.0f);
template <class T>
void BaseMatrixT<T>::biggerThan(BaseMatrixT& b,
                                BaseMatrixT& c,
                                BaseMatrixT& d) {
  applyQuaternary(quaternary::BiggerThan<T>(), b, c, d);
}

// Elementwise maximum: a = max(b, c).
DEFINE_MATRIX_TERNARY_OP(Max, a = (b > c) ? b : c);
template <class T>
void BaseMatrixT<T>::max2(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::Max<T>(), b, c);
}
 | |
| 
 | |
// Accumulates 1 into c whenever prediction a and label b fall on different
// sides of threshold p. Note the op is applied with b as the `a` operand
// below, so inside the op: a = b (predictions), b = c (labels), c = *this.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(BinaryClassificationError,
                                   ONE_PARAMETER,
                                   c += ((a > p) == (b > p)) ? 0.0f : 1.0f);
template <class T>
void BaseMatrixT<T>::binaryClassificationError2(size_t destCol,
                                                BaseMatrixT& b,
                                                BaseMatrixT& c,
                                                T p) {
  CHECK(!useGpu_) << "do not support gpu";
  // Per-row error counts accumulate into column `destCol` of *this
  // (broadcast as a column vector).
  MatrixOffset offset(0, 0, 0, 0, destCol, 0);
  int numRows = b.height_;
  int numCols = b.width_;
  b.applyTernary(ternary::BinaryClassificationError<T>(p),
                 c,
                 *this,
                 numRows,
                 numCols,
                 offset,
                 false_type(),
                 true_type() /*cAsColVector*/);
}
 | |
| 
 | |
// `real` variant built on the aggregate framework: per row, sums
// classificationError(p) over b (predictions) and c (labels) into column
// `destCol` of *this (treated as a column vector).
template <>
void BaseMatrixT<real>::binaryClassificationError(size_t destCol,
                                                  BaseMatrixT& b,
                                                  BaseMatrixT& c,
                                                  real p) {
  MatrixOffset offset(destCol, 0, 0, 0, 0, 0);
  int numRows = b.height_;
  int numCols = b.width_;
  aggregate(aggregate::sum(),
            base::binary::classificationError(p),
            base::binary::add(),
            b,
            c,
            numRows,
            numCols,
            offset,
            false_type(),
            true_type() /*aAsColVector*/);
}
 | |
| 
 | |
// Three-way weighted sum: a = p1 * b + p2 * c + p3 * d.
DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(Add3,
                                      THREE_PARAMETER,
                                      a = p1 * b + p2 * c + p3 * d);
template <class T>
void BaseMatrixT<T>::add3(
    BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT& d, T p1, T p2, T p3) {
  applyQuaternary(quaternary::Add3<T>(p1, p2, p3), b, c, d);
}

// a = b * c^2, elementwise.
DEFINE_MATRIX_TERNARY_OP(DotMulSquare, a = b * c * c);
template <class T>
void BaseMatrixT<T>::dotMulSquare(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::DotMulSquare<T>(), b, c);
}

// a = b^2 * c^2, elementwise.
DEFINE_MATRIX_TERNARY_OP(DotSquareSquare, a = b * b * c * c);
template <class T>
void BaseMatrixT<T>::dotSquareSquare(BaseMatrixT& b, BaseMatrixT& c) {
  applyTernary(ternary::DotSquareSquare<T>(), b, c);
}

// a *= b^2, elementwise.
DEFINE_MATRIX_BINARY_OP(DotMulSquare, a *= b * b);
template <class T>
void BaseMatrixT<T>::dotMulSquare(BaseMatrixT& b) {
  applyBinary(binary::DotMulSquare<T>(), b);
}

// a = a^2 * b, elementwise.
DEFINE_MATRIX_BINARY_OP(DotSquareMul, a = a * a * b);
template <class T>
void BaseMatrixT<T>::dotSquareMul(BaseMatrixT& b) {
  applyBinary(binary::DotSquareMul<T>(), b);
}
 | |
| 
 | |
// a += (p1 * b + p2 * c + p3 * d)^2, elementwise.
DEFINE_MATRIX_QUATERNARY_PARAMETER_OP(AddSquareSum,
                                      THREE_PARAMETER,
                                      T tmp = p1 * b + p2 * c + p3 * d;
                                      a += tmp * tmp);
template <class T>
// NOTE(review): `d` is taken by value while b and c are by reference —
// presumably a shallow header copy (BaseMatrixT holds a data pointer), but
// it looks like an oversight; fixing it requires changing the out-of-file
// declaration as well, so it is only flagged here.
void BaseMatrixT<T>::addSquareSum(
    BaseMatrixT& b, BaseMatrixT& c, BaseMatrixT d, T p1, T p2, T p3) {
  applyQuaternary(quaternary::AddSquareSum<T>(p1, p2, p3), b, c, d);
}
 | |
| 
 | |
// a += p * b^2, elementwise.
DEFINE_MATRIX_BINARY_PARAMETER_OP(AddSquare, ONE_PARAMETER, a += p * b * b);
template <class T>
void BaseMatrixT<T>::addSquare(BaseMatrixT& b, T p) {
  applyBinary(binary::AddSquare<T>(p), b);
}

// Exponentially decayed accumulation of squares: a = p1 * a + p2 * b^2.
DEFINE_MATRIX_BINARY_PARAMETER_OP(DecayAddSquare,
                                  TWO_PARAMETER,
                                  a = p1 * a + p2 * b * b);
template <class T>
void BaseMatrixT<T>::decayAddSquare(BaseMatrixT& b, T p1, T p2) {
  applyBinary(binary::DecayAddSquare<T>(p1, p2), b);
}

// Decayed accumulation of a squared product: a = p1 * a + p2 * b^2 * c^2.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DecayAddSquareMul,
                                   TWO_PARAMETER,
                                   a = p1 * a + p2 * b * b * c * c);
template <class T>
void BaseMatrixT<T>::decayAddSquareMul(BaseMatrixT& b,
                                       BaseMatrixT& c,
                                       T p1,
                                       T p2) {
  applyTernary(ternary::DecayAddSquareMul<T>(p1, p2), b, c);
}

// a = 1 / (p1 * b + p2 * c + p3), elementwise.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(ReciprocalSum,
                                   THREE_PARAMETER,
                                   a = 1 / (p1 * b + p2 * c + p3));
template <class T>
void BaseMatrixT<T>::reciprocalSum(
    BaseMatrixT& b, BaseMatrixT& c, T p1, T p2, T p3) {
  applyTernary(ternary::ReciprocalSum<T>(p1, p2, p3), b, c);
}

// a = 1 / (p1 * b + p2), elementwise.
DEFINE_MATRIX_BINARY_PARAMETER_OP(Reciprocal2,
                                  TWO_PARAMETER,
                                  a = 1 / (p1 * b + p2));
template <class T>
void BaseMatrixT<T>::reciprocal2(BaseMatrixT& b, T p1, T p2) {
  applyBinary(binary::Reciprocal2<T>(p1, p2), b);
}
 | |
| 
 | |
// a *= (p1 * b + p2 * c)^2, elementwise.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSquareSum,
                                   TWO_PARAMETER,
                                   T tmp = p1 * b + p2 * c;
                                   a *= tmp * tmp);
template <class T>
void BaseMatrixT<T>::dotMulSquareSum(BaseMatrixT& b,
                                     BaseMatrixT& c,
                                     T p1,
                                     T p2) {
  applyTernary(ternary::DotMulSquareSum<T>(p1, p2), b, c);
}

// a = (p1 * b + p2 * c)^2, elementwise.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotSquareSum,
                                   TWO_PARAMETER,
                                   T tmp = p1 * b + p2 * c;
                                   a = tmp * tmp);
template <class T>
void BaseMatrixT<T>::dotSquareSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
  applyTernary(ternary::DotSquareSum<T>(p1, p2), b, c);
}

// a *= (p1 * b + p2 * c), elementwise.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(DotMulSum,
                                   TWO_PARAMETER,
                                   a *= p1 * b + p2 * c);
template <class T>
void BaseMatrixT<T>::dotMulSum(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
  applyTernary(ternary::DotMulSum<T>(p1, p2), b, c);
}
 | |
| 
 | |
// Move contents into b and zero this matrix: b = a; a = 0.
DEFINE_MATRIX_BINARY_OP(CopyAndClear, b = a; a = 0);
template <class T>
void BaseMatrixT<T>::copyAndClear(BaseMatrixT& b) {
  applyBinary(binary::CopyAndClear<T>(), b);
}

// Decayed accumulation of a product: a = p1 * a + p2 * b * c.
DEFINE_MATRIX_TERNARY_PARAMETER_OP(AddDotMul,
                                   TWO_PARAMETER,
                                   a = p1 * a + p2 * b * c);
template <class T>
void BaseMatrixT<T>::addDotMul(BaseMatrixT& b, BaseMatrixT& c, T p1, T p2) {
  applyTernary(ternary::AddDotMul<T>(p1, p2), b, c);
}

// Elementwise copy: a = b.
DEFINE_MATRIX_BINARY_OP(Assign, a = b;);
template <class T>
void BaseMatrixT<T>::assign(BaseMatrixT& b) {
  if (useGpu_) {
    applyBinary(binary::Assign<T>(), b);
  } else {  // cpu version
    // memcpy assumes identical, contiguous layouts, so shapes must match.
    CHECK_EQ(this->height_, b.height_);
    CHECK_EQ(this->width_, b.width_);
    memcpy(data_, b.data_, sizeof(T) * height_ * width_);
  }
}
 | |
| 
 | |
// Column-offset copy between matrices of different widths:
//  - if b fits inside this matrix, copy b into columns
//    [columnOffset, columnOffset + b.width_) of *this;
//  - otherwise, if *this fits inside b, copy columns
//    [columnOffset, columnOffset + width_) of b into *this;
//  - anything else is a fatal argument error.
template <class T>
void BaseMatrixT<T>::assignAtOffset(BaseMatrixT& b, int64_t columnOffset) {
  if (columnOffset + b.width_ <= width_) {
    int numRows = height_;
    int numCols = b.width_;
    // Destination (a) starts at column `columnOffset`.
    MatrixOffset offset(columnOffset, 0, 0, 0);
    applyBinary(binary::Assign<T>(), b, numRows, numCols, offset);
  } else if (columnOffset + width_ <= b.width_) {
    int numRows = height_;
    int numCols = width_;
    // Source (b) starts at column `columnOffset`.
    MatrixOffset offset(0, 0, columnOffset, 0);
    applyBinary(binary::Assign<T>(), b, numRows, numCols, offset);
  } else {
    LOG(FATAL) << "Wrong argument "
               << " a.width=" << width_ << " b.width=" << b.width_
               << " columnOffset=" << columnOffset;
  }
}
 | |
| 
 | |
// Swap the element contents of two matrices (not just the pointers).
DEFINE_MATRIX_BINARY_OP(DeepSwap, T tmp = a; a = b; b = tmp);
template <class T>
void BaseMatrixT<T>::deepSwap(BaseMatrixT& b) {
  applyBinary(binary::DeepSwap<T>(), b);
}

// For each row i, aggregate sum(b[i][:] * c[i][:]) into column `destCol`
// of *this (treated as a column vector). See rowDotMul2 for the cpu-only
// reference implementation.
template <>
void BaseMatrixT<real>::rowDotMul(size_t destCol,
                                  BaseMatrixT& b,
                                  BaseMatrixT& c) {
  int numRows = b.height_;
  int numCols = b.width_;
  MatrixOffset offset(destCol, 0, 0, 0, 0, 0);
  aggregate(aggregate::sum(),
            base::binary::mul(),
            base::binary::add(),
            b,
            c,
            numRows,
            numCols,
            offset,
            false_type(),
            true_type() /*aAsColVector*/);
}
 | |
| 
 | |
// CPU-only variant of rowDotMul: for each row i, accumulates (+=) the dot
// product of b's and c's row i into this(i, destCol).
template <class T>
void BaseMatrixT<T>::rowDotMul2(size_t destCol,
                                BaseMatrixT& b,
                                BaseMatrixT& c) {
  CHECK(!useGpu_) << "do not support gpu";

  size_t height = this->height_;
  CHECK_LT(destCol, this->width_);
  CHECK_EQ(height, b.height_);
  CHECK_EQ(height, c.height_);
  CHECK_EQ(b.width_, c.width_);
  size_t width = b.width_;
  T* A = this->data_;
  const T* B = b.data_;
  const T* C = c.data_;
  // Pointers advance one row per outer iteration; note A steps by this
  // matrix's width while B/C step by b's width.
  for (size_t i = 0; i < height;
       ++i, A += this->width_, B += width, C += width) {
    for (size_t j = 0; j < width; ++j) {
      A[destCol] += B[j] * C[j];
    }
  }
}
 | |
| 
 | |
// Column-wise aggregate: this (a row vector) receives, per column j, the
// sum over rows of b[i][j] * c[i][j]. See addDotMulVMM2 for the cpu-only
// reference implementation (which accumulates with +=).
template <>
void BaseMatrixT<real>::addDotMulVMM(BaseMatrixT& b, BaseMatrixT& c) {
  MatrixOffset offset(0, 0, 0, 0, 0, 0);
  int numRows = b.height_;
  int numCols = b.width_;
  aggregate(aggregate::sum(),
            base::binary::mul(),
            base::binary::add(),
            b,
            c,
            numRows,
            numCols,
            offset,
            true_type() /*aAsRowVector*/,
            false_type());
}
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::addDotMulVMM2(BaseMatrixT& b, BaseMatrixT& c) {
 | |
|   CHECK(!useGpu_) << "do not support gpu";
 | |
| 
 | |
|   CHECK_EQ(height_, 1LU);
 | |
|   CHECK_EQ(b.height_, c.height_);
 | |
|   CHECK_EQ(width_, b.width_);
 | |
|   CHECK_EQ(width_, c.width_);
 | |
|   size_t height = b.height_;
 | |
|   size_t width = b.width_;
 | |
|   T* A = this->data_;
 | |
|   const T* B = b.data_;
 | |
|   const T* C = c.data_;
 | |
|   for (size_t i = 0; i < height; ++i, B += width, C += width) {
 | |
|     for (size_t j = 0; j < width; ++j) {
 | |
|       A[j] += B[j] * C[j];
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
// a += b * c, with c broadcast as a row vector over a's rows.
DEFINE_MATRIX_TERNARY_OP(addDotMulMMV, a += b * c);
template <class T>
void BaseMatrixT<T>::addDotMulMMV(BaseMatrixT& b, BaseMatrixT& c) {
  MatrixOffset offset(0, 0, 0, 0, 0, 0);
  int numRows = height_;
  int numCols = width_;
  applyTernary(ternary::addDotMulMMV<T>(),
               b,
               c,
               numRows,
               numCols,
               offset,
               true_type() /*cAsRowVector*/,
               false_type());
}

// CPU-only variant of addDotMulMMV: c is a 1 x width row vector; for each
// row, this[j] += b[j] * c[j]. C is not advanced across rows.
template <class T>
void BaseMatrixT<T>::addDotMulMMV2(BaseMatrixT& b, BaseMatrixT& c) {
  CHECK(!useGpu_) << "do not support gpu";

  CHECK_EQ(c.height_, 1LU);
  CHECK_EQ(height_, b.height_);
  CHECK_EQ(width_, b.width_);
  CHECK_EQ(width_, c.width_);
  size_t height = height_;
  size_t width = width_;
  T* A = this->data_;
  const T* B = b.data_;
  const T* C = c.data_;
  for (size_t i = 0; i < height; ++i, A += width, B += width) {
    for (size_t j = 0; j < width; ++j) {
      A[j] += B[j] * C[j];
    }
  }
}
 | |
| 
 | |
// Row-wise scale: this[i][:] = b[i][:] * c[i][cCol] (column cCol of c is
// broadcast across each row).
template <class T>
void BaseMatrixT<T>::rowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
  MatrixOffset offset(0, 0, 0, 0, cCol, 0);
  int numRows = height_;
  int numCols = width_;
  applyTernary(ternary::DotMul<T>(),
               b,
               c,
               numRows,
               numCols,
               offset,
               false_type(),
               true_type() /*cAsColVector*/);
}

// CPU-only variant of rowScale; C advances by c's full row width so
// C[cCol] always reads row i's entry.
template <class T>
void BaseMatrixT<T>::rowScale2(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
  CHECK(!useGpu_) << "do not support gpu";

  size_t height = this->height_;
  size_t width = this->width_;
  CHECK_EQ(height, b.height_);
  CHECK_EQ(width, b.width_);
  CHECK_LT(cCol, c.width_);
  CHECK_EQ(height, c.height_);
  T* A = this->data_;
  const T* B = b.data_;
  const T* C = c.data_;
  for (size_t i = 0; i < height; ++i, A += width, B += width, C += c.width_) {
    for (size_t j = 0; j < width; ++j) {
      A[j] = B[j] * C[cCol];
    }
  }
}
 | |
| 
 | |
// Column-wise scale: this[:][j] = b[:][j] * c[cRow][j] (row cRow of c is
// broadcast down each column).
template <class T>
void BaseMatrixT<T>::colScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) {
  MatrixOffset offset(0, 0, 0, 0, 0, cRow);
  int numRows = height_;
  int numCols = width_;
  applyTernary(ternary::DotMul<T>(),
               b,
               c,
               numRows,
               numCols,
               offset,
               true_type() /* cAsRowVector */,
               false_type() /* cAsColVector */);
}

// Accumulating column-wise scale: this[:][j] += b[:][j] * c[cRow][j].
template <class T>
void BaseMatrixT<T>::addColScale(size_t cRow, BaseMatrixT& b, BaseMatrixT& c) {
  MatrixOffset offset(0, 0, 0, 0, 0, cRow);
  int numRows = height_;
  int numCols = width_;
  applyTernary(ternary::addDotMulMMV<T>(),
               b,
               c,
               numRows,
               numCols,
               offset,
               true_type() /* cAsRowVector */,
               false_type() /* cAsColVector */);
}
 | |
| 
 | |
// Accumulating row-wise scale: this[i][:] += b[i][:] * c[i][cCol].
template <class T>
void BaseMatrixT<T>::addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
  MatrixOffset offset(0, 0, 0, 0, cCol, 0);
  int numRows = height_;
  int numCols = width_;
  applyTernary(ternary::addDotMulMMV<T>(),
               b,
               c,
               numRows,
               numCols,
               offset,
               false_type(),
               true_type() /*cAsColVector*/);
}

// Row-wise add: this[i][:] = b[i][:] + p * c[i][cCol].
DEFINE_MATRIX_TERNARY_PARAMETER_OP(RowAdd, ONE_PARAMETER, a = b + p * c);
template <class T>
void BaseMatrixT<T>::rowAdd(size_t cCol, BaseMatrixT& b, BaseMatrixT& c, T p) {
  MatrixOffset offset(0, 0, 0, 0, cCol, 0);
  int numRows = height_;
  int numCols = width_;
  applyTernary(ternary::RowAdd<T>(p),
               b,
               c,
               numRows,
               numCols,
               offset,
               false_type(),
               true_type() /*cAsColVector*/);
}
 | |
| 
 | |
// Row-wise power: this[i][:] = b[i][:] ^ c[i][cCol].
DEFINE_MATRIX_TERNARY_OP(RowPow, a = pow(b, c));
template <>
void BaseMatrixT<real>::rowPow(size_t cCol, BaseMatrixT& b, BaseMatrixT& c) {
  if (useGpu_) {
    MatrixOffset offset(0, 0, 0, 0, cCol, 0);
    int numRows = height_;
    int numCols = width_;
    applyTernary(ternary::RowPow<real>(),
                 b,
                 c,
                 numRows,
                 numCols,
                 offset,
                 false_type(),
                 true_type() /*cAsColVector*/);
  } else {
    // CPU path: one vectorized vPow call per row with that row's exponent.
    size_t height = this->height_;
    size_t width = this->width_;
    CHECK_EQ(height, b.height_);
    CHECK_EQ(width, b.width_);
    CHECK_LT(cCol, c.width_);
    CHECK_EQ(height, c.height_);
    real* A = this->data_;
    const real* B = b.data_;
    const real* C = c.data_;
    for (size_t i = 0; i < height; ++i, A += width, B += width, C += c.width_) {
      vPow(width, B, C[cCol], A);
    }
  }
}
 | |
| 
 | |
// Multiply every row of *this by the row vector b: this[i][j] *= b[0][j].
template <class T>
void BaseMatrixT<T>::mulRowVector(BaseMatrixT& b) {
  MatrixOffset offset(0, 0, 0, 0);
  int numRows = height_;
  int numCols = width_;
  applyBinary(binary::DotMul<T>(),
              b,
              numRows,
              numCols,
              offset,
              true_type() /* bAsRowVector */,
              false_type());
}

// In-place elementwise division: a /= b. Unlike the ternary DotDiv above,
// there is no zero guard — b must not contain zeros.
DEFINE_MATRIX_BINARY_OP(DotDiv, a /= b);
template <class T>
void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) {
  // Divide every row of *this by the row vector b: this[i][j] /= b[0][j].
  MatrixOffset offset(0, 0, 0, 0);
  int numRows = height_;
  int numCols = width_;
  applyBinary(binary::DotDiv<T>(),
              b,
              numRows,
              numCols,
              offset,
              true_type() /* bAsRowVector */,
              false_type());
}
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) {
 | |
|   MatrixOffset offset(0, 0, 0, 0);
 | |
|   int numRows = height_;
 | |
|   int numCols = width_;
 | |
|   applyBinary(binary::DotMul<T>(),
 | |
|               b,
 | |
|               numRows,
 | |
|               numCols,
 | |
|               offset,
 | |
|               false_type(),
 | |
|               true_type() /* bAsColVector */);
 | |
| }
 | |
| 
 | |
| template <class T>
 | |
| void BaseMatrixT<T>::divColVector(BaseMatrixT& b) {
 | |
|   MatrixOffset offset(0, 0, 0, 0);
 | |
|   int numRows = height_;
 | |
|   int numCols = width_;
 | |
|   applyBinary(binary::DotDiv<T>(),
 | |
|               b,
 | |
|               numRows,
 | |
|               numCols,
 | |
|               offset,
 | |
|               false_type(),
 | |
|               true_type() /* bAsColVector */);
 | |
| }
 | |
| 
 | |
| template <>
 | |
| template <class Agg>
 | |
| int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
 | |
|   MatrixOffset offset(0, 0, 0, 0, 0, 0);
 | |
|   size_t numRows = b.height_;
 | |
|   size_t numCols = b.width_;
 | |
|   CHECK_EQ(height_, numRows);
 | |
|   CHECK_EQ(width_, 1UL);
 | |
|   aggregate(agg,
 | |
|             base::unary::identity(),
 | |
|             base::binary::second(),
 | |
|             b,
 | |
|             numRows,
 | |
|             numCols,
 | |
|             offset,
 | |
|             false_type(),
 | |
|             true_type() /*aAsColVector*/);
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
// Reduce every row of b with `agg`, combining the aggregate into this matrix
// through the saver functor `sv` (e.g. overwrite, or scaled accumulate).
// `this` must be a column vector with one entry per row of b.
template <>
template <class Agg, class Saver>
int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
  MatrixOffset offset(0, 0, 0, 0, 0, 0);
  size_t numRows = b.height_;
  size_t numCols = b.width_;
  CHECK_EQ(height_, numRows);
  CHECK_EQ(width_, 1UL);
  aggregate(agg,
            base::unary::identity(),
            sv,
            b,
            numRows,
            numCols,
            offset,
            false_type(),
            true_type() /*aAsColVector*/);

  return 0;
}
 | |
| 
 | |
| template <>
 | |
| template <class Agg>
 | |
| int BaseMatrixT<real>::applyRow(Agg agg,
 | |
|                                 real scaleDest,
 | |
|                                 real scaleAgg,
 | |
|                                 BaseMatrixT& b) {
 | |
|   if (scaleDest != 0) {
 | |
|     applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b);
 | |
|   } else {
 | |
|     applyRow(agg, base::binary::second(), b);
 | |
|     if (scaleAgg != 1) {
 | |
|       mulScalar(scaleAgg);
 | |
|     }
 | |
|   }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
// Reduce every row of op(b, c) with `agg`, combining into this matrix via the
// saver `sv`. b and c must have identical shapes; `this` must be a column
// vector with one entry per row.
template <>
template <class Agg, class Op, class Saver>
int BaseMatrixT<real>::applyRow(
    Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c) {
  MatrixOffset offset(0, 0, 0, 0, 0, 0);
  size_t numRows = b.height_;
  size_t numCols = b.width_;
  CHECK_EQ(height_, numRows);
  CHECK_EQ(width_, 1UL);
  CHECK_EQ(c.height_, numRows);
  CHECK_EQ(c.width_, numCols);
  aggregate(agg,
            op,
            sv,
            b,
            c,
            numRows,
            numCols,
            offset,
            false_type(),
            true_type() /*aAsColVector*/);
  return 0;
}
 | |
| 
 | |
| template <>
 | |
| template <class Agg, class Op>
 | |
| int BaseMatrixT<real>::applyRow(Agg agg,
 | |
|                                 Op op,
 | |
|                                 real scaleDest,
 | |
|                                 real scaleAgg,
 | |
|                                 BaseMatrixT& b,
 | |
|                                 BaseMatrixT& c) {
 | |
|   if (scaleDest != 0) {
 | |
|     applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c);
 | |
|   } else {
 | |
|     applyRow(agg, op, base::binary::second(), b, c);
 | |
|     if (scaleAgg != 1) {
 | |
|       mulScalar(scaleAgg);
 | |
|     }
 | |
|   }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
// Reduce every column of b with `agg`, overwriting this matrix. `this` must
// be a row vector with one entry per column of b.
template <>
template <class Agg>
int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) {
  MatrixOffset offset(0, 0, 0, 0, 0, 0);
  size_t numRows = b.height_;
  size_t numCols = b.width_;
  CHECK_EQ(width_, numCols);
  CHECK_EQ(height_, 1UL);
  // identity element op, `second` saver: plain overwrite of the destination.
  aggregate(agg,
            base::unary::identity(),
            base::binary::second(),
            b,
            numRows,
            numCols,
            offset,
            true_type() /*aAsRowVector*/,
            false_type());

  return 0;
}
 | |
| 
 | |
| template <>
 | |
| template <class Agg, class Saver>
 | |
| int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
 | |
|   MatrixOffset offset(0, 0, 0, 0, 0, 0);
 | |
|   size_t numRows = b.height_;
 | |
|   size_t numCols = b.width_;
 | |
|   CHECK_EQ(width_, numCols);
 | |
|   CHECK_EQ(height_, 1UL);
 | |
|   aggregate(agg,
 | |
|             base::unary::identity(),
 | |
|             sv,
 | |
|             b,
 | |
|             numRows,
 | |
|             numCols,
 | |
|             offset,
 | |
|             true_type() /*aAsRowVector*/,
 | |
|             false_type());
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
// Scaled column reduction: this = scaleDest * this + scaleAgg * agg(cols of b).
// A zero scaleDest means the destination is pure output (it may be
// uninitialized), so it is overwritten first and scaled afterwards if needed.
template <>
template <class Agg>
int BaseMatrixT<real>::applyCol(Agg agg,
                                real scaleDest,
                                real scaleAgg,
                                BaseMatrixT& b) {
  if (scaleDest != 0) {
    applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b);
  } else {
    applyCol(agg, base::binary::second(), b);
    if (scaleAgg != 1) {
      mulScalar(scaleAgg);
    }
  }
  return 0;
}
 | |
| 
 | |
// this = scaleDest * this + scaleSum * sum(each row of b).
template <>
void BaseMatrixT<real>::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) {
  applyRow(aggregate::sum(), scaleDest, scaleSum, b);
}
 | |
| 
 | |
// this[i] = max over row i of b.
template <>
void BaseMatrixT<real>::maxRows(BaseMatrixT& b) {
  applyRow(aggregate::max(), b);
}
 | |
| 
 | |
// this[i] = min over row i of b.
template <>
void BaseMatrixT<real>::minRows(BaseMatrixT& b) {
  applyRow(aggregate::min(), b);
}
 | |
| 
 | |
// this[j] = max over column j of b.
template <>
void BaseMatrixT<real>::maxCols(BaseMatrixT& b) {
  applyCol(aggregate::max(), b);
}
 | |
| 
 | |
// this[j] = min over column j of b.
template <>
void BaseMatrixT<real>::minCols(BaseMatrixT& b) {
  applyCol(aggregate::min(), b);
}
 | |
| 
 | |
// this = scaleDest * this + scaleSum * sum(each column of b).
template <>
void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) {
  applyCol(aggregate::sum(), scaleDest, scaleSum, b);
}
 | |
| 
 | |
// Per-row squared Euclidean distance between b and c:
//   this = scaleDest * this + scaleSum * sum_j (b[i][j] - c[i][j])^2.
template <>
void BaseMatrixT<real>::sumOfSquaredDiffs(BaseMatrixT& b,
                                          BaseMatrixT& c,
                                          real scaleSum,
                                          real scaleDest) {
  applyRow(
      aggregate::sum(), base::binary::squaredDiff(), scaleDest, scaleSum, b, c);
}
 | |
| 
 | |
// Per-row inner product of b and c:
//   this = scaleDest * this + scaleSum * sum_j (b[i][j] * c[i][j]).
template <>
void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b,
                                      BaseMatrixT& c,
                                      real scaleSum,
                                      real scaleDest) {
  applyRow(aggregate::sum(), base::binary::mul(), scaleDest, scaleSum, b, c);
}
 | |
| 
 | |
// Explicit instantiation of the floating-point matrix (real = float/double,
// chosen at build time).
template class BaseMatrixT<real>;

#ifndef PADDLE_MOBILE_INFERENCE

// Server builds: instantiate the full integer matrix template.
template class BaseMatrixT<int>;

#else

// Mobile-inference builds: to keep the binary small, instantiate only the
// handful of BaseMatrixT<int> member specializations that are actually used
// instead of the whole class template.

template <>
void BaseMatrixT<int>::zero() {
  applyUnary(unary::Zero<int>());
}

template <>
void BaseMatrixT<int>::assign(int p) {
  applyUnary(unary::Assign<int>(p));
}

template <>
void BaseMatrixT<int>::isEqualTo(BaseMatrixT& b, int value) {
  applyBinary(binary::IsEqual<int>(value), b);
}

template <>
void BaseMatrixT<int>::neg() {
  applyUnary(unary::Neg<int>());
}

template <>
void BaseMatrixT<int>::abs2() {
  applyUnary(unary::Abs<int>());
}

template <>
void BaseMatrixT<int>::add(int p) {
  applyUnary(unary::Add<int>(p));
}

template <>
void BaseMatrixT<int>::add(int p1, int p2) {
  applyUnary(unary::Add2<int>(p1, p2));
}

template <>
void BaseMatrixT<int>::applyL1(int learningRate, int decayRate) {
  applyUnary(unary::ApplyL1<int>(learningRate * decayRate));
}

#endif
 | |
| }  // namespace paddle
 |