Merge pull request #564 from luotao1/clang

clang format .cc .h .cpp .c and .hpp file
Yu Yang 8 years ago committed by GitHub
commit 5c0eb23d1c
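A formatting sweep of this size is normally generated by the tool itself rather than edited by hand. As a minimal sketch (assuming clang-format is installed and that the repository's own .clang-format file, which is not part of this diff, supplies the style options), a pass over the affected extensions could be run from the repository root like this:

    find paddle \( -name '*.cc' -o -name '*.cpp' -o -name '*.c' -o -name '*.h' -o -name '*.hpp' \) -print0 | xargs -0 clang-format -i

The hunks below are layout-only: argument wrapping, pointer spacing, macro continuations, and brace placement change, while the code itself stays the same.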

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
@@ -112,7 +111,7 @@ void Arguments::setSlotSequenceStartPositions(size_t idx,
}
void Arguments::setSlotSubSequenceStartPositions(
-size_t idx, IVector *vec) throw(RangeError) {
+size_t idx, IVector* vec) throw(RangeError) {
auto& a = m->getArg(idx);
auto& v = m->cast<paddle::IVector>(vec->getSharedPtr());
a.subSequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/trainer/Trainer.h"
@@ -44,8 +43,7 @@ TrainerConfig* TrainerConfig::createFromTrainerConfigFile(
return retv;
}
-TrainerConfig* TrainerConfig::createFromProtoString(
-const std::string& str) {
+TrainerConfig* TrainerConfig::createFromProtoString(const std::string& str) {
auto retv = new TrainerConfig();
paddle::TrainerConfig trainerConfigProto;
auto conf = std::make_shared<paddle::TrainerConfigHelper>(trainerConfigProto);

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
@@ -27,7 +26,8 @@ GradientMachine::GradientMachine() : m(new GradientMachinePrivate()) {}
GradientMachine::~GradientMachine() { delete m; }
GradientMachine* GradientMachine::createFromPaddleModelPtr(
-const void* confPtr, GradientMatchineCreateMode mode,
+const void* confPtr,
+GradientMatchineCreateMode mode,
const std::vector<int>& types) {
auto& conf = *(const paddle::ModelConfig*)(confPtr);
std::vector<ParameterType> realTypes;
@@ -44,7 +44,8 @@ GradientMachine* GradientMachine::createFromPaddleModelPtr(
}
GradientMachine* GradientMachine::createByConfigProtoStr(
-const std::string& protoStr, GradientMatchineCreateMode mode,
+const std::string& protoStr,
+GradientMatchineCreateMode mode,
const std::vector<int>& types) {
paddle::ModelConfig conf;
conf.ParseFromString(protoStr);
@@ -56,13 +57,15 @@ GradientMachine* GradientMachine::createByConfigProtoStr(
}
GradientMachine* GradientMachine::createByModelConfig(
-ModelConfig* conf, GradientMatchineCreateMode mode,
+ModelConfig* conf,
+GradientMatchineCreateMode mode,
const std::vector<int>& types) {
auto confPtr = &conf->m->conf->getModelConfig();
return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types);
}
-void GradientMachine::forward(const Arguments& inArgs, Arguments* outArgs,
+void GradientMachine::forward(const Arguments& inArgs,
+Arguments* outArgs,
PassType passType) {
auto& in =
m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
@@ -99,7 +102,8 @@ void GradientMachine::backward(const UpdateCallback& callback) {
}
void GradientMachine::forwardBackward(const Arguments& inArgs,
-Arguments* outArgs, PassType passType,
+Arguments* outArgs,
+PassType passType,
const UpdateCallback& callback) {
auto& in =
m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
@@ -129,7 +133,7 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
void GradientMachine::randParameters() { m->machine->randParameters(); }
Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
throw(UnsupportError) {
auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(m->machine);
if (nn) {
auto mat = nn->getLayerOutput(layerName);
@@ -140,8 +144,11 @@ Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
}
SequenceGenerator* GradientMachine::asSequenceGenerator(
-const std::vector<std::string>& dict, size_t begin_id, size_t end_id,
-size_t max_length, size_t beam_size) {
+const std::vector<std::string>& dict,
+size_t begin_id,
+size_t end_id,
+size_t max_length,
+size_t beam_size) {
SequenceGenerator* r =
SequenceGenerator::createByGradientMachineSharedPtr(&m->machine);
r->setDict(dict);

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include "PaddleAPI.h"
@@ -23,7 +22,8 @@ limitations under the License. */
template <typename T1, typename T2>
void staticCastVector(std::vector<T2>* dest, const std::vector<T1>& src) {
dest->resize(src.size());
-std::transform(src.begin(), src.end(), dest->begin(), [](T1 t){
-return static_cast<T2>(t);
-});
+std::transform(src.begin(),
+src.end(),
+dest->begin(),
+[](T1 t) { return static_cast<T2>(t); });
}

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
@@ -44,17 +43,21 @@ Matrix* Matrix::createZero(size_t height, size_t width, bool useGpu) {
return m;
}
-Matrix* Matrix::createDense(const std::vector<float>& data, size_t height,
-size_t width, bool useGpu) {
+Matrix* Matrix::createDense(const std::vector<float>& data,
+size_t height,
+size_t width,
+bool useGpu) {
auto m = new Matrix();
m->m->mat = paddle::Matrix::create(height, width, useGpu);
m->m->mat->copyFrom(data.data(), data.size());
return m;
}
-Matrix* Matrix::createDenseFromNumpy(float* data, int dim1, int dim2,
-bool copy, bool useGpu)
-throw (UnsupportError) {
+Matrix* Matrix::createDenseFromNumpy(float* data,
+int dim1,
+int dim2,
+bool copy,
+bool useGpu) throw(UnsupportError) {
if (useGpu) {
/// Gpu mode only supports copy=True
if (!copy) {
@@ -66,7 +69,9 @@ Matrix* Matrix::createDenseFromNumpy(float* data, int dim1, int dim2,
}
}
-Matrix* Matrix::createCpuDenseFromNumpy(float* data, int dim1, int dim2,
+Matrix* Matrix::createCpuDenseFromNumpy(float* data,
+int dim1,
+int dim2,
bool copy) {
auto m = new Matrix();
if (copy) {
@@ -85,12 +90,20 @@ Matrix* Matrix::createGpuDenseFromNumpy(float* data, int dim1, int dim2) {
return m;
}
-Matrix* Matrix::createSparse(size_t height, size_t width, size_t nnz,
-bool isNonVal, bool isTrans, bool useGpu) {
+Matrix* Matrix::createSparse(size_t height,
+size_t width,
+size_t nnz,
+bool isNonVal,
+bool isTrans,
+bool useGpu) {
auto m = new Matrix();
m->m->mat = paddle::Matrix::createSparseMatrix(
-height, width, nnz, isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
-isTrans, useGpu);
+height,
+width,
+nnz,
+isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
+isTrans,
+useGpu);
return m;
}
@@ -221,7 +234,8 @@ FloatArray Matrix::getData() const {
}
void Matrix::sparseCopyFrom(
-const std::vector<int>& rows, const std::vector<int>& cols,
+const std::vector<int>& rows,
+const std::vector<int>& cols,
const std::vector<float>& vals) throw(UnsupportError) {
auto cpuSparseMat =
std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
@@ -240,7 +254,8 @@ void Matrix::sparseCopyFrom(
void* Matrix::getSharedPtr() const { return &m->mat; }
-void Matrix::toNumpyMatInplace(float** view_data, int* dim1,
+void Matrix::toNumpyMatInplace(float** view_data,
+int* dim1,
int* dim2) throw(UnsupportError) {
auto cpuMat = std::dynamic_pointer_cast<paddle::CpuMatrix>(m->mat);
if (cpuMat) {
@@ -251,7 +266,8 @@ void Matrix::toNumpyMatInplace(float** view_data, int* dim1,
throw UnsupportError();
}
}
-void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
+void Matrix::copyToNumpyMat(float** view_m_data,
+int* dim1,
int* dim2) throw(UnsupportError) {
static_assert(sizeof(paddle::real) == sizeof(float),
"Currently PaddleAPI only support for single "
@@ -269,8 +285,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
} else if (auto gpuMat = dynamic_cast<paddle::GpuMatrix*>(m->mat.get())) {
auto src = gpuMat->getData();
auto dest = *view_m_data;
-hl_memcpy_device2host(dest, src,
-sizeof(paddle::real) * (*dim1) * (*dim2));
+hl_memcpy_device2host(
+dest, src, sizeof(paddle::real) * (*dim1) * (*dim2));
} else {
LOG(WARNING) << "Unexpected Situation";
throw UnsupportError();
@@ -278,7 +294,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
}
}
-void Matrix::copyFromNumpyMat(float* data, int dim1,
+void Matrix::copyFromNumpyMat(float* data,
+int dim1,
int dim2) throw(UnsupportError, RangeError) {
if (isSparse()) {
throw UnsupportError();

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#pragma once
#include <stddef.h>
@@ -61,8 +60,8 @@ class RangeError {};
/// Not support Error, such as access GPU memory directly, etc.
class UnsupportError : public std::runtime_error {
public:
-UnsupportError() : std::runtime_error(" ") {};
-UnsupportError(const std::string& message) : std::runtime_error(message) {};
+UnsupportError() : std::runtime_error(" "){};
+UnsupportError(const std::string& message) : std::runtime_error(message){};
};
/// This type will map to python's list of float.
@@ -112,7 +111,8 @@ public:
/**
* Create A Matrix with height,width, which is filled by zero.
*/
-static Matrix* createZero(size_t height, size_t width,
+static Matrix* createZero(size_t height,
+size_t width,
bool useGpu = isUsingGpu());
/**
@@ -124,8 +124,11 @@ public:
*
* @note the default sparse type is SPARSE_CSR.
*/
-static Matrix* createSparse(size_t height, size_t width, size_t nnz,
-bool isNonVal = true, bool trans = false,
+static Matrix* createSparse(size_t height,
+size_t width,
+size_t nnz,
+bool isNonVal = true,
+bool trans = false,
bool useGpu = isUsingGpu());
/**
@@ -134,13 +137,17 @@ public:
* @param data list of float should be passed in python.
* @note the value will be copy into a new matrix.
*/
-static Matrix* createDense(const std::vector<float>& data, size_t height,
-size_t width, bool useGpu = isUsingGpu());
+static Matrix* createDense(const std::vector<float>& data,
+size_t height,
+size_t width,
+bool useGpu = isUsingGpu());
-static Matrix* createDenseFromNumpy(float* data, int dim1, int dim2,
-bool copy = true,
-bool useGpu = isUsingGpu())
-throw (UnsupportError);
+static Matrix* createDenseFromNumpy(
+float* data,
+int dim1,
+int dim2,
+bool copy = true,
+bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu Dense Matrix from numpy matrix, dtype=float32
@@ -151,7 +158,9 @@ public:
* @param copy true if copy into a new matrix, false will create
* matrix inplace.
*/
-static Matrix* createCpuDenseFromNumpy(float* data, int dim1, int dim2,
+static Matrix* createCpuDenseFromNumpy(float* data,
+int dim1,
+int dim2,
bool copy = false);
/// Create Gpu Dense Matrix from numpy matrix, dtype=float32
@@ -171,11 +180,13 @@ public:
* numpy_mat = m.toNumpyMat()
* @endcode
*/
-void toNumpyMatInplace(float** view_data, int* dim1,
+void toNumpyMatInplace(float** view_data,
+int* dim1,
int* dim2) throw(UnsupportError);
/// Copy To numpy mat.
-void copyToNumpyMat(float** view_m_data, int* dim1,
+void copyToNumpyMat(float** view_m_data,
+int* dim1,
int* dim2) throw(UnsupportError);
/// Copy From Numpy Mat
@@ -248,15 +259,18 @@ public:
static Vector* create(const std::vector<float>& data,
bool useGpu = isUsingGpu());
-static Vector* createVectorFromNumpy(float* data, int dim, bool copy = true,
-bool useGpu = isUsingGpu())
-throw (UnsupportError);
+static Vector* createVectorFromNumpy(
+float* data,
+int dim,
+bool copy = true,
+bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu Vector from numpy array, which dtype=float32
*
* If copy is false, it will create vector inplace.
*/
-static Vector* createCpuVectorFromNumpy(float* data, int dim,
+static Vector* createCpuVectorFromNumpy(float* data,
+int dim,
bool copy = false);
/// Create Gpu Vector from numpy array, which dtype=float32
@@ -312,16 +326,19 @@ public:
static IVector* create(const std::vector<int>& data,
bool useGpu = isUsingGpu());
-static IVector* createVectorFromNumpy(int* data, int dim, bool copy = true,
-bool useGpu = isUsingGpu())
-throw (UnsupportError);
+static IVector* createVectorFromNumpy(
+int* data,
+int dim,
+bool copy = true,
+bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu IVector from numpy array, which dtype=int32
*
* If copy is false, it will create vector inplace
*/
-static IVector* createCpuVectorFromNumpy(int* data, int dim,
+static IVector* createCpuVectorFromNumpy(int* data,
+int dim,
bool copy = false);
/**
* Create Gpu IVector from numpy array, which dtype=int32
@@ -605,7 +622,8 @@ class ParameterTraverseCallback {
public:
~ParameterTraverseCallback();
-void apply(const std::vector<Vector*>& vecs, const ParameterConfig& config,
+void apply(const std::vector<Vector*>& vecs,
+const ParameterConfig& config,
size_t sparseId);
private:
@@ -638,7 +656,8 @@ public:
void finishBatch();
-void update(const std::vector<Vector*>& vecs, const ParameterConfig& conf,
+void update(const std::vector<Vector*>& vecs,
+const ParameterConfig& conf,
size_t sparseId = NO_SPARSE_ID);
std::vector<int> getParameterTypes() const;
@@ -678,7 +697,8 @@ public:
* model config by TrainerConfig
*/
static GradientMachine* createByModelConfig(
-ModelConfig* conf, GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
+ModelConfig* conf,
+GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
const std::vector<int>& parameterTypes = defaultParamTypes);
/**
@@ -701,7 +721,8 @@ public:
/**
* Combine forward/backward
*/
-void forwardBackward(const Arguments& inArgs, Arguments* outArgs,
+void forwardBackward(const Arguments& inArgs,
+Arguments* outArgs,
PassType passType,
const UpdateCallback& callback = UpdateCallback());
@@ -722,14 +743,17 @@ public:
*/
SequenceGenerator* asSequenceGenerator(
const std::vector<std::string>& dict = std::vector<std::string>(),
-size_t begin_id = 0UL, size_t end_id = 0UL, size_t max_length = 100UL,
+size_t begin_id = 0UL,
+size_t end_id = 0UL,
+size_t max_length = 100UL,
size_t beam_size = -1UL);
private:
GradientMachinePrivate* m;
static GradientMachine* createFromPaddleModelPtr(
-const void* confPtr, GradientMatchineCreateMode mode,
+const void* confPtr,
+GradientMatchineCreateMode mode,
const std::vector<int>& types);
// Not to use c++ 11 init-list, so we use static var as function default arg.
@@ -751,8 +775,8 @@ public:
/// Create A Trainer By TrainerConfig. using paddle command line.
static Trainer* createByCommandLine() throw(IOError);
-static Trainer* create(TrainerConfig* optConfig, GradientMachine* gm)
-throw(IOError);
+static Trainer* create(TrainerConfig* optConfig,
+GradientMachine* gm) throw(IOError);
/// Start training
void startTrain();

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/parameter/Parameter.h"

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/parameter/ParameterOptimizer.h"
@@ -32,17 +31,21 @@ struct ParameterTraverseCallbackPrivate {
const paddle::ParameterOptimizer::TraverseCallback& callback)
: callback(callback) {}
-void apply(const std::vector<Vector*>& vecs, const ParameterConfig& conf,
+void apply(const std::vector<Vector*>& vecs,
+const ParameterConfig& conf,
size_t sparseId) {
std::vector<paddle::VectorPtr> real_vecs;
real_vecs.resize(vecs.size());
-std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) {
-if (v) {
-return *(paddle::VectorPtr*)(v->getSharedPtr());
-} else {
-return paddle::VectorPtr();
-}
-});
+std::transform(vecs.begin(),
+vecs.end(),
+real_vecs.begin(),
+[](Vector* v) {
+if (v) {
+return *(paddle::VectorPtr*)(v->getSharedPtr());
+} else {
+return paddle::VectorPtr();
+}
+});
paddle::ParameterConfig& real_conf =
*(paddle::ParameterConfig*)(const_cast<ParameterConfig&>(conf)
@@ -86,10 +89,12 @@ void ParameterOptimizer::startBatch(size_t numSamplesProcessed) {
void ParameterOptimizer::finishBatch() { m->optimizer->finishBatch(); }
void ParameterOptimizer::update(const std::vector<Vector*>& vecs,
-const ParameterConfig& conf, size_t sparseId) {
-ParameterTraverseCallbackPrivate invoker([&](
-const paddle::VectorPtr _vecs[], const paddle::ParameterConfig& config,
-size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); });
+const ParameterConfig& conf,
+size_t sparseId) {
+ParameterTraverseCallbackPrivate invoker(
+[&](const paddle::VectorPtr _vecs[],
+const paddle::ParameterConfig& config,
+size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); });
invoker.apply(vecs, conf, sparseId);
}
@@ -116,8 +121,9 @@ void ParameterTraverseCallback::apply(const std::vector<Vector*>& vecs,
ParameterTraverseCallback* ParameterOptimizer::needSpecialTraversal(
const ParameterConfig& config) const {
-auto& param_config = *(paddle::ParameterConfig*)const_cast<ParameterConfig&>(
-config).getRawPtr();
+auto& param_config =
+*(paddle::ParameterConfig*)const_cast<ParameterConfig&>(config)
+.getRawPtr();
auto callback = m->optimizer->needSpecialTraversal(param_config);
if (callback) {
auto retCallback = new ParameterTraverseCallback();

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/gserver/gradientmachines/GradientMachine.h"
#include "paddle/parameter/Argument.h"
@@ -42,8 +41,10 @@ struct Path {
// position
static void findNBest(paddle::GradientMachine* gradMachine,
std::vector<paddle::Argument>& inArgs,
-std::vector<Path>& finalPaths, size_t bos_id,
-size_t eos_id, size_t max_length) {
+std::vector<Path>& finalPaths,
+size_t bos_id,
+size_t eos_id,
+size_t max_length) {
std::vector<Path> paths;
Path emptyPath;
paths.push_back(emptyPath);
@@ -166,7 +167,8 @@ public:
if (id < getSize()) {
Path& p = (*path_)[id];
std::ostringstream sout;
-std::transform(p.ids.begin(), p.ids.end(),
+std::transform(p.ids.begin(),
+p.ids.end(),
std::ostream_iterator<std::string>(sout, split ? " " : ""),
[&](int id) { return (*dict_)[id]; });
return sout.str();

@@ -64,12 +64,11 @@ Trainer* Trainer::createByCommandLine() throw(IOError) {
Trainer::Trainer(TrainerConfig* config, GradientMachine* gm)
: m(new TrainerPrivate()) {
-m->init(config->m->conf, /* testing= */false, gm ? gm->m->machine : nullptr);
+m->init(config->m->conf, /* testing= */ false, gm ? gm->m->machine : nullptr);
}
-Trainer* Trainer::create(TrainerConfig* config, GradientMachine* gm)
-throw(IOError)
-{
+Trainer* Trainer::create(TrainerConfig* config,
+GradientMachine* gm) throw(IOError) {
auto retv = new Trainer(config, gm);
if (retv->m->getConfig().IsInitialized()) {
return retv;
@@ -134,15 +133,17 @@ void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
Matrix* Trainer::getLayerOutput(const std::string& layerName) {
auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(
this->m->getGradientMachine());
CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
auto m = nn->getLayerOutput(layerName);
return Matrix::createByPaddleMatrixPtr(&m);
}
-void Trainer::forwardOneBatch(size_t batchSize) { m->forwardOneBatch(batchSize); }
+void Trainer::forwardOneBatch(size_t batchSize) {
+m->forwardOneBatch(batchSize);
+}
bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
CHECK(dataProvider_) << "data_provider is not specified";
paddle::DataBatch dataBatch;
int num = dataProvider_->getNextBatch(batchSize, &dataBatch);
@@ -156,7 +157,6 @@ bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
void TrainerPrivate::forwardOneDataBatch(
const std::vector<paddle::Argument>& inArgs) {
std::vector<paddle::Argument>& outArgs = forwardOutput_;
-
if (config_->getOptConfig().use_sparse_remote_updater()) {

@@ -37,13 +37,15 @@ FloatArray::FloatArray(const float* b, const size_t l)
IntArray::IntArray(const int* b, const size_t l, bool f)
: buf(b), length(l), needFree(f) {}
-IntWithFloatArray::IntWithFloatArray(const float* v, const int* i, size_t l,
+IntWithFloatArray::IntWithFloatArray(const float* v,
+const int* i,
+size_t l,
bool f)
: valBuf(v), idxBuf(i), length(l), needFree(f) {}
-bool isUsingGpu() {return FLAGS_use_gpu;}
-void setUseGpu(bool useGpu) {FLAGS_use_gpu = useGpu;}
+bool isUsingGpu() { return FLAGS_use_gpu; }
+void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
bool isGpuVersion() {
#ifdef PADDLE_ONLY_CPU

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#include "PaddleAPI.h"
#include "paddle/math/Vector.h"
@@ -39,8 +38,10 @@ IVector* IVector::create(const std::vector<int>& data, bool useGpu) {
return v;
}
-IVector* IVector::createVectorFromNumpy(int* data, int dim, bool copy,
-bool useGpu) throw (UnsupportError){
+IVector* IVector::createVectorFromNumpy(int* data,
+int dim,
+bool copy,
+bool useGpu) throw(UnsupportError) {
if (useGpu) {
/// if use gpu only copy=true is supported
if (!copy) {
@@ -137,8 +138,8 @@ void IVector::copyToNumpyArray(int** view_m_data, int* dim1) {
if (auto cpuVec = dynamic_cast<paddle::CpuIVector*>(m->vec.get())) {
std::memcpy(*view_m_data, cpuVec->getData(), sizeof(int) * (*dim1));
} else if (auto gpuVec = dynamic_cast<paddle::GpuIVector*>(m->vec.get())) {
-hl_memcpy_device2host(*view_m_data, gpuVec->getData(),
-sizeof(int) * (*dim1));
+hl_memcpy_device2host(
+*view_m_data, gpuVec->getData(), sizeof(int) * (*dim1));
} else {
LOG(INFO) << "Unexpected situation";
}
@@ -201,8 +202,10 @@ Vector* Vector::createByPaddleVectorPtr(void* ptr) {
}
}
-Vector* Vector::createVectorFromNumpy(float* data, int dim, bool copy,
-bool useGpu) throw (UnsupportError){
+Vector* Vector::createVectorFromNumpy(float* data,
+int dim,
+bool copy,
+bool useGpu) throw(UnsupportError) {
if (useGpu) {
/// if use gpu only copy=True is supported
if (!copy) {
@@ -251,8 +254,8 @@ void Vector::copyToNumpyArray(float** view_m_data, int* dim1) {
if (auto cpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) {
std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1));
} else if (auto gpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) {
-hl_memcpy_device2host(*view_m_data, gpuVec->getData(),
-sizeof(float) * (*dim1));
+hl_memcpy_device2host(
+*view_m_data, gpuVec->getData(), sizeof(float) * (*dim1));
} else {
LOG(INFO) << "Unexpected situation";
}

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_ACTIVATION_FUNCTIONS_H_
#define HL_ACTIVATION_FUNCTIONS_H_
@@ -21,11 +20,8 @@ limitations under the License. */
/**
* Active functions: sigmoid, relu, tanh and linear.
*/
-#define HPPL_ACTIVE_FUNCTION {hppl::sigmoid, \
-hppl::relu, \
-hppl::tanh, \
-hppl::linear \
-}
+#define HPPL_ACTIVE_FUNCTION \
+{ hppl::sigmoid, hppl::relu, hppl::tanh, hppl::linear }
namespace hppl {
@@ -42,18 +38,18 @@ public:
#ifdef __NVCC__
namespace gpu {
static __device__ Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static __device__ Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
}
#else
namespace cpu {
static Active<real>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<real>::backward backward[] = HPPL_ACTIVE_FUNCTION;
}
#ifdef __AVX__
namespace avx {
static Active<__m256>::forward forward[] = HPPL_ACTIVE_FUNCTION;
static Active<__m256>::backward backward[] = HPPL_ACTIVE_FUNCTION;
}
#endif

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_AGGREGATE_H_
#define HL_AGGREGATE_H_

@@ -12,22 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_AVX_FUNCTIONS_H_
#define HL_AVX_FUNCTIONS_H_
#include <immintrin.h>
namespace hppl {
__m256 relu(const __m256 a);
__m256 sigmoid(const __m256 a);
__m256 tanh(const __m256 a);
__m256 linear(const __m256 a);
__m256 relu(const __m256 a, const __m256 b);
__m256 sigmoid(const __m256 a, const __m256 b);
__m256 tanh(const __m256 a, const __m256 b);
__m256 linear(const __m256 a, const __m256 b);
} // namespace hppl
#endif // HL_AVX_FUNCTIONS_H_

@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
-
#ifndef HL_BASE_H_
#define HL_BASE_H_
@@ -33,36 +31,36 @@ limitations under the License. */
* HPPL_STREAM_DEFAULT is HPPL default stream.
*/
typedef enum {
HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream*/
HPPL_STREAM_1 = 1,
HPPL_STREAM_2 = 2,
HPPL_STREAM_3 = 3,
HPPL_STREAM_4 = 4,
HPPL_THREAD_STREAM_1 = 5,
HPPL_THREAD_STREAM_2 = 6,
HPPL_THREAD_STREAM_3 = 7,
HPPL_THREAD_STREAM_4 = 8,
HPPL_STREAM_END
} hl_stream_t;
/**
* @brief HPPL activation mode.
*/
typedef enum {
HL_ACTIVATION_SIGMOID = 0,
HL_ACTIVATION_RELU = 1,
HL_ACTIVATION_TANH = 2,
HL_ACTIVATION_LINEAR = 3,
HL_ACTIVATION_END
} hl_activation_mode_t;
/**
* @brief Transpose type.
*/
typedef enum {
HPPL_OP_N = 0, /* transpose */
HPPL_OP_T = 1, /* non transpose */
HPPL_OP_END
} hl_trans_op_t;
/**
@@ -148,23 +146,21 @@ typedef struct {
* @brief Sparse matrix value type.
*/
typedef enum {
HL_NO_VALUE = 0, /* matrix values only 0 or 1 */
HL_FLOAT_VALUE = 1,
HL_VALUE_END
} hl_matrix_value_t;
/**
* @brief HPPL matrix format.
*/
typedef enum {
HL_SPARSE_CSR = 0,
HL_SPARSE_CSC = 1,
HL_SPARSE_END
} hl_matrix_format_t;
-typedef struct _hl_matrix_s * hl_matrix_s;
+typedef struct _hl_matrix_s *hl_matrix_s;
/**
* @brief HPPL sparse matrix.
@@ -177,12 +173,12 @@ typedef struct _hl_matrix_s * hl_matrix_s;
* @param nnz nonzero values of sparse matrix.
*/
typedef struct {
hl_matrix_s matrix;
hl_matrix_format_t format;
hl_matrix_value_t type;
int rows;
int cols;
size_t nnz;
} _hl_sparse_matrix_s, *hl_sparse_matrix_s;
#ifndef PADDLE_TYPE_DOUBLE
@@ -195,7 +191,7 @@ typedef struct {
*
* HL_FLOAT_MIN: 1.17549435e-38F
*/
#define HL_FLOAT_MAX 3.40282347e+38F
/**
* if real == double
*
@@ -203,20 +199,18 @@ typedef struct {
*
* HL_FLOAT_MIN: 2.2250738585072014e-308
*/
#define HL_FLOAT_MIN 1.17549435e-38F
#else
#define HL_FLOAT_MAX 1.7976931348623157e+308
#define HL_FLOAT_MIN 2.2250738585072014e-308
#endif
/**
* The maximum input value for exp, used to avoid overflow problem.
*
* Currently only used for tanh function.
*/
#define EXP_MAX_INPUT 40.0
/**
* @brief DIVUP(x, y) is similar to ceil(x / y).
@@ -224,7 +218,7 @@ typedef struct {
* the size of blockDim.
*/
#ifndef DIVUP
-#define DIVUP(x, y) (((x) + (y) - 1) / (y))
+#define DIVUP(x, y) (((x) + (y)-1) / (y))
#endif
#ifdef __NVCC__
@@ -233,7 +227,7 @@ typedef struct {
#include "hl_cuda.h"
#include "cuda_runtime.h"
extern __thread bool g_sync_flag;
extern __thread cudaStream_t default_stream;
#define STREAM_DEFAULT default_stream
@@ -241,16 +235,15 @@ extern __thread cudaStream_t default_stream;
* @brief Check cuda kernel execution.
* @param msg error string
*/
-#define CHECK_SYNC(msg) \
-if (true == g_sync_flag) { \
-hl_stream_synchronize(HPPL_STREAM_DEFAULT); \
-cudaError_t err \
-= (cudaError_t)hl_get_device_last_error(); \
-CHECK_EQ(cudaSuccess, err) << "[" << msg << "] " \
-<< "CUDA error: " \
-<< hl_get_device_error_string((size_t)err); \
+#define CHECK_SYNC(msg) \
+if (true == g_sync_flag) { \
+hl_stream_synchronize(HPPL_STREAM_DEFAULT); \
+cudaError_t err = (cudaError_t)hl_get_device_last_error(); \
+CHECK_EQ(cudaSuccess, err) \
+<< "[" << msg << "] " \
+<< "CUDA error: " << hl_get_device_error_string((size_t)err); \
}
#endif /* __NVCC__ */
#endif /* HL_BASE_H_ */

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_BATCH_TRANSPOSE_H_
#define HL_BATCH_TRANSPOSE_H_
@@ -31,10 +30,7 @@ limitations under the License. */
* order. Each batch has height * width data, which are
* arranged in height-first (or row-first) manner.
*/
-extern void batchTranspose(const real* input,
-real* output,
-int width,
-int height,
-int batchSize);
+extern void batchTranspose(
+const real* input, real* output, int width, int height, int batchSize);
#endif // HL_BATCH_TRANSPOSE_H_

File diff suppressed because it is too large.

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_H_
#define HL_CUDA_H_
@@ -22,8 +21,7 @@ limitations under the License. */
/**
* @brief HPPL event.
*/
-typedef struct _hl_event_st * hl_event_t;
+typedef struct _hl_event_st *hl_event_t;
/**
* @brief return cuda runtime api version.
@@ -42,7 +40,7 @@ extern void hl_start();
* if device is NULL, will start all GPU.
* @param[in] number number of devices.
*/
-extern void hl_specify_devices_start(int* device, int number);
+extern void hl_specify_devices_start(int *device, int number);
/**
* @brief Queries if a device may directly access a peer device's memory.
@@ -126,7 +124,7 @@ extern int hl_get_device();
*
* @return dest_d pointer to device memory.
*/
-extern void* hl_malloc_device(size_t size);
+extern void *hl_malloc_device(size_t size);
/**
* @brief Free device memory.
@@ -143,7 +141,7 @@ extern void hl_free_mem_device(void *dest_d);
*
* @return dest_h pointer to host memory.
*/
-extern void* hl_malloc_host(size_t size);
+extern void *hl_malloc_host(size_t size);
/**
* @brief Free host page-lock memory.
@@ -228,9 +226,9 @@ extern void hl_srand(unsigned int seed);
* @param[in] stream stream id.
*/
extern void hl_memcpy_async(void *dst,
void *src,
size_t size,
hl_stream_t stream);
/**
* @brief Waits for stream tasks to complete.
@@ -261,8 +259,7 @@ extern void hl_destroy_event(hl_event_t event);
*
* @return time Time between start and end in ms.
*/
-extern float hl_event_elapsed_time(hl_event_t start,
-hl_event_t end);
+extern float hl_event_elapsed_time(hl_event_t start, hl_event_t end);
/**
* @brief Records an event.
@@ -300,7 +297,7 @@ extern void hl_set_device_flags_block();
/**
* @brief Returns the last error string from a cuda runtime call.
*/
-extern const char* hl_get_device_error_string();
+extern const char *hl_get_device_error_string();
/**
* @brief Returns the last error string from a cuda runtime call.
@@ -309,7 +306,7 @@ extern const char* hl_get_device_error_string();
*
* @see hl_get_device_last_error()
*/
-extern const char* hl_get_device_error_string(size_t err);
+extern const char *hl_get_device_error_string(size_t err);
/**
* @brief Returns the last error number.

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_CUBLAS_H_
#define HL_CUDA_CUBLAS_H_
@@ -29,12 +28,8 @@ limitations under the License. */
* @param[in] ldc the first dimension of C_d.
*
*/
-extern void hl_matrix_transpose(real *A_d,
-real *C_d,
-int dimM,
-int dimN,
-int lda,
-int ldc);
+extern void hl_matrix_transpose(
+real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc);
/*
* @brief Matrix transpose, while lda = dimN, ldc = dimM.
@@ -45,10 +40,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] dimN matrix width.
*
*/
-extern void hl_matrix_transpose(real *A_d,
-real *C_d,
-int dimM,
-int dimN);
+extern void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN);
/*
* @brief Matrix inverse
@@ -60,11 +52,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] ldc the first dimension of C_d
*
*/
-extern void hl_matrix_inverse(real *A_d,
-real *C_d,
-int dimN,
-int lda,
-int ldc);
+extern void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
@@ -84,12 +72,19 @@ extern void hl_matrix_inverse(real *A_d,
* @param[in] ldc the first dimension of C_d.
*
*/
-extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
-real *B_d, hl_trans_op_t transb,
+extern void hl_matrix_mul(real *A_d,
+hl_trans_op_t transa,
+real *B_d,
+hl_trans_op_t transb,
real *C_d,
-int dimM, int dimN, int dimK,
-real alpha, real beta,
-int lda, int ldb, int ldc);
+int dimM,
+int dimN,
+int dimK,
+real alpha,
+real beta,
+int lda,
+int ldb,
+int ldc);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
@@ -106,11 +101,16 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
* @param[in] beta scalar used for multiplication.
*
*/
-extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
-real *B_d, hl_trans_op_t transb,
+extern void hl_matrix_mul(real *A_d,
+hl_trans_op_t transa,
+real *B_d,
+hl_trans_op_t transb,
real *C_d,
-int dimM, int dimN, int dimK,
-real alpha, real beta);
+int dimM,
+int dimN,
+int dimK,
+real alpha,
+real beta);
/**
* @brief This function performs the matrix-vector multiplication.
@@ -132,11 +132,17 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
*
*/
-extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
-real *B_d, real *C_d,
-int dimM, int dimN,
-real alpha, real beta,
-int lda, int incb, int incc);
+extern void hl_matrix_mul_vector(real *A_d,
+hl_trans_op_t trans,
+real *B_d,
+real *C_d,
+int dimM,
+int dimN,
+real alpha,
+real beta,
+int lda,
+int incb,
+int incc);
/**
* @brief This function performs the matrix-vector multiplication.
@@ -154,9 +160,13 @@ extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
* @param[in] beta scalar used for multiplication.
*
*/
-extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
-real *B_d, real *C_d,
-int dimM, int dimN,
-real alpha, real beta);
+extern void hl_matrix_mul_vector(real *A_d,
+hl_trans_op_t trans,
+real *B_d,
+real *C_d,
+int dimM,
+int dimN,
+real alpha,
+real beta);
#endif /* HL_CUDA_CUBLAS_H_ */

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_CUDA_CUDNN_H_
#define HL_CUDA_CUDNN_H_
@@ -22,7 +21,7 @@ limitations under the License. */
* hppl pooling mode
*/
typedef enum {
HL_POOLING_MAX = 0,
// average includes padded values
HL_POOLING_AVERAGE = 1,
// average does not include padded values
@@ -324,17 +323,16 @@ extern void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
* @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdFilterAlgo backward filter algorithm.
*/
-extern void hl_convolution_backward_filter(
-hl_tensor_descriptor input,
-real* input_data,
-hl_tensor_descriptor output,
-real* output_grad_data,
-hl_filter_descriptor filter,
-real* filter_grad_data,
-hl_convolution_descriptor conv,
-void* gpuWorkSpace,
-size_t sizeInBytes,
-int convBwdFilterAlgo);
+extern void hl_convolution_backward_filter(hl_tensor_descriptor input,
+real* input_data,
+hl_tensor_descriptor output,
+real* output_grad_data,
+hl_filter_descriptor filter,
+real* filter_grad_data,
+hl_convolution_descriptor conv,
+void* gpuWorkSpace,
+size_t sizeInBytes,
+int convBwdFilterAlgo);
/**
* @brief convolution backward data(calculate input image grad data).
@@ -350,17 +348,16 @@ extern void hl_convolution_backward_filter(
* @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdDataAlgo backward data algorithm.
*/
-extern void hl_convolution_backward_data(
-hl_tensor_descriptor input,
-real* input_data_grad,
-hl_tensor_descriptor output,
-real* output_grad_data,
-hl_filter_descriptor filter,
-real* filter_data,
-hl_convolution_descriptor conv,
-void* gpuWorkSpace,
-size_t sizeInBytes,
-int convBwdDataAlgo);
+extern void hl_convolution_backward_data(hl_tensor_descriptor input,
+real* input_data_grad,
+hl_tensor_descriptor output,
+real* output_grad_data,
+hl_filter_descriptor filter,
+real* filter_data,
+hl_convolution_descriptor conv,
+void* gpuWorkSpace,
+size_t sizeInBytes,
+int convBwdDataAlgo);
/**
* @brief convolution backward bias(calculate bias grad data).
@@ -383,8 +380,8 @@ extern void hl_convolution_backward_bias(hl_tensor_descriptor bias,
* @param[in] height matrix height.
* @param[in] width matrix width.
*/
-extern void hl_softmax_forward(real *input,
-real *output,
+extern void hl_softmax_forward(real* input,
+real* output,
int height,
int width);
@@ -396,8 +393,8 @@ extern void hl_softmax_forward(real *input,
* @param[in] height matrix height.
* @param[in] width matrix width.
*/
-extern void hl_softmax_backward(real *output_value,
-real *output_grad,
+extern void hl_softmax_backward(real* output_value,
+real* output_grad,
int height,
int width);
@@ -426,18 +423,18 @@ extern void hl_softmax_backward(real *output_value,
*
*/
extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
-real *input,
+real* input,
hl_tensor_descriptor outputDesc,
-real *output,
+real* output,
hl_tensor_descriptor bnParamDesc,
-real *scale,
-real *bias,
+real* scale,
+real* bias,
double factor,
-real *runningMean,
-real *runningInvVar,
+real* runningMean,
+real* runningInvVar,
double epsilon,
-real *savedMean,
-real *savedVar);
+real* savedMean,
+real* savedVar);
/**
* @brief cudnn batch norm forward.
@@ -463,14 +460,14 @@ extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
*
*/
extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
-real *input,
+real* input,
hl_tensor_descriptor outputDesc,
-real *output,
+real* output,
hl_tensor_descriptor bnParamDesc,
-real *scale,
-real *bias,
-real *estimatedMean,
-real *estimatedVar,
+real* scale,
+real* bias,
+real* estimatedMean,
+real* estimatedVar,
double epsilon);
/**
@@ -483,7 +480,8 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
* @param[in] inGradDesc input tensor descriptor desc.
* @param[in] inGrad input data.
* @param[in] dBnParamDesc tensor descriptor desc.
-* bnScale, bnBias, running mean/var, save_mean/var.
+* bnScale, bnBias, running mean/var,
+* save_mean/var.
* @param[in] scale batch normalization scale parameter (in original
* paper scale is referred to as gamma).
* @param[in] scaleGrad batch normalization scale parameter (in original
@@ -497,17 +495,17 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
*
*/
extern void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
-real *input,
+real* input,
hl_tensor_descriptor outGradDesc,
-real *outGrad,
+real* outGrad,
hl_tensor_descriptor inGradDesc,
-real *inGrad,
+real* inGrad,
hl_tensor_descriptor dBnParamDesc,
-real *scale,
-real *scaleGrad,
-real *biasGrad,
+real* scale,
+real* scaleGrad,
+real* biasGrad,
double epsilon,
-real *savedMean,
-real *savedInvVar);
+real* savedMean,
+real* savedInvVar);
#endif // HL_CUDA_CUDNN_H_

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_DSO_LOADER_H_
#define HL_DSO_LOADER_H_

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_FUNCTIONS_H_
#define HL_FUNCTIONS_H_
@@ -21,30 +20,30 @@ limitations under the License. */
/**
* sigmoid threshold maximum
*/
#define SIGMOID_THRESHOLD_MIN -40.0
/**
* sigmoid threshold minimum
*/
#define SIGMOID_THRESHOLD_MAX 13.0
#ifndef __NVCC__
namespace hppl {
/*
* forward activation
*/
real relu(const real a);
real sigmoid(const real a);
real tanh(const real a);
real linear(const real a);
/*
* backward activation
*/
real relu(const real a, const real b);
real sigmoid(const real a, const real b);
real tanh(const real a, const real b);
real linear(const real a, const real b);
} // namespace hppl
#ifdef __AVX__

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_GPU_H_
#define HL_GPU_H_

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-
#ifndef HL_LSTM_H_
#define HL_LSTM_H_

Some files were not shown because too many files have changed in this diff.
