Merge branch 'develop' of https://github.com/baidu/Paddle into config_parse_bug_fix

avx_docs
dangqingqing 8 years ago
commit ebbe6e1ab1

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
@ -112,7 +111,7 @@ void Arguments::setSlotSequenceStartPositions(size_t idx,
}
void Arguments::setSlotSubSequenceStartPositions(
size_t idx, IVector *vec) throw(RangeError) {
size_t idx, IVector* vec) throw(RangeError) {
auto& a = m->getArg(idx);
auto& v = m->cast<paddle::IVector>(vec->getSharedPtr());
a.subSequenceStartPositions = std::make_shared<paddle::ICpuGpuVector>(v);

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/trainer/Trainer.h"
@ -44,8 +43,7 @@ TrainerConfig* TrainerConfig::createFromTrainerConfigFile(
return retv;
}
TrainerConfig* TrainerConfig::createFromProtoString(
const std::string& str) {
TrainerConfig* TrainerConfig::createFromProtoString(const std::string& str) {
auto retv = new TrainerConfig();
paddle::TrainerConfig trainerConfigProto;
auto conf = std::make_shared<paddle::TrainerConfigHelper>(trainerConfigProto);

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
@ -27,7 +26,8 @@ GradientMachine::GradientMachine() : m(new GradientMachinePrivate()) {}
GradientMachine::~GradientMachine() { delete m; }
GradientMachine* GradientMachine::createFromPaddleModelPtr(
const void* confPtr, GradientMatchineCreateMode mode,
const void* confPtr,
GradientMatchineCreateMode mode,
const std::vector<int>& types) {
auto& conf = *(const paddle::ModelConfig*)(confPtr);
std::vector<ParameterType> realTypes;
@ -44,7 +44,8 @@ GradientMachine* GradientMachine::createFromPaddleModelPtr(
}
GradientMachine* GradientMachine::createByConfigProtoStr(
const std::string& protoStr, GradientMatchineCreateMode mode,
const std::string& protoStr,
GradientMatchineCreateMode mode,
const std::vector<int>& types) {
paddle::ModelConfig conf;
conf.ParseFromString(protoStr);
@ -56,13 +57,15 @@ GradientMachine* GradientMachine::createByConfigProtoStr(
}
/// Creates a GradientMachine from a wrapped ModelConfig.
///
/// Takes the address of the model config returned by
/// conf->m->conf->getModelConfig() and forwards it, together with mode and
/// types, to createFromPaddleModelPtr(), whose result is returned unchanged.
GradientMachine* GradientMachine::createByModelConfig(
ModelConfig* conf, GradientMatchineCreateMode mode,
ModelConfig* conf,
GradientMatchineCreateMode mode,
const std::vector<int>& types) {
// conf is dereferenced unconditionally here, so it must not be null.
auto confPtr = &conf->m->conf->getModelConfig();
return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types);
}
void GradientMachine::forward(const Arguments& inArgs, Arguments* outArgs,
void GradientMachine::forward(const Arguments& inArgs,
Arguments* outArgs,
PassType passType) {
auto& in =
m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
@ -99,7 +102,8 @@ void GradientMachine::backward(const UpdateCallback& callback) {
}
void GradientMachine::forwardBackward(const Arguments& inArgs,
Arguments* outArgs, PassType passType,
Arguments* outArgs,
PassType passType,
const UpdateCallback& callback) {
auto& in =
m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr());
@ -140,8 +144,11 @@ Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
}
SequenceGenerator* GradientMachine::asSequenceGenerator(
const std::vector<std::string>& dict, size_t begin_id, size_t end_id,
size_t max_length, size_t beam_size) {
const std::vector<std::string>& dict,
size_t begin_id,
size_t end_id,
size_t max_length,
size_t beam_size) {
SequenceGenerator* r =
SequenceGenerator::createByGradientMachineSharedPtr(&m->machine);
r->setDict(dict);

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "PaddleAPI.h"
@ -23,7 +22,8 @@ limitations under the License. */
/// Converts every element of src to T2 with static_cast and stores the
/// results in *dest, which is first resized to src.size().
template <typename T1, typename T2>
void staticCastVector(std::vector<T2>* dest, const std::vector<T1>& src) {
// Size the destination up front so std::transform can write through
// dest->begin() without going out of bounds.
dest->resize(src.size());
std::transform(src.begin(), src.end(), dest->begin(), [](T1 t){
return static_cast<T2>(t);
});
std::transform(src.begin(),
src.end(),
dest->begin(),
[](T1 t) { return static_cast<T2>(t); });
}

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
@ -44,17 +43,21 @@ Matrix* Matrix::createZero(size_t height, size_t width, bool useGpu) {
return m;
}
/// Creates a new Matrix wrapper holding a paddle::Matrix of size
/// height x width (created with the given useGpu flag) and copies
/// data.size() values from data into it.
///
/// The wrapper is allocated with new and returned as a raw pointer.
/// NOTE(review): data.size() is not compared with height * width in this
/// function -- confirm whether copyFrom() validates the length.
Matrix* Matrix::createDense(const std::vector<float>& data, size_t height,
size_t width, bool useGpu) {
Matrix* Matrix::createDense(const std::vector<float>& data,
size_t height,
size_t width,
bool useGpu) {
auto m = new Matrix();
m->m->mat = paddle::Matrix::create(height, width, useGpu);
m->m->mat->copyFrom(data.data(), data.size());
return m;
}
Matrix* Matrix::createDenseFromNumpy(float* data, int dim1, int dim2,
bool copy, bool useGpu)
throw (UnsupportError) {
Matrix* Matrix::createDenseFromNumpy(float* data,
int dim1,
int dim2,
bool copy,
bool useGpu) throw(UnsupportError) {
if (useGpu) {
/// Gpu mode only supports copy=True
if (!copy) {
@ -66,7 +69,9 @@ Matrix* Matrix::createDenseFromNumpy(float* data, int dim1, int dim2,
}
}
Matrix* Matrix::createCpuDenseFromNumpy(float* data, int dim1, int dim2,
Matrix* Matrix::createCpuDenseFromNumpy(float* data,
int dim1,
int dim2,
bool copy) {
auto m = new Matrix();
if (copy) {
@ -85,12 +90,20 @@ Matrix* Matrix::createGpuDenseFromNumpy(float* data, int dim1, int dim2) {
return m;
}
/// Creates a new Matrix wrapper around a sparse matrix built by
/// paddle::Matrix::createSparseMatrix(height, width, nnz, valueType,
/// isTrans, useGpu).
///
/// valueType is paddle::NO_VALUE when isNonVal is true and
/// paddle::FLOAT_VALUE otherwise. The wrapper is allocated with new and
/// returned as a raw pointer.
Matrix* Matrix::createSparse(size_t height, size_t width, size_t nnz,
bool isNonVal, bool isTrans, bool useGpu) {
Matrix* Matrix::createSparse(size_t height,
size_t width,
size_t nnz,
bool isNonVal,
bool isTrans,
bool useGpu) {
auto m = new Matrix();
m->m->mat = paddle::Matrix::createSparseMatrix(
height, width, nnz, isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
isTrans, useGpu);
height,
width,
nnz,
isNonVal ? paddle::NO_VALUE : paddle::FLOAT_VALUE,
isTrans,
useGpu);
return m;
}
@ -221,7 +234,8 @@ FloatArray Matrix::getData() const {
}
void Matrix::sparseCopyFrom(
const std::vector<int>& rows, const std::vector<int>& cols,
const std::vector<int>& rows,
const std::vector<int>& cols,
const std::vector<float>& vals) throw(UnsupportError) {
auto cpuSparseMat =
std::dynamic_pointer_cast<paddle::CpuSparseMatrix>(m->mat);
@ -240,7 +254,8 @@ void Matrix::sparseCopyFrom(
void* Matrix::getSharedPtr() const { return &m->mat; }
void Matrix::toNumpyMatInplace(float** view_data, int* dim1,
void Matrix::toNumpyMatInplace(float** view_data,
int* dim1,
int* dim2) throw(UnsupportError) {
auto cpuMat = std::dynamic_pointer_cast<paddle::CpuMatrix>(m->mat);
if (cpuMat) {
@ -251,7 +266,8 @@ void Matrix::toNumpyMatInplace(float** view_data, int* dim1,
throw UnsupportError();
}
}
void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
void Matrix::copyToNumpyMat(float** view_m_data,
int* dim1,
int* dim2) throw(UnsupportError) {
static_assert(sizeof(paddle::real) == sizeof(float),
"Currently PaddleAPI only support for single "
@ -269,8 +285,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
} else if (auto gpuMat = dynamic_cast<paddle::GpuMatrix*>(m->mat.get())) {
auto src = gpuMat->getData();
auto dest = *view_m_data;
hl_memcpy_device2host(dest, src,
sizeof(paddle::real) * (*dim1) * (*dim2));
hl_memcpy_device2host(
dest, src, sizeof(paddle::real) * (*dim1) * (*dim2));
} else {
LOG(WARNING) << "Unexpected Situation";
throw UnsupportError();
@ -278,7 +294,8 @@ void Matrix::copyToNumpyMat(float** view_m_data, int* dim1,
}
}
void Matrix::copyFromNumpyMat(float* data, int dim1,
void Matrix::copyFromNumpyMat(float* data,
int dim1,
int dim2) throw(UnsupportError, RangeError) {
if (isSparse()) {
throw UnsupportError();

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h>
@ -61,8 +60,8 @@ class RangeError {};
/// Error for unsupported operations, such as accessing GPU memory directly.
///
/// Derives from std::runtime_error. The default constructor passes " " as
/// the message; the other constructor forwards the given message.
class UnsupportError : public std::runtime_error {
public:
UnsupportError() : std::runtime_error(" ") {};
UnsupportError(const std::string& message) : std::runtime_error(message) {};
UnsupportError() : std::runtime_error(" "){};
UnsupportError(const std::string& message) : std::runtime_error(message){};
};
/// This type will map to python's list of float.
@ -112,7 +111,8 @@ public:
/**
* Create a matrix of the given height and width, filled with zeros.
*/
static Matrix* createZero(size_t height, size_t width,
static Matrix* createZero(size_t height,
size_t width,
bool useGpu = isUsingGpu());
/**
@ -124,8 +124,11 @@ public:
*
* @note the default sparse type is SPARSE_CSR.
*/
static Matrix* createSparse(size_t height, size_t width, size_t nnz,
bool isNonVal = true, bool trans = false,
static Matrix* createSparse(size_t height,
size_t width,
size_t nnz,
bool isNonVal = true,
bool trans = false,
bool useGpu = isUsingGpu());
/**
@ -134,13 +137,17 @@ public:
* @param data list of float should be passed in python.
* @note the value will be copy into a new matrix.
*/
static Matrix* createDense(const std::vector<float>& data, size_t height,
size_t width, bool useGpu = isUsingGpu());
static Matrix* createDense(const std::vector<float>& data,
size_t height,
size_t width,
bool useGpu = isUsingGpu());
static Matrix* createDenseFromNumpy(float* data, int dim1, int dim2,
static Matrix* createDenseFromNumpy(
float* data,
int dim1,
int dim2,
bool copy = true,
bool useGpu = isUsingGpu())
throw (UnsupportError);
bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu Dense Matrix from numpy matrix, dtype=float32
@ -151,7 +158,9 @@ public:
* @param copy true if copy into a new matrix, false will create
* matrix inplace.
*/
static Matrix* createCpuDenseFromNumpy(float* data, int dim1, int dim2,
static Matrix* createCpuDenseFromNumpy(float* data,
int dim1,
int dim2,
bool copy = false);
/// Create Gpu Dense Matrix from numpy matrix, dtype=float32
@ -171,11 +180,13 @@ public:
* numpy_mat = m.toNumpyMat()
* @endcode
*/
void toNumpyMatInplace(float** view_data, int* dim1,
void toNumpyMatInplace(float** view_data,
int* dim1,
int* dim2) throw(UnsupportError);
/// Copy To numpy mat.
void copyToNumpyMat(float** view_m_data, int* dim1,
void copyToNumpyMat(float** view_m_data,
int* dim1,
int* dim2) throw(UnsupportError);
/// Copy From Numpy Mat
@ -248,15 +259,18 @@ public:
static Vector* create(const std::vector<float>& data,
bool useGpu = isUsingGpu());
static Vector* createVectorFromNumpy(float* data, int dim, bool copy = true,
bool useGpu = isUsingGpu())
throw (UnsupportError);
static Vector* createVectorFromNumpy(
float* data,
int dim,
bool copy = true,
bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu Vector from numpy array, which dtype=float32
*
* If copy is false, it will create vector inplace.
*/
static Vector* createCpuVectorFromNumpy(float* data, int dim,
static Vector* createCpuVectorFromNumpy(float* data,
int dim,
bool copy = false);
/// Create Gpu Vector from numpy array, which dtype=float32
@ -312,16 +326,19 @@ public:
static IVector* create(const std::vector<int>& data,
bool useGpu = isUsingGpu());
static IVector* createVectorFromNumpy(int* data, int dim, bool copy = true,
bool useGpu = isUsingGpu())
throw (UnsupportError);
static IVector* createVectorFromNumpy(
int* data,
int dim,
bool copy = true,
bool useGpu = isUsingGpu()) throw(UnsupportError);
/**
* Create Cpu IVector from numpy array, which dtype=int32
*
* If copy is false, it will create vector inplace
*/
static IVector* createCpuVectorFromNumpy(int* data, int dim,
static IVector* createCpuVectorFromNumpy(int* data,
int dim,
bool copy = false);
/**
* Create Gpu IVector from numpy array, which dtype=int32
@ -605,7 +622,8 @@ class ParameterTraverseCallback {
public:
~ParameterTraverseCallback();
void apply(const std::vector<Vector*>& vecs, const ParameterConfig& config,
void apply(const std::vector<Vector*>& vecs,
const ParameterConfig& config,
size_t sparseId);
private:
@ -638,7 +656,8 @@ public:
void finishBatch();
void update(const std::vector<Vector*>& vecs, const ParameterConfig& conf,
void update(const std::vector<Vector*>& vecs,
const ParameterConfig& conf,
size_t sparseId = NO_SPARSE_ID);
std::vector<int> getParameterTypes() const;
@ -678,7 +697,8 @@ public:
* model config by TrainerConfig
*/
static GradientMachine* createByModelConfig(
ModelConfig* conf, GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
ModelConfig* conf,
GradientMatchineCreateMode mode = CREATE_MODE_NORMAL,
const std::vector<int>& parameterTypes = defaultParamTypes);
/**
@ -701,7 +721,8 @@ public:
/**
* Combine forward/backward
*/
void forwardBackward(const Arguments& inArgs, Arguments* outArgs,
void forwardBackward(const Arguments& inArgs,
Arguments* outArgs,
PassType passType,
const UpdateCallback& callback = UpdateCallback());
@ -722,14 +743,17 @@ public:
*/
SequenceGenerator* asSequenceGenerator(
const std::vector<std::string>& dict = std::vector<std::string>(),
size_t begin_id = 0UL, size_t end_id = 0UL, size_t max_length = 100UL,
size_t begin_id = 0UL,
size_t end_id = 0UL,
size_t max_length = 100UL,
size_t beam_size = -1UL);
private:
GradientMachinePrivate* m;
static GradientMachine* createFromPaddleModelPtr(
const void* confPtr, GradientMatchineCreateMode mode,
const void* confPtr,
GradientMatchineCreateMode mode,
const std::vector<int>& types);
// A C++11 initializer list is not used here, so a static variable serves as
// the function's default argument.
@ -751,8 +775,8 @@ public:
/// Create A Trainer By TrainerConfig. using paddle command line.
static Trainer* createByCommandLine() throw(IOError);
static Trainer* create(TrainerConfig* optConfig, GradientMachine* gm)
throw(IOError);
static Trainer* create(TrainerConfig* optConfig,
GradientMachine* gm) throw(IOError);
/// Start training
void startTrain();

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/parameter/Parameter.h"

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/parameter/ParameterOptimizer.h"
@ -32,11 +31,15 @@ struct ParameterTraverseCallbackPrivate {
const paddle::ParameterOptimizer::TraverseCallback& callback)
: callback(callback) {}
void apply(const std::vector<Vector*>& vecs, const ParameterConfig& conf,
void apply(const std::vector<Vector*>& vecs,
const ParameterConfig& conf,
size_t sparseId) {
std::vector<paddle::VectorPtr> real_vecs;
real_vecs.resize(vecs.size());
std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) {
std::transform(vecs.begin(),
vecs.end(),
real_vecs.begin(),
[](Vector* v) {
if (v) {
return *(paddle::VectorPtr*)(v->getSharedPtr());
} else {
@ -86,9 +89,11 @@ void ParameterOptimizer::startBatch(size_t numSamplesProcessed) {
void ParameterOptimizer::finishBatch() { m->optimizer->finishBatch(); }
/// Applies the wrapped optimizer's update to the given vectors.
///
/// Wraps a lambda that calls m->optimizer->update(_vecs, config, sid) in a
/// ParameterTraverseCallbackPrivate, then invokes it through
/// invoker.apply(vecs, conf, sparseId). The lambda captures by reference and
/// is only used within this call.
void ParameterOptimizer::update(const std::vector<Vector*>& vecs,
const ParameterConfig& conf, size_t sparseId) {
ParameterTraverseCallbackPrivate invoker([&](
const paddle::VectorPtr _vecs[], const paddle::ParameterConfig& config,
const ParameterConfig& conf,
size_t sparseId) {
ParameterTraverseCallbackPrivate invoker(
[&](const paddle::VectorPtr _vecs[],
const paddle::ParameterConfig& config,
size_t sid = -1UL) { m->optimizer->update(_vecs, config, sid); });
invoker.apply(vecs, conf, sparseId);
}
@ -116,8 +121,9 @@ void ParameterTraverseCallback::apply(const std::vector<Vector*>& vecs,
ParameterTraverseCallback* ParameterOptimizer::needSpecialTraversal(
const ParameterConfig& config) const {
auto& param_config = *(paddle::ParameterConfig*)const_cast<ParameterConfig&>(
config).getRawPtr();
auto& param_config =
*(paddle::ParameterConfig*)const_cast<ParameterConfig&>(config)
.getRawPtr();
auto callback = m->optimizer->needSpecialTraversal(param_config);
if (callback) {
auto retCallback = new ParameterTraverseCallback();

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/gserver/gradientmachines/GradientMachine.h"
#include "paddle/parameter/Argument.h"
@ -42,8 +41,10 @@ struct Path {
// position
static void findNBest(paddle::GradientMachine* gradMachine,
std::vector<paddle::Argument>& inArgs,
std::vector<Path>& finalPaths, size_t bos_id,
size_t eos_id, size_t max_length) {
std::vector<Path>& finalPaths,
size_t bos_id,
size_t eos_id,
size_t max_length) {
std::vector<Path> paths;
Path emptyPath;
paths.push_back(emptyPath);
@ -166,7 +167,8 @@ public:
if (id < getSize()) {
Path& p = (*path_)[id];
std::ostringstream sout;
std::transform(p.ids.begin(), p.ids.end(),
std::transform(p.ids.begin(),
p.ids.end(),
std::ostream_iterator<std::string>(sout, split ? " " : ""),
[&](int id) { return (*dict_)[id]; });
return sout.str();

@ -64,12 +64,11 @@ Trainer* Trainer::createByCommandLine() throw(IOError) {
Trainer::Trainer(TrainerConfig* config, GradientMachine* gm)
: m(new TrainerPrivate()) {
m->init(config->m->conf, /* testing= */false, gm ? gm->m->machine : nullptr);
m->init(config->m->conf, /* testing= */ false, gm ? gm->m->machine : nullptr);
}
Trainer* Trainer::create(TrainerConfig* config, GradientMachine* gm)
throw(IOError)
{
Trainer* Trainer::create(TrainerConfig* config,
GradientMachine* gm) throw(IOError) {
auto retv = new Trainer(config, gm);
if (retv->m->getConfig().IsInitialized()) {
return retv;
@ -140,7 +139,9 @@ Matrix* Trainer::getLayerOutput(const std::string& layerName) {
return Matrix::createByPaddleMatrixPtr(&m);
}
void Trainer::forwardOneBatch(size_t batchSize) { m->forwardOneBatch(batchSize); }
void Trainer::forwardOneBatch(size_t batchSize) {
m->forwardOneBatch(batchSize);
}
bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
CHECK(dataProvider_) << "data_provider is not specified";
@ -156,7 +157,6 @@ bool TrainerPrivate::forwardOneBatch(size_t batchSize) {
void TrainerPrivate::forwardOneDataBatch(
const std::vector<paddle::Argument>& inArgs) {
std::vector<paddle::Argument>& outArgs = forwardOutput_;
if (config_->getOptConfig().use_sparse_remote_updater()) {

@ -37,13 +37,15 @@ FloatArray::FloatArray(const float* b, const size_t l)
IntArray::IntArray(const int* b, const size_t l, bool f)
: buf(b), length(l), needFree(f) {}
IntWithFloatArray::IntWithFloatArray(const float* v, const int* i, size_t l,
IntWithFloatArray::IntWithFloatArray(const float* v,
const int* i,
size_t l,
bool f)
: valBuf(v), idxBuf(i), length(l), needFree(f) {}
bool isUsingGpu() {return FLAGS_use_gpu;}
bool isUsingGpu() { return FLAGS_use_gpu; }
void setUseGpu(bool useGpu) {FLAGS_use_gpu = useGpu;}
void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }
bool isGpuVersion() {
#ifdef PADDLE_ONLY_CPU

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/math/Vector.h"
@ -39,8 +38,10 @@ IVector* IVector::create(const std::vector<int>& data, bool useGpu) {
return v;
}
IVector* IVector::createVectorFromNumpy(int* data, int dim, bool copy,
bool useGpu) throw (UnsupportError){
IVector* IVector::createVectorFromNumpy(int* data,
int dim,
bool copy,
bool useGpu) throw(UnsupportError) {
if (useGpu) {
/// if use gpu only copy=true is supported
if (!copy) {
@ -137,8 +138,8 @@ void IVector::copyToNumpyArray(int** view_m_data, int* dim1) {
if (auto cpuVec = dynamic_cast<paddle::CpuIVector*>(m->vec.get())) {
std::memcpy(*view_m_data, cpuVec->getData(), sizeof(int) * (*dim1));
} else if (auto gpuVec = dynamic_cast<paddle::GpuIVector*>(m->vec.get())) {
hl_memcpy_device2host(*view_m_data, gpuVec->getData(),
sizeof(int) * (*dim1));
hl_memcpy_device2host(
*view_m_data, gpuVec->getData(), sizeof(int) * (*dim1));
} else {
LOG(INFO) << "Unexpected situation";
}
@ -201,8 +202,10 @@ Vector* Vector::createByPaddleVectorPtr(void* ptr) {
}
}
Vector* Vector::createVectorFromNumpy(float* data, int dim, bool copy,
bool useGpu) throw (UnsupportError){
Vector* Vector::createVectorFromNumpy(float* data,
int dim,
bool copy,
bool useGpu) throw(UnsupportError) {
if (useGpu) {
/// if use gpu only copy=True is supported
if (!copy) {
@ -251,8 +254,8 @@ void Vector::copyToNumpyArray(float** view_m_data, int* dim1) {
// Copy (*dim1) floats from the underlying vector into *view_m_data, picking
// the copy routine from the vector's concrete type.
if (auto cpuVec = dynamic_cast<paddle::CpuVector*>(m->vec.get())) {
  // CpuVector: plain memcpy from the vector's data pointer.
  std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1));
} else if (auto gpuVec = dynamic_cast<paddle::GpuVector*>(m->vec.get())) {
  // This branch must test for GpuVector. It previously repeated the
  // CpuVector cast, which can never succeed once the first test has failed,
  // so GPU vectors always fell through to the log below without any copy.
  // The IVector counterpart already uses CpuIVector / GpuIVector this way.
  hl_memcpy_device2host(
      *view_m_data, gpuVec->getData(), sizeof(float) * (*dim1));
} else {
  // Neither known concrete type: nothing is copied.
  LOG(INFO) << "Unexpected situation";
}

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_ACTIVATION_FUNCTIONS_H_
#define HL_ACTIVATION_FUNCTIONS_H_
@ -21,11 +20,8 @@ limitations under the License. */
/**
* Activation functions: sigmoid, relu, tanh and linear.
*/
#define HPPL_ACTIVE_FUNCTION {hppl::sigmoid, \
hppl::relu, \
hppl::tanh, \
hppl::linear \
}
#define HPPL_ACTIVE_FUNCTION \
{ hppl::sigmoid, hppl::relu, hppl::tanh, hppl::linear }
namespace hppl {

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_AGGREGATE_H_
#define HL_AGGREGATE_H_

@ -12,22 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_AVX_FUNCTIONS_H_
#define HL_AVX_FUNCTIONS_H_
#include <immintrin.h>
namespace hppl {
__m256 relu(const __m256 a);
__m256 sigmoid(const __m256 a);
__m256 tanh(const __m256 a);
__m256 linear(const __m256 a);
__m256 relu(const __m256 a, const __m256 b);
__m256 sigmoid(const __m256 a, const __m256 b);
__m256 tanh(const __m256 a, const __m256 b);
__m256 linear(const __m256 a, const __m256 b);
__m256 relu(const __m256 a);
__m256 sigmoid(const __m256 a);
__m256 tanh(const __m256 a);
__m256 linear(const __m256 a);
__m256 relu(const __m256 a, const __m256 b);
__m256 sigmoid(const __m256 a, const __m256 b);
__m256 tanh(const __m256 a, const __m256 b);
__m256 linear(const __m256 a, const __m256 b);
} // namespace hppl
#endif // HL_AVX_FUNCTIONS_H_

@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_BASE_H_
#define HL_BASE_H_
@ -153,7 +151,6 @@ typedef enum {
HL_VALUE_END
} hl_matrix_value_t;
/**
* @brief HPPL matrix format.
*/
@ -163,8 +160,7 @@ typedef enum {
HL_SPARSE_END
} hl_matrix_format_t;
typedef struct _hl_matrix_s * hl_matrix_s;
typedef struct _hl_matrix_s *hl_matrix_s;
/**
* @brief HPPL sparse matrix.
@ -209,7 +205,6 @@ typedef struct {
#define HL_FLOAT_MIN 2.2250738585072014e-308
#endif
/**
* The maximum input value for exp, used to avoid overflow problem.
*
@ -217,14 +212,13 @@ typedef struct {
*/
#define EXP_MAX_INPUT 40.0
/**
* @brief DIVUP(x, y) is similar to ceil(x / y).
* @note For CUDA, DIVUP will be used to specify
* the size of blockDim.
*/
#ifndef DIVUP
#define DIVUP(x, y) (((x) + (y) - 1) / (y))
#define DIVUP(x, y) (((x) + (y)-1) / (y))
#endif
#ifdef __NVCC__
@ -244,11 +238,10 @@ extern __thread cudaStream_t default_stream;
#define CHECK_SYNC(msg) \
if (true == g_sync_flag) { \
hl_stream_synchronize(HPPL_STREAM_DEFAULT); \
cudaError_t err \
= (cudaError_t)hl_get_device_last_error(); \
CHECK_EQ(cudaSuccess, err) << "[" << msg << "] " \
<< "CUDA error: " \
<< hl_get_device_error_string((size_t)err); \
cudaError_t err = (cudaError_t)hl_get_device_last_error(); \
CHECK_EQ(cudaSuccess, err) \
<< "[" << msg << "] " \
<< "CUDA error: " << hl_get_device_error_string((size_t)err); \
}
#endif /* __NVCC__ */

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_BATCH_TRANSPOSE_H_
#define HL_BATCH_TRANSPOSE_H_
@ -31,10 +30,7 @@ limitations under the License. */
* order. Each batch has height * width data, which are
* arranged in height-first (or row-first) manner.
*/
extern void batchTranspose(const real* input,
real* output,
int width,
int height,
int batchSize);
extern void batchTranspose(
const real* input, real* output, int width, int height, int batchSize);
#endif // HL_BATCH_TRANSPOSE_H_

File diff suppressed because it is too large Load Diff

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CUDA_H_
#define HL_CUDA_H_
@ -22,8 +21,7 @@ limitations under the License. */
/**
* @brief HPPL event.
*/
typedef struct _hl_event_st * hl_event_t;
typedef struct _hl_event_st *hl_event_t;
/**
* @brief return cuda runtime api version.
@ -42,7 +40,7 @@ extern void hl_start();
* If device is NULL, all GPUs are started.
* @param[in] number number of devices.
*/
extern void hl_specify_devices_start(int* device, int number);
extern void hl_specify_devices_start(int *device, int number);
/**
* @brief Queries if a device may directly access a peer device's memory.
@ -126,7 +124,7 @@ extern int hl_get_device();
*
* @return dest_d pointer to device memory.
*/
extern void* hl_malloc_device(size_t size);
extern void *hl_malloc_device(size_t size);
/**
* @brief Free device memory.
@ -143,7 +141,7 @@ extern void hl_free_mem_device(void *dest_d);
*
* @return dest_h pointer to host memory.
*/
extern void* hl_malloc_host(size_t size);
extern void *hl_malloc_host(size_t size);
/**
* @brief Free host page-lock memory.
@ -261,8 +259,7 @@ extern void hl_destroy_event(hl_event_t event);
*
* @return time Time between start and end in ms.
*/
extern float hl_event_elapsed_time(hl_event_t start,
hl_event_t end);
extern float hl_event_elapsed_time(hl_event_t start, hl_event_t end);
/**
* @brief Records an event.
@ -300,7 +297,7 @@ extern void hl_set_device_flags_block();
/**
* @brief Returns the last error string from a cuda runtime call.
*/
extern const char* hl_get_device_error_string();
extern const char *hl_get_device_error_string();
/**
* @brief Returns the last error string from a cuda runtime call.
@ -309,7 +306,7 @@ extern const char* hl_get_device_error_string();
*
* @see hl_get_device_last_error()
*/
extern const char* hl_get_device_error_string(size_t err);
extern const char *hl_get_device_error_string(size_t err);
/**
* @brief Returns the last error number.

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CUDA_CUBLAS_H_
#define HL_CUDA_CUBLAS_H_
@ -29,12 +28,8 @@ limitations under the License. */
* @param[in] ldc the first dimension of C_d.
*
*/
extern void hl_matrix_transpose(real *A_d,
real *C_d,
int dimM,
int dimN,
int lda,
int ldc);
extern void hl_matrix_transpose(
real *A_d, real *C_d, int dimM, int dimN, int lda, int ldc);
/*
* @brief Matrix transpose, while lda = dimN, ldc = dimM.
@ -45,10 +40,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] dimN matrix width.
*
*/
extern void hl_matrix_transpose(real *A_d,
real *C_d,
int dimM,
int dimN);
extern void hl_matrix_transpose(real *A_d, real *C_d, int dimM, int dimN);
/*
* @brief Matrix inverse
@ -60,11 +52,7 @@ extern void hl_matrix_transpose(real *A_d,
* @param[in] ldc the first dimension of C_d
*
*/
extern void hl_matrix_inverse(real *A_d,
real *C_d,
int dimN,
int lda,
int ldc);
extern void hl_matrix_inverse(real *A_d, real *C_d, int dimN, int lda, int ldc);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
@ -84,12 +72,19 @@ extern void hl_matrix_inverse(real *A_d,
* @param[in] ldc the first dimension of C_d.
*
*/
extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
real *B_d, hl_trans_op_t transb,
extern void hl_matrix_mul(real *A_d,
hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d,
int dimM, int dimN, int dimK,
real alpha, real beta,
int lda, int ldb, int ldc);
int dimM,
int dimN,
int dimK,
real alpha,
real beta,
int lda,
int ldb,
int ldc);
/**
* @brief C_d = alpha*(op(A_d) * op(B_d)) + beta*C_d
@ -106,11 +101,16 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
* @param[in] beta scalar used for multiplication.
*
*/
extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
real *B_d, hl_trans_op_t transb,
extern void hl_matrix_mul(real *A_d,
hl_trans_op_t transa,
real *B_d,
hl_trans_op_t transb,
real *C_d,
int dimM, int dimN, int dimK,
real alpha, real beta);
int dimM,
int dimN,
int dimK,
real alpha,
real beta);
/**
* @brief This function performs the matrix-vector multiplication.
@ -132,11 +132,17 @@ extern void hl_matrix_mul(real *A_d, hl_trans_op_t transa,
*
*/
extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
real *B_d, real *C_d,
int dimM, int dimN,
real alpha, real beta,
int lda, int incb, int incc);
extern void hl_matrix_mul_vector(real *A_d,
hl_trans_op_t trans,
real *B_d,
real *C_d,
int dimM,
int dimN,
real alpha,
real beta,
int lda,
int incb,
int incc);
/**
* @brief This function performs the matrix-vector multiplication.
@ -154,9 +160,13 @@ extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
* @param[in] beta scalar used for multiplication.
*
*/
extern void hl_matrix_mul_vector(real *A_d, hl_trans_op_t trans,
real *B_d, real *C_d,
int dimM, int dimN,
real alpha, real beta);
extern void hl_matrix_mul_vector(real *A_d,
hl_trans_op_t trans,
real *B_d,
real *C_d,
int dimM,
int dimN,
real alpha,
real beta);
#endif /* HL_CUDA_CUBLAS_H_ */

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_CUDA_CUDNN_H_
#define HL_CUDA_CUDNN_H_
@ -324,8 +323,7 @@ extern void hl_convolution_forward_add_bias(hl_tensor_descriptor bias,
* @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdFilterAlgo backward filter algorithm.
*/
extern void hl_convolution_backward_filter(
hl_tensor_descriptor input,
extern void hl_convolution_backward_filter(hl_tensor_descriptor input,
real* input_data,
hl_tensor_descriptor output,
real* output_grad_data,
@ -350,8 +348,7 @@ extern void hl_convolution_backward_filter(
* @param[in] sizeInBytes gpu workspace size (bytes).
* @param[in] convBwdDataAlgo backward data algorithm.
*/
extern void hl_convolution_backward_data(
hl_tensor_descriptor input,
extern void hl_convolution_backward_data(hl_tensor_descriptor input,
real* input_data_grad,
hl_tensor_descriptor output,
real* output_grad_data,
@ -383,8 +380,8 @@ extern void hl_convolution_backward_bias(hl_tensor_descriptor bias,
* @param[in] height matrix height.
* @param[in] width matrix width.
*/
extern void hl_softmax_forward(real *input,
real *output,
// Softmax forward; height and width are the matrix height and width.
// NOTE(review): input/output are presumably height x width buffers — confirm
// the memory layout against the implementation.
extern void hl_softmax_forward(real* input,
real* output,
int height,
int width);
@ -396,8 +393,8 @@ extern void hl_softmax_forward(real *input,
* @param[in] height matrix height.
* @param[in] width matrix width.
*/
extern void hl_softmax_backward(real *output_value,
real *output_grad,
// Softmax backward; height and width are the matrix height and width.
// NOTE(review): both pointers are non-const, so output_grad may be updated in
// place — confirm against the implementation.
extern void hl_softmax_backward(real* output_value,
real* output_grad,
int height,
int width);
@ -426,18 +423,18 @@ extern void hl_softmax_backward(real *output_value,
*
*/
extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
real *input,
real* input,
hl_tensor_descriptor outputDesc,
real *output,
real* output,
hl_tensor_descriptor bnParamDesc,
real *scale,
real *bias,
real* scale,
real* bias,
double factor,
real *runningMean,
real *runningInvVar,
real* runningMean,
real* runningInvVar,
double epsilon,
real *savedMean,
real *savedVar);
real* savedMean,
real* savedVar);
/**
* @brief cudnn batch norm forward.
@ -463,14 +460,14 @@ extern void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
*
*/
extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *input,
real* input,
hl_tensor_descriptor outputDesc,
real *output,
real* output,
hl_tensor_descriptor bnParamDesc,
real *scale,
real *bias,
real *estimatedMean,
real *estimatedVar,
real* scale,
real* bias,
real* estimatedMean,
real* estimatedVar,
double epsilon);
/**
@ -483,7 +480,8 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
* @param[in] inGradDesc input gradient tensor descriptor.
* @param[in] inGrad input gradient data.
* @param[in] dBnParamDesc tensor descriptor desc.
* bnScale, bnBias, running mean/var, save_mean/var.
* bnScale, bnBias, running mean/var,
* save_mean/var.
* @param[in] scale batch normalization scale parameter (in original
* paper scale is referred to as gamma).
* @param[in] scaleGrad batch normalization scale parameter (in original
@ -497,17 +495,17 @@ extern void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
*
*/
extern void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
real *input,
real* input,
hl_tensor_descriptor outGradDesc,
real *outGrad,
real* outGrad,
hl_tensor_descriptor inGradDesc,
real *inGrad,
real* inGrad,
hl_tensor_descriptor dBnParamDesc,
real *scale,
real *scaleGrad,
real *biasGrad,
real* scale,
real* scaleGrad,
real* biasGrad,
double epsilon,
real *savedMean,
real *savedInvVar);
real* savedMean,
real* savedInvVar);
#endif // HL_CUDA_CUDNN_H_

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_DSO_LOADER_H_
#define HL_DSO_LOADER_H_

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_FUNCTIONS_H_
#define HL_FUNCTIONS_H_
@ -30,21 +29,21 @@ limitations under the License. */
#ifndef __NVCC__
// Scalar activation function declarations. This namespace sits inside an
// `#ifndef __NVCC__` guard, so these are only declared when not compiling
// with NVCC.
namespace hppl {
/*
* forward activation: one-argument overloads taking the value `a`.
*/
real relu(const real a);
real sigmoid(const real a);
real tanh(const real a);
real linear(const real a);
real relu(const real a);
real sigmoid(const real a);
real tanh(const real a);
real linear(const real a);
/*
* backward activation: two-argument overloads of the same names.
* NOTE(review): which of `a`/`b` carries the gradient and which the
* activation value is not visible here — confirm against the definitions.
*/
real relu(const real a, const real b);
real sigmoid(const real a, const real b);
real tanh(const real a, const real b);
real linear(const real a, const real b);
real relu(const real a, const real b);
real sigmoid(const real a, const real b);
real tanh(const real a, const real b);
real linear(const real a, const real b);
} // namespace hppl
#ifdef __AVX__

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_GPU_H_
#define HL_GPU_H_

@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_LSTM_H_
#define HL_LSTM_H_

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save