You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
700 lines
20 KiB
700 lines
20 KiB
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
|
|
|
|
|
|
#pragma once
|
|
|
|
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <ostream>
#include <utility>

#include <hl_gpu.h>

#include "MemoryHandle.h"
#include "paddle/utils/TypeDefs.h"
#include "BaseMatrix.h"
#include "paddle/utils/Thread.h"
|
|
|
|
namespace paddle {
|
|
|
|
// Forward declarations of the vector class templates defined later in this
// header, so that they can refer to one another before being defined.
template <typename T> class BaseVector;
template <typename T> class GpuVectorT;
template <typename T> class CpuVectorT;

// Thread pool type used by the parallel vector API below.
class SyncThreadPool;
|
|
|
|
template<class T>
|
|
class BaseVector : public BaseMatrixT<T> {
|
|
public:
|
|
BaseVector(size_t size, T* data, bool useGpu)
|
|
: BaseMatrixT<T>(1, size, data, false, useGpu),
|
|
size_(this->width_) {}
|
|
|
|
~BaseVector() {}
|
|
|
|
protected:
|
|
size_t& size_;
|
|
};
|
|
|
|
/**
|
|
* Copy or assignemnt constructor will share the data as opposed to making a
|
|
* copy of the original data. To make a copy of the orinal data, use copyFrom()
|
|
* instead.
|
|
*/
|
|
template <class T>
|
|
class VectorT : public BaseVector<T> {
|
|
protected:
|
|
VectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset, bool useGpu)
|
|
: BaseVector<T>(size,
|
|
reinterpret_cast<T*>(memoryHandle->getBuf()) + offset,
|
|
useGpu) {
|
|
memoryHandle_ = memoryHandle;
|
|
}
|
|
|
|
// data is still owned by the caller.
|
|
// data should be valid during the life of this vector.
|
|
// Caller is responsible for release the memory.
|
|
VectorT(size_t size, T* data, bool useGpu)
|
|
: BaseVector<T>(size, data, useGpu) {}
|
|
|
|
public:
|
|
virtual ~VectorT() {}
|
|
|
|
static std::shared_ptr<VectorT<T>> create(size_t size, bool useGpu);
|
|
|
|
static std::shared_ptr<VectorT<T>> create(T* data, size_t size, bool useGpu);
|
|
|
|
static std::shared_ptr<VectorT<T>> create(size_t size,
|
|
MemoryHandlePtr memoryHandle,
|
|
size_t offset = 0);
|
|
|
|
// owner can set SyncThreadPool,
|
|
// if not set, will use globalSyncThreadPool,
|
|
// which can be used in main thread only.
|
|
static std::shared_ptr<VectorT<T>> createParallelVector(
|
|
size_t size, bool useGpu, SyncThreadPool* pool = nullptr);
|
|
|
|
size_t getSize() const { return this->size_; }
|
|
const T* getData() const { return this->data_; }
|
|
T* getData() { return this->data_; }
|
|
|
|
virtual void zeroMem() = 0;
|
|
// set all elements to value
|
|
virtual void reset(const T& value) = 0;
|
|
// fill data by 0, 1, 2, ...
|
|
virtual void fillSequence() = 0;
|
|
|
|
MemoryHandlePtr getMemoryHandle() const { return memoryHandle_; }
|
|
|
|
/**
|
|
* resizing to a big vector will not preserve old values.
|
|
*/
|
|
void resize(size_t newSize) {
|
|
if (!memoryHandle_ || newSize * sizeof(T) > memoryHandle_->getAllocSize()) {
|
|
memoryHandle_ = newMemory(newSize * sizeof(T));
|
|
this->data_ = reinterpret_cast<T*>(memoryHandle_->getBuf());
|
|
}
|
|
this->size_ = newSize;
|
|
}
|
|
|
|
static void resizeOrCreate(std::shared_ptr<VectorT<T>>& vec, size_t size,
|
|
bool useGpu) {
|
|
if (vec) {
|
|
vec->resize(size);
|
|
} else {
|
|
vec = create(size, useGpu);
|
|
}
|
|
}
|
|
|
|
virtual MemoryHandlePtr newMemory(size_t size) = 0;
|
|
|
|
/**
|
|
* form sub vector from *src*, shallow copy
|
|
*/
|
|
void subVecFrom(const VectorT<T>& src, size_t start, size_t size) {
|
|
CHECK_EQ(BaseVector<T>::useGpu_, src.useGpu_);
|
|
CHECK_LT(start, src.size_);
|
|
CHECK_LE(start + size, src.size_);
|
|
|
|
BaseVector<T>::size_ = size;
|
|
BaseVector<T>::data_ = const_cast<T*>(src.data_) + start;
|
|
}
|
|
|
|
std::shared_ptr<VectorT<T>> subVec(size_t start, size_t size) {
|
|
CHECK_LE(start + size, static_cast<size_t>(getSize()));
|
|
return VectorT<T>::create(getData() + start, size, BaseVector<T>::useGpu_);
|
|
}
|
|
|
|
/**
|
|
* form sub vector from *src*, shallow copy
|
|
*/
|
|
void subVecFrom(const T* src, size_t start, size_t size) {
|
|
BaseVector<T>::size_ = size;
|
|
BaseVector<T>::data_ = const_cast<T*>(src) + start;
|
|
}
|
|
|
|
/**
|
|
* form sub vector from *src*, shallow copy
|
|
* in *interval* [interval.first, interval.second)
|
|
*/
|
|
void subVecFrom(const VectorT<T>& src, std::pair<size_t, size_t> interval) {
|
|
subVecFrom(src, interval.first, interval.second - interval.first);
|
|
}
|
|
|
|
/**
|
|
* This function will crash if the size of src and dest is different.
|
|
*/
|
|
virtual void copyFrom(const VectorT<T>& src) = 0;
|
|
|
|
/**
|
|
* If use_gpu, this function will push the copy-task to the specifed-stream
|
|
* and return immediately.
|
|
*
|
|
* If not use GPU, this function is same as
|
|
* the copyFrom(const VectorT<T>& src), which use stream HPPL_STREAM_DEFAULT.
|
|
*/
|
|
virtual void copyFrom(const VectorT<T>& src, hl_stream_t stream) = 0;
|
|
|
|
/**
|
|
* copy size elements from src
|
|
*
|
|
* If this is GpuVector, src can be cpu or gpu memory
|
|
*
|
|
* If this is CpuVector, src is assumed to be cpu memory
|
|
*/
|
|
virtual void copyFrom(const T* src, size_t size) = 0;
|
|
|
|
/**
|
|
* copy size elements from src
|
|
*
|
|
* If this is GpuVector, src can be cpu or gpu memory
|
|
*
|
|
* If this is CpuVector, src is assumed to be cpu memory,
|
|
*/
|
|
virtual void copyFrom(const T* src, size_t size, hl_stream_t stream) = 0;
|
|
|
|
/**
|
|
* exec a func in single/multi thread
|
|
*/
|
|
virtual void exec(SyncThreadPool::JobFunc func) { func(0, 1); }
|
|
|
|
/// Get the buffer point with beginPos
|
|
virtual T* getPoint(const uint64_t beginPos) = 0;
|
|
|
|
/// Get the value for the i'th element
|
|
virtual T getElement(size_t i) const = 0;
|
|
virtual void setElement(size_t i, const T& value) = 0;
|
|
|
|
//---------- math operations ----------------
|
|
|
|
// sum of the absolute value of each elements
|
|
virtual T getAbsSum() = 0;
|
|
|
|
virtual T getSum() = 0;
|
|
virtual T getMax() = 0;
|
|
virtual T getAbsMax() = 0;
|
|
virtual T getMin() = 0;
|
|
|
|
/// element-wise calc: this = (b == value)
|
|
virtual void isEqualTo(const VectorT<T>& b, const T& value) = 0;
|
|
|
|
/// select elements indexed by *ids* from vector *src*
|
|
virtual void selectFrom(const VectorT<T>& src, const VectorT<int>& ids) = 0;
|
|
|
|
enum HistogramType {
|
|
HISTOGRAM_EXPONENT = 0,
|
|
};
|
|
|
|
/**
|
|
* @brief print histogram of vector values
|
|
*
|
|
* @note only exponent histogram supported currently
|
|
*/
|
|
virtual void histogram(std::ostream& os, int type = HISTOGRAM_EXPONENT) = 0;
|
|
|
|
/// generate uniform random value for each element
|
|
virtual void rand() = 0;
|
|
/**
|
|
* generate uniform random value for each element,
|
|
* data range is from 0 to (classes - 1).
|
|
*/
|
|
virtual void rand(size_t classes) = 0;
|
|
|
|
/**
|
|
* Debug use only. Very inefficient for GPU vector.
|
|
* get the value at pos.
|
|
*/
|
|
virtual T get(size_t pos) = 0;
|
|
|
|
/**
|
|
* generate univariate Gaussian distributed random numbers
|
|
* with given mean and standardDeviation.
|
|
*/
|
|
virtual void randnorm(real mean, real standardDeviation) = 0;
|
|
|
|
/**
|
|
* generate uniform distributed random numbers
|
|
* with given range.
|
|
*/
|
|
virtual void uniform(real left, real right) = 0;
|
|
|
|
/// print the first "num" elements of the Vector
|
|
virtual void print(std::ostream& os, size_t num) const = 0;
|
|
|
|
/// print the "idx" element of the Vector
|
|
virtual void printOneElement(std::ostream& os, size_t idx) const = 0;
|
|
|
|
protected:
|
|
friend class GpuVectorT<T>;
|
|
friend class CpuVectorT<T>;
|
|
virtual void copyTo(CpuVectorT<T>* dest) const = 0;
|
|
virtual void copyTo(GpuVectorT<T>* dest) const = 0;
|
|
MemoryHandlePtr memoryHandle_;
|
|
};
|
|
|
|
template <class T>
|
|
std::ostream& operator<<(std::ostream& os, const VectorT<T>& vec) {
|
|
vec.print(os, vec.getSize());
|
|
return os;
|
|
}
|
|
|
|
template <class T>
|
|
class GpuVectorT : public VectorT<T> {
|
|
public:
|
|
explicit GpuVectorT(size_t size);
|
|
GpuVectorT(size_t size, GpuMemHandlePtr memHandle, size_t offset)
|
|
: VectorT<T>(size, memHandle, offset, true) {}
|
|
|
|
// data is still owned by the caller.
|
|
// data should be valid during the life of this vector.
|
|
// Caller is responsible for release the memory.
|
|
GpuVectorT(size_t size, T* data) : VectorT<T>(size, data, true) {}
|
|
|
|
virtual MemoryHandlePtr newMemory(size_t size) {
|
|
return std::make_shared<GpuMemoryHandle>(size);
|
|
}
|
|
virtual void zeroMem();
|
|
virtual void reset(const T& value);
|
|
virtual void fillSequence();
|
|
|
|
virtual void copyFrom(const T* src, size_t size);
|
|
virtual void copyFrom(const T* src, size_t size, hl_stream_t stream);
|
|
virtual void copyFrom(const VectorT<T>& src);
|
|
virtual void copyFrom(const VectorT<T>& src, hl_stream_t stream);
|
|
virtual T getElement(size_t i) const;
|
|
virtual void setElement(size_t i, const T& value);
|
|
virtual T* getPoint(const uint64_t beginPos);
|
|
|
|
virtual T getAbsSum();
|
|
virtual T getSum();
|
|
virtual T getMax();
|
|
virtual T getAbsMax();
|
|
virtual T getMin();
|
|
virtual void isEqualTo(const VectorT<T>& b, const T& value);
|
|
virtual void selectFrom(const VectorT<T>& src, const VectorT<int>& ids);
|
|
virtual void histogram(std::ostream& os, int type);
|
|
virtual void rand();
|
|
virtual void rand(size_t classes);
|
|
virtual void randnorm(real mean, real standardDeviation);
|
|
virtual void uniform(real left, real right);
|
|
virtual T get(size_t pos);
|
|
virtual void print(std::ostream& os, size_t num) const;
|
|
virtual void printOneElement(std::ostream& os, size_t idx) const;
|
|
|
|
protected:
|
|
virtual void copyTo(CpuVectorT<T>* dest) const;
|
|
virtual void copyTo(GpuVectorT<T>* dest) const;
|
|
};
|
|
|
|
template <class T>
|
|
class CpuVectorT : public VectorT<T> {
|
|
public:
|
|
explicit CpuVectorT(size_t size);
|
|
CpuVectorT(size_t size, MemoryHandlePtr memoryHandle, size_t offset)
|
|
: VectorT<T>(size, memoryHandle, offset, false) {}
|
|
|
|
// data is still owned by the caller.
|
|
// data should be valid during the life of this vector.
|
|
// Caller is responsible for release the memory.
|
|
CpuVectorT(size_t size, T* data) : VectorT<T>(size, data, false) {}
|
|
|
|
/**
|
|
* If src is a CpuVector, the new CpuVector will share the data with src
|
|
*
|
|
* If src is a GpuVector, the new CpuVector will copy data from src
|
|
*/
|
|
explicit CpuVectorT(const VectorT<T>& src);
|
|
|
|
virtual MemoryHandlePtr newMemory(size_t size) {
|
|
return std::make_shared<CpuMemoryHandle>(size);
|
|
}
|
|
|
|
virtual void zeroMem();
|
|
virtual void reset(const T& value);
|
|
virtual void fillSequence();
|
|
virtual void copyFrom(const T* src, size_t size);
|
|
virtual void copyFrom(const T* src, size_t size, hl_stream_t stream);
|
|
virtual void copyFrom(const VectorT<T>& src);
|
|
virtual void copyFrom(const VectorT<T>& src, hl_stream_t stream);
|
|
virtual void copyTo(CpuVectorT<T>* dest) const;
|
|
virtual void copyTo(GpuVectorT<T>* dest) const;
|
|
|
|
/// Get the buffer point with beginPos
|
|
virtual T* getPoint(const uint64_t beginPos) {
|
|
return this->getData() + beginPos;
|
|
}
|
|
|
|
virtual T getElement(size_t i) const { return this->getData()[i]; }
|
|
virtual void setElement(size_t i, const T& value) {
|
|
this->getData()[i] = value;
|
|
}
|
|
|
|
virtual T getAbsSum();
|
|
virtual T getSum();
|
|
virtual T getMax();
|
|
virtual T getAbsMax();
|
|
virtual T getMin();
|
|
virtual void isEqualTo(const VectorT<T>& b, const T& value);
|
|
virtual void selectFrom(const VectorT<T>& src, const VectorT<int>& ids);
|
|
virtual void histogram(std::ostream& os, int type);
|
|
virtual void rand();
|
|
virtual void rand(size_t classes);
|
|
virtual void randnorm(real mean, real standardDeviation);
|
|
virtual void uniform(real left, real right);
|
|
virtual T get(size_t pos);
|
|
virtual void print(std::ostream& os, size_t num) const;
|
|
virtual void printOneElement(std::ostream& os, size_t idx) const;
|
|
};
|
|
|
|
template <class T>
|
|
class ParallelCpuVectorT : public CpuVectorT<T> {
|
|
public:
|
|
ParallelCpuVectorT(size_t size, SyncThreadPool* pool)
|
|
: CpuVectorT<T>(size), pool_(pool) {}
|
|
|
|
virtual void zeroMem() {
|
|
parallelExec([](CpuVectorT<T>& vec) { vec.CpuVectorT<T>::zeroMem(); });
|
|
}
|
|
virtual void randnorm(real mean, real standardDeviation) {
|
|
parallelExec([=](CpuVectorT<T>& vec) {
|
|
vec.CpuVectorT<T>::randnorm(mean, standardDeviation);
|
|
});
|
|
}
|
|
virtual void uniform(real left, real right) {
|
|
parallelExec(
|
|
[=](CpuVectorT<T>& vec) { vec.CpuVectorT<T>::uniform(left, right); });
|
|
}
|
|
|
|
virtual void exec(SyncThreadPool::JobFunc jobFunc);
|
|
|
|
private:
|
|
typedef std::function<void(CpuVectorT<T>& vec)> ExecFunc;
|
|
void parallelExec(ExecFunc func);
|
|
SyncThreadPool* pool_;
|
|
};
|
|
|
|
/**
|
|
* A class to do conversion between CpuVector and GpuVector automatically.
|
|
*/
|
|
template <class T>
|
|
class CpuGpuVectorT {
|
|
public:
|
|
/**
|
|
* @brief An enum type of SyncedFlag using to
|
|
* mark data memory is in CPU or GPU.
|
|
*
|
|
* DATA_AT_CPU: data is located in CPU.
|
|
*
|
|
* DATA_AT_GPU: data is located in GPU.
|
|
*
|
|
* SYNCED: data is located in CPU and GPU simultaneously.
|
|
*/
|
|
enum SyncedFlag {
|
|
DATA_AT_CPU = 0,
|
|
DATA_AT_GPU = 1,
|
|
SYNCED = 2
|
|
};
|
|
|
|
/**
|
|
* @brief A constructor, create cpuVectorT_ or gpuVectorT_.
|
|
*
|
|
* @param[in] size data size.
|
|
* @param[in] useGpu use gpu or not.
|
|
*/
|
|
explicit CpuGpuVectorT(size_t size, bool useGpu);
|
|
|
|
/**
|
|
* @brief A constructor, create CpuGpuVectorT by VectorT.
|
|
*
|
|
* If src is CpuVector, cpuVectorT_ is shared data with src.
|
|
*
|
|
* If src is GpuVector, gpuVectorT_ is shared data with src.
|
|
*/
|
|
explicit CpuGpuVectorT(const std::shared_ptr<VectorT<T>>& src);
|
|
|
|
/**
|
|
* @brief A constructor.
|
|
*
|
|
* If useGpu is true, data should be located in device and
|
|
* create gpuVectorT_ with data.
|
|
*
|
|
* If useGpu is false, data should be located in host and
|
|
* create cpuVectorT_ with data.
|
|
*
|
|
* @note Data is owned by the caller and should be valid during
|
|
* the life of this vector.
|
|
* Caller is responsible for release the memory.
|
|
*/
|
|
CpuGpuVectorT(size_t size, T* data, bool useGpu);
|
|
|
|
CpuGpuVectorT(CpuGpuVectorT<T>& src,
|
|
size_t offset, size_t size);
|
|
|
|
virtual ~CpuGpuVectorT() {}
|
|
|
|
static std::shared_ptr<CpuGpuVectorT<T>> create(size_t size, bool useGpu);
|
|
|
|
/**
|
|
* @brief resize vector.
|
|
*
|
|
* If useGpu is true, resize gpuVectorT_ and set syncFlag_ to DATA_AT_GPU,
|
|
*
|
|
* otherwise resize cpuVectorT_ and set syncFlag_ to DATA_AT_CPU.
|
|
*/
|
|
void resize(size_t size, bool useGpu);
|
|
|
|
/**
|
|
* @brief resize or create CpuGpuVectorT.
|
|
*/
|
|
static void resizeOrCreate(std::shared_ptr<CpuGpuVectorT<T>>& vec,
|
|
size_t size, bool useGpu);
|
|
|
|
|
|
/**
|
|
* @brief return a const cpuVectorT_ or gpuVectorT_.
|
|
*
|
|
* If useGpu is true, return gpuVectorT_.
|
|
*
|
|
* If useGpu is false, return cpuVectorT_.
|
|
*
|
|
* @note Caller should not change the data.
|
|
* If caller changes const attribute,
|
|
* should set syncFlag_.
|
|
*/
|
|
std::shared_ptr<const VectorT<T>> getVector(bool useGpu) const;
|
|
|
|
/**
|
|
* @brief return a const cpuVectorT_ or gpuVectorT_.
|
|
*
|
|
* @note: This interface will change syncFlag_, so if you will
|
|
* not change the data, you should call getVector.
|
|
*/
|
|
std::shared_ptr<VectorT<T>>& getMutableVector(bool useGpu);
|
|
|
|
/**
|
|
* @brief return const T* data.
|
|
*
|
|
* If useGpu is true, return device data.
|
|
*
|
|
* If useGpu is false, return host data.
|
|
*/
|
|
const T* getData(bool useGpu) const;
|
|
|
|
// TODO(yuyang18): Make getData more c++ style.
|
|
// inline T* getData(bool useGpu) {
|
|
// return getMutableData(useGpu);
|
|
// }
|
|
|
|
T* getMutableData(bool useGpu);
|
|
|
|
/**
|
|
* If useGpu is true, gpuVectorT_->Op().
|
|
*
|
|
* If useGpu is false, cpuVectorT_->Op().
|
|
*
|
|
* Op is zeroMem, fillSequence, ...
|
|
*/
|
|
void zeroMem(bool useGpu);
|
|
void fillSequence(bool useGpu);
|
|
void setElement(size_t i, const T& value, bool useGpu);
|
|
|
|
/**
|
|
* @brief return i-th element.
|
|
*/
|
|
T getElement(size_t i) const;
|
|
|
|
/**
|
|
* @brief return vector size.
|
|
*/
|
|
size_t getSize() const {
|
|
size_t size = 0;
|
|
switch (*sync_) {
|
|
case SYNCED:
|
|
case DATA_AT_CPU:
|
|
size = cpuVectorT_->getSize();
|
|
break;
|
|
case DATA_AT_GPU:
|
|
size = gpuVectorT_->getSize();
|
|
break;
|
|
default:
|
|
LOG(FATAL) << "Not support";
|
|
break;
|
|
}
|
|
return size;
|
|
}
|
|
|
|
/// copy data to cpuVectorT_.
|
|
inline void copyToCpu(const T* data, size_t size) {
|
|
this->resizeOrCreate(size, false);
|
|
cpuVectorT_->copyFrom(data, size);
|
|
setSync(DATA_AT_CPU);
|
|
}
|
|
/// copy data to cpuVectorT_ using specifed-stream.
|
|
inline void copyToCpu(const T* data, size_t size, hl_stream_t stream) {
|
|
this->resizeOrCreate(size, false);
|
|
cpuVectorT_->copyFrom(data, size, stream);
|
|
setSync(DATA_AT_CPU);
|
|
}
|
|
|
|
/// copy data to gpuVectorT_.
|
|
inline void copyToGpu(const T* data, size_t size) {
|
|
this->resizeOrCreate(size, true);
|
|
gpuVectorT_->copyFrom(data, size);
|
|
setSync(DATA_AT_GPU);
|
|
}
|
|
/// copy data to gpuVectorT_ using specifed-stream.
|
|
inline void copyToGpu(const T* data, size_t size, hl_stream_t stream) {
|
|
this->resizeOrCreate(size, true);
|
|
gpuVectorT_->copyFrom(data, size, stream);
|
|
setSync(DATA_AT_GPU);
|
|
}
|
|
|
|
/**
|
|
* @brief copy from src using specifed-stream.
|
|
*
|
|
* If src is CpuVectorT, copy to cpuVectorT_.
|
|
*
|
|
* If src is GpuVectorT, copy to gpuVectorT_.
|
|
*/
|
|
void copyFrom(const VectorT<T>& src, hl_stream_t stream);
|
|
|
|
/**
|
|
* @brief copy data.
|
|
*
|
|
* If useGpu is false, copy host data to cpuVectorT_.
|
|
*
|
|
* If useGpu is true, copy device data to gpuVectorT_.
|
|
*
|
|
* @note data address should consistent with useGpu.
|
|
*/
|
|
void copyFrom(const T* data, size_t size, bool useGpu);
|
|
void copyFrom(const T* data, size_t size, hl_stream_t stream, bool useGpu);
|
|
|
|
/**
|
|
* @brief copy from (src + offset) using specifed-stream.
|
|
*/
|
|
void copyFrom(CpuGpuVectorT<T>& src, size_t offset, size_t size,
|
|
bool useGpu, hl_stream_t stream);
|
|
|
|
/**
|
|
* @brief copy from src using specifed-stream.
|
|
*/
|
|
void copyFrom(CpuGpuVectorT<T>& src, hl_stream_t stream);
|
|
|
|
/**
|
|
* @brief return sync_.
|
|
*/
|
|
inline SyncedFlag* getSync() const {
|
|
return sync_;
|
|
}
|
|
|
|
/**
|
|
* @brief set sync_.
|
|
*/
|
|
inline void setSync(SyncedFlag* sync) {
|
|
sync_ = sync;
|
|
}
|
|
|
|
inline void setSync(SyncedFlag syncFlag) {
|
|
if (sync_) {
|
|
*sync_ = syncFlag;
|
|
} else {
|
|
syncFlag_ = syncFlag;
|
|
sync_ = &syncFlag_;
|
|
}
|
|
}
|
|
|
|
inline void setSync(bool useGpu) {
|
|
SyncedFlag flag = useGpu ? DATA_AT_GPU : DATA_AT_CPU;
|
|
setSync(flag);
|
|
}
|
|
|
|
protected:
|
|
void resizeOrCreate(size_t size, bool useGpu);
|
|
|
|
/**
|
|
* @brief copy between cpuVectorT_ and gpuVectorT_.
|
|
*
|
|
* If syncFlag_ is DATA_AT_CPU and SYNCED, do nothing.
|
|
*
|
|
* If syncFlag_ is DATA_AT_GPU, copy gpuVectorT_ to cpuVectorT_
|
|
* and set syncFlag_ to SYNCED.
|
|
*/
|
|
void copyToCpu();
|
|
|
|
/**
|
|
* @brief copy between cpuVectorT_ and gpuVectorT_.
|
|
*
|
|
* If syncFlag_ is DATA_AT_GPU and SYNCED, do nothing.
|
|
*
|
|
* If syncFlag_ is DATA_AT_CPU, copy cpuVectorT_ to gpuVectorT_
|
|
* and set syncFlag_ to SYNCED.
|
|
*/
|
|
void copyToGpu();
|
|
|
|
/// host pointer.
|
|
std::shared_ptr<VectorT<T>> cpuVectorT_;
|
|
/// device pointer.
|
|
std::shared_ptr<VectorT<T>> gpuVectorT_;
|
|
/// specify current data address.
|
|
SyncedFlag syncFlag_;
|
|
SyncedFlag* sync_;
|
|
};
|
|
|
|
typedef VectorT<real> Vector;
|
|
typedef CpuVectorT<real> CpuVector;
|
|
typedef GpuVectorT<real> GpuVector;
|
|
|
|
typedef VectorT<int> IVector;
|
|
typedef CpuVectorT<int> CpuIVector;
|
|
typedef GpuVectorT<int> GpuIVector;
|
|
|
|
typedef std::shared_ptr<Vector> VectorPtr;
|
|
typedef std::shared_ptr<CpuVector> CpuVectorPtr;
|
|
typedef std::shared_ptr<GpuVector> GpuVectorPtr;
|
|
|
|
typedef std::shared_ptr<IVector> IVectorPtr;
|
|
typedef std::shared_ptr<CpuIVector> CpuIVectorPtr;
|
|
typedef std::shared_ptr<GpuIVector> GpuIVectorPtr;
|
|
|
|
typedef CpuGpuVectorT<real> CpuGpuVector;
|
|
typedef CpuGpuVectorT<int> ICpuGpuVector;
|
|
typedef std::shared_ptr<CpuGpuVector> CpuGpuVectorPtr;
|
|
typedef std::shared_ptr<ICpuGpuVector> ICpuGpuVectorPtr;
|
|
|
|
} // namespace paddle
|