|
|
|
@ -13,6 +13,7 @@
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
|
|
#include "paddle/fluid/imperative/layer.h"
|
|
|
|
|
|
|
|
|
|
#include <deque>
|
|
|
|
|
#include <limits>
|
|
|
|
|
#include <map>
|
|
|
|
@ -22,6 +23,9 @@
|
|
|
|
|
#include "paddle/fluid/framework/lod_tensor.h"
|
|
|
|
|
#include "paddle/fluid/framework/op_registry.h"
|
|
|
|
|
#include "paddle/fluid/framework/operator.h"
|
|
|
|
|
#include "paddle/fluid/framework/tensor_util.h"
|
|
|
|
|
#include "paddle/fluid/operators/math/blas.h"
|
|
|
|
|
#include "paddle/fluid/platform/device_context.h"
|
|
|
|
|
#include "paddle/fluid/string/printf.h"
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
@ -34,22 +38,66 @@ std::map<int, py::object> py_funcs_;
|
|
|
|
|
|
|
|
|
|
using framework::Variable;
|
|
|
|
|
|
|
|
|
|
void AddTo(Variable* src, Variable* dst) {
|
|
|
|
|
framework::LoDTensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
|
|
|
|
|
framework::LoDTensor* src_tensor = src->GetMutable<framework::LoDTensor>();
|
|
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
class TensorAddToFunctor : public boost::static_visitor<> {
|
|
|
|
|
public:
|
|
|
|
|
TensorAddToFunctor(int64_t numel, const T* x, T* y)
|
|
|
|
|
: numel_(numel), x_(x), y_(y) {}
|
|
|
|
|
|
|
|
|
|
void operator()(const platform::CPUPlace& place) {
|
|
|
|
|
platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
|
|
|
|
|
platform::DeviceContextPool::Instance().Get(place));
|
|
|
|
|
auto blas = operators::math::GetBlas<platform::CPUDeviceContext, T>(*ctx);
|
|
|
|
|
blas.AXPY(numel_, 1., x_, y_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
void operator()(const platform::CUDAPlace& place) {
|
|
|
|
|
platform::CUDADeviceContext* ctx =
|
|
|
|
|
dynamic_cast<platform::CUDADeviceContext*>(
|
|
|
|
|
platform::DeviceContextPool::Instance().Get(place));
|
|
|
|
|
auto blas = operators::math::GetBlas<platform::CUDADeviceContext, T>(*ctx);
|
|
|
|
|
blas.AXPY(numel_, 1., x_, y_);
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
void operator()(const platform::CUDAPlace& place) {
|
|
|
|
|
PADDLE_THROW("Do NOT support gradient merge in place %s", place);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// there is NO blas in CUDAPinnedPlace
|
|
|
|
|
void operator()(const platform::CUDAPinnedPlace& place) {
|
|
|
|
|
PADDLE_THROW("Do NOT support gradient merge in place %s", place);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
int64_t numel_;
|
|
|
|
|
const T* x_;
|
|
|
|
|
T* y_;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
|
|
|
|
|
|
/// Accumulates the float tensor held by `src` into the float tensor held by
/// `dst` (dst += src) on the device identified by `place`.
///
/// \param src   variable holding the LoDTensor to add; nothing happens when
///              it has zero elements.
/// \param dst   variable holding the LoDTensor that receives the sum; its
///              float buffer is obtained via `mutable_data<float>(place)`.
/// \param place place used both to obtain the destination buffer and to pick
///              the accumulation backend.
///
/// Raises through PADDLE_ENFORCE when the element counts differ, and through
/// the functor when `place` is not supported.
void AddTo(Variable* src, Variable* dst, platform::Place place) {
  framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
  framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();

  // FIXME(minqiyang): loss_grad op will pass a zero grad of label
  // ugly fix for it
  if (src_tensor->numel() == 0) {
    return;
  }

  PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(),
                 "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
                 src_tensor->numel());

  // The accumulation is done exactly once, by the place-aware functor. The
  // former CPU-only element loop must not run as well: it would add `src`
  // a second time and would force `dst` onto CPUPlace regardless of `place`.
  detail::TensorAddToFunctor<float> func(
      src_tensor->numel(), src_tensor->data<float>(),
      dst_tensor->mutable_data<float>(place));
  boost::apply_visitor(func, place);
}
|
|
|
|
|
|
|
|
|
|
class Autograd {
|
|
|
|
@ -120,6 +168,36 @@ class Autograd {
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/// Creates a fresh VarBase whose LoDTensor has the same dims and LoD as this
/// variable's tensor and receives a copy of its contents on `dst_place`.
///
/// \param dst_place place the copy is made to.
/// \param blocking  when true the copy goes through `TensorCopySync` and the
///                  device context of `dst_place` is waited on afterwards;
///                  otherwise `TensorCopy` is used and nothing is waited on.
/// \return owning pointer to the newly created VarBase.
///
/// Raises through PADDLE_ENFORCE when this variable is not initialized.
std::unique_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
                                             const bool blocking) const {
  PADDLE_ENFORCE(var_->IsInitialized(),
                 "Variable must be initialized when getting numpy tensor");

  std::unique_ptr<VarBase> copied(new VarBase());
  framework::LoDTensor* target =
      copied->var_->GetMutable<framework::LoDTensor>();

  // Bind the source tensor once instead of re-fetching it for every use.
  const framework::LoDTensor& origin = var_->Get<framework::LoDTensor>();
  target->Resize(origin.dims());
  target->set_lod(origin.lod());

  if (!blocking) {
    framework::TensorCopy(origin, dst_place, target);
  } else {
    // Look the context up before copying, then wait on it once the copy has
    // been issued.
    platform::DeviceContext* dst_ctx =
        platform::DeviceContextPool::Instance().Get(dst_place);
    framework::TensorCopySync(origin, dst_place, target);
    dst_ctx->Wait();
  }

  if (platform::is_gpu_place(dst_place)) {
    VLOG(3) << "copy tensor " << var_desc_->Name() << " from gpu";
  }

  return copied;
}
|
|
|
|
|
|
|
|
|
|
framework::LoDTensor& VarBase::GradValue() {
|
|
|
|
|
VLOG(3) << "get var grad " << var_desc_->Name();
|
|
|
|
|
return *(grads_->var_->GetMutable<framework::LoDTensor>());
|
|
|
|
@ -162,9 +240,8 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
|
|
|
|
|
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
|
|
|
|
|
|
|
|
|
|
framework::Scope scope;
|
|
|
|
|
platform::CPUPlace place;
|
|
|
|
|
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
|
|
|
|
|
p.op.RuntimeInferShape(scope, place, ctx);
|
|
|
|
|
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_);
|
|
|
|
|
p.op.RuntimeInferShape(scope, place_, ctx);
|
|
|
|
|
p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -176,7 +253,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
|
|
|
|
|
for (size_t i = 0; i < outputs.size(); ++i) {
|
|
|
|
|
framework::Variable* grad = outputs[i];
|
|
|
|
|
framework::Variable* orig_grad = origin_outputs[i];
|
|
|
|
|
AddTo(grad, orig_grad);
|
|
|
|
|
AddTo(grad, orig_grad, place_);
|
|
|
|
|
delete grad;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -188,8 +265,10 @@ void VarBase::RunBackward() {
|
|
|
|
|
|
|
|
|
|
VLOG(3) << "start backward";
|
|
|
|
|
auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
|
|
|
|
|
float* data = grads_t->mutable_data<float>(platform::CPUPlace());
|
|
|
|
|
std::fill(data, data + grads_t->numel(), 1.0);
|
|
|
|
|
operators::math::set_constant(
|
|
|
|
|
*(platform::DeviceContextPool::Instance().Get(
|
|
|
|
|
var_->GetMutable<framework::LoDTensor>()->place())),
|
|
|
|
|
grads_t, 1.0);
|
|
|
|
|
|
|
|
|
|
PADDLE_ENFORCE(
|
|
|
|
|
grads_ ==
|
|
|
|
|