Feature/auto prune in dygraph (#19757)

* refactor dygraph, test=develop

* fix failing unittest, test=develop

* polish code, test=develop

* check windows ci error, test=develop
try to fix windows ci error by np.allclose, test=develop

* polish vlog and profiler, test=develop

* try to fix preceding ops order, test=develop

* test transformer in windows ci, test=develop

* use python c-api to speed up tracer.trace, test=develop

* test=develop, fix docker with paddle nccl problem

* test=develop, add ut for debug string and gradient_accumulator

* test=develop, add tests for layer/gradient_accumulator/prepared_op

* test=develop, fix compile error for test_prepared_op

* test=develop, add more ut for dygraph

* test=develop, create API.spec for dygraph api change

* test=develop, refactor name to make it easier to understand

* test=develop, refactor name to make it easier to understand

* test=develop, fix multi-gpu failure, add Tracer tests, change PADDLE_ENFORCE to PADDLE_ENFORCE_EQ

* test=develop, fix UT failure on parallel se-resnext

* test=develop, change one more PADDLE_ENFORCE

* support auto prune in dygraph mode

* test=develop, support auto prune

* test=develop, merge develop conflict

* test=develop, fix test_layer and test_tracer ut

* test=develop, fix bug which may cause stop_gradient to be disabled with a list of backward inputs
Authored by Jiabin Yang, committed by GitHub
parent 418a0967f3
commit 454254115e

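A minimal sketch of the user-facing behavior this change targets (auto prune in dygraph), based on the updated unit tests further down; the two-branch setup and variable names are illustrative, not part of the patch:

    import numpy as np
    import paddle.fluid as fluid

    x = np.ones([2, 2], dtype=np.float32)

    with fluid.dygraph.guard():
        a = fluid.dygraph.base.to_variable(x)
        b = fluid.dygraph.base.to_variable(x)
        # Variables now default to stop_gradient=True; only branches that start
        # from an input explicitly marked stop_gradient=False stay in the
        # backward graph, everything else is pruned automatically.
        a.stop_gradient = False

        out = fluid.layers.elementwise_add(a, b)
        loss = fluid.layers.reduce_sum(out)
        loss.backward()

        # Only 'a' is expected to receive a gradient; the 'b' branch was pruned.
        # (Gradient inspection API differs between fluid versions.)
        print(a.gradient())
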
@@ -70,23 +70,48 @@ void BasicEngine::Init(VarBase* var, const detail::BackwardStrategy& strategy) {
   auto& fwd_var = var->Var().Get<framework::LoDTensor>();
   auto* grad_var =
       var->GradVarBase()->MutableVar()->GetMutable<framework::LoDTensor>();
+  VLOG(6) << "init loss grad:" << var->GradVarBase()->Name()
+          << " as stop_gradient false";
+  var->GradVarBase()->InnerSetOverridedStopGradient(false);
+  var->GradVarBase()->SetGradGenerated(true);
   auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place());
   grad_var->Resize(fwd_var.dims());
   grad_var->mutable_data(fwd_var.place(), fwd_var.type());
   operators::math::set_constant(*dev_ctx, grad_var, 1.0);
 }
 
-bool BasicEngine::CheckBackwardInputs(OpBase* op) {
+void BasicEngine::CheckBackwardInputs(OpBase* op) {
   for (auto& pair : op->GetInsMap()) {
     for (auto& var : pair.second) {
-      if (var && !var->StopGradient()) {
-        return true;
+      if (var && IsGrad(var.get())) {
+        // if grad var has OverridedStopGradient skip this Op
+        if (!var->GradGenerated()) {
+          VLOG(6) << "Set ungenerated Grad: " << var->Name() << " as zero";
+          auto* dev_ctx =
+              platform::DeviceContextPool::Instance().Get(op->place());
+          auto* tensor = var->MutableVar()->GetMutable<framework::LoDTensor>();
+          tensor->mutable_data(op->place(), var->DataType());
+          operators::math::set_constant(*dev_ctx, tensor, 0.0);
+        } else {
+          continue;
+        }
       }
     }
   }
-  return false;
 }
 
+void BasicEngine::SetBackwardOutputs(paddle::imperative::OpBase* op) {
+  for (auto& pair : op->GetOutsMap()) {
+    for (auto& var : pair.second) {
+      if (var) {
+        // Set Backward outputs's generate_grad as true
+        var->SetGradGenerated(true);
+        VLOG(6) << "Set backward output: " << var->Name()
+                << "'s SetGeneratedGrad as True";
+      }
+    }
+  }
+}
+
 void BasicEngine::PrepareGradAccumulators(OpBase* op) {
   for (const auto& pair : op->GetOutsMap()) {
     for (const auto& var : pair.second) {
@@ -126,22 +151,19 @@ void BasicEngine::PrepareDeps() {
     q.pop();
     VLOG(3) << "Checking grads of op " << cur_op->Type();
 
-    if (!CheckBackwardInputs(cur_op)) {
-      // TODO(zjl): clear ops that do not need grad before running autograd
-      VLOG(3) << "Stop checking preceding ops of " << cur_op->Type()
-              << " because all of its backward inputs is stop_gradient=True";
-      continue;
-    }
+    CheckBackwardInputs(cur_op);
+
+    SetBackwardOutputs(cur_op);
 
     PrepareGradAccumulators(cur_op);
 
-    auto& preceding_ops = cur_op->GradPendingOps();
-    for (auto* preceding_op : preceding_ops) {
-      PADDLE_ENFORCE_NOT_NULL(preceding_op);
-      ++op_deps_[preceding_op];
-      if (visited.count(preceding_op) == 0) {
-        visited.insert(preceding_op);
-        q.push(preceding_op);
+    auto& grad_pending_ops = cur_op->GradPendingOps();
+    for (auto* grad_pending_op : grad_pending_ops) {
+      PADDLE_ENFORCE_NOT_NULL(grad_pending_op);
+      ++op_deps_[grad_pending_op];
+      if (visited.count(grad_pending_op) == 0) {
+        visited.insert(grad_pending_op);
+        q.push(grad_pending_op);
       }
     }
   }
@@ -204,19 +226,19 @@ void BasicEngine::Execute() {
     }
 
     // Step 3: Collect ready ops
-    for (auto* preceding_op : cur_op->GradPendingOps()) {
-      PADDLE_ENFORCE_NOT_NULL(preceding_op);
-      auto iter = op_deps_.find(preceding_op);
+    for (auto* grad_pending_op : cur_op->GradPendingOps()) {
+      PADDLE_ENFORCE_NOT_NULL(grad_pending_op);
+      auto iter = op_deps_.find(grad_pending_op);
       if (iter == op_deps_.end()) {
         continue;
       }
 
-      VLOG(3) << "Found preceding op of " << cur_op->Type();
+      VLOG(3) << "Found grad_pending op of " << cur_op->Type();
 
       // An Op is ready to go while its deps comes to zero
       if (--(iter->second) == 0) {
-        q.push(preceding_op);
-        VLOG(3) << "Push preceding op " << preceding_op->Type()
+        q.push(grad_pending_op);
+        VLOG(3) << "Push grad_pending op " << grad_pending_op->Type()
                 << " into queue";
       }
     }

@@ -18,6 +18,7 @@
 #include <cstdint>
 #include <memory>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/imperative/backward_strategy.h"
@@ -49,11 +50,20 @@ class Engine {
   void InsertOp(OpBase* op, std::shared_ptr<OpBase> op_shared) {
     grad_ops_[op] = std::move(op_shared);
   }
-  void Clear() { grad_ops_.clear(); }
+
+  void InsertGradVar(VarBase* grad) { grad_vars_.emplace(grad); }
+
+  bool IsGrad(VarBase* var) { return grad_vars_.count(var) > 0; }
+
+  void Clear() {
+    grad_ops_.clear();
+    grad_vars_.clear();
+  }
 
  private:
   std::unordered_map<OpBase*, std::shared_ptr<OpBase>>
       grad_ops_;  // opBase for remove - grad_op
+  std::unordered_set<VarBase*> grad_vars_;
 };
 
 class BasicEngine : public Engine {
@@ -69,7 +79,9 @@ class BasicEngine : public Engine {
  private:
   void PrepareDeps();
 
-  bool CheckBackwardInputs(OpBase* op);
+  void CheckBackwardInputs(OpBase* op);
+
+  void SetBackwardOutputs(OpBase* op);
 
   void PrepareGradAccumulators(OpBase* op);

@@ -105,10 +105,23 @@ void TensorAdd(const framework::Variable& src, framework::Variable* dst) {
 void EagerGradientAccumulator::Add(std::shared_ptr<VarBase> var,
                                    size_t trace_id) {
   auto* dst_var = var_->MutableVar();
-  if (cur_cnt_ == 0) {
-    *dst_var = std::move(*(var->MutableVar()));
+  auto place = var->Var().Get<framework::LoDTensor>().place();
+  if (!var_->OverridedStopGradient()) {
+    VLOG(3) << "Sum Gradient for: " << var_->Name();
+    if (cur_cnt_ == 0) {
+      *dst_var = std::move(*(var->MutableVar()));
+    } else {
+      TensorAdd(var->Var(), dst_var);
+    }
   } else {
-    TensorAdd(var->Var(), dst_var);
+    if (!var_->Var().IsInitialized() ||
+        !var_->Var().Get<framework::LoDTensor>().IsInitialized()) {
+      VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero";
+      auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+      auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
+      tensor->mutable_data(place, var->DataType());
+      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+    }
   }
   ++cur_cnt_;
 }
@@ -116,30 +129,44 @@ void EagerGradientAccumulator::Add(std::shared_ptr<VarBase> var,
 void SortedGradientAccumulator::Add(std::shared_ptr<VarBase> var,
                                     size_t trace_id) {
   auto* dst_var = var_->MutableVar();
-  if (ref_cnt_ == 1) {
-    *dst_var = std::move(*(var->MutableVar()));
-  } else {
-    if (tmp_grad_vars_.empty()) {
-      tmp_grad_vars_.reserve(ref_cnt_);
-    }
-
-    tmp_grad_vars_.emplace_back(std::move(var), trace_id);
-
-    if (tmp_grad_vars_.size() != ref_cnt_) {
-      return;
-    }
-
-    std::sort(tmp_grad_vars_.begin(), tmp_grad_vars_.end(),
-              [](const std::pair<std::shared_ptr<VarBase>, size_t>& p1,
-                 const std::pair<std::shared_ptr<VarBase>, size_t>& p2) {
-                return p1.second > p2.second;
-              });
-
-    *dst_var = std::move(*(tmp_grad_vars_[0].first->MutableVar()));
-    for (size_t i = 1; i < tmp_grad_vars_.size(); ++i) {
-      TensorAdd(tmp_grad_vars_[i].first->Var(), dst_var);
-    }
-
-    tmp_grad_vars_.clear();
+  auto place = var->Var().Get<framework::LoDTensor>().place();
+  if (!var_->OverridedStopGradient()) {
+    if (ref_cnt_ == 1) {
+      *dst_var = std::move(*(var->MutableVar()));
+    } else {
+      if (tmp_grad_vars_.empty()) {
+        tmp_grad_vars_.reserve(ref_cnt_);
+      }
+
+      tmp_grad_vars_.emplace_back(std::move(var), trace_id);
+
+      if (tmp_grad_vars_.size() != ref_cnt_) {
+        return;
+      }
+
+      std::sort(tmp_grad_vars_.begin(), tmp_grad_vars_.end(),
+                [](const std::pair<std::shared_ptr<VarBase>, size_t>& p1,
+                   const std::pair<std::shared_ptr<VarBase>, size_t>& p2) {
+                  return p1.second > p2.second;
+                });
+
+      *dst_var = std::move(*(tmp_grad_vars_[0].first->MutableVar()));
+      for (size_t i = 1; i < tmp_grad_vars_.size(); ++i) {
+        TensorAdd(tmp_grad_vars_[i].first->Var(), dst_var);
+      }
+
+      tmp_grad_vars_.clear();
+    }
+  } else {
+    if (!var_->Var().IsInitialized() ||
+        !var_->Var().Get<framework::LoDTensor>().IsInitialized()) {
+      VLOG(6) << "Set StopGradient Grad: " << var->Name() << " as zero";
+      auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+      auto* tensor = var_->MutableVar()->GetMutable<framework::LoDTensor>();
+      tensor->mutable_data(place, var->DataType());
+      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+    }
+    // looks like tmp_grad_vars will not have any member but just in case
+    tmp_grad_vars_.clear();
   }
 }

@@ -93,14 +93,44 @@ class VarBase {
     return &(grad_var_->var_);
   }
 
-  void SetStopGradient(bool stop_gradient) {
-    stop_gradient_ = stop_gradient;
-    if (grad_var_) {
-      grad_var_->stop_gradient_ = stop_gradient;
-    }
-  }
-
-  bool StopGradient() const { return stop_gradient_; }
+  // This is used for python api
+  void SetOverridedStopGradient(bool stop_gradient) {
+    if (stop_gradient) {
+      overrided_stop_gradient_ = 1;
+    } else {
+      overrided_stop_gradient_ = 0;
+    }
+    if (grad_var_) {
+      grad_var_->SetOverridedStopGradient(stop_gradient);
+    }
+  }
+
+  // This is used for python api
+  bool OverridedStopGradient() const {
+    if (overrided_stop_gradient_ == 0) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  // This is used inside C++
+  int InnerOverridedStopGradient() const { return overrided_stop_gradient_; }
+
+  bool GradGenerated() const { return grad_generated_; }
+
+  void SetGradGenerated(bool generated) { grad_generated_ = generated; }
+
+  // This is used inside C++
+  void InnerSetOverridedStopGradient(bool stop_gradient) {
+    if (overrided_stop_gradient_ == -1) {
+      overrided_stop_gradient_ = static_cast<int>(stop_gradient);
+      if (grad_var_) {
+        grad_var_->InnerSetOverridedStopGradient(stop_gradient);
+      }
+    } else {
+      VLOG(6) << "Ignore Stop gradient conversion for Var: " << Name()
+              << "Set value is: " << overrided_stop_gradient_;
+    }
+  }
 
   void SetPersistable(bool persistable) { persistable_ = persistable; }
@@ -156,8 +186,11 @@ class VarBase {
   // grad_op indicates which grad_op will this var be used as input
   std::vector<std::weak_ptr<OpBase>> grad_ops_;
 
-  bool stop_gradient_{false};
+  // add this property for users may set stop_gradient themselves and this
+  // should override the
+  // frameworks setting (-1) unset, (1) true, (0) false
+  int overrided_stop_gradient_{-1};
+  bool grad_generated_{false};
   bool persistable_{false};
 
   framework::proto::VarType::Type type_{framework::proto::VarType::LOD_TENSOR};

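The tri-state flag above replaces the old boolean: -1 means unset, 0 means the variable requires a gradient, 1 means stop gradient. An explicit assignment from Python (SetOverridedStopGradient) always takes effect, while the tracer's propagation (InnerSetOverridedStopGradient) only fills in flags that are still unset, so user settings are never silently overridden. A small sketch of the resulting Python-side behavior (illustrative, using the fluid dygraph API that appears in the tests below):

    import numpy as np
    import paddle.fluid as fluid

    x = np.ones([2, 2], dtype=np.float32)

    with fluid.dygraph.guard():
        a = fluid.dygraph.base.to_variable(x)
        print(a.stop_gradient)   # True: a fresh VarBase starts "unset" (-1),
                                 # which reads back as stop_gradient=True

        a.stop_gradient = False  # explicit user setting: flag becomes 0
        b = fluid.layers.relu(a)
        print(b.stop_gradient)   # False: the tracer propagated the flag to the
                                 # output via InnerSetOverridedStopGradient
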
@@ -139,10 +139,10 @@ TEST(test_layer, test_varbase_basic) {
                   vin_with_grad->MutableGradVar()) != 0));
   ASSERT_TRUE(
       dynamic_cast<framework::Variable*>(vin_with_grad->MutableGradVar()) != 0);
-  vin_with_grad->SetStopGradient(true);
-  ASSERT_TRUE(vin_with_grad->StopGradient());
+  vin_with_grad->SetOverridedStopGradient(false);
+  ASSERT_FALSE(vin_with_grad->OverridedStopGradient());
   ASSERT_NO_FATAL_FAILURE(vin_with_grad->SetPersistable(true));
-  ASSERT_TRUE(vin_with_grad->StopGradient());
+  ASSERT_FALSE(vin_with_grad->OverridedStopGradient());
   ASSERT_NO_FATAL_FAILURE(vin_with_grad->SetName("new_name"));
   ASSERT_EQ(vin_with_grad->Name(), "new_name");
 }

@@ -81,6 +81,7 @@ TEST(test_tracer, test_track_backward_output) {
       new imperative::VarBase(true, "x_in"));
   std::shared_ptr<imperative::VarBase> y_in(
       new imperative::VarBase(false, "y_in"));
+  x_in->SetOverridedStopGradient(false);
   std::shared_ptr<imperative::VarBase> vout(
       new imperative::VarBase(true, "vout"));
   platform::CPUPlace place;
@@ -119,6 +120,7 @@ TEST(test_tracer, test_track_backward_input) {
   std::shared_ptr<imperative::VarBase> vout(
       new imperative::VarBase(false, "vout"));
   platform::CPUPlace place;
+  x_in->SetOverridedStopGradient(false);
   std::vector<float> src_data(10, 2.0);
   std::vector<int64_t> dims1 = {2, 5};
   std::vector<int64_t> dims2 = {5, 2};

@@ -32,6 +32,16 @@ static std::vector<std::unique_ptr<framework::OpDesc>> CreateGradOpDescs(
   }
 }
 
+static void PassStopGradient(const NameVarBaseMap& outs, bool generate_grad) {
+  for (const auto& name_pair : outs) {
+    for (const auto& vb : name_pair.second) {
+      VLOG(6) << "Set output: " << vb->Name() << "'s OverridedStopGradient as "
+              << generate_grad;
+      vb->InnerSetOverridedStopGradient(generate_grad);
+    }
+  }
+}
+
 void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
                      const NameVarBaseMap& outs, framework::AttributeMap attrs,
                      const platform::Place& place, bool trace_backward) {
@@ -45,16 +55,27 @@ void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
     TraceBackward(op, framework::OpDesc(op->Type(), op->InputNameMap(),
                                         op->OutputNameMap(), op->Attrs()),
                   ins, outs);
-    VLOG(6) << "Finish tracking Backward of op: " << type;
+  } else {
+    VLOG(3) << "No Grad to track for Op: " << type;
   }
+  VLOG(6) << "Finish tracing fwd op: " << type;
 }
 
 bool Tracer::ComputeRequiredGrad(const NameVarBaseMap& ins,
                                  const NameVarBaseMap& outs,
                                  bool trace_backward) {
-  // TODO(jiabin): Implement auto prune here
-  return trace_backward;
+  if (!trace_backward) return false;
+
+  for (const auto& name_pair : ins) {
+    for (const auto& var_base : name_pair.second) {
+      if (!var_base->OverridedStopGradient()) {
+        VLOG(6) << "Find out input: " << var_base->Name()
+                << "'s GeneratedGrad is True";
+        PassStopGradient(outs, var_base->OverridedStopGradient());
+        return true;
+      }
+    }
+  }
+  return false;
 }
 
 void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
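With this version of ComputeRequiredGrad, an op is only recorded for backward when at least one of its inputs requires a gradient; otherwise the grad op is pruned entirely and the outputs keep their default stop_gradient=True. A sketch of the observable effect (illustrative example, not part of the patch):

    import numpy as np
    import paddle.fluid as fluid

    x = np.ones([2, 2], dtype=np.float32)

    with fluid.dygraph.guard():
        a = fluid.dygraph.base.to_variable(x)   # stop_gradient defaults to True
        b = fluid.dygraph.base.to_variable(x)   # stop_gradient defaults to True

        c = fluid.layers.elementwise_add(a, b)
        print(c.stop_gradient)   # True: no input requires grad, no grad op traced

        a.stop_gradient = False
        d = fluid.layers.elementwise_add(a, b)
        print(d.stop_gradient)   # False: at least one input requires grad, so the
                                 # grad op is traced and the flag is propagated
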
@@ -133,14 +154,25 @@ void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
         PADDLE_ENFORCE_EQ(fwd_var_iter != name_to_var.end(), true,
                           "Cannot find forward variable named %s",
                           fwd_var_name);
+        const auto& tmp = (*(fwd_var_iter->second))->GradVarBase();
         PADDLE_ENFORCE_NOT_NULL(
-            (*(fwd_var_iter->second))->GradVarBase(),
+            tmp.get(),
             "Grad of %s should "
             "not be NULL when we Track_Backward Input of %s",
             (*(fwd_var_iter->second))->Name(), grad_op->Type());
-        (*(fwd_var_iter->second))->GradVarBase()->AddGradOps(grad_op);
+        // Create grad_in's dim in tensor for Grad Dependency compute
+        auto* tensor = tmp->MutableVar()->GetMutable<framework::LoDTensor>();
+        tensor->Resize((*(fwd_var_iter->second))
+                           ->Var()
+                           .Get<framework::LoDTensor>()
+                           .dims());
+        // Add Grad Op for grad_in
+        tmp->AddGradOps(grad_op);
         VLOG(3) << "Add Grad Op " << grad_op->Type() << " for :"
                 << (*(fwd_var_iter->second))->GradVarBase()->Name();
+        // Add Grad var input to engine set
+        engine_->InsertGradVar(tmp.get());
+        VLOG(3) << "Add Grad: " << tmp->Name() << " in to Engine";
         bwd_in.emplace_back((*(fwd_var_iter->second))->GradVarBase());
       } else {
         // If it is a forward var, just add it
@@ -150,8 +182,7 @@ void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
                           grad_in_var_name);
         bwd_in.emplace_back(*(fwd_var_iter->second));
       }
-
-      VLOG(3) << "Set backward input from fwd var" << grad_ins.first << " of "
+      VLOG(3) << "Set backward input " << grad_ins.first << " of "
               << grad_op->Type() << " to be "
              << (bwd_in.back() ? bwd_in.back()->Name() : "nullptr");
     }
@@ -173,40 +204,44 @@ void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
       PADDLE_ENFORCE_EQ(fwd_var_iter != name_to_var.end(), true,
                         "Cannot find forward variable named %s",
                         iter->second);
-      PADDLE_ENFORCE_NOT_NULL(
-          (*(fwd_var_iter->second))->GradVarBase(),
-          "Grad of %s should "
-          "not be NULL when we Track_Backward Output of %s",
-          (*(fwd_var_iter->second))->Name(), grad_op->Type());
-
-      bwd_out.emplace_back((*(fwd_var_iter->second))->GradVarBase());
-
-      VLOG(3) << "Set backward output " << grad_outs.first << " of "
-              << grad_op->Type() << " to be "
-              << (bwd_out.back() ? bwd_out.back()->Name() : "nullptr");
-
-      auto preceding_ops =
-          (*(fwd_var_iter->second))->GradVarBase()->GradOps();
-
-      if (VLOG_IS_ON(3) && !preceding_ops.empty()) {
-        VLOG(3) << "Add preceding Op of :"
-                << (*(fwd_var_iter->second))->GradVarBase()->Name()
-                << " It's preceding Op are: ";
-        for (const auto& op : preceding_ops) {
-          VLOG(3) << op->Type();
-        }
-      }
-
-      if (!preceding_ops.empty()) {
-        for (const auto& op : preceding_ops) {
-          PADDLE_ENFORCE_NOT_NULL(op, "No nullptr should be preceding_op");
-          if (visited_preceding_ops.count(op) == 0) {
-            visited_preceding_ops.insert(op);
-            grad_op->InsertGradPendingOps(op);
-          }
-        }
-      } else {
-        VLOG(5) << "Hit leaf VarBase";
-        VLOG(5) << "Hit leaf VarBase"
-                << (*(fwd_var_iter->second))->GradVarBase()->Name();
-      }
+      const auto& tmp = (*(fwd_var_iter->second))->GradVarBase();
+
+      PADDLE_ENFORCE_NOT_NULL(tmp.get(),
+                              "Grad output: %s of op: %s should not be NULL",
+                              (tmp->Name(), grad_op->Type()));
+
+      if ((!tmp->OverridedStopGradient()) || (grad_outs.second.size() > 1)) {
+        VLOG(3) << "Set backward output " << grad_outs.first << " of "
+                << grad_op->Type() << " to be " << tmp->Name()
+                << ". Its Overrided Stop_Gradient is: False";
+        bwd_out.emplace_back(tmp);
+        auto grad_pending_ops =
+            (*(fwd_var_iter->second))->GradVarBase()->GradOps();
+        if (VLOG_IS_ON(3) && !grad_pending_ops.empty()) {
+          VLOG(3) << "Add grad_pending Op of :"
+                  << (*(fwd_var_iter->second))->GradVarBase()->Name()
+                  << " It's grad_pending Op are: ";
+          for (const auto& op : grad_pending_ops) {
+            VLOG(3) << op->Type();
+          }
+        }
+        if (!grad_pending_ops.empty()) {
+          for (const auto& op : grad_pending_ops) {
+            PADDLE_ENFORCE_NOT_NULL(op,
+                                    "No nullptr should be grad_pending op");
+            if (visited_preceding_ops.count(op) == 0) {
+              visited_preceding_ops.insert(op);
+              grad_op->InsertGradPendingOps(op);
+            }
+          }
+        } else {
+          VLOG(5) << "Hit leaf VarBase"
+                  << (*(fwd_var_iter->second))->GradVarBase()->Name();
+        }
+      } else {
+        VLOG(3) << "Skip backward output " << grad_outs.first << " of "
+                << grad_op->Type() << " Named: " << tmp->Name()
+                << ", since its Overrided Stop_Gradient is: True";
+      }
     }
   }
 }

@@ -230,13 +230,11 @@ void BindImperative(py::module *m_ptr) {
           [](imperative::VarBase &self, const std::string &name,
              framework::proto::VarType::Type type,
              framework::proto::VarType::Type dtype,
-             const std::vector<int> &dims, bool stop_gradient,
-             bool persistable) {
+             const std::vector<int> &dims, bool persistable) {
             new (&self) imperative::VarBase(name);
             self.SetPersistable(persistable);
             self.SetType(type);
             self.SetDataType(dtype);
-            self.SetStopGradient(stop_gradient);
             if (type == framework::proto::VarType::LOD_TENSOR) {
               auto *tensor =
                   self.MutableVar()->GetMutable<framework::LoDTensor>();
@@ -302,8 +300,9 @@ void BindImperative(py::module *m_ptr) {
       .def_property_readonly("dtype", &imperative::VarBase::DataType)
       .def_property("persistable", &imperative::VarBase::Persistable,
                     &imperative::VarBase::SetPersistable)
-      .def_property("stop_gradient", &imperative::VarBase::StopGradient,
-                    &imperative::VarBase::SetStopGradient);
+      .def_property("stop_gradient",
+                    &imperative::VarBase::OverridedStopGradient,
+                    &imperative::VarBase::SetOverridedStopGradient);
 
   py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
   layer.def(py::init<>())

@@ -456,12 +456,13 @@ class Variable(object):
         if in_dygraph_mode():
             # record vars in tracer rather than blocks
             self._ivar = kwargs.get("ivar", None)
+            self.stop_gradient_ = kwargs.get("stop_gradient", True)
             if not self._ivar:
                 self._ivar = core.VarBase(
                     name, type
                     if type else core.VarDesc.VarType.LOD_TENSOR, dtype
                     if dtype else core.VarDesc.VarType.FP32,
-                    list(shape) if shape else [], stop_gradient, True
+                    list(shape) if shape else [], True
                     if persistable else False)
             if persistable:
                 _dygraph_tracer().trace_var(name, self)
@@ -1847,6 +1848,7 @@ class Block(object):
                 pass
             else:
                 initializer(param, self)
+        param.stop_gradient = False
         return param
 
     def append_op(self, *args, **kwargs):

@@ -266,7 +266,8 @@ class LayerHelperBase(object):
                          shape,
                          dtype,
                          is_bias=False,
-                         default_initializer=None):
+                         default_initializer=None,
+                         stop_gradient=False):
         """Create parameters for this layers.
 
         Args:
@@ -320,6 +321,7 @@ class LayerHelperBase(object):
             return self.main_program.global_block().create_parameter(
                 dtype=dtype,
                 shape=shape,
+                stop_gradient=stop_gradient,
                 **attr._to_kwargs(with_initializer=True))
         else:
             self.startup_program.global_block().create_parameter(

@@ -6980,8 +6980,8 @@ def one_hot(input, depth, allow_out_of_range=False):
         type="one_hot",
         inputs=inputs,
         attrs=attrs,
-        outputs={'Out': one_hot_out},
-        stop_gradient=True)
+        outputs={'Out': one_hot_out})
+    one_hot_out.stop_gradient = True
     return one_hot_out
 
@@ -7019,8 +7019,7 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
            type='increment',
            inputs={'X': [counter]},
            outputs={'Out': [counter]},
-           attrs={'step': float(step)},
-           stop_gradient=True)
+           attrs={'step': float(step)})
        counter.stop_gradient = True
 
    return counter

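The two layer changes above stop passing stop_gradient through append_op and instead set the flag on the returned variable itself, which is the same mechanism auto prune relies on under dygraph. A small usage sketch in static graph mode (illustrative, not part of the patch):

    import paddle.fluid as fluid

    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    one_hot_label = fluid.layers.one_hot(input=label, depth=10)
    print(one_hot_label.stop_gradient)   # True: now set directly on the output Variable
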
@@ -183,14 +183,18 @@ class TestImperative(unittest.TestCase):
         with fluid.dygraph.guard():
             inputs = []
             for _ in range(10):
-                inputs.append(fluid.dygraph.base.to_variable(x))
+                tmp = fluid.dygraph.base.to_variable(x)
+                tmp.stop_gradient = False
+                inputs.append(tmp)
             ret = fluid.layers.sums(inputs)
             loss = fluid.layers.reduce_sum(ret)
             loss.backward()
         with fluid.dygraph.guard():
             inputs2 = []
             for _ in range(10):
-                inputs2.append(fluid.dygraph.base.to_variable(x))
+                tmp = fluid.dygraph.base.to_variable(x)
+                tmp.stop_gradient = False
+                inputs2.append(tmp)
             ret2 = fluid.layers.sums(inputs2)
             loss2 = fluid.layers.reduce_sum(ret2)
             backward_strategy = fluid.dygraph.BackwardStrategy()
@@ -214,6 +218,7 @@ class TestImperative(unittest.TestCase):
         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.dygraph.guard():
             var_inp = fluid.dygraph.base.to_variable(np_inp)
+            var_inp.stop_gradient = False
             l = MyLayer("my_layer")
             x = l(var_inp)[0]
             self.assertIsNotNone(x)
@@ -223,6 +228,7 @@ class TestImperative(unittest.TestCase):
         with fluid.dygraph.guard():
             var_inp2 = fluid.dygraph.base.to_variable(np_inp)
+            var_inp2.stop_gradient = False
             l2 = MyLayer("my_layer")
             x2 = l2(var_inp2)[0]
             self.assertIsNotNone(x2)

@@ -47,6 +47,8 @@ class TestRecurrentFeed(unittest.TestCase):
             fluid.default_main_program().random_seed = seed
             original_in1 = to_variable(original_np1)
             original_in2 = to_variable(original_np2)
+            original_in1.stop_gradient = False
+            original_in2.stop_gradient = False
             rt = RecurrentTest("RecurrentTest")
             for i in range(3):
