Implement StaticModelRunner to support dygraph fine-tune static graph pre-training model (#23171)

* static model runner basic implement, test=develop * add run program op to execute loaded program, test=develop * refactor static model runner & run program op, test=develop * reset engine.cc to resolve conflict * adapt the change of dygraph double grad, test=develop * refactor impl to solve control flow error, test=develop * clear debug code, test=develop * fix ci str compatible error & checkout dygraph grad maker & add example, test=develop * hide api & add op test, test=develop * fix run program op test places error, test=develop * fix program by review comment, test=develop * delete change var desc name, test=develop * fix other program by review comment, test=develop * remove _static_graph_guard, test=develop * add selectedrows test, test=develop * remove desc parser, test=develop * fix detail program, test=develop * change scope create & add test, test=develop
6 years ago · 75bd350710
parent 9297f49e4b
commit 75bd350710
17 changed files with 1986 additions and 18 deletions
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@ -70,11 +70,6 @@ void ExecutorPrepareContext::PrepareUnusedVars(
    force_disable_gc = true;
  }
 #endif
-  force_disable_gc_ = force_disable_gc;
-  if (GetEagerDeletionThreshold() < 0 || force_disable_gc_) {
-    return;
-  }
-
  // If gc is enabled and block size > 1
  if (prog_.Size() > 1) {
    operators::PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
@ -84,6 +79,12 @@ void ExecutorPrepareContext::PrepareUnusedVars(
    operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
        prog_, block_id_, ops_);
  }
+
+  force_disable_gc_ = force_disable_gc;
+  if (GetEagerDeletionThreshold() < 0 || force_disable_gc_) {
+    return;
+  }
+
  unused_vars_ = GetUnusedVars(prog_.Block(block_id_), ops_, keep_vars);
 }

@ -412,9 +413,11 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
  return result;
 }

-void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
-                                  bool create_local_scope, bool create_vars,
-                                  bool keep_kids) {
+void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
+                                         Scope* scope, int64_t start_op_index,
+                                         int64_t end_op_index,
+                                         bool create_local_scope,
+                                         bool create_vars, bool keep_kids) {
  platform::RecordBlock b(kProgramId);
  PADDLE_ENFORCE_NOT_NULL(scope);
  Scope* local_scope = scope;
@ -446,7 +449,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
 #endif
  }

-  for (auto& op : ctx->ops_) {
+  for (int64_t i = start_op_index; i < end_op_index; ++i) {
+    auto& op = ctx->ops_[i];
    op->Run(*local_scope, place_);
    if (gc) {
      DeleteUnusedTensors(*local_scope, op.get(), ctx->unused_vars_, gc.get());
@ -471,6 +475,15 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
  }
 }

+// Runs every op of the prepared context by delegating to
+// RunPartialPreparedContext with the op index range [0, ctx->ops_.size()).
+void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
+                                  bool create_local_scope, bool create_vars,
+                                  bool keep_kids) {
+  const int64_t first_op = 0;
+  const int64_t op_count = ctx->ops_.size();
+  RunPartialPreparedContext(ctx, scope, first_op, op_count, create_local_scope,
+                            create_vars, keep_kids);
+}
+
 void Executor::RunPreparedContext(
    ExecutorPrepareContext* ctx, Scope* scope,
    std::map<std::string, const LoDTensor*>* feed_targets,
--- a/paddle/fluid/framework/executor.h
+++ b/paddle/fluid/framework/executor.h
@ -115,6 +115,12 @@ class Executor {

  void CreateVariables(const ProgramDesc& pdesc, Scope* scope, int block_id);

+  void RunPartialPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
+                                 int64_t start_op_index, int64_t end_op_index,
+                                 bool create_local_scope = true,
+                                 bool create_vars = true,
+                                 bool keep_kids = false);
+
  void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                          bool create_local_scope = true,
                          bool create_vars = true, bool keep_kids = false);
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@ -64,6 +64,9 @@ constexpr char kZeroVarSuffix[] = "@ZERO";
 /// Variables with this suffix are the new Gradient.
 constexpr char kNewGradSuffix[] = "@NEWGRAD@";

+/// Variables with this suffix are loaded from a pre-trained model.
+constexpr char kLoadedVarSuffix[] = "@LOADED";
+
 /// RuntimeContext is used to relate input/output names of Operator with
 /// the corresponding variables in name scope.
 /// If an Op has attribute kEnableCacheRuntimeContext, it means that in a same
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@ -200,11 +200,12 @@ void BasicEngine::Execute() {
              iter != accumulators_.end(), true,
              platform::errors::NotFound("Cannot find gradient of variable %s",
                                         var->Name()));
+
          if (!var->OverridedStopGradient() && iter->second->RefCnt() == 1) {
            continue;
          }

-          var = std::make_shared<VariableWrapper>("Gtmp@");
+          var = std::make_shared<VariableWrapper>(var->Name());
          need_accu_var_list_.emplace_back(iter->second.get(), var);
        }
      }
--- a/paddle/fluid/operators/run_program_op.cc
+++ b/paddle/fluid/operators/run_program_op.cc
@ -0,0 +1,185 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/run_program_op.h"
+
+#include <string>
+
+namespace paddle {
+namespace operators {
+
+/// Operator definition that is registered below as `run_program`.
+/// InferShape only verifies that X, Params and Out are present; it does not
+/// set any output shape.
+class RunProgramOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), true,
+                      platform::errors::NotFound(
+                          "Input(X) of RunProgramOp should not be null."));
+    PADDLE_ENFORCE_EQ(ctx->HasInputs("Params"), true,
+                      platform::errors::NotFound(
+                          "Input(Params) of RunProgramOp should not be null."));
+    PADDLE_ENFORCE_EQ(ctx->HasOutputs("Out"), true,
+                      platform::errors::NotFound(
+                          "Output(Out) of RunProgramOp should not be null."));
+  }
+
+ protected:
+  /* [Why use single type kernel]:
+   *
+   * This op is similar to a control flow op: it does not need
+   * an op kernel, but in order to make it execute under dynamic
+   * graph mode, it is implemented with an op kernel.
+   *
+   * The kernel data type (int, float or any other type) has no
+   * effect on its execution logic, so a data type is directly
+   * specified here.
+   *
+   * In other words, the concrete data type chosen here is not important.
+   */
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(framework::proto::VarType::FP32,
+                                   ctx.GetPlace());
+  }
+
+  // Returns the expected kernel type unchanged for every input variable,
+  // whatever the variable's name or tensor is.
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name, const framework::Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const override {
+    return expected_kernel_type;
+  }
+};
+
+/// Declares the inputs, outputs and attributes of the `run_program` operator
+/// together with its user-facing documentation strings.
+class RunProgramOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "(vector<LoDTensor>)"
+             "The input tensors of RunProgram operator, also the feed targets "
+             "of loaded program.")
+        .AsDuplicable();
+    AddInput("Params",
+             "(vector<LoDTensor or SelectedRows>)"
+             "The input parameter of RunProgram operator, also the parameters "
+             "of the loaded program.")
+        .AsDuplicable();
+    AddOutput("Out",
+              "(vector<LoDTensor>)"
+              "The output tensors of RunProgram operator, also the fetch "
+              "targets of the loaded program.")
+        .AsDuplicable();
+    // Adjacent string literals are concatenated verbatim, so the sentence
+    // ending in "one scope." needs its own trailing space before "NOTE:".
+    AddOutput("OutScope",
+              "(StepScopeVar)"
+              "A vector of execution scope in RunProgram operator, which "
+              "contains at most one scope. "
+              "NOTE: Do not use Scope directly because Scope output is not "
+              "currently supported.");
+    AddAttr<BlockDesc*>("global_block",
+                        "(BlockDesc *)"
+                        "The global block of executed program desc.");
+    AddAttr<int64_t>("start_op_index",
+                     "(int64_t)"
+                     "The index of the op to start execution");
+    AddAttr<int64_t>("end_op_index",
+                     "(int64_t)"
+                     "The index of the op to stop execution");
+    AddAttr<bool>("is_test",
+                  "(bool, default false) Set to true for inference only, false "
+                  "for training.")
+        .SetDefault(false);
+    AddComment(R"DOC(
+RunProgram operator.
+
+The RunProgram operator receives a program's feed targets, fetch targets, 
+and parameters, and receives the forward and backward program desc 
+as attributes, and then executes the program by executor.
+
+NOTE: This operator is added so that the inference model stored by 
+`fluid.io.save_inference_model` under the static graph mode can be loaded 
+under the dynamic graph mode for fine-tuning or inferencing.
+      
+)DOC");
+  }
+};
+
+/// Operator definition that is registered below as `run_program_grad`.
+/// InferShape only verifies that X, Params and Out@GRAD are present.
+class RunProgramGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), true,
+                      platform::errors::NotFound(
+                          "Input(X) of RunProgramGradOp should not be null."));
+    PADDLE_ENFORCE_EQ(
+        ctx->HasInputs("Params"), true,
+        platform::errors::NotFound(
+            "Input(Params) of RunProgramGradOp should not be null."));
+    PADDLE_ENFORCE_EQ(
+        ctx->HasInputs(framework::GradVarName("Out")), true,
+        platform::errors::NotFound(
+            "Input(Out@GRAD) of RunProgramGradOp should not be null."));
+    // NOTE: The X@GRAD and Params@GRAD outputs are deliberately not checked:
+    // they may not exist, because the corresponding variables can be set
+    // to stop_gradient = True.
+  }
+
+ protected:
+  /* see [Why use single type kernel] */
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(framework::proto::VarType::FP32,
+                                   ctx.GetPlace());
+  }
+
+  // Returns the expected kernel type unchanged for every input variable,
+  // whatever the variable's name or tensor is.
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name, const framework::Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const override {
+    return expected_kernel_type;
+  }
+};
+
+/// Describes the `run_program_grad` op that corresponds to a forward
+/// `run_program` op: it forwards X, Params and OutScope, takes Out@GRAD as
+/// input, produces X@GRAD and Params@GRAD, and copies the forward attributes.
+template <typename T>
+class RunProgramGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+ protected:
+  void Apply(GradOpPtr<T> grad_op) const override {
+    const auto out_grad_name = framework::GradVarName("Out");
+    const auto x_grad_name = framework::GradVarName("X");
+    const auto params_grad_name = framework::GradVarName("Params");
+
+    grad_op->SetType("run_program_grad");
+
+    // Inputs of the backward op.
+    grad_op->SetInput("X", this->Input("X"));
+    grad_op->SetInput("Params", this->Input("Params"));
+    grad_op->SetInput(out_grad_name, this->OutputGrad("Out"));
+    grad_op->SetInput("OutScope", this->Output("OutScope"));
+
+    // Outputs of the backward op.
+    grad_op->SetOutput(x_grad_name, this->InputGrad("X"));
+    grad_op->SetOutput(params_grad_name, this->InputGrad("Params"));
+
+    grad_op->SetAttrMap(this->Attrs());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+// Register the forward op with grad makers for both the static graph
+// (OpDesc) and the imperative (OpBase) modes, then the backward op.
+REGISTER_OPERATOR(run_program, ops::RunProgramOp, ops::RunProgramOpMaker,
+                  ops::RunProgramGradOpMaker<paddle::framework::OpDesc>,
+                  ops::RunProgramGradOpMaker<paddle::imperative::OpBase>);
+REGISTER_OPERATOR(run_program_grad, ops::RunProgramGradOp);
+
+/* see [Why use single type kernel] */
+REGISTER_OP_CPU_KERNEL(
+    run_program,
+    ops::RunProgramOpKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(
+    run_program_grad,
+    ops::RunProgramGradOpKernel<paddle::platform::CPUDeviceContext, float>);
--- a/paddle/fluid/operators/run_program_op.cu.cc
+++ b/paddle/fluid/operators/run_program_op.cu.cc
@ -0,0 +1,28 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/run_program_op.h"
+
+#include "paddle/fluid/platform/float16.h"
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+/* Only a float kernel is registered for each op;
+ * see [Why use single type kernel] in run_program_op.cc. */
+REGISTER_OP_CUDA_KERNEL(
+    run_program, ops::RunProgramOpKernel<plat::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    run_program_grad,
+    ops::RunProgramGradOpKernel<plat::CUDADeviceContext, float>);
--- a/paddle/fluid/operators/run_program_op.h
+++ b/paddle/fluid/operators/run_program_op.h
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@ -621,6 +621,10 @@ void BindImperative(py::module *m_ptr) {
             return self.MutableGradVar()->Get<framework::LoDTensor>();
           },
           py::return_value_policy::reference)
+      .def("_set_grad_type",
+           [](imperative::VarBase &self, framework::proto::VarType::Type type) {
+             self.MutableGradVarBase()->SetType(type);
+           })
      .def("_grad_ivar",
           [](const imperative::VarBase &self) {
             auto &grad_var = self.GradVarBase();
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@ -989,7 +989,11 @@ All parameter, weight, gradient are variables in Paddle.
             PADDLE_ENFORCE_EQ(self.IsType<framework::ReaderHolder>(), true);
             return self.GetMutable<framework::ReaderHolder>();
           },
-           py::return_value_policy::reference);
+           py::return_value_policy::reference)
+      .def("set_scope", [](Variable &self, Scope &scope) {
+        auto scope_vec = self.GetMutable<std::vector<framework::Scope *>>();
+        scope_vec->emplace_back(&scope);
+      });

  BindReader(&m);

@ -1180,6 +1184,8 @@ All parameter, weight, gradient are variables in Paddle.
        []() { return std::string(framework::kEmptyVarName); });
  m.def("grad_var_suffix",
        []() { return std::string(framework::kGradVarSuffix); });
+  m.def("loaded_var_suffix",
+        []() { return std::string(framework::kLoadedVarSuffix); });
  m.def_submodule(
       "var_names",
       "The module will return special predefined variable name in Paddle")
--- a/python/paddle/fluid/dygraph/init.py
+++ b/python/paddle/fluid/dygraph/init.py
@ -44,6 +44,9 @@ from .backward_strategy import *
 from . import jit
 from .jit import *

+from . import static_runner
+from .static_runner import StaticModelRunner
+
 __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
--- a/python/paddle/fluid/dygraph/static_runner.py
+++ b/python/paddle/fluid/dygraph/static_runner.py
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@ -329,12 +329,12 @@ def _fetch_var(name, scope=None, return_numpy=True):
    Returns:
       LodTensor|numpy.ndarray
    """
-    assert isinstance(name, str)
+    assert isinstance(name, six.string_types)
    if scope is None:
        scope = global_scope()
    assert isinstance(scope, core._Scope)

-    var = scope.find_var(name)
+    var = scope.find_var(_to_name_str(name))
    assert var is not None, (
        "Cannot find " + name + " in scope. Perhaps you need to make the"
        " variable persistable by using var.persistable = True in your"
--- a/python/paddle/fluid/op.py
+++ b/python/paddle/fluid/op.py
@ -124,11 +124,6 @@ class OpDescCreationMethod(object):
                    new_attr.bools.extend(user_defined_attr)
                elif attr.type == framework_pb2.LONGS:
                    new_attr.longs.extend(user_defined_attr)
-                elif attr.type == framework_pb2.INT_PAIRS:
-                    for p in user_defined_attr:
-                        pair = new_attr.int_pairs.add()
-                        pair.first = p[0]
-                        pair.second = p[1]
                else:
                    raise NotImplementedError(
                        "A not supported attribute type: %s." % (
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@ -193,6 +193,8 @@ list(REMOVE_ITEM TEST_OPS test_basic_lstm_api)
 list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op)
 list(REMOVE_ITEM TEST_OPS test_imperative_debug_string)
 list(REMOVE_ITEM TEST_OPS test_fuse_bn_act_pass)
+list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_mnist)
+list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_while)

 if (APPLE OR WIN32)
  list(REMOVE_ITEM TEST_OPS test_dataset)
@ -269,6 +271,10 @@ py_test_modules(test_install_check MODULES test_install_check ENVS
        FLAGS_cudnn_deterministic=1 SERIAL)
 set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST")
 py_test_modules(test_imperative_debug_string MODULES test_imperative_debug_string ENVS FLAGS_dygraph_debug=1)
+py_test_modules(test_imperative_static_runner_mnist MODULES test_imperative_static_runner_mnist ENVS
+    FLAGS_cudnn_deterministic=1)
+py_test_modules(test_imperative_static_runner_while MODULES test_imperative_static_runner_while ENVS
+    FLAGS_cudnn_deterministic=1)
 if(WITH_DISTRIBUTE)
    # FIXME(typhoonzero): add these tests back
    list(REMOVE_ITEM DIST_TEST_OPS "test_dist_transformer")
--- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
--- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
@ -0,0 +1,235 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+
+import contextlib
+import numpy as np
+import six
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import core
+from test_imperative_base import new_program_scope
+
+import paddle.fluid.transpiler.details.program_utils as pu
+
+
+def while_softmax_regression(img):
+    def cond(i, times, pred):
+        return i < times
+
+    def body(i, times, pred):
+        pred = fluid.layers.fc(input=pred, size=10, act='softmax')
+        i = i + 1
+        return [i, times, pred]
+
+    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
+    times = fluid.layers.fill_constant(shape=[1], dtype='int64', value=5)
+    pred = fluid.layers.fc(input=img, size=10, act='softmax')
+    i, times, pred = fluid.layers.while_loop(
+        cond=cond, body=body, loop_vars=[i, times, pred])
+    return pred
+
+
+class TestImperativeStaticModelRunnerWhile(unittest.TestCase):
+    def setUp(self):
+        self.seed = 90
+        self.batch_size = 32
+        self.batch_num = 50
+        self.save_dirname = "while.inference.model"
+        self.model_filename = None
+        self.params_filename = None
+
+    def _random_batch_reader(self):
+        def _get_random_images_and_labels(image_shape, label_shape):
+            image = np.random.random(size=image_shape).astype('float32')
+            label = np.random.random(size=label_shape).astype('int64')
+            return image, label
+
+        def __reader__():
+            for _ in range(self.batch_num):
+                batch_image, batch_label = _get_random_images_and_labels(
+                    [self.batch_size, 784], [self.batch_size, 1])
+                yield batch_image, batch_label
+
+        return __reader__
+
+    def train_and_save_model(self):
+        startup_program = fluid.default_startup_program()
+        main_program = fluid.default_main_program()
+
+        img = fluid.data(name='img', shape=[None, 784], dtype='float32')
+        label = fluid.data(name='label', shape=[None, 1], dtype='int64')
+
+        pred = while_softmax_regression(img)
+
+        loss = fluid.layers.cross_entropy(input=pred, label=label)
+        avg_loss = fluid.layers.mean(loss)
+
+        optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+        optimizer.minimize(avg_loss)
+
+        # pu.program_to_code(main_program, skip_op_callstack=True)
+
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+
+        exe = fluid.Executor(place)
+        exe.run(startup_program)
+
+        loader = fluid.io.DataLoader.from_generator(
+            feed_list=[img, label], capacity=5, iterable=True)
+        loader.set_batch_generator(self._random_batch_reader(), places=place)
+
+        for data in loader():
+            exe.run(main_program, feed=data, fetch_list=[avg_loss])
+
+        fluid.io.save_inference_model(
+            self.save_dirname, ["img"], [pred],
+            exe,
+            model_filename=self.model_filename,
+            params_filename=self.params_filename)
+
+    def load_and_train_dygraph(self):
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.dygraph.guard(place):
+            fluid.default_startup_program().random_seed = self.seed
+            fluid.default_main_program().random_seed = self.seed
+            np.random.seed(self.seed)
+
+            backward_strategy = fluid.dygraph.BackwardStrategy()
+            backward_strategy.sort_sum_gradient = True
+
+            while_net = fluid.dygraph.static_runner.StaticModelRunner(
+                self.save_dirname)
+
+            dy_param_init_value = {}
+            for param in while_net.parameters():
+                dy_param_init_value[param.name] = param.numpy()
+
+            sgd = fluid.optimizer.SGD(learning_rate=0.001,
+                                      parameter_list=while_net.parameters())
+
+            train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+            train_loader.set_batch_generator(
+                self._random_batch_reader(), places=place)
+
+            while_net.train()
+
+            for data in train_loader():
+                img = data[0]
+                label = data[1]
+                label.stop_gradient = True
+
+                cost = while_net(inputs=img)
+
+                loss = fluid.layers.cross_entropy(cost, label)
+                avg_loss = fluid.layers.mean(loss)
+
+                avg_loss.backward(backward_strategy)
+                sgd.minimize(avg_loss)
+                while_net.clear_gradients()
+
+            dy_out = avg_loss.numpy()
+            dy_param_value = {}
+            for param in while_net.parameters():
+                dy_param_value[param.name] = param.numpy()
+
+        return dy_out, dy_param_init_value, dy_param_value
+
+    def load_and_train_static(self):
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = self.seed
+            fluid.default_main_program().random_seed = self.seed
+            np.random.seed(self.seed)
+
+            img = fluid.data(name='img', shape=[None, 784], dtype='float32')
+            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
+
+            pred = while_softmax_regression(img)
+
+            loss = fluid.layers.cross_entropy(input=pred, label=label)
+            avg_loss = fluid.layers.mean(loss)
+
+            optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            optimizer.minimize(avg_loss)
+
+            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+            ) else fluid.CPUPlace()
+
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+
+            fluid.io.load_params(
+                exe,
+                self.save_dirname,
+                main_program=fluid.default_main_program(),
+                filename=self.params_filename)
+
+            static_param_init_value = {}
+            static_param_name_list = []
+            for param in fluid.default_main_program().all_parameters():
+                static_param_name_list.append(param.name)
+                static_param_init_value[param.name] = fluid.executor._fetch_var(
+                    param.name)
+
+            loader = fluid.io.DataLoader.from_generator(
+                feed_list=[img, label], capacity=5, iterable=True)
+            loader.set_batch_generator(
+                self._random_batch_reader(), places=place)
+
+            for data in loader():
+                fetch_list = [avg_loss.name]
+                fetch_list.extend(static_param_name_list)
+
+                out = exe.run(fluid.default_main_program(),
+                              feed=data,
+                              fetch_list=[avg_loss])
+
+            static_param_value = {}
+            static_out = out[0]
+            for i in range(1, len(out)):
+                static_param_value[static_param_name_list[i - 1]] = out[i]
+
+        return static_out, static_param_init_value, static_param_value
+
+    def test_while_no_params_filename(self):
+        # Phase 1. run and save static model
+        self.train_and_save_model()
+
+        # # Phase 2. load model & train dygraph
+        dy_out, dy_param_init_value, dy_param_value = \
+            self.load_and_train_dygraph()
+
+        static_out, static_param_init_value, static_param_value = \
+            self.load_and_train_static()
+
+        # Phase 3. compare
+        for key, value in six.iteritems(static_param_init_value):
+            key += core.loaded_var_suffix()
+            self.assertTrue(np.array_equal(value, dy_param_init_value[key]))
+
+        self.assertTrue(np.allclose(static_out, dy_out))
+
+        for key, value in six.iteritems(static_param_value):
+            key += core.loaded_var_suffix()
+            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_run_program_op.py
+++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py