Multi task (#26002)

* add multitask * add multitask, test=develop * fix code style, test=develop * add partial push dense, test=develop * fix has_key in py3, test=develop * fix, test=develop * fix, test=develop * fix, test=develop
5 years ago · 5a83496c8d
parent 7a58431c0a
commit 5a83496c8d
10 changed files with 306 additions and 23 deletions
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@ -247,7 +247,8 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS
        graph build_strategy collective_helper
        fast_threaded_ssa_graph_executor variable_helper)
-cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS executor)
+cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS
    conditional_block_op executor)
 cc_library(prune SRCS prune.cc DEPS framework_proto boost)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
 cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
--- a/paddle/fluid/framework/device_worker.h
+++ b/paddle/fluid/framework/device_worker.h
@ -19,6 +19,7 @@ limitations under the License. */
 #include <map>
 #include <memory>
 #include <mutex>  // NOLINT
 #include <set>
 #include <string>
 #include <thread>         // NOLINT
 #include <unordered_map>  // NOLINT
@ -313,6 +314,10 @@ class DownpourWorker : public HogwildWorker {
  std::map<uint64_t, std::vector<std::string>> dense_value_names_;
  std::map<uint64_t, uint64_t> table_dependency_;
  std::vector<std::pair<uint64_t, uint64_t>> copy_dense_tables_;
  // multitask
  std::map<int32_t, uint64_t> cond2table_map_;
  std::set<uint64_t> condvalue_set_;
  bool flag_partial_push_;
 private:
  // std::vector<std::string> dump_param_;
--- a/paddle/fluid/framework/downpour_worker.cc
+++ b/paddle/fluid/framework/downpour_worker.cc
@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <cstdlib>
 #include <ctime>
 #include "paddle/fluid/framework/device_worker.h"
 #include "paddle/fluid/platform/cpu_helper.h"
@ -65,6 +67,13 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
    }
  }
  flag_partial_push_ = false;
  for (auto& m : param_.program_config(0).partial_pushdense_condtable_map()) {
    cond2table_map_[m.key()] = m.value();
    condvalue_set_.insert(m.value());
    flag_partial_push_ = true;
  }
  skip_ops_.resize(param_.skip_ops_size());
  for (int i = 0; i < param_.skip_ops_size(); ++i) {
    skip_ops_[i] = param_.skip_ops(i);
@ -876,14 +885,42 @@ void DownpourWorker::TrainFiles() {
 #endif
    if (need_to_push_dense_) {
-      for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
+      if (flag_partial_push_) {
-           ++i) {
+        Variable* var = (*thread_scope_).FindVar("cond_tag");
-        uint64_t tid = static_cast<uint64_t>(
+        LoDTensor* tensor = var->GetMutable<LoDTensor>();
-            param_.program_config(0).push_dense_table_id(i));
+        // check type in python code
-        fleet_ptr_->PushDenseVarsAsync(
+        int64_t* cond_value_batch = tensor->data<int64_t>();
-            *thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
+
-            scale_datanorm_, cur_batch);
+        for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
             ++i) {
          uint64_t tid = static_cast<uint64_t>(
              param_.program_config(0).push_dense_table_id(i));
          if (condvalue_set_.find(tid) != condvalue_set_.end()) {
            // common dense table must push dense
            if (cond2table_map_[cond_value_batch[0]] != tid) {
              // can't push dense
              continue;
            }
          }
          VLOG(3) << "push multitask dense gradient " << tid;
          fleet_ptr_->PushDenseVarsAsync(
              *thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
              scale_datanorm_, cur_batch);
        }
      } else {
        for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
             ++i) {
          uint64_t tid = static_cast<uint64_t>(
              param_.program_config(0).push_dense_table_id(i));
          fleet_ptr_->PushDenseVarsAsync(
              *thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
              scale_datanorm_, cur_batch);
        }
      }
      VLOG(3) << "push dense gradient done.";
      // the following code should be more precise and clean
--- a/paddle/fluid/framework/hogwild_worker.cc
+++ b/paddle/fluid/framework/hogwild_worker.cc
@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/device_worker.h"
 #include "paddle/fluid/framework/device_worker_factory.h"
 #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
 #include "paddle/fluid/operators/distributed/distributed.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/lodtensor_printer.h"
@ -47,6 +48,8 @@ void HogwildWorker::CreateThreadOperators(const ProgramDesc &program) {
    ops_.push_back(local_op_ptr);
    continue;
  }
  operators::PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
      program, 0, ops_);
 }
 void HogwildWorker::CreateThreadScope(const ProgramDesc &program) {
--- a/paddle/fluid/framework/trainer_desc.proto
+++ b/paddle/fluid/framework/trainer_desc.proto
@ -148,12 +148,17 @@ message CopyTableConfig {
  repeated TableDependencyMap table_denpendency_map = 12;
 }
 // One entry of the partial-push-dense mapping used for multitask training.
 // DownpourWorker::Initialize copies every entry into cond2table_map_
 // (key -> value) and records the value in condvalue_set_.
 message CondTableMap {
  // Condition value; at push time it is compared against the first element
  // of the "cond_tag" tensor in the thread scope.
  required int32 key = 1;
  // Dense table id that is pushed only when the batch's condition value
  // maps to it.
  required int32 value = 2;
 }
 // Per-program table configuration consumed by the downpour worker.
 message ProgramConfig {
  // Identifier of the program this configuration belongs to.
  required string program_id = 1;
  // Sparse table ids to push (presumably sparse gradients; the consumer is
  // not visible in this hunk).
  repeated int32 push_sparse_table_id = 2;
  // Dense table ids whose gradients DownpourWorker::TrainFiles pushes via
  // PushDenseVarsAsync.
  repeated int32 push_dense_table_id = 3;
  // Sparse table ids to pull (consumer not visible in this hunk).
  repeated int32 pull_sparse_table_id = 4;
  // Dense table ids to pull; filled from program_configs[...]["pull_dense"]
  // on the Python side.
  repeated int32 pull_dense_table_id = 5;
  // Optional condition-value -> dense-table mapping. When it is non-empty the
  // worker enables partial push: a table listed here is pushed only if the
  // current batch's condition value maps to it, while tables not listed are
  // always pushed.
  repeated CondTableMap partial_pushdense_condtable_map = 10;
 }
 message PullDenseWorkerParameter {
--- a/paddle/fluid/operators/controlflow/CMakeLists.txt
+++ b/paddle/fluid/operators/controlflow/CMakeLists.txt
@ -1,5 +1,7 @@
 include(operators)
-register_operators(DEPS naive_executor)
+register_operators(EXCLUDES conditional_block_op DEPS naive_executor)
 cc_library(conditional_block_op SRCS conditional_block_op.cc DEPS executor)
 cc_library(op_variant SRCS op_variant.cc DEPS operator proto_desc)
 cc_library(conditional_block_op_helper SRCS conditional_block_op_helper.cc DEPS operator op_variant conditional_block_op)
 cc_library(recurrent_op_helper SRCS recurrent_op_helper.cc DEPS operator op_variant recurrent_op)
--- a/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc
@ -162,6 +162,32 @@ void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
  PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOpImpl(
      program, &fwd_ops, &bwd_ops);
 }
 void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
    const framework::ProgramDesc &program, int block_id,
    const std::vector<framework::OperatorBase *> &all_ops) {
  // Only the root block (block_id == 0) is handled here.
  // Every conditional_block op and conditional_block_grad op of the whole
  // program is processed while block 0 is being prepared (i.e. when
  // Executor::Run() or ParallelExecutor constructs), and that is also the
  // point where they must be processed: otherwise a conditional_block op
  // could run first and erase variables that a conditional_block_grad op
  // still needs, possibly before that grad op has even been constructed.
  if (block_id != 0) {
    return;
  }

  // Split the raw operator pointers into forward and backward conditional
  // ops; operators of any other type are ignored.
  std::vector<OpVariant> forward_cond_ops;
  std::vector<OpVariant> backward_cond_ops;
  for (framework::OperatorBase *candidate : all_ops) {
    const auto &op_type = candidate->Type();
    if (op_type == "conditional_block") {
      forward_cond_ops.emplace_back(candidate);
      continue;
    }
    if (op_type == "conditional_block_grad") {
      backward_cond_ops.emplace_back(candidate);
    }
  }

  PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOpImpl(
      program, &forward_cond_ops, &backward_cond_ops);
 }
 void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
    const framework::ProgramDesc &program,
--- a/paddle/fluid/operators/controlflow/conditional_block_op_helper.h
+++ b/paddle/fluid/operators/controlflow/conditional_block_op_helper.h
@ -33,6 +33,10 @@ void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
    const framework::ProgramDesc &program, int block_id,
    const std::vector<std::unique_ptr<framework::OperatorBase>> &all_ops);
 // Overload taking non-owning raw operator pointers instead of unique_ptrs.
 // Does nothing unless block_id is 0; otherwise the "conditional_block" and
 // "conditional_block_grad" operators found in all_ops are collected and
 // prepared for safe eager deletion.
 void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
    const framework::ProgramDesc &program, int block_id,
    const std::vector<framework::OperatorBase *> &all_ops);
 void PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
    const framework::ProgramDesc &program,
    const std::vector<framework::OperatorBase *> &ifelse_ops,
--- a/python/paddle/fluid/device_worker.py
+++ b/python/paddle/fluid/device_worker.py
@ -221,6 +221,13 @@ class DownpourSGD(DeviceWorker):
                for i in program_configs[program_id]["pull_dense"]:
                    pc.pull_dense_table_id.extend([i])
                    dense_table_set.add(i)
                # code for partial push dense table such as multitask
                if "cond2denseid" in program_configs[program_id]:
                    cond2denseid = program_configs[program_id]["cond2denseid"]
                    for key, value in cond2denseid.items():
                        mc_map = pc.partial_pushdense_condtable_map.add()
                        mc_map.key = key
                        mc_map.value = value
                break
        trainer_desc.device_worker_name = opt_info.get("worker_class",
--- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py
+++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/optimizer_factory.py