From b8991ee1491ac2e78cbeada4817d43d2e27f955a Mon Sep 17 00:00:00 2001 From: lizhenyu Date: Mon, 26 Oct 2020 17:32:03 +0800 Subject: [PATCH] add ps mode consistence check --- .../ccsrc/backend/session/ascend_session.cc | 14 ++++++++++---- mindspore/ccsrc/backend/session/cpu_session.cc | 8 +++++--- mindspore/ccsrc/backend/session/gpu_session.cc | 14 ++++++++++---- .../ccsrc/backend/session/session_basic.cc | 17 +++++++++++++++++ mindspore/ccsrc/backend/session/session_basic.h | 1 + mindspore/common/parameter.py | 4 ++-- 6 files changed, 45 insertions(+), 13 deletions(-) diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index 09bd1134c1..304289f416 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -45,6 +45,9 @@ #include "debug/anf_ir_dump.h" #include "debug/dump_proto.h" #include "toolchain/adx_datadump_server.h" +#if ENABLE_CPU && ENABLE_D +#include "ps/util.h" +#endif namespace mindspore { namespace session { @@ -166,9 +169,12 @@ GraphId AscendSession::CompileGraphImpl(NotNull func_graph) { RootGraphExecutorValidate(NOT_NULL(root_graph)); // adjust kernel AdjustKernel(root_graph); -#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) - // Assign parameter keys. - AssignParamKey(root_graph); +#if ENABLE_CPU && ENABLE_D + if (ps::Util::IsParamServerMode()) { + CheckPSModeConsistence(root_graph); + // Assign parameter keys. 
+ AssignParamKey(root_graph); + } #endif // assign stream AssignStream(NOT_NULL(root_graph)); @@ -314,7 +320,7 @@ void AscendSession::RunGraphImpl(const GraphId &graph_id, const std::vectorPreExecute(kernel_graph); } -#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) +#if ENABLE_CPU && ENABLE_D // Initialize parameter server InitPSParamAndOptim(kernel_graph, inputs); #endif diff --git a/mindspore/ccsrc/backend/session/cpu_session.cc b/mindspore/ccsrc/backend/session/cpu_session.cc index a150080acc..bac5d1625b 100644 --- a/mindspore/ccsrc/backend/session/cpu_session.cc +++ b/mindspore/ccsrc/backend/session/cpu_session.cc @@ -68,9 +68,11 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr MS_LOG(INFO) << "Set kernel info"; SetKernelInfo(graph.get()); #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) - AssignParamKey(graph); - if (ps::Util::IsRoleOfWorker()) { - Optimize(graph); + if (ps::Util::IsParamServerMode()) { + AssignParamKey(graph); + if (ps::Util::IsRoleOfWorker()) { + Optimize(graph); + } } #endif MS_LOG(INFO) << "Build kernel"; diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index 38091cf110..9089ae81fd 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -56,6 +56,9 @@ #include "utils/ms_utils.h" #include "utils/config_manager.h" #include "utils/ms_context.h" +#if ENABLE_CPU && ENABLE_GPU +#include "ps/util.h" +#endif namespace mindspore { namespace session { @@ -255,9 +258,12 @@ GraphId GPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr // Graph kernel fusion optimization GraphKernelOptimize(graph); -#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) - // Assign parameter keys. - AssignParamKey(graph); +#if ENABLE_CPU && ENABLE_GPU + if (ps::Util::IsParamServerMode()) { + CheckPSModeConsistence(graph); + // Assign parameter keys. 
+ AssignParamKey(graph); + } #endif // Start gpu kernel runtime StartKernelRT(); @@ -299,7 +305,7 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector &ro } #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU)) +void SessionBasic::CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) { + auto input_nodes = kernel_graph->inputs(); + for (const auto &input_node : input_nodes) { + if (!input_node->isa()) { + continue; + } + auto pk_node = input_node->cast(); + MS_EXCEPTION_IF_NULL(pk_node); + auto param_info_ptr = pk_node->param_info(); + if (param_info_ptr != nullptr && param_info_ptr->init_in_server()) { + const std::string &param_name = pk_node->fullname_with_scope(); + MS_LOG(EXCEPTION) << "Can not initialize the parameter[" << param_name + << "] in server, this parameter is used by kernel which executes in device"; + } + } +} + void SessionBasic::AssignParamKey(const KernelGraphPtr &kernel_graph) { if (!ps::Util::IsRoleOfWorker()) { MS_LOG(INFO) << "Not parameter server mode."; diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h index 6071912126..dfc7bb948e 100644 --- a/mindspore/ccsrc/backend/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -91,6 +91,7 @@ class SessionBasic : public std::enable_shared_from_this { // get graph id in child graphs by ME front anf node pointer virtual GraphId GetGraphIdByNode(const AnfNodePtr &) const { return kInvalidGraphId; } virtual GraphId GetFinalRunGraph() const { return kInvalidGraphId; } + void CheckPSModeConsistence(const KernelGraphPtr &Kernel_graph); void AssignParamKey(const KernelGraphPtr &kernel_graph); void InitPSParamAndOptim(const KernelGraphPtr &kernel_graph, const std::vector &inputs_const); virtual bool CheckModelInputs(uint32_t graph_id, const std::vector &inputs, diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index c12d2e097a..f81fc7eaa4 100644 --- 
a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -169,8 +169,8 @@ class Parameter(MetaTensor_): def set_param_ps(self, init_in_server=False): if _is_role_worker() or _is_role_pserver() or _is_role_sched(): if init_in_server and (not self.name.endswith("embedding_table")): - raise RuntimeError("Can not initialize parameter '{}' in server, only parameters of \ - sparse operator support initialization in server.".format(self.name)) + raise RuntimeError("Can not initialize parameter '{}' in server, only parameters of " + "sparse operator support initialization in server.".format(self.name)) self.is_param_ps = True self.init_in_server = init_in_server self._param_info.init_in_server = init_in_server