replace ps-lite

pull/12679/head
chendongsheng 4 years ago
parent e99c29c7d9
commit db0a6f1e19

@@ -1,22 +0,0 @@
if(ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/ps-lite/repository/archive/34fd45cae457d59850fdcb2066467778d0673f21.zip")
set(MD5 "0d1543b8dcb0bc3610637e1643c94eb4")
else()
set(REQ_URL "https://github.com/dmlc/ps-lite/archive/34fd45cae457d59850fdcb2066467778d0673f21.zip")
set(MD5 "393c0e27b68bfaf96718caa3aa96f5a3")
endif()
set(pslite_USE_STATIC_LIBS ON)
if(${ENABLE_IBVERBS} STREQUAL "ON")
set(pslite_CXXFLAGS "USE_IBVERBS=1")
endif()
mindspore_add_pkg(pslite
LIBS ps
URL ${REQ_URL}
MD5 ${MD5}
PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/pslite/ps_lite.patch001
ONLY_MAKE True
ONLY_MAKE_INCS include/*
ONLY_MAKE_LIBS build/*)
include_directories(${pslite_INC})
add_library(mindspore::pslite ALIAS pslite::ps)

@@ -1,5 +0,0 @@
mindspore_add_pkg(zeromq
VER 4.1.4
HEAD_ONLY ./
URL https://raw.githubusercontent.com/mli/deps/master/build/zeromq-4.1.4.tar.gz
MD5 a611ecc93fffeb6d058c0e6edf4ad4fb)

@@ -32,10 +32,6 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
if(USE_GLOG)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
endif()
if(ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU))
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
endif()
find_package(Python3)
include_directories(${Python3_INCLUDE_DIRS})

@@ -339,8 +339,8 @@ elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
target_link_libraries(_c_expression PRIVATE -Wl,-force_load mindspore -Wl,-noall_load)
else()
if(ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU))
target_link_libraries(mindspore mindspore::pslite proto_input mindspore::protobuf
mindspore::event mindspore::event_pthreads ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
target_link_libraries(mindspore proto_input mindspore::protobuf
mindspore::event mindspore::event_pthreads)
target_link_libraries(mindspore -Wl,--no-as-needed mindspore::event_core ps_cache)
if(${ENABLE_IBVERBS} STREQUAL "ON")
target_link_libraries(mindspore ibverbs rdmacm)

@@ -17,6 +17,7 @@
#include <vector>
#include <algorithm>
#include "ps/worker.h"
#include "ps/util.h"
namespace mindspore {
namespace kernel {
@@ -35,7 +36,7 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
<< input_shape << " is too large.";
}
if (mindspore::ps::Util::IsRoleOfWorker()) {
if (mindspore::ps::PSContext::instance()->is_worker()) {
key_ = AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
}
std::vector<size_t> keys{key_, key_, key_};
@@ -50,9 +51,10 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
<< ", indices_shape:" << indices_shape << ", output_shape:" << output_shape;
std::vector<int64_t> lens{SizeToLong(input_shape.size()), SizeToLong(indices_shape.size()),
SizeToLong(output_shape.size())};
if (mindspore::ps::Util::IsRoleOfWorker()) {
if (mindspore::ps::PSContext::instance()->is_worker()) {
mindspore::ps::worker.AddEmbeddingTable(key_, input_shape[axis]);
mindspore::ps::worker.InitPSEmbeddingTable(keys, values, lens);
mindspore::ps::ParamInitInfoMessage info;
mindspore::ps::worker.InitPSEmbeddingTable(key_, input_shape, indices_shape, output_shape, info);
}
}
@@ -70,17 +72,16 @@ bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &i
size_t input_size = inputs[1]->size;
size_t output_size = outputs[0]->size;
size_t size = input_size / sizeof(float);
::ps::SArray<int> lookup_ids(size, 0);
::ps::SArray<int> lengths{size};
::ps::SArray<float> lookup_result(output_size / sizeof(float), 0);
size_t size = input_size / sizeof(int);
std::vector<int> lookup_ids(size, 0);
std::vector<int> lengths{SizeToInt(size)};
std::vector<float> lookup_result(output_size / sizeof(float), 0);
auto ret = memcpy_s(lookup_ids.data(), lookup_ids.size() * sizeof(int), indices_addr, input_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
return false;
}
mindspore::ps::worker.DoPSEmbeddingLookup({key_}, lookup_ids, lengths, &lookup_result,
mindspore::ps::kEmbeddingLookupCmd);
mindspore::ps::worker.DoPSEmbeddingLookup(key_, lookup_ids, &lookup_result, mindspore::ps::kEmbeddingLookupCmd);
auto ret2 = memcpy_s(output_addr, outputs[0]->size, lookup_result.data(), output_size);
if (ret2 != EOK) {
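
For orientation, a minimal sketch of the reworked lookup path, using only the Worker API visible in this diff (a single key, std::vector buffers and a command id) and the include paths added in these hunks; LookupRows and embedding_dim are hypothetical placeholders, not code from this commit:

    #include <vector>
    #include "ps/worker.h"
    #include "ps/constants.h"

    // Sketch: fetch the embedding rows for one parameter key through the PS worker.
    void LookupRows(const mindspore::ps::Key &key, const std::vector<int> &lookup_ids,
                    size_t embedding_dim, std::vector<float> *lookup_result) {
      lookup_result->resize(lookup_ids.size() * embedding_dim, 0.0f);
      mindspore::ps::worker.DoPSEmbeddingLookup(key, lookup_ids, lookup_result,
                                                mindspore::ps::kEmbeddingLookupCmd);
    }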

@@ -62,7 +62,7 @@ class PullKernel : public CPUKernel {
MS_EXCEPTION_IF_NULL(param_node);
param_name_ = param_node->fullname_with_scope();
if (mindspore::ps::Util::IsRoleOfWorker()) {
if (mindspore::ps::PSContext::instance()->is_worker()) {
key_ = AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
}
InitSizeLists();

@@ -30,6 +30,7 @@
#include "backend/optimizer/pass/replace_node_by_proxy.h"
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "ps/util.h"
#include "ps/ps_context.h"
#endif
namespace mindspore {
@@ -75,9 +76,9 @@ GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtr
MS_LOG(INFO) << "Set kernel info";
SetKernelInfo(graph.get());
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (ps::Util::IsParamServerMode()) {
if (ps::PSContext::instance()->is_ps_mode()) {
AssignParamKey(graph);
if (ps::Util::IsRoleOfWorker()) {
if (ps::PSContext::instance()->is_worker()) {
Optimize(graph);
}
}
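
Role checks now go through the PSContext singleton instead of ps::Util. A minimal sketch of the guard pattern repeated across these hunks, assuming only the accessors that appear in this diff (is_ps_mode, is_worker, is_server, is_scheduler); ShouldCompileGraph is a hypothetical helper name:

    #include "ps/ps_context.h"

    // In PS mode, only worker processes compile and optimize graphs;
    // server and scheduler processes bail out early.
    bool ShouldCompileGraph() {
      auto context = mindspore::ps::PSContext::instance();
      if (context->is_ps_mode() && !context->is_worker()) {
        return false;
      }
      return true;
    }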

@@ -41,8 +41,9 @@
#include "utils/trace_base.h"
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/common.h"
#include "ps/constants.h"
#include "ps/util.h"
#include "ps/ps_context.h"
#include "abstract/abstract_value.h"
#endif
@@ -2287,7 +2288,7 @@ void SessionBasic::RunOpHideNopNode(const KernelGraphPtr &kernel_graph) const {
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
void SessionBasic::InitPsWorker(const KernelGraphPtr &kernel_graph) {
if (!ps::Util::IsRoleOfWorker()) {
if (!ps::PSContext::instance()->is_worker()) {
return;
}
CheckPSModeConsistence(kernel_graph);
@@ -2384,7 +2385,7 @@ void SessionBasic::AssignParamKey(const KernelGraphPtr &kernel_graph) {
void SessionBasic::InitPSParamAndOptim(const KernelGraphPtr &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) {
if (!ps::Util::IsRoleOfWorker()) {
if (!ps::PSContext::instance()->is_worker()) {
return;
}
std::vector<tensor::TensorPtr> inputs(inputs_const);

@@ -48,6 +48,7 @@
#include "mindspore/core/utils/parallel_node_check.h"
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "ps/util.h"
#include "ps/ps_context.h"
#endif
using mindspore::tensor::Tensor;
@@ -3283,7 +3284,7 @@ static void HandleNoUsedParameter(const FuncGraphPtr &root) {
bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) {
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (ps::Util::IsRoleOfPServer() || ps::Util::IsRoleOfScheduler()) {
if (ps::PSContext::instance()->is_server() || ps::PSContext::instance()->is_scheduler()) {
return false;
}
#endif

@@ -288,7 +288,6 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
else()
target_link_libraries(_c_dataengine PRIVATE _c_mindrecord)
if(ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU))
target_link_libraries(_c_dataengine PRIVATE mindspore::pslite ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
if(${ENABLE_IBVERBS} STREQUAL "ON")
target_link_libraries(_c_dataengine PRIVATE ibverbs rdmacm)
endif()

@@ -460,7 +460,7 @@ bool StartPSWorkerAction(const ResourcePtr &res) {
bool StartPSServerAction(const ResourcePtr &res) {
FuncGraphPtr func_graph = res->func_graph();
auto &ps = ps::ParameterServer<float>::GetInstance();
auto &ps = ps::ParameterServer::GetInstance();
ps.Run(func_graph);
return true;
}
@@ -626,7 +626,7 @@ std::vector<ActionItem> VmPipeline() {
actions.emplace_back(std::make_pair("validate", ValidateAction));
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (ps::Util::IsRoleOfWorker()) {
if (ps::PSContext::instance()->is_worker()) {
actions.emplace_back(std::make_pair("worker", StartPSWorkerAction));
}
#endif

@@ -43,6 +43,7 @@
#include "pipeline/jit/static_analysis/auto_monad.h"
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "ps/util.h"
#include "ps/ps_context.h"
#endif
namespace mindspore {
@@ -406,7 +407,7 @@ bool AddRecomputationPass(const ResourcePtr &res) {
bool AddCacheEmbeddingPass(const ResourcePtr &res) {
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (ps::Util::IsParamServerMode()) {
if (ps::PSContext::instance()->is_ps_mode()) {
return true;
}
#endif

@@ -49,7 +49,7 @@
#include "utils/shape_utils.h"
#include "utils/info.h"
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "ps/common.h"
#include "ps/constants.h"
#include "ps/util.h"
#include "ps/worker.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
@@ -492,14 +492,11 @@ std::vector<ActionItem> GetPipline(const ResourcePtr &resource, const std::strin
std::string backend = MsContext::GetInstance()->backend_policy();
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (mindspore::ps::Util::IsParamServerMode()) {
mindspore::ps::Util::SetInternalEnvVar();
}
if (ps::Util::IsRoleOfPServer()) {
if (ps::PSContext::instance()->is_server()) {
resource->results()[kBackend] = compile::CreateBackend();
return PServerPipeline();
}
if (ps::Util::IsRoleOfScheduler()) {
if (ps::PSContext::instance()->is_scheduler()) {
return PSchedulerPipeline();
}
#endif
@@ -978,7 +975,7 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc
const std::vector<TypePtr> &types, const std::vector<std::vector<int64_t>> &shapes,
const std::vector<int64_t> &input_indexes, bool need_run) {
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if ((ps::Util::IsParamServerMode()) && (!ps::Util::IsRoleOfWorker())) {
if ((ps::PSContext::instance()->is_ps_mode()) && (!ps::PSContext::instance()->is_worker())) {
return true;
}
#endif
@@ -1030,7 +1027,7 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc
ConfigManager::GetInstance().set_iter_num(size);
// PS cache does not support loop sink.
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (ps::Util::IsRoleOfWorker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps::PsDataPrefetch::GetInstance().CreateDataChannel(queue_name, LongToSize(size));
ConfigManager::GetInstance().set_iter_num(1);
}
@@ -1151,10 +1148,11 @@ void ClearResAtexit() {
pynative::ClearPyNativeSession();
session::ClearPythonParasMap();
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (ps::Util::IsParamServerMode() && ps::Util::IsRoleOfWorker()) {
if (ps::PSContext::instance()->is_ps_mode() && ps::PSContext::instance()->is_worker()) {
if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps::ps_cache_instance.Finalize();
}
MS_LOG(INFO) << "ps::worker.Finalize";
ps::worker.Finalize();
}
#endif

@@ -21,8 +21,8 @@ if(NOT (ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU)))
list(REMOVE_ITEM _PS_SRC_FILES "core/abstract_node.cc")
list(REMOVE_ITEM _PS_SRC_FILES "core/scheduler_node.cc")
list(REMOVE_ITEM _PS_SRC_FILES "core/http_client.cc")
list(REMOVE_ITEM _PS_SRC_FILES "internal/worker.cc")
list(REMOVE_ITEM _PS_SRC_FILES "internal/parameter_server.cc")
list(REMOVE_ITEM _PS_SRC_FILES "worker.cc")
list(REMOVE_ITEM _PS_SRC_FILES "parameter_server.cc")
endif()
if(NOT ENABLE_D)

@@ -1,140 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_COMMON_H_
#define MINDSPORE_CCSRC_PS_COMMON_H_
#include <limits.h>
#include <iostream>
#include <vector>
#include <memory>
#include <map>
#include <string>
#include "ps/ps.h"
namespace mindspore {
namespace ps {
constexpr char kEnvCommType[] = "MS_COMM_TYPE";
constexpr char kEnvInterface[] = "MS_INTERFACE";
constexpr char kEnvPServerNum[] = "MS_SERVER_NUM";
constexpr char kEnvWorkerNum[] = "MS_WORKER_NUM";
constexpr char kEnvSchedulerHost[] = "MS_SCHED_HOST";
constexpr char kEnvSchedulerPort[] = "MS_SCHED_PORT";
constexpr char kDmlcCommType[] = "DMLC_PS_VAN_TYPE";
constexpr char kDmlcInterface[] = "DMLC_INTERFACE";
constexpr char kDmlcPServerNum[] = "DMLC_NUM_SERVER";
constexpr char kDmlcWorkerNum[] = "DMLC_NUM_WORKER";
constexpr char kDmlcRole[] = "DMLC_ROLE";
constexpr char kDmlcSchedulerHost[] = "DMLC_PS_ROOT_URI";
constexpr char kDmlcSchedulerPort[] = "DMLC_PS_ROOT_PORT";
constexpr char kCommTypeOfIBVerbs[] = "ibverbs";
constexpr char kCommTypeOfTCP[] = "zmq";
constexpr char kRoleOfPServer[] = "server";
constexpr char kRoleOfWorker[] = "worker";
constexpr char kRoleOfScheduler[] = "scheduler";
constexpr char kLearningRate[] = "learning_rate";
constexpr char kMomentum[] = "momentum";
constexpr char kApplyMomentum[] = "ApplyMomentum";
constexpr char kSparseAdam[] = "Adam";
constexpr char kSparseLazyAdam[] = "LazyAdam";
constexpr char kSparseFtrl[] = "Ftrl";
constexpr char kApplyMomentumOp[] = "Momentum";
constexpr char kSparseAdamOp[] = "Adam";
constexpr char kSparseLazyAdamOp[] = "LazyAdam";
constexpr char kSparseFtrlOp[] = "FTRL";
constexpr int64_t kInitWeightsCmd = 10;
constexpr int64_t kInitWeightToOptimIdCmd = 11;
constexpr int64_t kInitOptimInputsShapeCmd = 12;
constexpr int64_t kInitKeyToPushNodeIdCmd = 13;
constexpr int64_t kInitEmbeddingsCmd = 20;
constexpr int64_t kUpdateEmbeddingsCmd = 21;
constexpr int64_t kCheckReadyForPushCmd = 25;
constexpr int64_t kCheckReadyForPullCmd = 26;
constexpr int64_t kEmbeddingLookupCmd = 30;
constexpr int64_t kFinalizeCmd = 40;
constexpr size_t kInvalidKey = UINT64_MAX;
constexpr int64_t kInvalidID = -1;
using DataPtr = std::shared_ptr<unsigned char>;
using VectorPtr = std::shared_ptr<std::vector<unsigned char>>;
using Key = ::ps::Key;
using Keys = ::ps::SArray<Key>;
using Values = ::ps::SArray<float>;
using ValuesPtr = std::shared_ptr<Values>;
using Weight = ::ps::SArray<float>;
using Grad = ::ps::SArray<float>;
using LookupIds = ::ps::SArray<Key>;
using Lengths = ::ps::SArray<int>;
using WeightPtr = std::shared_ptr<Weight>;
using GradPtr = std::shared_ptr<Grad>;
using InputsShape = std::vector<std::shared_ptr<std::vector<size_t>>>;
using InputsShapePtr = std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>>;
constexpr size_t INDEX_NOT_SEND = UINT_MAX;
using OptimOriginIdx = std::map<std::string, size_t>;
using OptimPSSendIdx = std::map<std::string, size_t>;
const OptimOriginIdx kMomentumOriginIdx = {{"weight", 0}, {"accum", 1}, {"lr", 2}, {"grad", 3}, {"momentum", 4}};
const OptimPSSendIdx kMomentumPSSendIdx = {
{"weight", INDEX_NOT_SEND}, {"accum", INDEX_NOT_SEND}, {"lr", 0}, {"grad", 1}, {"momentum", 2}};
const OptimOriginIdx kSparseAdamOriginIdx = {{"weight", 0}, {"m", 1}, {"v", 2}, {"beta1_power", 3},
{"beta2_power", 4}, {"lr", 5}, {"beta1", 6}, {"beta2", 7},
{"eps", 8}, {"grad", 9}, {"indices", 10}};
const OptimPSSendIdx kSparseAdamPSSendIdx = {{"weight", INDEX_NOT_SEND},
{"m", INDEX_NOT_SEND},
{"v", INDEX_NOT_SEND},
{"beta1_power", 0},
{"beta2_power", 1},
{"lr", 2},
{"beta1", 3},
{"beta2", 4},
{"eps", 5},
{"grad", 6},
{"indices", 7}};
const OptimOriginIdx kSparseFtrlOriginIdx = {{"weight", 0}, {"accum", 1}, {"linear", 2}, {"grad", 3}, {"indices", 4}};
const OptimPSSendIdx kSparseFtrlPSSendIdx = {
{"weight", INDEX_NOT_SEND}, {"accum", INDEX_NOT_SEND}, {"linear", INDEX_NOT_SEND}, {"grad", 0}, {"indices", 1}};
const std::map<std::string, OptimOriginIdx> kOptimToOriginIdx = {{kApplyMomentum, kMomentumOriginIdx},
{kSparseAdam, kSparseAdamOriginIdx},
{kSparseLazyAdam, kSparseAdamOriginIdx},
{kSparseFtrl, kSparseFtrlOriginIdx}};
const std::map<std::string, OptimOriginIdx> kOptimToPSSendIdx = {{kApplyMomentum, kMomentumPSSendIdx},
{kSparseAdam, kSparseAdamPSSendIdx},
{kSparseLazyAdam, kSparseAdamPSSendIdx},
{kSparseFtrl, kSparseFtrlPSSendIdx}};
#define EXC_IF_VEC_IDX_OOB(vec, idx) \
{ \
size_t vec_size = vec.size(); \
if (idx >= vec_size) { \
MS_LOG(EXCEPTION) << "Vector " << #vec << " size is " << vec_size << ". So index " << idx \
<< " is out of bound."; \
} \
}
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_COMMON_H_

@@ -14,10 +14,11 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_INTERNAL_CONSTANTS_H_
#define MINDSPORE_CCSRC_PS_INTERNAL_CONSTANTS_H_
#ifndef MINDSPORE_CCSRC_PS_CONSTANTS_H_
#define MINDSPORE_CCSRC_PS_CONSTANTS_H_
#include <limits.h>
#include <climits>
#include <iostream>
#include <vector>
#include <memory>
@@ -26,8 +27,6 @@
namespace mindspore {
namespace ps {
namespace internal {
constexpr char kEnvCommType[] = "MS_COMM_TYPE";
constexpr char kEnvInterface[] = "MS_INTERFACE";
constexpr char kEnvPServerNum[] = "MS_SERVER_NUM";
@@ -127,7 +126,6 @@ const std::map<std::string, OptimOriginIdx> kOptimToPSSendIdx = {{kApplyMomentum
<< " is out of bound."; \
} \
}
} // namespace internal
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_INTERNAL_CONSTANTS_H_
#endif // MINDSPORE_CCSRC_PS_CONSTANTS_H_

@@ -39,9 +39,9 @@ void ClusterMetadata::Init(const uint32_t &worker_num, const uint32_t &server_nu
scheduler_port_ = scheduler_port;
}
uint32_t ClusterMetadata::worker_num() { return worker_num_; }
uint32_t ClusterMetadata::total_worker_num() { return worker_num_; }
uint32_t ClusterMetadata::server_num() { return server_num_; }
uint32_t ClusterMetadata::total_server_num() { return server_num_; }
uint32_t ClusterMetadata::heartbeat_interval() { return heartbeat_interval_; }

@@ -37,8 +37,8 @@ class ClusterMetadata {
void Init(const uint32_t &worker_num, const uint32_t &server_num, std::string scheduler_host,
const uint16_t &scheduler_port);
uint32_t worker_num();
uint32_t server_num();
uint32_t total_worker_num();
uint32_t total_server_num();
uint32_t heartbeat_interval();
void set_heartbeat_interval(const uint32_t &heartbeat_interval);
std::string scheduler_host();

@@ -122,9 +122,9 @@ std::string CommUtil::NodeRoleToString(const NodeRole &role) {
}
}
bool CommUtil::ValidateRankId(const enum NodeRole &node_role, const uint32_t &rank_id) {
if (node_role == NodeRole::SERVER && (rank_id > ClusterMetadata::instance()->server_num() - 1)) {
if (node_role == NodeRole::SERVER && (rank_id > ClusterMetadata::instance()->total_server_num() - 1)) {
return false;
} else if (node_role == NodeRole::WORKER && (rank_id > ClusterMetadata::instance()->worker_num() - 1)) {
} else if (node_role == NodeRole::WORKER && (rank_id > ClusterMetadata::instance()->total_worker_num() - 1)) {
return false;
}
return true;

@@ -20,7 +20,7 @@ namespace mindspore {
namespace ps {
namespace core {
void NodeManager::InitNodeNum() {
total_node_num_ = ClusterMetadata::instance()->server_num() + ClusterMetadata::instance()->worker_num();
total_node_num_ = ClusterMetadata::instance()->total_server_num() + ClusterMetadata::instance()->total_worker_num();
}
int NodeManager::NextRankId(const RegisterMessage &register_message) {

@@ -1,179 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_INTERNAL_PARAMETER_SERVER_H_
#define MINDSPORE_CCSRC_PS_INTERNAL_PARAMETER_SERVER_H_
#include <unistd.h>
#include <unordered_map>
#include <string>
#include <iostream>
#include <memory>
#include <vector>
#include <mutex>
#include <condition_variable>
#include <thread>
#include <cmath>
#include <random>
#include <utility>
#include <list>
#include <map>
#include <functional>
#include "ir/func_graph.h"
#include "backend/session/session_basic.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/session_factory.h"
#include "ps/optimizer_info.h"
#include "ps/optimizer_info_builder.h"
#include "ps/ps_context.h"
#include "runtime/device/cpu/kernel_select_cpu.h"
#include "utils/ms_context.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/ps/pserver_kernel.h"
#include "backend/kernel_compiler/cpu/ps/sparse_apply_adam_ps_kernel.h"
#include "backend/kernel_compiler/cpu/ps/sparse_apply_lazy_adam_ps_kernel.h"
#include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"
#include "backend/kernel_compiler/cpu/ps/apply_momentum_ps_kernel.h"
#include "backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/random_normal/random_normal.h"
#include "ps/internal/constants.h"
#include "ps/util.h"
#include "ps/embedding_table_shard_metadata.h"
#include "utils/log_adapter.h"
#include "proto/comm.pb.h"
#include "proto/ps.pb.h"
#include "ps/core/server_node.h"
namespace mindspore {
namespace ps {
namespace internal {
class ParameterServer {
public:
static ParameterServer &GetInstance() {
static ParameterServer instance;
return instance;
}
void Run(const FuncGraphPtr &func_graph);
private:
ParameterServer()
: pserver_num_(0),
worker_num_(0),
rank_id_(0),
grad_accum_count_(0),
handler_(nullptr),
func_graph_(nullptr),
sess_(nullptr),
running_(true),
thread_(nullptr) {}
~ParameterServer() = default;
ParameterServer(const ParameterServer &) = delete;
ParameterServer &operator=(const ParameterServer &) = delete;
class ServerHandler {
public:
explicit ServerHandler(ParameterServer *ps) : ps_(ps) {}
~ServerHandler() = default;
void Init();
void operator()(std::shared_ptr<core::TcpConnection> conn, std::shared_ptr<core::MessageMeta> meta, DataPtr data,
size_t size);
void HandlePushReq(DataPtr data, size_t size, VectorPtr res);
void HandlePullReq(DataPtr data, size_t size, VectorPtr res);
void HandleInitWeights(DataPtr data, size_t size, VectorPtr res);
void HandleInitWeightToOptimId(DataPtr data, size_t size, VectorPtr res);
void HandleInitInputsShape(DataPtr data, size_t size, VectorPtr res);
void HandleInitEmbeddings(DataPtr data, size_t size, VectorPtr res);
void HandleCheckReadyForPush(DataPtr data, size_t size, VectorPtr res);
void HandleCheckReadyForPull(DataPtr data, size_t size, VectorPtr res);
void HandleEmbeddingLookup(DataPtr data, size_t size, VectorPtr res);
void HandleUpdateEmbeddings(DataPtr data, size_t size, VectorPtr res);
void HandleFinalize(DataPtr data, size_t size, VectorPtr res);
private:
ParameterServer *ps_;
typedef void (ServerHandler::*RequestHandler)(DataPtr data, size_t size, VectorPtr res);
std::unordered_map<int, RequestHandler> handlers_;
std::unordered_map<Key, bool> init_weights_;
std::unordered_map<Key, bool> init_weight_to_optim_;
std::unordered_map<Key, bool> init_optim_info_;
};
bool Init(const FuncGraphPtr &func_graph);
void InitOptimInfoBuilders();
void InitWeightKeyToOptims(const Key &key, const int64_t &optim_id);
void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths);
void InitWeight(const Key &key, const WeightPtr &weight);
void InitGrad(const Key &key, const GradPtr &grad);
void InitEmbeddingTable(const Key &key,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes,
const ParamInitInfo &param_init_info);
bool HasWeight(const Key &key);
void Finalize();
void UpdateWeights();
void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths);
WeightPtr weight(const Key &key);
void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, KVMessage *res);
void UpdateEmbeddings(const Key &key, const LookupIds &lookup_ids, const Values &vals);
bool ReadyForUpdateWeights();
bool ReadyForPush(const Key &key);
bool ReadyForPull(const Key &key);
void ResetGradAccumCount();
const CNodePtr GetCNode(const std::string &name) const;
std::mutex &mutex();
void GetEmbeddingTableParamPtr();
void SyncEmbeddingTables();
size_t pserver_num_;
size_t worker_num_;
size_t rank_id_;
size_t grad_accum_count_;
std::unique_ptr<ServerHandler> handler_;
FuncGraphPtr func_graph_;
std::shared_ptr<session::SessionBasic> sess_;
bool running_;
std::unordered_map<Key, std::shared_ptr<PServerKernel>> optimizers_;
std::unordered_map<Key, InputsShapePtr> optim_inputs_shape_;
std::unordered_map<Key, InputsShapePtr> original_optim_inputs_shape_;
std::unordered_map<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
std::unordered_map<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
std::unordered_map<Key, std::string> weight_key_to_optims_;
std::unordered_map<Key, std::string> weight_key_to_optim_op_;
std::unordered_map<Key, WeightPtr> weights_;
std::unordered_map<Key, bool> is_embedding_;
std::unordered_map<Key, WeightPtr> grads_;
std::unordered_map<Key, size_t> grads_accum_counter_;
std::unordered_map<Key, std::shared_ptr<PServerKernel>> embedding_lookup_ops_;
std::unordered_map<Key, uint64_t> tokens_;
std::mutex mutex_;
std::condition_variable apply_grads_cv_;
std::unique_ptr<std::thread> thread_;
core::ServerNode server_node_;
std::map<Key, ParameterPtr> embedding_tables_;
friend class ServerHandler;
};
} // namespace internal
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_INTERNAL_PARAMETER_SERVER_H_
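
The internal ParameterServer header above is deleted as part of this replacement; the class is re-homed outside the internal namespace and driven as a plain singleton, as the StartPSServerAction hunk earlier in this diff shows. A minimal usage sketch, with the ps/parameter_server.h path assumed from the CMake hunk where internal/parameter_server.cc gives way to parameter_server.cc, and RunServer as a hypothetical wrapper:

    #include "ir/func_graph.h"
    #include "ps/parameter_server.h"

    // Hand the compiled graph to the server singleton and enter its serving loop.
    void RunServer(const mindspore::FuncGraphPtr &func_graph) {
      auto &server = mindspore::ps::ParameterServer::GetInstance();
      server.Run(func_graph);
    }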

@@ -1,157 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_INTERNAL_WORKER_H_
#define MINDSPORE_CCSRC_PS_INTERNAL_WORKER_H_
#include <utility>
#include <memory>
#include <vector>
#include <string>
#include <numeric>
#include <functional>
#include <algorithm>
#include <map>
#include <mutex>
#include <unordered_set>
#include <unordered_map>
#include "utils/log_adapter.h"
#include "ir/tensor.h"
#include "ps/util.h"
#include "ps/internal/constants.h"
#include "utils/shape_utils.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/core/worker_node.h"
#include "ps/embedding_table_shard_metadata.h"
#include "proto/comm.pb.h"
#include "proto/ps.pb.h"
#include "ps/ps_context.h"
namespace mindspore {
namespace ps {
namespace internal {
class Worker {
public:
static Worker &GetInstance() {
static Worker instance;
return instance;
}
using Callback = std::function<void()>;
using PartitionEmbeddingMessages = std::vector<std::pair<bool, EmbeddingTableLookup>>;
using PartitionKVMessages = std::vector<std::pair<bool, KVMessage>>;
using EmbeddingPartitioner = std::function<void(
const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition, const std::map<int64_t, int64_t> &attrs)>;
using KVPartitioner =
std::function<void(const KVMessage &send, PartitionKVMessages *partition, const std::map<int64_t, int64_t> &attrs)>;
void Run();
void Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const ShapeVector &sizes);
void Pull(const size_t key, void *dev_addr, const size_t size);
size_t SetParamKey(const std::string &param_name);
size_t GetParamKey(const std::string &param_name);
void SetParamInitInServer(const std::string &param_name, bool init_in_server);
bool GetParamInitInServer(const std::string &param_name);
void SetKeyOptimId(size_t key, const std::string &optimizer_name);
void SetOptimInputShapes(size_t key, const ShapeVector &shape);
void AddEmbeddingTable(const Key &key, const size_t &row_count);
void InitPSEmbeddingTable(const size_t &key, const std::vector<size_t> &input_shape,
const std::vector<size_t> &indices_shape, const std::vector<size_t> &output_shape);
void InitPSParamAndOptim(const AnfNodePtr &input_node, const tensor::TensorPtr &tensor);
void DoPSEmbeddingLookup(const Key &key, const std::vector<int> &lookup_ids, std::vector<float> *lookup_result,
int64_t cmd);
void UpdateEmbeddingTable(const std::vector<Key> &keys, const std::vector<int> &lookup_ids,
const std::vector<float> &vals);
bool running() { return running_; }
void Finalize();
private:
Worker() : running_(false), key_cnt_(0) {}
~Worker() = default;
Worker(const Worker &) = delete;
Worker &operator=(const Worker &) = delete;
void Initialize();
bool IsKeyInit(const size_t key);
void AddKeyToServerId(const Key &key);
void AddKeyByHashMod(const Key &key);
void InitPSOptimId(const size_t param_key);
void InitPSOptimInputShapes(const size_t key);
void InitPSParamData(const std::vector<size_t> &keys, void *origin_addr, size_t size);
bool IsReadyForPush(const Key &key);
bool IsReadyForPull(const Key &key);
void PrepareSparseGradient(const size_t begin, const size_t end, const std::unordered_set<int> &distinct_ids,
const std::vector<std::pair<int, float *>> &indice_to_grads, const int *all_indice,
const size_t segment_size, float *gradient, int *indices);
void BuildSparseValue(const std::vector<int> &lengths, const size_t grad_index, const size_t indice_index,
const float *original_data, const float *grads, int *indices, std::vector<float> *reduced_data);
void PushData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens = {},
int command = 0, int64_t priority = 0);
void PushSparseData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens,
size_t grad_index, size_t indice_index, size_t first_dim_size, size_t outer_dim_size);
void PullData(const std::vector<Key> &keys, std::vector<float> *vals, std::vector<int> *lens = nullptr, int cmd = 0,
int64_t priority = 0);
void LookupIdPartitioner(const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void SparsePartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void RoundRobinPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void WorkerInitEmbeddingPartitioner(const KVMessage &send, std::vector<std::pair<bool, KVMessage>> *partition,
const std::map<int64_t, int64_t> &attrs);
void UpdateEmbeddingPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void BroadcastPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void SendForPush(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
const std::map<int64_t, int64_t> &attrs);
void SendForPull(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
const std::map<int64_t, int64_t> &attrs, std::vector<float> *vals, std::vector<int> *lens);
int64_t server_num_;
bool running_;
std::mutex running_mutex_;
size_t key_cnt_;
std::map<std::string, size_t> param_to_key_;
std::map<size_t, bool> init_keys_;
std::map<size_t, int64_t> key_to_optimId_;
std::map<size_t, std::vector<ShapeVector>> key_to_optim_shapes_;
std::map<std::string, bool> param_to_init_in_server_;
core::WorkerNode worker_node_;
EmbeddingPartitioner lookup_partitioner_;
KVPartitioner sparse_partitioner_;
KVPartitioner round_robin_partitioner_;
KVPartitioner worker_init_embedding_partitioner_;
KVPartitioner update_embedding_partitioner_;
KVPartitioner broadcast_partitioner_;
std::unordered_map<Key, int64_t> key_to_server_id_;
std::unordered_map<Key, size_t> embedding_row_cnt_;
std::unordered_map<Key, std::shared_ptr<std::vector<EmbeddingTableShardMetadata>>> embedding_table_ranges_;
};
static Worker &worker = Worker::GetInstance();
} // namespace internal
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_INTERNAL_WORKER_H_
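
Likewise, the internal Worker header above is deleted; callers reach the worker through the ps::worker singleton reference declared at the bottom of the file. A minimal sketch of the embedding-table setup that the proxy-kernel hunk performs, assuming InitPSEmbeddingTable now takes the five arguments used there (including the ParamInitInfoMessage); SetUpEmbeddingTable, key, row_count and the shape vectors are placeholders:

    #include <vector>
    #include "ps/worker.h"

    // Register a table for one parameter key, then push its shapes to the servers.
    void SetUpEmbeddingTable(const mindspore::ps::Key &key, size_t row_count,
                             const std::vector<size_t> &input_shape,
                             const std::vector<size_t> &indices_shape,
                             const std::vector<size_t> &output_shape) {
      mindspore::ps::worker.AddEmbeddingTable(key, row_count);
      mindspore::ps::ParamInitInfoMessage info;  // default metadata, as in the kernel hunk
      mindspore::ps::worker.InitPSEmbeddingTable(key, input_shape, indices_shape, output_shape, info);
    }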

@@ -84,7 +84,7 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
for (size_t i = 0; i < grad_index; i++) {
grad_offset += lengths[i];
}
float *grad_data = values.data() + grad_offset;
float *grad_data = const_cast<float *>(values.data()) + grad_offset;
CHECK_EQ(size, static_cast<size_t>(lengths[grad_index]));
for (size_t i = 0; i < size; i++) {
@@ -121,7 +121,7 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
for (size_t i = 0; i < grad_index; i++) {
grad_offset += lengths[i];
}
float *incr_grad_data = values.data() + grad_offset;
float *incr_grad_data = const_cast<float *>(values.data()) + grad_offset;
MS_EXCEPTION_IF_NULL(incr_grad_data);
size_t incr_grad_size = lengths[grad_index] * sizeof(float);
@@ -148,7 +148,11 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
for (size_t i = 0; i < indices_index; i++) {
indice_offset += lengths[i];
}
int *incr_indice_data = reinterpret_cast<int *>(values.data()) + indice_offset;
void *incr_indice_data_temp = const_cast<float *>(values.data()) + indice_offset;
int *incr_indice_data = reinterpret_cast<int *>(incr_indice_data_temp);
MS_EXCEPTION_IF_NULL(incr_indice_data);
size_t incr_indice_size = lengths[indices_index];
size_t incr_indice_data_size = incr_indice_size * sizeof(int);
@@ -259,7 +263,7 @@ MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr
}
void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
UpdateOptimInputValue<float>(kApplyMomentum, "lr", values.data(), lens);
UpdateOptimInputValue<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
}
const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }
@@ -303,12 +307,12 @@ SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const Address
}
void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
UpdateOptimInputValue<float>(kSparseAdam, "beta1_power", values.data(), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta2_power", values.data(), lens);
UpdateOptimInputValue<float>(kSparseAdam, "lr", values.data(), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta1", values.data(), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta2", values.data(), lens);
UpdateOptimInputValue<float>(kSparseAdam, "eps", values.data(), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta1_power", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta2_power", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "lr", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta1", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta2", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "eps", const_cast<float *>(values.data()), lens);
}
const AddressPtr &SparseAdamOptimInfo::gradient() {

@@ -20,7 +20,7 @@
#include <vector>
#include <string>
#include "backend/kernel_compiler/kernel.h"
#include "ps/common.h"
#include "ps/constants.h"
namespace mindspore {
namespace ps {

@@ -129,9 +129,9 @@ OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, co
return nullptr;
}
AddressPtr learning_rate = GenInputAddrPtr<float>(kApplyMomentum, "lr", values.data(), lens);
AddressPtr gradient = GenInputAddrPtr<float>(kApplyMomentum, "grad", values.data(), lens);
AddressPtr momentum = GenInputAddrPtr<float>(kApplyMomentum, "momentum", values.data(), lens);
AddressPtr learning_rate = GenInputAddrPtr<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
AddressPtr gradient = GenInputAddrPtr<float>(kApplyMomentum, "grad", const_cast<float *>(values.data()), lens);
AddressPtr momentum = GenInputAddrPtr<float>(kApplyMomentum, "momentum", const_cast<float *>(values.data()), lens);
return new MomentumOptimInfo(weight_addr, accumulate, learning_rate, gradient, momentum);
}
@@ -172,14 +172,15 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
return nullptr;
}
AddressPtr beta1_power = GenInputAddrPtr<float>(kSparseAdam, "beta1_power", values.data(), lens);
AddressPtr beta2_power = GenInputAddrPtr<float>(kSparseAdam, "beta2_power", values.data(), lens);
AddressPtr learning_rate = GenInputAddrPtr<float>(kSparseAdam, "lr", values.data(), lens);
AddressPtr beta1 = GenInputAddrPtr<float>(kSparseAdam, "beta1", values.data(), lens);
AddressPtr beta2 = GenInputAddrPtr<float>(kSparseAdam, "beta2", values.data(), lens);
AddressPtr epsilon = GenInputAddrPtr<float>(kSparseAdam, "eps", values.data(), lens);
AddressPtr grad = GenInputAddrPtr<float>(kSparseAdam, "grad", values.data(), lens, inputs_shape);
AddressPtr indices = GenInputAddrPtr<float>(kSparseAdam, "indices", values.data(), lens, inputs_shape);
AddressPtr beta1_power = GenInputAddrPtr<float>(kSparseAdam, "beta1_power", const_cast<float *>(values.data()), lens);
AddressPtr beta2_power = GenInputAddrPtr<float>(kSparseAdam, "beta2_power", const_cast<float *>(values.data()), lens);
AddressPtr learning_rate = GenInputAddrPtr<float>(kSparseAdam, "lr", const_cast<float *>(values.data()), lens);
AddressPtr beta1 = GenInputAddrPtr<float>(kSparseAdam, "beta1", const_cast<float *>(values.data()), lens);
AddressPtr beta2 = GenInputAddrPtr<float>(kSparseAdam, "beta2", const_cast<float *>(values.data()), lens);
AddressPtr epsilon = GenInputAddrPtr<float>(kSparseAdam, "eps", const_cast<float *>(values.data()), lens);
AddressPtr grad = GenInputAddrPtr<float>(kSparseAdam, "grad", const_cast<float *>(values.data()), lens, inputs_shape);
AddressPtr indices =
GenInputAddrPtr<float>(kSparseAdam, "indices", const_cast<float *>(values.data()), lens, inputs_shape);
return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon,
grad, indices, sharded);
}
@@ -218,8 +219,9 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
}
linear->size = weight->size() * sizeof(float);
AddressPtr grad = GenInputAddrPtr<float>(kSparseFtrl, "grad", values.data(), lens, inputs_shape);
AddressPtr indices = GenInputAddrPtr<float>(kSparseFtrl, "indices", values.data(), lens, inputs_shape);
AddressPtr grad = GenInputAddrPtr<float>(kSparseFtrl, "grad", const_cast<float *>(values.data()), lens, inputs_shape);
AddressPtr indices =
GenInputAddrPtr<float>(kSparseFtrl, "indices", const_cast<float *>(values.data()), lens, inputs_shape);
return new SparseFtrlOptimInfo(weight_addr, accum, linear, grad, indices, sharded);
}
} // namespace ps
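
The const_cast additions throughout this file follow from the container change: with ps-lite, Values was ::ps::SArray<float> (see the removed common.h earlier in this diff), whose data() hands back a mutable pointer even from a const object, while the replacement type is presumably std::vector-like, so data() on a const Values & yields const float *. The cast bridges that to the existing address helpers, which still expect float *:

    // values is a const Values & parameter; the legacy Gen/Update helpers still take float *.
    float *grad_data = const_cast<float *>(values.data()) + grad_offset;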

Some files were not shown because too many files have changed in this diff.