Merge pull request #14873 from colourful-tree/develop
add pslib(pserver) to paddle, an industrial scale high performance parameter server libraryfor_weibo
commit
44ad2f4479
@ -0,0 +1,78 @@
|
||||
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Downloads the prebuilt LIBMCT package with ExternalProject, installs its
# include/ and lib/ directories under ${THIRD_PARTY_PATH}/install/libmct and
# exposes a `libmct` target that other targets can depend on.

# Nothing to do unless LIBMCT was requested. The variable name is passed
# un-expanded so the check stays well-formed even when the option is unset.
IF(NOT WITH_LIBMCT)
  return()
ENDIF()

IF(WIN32 OR APPLE)
  # The two fragments are concatenated by MESSAGE(), hence the trailing space.
  MESSAGE(WARNING
    "Windows or Mac is not supported with LIBMCT in Paddle yet. "
    "Force WITH_LIBMCT=OFF")
  SET(WITH_LIBMCT OFF CACHE STRING "Disable LIBMCT package in Windows and MacOS" FORCE)
  return()
ENDIF()

INCLUDE(ExternalProject)

SET(LIBMCT_PROJECT "extern_libmct")
IF((NOT DEFINED LIBMCT_VER) OR (NOT DEFINED LIBMCT_URL))
  MESSAGE(STATUS "use pre defined download url")
  SET(LIBMCT_VER "0.1.0" CACHE STRING "" FORCE)
  SET(LIBMCT_NAME "libmct" CACHE STRING "" FORCE)
  SET(LIBMCT_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${LIBMCT_VER}/${LIBMCT_NAME}.tar.gz" CACHE STRING "" FORCE)
ENDIF()
# A caller may supply LIBMCT_VER and LIBMCT_URL without LIBMCT_NAME; the
# archive and directory names below still need a value in that case.
IF(NOT DEFINED LIBMCT_NAME)
  SET(LIBMCT_NAME "libmct" CACHE STRING "")
ENDIF()
MESSAGE(STATUS "LIBMCT_NAME: ${LIBMCT_NAME}, LIBMCT_URL: ${LIBMCT_URL}")

SET(LIBMCT_SOURCE_DIR "${THIRD_PARTY_PATH}/libmct")
SET(LIBMCT_DOWNLOAD_DIR "${LIBMCT_SOURCE_DIR}/src/${LIBMCT_PROJECT}")
SET(LIBMCT_DST_DIR "libmct")
SET(LIBMCT_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
SET(LIBMCT_INSTALL_DIR ${LIBMCT_INSTALL_ROOT}/${LIBMCT_DST_DIR})
SET(LIBMCT_ROOT ${LIBMCT_INSTALL_DIR})
SET(LIBMCT_INC_DIR ${LIBMCT_ROOT}/include)
# LIST(APPEND) avoids the empty leading element that re-quoting an empty
# CMAKE_INSTALL_RPATH would introduce.
LIST(APPEND CMAKE_INSTALL_RPATH "${LIBMCT_ROOT}/lib")

INCLUDE_DIRECTORIES(${LIBMCT_INC_DIR})

# The downloaded archive ships no build system, so generate a minimal
# CMakeLists.txt whose only job is to install the unpacked directories.
# cmake_minimum_required() must precede PROJECT() so policies are set first.
FILE(WRITE ${LIBMCT_DOWNLOAD_DIR}/CMakeLists.txt
  "cmake_minimum_required(VERSION 3.0)\n"
  "PROJECT(LIBMCT)\n"
  "install(DIRECTORY ${LIBMCT_NAME}/include ${LIBMCT_NAME}/lib \n"
  "        DESTINATION ${LIBMCT_DST_DIR})\n")

# NOTE(review): --no-check-certificate disables TLS verification for the
# download; kept as in the original, but worth revisiting.
ExternalProject_Add(
  ${LIBMCT_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  PREFIX                ${LIBMCT_SOURCE_DIR}
  DOWNLOAD_DIR          ${LIBMCT_DOWNLOAD_DIR}
  DOWNLOAD_COMMAND      wget --no-check-certificate ${LIBMCT_URL} -c -q -O ${LIBMCT_NAME}.tar.gz
                        && tar zxvf ${LIBMCT_NAME}.tar.gz
  DOWNLOAD_NO_PROGRESS  1
  UPDATE_COMMAND        ""
  CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT}
  CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT}
)

# `libmct` only carries the dependency on the external project. Where an
# INTERFACE library is not used, a one-line dummy source gives the static
# library something to compile.
IF(CMAKE_VERSION VERSION_LESS "3.3.0" OR NOT WIN32)
  # Use a file name owned by this module rather than the boost one.
  SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/libmct_dummy.c)
  FILE(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
  ADD_LIBRARY(libmct STATIC ${dummyfile})
ELSE()
  ADD_LIBRARY(libmct INTERFACE)
ENDIF()

ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT})
LIST(APPEND external_project_dependencies libmct)
|
||||
|
@ -0,0 +1,77 @@
|
||||
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Downloads the prebuilt PSLIB package with ExternalProject, installs its
# include/ and lib/ directories under ${THIRD_PARTY_PATH}/install/pslib and
# exposes the shared library as the imported target `pslib`.

# Nothing to do unless PSLIB was requested. The variable name is passed
# un-expanded so the check stays well-formed even when the option is unset.
IF(NOT WITH_PSLIB)
  return()
ENDIF()

IF(WIN32 OR APPLE)
  # The two fragments are concatenated by MESSAGE(), hence the trailing space.
  MESSAGE(WARNING
    "Windows or Mac is not supported with PSLIB in Paddle yet. "
    "Force WITH_PSLIB=OFF")
  SET(WITH_PSLIB OFF CACHE STRING "Disable PSLIB package in Windows and MacOS" FORCE)
  return()
ENDIF()

INCLUDE(ExternalProject)

SET(PSLIB_PROJECT "extern_pslib")
IF((NOT DEFINED PSLIB_VER) OR (NOT DEFINED PSLIB_URL))
  MESSAGE(STATUS "use pre defined download url")
  SET(PSLIB_VER "0.1.0" CACHE STRING "" FORCE)
  SET(PSLIB_NAME "pslib" CACHE STRING "" FORCE)
  SET(PSLIB_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_VER}/${PSLIB_NAME}.tar.gz" CACHE STRING "" FORCE)
ENDIF()
# A caller may supply PSLIB_VER and PSLIB_URL without PSLIB_NAME; the archive
# and directory names below still need a value in that case.
IF(NOT DEFINED PSLIB_NAME)
  SET(PSLIB_NAME "pslib" CACHE STRING "")
ENDIF()
MESSAGE(STATUS "PSLIB_NAME: ${PSLIB_NAME}, PSLIB_URL: ${PSLIB_URL}")

SET(PSLIB_SOURCE_DIR "${THIRD_PARTY_PATH}/pslib")
SET(PSLIB_DOWNLOAD_DIR "${PSLIB_SOURCE_DIR}/src/${PSLIB_PROJECT}")
SET(PSLIB_DST_DIR "pslib")
SET(PSLIB_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
SET(PSLIB_INSTALL_DIR ${PSLIB_INSTALL_ROOT}/${PSLIB_DST_DIR})
SET(PSLIB_ROOT ${PSLIB_INSTALL_DIR})
SET(PSLIB_INC_DIR ${PSLIB_ROOT}/include)
SET(PSLIB_LIB_DIR ${PSLIB_ROOT}/lib)
SET(PSLIB_LIB ${PSLIB_LIB_DIR}/libps.so)
# TODO(original author): the role of libiomp5.so in this package is not
# documented; it is only used by the WITH_C_API install rule below.
SET(PSLIB_IOMP_LIB ${PSLIB_LIB_DIR}/libiomp5.so)
# LIST(APPEND) avoids the empty leading element that re-quoting an empty
# CMAKE_INSTALL_RPATH would introduce.
LIST(APPEND CMAKE_INSTALL_RPATH "${PSLIB_ROOT}/lib")

INCLUDE_DIRECTORIES(${PSLIB_INC_DIR})

# The downloaded archive ships no build system, so generate a minimal
# CMakeLists.txt whose only job is to install the unpacked directories.
# cmake_minimum_required() must precede PROJECT() so policies are set first.
FILE(WRITE ${PSLIB_DOWNLOAD_DIR}/CMakeLists.txt
  "cmake_minimum_required(VERSION 3.0)\n"
  "PROJECT(PSLIB)\n"
  "install(DIRECTORY ${PSLIB_NAME}/include ${PSLIB_NAME}/lib \n"
  "        DESTINATION ${PSLIB_DST_DIR})\n")

# NOTE(review): --no-check-certificate disables TLS verification for the
# download; kept as in the original, but worth revisiting.
ExternalProject_Add(
  ${PSLIB_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  PREFIX                ${PSLIB_SOURCE_DIR}
  DOWNLOAD_DIR          ${PSLIB_DOWNLOAD_DIR}
  DOWNLOAD_COMMAND      wget --no-check-certificate ${PSLIB_URL} -c -q -O ${PSLIB_NAME}.tar.gz
                        && tar zxvf ${PSLIB_NAME}.tar.gz
  DOWNLOAD_NO_PROGRESS  1
  UPDATE_COMMAND        ""
  CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT}
  CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT}
)

# Imported target pointing at the installed shared object; the dependency
# makes consumers wait for the download/install step.
ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB})
ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT})
LIST(APPEND external_project_dependencies pslib)

IF(WITH_C_API)
  INSTALL(FILES ${PSLIB_LIB} ${PSLIB_IOMP_LIB} DESTINATION lib)
ENDIF()
|
@ -0,0 +1,77 @@
|
||||
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Downloads the prebuilt PSLIB_BRPC package with ExternalProject, installs its
# include/ and lib/ directories under ${THIRD_PARTY_PATH}/install/pslib_brpc
# and exposes libbrpc.a as the imported target `pslib_brpc`.

# Nothing to do unless PSLIB_BRPC was requested. The variable name is passed
# un-expanded so the check stays well-formed even when the option is unset.
IF(NOT WITH_PSLIB_BRPC)
  return()
ENDIF()

IF(WIN32 OR APPLE)
  # The two fragments are concatenated by MESSAGE(), hence the trailing space.
  MESSAGE(WARNING
    "Windows or Mac is not supported with PSLIB_BRPC in Paddle yet. "
    "Force WITH_PSLIB_BRPC=OFF")
  SET(WITH_PSLIB_BRPC OFF CACHE STRING "Disable PSLIB_BRPC package in Windows and MacOS" FORCE)
  return()
ENDIF()

INCLUDE(ExternalProject)

SET(PSLIB_BRPC_PROJECT "extern_pslib_brpc")
IF((NOT DEFINED PSLIB_BRPC_NAME) OR (NOT DEFINED PSLIB_BRPC_URL))
  MESSAGE(STATUS "use pre defined download url")
  SET(PSLIB_BRPC_VER "0.1.0" CACHE STRING "" FORCE)
  SET(PSLIB_BRPC_NAME "pslib_brpc" CACHE STRING "" FORCE)
  SET(PSLIB_BRPC_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_BRPC_VER}/${PSLIB_BRPC_NAME}.tar.gz" CACHE STRING "" FORCE)
ENDIF()
MESSAGE(STATUS "PSLIB_BRPC_NAME: ${PSLIB_BRPC_NAME}, PSLIB_BRPC_URL: ${PSLIB_BRPC_URL}")

SET(PSLIB_BRPC_SOURCE_DIR "${THIRD_PARTY_PATH}/pslib_brpc")
SET(PSLIB_BRPC_DOWNLOAD_DIR "${PSLIB_BRPC_SOURCE_DIR}/src/${PSLIB_BRPC_PROJECT}")
SET(PSLIB_BRPC_DST_DIR "pslib_brpc")
SET(PSLIB_BRPC_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
SET(PSLIB_BRPC_INSTALL_DIR ${PSLIB_BRPC_INSTALL_ROOT}/${PSLIB_BRPC_DST_DIR})
SET(PSLIB_BRPC_ROOT ${PSLIB_BRPC_INSTALL_DIR})
SET(PSLIB_BRPC_INC_DIR ${PSLIB_BRPC_ROOT}/include)
SET(PSLIB_BRPC_LIB_DIR ${PSLIB_BRPC_ROOT}/lib)
SET(PSLIB_BRPC_LIB ${PSLIB_BRPC_LIB_DIR}/libbrpc.a)
# TODO(original author): the role of libiomp5.so in this package is not
# documented; it is only used by the WITH_C_API install rule below.
SET(PSLIB_BRPC_IOMP_LIB ${PSLIB_BRPC_LIB_DIR}/libiomp5.so)
# LIST(APPEND) avoids the empty leading element that re-quoting an empty
# CMAKE_INSTALL_RPATH would introduce.
LIST(APPEND CMAKE_INSTALL_RPATH "${PSLIB_BRPC_ROOT}/lib")

INCLUDE_DIRECTORIES(${PSLIB_BRPC_INC_DIR})

# The downloaded archive ships no build system, so generate a minimal
# CMakeLists.txt whose only job is to install the unpacked directories.
# cmake_minimum_required() must precede PROJECT() so policies are set first.
FILE(WRITE ${PSLIB_BRPC_DOWNLOAD_DIR}/CMakeLists.txt
  "cmake_minimum_required(VERSION 3.0)\n"
  "PROJECT(PSLIB_BRPC)\n"
  "install(DIRECTORY ${PSLIB_BRPC_NAME}/include ${PSLIB_BRPC_NAME}/lib \n"
  "        DESTINATION ${PSLIB_BRPC_DST_DIR})\n")

# NOTE(review): --no-check-certificate disables TLS verification for the
# download; kept as in the original, but worth revisiting.
ExternalProject_Add(
  ${PSLIB_BRPC_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  PREFIX                ${PSLIB_BRPC_SOURCE_DIR}
  DOWNLOAD_DIR          ${PSLIB_BRPC_DOWNLOAD_DIR}
  DOWNLOAD_COMMAND      wget --no-check-certificate ${PSLIB_BRPC_URL} -c -q -O ${PSLIB_BRPC_NAME}.tar.gz
                        && tar zxvf ${PSLIB_BRPC_NAME}.tar.gz
  DOWNLOAD_NO_PROGRESS  1
  UPDATE_COMMAND        ""
  CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT}
  CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT}
)

# IMPORTED_LOCATION is the static archive libbrpc.a, so the imported target
# is declared STATIC to match the file it points at.
ADD_LIBRARY(pslib_brpc STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB})
ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT})
LIST(APPEND external_project_dependencies pslib_brpc)

IF(WITH_C_API)
  INSTALL(FILES ${PSLIB_BRPC_LIB} ${PSLIB_BRPC_IOMP_LIB} DESTINATION lib)
ENDIF()
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,12 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
# limitations under the License.
|
@ -0,0 +1,105 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
# limitations under the License.
|
||||
|
||||
from .node import DownpourServer
|
||||
from .node import DownpourWorker
|
||||
from ..backward import append_backward
|
||||
import ps_pb2 as pslib
|
||||
from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table
|
||||
from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_inputs
|
||||
from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_outputs
|
||||
from google.protobuf import text_format
|
||||
|
||||
|
||||
class DownpourSGD(object):
    """
    Distributed optimizer of downpour stochastic gradient descent.
    Standard implementation of Google's Downpour SGD
    in Large Scale Distributed Deep Networks.

    Args:
        learning_rate (float): the learning rate used to update parameters.
            Can be a float value.
        window (int): value handed to DownpourWorker when the worker
            description is built.
    Examples:
        .. code-block:: python

            downpour_sgd = fluid.distributed.DownpourSGD(learning_rate=0.2)
            downpour_sgd.minimize(cost)
    """

    def __init__(self, learning_rate=0.001, window=1):
        # todo(guru4elephant): accept further optimizers as an argument
        # todo(guru4elephant): allow learning_rate to be a variable
        self.learning_rate_ = learning_rate
        self.window_ = window
        self.type = "downpour"

    def minimize(self,
                 loss,
                 startup_program=None,
                 parameter_list=None,
                 no_grad_set=None):
        """
        DownpourSGD is a distributed optimizer, so calling minimize appends
        the backward operators and builds the parameter server description.

        Args:
            loss(Variable): loss variable defined by user
            startup_program(Program): startup program that defined by user
            parameter_list(str list): parameter names defined by users
            no_grad_set(set): a set of variables that is defined by users
                so that these variables do not need gradient computation
        Returns:
            [ps_param, worker_skipped_ops]
            ps_param: parameter server protobuf desc
            worker_skipped_ops: operator names that need
                to be skipped during execution
        """
        backward_pairs = append_backward(loss, parameter_list, no_grad_set)
        # Order the (param, grad) pairs by parameter name.
        params_grads = sorted(backward_pairs, key=lambda pair: pair[0].name)

        table_name = find_distributed_lookup_table(loss.block.program)
        prefetch_slots = find_distributed_lookup_table_inputs(
            loss.block.program, table_name)
        prefetch_slots_emb = find_distributed_lookup_table_outputs(
            loss.block.program, table_name)

        server = DownpourServer()
        # The window value selects the communication strategy.
        worker = DownpourWorker(self.window_)

        # Todo(guru4elephant): support multiple table definitions.
        # For now there is one big sparse table, and all dense parameters
        # are merged into a single dense table.
        sparse_table_index = 0
        dense_table_index = 1

        params = [pair[0] for pair in params_grads]
        grads = [pair[1] for pair in params_grads]

        server.add_sparse_table(sparse_table_index, self.learning_rate_,
                                prefetch_slots, prefetch_slots_emb)
        server.add_dense_table(dense_table_index, self.learning_rate_, params,
                               grads)
        worker.add_sparse_table(sparse_table_index, self.learning_rate_,
                                prefetch_slots, prefetch_slots_emb)
        worker.add_dense_table(dense_table_index, self.learning_rate_, params,
                               grads)

        ps_param = pslib.PSParameter()
        ps_param.server_param.CopyFrom(server.get_desc())
        ps_param.trainer_param.CopyFrom(worker.get_desc())

        # Todo(guru4elephant): work out how to support more sparse parameters;
        # only lookup_table is handled at present.
        worker_skipped_ops = ["lookup_table", "lookup_table_grad"]
        ps_param.trainer_param.skip_op.extend(worker_skipped_ops)
        return [ps_param, worker_skipped_ops]
|
@ -0,0 +1,85 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
class FileSystem(object):
    """
    A file system description that carries the hadoop client settings used
    by async_executor.

    Args:
        fs_type (string): fs_type, for example is "afs". Accepted by the
            constructor but not stored on the description.
        uri (string): file system uri, copied to the description
        user (string): hadoop param, must not be None
        passwd (string): hadoop param, must not be None
        hadoop_bin (string): hadoop param, must not be None
    Examples:
        fs = FileSystem(user="name", passwd="secret")
    """

    def __init__(self,
                 fs_type="afs",
                 uri="afs://xx",
                 user=None,
                 passwd=None,
                 hadoop_bin=""):
        # Identity checks are the idiomatic None test and cannot be fooled
        # by objects that override comparison operators.
        assert user is not None
        assert passwd is not None
        assert hadoop_bin is not None
        # Imported lazily so the module can be loaded without ps_pb2.
        import ps_pb2 as pslib
        self.fs_client = pslib.FsClientParameter()
        self.fs_client.uri = uri
        self.fs_client.user = user
        self.fs_client.passwd = passwd
        self.fs_client.hadoop_bin = hadoop_bin

    def get_desc(self):
        """
        get hadoop desc.
        """
        return self.fs_client
|
||||
|
||||
|
||||
class MPIHelper(object):
    """
    Thin wrapper around mpi4py that exposes rank, size, host name and
    ip lookups.

    Args:
        No params
    Examples:
        mh = MPIHelper()
        mh.get_ip()
    """

    def __init__(self):
        # mpi4py is imported here rather than at module level.
        from mpi4py import MPI
        self.comm = MPI.COMM_WORLD
        self.MPI = MPI

    def get_rank(self):
        """Return the rank reported by the communicator."""
        rank = self.comm.Get_rank()
        return rank

    def get_size(self):
        """Return the size reported by the communicator."""
        size = self.comm.Get_size()
        return size

    def get_ip(self):
        """Return the address the local host name resolves to."""
        import socket
        host = socket.gethostname()
        return socket.gethostbyname(host)

    def get_hostname(self):
        """Return the local host name."""
        import socket
        host = socket.gethostname()
        return host

    def finalize(self):
        """Call Finalize on the stored MPI module."""
        self.MPI.Finalize()
|
@ -0,0 +1,179 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
# limitations under the License.
|
||||
|
||||
import ps_pb2 as pslib
|
||||
|
||||
|
||||
class Server(object):
    """
    Base class for server descriptions; it holds no state of its own.
    """

    def __init__(self):
        """Nothing to initialise."""
|
||||
|
||||
|
||||
class Worker(object):
    """
    Base class for worker descriptions; it holds no state of its own.
    """

    def __init__(self):
        """Nothing to initialise."""
|
||||
|
||||
|
||||
class DownpourServer(Server):
    """
    DownpourServer class is used to generate server program_desc

    The description is a pslib.ServerParameter() that is filled in by the
    constructor and the add_*_table methods.

    Examples:
        server = DownpourServer()
    """

    def __init__(self):
        self.server_ = pslib.ServerParameter()
        service = self.server_.downpour_server_param.service_param
        service.start_server_port = 0
        service.server_class = "DownpourBrpcPsServer"
        service.client_class = "DownpourBrpcPsClient"
        service.service_class = "DownpourPsService"
        service.server_thread_num = 12

    def add_sparse_table(self, table_id, learning_rate, slot_key_vars,
                         slot_value_var):
        """
        Append a sparse table description to the server.

        Args:
            table_id(int): id of sparse params table
            learning_rate(float): the learning rate used to update parameters.
                Can be a float value
            slot_key_vars(string): slot key id (not read by this method)
            slot_value_var(string): slot key value after embedding
                (not read by this method)
        Returns:
            return None
        """
        table = self.server_.downpour_server_param.downpour_table_param.add()
        table.table_id = table_id
        table.table_class = "DownpourSparseTable"
        table.type = pslib.PS_SPARSE_TABLE
        table.accessor.accessor_class = "DownpourFeatureValueAccessor"
        table.accessor.sparse_sgd_param.learning_rate = learning_rate
        table.accessor.sparse_sgd_param.initial_g2sum = 3
        table.accessor.sparse_sgd_param.initial_range = 1e-4
        table.accessor.sparse_sgd_param.weight_bounds.extend([-10, 10])

        table.accessor.embedx_dim = 8
        table.accessor.embedx_threshold = 5
        table.accessor.fea_dim = 11
        table.accessor.downpour_accessor_param.nonclk_coeff = 0.1
        table.accessor.downpour_accessor_param.click_coeff = 2
        table.accessor.downpour_accessor_param.base_threshold = 0.2
        table.accessor.downpour_accessor_param.delta_threshold = 0.15
        table.accessor.downpour_accessor_param.delta_keep_days = 31
        table.accessor.downpour_accessor_param.show_click_decay_rate = 0.999
        table.accessor.downpour_accessor_param.delete_threshold = 0.8

    def add_dense_table(self, table_id, learning_rate, param_var, grad_var):
        """
        Append a dense table description to the server.

        Args:
            table_id(int): id of dense params table
            learning_rate(float): the learning rate used to update parameters.
                Can be a float value
            param_var(list): all dense param. it is a list.
            grad_var(list): all dense grad param. it is a list
                (not read by this method).
        Returns:
            return None
        """
        # reduce is not a builtin on Python 3; functools provides it on
        # both Python 2 and Python 3.
        from functools import reduce

        table = self.server_.downpour_server_param.downpour_table_param.add()
        table.table_id = table_id
        table.table_class = "DownpourDenseTable"
        table.type = pslib.PS_DENSE_TABLE
        table.accessor.accessor_class = "DownpourDenseValueAccessor"
        table.accessor.dense_sgd_param.name = "adam"
        table.accessor.dense_sgd_param.adam.learning_rate = learning_rate
        table.accessor.dense_sgd_param.adam.avg_decay_rate = 0.999993
        table.accessor.dense_sgd_param.adam.ada_decay_rate = 0.9999
        table.accessor.dense_sgd_param.adam.ada_epsilon = 1e-8
        table.accessor.dense_sgd_param.adam.mom_decay_rate = 0.99
        table.accessor.dense_sgd_param.naive.learning_rate = 0.0002

        # fea_dim is the total element count of every parameter whose name
        # does not contain "embedding".
        fea_dim = 0
        for param in param_var:
            if param.name.find("embedding") != -1:
                continue
            fea_dim += reduce(lambda x, y: x * y, param.shape, 1)
        table.accessor.fea_dim = fea_dim

    def get_desc(self):
        """
        Return downpour server program_desc
        """
        return self.server_
|
||||
|
||||
|
||||
class DownpourWorker(Worker):
    """
    DownpourWorker class is used to generate worker program_desc

    The description is a pslib.DownpourTrainerParameter.

    Args:
        window (int): push params frequency
    Examples:
        worker = DownpourWorker(1)
    """

    def __init__(self, window):
        self.window = window
        self.worker_ = pslib.DownpourTrainerParameter()

    def add_sparse_table(self, table_id, learning_rate, slot_key_vars,
                         slot_value_vars):
        """
        Append a sparse table entry to the worker description.

        Args:
            table_id(int): id of sparse params table
            learning_rate(float): the learning rate used to update parameters.
                Can be a float value (not read by this method)
            slot_key_vars(list): variables holding the slot key ids
            slot_value_vars(list): variables holding the slot values
                after embedding
        Returns:
            return None
        """
        key_names = [var.name for var in slot_key_vars]
        value_names = [var.name for var in slot_value_vars]
        # Gradient names are the value names with "@GRAD" appended.
        gradient_names = [var.name + "@GRAD" for var in slot_value_vars]

        table = self.worker_.sparse_table.add()
        table.table_id = table_id
        table.slot_key.extend(key_names)
        table.slot_value.extend(value_names)
        table.slot_gradient.extend(gradient_names)

    def add_dense_table(self, table_id, learning_rate, param_vars, grad_vars):
        """
        Append a dense table entry to the worker description.

        Args:
            table_id(int): id of dense params table
            learning_rate(float): the learning rate used to update parameters.
                Can be a float value (not read by this method)
            param_vars(list): all dense param. it is a list.
            grad_vars(list): all dense grad param. it is a list.
        Returns:
            return None
        """
        # Names containing "embedding" are left out of the dense table.
        dense_params = [
            p.name for p in param_vars if p.name.find("embedding") == -1
        ]
        dense_grads = [
            g.name for g in grad_vars if g.name.find("embedding") == -1
        ]

        table = self.worker_.dense_table.add()
        table.table_id = table_id
        table.dense_variable_name.extend(dense_params)
        table.dense_gradient_variable_name.extend(dense_grads)

    def get_desc(self):
        """
        Return downpour worker program_desc
        """
        return self.worker_
|
@ -0,0 +1,148 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
# limitations under the License.
|
||||
|
||||
from .helper import MPIHelper
|
||||
|
||||
|
||||
class PaddlePSInstance(object):
    """
    PaddlePSInstance class is used to generate an instance of server or worker

    Args:
        server_worker_mode: is a value 0 or 1, default is 1. In mode 0 the
            lowest ranks are workers and the following ranks are servers;
            in mode 1 servers and workers alternate by rank.
        proc_per_node: process per node, default is 2
    Examples:
        instance = PaddlePSInstance(1, 2)
    """

    def __init__(self, server_worker_mode=1, proc_per_node=2):
        self.dh = MPIHelper()
        self._rankid = self.dh.get_rank()
        self._server_worker_mode = server_worker_mode
        self._proc_per_node = proc_per_node
        self._nodes = self.dh.get_size()

        self._ip = 0
        # Floor division keeps the counts integral on Python 3 as well.
        self._worker_num = self._nodes * self._proc_per_node // 2
        self._server_num = self._nodes * self._proc_per_node // 2
        self._total_server_worker = self._worker_num + self._server_num
        self._node_type = None  # IDLE=-1, WORKER=1, SERVER=0
        self._set_nodetype()
        self._comm = None
        self._split_comm()

    def _set_nodetype(self):
        """Derive the node type (idle, worker or server) from the rank."""
        if self._server_worker_mode == 0:
            if self._rankid < self._server_num:
                self._node_type = 1
            elif self._rankid < self._total_server_worker:
                self._node_type = 0
            else:
                self._node_type = -1
        elif self._server_worker_mode == 1:
            if self._rankid < self._total_server_worker:
                if 0 == self._rankid % self._proc_per_node % 2:
                    self._node_type = 0
                else:
                    self._node_type = 1
            else:
                self._node_type = -1
        else:
            self._node_type = -1

    def _split_comm(self):
        """Split the world communicator into server and worker groups."""
        if self.is_server() or self.is_worker():
            self._comm = self.dh.comm.Split(self._node_type)
        else:
            # Split is a collective call: every rank of the communicator has
            # to take part, otherwise the servers and workers block. Idle
            # ranks join with MPI.UNDEFINED and keep no sub-communicator.
            self.dh.comm.Split(self.dh.MPI.UNDEFINED)

    def get_worker_index(self):
        """
        Return worker index
        """
        if self._server_worker_mode == 0:
            # _set_nodetype places workers on the lowest ranks in this mode.
            # NOTE(review): the original expression compared the rank with a
            # non-existent attribute; confirm the intended numbering.
            return self._rankid
        else:
            return self._rankid // self._proc_per_node

    def get_server_index(self):
        """
        Return server index
        """
        if self._server_worker_mode == 0:
            # _set_nodetype places servers directly after the first
            # _server_num ranks in this mode.
            # NOTE(review): the original read a non-existent attribute;
            # confirm the intended numbering.
            return self._rankid - self._server_num
        else:
            return self._rankid // self._proc_per_node

    def is_worker(self):
        """
        Return instance is worker or not
        """
        return self._node_type == 1

    def is_server(self):
        """
        Return instance is server or not
        """
        return self._node_type == 0

    def is_first_worker(self):
        """
        Return instance is first worker or not
        """
        return self.is_worker() and 0 == self.get_worker_index()

    def set_ip(self, ip):
        """
        set server ip
        """
        self._ip = ip

    def gather_ips(self):
        """
        Return all servers and workers ip through mpi allgather
        """
        self._ips = self.dh.comm.allgather(self._ip)
        return self._ips

    def get_node_cnt(self):
        """
        Return node cnt
        """
        return self._nodes

    def barrier_all(self):
        """
        barrier workers and servers
        """
        self.dh.comm.barrier()

    def barrier_worker(self):
        """
        barrier workers
        """
        if self.is_worker():
            self._comm.barrier()

    def finalize(self):
        """
        MPI finalize
        """
        self.dh.finalize()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke run: the constructor takes (server_worker_mode, proc_per_node),
    # so build the instance with the two values used in the class example.
    instance = PaddlePSInstance(1, 2)
    instance.barrier_all()
|
File diff suppressed because one or more lines are too long
Loading…
Reference in new issue