Merge pull request #14873 from colourful-tree/develop

add pslib (pserver) to Paddle, an industrial-scale, high-performance parameter server library

commit
44ad2f4479
@@ -0,0 +1,78 @@
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

IF(NOT ${WITH_LIBMCT})
    return()
ENDIF(NOT ${WITH_LIBMCT})

IF(WIN32 OR APPLE)
    MESSAGE(WARNING
        "Windows or Mac is not supported with LIBMCT in Paddle yet."
        " Force WITH_LIBMCT=OFF")
    SET(WITH_LIBMCT OFF CACHE STRING "Disable LIBMCT package in Windows and MacOS" FORCE)
    return()
ENDIF()

INCLUDE(ExternalProject)

SET(LIBMCT_PROJECT "extern_libmct")
IF((NOT DEFINED LIBMCT_VER) OR (NOT DEFINED LIBMCT_URL))
    MESSAGE(STATUS "use pre-defined download url")
    SET(LIBMCT_VER "0.1.0" CACHE STRING "" FORCE)
    SET(LIBMCT_NAME "libmct" CACHE STRING "" FORCE)
    SET(LIBMCT_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${LIBMCT_VER}/${LIBMCT_NAME}.tar.gz" CACHE STRING "" FORCE)
ENDIF()
MESSAGE(STATUS "LIBMCT_NAME: ${LIBMCT_NAME}, LIBMCT_URL: ${LIBMCT_URL}")
SET(LIBMCT_SOURCE_DIR   "${THIRD_PARTY_PATH}/libmct")
SET(LIBMCT_DOWNLOAD_DIR "${LIBMCT_SOURCE_DIR}/src/${LIBMCT_PROJECT}")
SET(LIBMCT_DST_DIR      "libmct")
SET(LIBMCT_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
SET(LIBMCT_INSTALL_DIR  ${LIBMCT_INSTALL_ROOT}/${LIBMCT_DST_DIR})
SET(LIBMCT_ROOT         ${LIBMCT_INSTALL_DIR})
SET(LIBMCT_INC_DIR      ${LIBMCT_ROOT}/include)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${LIBMCT_ROOT}/lib")

INCLUDE_DIRECTORIES(${LIBMCT_INC_DIR})

FILE(WRITE ${LIBMCT_DOWNLOAD_DIR}/CMakeLists.txt
    "PROJECT(LIBMCT)\n"
    "cmake_minimum_required(VERSION 3.0)\n"
    "install(DIRECTORY ${LIBMCT_NAME}/include ${LIBMCT_NAME}/lib \n"
    "        DESTINATION ${LIBMCT_DST_DIR})\n")

ExternalProject_Add(
    ${LIBMCT_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX               ${LIBMCT_SOURCE_DIR}
    DOWNLOAD_DIR         ${LIBMCT_DOWNLOAD_DIR}
    DOWNLOAD_COMMAND     wget --no-check-certificate ${LIBMCT_URL} -c -q -O ${LIBMCT_NAME}.tar.gz
                         && tar zxvf ${LIBMCT_NAME}.tar.gz
    DOWNLOAD_NO_PROGRESS 1
    UPDATE_COMMAND       ""
    CMAKE_ARGS           -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT}
    CMAKE_CACHE_ARGS     -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT}
)

if (${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32)
    # renamed from boost_dummy.c so it cannot clash with the Boost dummy source
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/libmct_dummy.c)
    file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
    add_library(libmct STATIC ${dummyfile})
else()
    add_library(libmct INTERFACE)
endif()

#ADD_LIBRARY(libmct SHARED IMPORTED GLOBAL)
ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT})
LIST(APPEND external_project_dependencies libmct)
@@ -0,0 +1,77 @@
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

IF(NOT ${WITH_PSLIB})
    return()
ENDIF(NOT ${WITH_PSLIB})

IF(WIN32 OR APPLE)
    MESSAGE(WARNING
        "Windows or Mac is not supported with PSLIB in Paddle yet."
        " Force WITH_PSLIB=OFF")
    SET(WITH_PSLIB OFF CACHE STRING "Disable PSLIB package in Windows and MacOS" FORCE)
    return()
ENDIF()

INCLUDE(ExternalProject)

SET(PSLIB_PROJECT "extern_pslib")
IF((NOT DEFINED PSLIB_VER) OR (NOT DEFINED PSLIB_URL))
    MESSAGE(STATUS "use pre-defined download url")
    SET(PSLIB_VER "0.1.0" CACHE STRING "" FORCE)
    SET(PSLIB_NAME "pslib" CACHE STRING "" FORCE)
    SET(PSLIB_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_VER}/${PSLIB_NAME}.tar.gz" CACHE STRING "" FORCE)
ENDIF()
MESSAGE(STATUS "PSLIB_NAME: ${PSLIB_NAME}, PSLIB_URL: ${PSLIB_URL}")
SET(PSLIB_SOURCE_DIR   "${THIRD_PARTY_PATH}/pslib")
SET(PSLIB_DOWNLOAD_DIR "${PSLIB_SOURCE_DIR}/src/${PSLIB_PROJECT}")
SET(PSLIB_DST_DIR      "pslib")
SET(PSLIB_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
SET(PSLIB_INSTALL_DIR  ${PSLIB_INSTALL_ROOT}/${PSLIB_DST_DIR})
SET(PSLIB_ROOT         ${PSLIB_INSTALL_DIR})
SET(PSLIB_INC_DIR      ${PSLIB_ROOT}/include)
SET(PSLIB_LIB_DIR      ${PSLIB_ROOT}/lib)
SET(PSLIB_LIB          ${PSLIB_LIB_DIR}/libps.so)
SET(PSLIB_IOMP_LIB     ${PSLIB_LIB_DIR}/libiomp5.so) # TODO: check whether the bundled Intel OpenMP runtime is really required
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_ROOT}/lib")

INCLUDE_DIRECTORIES(${PSLIB_INC_DIR})

FILE(WRITE ${PSLIB_DOWNLOAD_DIR}/CMakeLists.txt
    "PROJECT(PSLIB)\n"
    "cmake_minimum_required(VERSION 3.0)\n"
    "install(DIRECTORY ${PSLIB_NAME}/include ${PSLIB_NAME}/lib \n"
    "        DESTINATION ${PSLIB_DST_DIR})\n")

ExternalProject_Add(
    ${PSLIB_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX               ${PSLIB_SOURCE_DIR}
    DOWNLOAD_DIR         ${PSLIB_DOWNLOAD_DIR}
    DOWNLOAD_COMMAND     wget --no-check-certificate ${PSLIB_URL} -c -q -O ${PSLIB_NAME}.tar.gz
                         && tar zxvf ${PSLIB_NAME}.tar.gz
    DOWNLOAD_NO_PROGRESS 1
    UPDATE_COMMAND       ""
    CMAKE_ARGS           -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT}
    CMAKE_CACHE_ARGS     -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT}
)

ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB})
ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT})
LIST(APPEND external_project_dependencies pslib)

IF(WITH_C_API)
    INSTALL(FILES ${PSLIB_LIB} ${PSLIB_IOMP_LIB} DESTINATION lib)
ENDIF()
@@ -0,0 +1,77 @@
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

IF(NOT ${WITH_PSLIB_BRPC})
    return()
ENDIF(NOT ${WITH_PSLIB_BRPC})

IF(WIN32 OR APPLE)
    MESSAGE(WARNING
        "Windows or Mac is not supported with PSLIB_BRPC in Paddle yet."
        " Force WITH_PSLIB_BRPC=OFF")
    SET(WITH_PSLIB_BRPC OFF CACHE STRING "Disable PSLIB_BRPC package in Windows and MacOS" FORCE)
    return()
ENDIF()

INCLUDE(ExternalProject)

SET(PSLIB_BRPC_PROJECT "extern_pslib_brpc")
IF((NOT DEFINED PSLIB_BRPC_NAME) OR (NOT DEFINED PSLIB_BRPC_URL))
    MESSAGE(STATUS "use pre-defined download url")
    SET(PSLIB_BRPC_VER "0.1.0" CACHE STRING "" FORCE)
    SET(PSLIB_BRPC_NAME "pslib_brpc" CACHE STRING "" FORCE)
    SET(PSLIB_BRPC_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_BRPC_VER}/${PSLIB_BRPC_NAME}.tar.gz" CACHE STRING "" FORCE)
ENDIF()
MESSAGE(STATUS "PSLIB_BRPC_NAME: ${PSLIB_BRPC_NAME}, PSLIB_BRPC_URL: ${PSLIB_BRPC_URL}")
SET(PSLIB_BRPC_SOURCE_DIR   "${THIRD_PARTY_PATH}/pslib_brpc")
SET(PSLIB_BRPC_DOWNLOAD_DIR "${PSLIB_BRPC_SOURCE_DIR}/src/${PSLIB_BRPC_PROJECT}")
SET(PSLIB_BRPC_DST_DIR      "pslib_brpc")
SET(PSLIB_BRPC_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
SET(PSLIB_BRPC_INSTALL_DIR  ${PSLIB_BRPC_INSTALL_ROOT}/${PSLIB_BRPC_DST_DIR})
SET(PSLIB_BRPC_ROOT         ${PSLIB_BRPC_INSTALL_DIR})
SET(PSLIB_BRPC_INC_DIR      ${PSLIB_BRPC_ROOT}/include)
SET(PSLIB_BRPC_LIB_DIR      ${PSLIB_BRPC_ROOT}/lib)
SET(PSLIB_BRPC_LIB          ${PSLIB_BRPC_LIB_DIR}/libbrpc.a)
SET(PSLIB_BRPC_IOMP_LIB     ${PSLIB_BRPC_LIB_DIR}/libiomp5.so) # TODO: check whether the bundled Intel OpenMP runtime is really required
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_BRPC_ROOT}/lib")

INCLUDE_DIRECTORIES(${PSLIB_BRPC_INC_DIR})

FILE(WRITE ${PSLIB_BRPC_DOWNLOAD_DIR}/CMakeLists.txt
    "PROJECT(PSLIB_BRPC)\n"
    "cmake_minimum_required(VERSION 3.0)\n"
    "install(DIRECTORY ${PSLIB_BRPC_NAME}/include ${PSLIB_BRPC_NAME}/lib \n"
    "        DESTINATION ${PSLIB_BRPC_DST_DIR})\n")

ExternalProject_Add(
    ${PSLIB_BRPC_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX               ${PSLIB_BRPC_SOURCE_DIR}
    DOWNLOAD_DIR         ${PSLIB_BRPC_DOWNLOAD_DIR}
    DOWNLOAD_COMMAND     wget --no-check-certificate ${PSLIB_BRPC_URL} -c -q -O ${PSLIB_BRPC_NAME}.tar.gz
                         && tar zxvf ${PSLIB_BRPC_NAME}.tar.gz
    DOWNLOAD_NO_PROGRESS 1
    UPDATE_COMMAND       ""
    CMAKE_ARGS           -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT}
    CMAKE_CACHE_ARGS     -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT}
)

ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB})
ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT})
LIST(APPEND external_project_dependencies pslib_brpc)

IF(WITH_C_API)
    INSTALL(FILES ${PSLIB_BRPC_LIB} ${PSLIB_BRPC_IOMP_LIB} DESTINATION lib)
ENDIF()
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,12 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
@@ -0,0 +1,105 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

from .node import DownpourServer
from .node import DownpourWorker
from ..backward import append_backward
import ps_pb2 as pslib
from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table
from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_inputs
from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_outputs
from google.protobuf import text_format


class DownpourSGD(object):
    """
    Distributed optimizer of downpour stochastic gradient descent.
    A standard implementation of Google's Downpour SGD from
    "Large Scale Distributed Deep Networks".

    Args:
        learning_rate (float): the learning rate used to update parameters.

    Examples:
        .. code-block:: python

            downpour_sgd = fluid.distributed.DownpourSGD(learning_rate=0.2)
            downpour_sgd.minimize(cost)
    """

    def __init__(self, learning_rate=0.001, window=1):
        # todo(guru4elephant): add more optimizers here as argument
        # todo(guru4elephant): make learning_rate as a variable
        self.learning_rate_ = learning_rate
        self.window_ = window
        self.type = "downpour"

    def minimize(self,
                 loss,
                 startup_program=None,
                 parameter_list=None,
                 no_grad_set=None):
        """
        DownpourSGD is a distributed optimizer, so minimize() generates both
        the backward operators and the optimization setup in a single call.

        Args:
            loss(Variable): loss variable defined by user
            startup_program(Program): startup program defined by user
            parameter_list(str list): parameter names defined by users
            no_grad_set(set): variables for which no gradient needs to be computed
        Returns:
            [ps_param, worker_skipped_ops]
            ps_param: parameter server protobuf desc
            worker_skipped_ops: operator names that need to be skipped during execution
        """
        params_grads = sorted(
            append_backward(loss, parameter_list, no_grad_set),
            key=lambda x: x[0].name)
        table_name = find_distributed_lookup_table(loss.block.program)
        prefetch_slots = find_distributed_lookup_table_inputs(
            loss.block.program, table_name)
        prefetch_slots_emb = find_distributed_lookup_table_outputs(
            loss.block.program, table_name)
        server = DownpourServer()
        # window is communication strategy
        worker = DownpourWorker(self.window_)
        # Todo(guru4elephant): support multiple tables definitions
        # currently support one big sparse table
        sparse_table_index = 0
        # currently merge all dense parameters into one dense table
        dense_table_index = 1
        params = []
        grads = []
        for param, grad in params_grads:
            params.append(param)
            grads.append(grad)
        server.add_sparse_table(sparse_table_index, self.learning_rate_,
                                prefetch_slots, prefetch_slots_emb)
        server.add_dense_table(dense_table_index, self.learning_rate_, params,
                               grads)
        worker.add_sparse_table(sparse_table_index, self.learning_rate_,
                                prefetch_slots, prefetch_slots_emb)
        worker.add_dense_table(dense_table_index, self.learning_rate_, params,
                               grads)
        ps_param = pslib.PSParameter()
        ps_param.server_param.CopyFrom(server.get_desc())
        ps_param.trainer_param.CopyFrom(worker.get_desc())
        # Todo(guru4elephant): figure out how to support more sparse parameters
        # currently only support lookup_table
        worker_skipped_ops = ["lookup_table", "lookup_table_grad"]
        ps_param.trainer_param.skip_op.extend(worker_skipped_ops)
        return [ps_param, worker_skipped_ops]
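A minimal usage sketch for the returned pair, following the docstring example above. It assumes "cost" is the loss Variable of a program that already contains a distributed lookup_table op; the surrounding network definition is omitted, so this is an illustration rather than the canonical API.

    # Hedged sketch: "cost" comes from a user-defined fluid program (assumed).
    import paddle.fluid as fluid
    from google.protobuf import text_format

    downpour_sgd = fluid.distributed.DownpourSGD(learning_rate=0.2, window=1)
    ps_param, worker_skipped_ops = downpour_sgd.minimize(cost)

    # ps_param is a plain protobuf message: dump it to text for inspection, or
    # hand it to whatever launches the pslib server/worker processes.
    print(text_format.MessageToString(ps_param))
    print("ops skipped on the worker side:", worker_skipped_ops)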
@@ -0,0 +1,85 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class FileSystem(object):
    """
    A file-system configuration helper that builds the Hadoop client
    description (FsClientParameter) used by async_executor.

    Args:
        fs_type (string): file system type, for example "afs"
        user (string): hadoop user name
        passwd (string): hadoop password
        hadoop_bin (string): path to the hadoop binary
    Examples:
        fs = FileSystem()
    """

    def __init__(self,
                 fs_type="afs",
                 uri="afs://xx",
                 user=None,
                 passwd=None,
                 hadoop_bin=""):
        assert user is not None
        assert passwd is not None
        assert hadoop_bin is not None
        import ps_pb2 as pslib
        self.fs_client = pslib.FsClientParameter()
        self.fs_client.uri = uri
        self.fs_client.user = user
        self.fs_client.passwd = passwd
        #self.fs_client.buffer_size = 0
        self.fs_client.hadoop_bin = hadoop_bin
        #self.fs_client.afs_conf = afs_conf if not afs_conf else ""

    def get_desc(self):
        """
        Return the hadoop client (FsClientParameter) desc.
        """
        return self.fs_client

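A short sketch of filling in the client description; every field maps to a constructor argument above, and the credential values are placeholders rather than working settings.

    fs = FileSystem(
        fs_type="afs",
        uri="afs://example-cluster:9902/fs",       # placeholder URI
        user="your_user",                          # placeholder credentials
        passwd="your_password",
        hadoop_bin="$HADOOP_HOME/bin/hadoop")      # placeholder path
    fs_client_desc = fs.get_desc()                 # a pslib.FsClientParameter message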

class MPIHelper(object):
    """
    MPIHelper is a wrapper of mpi4py; it supports get_rank, get_size, etc.

    Examples:
        mh = MPIHelper()
        mh.get_ip()
    """

    def __init__(self):
        from mpi4py import MPI
        self.comm = MPI.COMM_WORLD
        self.MPI = MPI

    def get_rank(self):
        return self.comm.Get_rank()

    def get_size(self):
        return self.comm.Get_size()

    def get_ip(self):
        import socket
        local_ip = socket.gethostbyname(socket.gethostname())
        return local_ip

    def get_hostname(self):
        import socket
        return socket.gethostname()

    def finalize(self):
        self.MPI.Finalize()
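A minimal sketch of the wrapper under an MPI launcher; each call maps one-to-one onto the methods above.

    # e.g. launched as: mpirun -np 4 python this_script.py   (illustrative command)
    mh = MPIHelper()
    print("rank %d of %d on %s (%s)" %
          (mh.get_rank(), mh.get_size(), mh.get_hostname(), mh.get_ip()))
    mh.finalize()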
@@ -0,0 +1,179 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

import ps_pb2 as pslib


class Server(object):
    """
    A Server basic class.
    """

    def __init__(self):
        pass


class Worker(object):
    """
    A Worker basic class.
    """

    def __init__(self):
        pass


class DownpourServer(Server):
    """
    DownpourServer class is used to generate the server program_desc.

    Args:
        server: a pslib.ServerParameter()
    Examples:
        server = DownpourServer()
    """

    def __init__(self):
        self.server_ = pslib.ServerParameter()
        self.server_.downpour_server_param.service_param.start_server_port = 0
        self.server_.downpour_server_param.service_param.server_class = "DownpourBrpcPsServer"
        self.server_.downpour_server_param.service_param.client_class = "DownpourBrpcPsClient"
        self.server_.downpour_server_param.service_param.service_class = "DownpourPsService"
        self.server_.downpour_server_param.service_param.server_thread_num = 12

    def add_sparse_table(self, table_id, learning_rate, slot_key_vars,
                         slot_value_var):
        """
        Args:
            table_id(int): id of the sparse params table
            learning_rate(float): the learning rate used to update parameters
            slot_key_vars(list): slot key variables
            slot_value_var(list): slot value variables (embeddings of the keys)
        Returns:
            None
        """
        table = self.server_.downpour_server_param.downpour_table_param.add()
        table.table_id = table_id
        table.table_class = "DownpourSparseTable"
        table.type = pslib.PS_SPARSE_TABLE
        table.accessor.accessor_class = "DownpourFeatureValueAccessor"
        table.accessor.sparse_sgd_param.learning_rate = learning_rate
        table.accessor.sparse_sgd_param.initial_g2sum = 3
        table.accessor.sparse_sgd_param.initial_range = 1e-4
        table.accessor.sparse_sgd_param.weight_bounds.extend([-10, 10])

        table.accessor.embedx_dim = 8
        table.accessor.embedx_threshold = 5
        table.accessor.fea_dim = 11
        table.accessor.downpour_accessor_param.nonclk_coeff = 0.1
        table.accessor.downpour_accessor_param.click_coeff = 2
        table.accessor.downpour_accessor_param.base_threshold = 0.2
        table.accessor.downpour_accessor_param.delta_threshold = 0.15
        table.accessor.downpour_accessor_param.delta_keep_days = 31
        table.accessor.downpour_accessor_param.show_click_decay_rate = 0.999
        table.accessor.downpour_accessor_param.delete_threshold = 0.8

    def add_dense_table(self, table_id, learning_rate, param_var, grad_var):
        """
        Args:
            table_id(int): id of the dense params table
            learning_rate(float): the learning rate used to update parameters
            param_var(list): all dense parameter variables
            grad_var(list): all dense gradient variables
        Returns:
            None
        """
        table = self.server_.downpour_server_param.downpour_table_param.add()
        table.table_id = table_id
        table.table_class = "DownpourDenseTable"
        table.type = pslib.PS_DENSE_TABLE
        table.accessor.accessor_class = "DownpourDenseValueAccessor"
        table.accessor.dense_sgd_param.name = "adam"
        table.accessor.dense_sgd_param.adam.learning_rate = learning_rate
        table.accessor.dense_sgd_param.adam.avg_decay_rate = 0.999993
        table.accessor.dense_sgd_param.adam.ada_decay_rate = 0.9999
        table.accessor.dense_sgd_param.adam.ada_epsilon = 1e-8
        table.accessor.dense_sgd_param.adam.mom_decay_rate = 0.99
        table.accessor.dense_sgd_param.naive.learning_rate = 0.0002
        fea_dim = 0
        for param in filter(lambda x: x.name.find("embedding") == -1,
                            param_var):
            fea_dim += reduce(lambda x, y: x * y, param.shape, 1)
        table.accessor.fea_dim = fea_dim

    def get_desc(self):
        """
        Return the downpour server program_desc.
        """
        return self.server_

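A minimal, hedged sketch of generating a server description without a full fluid program. FakeVar is a made-up stand-in for fluid Variables (only .name and .shape are read by the methods above); all names, shapes, and hyperparameters are illustrative.

    from collections import namedtuple
    FakeVar = namedtuple("FakeVar", ["name", "shape"])   # hypothetical stand-in

    server = DownpourServer()
    server.add_sparse_table(
        table_id=0, learning_rate=0.1,
        slot_key_vars=[FakeVar("slot_ids", [1])],
        slot_value_var=[FakeVar("slot_ids_emb", [8])])
    server.add_dense_table(
        table_id=1, learning_rate=0.1,
        param_var=[FakeVar("fc_0.w_0", [8, 1]), FakeVar("fc_0.b_0", [1])],
        grad_var=[FakeVar("fc_0.w_0@GRAD", [8, 1]), FakeVar("fc_0.b_0@GRAD", [1])])
    server_desc = server.get_desc()    # a pslib.ServerParameter message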

class DownpourWorker(Worker):
    """
    DownpourWorker class is used to generate the worker program_desc.

    Args:
        window (int): push params frequency
        worker: a pslib.DownpourTrainerParameter
    Examples:
        worker = DownpourWorker(1)
    """

    def __init__(self, window):
        self.window = window
        self.worker_ = pslib.DownpourTrainerParameter()

    def add_sparse_table(self, table_id, learning_rate, slot_key_vars,
                         slot_value_vars):
        """
        Args:
            table_id(int): id of the sparse params table
            learning_rate(float): the learning rate used to update parameters
            slot_key_vars(list): slot key variables
            slot_value_vars(list): slot value variables (embeddings of the keys)
        Returns:
            None
        """
        table = self.worker_.sparse_table.add()
        table.table_id = table_id
        table.slot_key.extend([var.name for var in slot_key_vars])
        table.slot_value.extend([var.name for var in slot_value_vars])
        table.slot_gradient.extend(
            [var.name + "@GRAD" for var in slot_value_vars])

    def add_dense_table(self, table_id, learning_rate, param_vars, grad_vars):
        """
        Args:
            table_id(int): id of the dense params table
            learning_rate(float): the learning rate used to update parameters
            param_vars(list): all dense parameter variables
            grad_vars(list): all dense gradient variables
        Returns:
            None
        """
        table = self.worker_.dense_table.add()
        table.table_id = table_id
        table.dense_variable_name.extend(
            filter(lambda x: x.find("embedding") == -1,
                   [p.name for p in param_vars]))
        table.dense_gradient_variable_name.extend(
            filter(lambda x: x.find("embedding") == -1,
                   [g.name for g in grad_vars]))

    def get_desc(self):
        """
        Return the downpour worker program_desc.
        """
        return self.worker_
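The matching worker-side description, sketched with the same hypothetical stand-in variables (only .name is read here):

    from collections import namedtuple
    FakeVar = namedtuple("FakeVar", ["name", "shape"])   # hypothetical stand-in

    worker = DownpourWorker(window=1)
    worker.add_sparse_table(
        table_id=0, learning_rate=0.1,
        slot_key_vars=[FakeVar("slot_ids", [1])],
        slot_value_vars=[FakeVar("slot_ids_emb", [8])])
    worker.add_dense_table(
        table_id=1, learning_rate=0.1,
        param_vars=[FakeVar("fc_0.w_0", [8, 1])],
        grad_vars=[FakeVar("fc_0.w_0@GRAD", [8, 1])])
    worker_desc = worker.get_desc()    # a pslib.DownpourTrainerParameter message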
@@ -0,0 +1,148 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

from .helper import MPIHelper


class PaddlePSInstance(object):
    """
    PaddlePSInstance class is used to generate an instance of server or worker.

    Args:
        server_worker_mode: 0 or 1, default is 1
        proc_per_node: processes per node, default is 2
    Examples:
        instance = PaddlePSInstance(1, 2)
    """

    def __init__(self, server_worker_mode, proc_per_node):
        self.dh = MPIHelper()
        self._rankid = self.dh.get_rank()
        self._server_worker_mode = server_worker_mode
        self._proc_per_node = proc_per_node
        self._nodes = self.dh.get_size()

        self._ip = 0
        self._worker_num = self._nodes * self._proc_per_node // 2
        self._server_num = self._nodes * self._proc_per_node // 2
        self._total_server_worker = self._worker_num + self._server_num
        self._node_type = None  # IDLE=-1, WORKER=1, SERVER=0
        self._set_nodetype()
        self._comm = None
        self._split_comm()

    def _set_nodetype(self):
        if self._server_worker_mode == 0:
            if self._rankid < self._server_num:
                self._node_type = 1
            elif self._rankid < self._total_server_worker:
                self._node_type = 0
            else:
                self._node_type = -1
        elif self._server_worker_mode == 1:
            if self._rankid < self._total_server_worker:
                if 0 == self._rankid % self._proc_per_node % 2:
                    self._node_type = 0
                else:
                    self._node_type = 1
            else:
                self._node_type = -1
        else:
            self._node_type = -1

    def _split_comm(self):
        if self.is_server():
            self._comm = self.dh.comm.Split(self._node_type)
        elif self.is_worker():
            self._comm = self.dh.comm.Split(self._node_type)

    def get_worker_index(self):
        """
        Return worker index
        """
        if self._server_worker_mode == 0:
            # assumes servers occupy the first ranks in mode 0
            return self._rankid - self._server_num
        else:
            return self._rankid // self._proc_per_node

    def get_server_index(self):
        """
        Return server index
        """
        if self._server_worker_mode == 0:
            return self._rankid
        else:
            return self._rankid // self._proc_per_node

    def is_worker(self):
        """
        Return whether this instance is a worker
        """
        return self._node_type == 1

    def is_server(self):
        """
        Return whether this instance is a server
        """
        return self._node_type == 0

    def is_first_worker(self):
        """
        Return whether this instance is the first worker
        """
        return self.is_worker() and 0 == self.get_worker_index()

    def set_ip(self, ip):
        """
        Set server ip
        """
        self._ip = ip

    def gather_ips(self):
        """
        Return all server and worker ips through MPI allgather
        """
        self._ips = self.dh.comm.allgather(self._ip)
        return self._ips

    def get_node_cnt(self):
        """
        Return node count
        """
        return self._nodes

    def barrier_all(self):
        """
        Barrier across workers and servers
        """
        self.dh.comm.barrier()

    def barrier_worker(self):
        """
        Barrier across workers only
        """
        if self.is_worker():
            self._comm.barrier()

    def finalize(self):
        """
        MPI finalize
        """
        self.dh.finalize()


if __name__ == "__main__":
    instance = PaddlePSInstance(1, 2)
    instance.barrier_all()
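A minimal sketch of the usual launch pattern under an MPI launcher, using only the methods defined above; what a "server" or "worker" actually runs (pslib startup, async_executor training, ...) lives outside this file and is left as comments.

    instance = PaddlePSInstance(server_worker_mode=1, proc_per_node=2)

    if instance.is_server():
        instance.set_ip("server-endpoint-placeholder")  # placeholder value
        ips = instance.gather_ips()     # collective: every rank learns all endpoints
        # ... start the parameter server here (not part of this file) ...
        instance.barrier_all()
    elif instance.is_worker():
        ips = instance.gather_ips()
        instance.barrier_all()
        # ... run training, using instance.get_worker_index() as the trainer id ...
        instance.barrier_worker()

    instance.finalize()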
File diff suppressed because one or more lines are too long