From 3c239cd640aca1fa8da71a9cdc319b8b4e4fb36c Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Sat, 1 Dec 2018 13:10:21 +0800 Subject: [PATCH 01/62] pslib --- CMakeLists.txt | 1 + cmake/external/pslib.cmake | 76 ++++++++++++++++++++++++ paddle/fluid/framework/async_executor.cc | 1 + 3 files changed, 78 insertions(+) create mode 100644 cmake/external/pslib.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index efa68c9ba2..5251fe286f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,6 +186,7 @@ endif() ######################################################################################## include(external/mklml) # download mklml package +include(external/pslib) # download mklml package include(external/xbyak) # download xbyak package include(external/libxsmm) # download, build, install libxsmm include(external/zlib) # download, build, install zlib diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake new file mode 100644 index 0000000000..812af5efa2 --- /dev/null +++ b/cmake/external/pslib.cmake @@ -0,0 +1,76 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_PSLIB}) + return() +ENDIF(NOT ${WITH_PSLIB}) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with PSLIB in Paddle yet." 
+ "Force WITH_PSLIB=OFF") + SET(WITH_PSLIB OFF CACHE STRING "Disable PSLIB package in Windows and MacOS" FORCE) + return() +ENDIF() + +INCLUDE(ExternalProject) + +SET(PSLIB_PROJECT "extern_pslib") +IF((NOT DEFINED PSLIB_VER) OR (NOT DEFINED PSLIB_URL)) + MESSAGE(STATUS "use pre defined download url") + SET(PSLIB_VER "pslib" CACHE STRING "" FORCE) #todo pslib version + SET(PSLIB_URL "http://bjyz-heqiaozhi-dev-new.epc.baidu.com:8000/${PSLIB_VER}.tar.gz" CACHE STRING "" FORCE) #todo pslib url +ENDIF() +MESSAGE(STATUS "PSLIB_VER: ${PSLIB_VER}, PSLIB_URL: ${PSLIB_URL}") +SET(PSLIB_SOURCE_DIR "${THIRD_PARTY_PATH}/pslib") +SET(PSLIB_DOWNLOAD_DIR "${PSLIB_SOURCE_DIR}/src/${PSLIB_PROJECT}") +SET(PSLIB_DST_DIR "pslib") +SET(PSLIB_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +SET(PSLIB_INSTALL_DIR ${PSLIB_INSTALL_ROOT}/${PSLIB_DST_DIR}) +SET(PSLIB_ROOT ${PSLIB_INSTALL_DIR}) +SET(PSLIB_INC_DIR ${PSLIB_ROOT}/include) +SET(PSLIB_LIB_DIR ${PSLIB_ROOT}/lib) +SET(PSLIB_LIB ${PSLIB_LIB_DIR}/libps.so) +SET(PSLIB_IOMP_LIB ${PSLIB_LIB_DIR}/libiomp5.so) #todo what is this +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_ROOT}/lib") + +INCLUDE_DIRECTORIES(${PSLIB_INC_DIR}) + +FILE(WRITE ${PSLIB_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(PSLIB)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${PSLIB_VER}/include ${PSLIB_VER}/lib \n" + " DESTINATION ${PSLIB_DST_DIR})\n") + +ExternalProject_Add( + ${PSLIB_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PSLIB_SOURCE_DIR} + DOWNLOAD_DIR ${PSLIB_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_URL} -c -q -O ${PSLIB_VER}.tar.gz + && tar zxvf ${PSLIB_VER}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT} +) + +ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) +ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT}) +LIST(APPEND external_project_dependencies pslib) + +IF(WITH_C_API) + INSTALL(FILES ${PSLIB_LIB} ${PSLIB_IOMP_LIB} DESTINATION lib) +ENDIF() diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index afb2dd2f06..aa76e03e83 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -29,6 +29,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/pybind/pybind.h" +#include "pslib.h" namespace paddle { namespace framework { From 0e4709daddaf76e71a2de3f7490184453b2c1e17 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Sat, 1 Dec 2018 13:14:03 +0800 Subject: [PATCH 02/62] add mpi4py helper --- python/paddle/fluid/distributed/helper.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 python/paddle/fluid/distributed/helper.py diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py new file mode 100644 index 0000000000..8e079b1e8d --- /dev/null +++ b/python/paddle/fluid/distributed/helper.py @@ -0,0 +1,20 @@ +from mpi4py import MPI + +class MPIHelper(object): + def __init__(self): + self.comm = MPI.COMM_WORLD + + def get_rank(self): + return self.comm.Get_rank() + + def get_size(self): + return self.comm.Get_size() + + def get_ip(self): + import socket + local_ip = socket.gethostbyname(socket.gethostname()) + return local_ip + + def get_hostname(self): + import socket + return socket.gethostname() From 038346c0c2053bbc0b051e7bb48de42d61af6958 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Sat, 1 Dec 2018 13:52:02 +0800 Subject: [PATCH 03/62] libmct --- cmake/external/libmct.cmake | 76 +++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 cmake/external/libmct.cmake diff --git a/cmake/external/libmct.cmake b/cmake/external/libmct.cmake new file mode 100644 index 0000000000..351806f6e1 --- /dev/null +++ b/cmake/external/libmct.cmake @@ -0,0 +1,76 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_LIBMCT}) + return() +ENDIF(NOT ${WITH_LIBMCT}) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with LIBMCT in Paddle yet." 
+ "Force WITH_LIBMCT=OFF") + SET(WITH_LIBMCT OFF CACHE STRING "Disable LIBMCT package in Windows and MacOS" FORCE) + return() +ENDIF() + +INCLUDE(ExternalProject) + +SET(LIBMCT_PROJECT "extern_libmct") +IF((NOT DEFINED LIBMCT_VER) OR (NOT DEFINED LIBMCT_URL)) + MESSAGE(STATUS "use pre defined download url") + SET(LIBMCT_VER "libmct" CACHE STRING "" FORCE) #todo libmct version + SET(LIBMCT_URL "http://bjyz-heqiaozhi-dev-new.epc.baidu.com:8000/${LIBMCT_VER}.tar.gz" CACHE STRING "" FORCE) #todo libmct url +ENDIF() +MESSAGE(STATUS "LIBMCT_VER: ${LIBMCT_VER}, LIBMCT_URL: ${LIBMCT_URL}") +SET(LIBMCT_SOURCE_DIR "${THIRD_PARTY_PATH}/libmct") +SET(LIBMCT_DOWNLOAD_DIR "${LIBMCT_SOURCE_DIR}/src/${LIBMCT_PROJECT}") +SET(LIBMCT_DST_DIR "libmct") +SET(LIBMCT_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +SET(LIBMCT_INSTALL_DIR ${LIBMCT_INSTALL_ROOT}/${LIBMCT_DST_DIR}) +SET(LIBMCT_ROOT ${LIBMCT_INSTALL_DIR}) +SET(LIBMCT_INC_DIR ${LIBMCT_ROOT}/include) +SET(LIBMCT_LIB_DIR ${LIBMCT_ROOT}/lib) +SET(LIBMCT_LIB ${LIBMCT_LIB_DIR}/libps.so) +SET(LIBMCT_IOMP_LIB ${LIBMCT_LIB_DIR}/libiomp5.so) #todo what is this +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${LIBMCT_ROOT}/lib") + +INCLUDE_DIRECTORIES(${LIBMCT_INC_DIR}) + +FILE(WRITE ${LIBMCT_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(LIBMCT)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${LIBMCT_VER}/include ${LIBMCT_VER}/lib \n" + " DESTINATION ${LIBMCT_DST_DIR})\n") + +ExternalProject_Add( + ${LIBMCT_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${LIBMCT_SOURCE_DIR} + DOWNLOAD_DIR ${LIBMCT_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${LIBMCT_URL} -c -q -O ${LIBMCT_VER}.tar.gz + && tar zxvf ${LIBMCT_VER}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT} +) + +ADD_LIBRARY(libmct SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET libmct PROPERTY IMPORTED_LOCATION ${LIBMCT_LIB}) +ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT}) +LIST(APPEND external_project_dependencies libmct) + +IF(WITH_C_API) + INSTALL(FILES ${LIBMCT_LIB} ${LIBMCT_IOMP_LIB} DESTINATION lib) +ENDIF() From 4798a8c7b848891c18cd5b23e8023b88d9f32643 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Sat, 1 Dec 2018 14:51:40 +0800 Subject: [PATCH 04/62] pslib_brpc --- cmake/external/pslib_brpc.cmake | 76 +++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 cmake/external/pslib_brpc.cmake diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake new file mode 100644 index 0000000000..7b4beeae65 --- /dev/null +++ b/cmake/external/pslib_brpc.cmake @@ -0,0 +1,76 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_PSLIB_BRPC}) + return() +ENDIF(NOT ${WITH_PSLIB_BRPC}) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with PSLIB_BRPC in Paddle yet." 
+ "Force WITH_PSLIB_BRPC=OFF") + SET(WITH_PSLIB_BRPC OFF CACHE STRING "Disable PSLIB_BRPC package in Windows and MacOS" FORCE) + return() +ENDIF() + +INCLUDE(ExternalProject) + +SET(PSLIB_BRPC_PROJECT "extern_pslib_brpc") +IF((NOT DEFINED PSLIB_BRPC_VER) OR (NOT DEFINED PSLIB_BRPC_URL)) + MESSAGE(STATUS "use pre defined download url") + SET(PSLIB_BRPC_VER "pslib_brpc" CACHE STRING "" FORCE) #todo pslib version + SET(PSLIB_BRPC_URL "http://bjyz-heqiaozhi-dev-new.epc.baidu.com:8000/${PSLIB_BRPC_VER}.tar.gz" CACHE STRING "" FORCE) #todo pslib_brpc url +ENDIF() +MESSAGE(STATUS "PSLIB_BRPC_VER: ${PSLIB_BRPC_VER}, PSLIB_BRPC_URL: ${PSLIB_BRPC_URL}") +SET(PSLIB_BRPC_SOURCE_DIR "${THIRD_PARTY_PATH}/pslib_brpc") +SET(PSLIB_BRPC_DOWNLOAD_DIR "${PSLIB_BRPC_SOURCE_DIR}/src/${PSLIB_BRPC_PROJECT}") +SET(PSLIB_BRPC_DST_DIR "pslib_brpc") +SET(PSLIB_BRPC_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +SET(PSLIB_BRPC_INSTALL_DIR ${PSLIB_BRPC_INSTALL_ROOT}/${PSLIB_BRPC_DST_DIR}) +SET(PSLIB_BRPC_ROOT ${PSLIB_BRPC_INSTALL_DIR}) +SET(PSLIB_BRPC_INC_DIR ${PSLIB_BRPC_ROOT}/include) +SET(PSLIB_BRPC_LIB_DIR ${PSLIB_BRPC_ROOT}/lib) +SET(PSLIB_BRPC_LIB ${PSLIB_BRPC_LIB_DIR}/libps.so) +SET(PSLIB_BRPC_IOMP_LIB ${PSLIB_BRPC_LIB_DIR}/libiomp5.so) #todo what is this +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_BRPC_ROOT}/lib") + +INCLUDE_DIRECTORIES(${PSLIB_BRPC_INC_DIR}) + +FILE(WRITE ${PSLIB_BRPC_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(PSLIB_BRPC)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${PSLIB_BRPC_VER}/include ${PSLIB_BRPC_VER}/lib \n" + " DESTINATION ${PSLIB_BRPC_DST_DIR})\n") + +ExternalProject_Add( + ${PSLIB_BRPC_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PSLIB_BRPC_SOURCE_DIR} + DOWNLOAD_DIR ${PSLIB_BRPC_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_BRPC_URL} -c -q -O ${PSLIB_BRPC_VER}.tar.gz + && tar zxvf ${PSLIB_BRPC_VER}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT} +) + +ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB}) +ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT}) +LIST(APPEND external_project_dependencies pslib_brpc) + +IF(WITH_C_API) + INSTALL(FILES ${PSLIB_BRPC_LIB} ${PSLIB_BRPC_IOMP_LIB} DESTINATION lib) +ENDIF() From 52a0be7bb437e574d7fda8d322c816e91029e438 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Sat, 1 Dec 2018 13:54:44 +0800 Subject: [PATCH 05/62] add mct into CMakeLists.txt --- CMakeLists.txt | 5 ++++- paddle/fluid/framework/async_executor.h | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5251fe286f..8c929396ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,7 +186,6 @@ endif() ######################################################################################## include(external/mklml) # download mklml package -include(external/pslib) # download mklml package include(external/xbyak) # download xbyak package include(external/libxsmm) # download, build, install libxsmm include(external/zlib) # download, build, install zlib @@ -217,6 +216,9 @@ include(external/warpctc) # download, build, install warpctc include(cupti) include(external/gzstream) endif (NOT WIN32) +include(external/libmct) +include(external/pslib_brpc) +include(external/pslib) if(WITH_DISTRIBUTE) if(WITH_GRPC) @@ -277,6 +279,7 @@ set(EXTERNAL_LIBS protobuf zlib 
${PYTHON_LIBRARIES} + pslib ) if(WITH_AMD_GPU) diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index f4d2a79ac5..6aa59c89dc 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -40,6 +40,9 @@ class AsyncExecutor { const int thread_num, const std::vector& fetch_names, const bool debug = false); + void ConfigServer() {} + void ConfigWorker() {} + void StartServer() {} private: void CreateThreads(ExecutorThreadWorker* worker, From c583fd34acc9e02362fd2ddd4bf7adb53d8321e6 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Mon, 3 Dec 2018 09:53:24 +0800 Subject: [PATCH 06/62] add downpour sgd wrapper for pslib --- python/paddle/fluid/distributed/downpour.py | 34 ++++++++++++ python/paddle/fluid/distributed/node.py | 61 +++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 python/paddle/fluid/distributed/downpour.py create mode 100644 python/paddle/fluid/distributed/node.py diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py new file mode 100644 index 0000000000..523f686668 --- /dev/null +++ b/python/paddle/fluid/distributed/downpour.py @@ -0,0 +1,34 @@ +import paddle.fluid as fluid +import pslib_pb2 as pslib +from .node import DownpourServer +from .node import DownpourWorker +from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table + +class DownpourSGD(object): + def __init__(self, optimizer=opt, learning_rate=0.001, window=1): + # todo(guru4elephant): if optimizer is not None, will warning here + self.learning_rate_ = opt.learning_rate + self.window_ = window + + def minimize(self, loss, startup_program=None, + parameter_list=None, no_grad_set=None, + prefetch_slots=None, prefetch_slots_emb=None): + params_grads = sorted(append_backward(loss), key=lambda x:x[0].name) + table_name = fluid_distributed_lookup_table(loss.block.program) + server = DownpourServer() + worker = DownpourWorker() + server.add_sparse_table(0, learning_rate, + prefetch_slots, prefetch_slots_emb) + server.add_dense_table(1, learning_rate, params, grads) + worker.add_sparse_table(0, learning_rate, + prefetch_slots, prefetch_slots_emb) + worker.add_dense_table(1, learning_rate, params, grads) + + ps_param = pslib.PSParameter() + ps_param.server_param.CopyFrom(server.get_desc()) + ps_param.worker_param.CopyFrom(worker.get_desc()) + worker_skipped_ops = ["lookup_table", "lookup_table_grad"] + + return [solver_desc, parallel_desc] + + diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py new file mode 100644 index 0000000000..fc62d7220c --- /dev/null +++ b/python/paddle/fluid/distributed/node.py @@ -0,0 +1,61 @@ +import paddle.fluid as fluid +import pslib_pb2 as pslib + +class Server(object): + def __init__(self): + pass + + +class Worker(object): + def __init__(self): + pass + + +class DownpourServer(Server): + def __init__(self): + self.server_ = pslib.ServerParameter().downpour_server_param + + def add_sparse_table(self, table_id, learning_rate, + slot_key, slot_value_var, slot_grad_var): + table = self.server_.downpour_table_param.add() + table.table_id = table_id + table.type = PS_SPARSE_TABLE + table.accessor.accessor_class = "DownpourFeatureValueAccessor" + table.accessor.dense_sgd_param.adam.learning_rate = learning_rate + table.accessor.fea_dim = slot_value_var[0].shape[1] + + def add_dense_table(self, table_id, learning_rate, + param_var, grad_var): + table = 
self.server_.downpour_table_param.add() + table.table_id = table_id + table.type = PS_DENSE_TABLE + table.accessor.accessor_class = "DownpourDenseValueAccessor" + table.accessor.sparse_sgd_param.learning_rate = learning_rate + table.accessor.fea_dim = reduce(lambda x, y: x.shape, 1 for x in param_var) + + def get_desc(self): + return self.server_ + + +class DownpourWorker(Worker): + def __init__(self, window): + self.window = window + self.worker_ = pslib.WorkerParameter().downpour_worker_param + self.worker_.pull_dense_per_batch = window + self.worker_.push_dense_per_batch = window + + def add_sparse_table(self, table_id, + slot_keys, slot_value_vars, slot_grad_vars): + table = self.worker_.sparse_table.add() + table.table_id = table_id + table.slot.extend(slot_keys) + self.worker_.extend([grad.name for grad in slot_grad_vars]) + + def add_dense_table(self, table_id, param_vars, grad_vars): + table = self.worker_.dense_table.add() + table.table_id = table_id + table.dense_variable_name.extend([p.name for p in param_vars]) + table.dense_gradient_variable_name.extend([g.name for g in grad_vars]) + + def get_desc(self): + return self.worker_ From a77fa67bbd11131f0c8c3683b903b2ceeeca41a0 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Mon, 3 Dec 2018 17:44:18 +0800 Subject: [PATCH 07/62] async_thread_trainer & libmct & pslib.cmake --- cmake/external/libmct.cmake | 17 +- cmake/external/pslib.cmake | 2 +- paddle/fluid/framework/async_executor.cc | 83 +++- paddle/fluid/framework/async_executor.h | 41 +- .../fluid/framework/executor_thread_worker.cc | 456 ++++++++++++++++++ .../fluid/framework/executor_thread_worker.h | 150 +++++- paddle/fluid/pybind/async_executor_py.cc | 6 +- python/paddle/fluid/async_executor.py | 13 + 8 files changed, 745 insertions(+), 23 deletions(-) diff --git a/cmake/external/libmct.cmake b/cmake/external/libmct.cmake index 351806f6e1..239183cb6d 100644 --- a/cmake/external/libmct.cmake +++ b/cmake/external/libmct.cmake @@ -40,9 +40,6 @@ SET(LIBMCT_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") SET(LIBMCT_INSTALL_DIR ${LIBMCT_INSTALL_ROOT}/${LIBMCT_DST_DIR}) SET(LIBMCT_ROOT ${LIBMCT_INSTALL_DIR}) SET(LIBMCT_INC_DIR ${LIBMCT_ROOT}/include) -SET(LIBMCT_LIB_DIR ${LIBMCT_ROOT}/lib) -SET(LIBMCT_LIB ${LIBMCT_LIB_DIR}/libps.so) -SET(LIBMCT_IOMP_LIB ${LIBMCT_LIB_DIR}/libiomp5.so) #todo what is this SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${LIBMCT_ROOT}/lib") INCLUDE_DIRECTORIES(${LIBMCT_INC_DIR}) @@ -66,11 +63,15 @@ ExternalProject_Add( CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT} ) -ADD_LIBRARY(libmct SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET libmct PROPERTY IMPORTED_LOCATION ${LIBMCT_LIB}) +if (${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32) + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c) + file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";") + add_library(libmct STATIC ${dummyfile}) +else() + add_library(libmct INTERFACE) +endif() + +#ADD_LIBRARY(libmct SHARED IMPORTED GLOBAL) ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT}) LIST(APPEND external_project_dependencies libmct) -IF(WITH_C_API) - INSTALL(FILES ${LIBMCT_LIB} ${LIBMCT_IOMP_LIB} DESTINATION lib) -ENDIF() diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake index 812af5efa2..586f66d6fd 100644 --- a/cmake/external/pslib.cmake +++ b/cmake/external/pslib.cmake @@ -66,7 +66,7 @@ ExternalProject_Add( CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT} ) -ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) +ADD_LIBRARY(pslib STATIC IMPORTED GLOBAL) 
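+# pslib ships as a prebuilt binary: the IMPORTED target declared above is
+# simply bound to ${PSLIB_LIB} (libps.so) installed by the ExternalProject
+# step, so CMake generates no build rule for it.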
SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT}) LIST(APPEND external_project_dependencies pslib) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index aa76e03e83..94ed8c2fca 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -48,6 +48,10 @@ void AsyncExecutor::CreateThreads( worker->SetDataFeed(reader); worker->SetFetchVarNames(fetch_var_names); worker->BindingDataFeedMemory(); + worker->SetPSlibPtr(_pslib_ptr); + worker->SetPullDenseThread(_pull_dense_thread); + worker->BindingSlotVariableMemory(); + worker->SetParamConfig(&_param_config); } void PrepareReaders(std::vector>& readers, // NOLINT @@ -61,6 +65,77 @@ void PrepareReaders(std::vector>& readers, // NOLINT readers[0]->SetFileList(filelist); } +void AsyncExecutor::ConfigPslib(const std::string& dist_desc, std::vector& host_sign_list, int node_num, int index) { + _pslib_ptr = std::shared_ptr(new paddle::distributed::PSlib()); + _pslib_ptr->init_and_config(dist_desc, host_sign_list, node_num, index);//TODO +} + +void AsyncExecutor::StartServer() { + _pslib_ptr->run_server(); +} + +void AsyncExecutor::InitModel() { + //TODO only rank = 0 do this + std::vector all_dense_table_id; //TODO + all_dense_table_id.push_back(0); + for (auto table_id: all_dense_table_id) { + std::vector regions; + std::vector variables; //TODO + for (auto& t : variables) { + Variable* var = root_scope_->FindVar(t); + CHECK(var != nullptr) << "var[" << t << "] not found"; + LoDTensor* tensor = var->GetMutable(); + + float* g = tensor->data(); + CHECK(g != nullptr) << "var[" << t << "] value not initialized"; + + float init_range = 0.2; + int rown = tensor->dims()[0]; + init_range /= sqrt(rown); + + std::normal_distribution ndistr(0.0, 1.0); + for (auto i = 0u; i < tensor->numel(); ++i) { + g[i] = ndistr(local_random_engine()) * init_range; + } + + paddle::ps::Region reg(g, tensor->numel()); + regions.emplace_back(std::move(reg)); + } + + auto push_status = _pslib_ptr->_worker_ptr->push_dense_param(regions.data(), regions.size(), table_id); + push_status.wait(); + auto status = push_status.get(); + if (status != 0) { + LOG(FATAL) << "push dense param failed, status[" << status << "]"; + exit(-1); + } + } +} + +void AsyncExecutor::SaveModel(const std::string& path) { + auto ret = _pslib_ptr->_worker_ptr->flush(); + ret.wait(); + ret = _pslib_ptr->_worker_ptr->save(path, 0); + ret.wait(); + int32_t feasign_cnt = ret.get(); + if (feasign_cnt == -1) { // TODO should be feasign_cnt < 0, because server bug + LOG(FATAL) << "save model failed"; + exit(-1); + } +} + +void AsyncExecutor::PrepareDenseThread() { + DensePullThreadParam param; + param.ps_client = _pslib_ptr->_worker_ptr;; + param.threshold = 1;//GlobalConfig::instance().pull_dense_per_batch; //TODO + param.training_thread_num = actual_thread_num; + param.root_scope = root_scope_; + //param.dense_params = &GlobalConfig::instance().dense_variable_name; //TODO + + _pull_dense_thread = std::shared_ptr(new DensePullThread(param)); + +} + void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, const std::string& data_feed_desc_str, const std::vector& filelist, @@ -83,7 +158,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, google::protobuf::TextFormat::ParseFromString(data_feed_desc_str, &data_feed_desc); - int actual_thread_num = thread_num; + actual_thread_num = thread_num; int file_cnt = filelist.size(); 
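+  // Rough call sequence for a pslib-backed job, as wired up in this patch:
+  //   ConfigPslib()  -> init_and_config() the client on every node
+  //   StartServer()  -> run_server() on parameter-server nodes
+  //   InitModel()    -> randomly initialize dense tables, then push them once
+  //   RunFromFile()  -> spawn one AsyncExecutorThreadWorker per thread
+  //   SaveModel()    -> flush() pending updates, then save() all tables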
PADDLE_ENFORCE(file_cnt > 0, "File list cannot be empty"); @@ -107,11 +182,11 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, // todo: should be factory method for creating datafeed std::vector> readers; PrepareReaders(readers, actual_thread_num, data_feed_desc, filelist); - + PrepareDenseThread(); std::vector> workers; workers.resize(actual_thread_num); for (auto& worker : workers) { - worker.reset(new ExecutorThreadWorker); + worker.reset(new AsyncExecutorThreadWorker); } // prepare thread resource here @@ -129,7 +204,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, for (auto& th : threads) { th.join(); } - + _pull_dense_thread->stop(); root_scope_->DropKids(); return; diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index 6aa59c89dc..67f4e5deee 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -22,6 +22,8 @@ limitations under the License. */ #include // NOLINT #include #include +#include //local_random_engine +#include //local_random_engine #include "paddle/fluid/framework/data_feed.pb.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor_thread_worker.h" @@ -30,6 +32,26 @@ limitations under the License. */ namespace paddle { namespace framework { + +inline double current_realtime() { + struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + return tp.tv_sec + tp.tv_nsec * 1e-9; +} + +inline std::default_random_engine& local_random_engine() { + struct engine_wrapper_t { + std::default_random_engine engine; + engine_wrapper_t() { + static std::atomic x(0); + std::seed_seq sseq = {x++, x++, x++, (unsigned long)(current_realtime() * 1000)}; + engine.seed(sseq); + } + }; + thread_local engine_wrapper_t r; + return r.engine; +} + class AsyncExecutor { public: AsyncExecutor(Scope* scope, const platform::Place& place); @@ -40,9 +62,12 @@ class AsyncExecutor { const int thread_num, const std::vector& fetch_names, const bool debug = false); - void ConfigServer() {} - void ConfigWorker() {} - void StartServer() {} + //void ConfigPslib(const char* dist_desc, uint64_t* host_sign_list, int node_num, int index); + void ConfigPslib(const std::string& dist_desc, std::vector& host_sign_list, int node_num, int index); + //void ConfigWorker() {} + void StartServer(); + void InitModel(); + void SaveModel(const std::string& path); private: void CreateThreads(ExecutorThreadWorker* worker, @@ -51,11 +76,19 @@ class AsyncExecutor { const std::vector& fetch_var_names, Scope* root_scope, const int thread_index, const bool debug); - + void PrepareDenseThread(); public: + std::shared_ptr _pslib_ptr; + std::shared_ptr _pull_dense_thread; Scope* root_scope_; platform::Place place_; + + AsyncWorkerParamConfig _param_config; + private: + int actual_thread_num; }; + + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 4e4001e979..19d8818be7 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -31,6 +31,85 @@ limitations under the License. 
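DensePullThread (added below) runs in the background and re-pulls dense
parameters from the parameter server once every training thread has advanced
at least `_threshold` versions beyond the last pull, sleeping `_sleep_time_ms`
milliseconds between checks.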
*/ namespace paddle { namespace framework { +int DensePullThread::start() { + _running = true; + _t = std::thread(&DensePullThread::run, this); + return 0; +} + +void DensePullThread::run() { + while (_running) { + _pull_dense_status.resize(0); + for (auto& t : _dense_variable_name) { + if (check_update_param(t.first)) { + auto status = pull_dense(t.first); + _pull_dense_status.emplace_back(std::move(status)); + reset_thread_version(t.first); + } + } + if (_pull_dense_status.size() != 0) { + wait_all(); + } + + usleep(_sleep_time_ms * 1000); + } +} +bool DensePullThread::check_update_param(uint64_t table_id) { + { + std::lock_guard lock(_mutex_for_version); + auto& version = _training_versions[table_id]; + _current_version[table_id] = *(std::min_element(version.begin(), version.end())); + } + if (_current_version[table_id] - _last_versions[table_id] < _threshold) { + return false; + } + return true; +} + +void DensePullThread::reset_thread_version(uint64_t table_id) { + std::lock_guard lock(_mutex_for_version); + _last_versions[table_id] = _current_version[table_id]; +} +std::future DensePullThread::pull_dense(uint64_t table_id) { + auto& regions = _regions[table_id]; + regions.clear(); + auto& variables = _dense_variable_name[table_id]; + regions.resize(variables.size()); + + for (auto i = 0u; i < variables.size(); ++i) { + auto& t = variables[i]; + Variable* var = _root_scope->FindVar(t); + LoDTensor* tensor = var->GetMutable(); + + float* w = tensor->data(); + paddle::ps::Region reg(w, tensor->numel()); + regions[i] = std::move(reg); + } + return _ps_client->pull_dense(regions.data(), regions.size(), table_id); +} + +void DensePullThread::wait_all() { + for (auto& t : _pull_dense_status) { + t.wait(); + auto status = t.get(); + if (status != 0) { + LOG(WARNING) << "pull dense failed times:" << ++_pull_dense_fail_times; + } + } + + if (_pull_dense_fail_times > 20) { + LOG(FATAL) << "pull dense failed times more than 20 times"; + exit(-1); + } + + _pull_dense_status.resize(0); +} + +void DensePullThread::increase_thread_version(int thread_id, uint64_t table_id) { + std::lock_guard lock(_mutex_for_version); + _training_versions[table_id][thread_id]++; +} + void ExecutorThreadWorker::CreateThreadOperators(const ProgramDesc& program) { auto& block = program.Block(0); op_names_.clear(); @@ -90,6 +169,11 @@ void ExecutorThreadWorker::SetFetchVarNames( fetch_var_names.end()); } +void ExecutorThreadWorker::SetPSlibPtr(std::shared_ptr pslib_ptr) { + +} + + void ExecutorThreadWorker::SetDevice() { #if defined _WIN32 || defined __APPLE__ return; @@ -219,5 +303,377 @@ void ExecutorThreadWorker::SetRootScope(Scope* g_scope) { root_scope_ = g_scope; } +//AsyncExecutor +void AsyncExecutorThreadWorker::TrainFiles() { + SetDevice(); + + int fetch_var_num = fetch_var_names_.size(); + fetch_values_.clear(); + fetch_values_.resize(fetch_var_num); + + thread_reader_->Start(); + + int cur_batch; + int batch_cnt = 0; + while ((cur_batch = thread_reader_->Next()) > 0) { + // executor run here + TrainOneNetwork(); + + ++batch_cnt; + thread_scope_->DropKids(); + + if (debug_ == false || thread_id_ != 0) { + continue; + } + + for (int i = 0; i < fetch_var_num; ++i) { + print_fetch_var(thread_scope_, fetch_var_names_[i]); + } // end for (int i = 0...) 
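+    // Each iteration: TrainOneNetwork() pulls and fills the sparse
+    // embeddings, runs the ops, then pushes gradients back; the DropKids()
+    // call above releases the per-batch child scopes so memory stays bounded.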
+ } // end while () +} + +void AsyncExecutorThreadWorker::SetPSlibPtr(std::shared_ptr pslib_ptr) { + _pslib_ptr = pslib_ptr; +} +void AsyncExecutorThreadWorker::SetPullDenseThread(std::shared_ptr dpt) { + _pull_dense_thread = dpt; +} +void AsyncExecutorThreadWorker::TrainOneNetwork() { + PrepareParams(); + + for (auto& op : ops_) { + if (op->Type().find("sgd") != std::string::npos) { + continue; + } + op->Run(*thread_scope_, place_); + } + + UpdateParams(); +} + +void AsyncExecutorThreadWorker::BindingSlotVariableMemory() { + /* + std::vector ins_slot_offset(batch_size + 1, 0); + for (auto i = 1u; i <= batch_size; ++i) { + ins_slot_offset[i] += ins_slot_offset[i - 1] + slot_dim; + } + + std::vector tensor_lod(batch_size + 1, 0); + for (auto i = 1u; i <= batch_size; ++i) { + tensor_lod[i] += tensor_lod[i - 1] + 1; + } + + auto& used_slots = reader->get_use_slot_alias(); + slot_input_vec.resize(used_slots.size() - 1); + for (auto slot_idx = 1u; slot_idx < used_slots.size(); ++slot_idx) { + auto var = slot_input_variable_name[slot_idx]; + + auto v = thread_scope->FindVar(var); + CHECK(v != nullptr) << "var[" << var << "] not found"; + + LoDTensor* tensor = v->GetMutable(); + float* tensor_ptr = tensor->mutable_data({batch_size, slot_dim}, platform::CPUPlace()); + memset(tensor_ptr, 0, sizeof(float) * ins_slot_offset.back()); + + LoD data_lod{tensor_lod}; + tensor->set_lod(data_lod); + + slot_input_vec[slot_idx - 1].reset(tensor); + } + */ +} +void AsyncExecutorThreadWorker::SetParamConfig(AsyncWorkerParamConfig* pc) { + _param_config = pc; +} + +void AsyncExecutorThreadWorker::PrepareParams() { + int table_id = 0; //TODO + PullSparse(table_id); + for (auto& t : _pull_sparse_status) { + t.wait(); + auto status = t.get(); + if (status != 0) { + LOG(ERROR) << "pull sparse failed, status[" << status << "]"; + exit(-1); + } + } + _pull_sparse_status.resize(0); + + FillSparse(table_id); +} + +void AsyncExecutorThreadWorker::UpdateParams() { + //for (auto i = 0u; i < GlobalConfig::instance().dense_table_id.size(); ++i) {//TODO + for (int i = 0; i < 1; ++i) { + PushSparse(i); + } + //for (auto i = 0u; i < GlobalConfig::instance().dense_table_id.size(); ++i) {//TODO + for (int i = 1; i < 2; ++i) { + PushDense(i); + } + int32_t tmp_push_dense_wait_times = _param_config->tmp_push_dense_wait_times; //TODO + int32_t tmp_push_sparse_wait_times = _param_config->tmp_push_sparse_wait_times; //TODO + static uint32_t push_dense_wait_times = static_cast(tmp_push_dense_wait_times); + static uint32_t push_sparse_wait_times = static_cast(tmp_push_sparse_wait_times); + + if (_push_dense_status.size() >= push_dense_wait_times) { + for (auto& t : _push_dense_status) { + t.wait(); + } + _push_dense_status.resize(0); + } + if (tmp_push_dense_wait_times == -1) { + _push_dense_status.resize(0); + } + + if (_push_sparse_status.size() >= push_sparse_wait_times) { + for (auto& t : _push_sparse_status) { + t.wait(); + } + _push_sparse_status.resize(0); + } + if (tmp_push_sparse_wait_times == -1) { + _push_sparse_status.resize(0); + } + + //for (auto dense_table_id : GlobalConfig::instance().dense_table_id) {//TODO + int dense_table_id = 1; + _pull_dense_thread->increase_thread_version(thread_id_, dense_table_id); + //} +} + +void AsyncExecutorThreadWorker::PushDense(int table_id) { + //auto table_id = GlobalConfig::instance().dense_table_id[table_id_index]; TODO + + std::vector regions; + //auto& variables = GlobalConfig::instance().dense_gradient_variable_name[table_id]; + std::vector variables; + for (auto& t : variables) { 
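+    // Wrap each dense gradient tensor in a ps::Region (pointer + length);
+    // all regions for this table are then pushed in one push_dense() call.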
+ Variable* var = thread_scope_->FindVar(t); + CHECK(var != nullptr) << "var[" << t << "] not found"; + LoDTensor* tensor = var->GetMutable(); + int count = tensor->numel(); + float* g = tensor->data(); + paddle::ps::Region reg(g, count); + regions.emplace_back(std::move(reg)); + } + + auto status = _pslib_ptr->_worker_ptr->push_dense(regions.data(), regions.size(), table_id); + _push_dense_status.push_back(std::move(status)); + +} + +void AsyncExecutorThreadWorker::PullSparse(int table_id) { + + + auto& features = _features[table_id]; + auto& feature_value = _feature_value[table_id]; + auto fea_dim = _param_config->fea_dim; //TODO + // slot id starts from 1 + features.clear(); + features.resize(0); + features.reserve(MAX_FEASIGN_NUM); + + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); + // slot_idx = 0 is label TODO + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + LoDTensor* tensor = var->GetMutable(); + int64_t* ids = tensor->data(); + int len = tensor->numel(); + for (auto i = 0u; i < len; ++i) { + //todo: current trick - filter feasign=use_slot_mod(bug: datafeed fill use_slot_mod for empty slot) + if (ids[i] == 0u) { + continue; + } + features.push_back(static_cast(ids[i])); + } + } + + check_pull_push_memory(features, feature_value, fea_dim); + + std::vector pull_feature_value; + for (auto i = 0u; i < features.size(); ++i) { + pull_feature_value.push_back(feature_value[i].data()); + } + + auto status = _pslib_ptr->_worker_ptr->pull_sparse( + pull_feature_value.data(), table_id, features.data(), features.size()); + _pull_sparse_status.push_back(std::move(status)); + + //to save time + auto& push_g = _feature_push_value[table_id]; + check_pull_push_memory(features, push_g, fea_dim); + + //binding_slot_embed_with_concat(); TODO + collect_feasign_info(table_id); //TODO +} + +void AsyncExecutorThreadWorker::FillSparse(int table_id) { + auto slot_dim = _param_config->slot_dim; // TODO + auto fea_dim = _param_config->fea_dim; //TODO + auto& features = _features[table_id]; + auto& fea_value = _feature_value[table_id]; + + CHECK(features.size() > 0) << "feature size check failed"; + + auto fea_idx = 0u; + + std::vector init_value(fea_dim); + + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); + // slot_idx = 0 is label TODO + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + LoDTensor* tensor = var->GetMutable(); + int64_t* ids = tensor->data(); + int len = tensor->numel(); + + Variable* var_emb = thread_scope_->FindVar(_param_config->slot_input_vec[slot_idx - 1]); + LoDTensor* tensor_emb = var_emb->GetMutable(); + float* ptr = tensor_emb->data(); + + for (auto index = 0u; index < len; ++index){ + //if (_current_train_job.use_cvm_feature()) { + // if (ids[index] == 0u) { + // memcpy(ptr + slot_dim * index, init_value.data(), sizeof(float) * slot_dim); + // continue; + // } + // memcpy(ptr + slot_dim * index, fea_value[fea_idx].data(), sizeof(float) * slot_dim); + // (ptr + slot_dim * index)[0] = log((ptr + slot_dim * index)[0] + 1); + // (ptr + slot_dim * index)[1] = log((ptr + slot_dim * index)[1] + 1) - (ptr + slot_dim * index)[0]; + // fea_idx++; + //} else { + if (ids[index] == 0u) { + memcpy(ptr + slot_dim * index, init_value.data() + 2, sizeof(float) * slot_dim); + continue; + } + memcpy(ptr + slot_dim * index, fea_value[fea_idx].data() + 2, sizeof(float) * slot_dim); + fea_idx++; + //} + } + 
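+    // Note the data() + 2 offset above: the first two floats of each pulled
+    // value carry per-feature statistics (filled in as show/click by
+    // PushSparse), so only the trailing slot_dim floats are embedding payload.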
} +} + +void AsyncExecutorThreadWorker::PushSparse(int table_id) { + + auto slot_dim = _param_config->slot_dim; //TODO + auto fea_dim = _param_config->fea_dim;//_current_train_job.fea_dim();TODO + auto& features = _features[table_id]; + //std::vector gradient_var; + //auto& gradient_var = GlobalConfig::instance().input_gradient_variable_name; //TODO + auto& push_g = _feature_push_value[table_id]; + check_pull_push_memory(features, push_g, fea_dim); + uint64_t fea_idx = 0u; + auto& fea_info = _fea_info[table_id]; //TODO + int offset = 0; + //if (!_current_train_job.use_cvm_feature()) { //TODO + offset = 2; + //} + + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); + + // slot_idx = 0 is label TODO + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + if (_slot_alias_to_table[feed_vec[slot_idx]] != table_id) { + continue; + } + Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[slot_idx - 1]); + LoDTensor* g_tensor = g_var->GetMutable(); + //int count = g_tensor->numel(); + float* g = g_tensor->data(); + /* + if (FLAGS_scale_sparse_gradient_with_batch_size) { + Eigen::Map g_mat(g, 1, tensor->numel()); + g_mat *= _batch_size; + } + */ + + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + LoDTensor* tensor = var->GetMutable(); + int len = tensor->lod()[0].back(); + //assert(slot_dim * len == count); + int64_t* ids = tensor->data(); + for (auto id_idx = 0u; id_idx < len; ++id_idx){ + if (ids[id_idx] == 0) { + g += slot_dim; + continue; + } + memcpy(push_g[fea_idx].data() + offset, g, sizeof(float) * slot_dim); + push_g[fea_idx][0] = 1.0f; + push_g[fea_idx][1] = static_cast(fea_info[fea_idx].label); + g += slot_dim; + fea_idx++; + } + } + assert(fea_idx == features.size()); + CHECK(features.size() > 0); + + std::vector push_g_vec; + for (auto i = 0u; i < features.size(); ++i) { + push_g_vec.push_back(push_g[i].data()); + } + auto status = _pslib_ptr->_worker_ptr->push_sparse( + table_id, features.data(), (const float**)push_g_vec.data(), features.size()); + _push_sparse_status.push_back(std::move(status)); +} + +void AsyncExecutorThreadWorker::collect_feasign_info( + int table_id) { + auto& fea_info = _fea_info[table_id]; + auto& feature = _features[table_id]; + fea_info.resize(feature.size()); + + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); + Variable* var = thread_scope_->FindVar(feed_vec[0]); + LoDTensor* tensor = var->GetMutable(); + int64_t* label = tensor->data(); + + int global_index = 0; + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + LoDTensor* tensor = var->GetMutable(); + int64_t* ids = tensor->data(); + + int fea_idx = 0; + for (auto ins_idx = 1u; ins_idx < tensor->lod()[0].size(); ++ins_idx) { + for (; fea_idx < tensor->lod()[0][ins_idx]; ++fea_idx) { + if (ids[fea_idx] == 0u) { + continue; + } + FeasignInfo info{slot_idx, ins_idx, label[ins_idx - 1]}; + + fea_info[global_index++] = std::move(info); + } + } + } + CHECK(global_index == feature.size()) << "expect fea info size:" << feature.size() + << " real:" << global_index; +} + +void AsyncExecutorThreadWorker::check_pull_push_memory( + std::vector& features, + std::vector>& push_g, + int dim) { + push_g.resize(features.size() + 1); + for (auto& t : push_g) { + t.resize(dim); + } +} + +void AsyncExecutorThreadWorker::check_pull_push_memory( + std::vector& features, + std::vector& push_g, + int dim) { + if (features.size() > push_g.size()) { + 
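+    // Grow the buffer list on demand only; the raw arrays allocated here are
+    // kept for the worker's lifetime and reused across batches.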
push_g.reserve(features.size() + 1); + auto size = features.size() - push_g.size() + 1; + for (auto i = 0u; i < size; ++i) { + float* ptr = new float[dim]; + push_g.push_back(ptr); + } + } +} + } // einit_modelnd namespace framework } // end namespace paddle diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 13ec2442c4..63f383cd47 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -25,16 +25,107 @@ limitations under the License. */ #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "pslib.h" namespace paddle { namespace framework { + +const static uint32_t MAX_FEASIGN_NUM = 1000 * 100 * 100; + void CreateTensor(Variable* var, proto::VarType::Type var_type); +struct AsyncWorkerParamConfig { + int slot_dim; + int fea_dim; + int32_t tmp_push_dense_wait_times; + int32_t tmp_push_sparse_wait_times; + + std::vector slot_input_vec; //6048slot 6050slot //name + std::vector gradient_var; //6048slot_embed +}; + +struct DensePullThreadParam { + std::shared_ptr ps_client; + int threshold; + int training_thread_num; + Scope* root_scope; + std::map>* dense_params; + int sleep_time_ms = 2; +}; + +class DensePullThread { +public: + DensePullThread(DensePullThreadParam& param) : + _running(false) { + _ps_client = param.ps_client; + _threshold = param.threshold; + _thread_num = param.training_thread_num; + _root_scope = param.root_scope; + _sleep_time_ms = param.sleep_time_ms; + + for (auto& t : *param.dense_params) { + _dense_variable_name[t.first].insert( + _dense_variable_name[t.first].end(), + t.second.begin(), t.second.end()); + _training_versions[t.first].resize(_thread_num, 0); + _last_versions[t.first] = 0; + _current_version[t.first] = 0; + } + } + + int start(); + + void stop() { + if (_running) { + _running = false; + _t.join(); + } + } + + void increase_thread_version(int thread_id, uint64_t table_id); + void reset_thread_version(uint64_t table_id); + std::future pull_dense(uint64_t table_id); + void pull_dense2(uint64_t table_id); + void wait_all(); + +private: + void run(); + bool check_update_param(uint64_t table_id); + +private: + std::shared_ptr _ps_client; + int _thread_num; + int _threshold; + int _sleep_time_ms; + Scope* _root_scope; + bool _running; + + std::map _last_versions; + std::map _current_version; + std::mutex _mutex_for_version; + std::map> _training_versions; + std::map> _dense_variable_name; + + std::thread _t; + + std::vector<::std::future> _pull_dense_status; + + std::map> _regions; + uint32_t _pull_dense_fail_times = 0; + + std::vector _base_norm_param; + std::vector _mean; + std::vector _scale; + float _squared_sum_epsilon = 1e-4; + std::mutex _mutex_for_mean_scale; + + float _total_batch_num = 0; +}; class ExecutorThreadWorker { public: ExecutorThreadWorker() : thread_id_(-1), root_scope_(NULL), thread_scope_(NULL), debug_(false) {} - ~ExecutorThreadWorker() {} + virtual ~ExecutorThreadWorker() {} void CreateThreadResource(const framework::ProgramDesc& program, const paddle::platform::Place& place); @@ -51,10 +142,13 @@ class ExecutorThreadWorker { // set data feed declared in executor void SetDataFeed(const std::shared_ptr& datafeed); // A multi-thread training function - void TrainFiles(); + virtual void TrainFiles(); // set fetch variable names from python interface assigned by users void SetFetchVarNames(const std::vector& fetch_var_names); - 
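+  // The hooks below are default no-ops; AsyncExecutorThreadWorker overrides
+  // them to attach the pslib client, the shared DensePullThread, and the
+  // per-worker parameter configuration.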
+ virtual void SetPSlibPtr(std::shared_ptr pslib_ptr); + virtual void SetPullDenseThread(std::shared_ptr dpt) {}; + virtual void BindingSlotVariableMemory() {}; + virtual void SetParamConfig(AsyncWorkerParamConfig* pc) {}; private: void CreateThreadScope(const framework::ProgramDesc& program); void CreateThreadOperators(const framework::ProgramDesc& program); @@ -77,12 +171,58 @@ class ExecutorThreadWorker { Scope* root_scope_; // a thread scope, father scope is global score which is shared Scope* thread_scope_; - - private: + //private: std::vector fetch_var_names_; std::vector> fetch_values_; bool debug_; }; +class AsyncExecutorThreadWorker: public ExecutorThreadWorker { +public: + AsyncExecutorThreadWorker(){}; + virtual ~AsyncExecutorThreadWorker() {} + void SetPSlibPtr(std::shared_ptr pslib_ptr); + void SetPullDenseThread(std::shared_ptr dpt); + void BindingSlotVariableMemory(); + void SetParamConfig(AsyncWorkerParamConfig* pc); + void TrainFiles(); + void TrainOneNetwork(); + void PrepareParams(); + void UpdateParams(); + void PullSparse(int table_id); + void FillSparse(int table_id); + void PushSparse(int table_id); + void PushDense(int table_id); + + void check_pull_push_memory(std::vector& features, std::vector& push_g, int dim); + void check_pull_push_memory(std::vector& features, std::vector>& push_g, int dim); + void collect_feasign_info(int table_id); +private: + struct FeasignInfo { + uint32_t slot; + uint32_t ins; + int64_t label; + }; + + std::map> _features; + std::map> _fea_info; + std::map>> _feature_value; + std::map>> _feature_push_value; + + std::unordered_map _slot_alias_to_table; //TODO + + std::shared_ptr _pslib_ptr; + + std::shared_ptr _pull_dense_thread; + + std::vector<::std::future> _pull_sparse_status; + std::vector<::std::future> _pull_dense_status; + std::vector<::std::future> _push_sparse_status; + std::vector<::std::future> _push_dense_status; + + AsyncWorkerParamConfig* _param_config; + +}; + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/pybind/async_executor_py.cc b/paddle/fluid/pybind/async_executor_py.cc index 470e8b0508..63fd06224f 100644 --- a/paddle/fluid/pybind/async_executor_py.cc +++ b/paddle/fluid/pybind/async_executor_py.cc @@ -47,7 +47,11 @@ void BindAsyncExecutor(py::module* m) { return std::unique_ptr( new framework::AsyncExecutor(scope, place)); })) - .def("run_from_files", &framework::AsyncExecutor::RunFromFile); + .def("run_from_files", &framework::AsyncExecutor::RunFromFile) + .def("config_pslib", &framework::AsyncExecutor::ConfigPslib) + .def("start_server", &framework::AsyncExecutor::StartServer) + .def("init_model", &framework::AsyncExecutor::InitModel) + .def("save_model", &framework::AsyncExecutor::SaveModel); } // end BindAsyncExecutor } // end namespace pybind } // end namespace paddle diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 2664a7301d..2945e6e143 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -149,3 +149,16 @@ class AsyncExecutor(object): self.executor.run_from_files(program_desc, data_feed.desc(), filelist, thread_num, fetch_var_names, debug) + + def config_ps(self, dist_desc, host_sign_list, node_num, index): + self.executor.config_pslib(dist_desc, host_sign_list, node_num, index) + + def start_server(self): + self.executor.start_server() + + def init_model(self): + self.executor.init_model() + + def save_model(self, save_path): + self.executor.save_model(save_path) + From 
e650b42914eca57c8d5a9f743e10788d9cc39828 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Mon, 3 Dec 2018 17:47:13 +0800 Subject: [PATCH 08/62] async_thread_trainer & libmct & pslib.cmake --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c929396ff..6fd8dd1dfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -217,7 +217,7 @@ include(cupti) include(external/gzstream) endif (NOT WIN32) include(external/libmct) -include(external/pslib_brpc) +#include(external/pslib_brpc) include(external/pslib) if(WITH_DISTRIBUTE) @@ -280,6 +280,8 @@ set(EXTERNAL_LIBS zlib ${PYTHON_LIBRARIES} pslib + #pslib_brpc + libmct ) if(WITH_AMD_GPU) From ee4c51a372be97076f06cc7c61f624c3b65b501e Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Mon, 3 Dec 2018 18:00:21 +0800 Subject: [PATCH 09/62] refine downpour sgd API with pslib --- python/paddle/fluid/distributed/__init__.py | 0 python/paddle/fluid/distributed/downpour.py | 30 +- python/paddle/fluid/distributed/node.py | 30 +- python/paddle/fluid/distributed/ps_pb2.py | 1491 +++++++++++++++++++ 4 files changed, 1526 insertions(+), 25 deletions(-) create mode 100644 python/paddle/fluid/distributed/__init__.py create mode 100644 python/paddle/fluid/distributed/ps_pb2.py diff --git a/python/paddle/fluid/distributed/__init__.py b/python/paddle/fluid/distributed/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 523f686668..551a471495 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -1,34 +1,32 @@ -import paddle.fluid as fluid -import pslib_pb2 as pslib from .node import DownpourServer from .node import DownpourWorker +from ..backward import append_backward +import ps_pb2 as pslib from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table +from google.protobuf import text_format class DownpourSGD(object): - def __init__(self, optimizer=opt, learning_rate=0.001, window=1): + def __init__(self, learning_rate=0.001, window=1): # todo(guru4elephant): if optimizer is not None, will warning here - self.learning_rate_ = opt.learning_rate + self.learning_rate_ = learning_rate self.window_ = window - def minimize(self, loss, startup_program=None, - parameter_list=None, no_grad_set=None, + def minimize(self, loss, startup_program=None, + parameter_list=None, no_grad_set=None, prefetch_slots=None, prefetch_slots_emb=None): params_grads = sorted(append_backward(loss), key=lambda x:x[0].name) - table_name = fluid_distributed_lookup_table(loss.block.program) + table_name = find_distributed_lookup_table(loss.block.program) server = DownpourServer() - worker = DownpourWorker() - server.add_sparse_table(0, learning_rate, + worker = DownpourWorker(self.window_) + server.add_sparse_table(0, learning_rate, prefetch_slots, prefetch_slots_emb) server.add_dense_table(1, learning_rate, params, grads) - worker.add_sparse_table(0, learning_rate, + worker.add_sparse_table(0, learning_rate, prefetch_slots, prefetch_slots_emb) worker.add_dense_table(1, learning_rate, params, grads) - ps_param = pslib.PSParameter() ps_param.server_param.CopyFrom(server.get_desc()) - ps_param.worker_param.CopyFrom(worker.get_desc()) + #ps_param.worker_param.CopyFrom(worker.get_desc()) worker_skipped_ops = ["lookup_table", "lookup_table_grad"] - - return [solver_desc, parallel_desc] - - + ps_param_str = text_format.MessageToString(ps_param) + 
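+        # The trainer receives the serialized PSParameter proto together with
+        # the ops it must skip: the embedding lookup and its gradient are
+        # handled by pslib sparse pull/push instead of running locally.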
return [ps_param_str, worker_skipped_ops] diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index fc62d7220c..3344bba137 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -1,5 +1,4 @@ -import paddle.fluid as fluid -import pslib_pb2 as pslib +import ps_pb2 as pslib class Server(object): def __init__(self): @@ -13,11 +12,13 @@ class Worker(object): class DownpourServer(Server): def __init__(self): - self.server_ = pslib.ServerParameter().downpour_server_param + #self.server_ = pslib.ServerParameter().downpour_server_param + self.server_ = pslib.ServerParameter() def add_sparse_table(self, table_id, learning_rate, slot_key, slot_value_var, slot_grad_var): - table = self.server_.downpour_table_param.add() + #table = self.server_.downpour_table_param.add() + table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id table.type = PS_SPARSE_TABLE table.accessor.accessor_class = "DownpourFeatureValueAccessor" @@ -26,12 +27,14 @@ class DownpourServer(Server): def add_dense_table(self, table_id, learning_rate, param_var, grad_var): - table = self.server_.downpour_table_param.add() + #table = self.server_.downpour_table_param.add() + table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id table.type = PS_DENSE_TABLE table.accessor.accessor_class = "DownpourDenseValueAccessor" table.accessor.sparse_sgd_param.learning_rate = learning_rate - table.accessor.fea_dim = reduce(lambda x, y: x.shape, 1 for x in param_var) + table.accessor.fea_dim = 1 + #table.accessor.fea_dim = reduce(lambda x, y: x.shape, 1 for x in param_var) def get_desc(self): return self.server_ @@ -40,19 +43,28 @@ class DownpourServer(Server): class DownpourWorker(Worker): def __init__(self, window): self.window = window - self.worker_ = pslib.WorkerParameter().downpour_worker_param + #self.worker_ = pslib.WorkerParameter().downpour_worker_param + #self.worker_ = pslib.WorkerParameter() + self.worker_ = pslib.DownpourTrainerParameter() + #self.worker_.pull_dense_per_batch = window + #self.worker_.push_dense_per_batch = window + #self.worker_.downpour_worker_param.pull_dense_per_batch = window + #self.worker_.downpour_worker_param.push_dense_per_batch = window self.worker_.pull_dense_per_batch = window self.worker_.push_dense_per_batch = window + print(self.worker_) def add_sparse_table(self, table_id, slot_keys, slot_value_vars, slot_grad_vars): - table = self.worker_.sparse_table.add() + #table = self.worker_.sparse_table.add() + table = self.worker_.downpour_worker_param.sparse_table.add() table.table_id = table_id table.slot.extend(slot_keys) self.worker_.extend([grad.name for grad in slot_grad_vars]) def add_dense_table(self, table_id, param_vars, grad_vars): - table = self.worker_.dense_table.add() + #table = self.worker_.dense_table.add() + table = self.worker_.downpour_worker_param.dense_table.add() table.table_id = table_id table.dense_variable_name.extend([p.name for p in param_vars]) table.dense_gradient_variable_name.extend([g.name for g in grad_vars]) diff --git a/python/paddle/fluid/distributed/ps_pb2.py b/python/paddle/fluid/distributed/ps_pb2.py new file mode 100644 index 0000000000..355841aba8 --- /dev/null +++ b/python/paddle/fluid/distributed/ps_pb2.py @@ -0,0 +1,1491 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
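+# This module is the protoc output for ps.proto. A minimal usage sketch,
+# mirroring how downpour.py above consumes it:
+#   import ps_pb2 as pslib
+#   ps_param = pslib.PSParameter()
+#   ps_param.server_param.CopyFrom(server.get_desc())
+#   ps_param_str = text_format.MessageToString(ps_param)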
+# source: ps.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf.internal import enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='ps.proto', + package='paddle', + syntax='proto2', + serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\xe4\x01\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xbc\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x02 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x03 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1c\n\x14pull_dense_per_batch\x18\x04 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x05 \x01(\x05\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\x91\x01\n\x16ServerServiceParameter\x12\x14\n\x0cserver_class\x18\x01 \x01(\t\x12\x14\n\x0c\x63lient_class\x18\x02 \x01(\t\x12\x15\n\rservice_class\x18\x03 \x01(\t\x12\x19\n\x11start_server_port\x18\x04 \x01(\r\x12\x19\n\x11server_thread_num\x18\x05 \x01(\r\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 
\x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 \x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') +) +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +_TABLETYPE = _descriptor.EnumDescriptor( + name='TableType', + 
full_name='paddle.TableType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='PS_SPARSE_TABLE', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_DENSE_TABLE', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=3140, + serialized_end=3192, +) +_sym_db.RegisterEnumDescriptor(_TABLETYPE) + +TableType = enum_type_wrapper.EnumTypeWrapper(_TABLETYPE) +_PSCMDID = _descriptor.EnumDescriptor( + name='PsCmdID', + full_name='paddle.PsCmdID', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='PS_PULL_DENSE_TABLE', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PUSH_DENSE_TABLE', index=1, number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PULL_SPARSE_TABLE', index=2, number=2, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PUSH_SPARSE_TABLE', index=3, number=3, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_SHRINK_TABLE', index=4, number=4, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_SAVE_ONE_TABLE', index=5, number=5, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_SAVE_ALL_TABLE', index=6, number=6, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_LOAD_ONE_TABLE', index=7, number=7, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_LOAD_ALL_TABLE', index=8, number=8, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_CLEAR_ONE_TABLE', index=9, number=9, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_CLEAR_ALL_TABLE', index=10, number=10, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PUSH_DENSE_PARAM', index=11, number=11, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_STOP_SERVER', index=12, number=12, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=3195, + serialized_end=3512, +) +_sym_db.RegisterEnumDescriptor(_PSCMDID) + +PsCmdID = enum_type_wrapper.EnumTypeWrapper(_PSCMDID) +PS_SPARSE_TABLE = 0 +PS_DENSE_TABLE = 1 +PS_PULL_DENSE_TABLE = 0 +PS_PUSH_DENSE_TABLE = 1 +PS_PULL_SPARSE_TABLE = 2 +PS_PUSH_SPARSE_TABLE = 3 +PS_SHRINK_TABLE = 4 +PS_SAVE_ONE_TABLE = 5 +PS_SAVE_ALL_TABLE = 6 +PS_LOAD_ONE_TABLE = 7 +PS_LOAD_ALL_TABLE = 8 +PS_CLEAR_ONE_TABLE = 9 +PS_CLEAR_ALL_TABLE = 10 +PS_PUSH_DENSE_PARAM = 11 +PS_STOP_SERVER = 12 + + +_FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( + name='FsApiType', + full_name='paddle.FsClientParameter.FsApiType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='HDFS', index=0, number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='AFS', index=1, number=1, + options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=3108, + serialized_end=3138, +) +_sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) + + +_PSPARAMETER = _descriptor.Descriptor( + name='PSParameter', + full_name='paddle.PSParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='worker_class', full_name='paddle.PSParameter.worker_class', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, 
default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='server_class', full_name='paddle.PSParameter.server_class', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='instance_class', full_name='paddle.PSParameter.instance_class', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='worker_param', full_name='paddle.PSParameter.worker_param', index=3, + number=101, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='server_param', full_name='paddle.PSParameter.server_param', index=4, + number=102, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='fs_client_param', full_name='paddle.PSParameter.fs_client_param', index=5, + number=501, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=21, + serialized_end=249, +) + + +_WORKERPARAMETER = _descriptor.Descriptor( + name='WorkerParameter', + full_name='paddle.WorkerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_worker_param', full_name='paddle.WorkerParameter.downpour_worker_param', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=251, + serialized_end=332, +) + + +_SERVERPARAMETER = _descriptor.Descriptor( + name='ServerParameter', + full_name='paddle.ServerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_server_param', full_name='paddle.ServerParameter.downpour_server_param', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=334, + serialized_end=415, +) + + +_DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( + 
name='DownpourWorkerParameter', + full_name='paddle.DownpourWorkerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_table_param', full_name='paddle.DownpourWorkerParameter.downpour_table_param', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=417, + serialized_end=496, +) + + +_DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( + name='DownpourTrainerParameter', + full_name='paddle.DownpourTrainerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='dense_table', full_name='paddle.DownpourTrainerParameter.dense_table', index=0, + number=2, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='sparse_table', full_name='paddle.DownpourTrainerParameter.sparse_table', index=1, + number=3, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='pull_dense_per_batch', full_name='paddle.DownpourTrainerParameter.pull_dense_per_batch', index=2, + number=4, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='push_dense_per_batch', full_name='paddle.DownpourTrainerParameter.push_dense_per_batch', index=3, + number=5, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=499, + serialized_end=687, +) + + +_DENSETABLEPARAMETER = _descriptor.Descriptor( + name='DenseTableParameter', + full_name='paddle.DenseTableParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_id', full_name='paddle.DenseTableParameter.table_id', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dense_variable_name', full_name='paddle.DenseTableParameter.dense_variable_name', index=1, + number=2, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dense_gradient_variable_name', full_name='paddle.DenseTableParameter.dense_gradient_variable_name', index=2, + number=3, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + 
message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='fea_dim', full_name='paddle.DenseTableParameter.fea_dim', index=3, + number=4, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=689, + serialized_end=812, +) + + +_SPARSETABLEPARAMETER = _descriptor.Descriptor( + name='SparseTableParameter', + full_name='paddle.SparseTableParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_id', full_name='paddle.SparseTableParameter.table_id', index=0, + number=1, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='feature_dim', full_name='paddle.SparseTableParameter.feature_dim', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='slot_key', full_name='paddle.SparseTableParameter.slot_key', index=2, + number=3, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='slot_value', full_name='paddle.SparseTableParameter.slot_value', index=3, + number=4, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='slot_gradient', full_name='paddle.SparseTableParameter.slot_gradient', index=4, + number=5, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=814, + serialized_end=936, +) + + +_DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( + name='DownpourServerParameter', + full_name='paddle.DownpourServerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_table_param', full_name='paddle.DownpourServerParameter.downpour_table_param', index=0, + number=1, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='service_param', full_name='paddle.DownpourServerParameter.service_param', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + 
nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=939, + serialized_end=1073, +) + + +_SERVERSERVICEPARAMETER = _descriptor.Descriptor( + name='ServerServiceParameter', + full_name='paddle.ServerServiceParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='server_class', full_name='paddle.ServerServiceParameter.server_class', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='client_class', full_name='paddle.ServerServiceParameter.client_class', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='service_class', full_name='paddle.ServerServiceParameter.service_class', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='start_server_port', full_name='paddle.ServerServiceParameter.start_server_port', index=3, + number=4, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='server_thread_num', full_name='paddle.ServerServiceParameter.server_thread_num', index=4, + number=5, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1076, + serialized_end=1221, +) + + +_TABLEPARAMETER = _descriptor.Descriptor( + name='TableParameter', + full_name='paddle.TableParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_id', full_name='paddle.TableParameter.table_id', index=0, + number=1, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='table_class', full_name='paddle.TableParameter.table_class', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='shared_num', full_name='paddle.TableParameter.shared_num', index=2, + number=3, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='accessor', full_name='paddle.TableParameter.accessor', index=3, + 
number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='type', full_name='paddle.TableParameter.type', index=4, + number=5, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='compress_in_save', full_name='paddle.TableParameter.compress_in_save', index=5, + number=6, type=8, cpp_type=7, label=1, + has_default_value=True, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1224, + serialized_end=1415, +) + + +_TABLEACCESSORPARAMETER = _descriptor.Descriptor( + name='TableAccessorParameter', + full_name='paddle.TableAccessorParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='accessor_class', full_name='paddle.TableAccessorParameter.accessor_class', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='sparse_sgd_param', full_name='paddle.TableAccessorParameter.sparse_sgd_param', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dense_sgd_param', full_name='paddle.TableAccessorParameter.dense_sgd_param', index=2, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='fea_dim', full_name='paddle.TableAccessorParameter.fea_dim', index=3, + number=4, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='embedx_dim', full_name='paddle.TableAccessorParameter.embedx_dim', index=4, + number=5, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='embedx_threshold', full_name='paddle.TableAccessorParameter.embedx_threshold', index=5, + number=6, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='downpour_accessor_param', full_name='paddle.TableAccessorParameter.downpour_accessor_param', index=6, + number=7, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, 
extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='table_accessor_save_param', full_name='paddle.TableAccessorParameter.table_accessor_save_param', index=7, + number=8, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1418, + serialized_end=1787, +) + + +_DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( + name='DownpourTableAccessorParameter', + full_name='paddle.DownpourTableAccessorParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='nonclk_coeff', full_name='paddle.DownpourTableAccessorParameter.nonclk_coeff', index=0, + number=1, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='click_coeff', full_name='paddle.DownpourTableAccessorParameter.click_coeff', index=1, + number=2, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='base_threshold', full_name='paddle.DownpourTableAccessorParameter.base_threshold', index=2, + number=3, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='delta_threshold', full_name='paddle.DownpourTableAccessorParameter.delta_threshold', index=3, + number=4, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='delta_keep_days', full_name='paddle.DownpourTableAccessorParameter.delta_keep_days', index=4, + number=5, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='show_click_decay_rate', full_name='paddle.DownpourTableAccessorParameter.show_click_decay_rate', index=5, + number=6, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='delete_threshold', full_name='paddle.DownpourTableAccessorParameter.delete_threshold', index=6, + number=7, type=2, cpp_type=6, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1790, + serialized_end=1996, +) + + +_TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( + name='TableAccessorSaveParameter', + 
full_name='paddle.TableAccessorSaveParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='param', full_name='paddle.TableAccessorSaveParameter.param', index=0, + number=1, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='converter', full_name='paddle.TableAccessorSaveParameter.converter', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='deconverter', full_name='paddle.TableAccessorSaveParameter.deconverter', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1998, + serialized_end=2081, +) + + +_PSREQUESTMESSAGE = _descriptor.Descriptor( + name='PsRequestMessage', + full_name='paddle.PsRequestMessage', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='cmd_id', full_name='paddle.PsRequestMessage.cmd_id', index=0, + number=1, type=13, cpp_type=3, label=2, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='table_id', full_name='paddle.PsRequestMessage.table_id', index=1, + number=2, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='params', full_name='paddle.PsRequestMessage.params', index=2, + number=3, type=12, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='client_id', full_name='paddle.PsRequestMessage.client_id', index=3, + number=4, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='data', full_name='paddle.PsRequestMessage.data', index=4, + number=5, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2083, + serialized_end=2184, +) + + +_SPARSESGDRULEPARAMETER = _descriptor.Descriptor( + name='SparseSGDRuleParameter', + full_name='paddle.SparseSGDRuleParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='learning_rate', 
full_name='paddle.SparseSGDRuleParameter.learning_rate', index=0, + number=1, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='initial_g2sum', full_name='paddle.SparseSGDRuleParameter.initial_g2sum', index=1, + number=2, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='initial_range', full_name='paddle.SparseSGDRuleParameter.initial_range', index=2, + number=3, type=1, cpp_type=5, label=1, + has_default_value=True, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='weight_bounds', full_name='paddle.SparseSGDRuleParameter.weight_bounds', index=3, + number=4, type=2, cpp_type=6, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2186, + serialized_end=2305, +) + + +_DENSESGDRULEPARAMETER = _descriptor.Descriptor( + name='DenseSGDRuleParameter', + full_name='paddle.DenseSGDRuleParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='paddle.DenseSGDRuleParameter.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='adam', full_name='paddle.DenseSGDRuleParameter.adam', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='naive', full_name='paddle.DenseSGDRuleParameter.naive', index=2, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='summary', full_name='paddle.DenseSGDRuleParameter.summary', index=3, + number=4, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='moving_average', full_name='paddle.DenseSGDRuleParameter.moving_average', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2308, + serialized_end=2533, +) + + +_ADAMSGDPARAMETER = _descriptor.Descriptor( + 
name='AdamSGDParameter', + full_name='paddle.AdamSGDParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='learning_rate', full_name='paddle.AdamSGDParameter.learning_rate', index=0, + number=1, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='avg_decay_rate', full_name='paddle.AdamSGDParameter.avg_decay_rate', index=1, + number=2, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='ada_decay_rate', full_name='paddle.AdamSGDParameter.ada_decay_rate', index=2, + number=3, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='ada_epsilon', full_name='paddle.AdamSGDParameter.ada_epsilon', index=3, + number=4, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mom_decay_rate', full_name='paddle.AdamSGDParameter.mom_decay_rate', index=4, + number=5, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2536, + serialized_end=2670, +) + + +_NAIVESGDPARAMETER = _descriptor.Descriptor( + name='NaiveSGDParameter', + full_name='paddle.NaiveSGDParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='learning_rate', full_name='paddle.NaiveSGDParameter.learning_rate', index=0, + number=1, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='avg_decay_rate', full_name='paddle.NaiveSGDParameter.avg_decay_rate', index=1, + number=2, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2672, + serialized_end=2738, +) + + +_SUMMARYSGDPARAMETER = _descriptor.Descriptor( + name='SummarySGDParameter', + full_name='paddle.SummarySGDParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='summary_decay_rate', full_name='paddle.SummarySGDParameter.summary_decay_rate', index=0, + number=1, type=1, cpp_type=5, label=1, + has_default_value=True, default_value=float(0.999999), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, 
extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2740, + serialized_end=2799, +) + + +_MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( + name='MovingAverageRuleParameter', + full_name='paddle.MovingAverageRuleParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='momentum', full_name='paddle.MovingAverageRuleParameter.momentum', index=0, + number=1, type=1, cpp_type=5, label=1, + has_default_value=False, default_value=float(0), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2801, + serialized_end=2847, +) + + +_PSRESPONSEMESSAGE = _descriptor.Descriptor( + name='PsResponseMessage', + full_name='paddle.PsResponseMessage', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='err_code', full_name='paddle.PsResponseMessage.err_code', index=0, + number=1, type=5, cpp_type=1, label=2, + has_default_value=True, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='err_msg', full_name='paddle.PsResponseMessage.err_msg', index=1, + number=2, type=9, cpp_type=9, label=2, + has_default_value=True, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='data', full_name='paddle.PsResponseMessage.data', index=2, + number=3, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2849, + serialized_end=2922, +) + + +_FSCLIENTPARAMETER = _descriptor.Descriptor( + name='FsClientParameter', + full_name='paddle.FsClientParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='fs_type', full_name='paddle.FsClientParameter.fs_type', index=0, + number=1, type=14, cpp_type=8, label=1, + has_default_value=True, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='uri', full_name='paddle.FsClientParameter.uri', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='user', full_name='paddle.FsClientParameter.user', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='passwd', 
full_name='paddle.FsClientParameter.passwd', index=3, + number=4, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='buffer_size', full_name='paddle.FsClientParameter.buffer_size', index=4, + number=5, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='hadoop_bin', full_name='paddle.FsClientParameter.hadoop_bin', index=5, + number=51, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='afs_conf', full_name='paddle.FsClientParameter.afs_conf', index=6, + number=101, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + _FSCLIENTPARAMETER_FSAPITYPE, + ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[ + ], + serialized_start=2925, + serialized_end=3138, +) + +_PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER +_PSPARAMETER.fields_by_name['server_param'].message_type = _SERVERPARAMETER +_PSPARAMETER.fields_by_name['fs_client_param'].message_type = _FSCLIENTPARAMETER +_WORKERPARAMETER.fields_by_name['downpour_worker_param'].message_type = _DOWNPOURWORKERPARAMETER +_SERVERPARAMETER.fields_by_name['downpour_server_param'].message_type = _DOWNPOURSERVERPARAMETER +_DOWNPOURWORKERPARAMETER.fields_by_name['downpour_table_param'].message_type = _TABLEPARAMETER +_DOWNPOURTRAINERPARAMETER.fields_by_name['dense_table'].message_type = _DENSETABLEPARAMETER +_DOWNPOURTRAINERPARAMETER.fields_by_name['sparse_table'].message_type = _SPARSETABLEPARAMETER +_DOWNPOURSERVERPARAMETER.fields_by_name['downpour_table_param'].message_type = _TABLEPARAMETER +_DOWNPOURSERVERPARAMETER.fields_by_name['service_param'].message_type = _SERVERSERVICEPARAMETER +_TABLEPARAMETER.fields_by_name['accessor'].message_type = _TABLEACCESSORPARAMETER +_TABLEPARAMETER.fields_by_name['type'].enum_type = _TABLETYPE +_TABLEACCESSORPARAMETER.fields_by_name['sparse_sgd_param'].message_type = _SPARSESGDRULEPARAMETER +_TABLEACCESSORPARAMETER.fields_by_name['dense_sgd_param'].message_type = _DENSESGDRULEPARAMETER +_TABLEACCESSORPARAMETER.fields_by_name['downpour_accessor_param'].message_type = _DOWNPOURTABLEACCESSORPARAMETER +_TABLEACCESSORPARAMETER.fields_by_name['table_accessor_save_param'].message_type = _TABLEACCESSORSAVEPARAMETER +_DENSESGDRULEPARAMETER.fields_by_name['adam'].message_type = _ADAMSGDPARAMETER +_DENSESGDRULEPARAMETER.fields_by_name['naive'].message_type = _NAIVESGDPARAMETER +_DENSESGDRULEPARAMETER.fields_by_name['summary'].message_type = _SUMMARYSGDPARAMETER +_DENSESGDRULEPARAMETER.fields_by_name['moving_average'].message_type = _MOVINGAVERAGERULEPARAMETER +_FSCLIENTPARAMETER.fields_by_name['fs_type'].enum_type = _FSCLIENTPARAMETER_FSAPITYPE +_FSCLIENTPARAMETER_FSAPITYPE.containing_type = _FSCLIENTPARAMETER +DESCRIPTOR.message_types_by_name['PSParameter'] = 
_PSPARAMETER +DESCRIPTOR.message_types_by_name['WorkerParameter'] = _WORKERPARAMETER +DESCRIPTOR.message_types_by_name['ServerParameter'] = _SERVERPARAMETER +DESCRIPTOR.message_types_by_name['DownpourWorkerParameter'] = _DOWNPOURWORKERPARAMETER +DESCRIPTOR.message_types_by_name['DownpourTrainerParameter'] = _DOWNPOURTRAINERPARAMETER +DESCRIPTOR.message_types_by_name['DenseTableParameter'] = _DENSETABLEPARAMETER +DESCRIPTOR.message_types_by_name['SparseTableParameter'] = _SPARSETABLEPARAMETER +DESCRIPTOR.message_types_by_name['DownpourServerParameter'] = _DOWNPOURSERVERPARAMETER +DESCRIPTOR.message_types_by_name['ServerServiceParameter'] = _SERVERSERVICEPARAMETER +DESCRIPTOR.message_types_by_name['TableParameter'] = _TABLEPARAMETER +DESCRIPTOR.message_types_by_name['TableAccessorParameter'] = _TABLEACCESSORPARAMETER +DESCRIPTOR.message_types_by_name['DownpourTableAccessorParameter'] = _DOWNPOURTABLEACCESSORPARAMETER +DESCRIPTOR.message_types_by_name['TableAccessorSaveParameter'] = _TABLEACCESSORSAVEPARAMETER +DESCRIPTOR.message_types_by_name['PsRequestMessage'] = _PSREQUESTMESSAGE +DESCRIPTOR.message_types_by_name['SparseSGDRuleParameter'] = _SPARSESGDRULEPARAMETER +DESCRIPTOR.message_types_by_name['DenseSGDRuleParameter'] = _DENSESGDRULEPARAMETER +DESCRIPTOR.message_types_by_name['AdamSGDParameter'] = _ADAMSGDPARAMETER +DESCRIPTOR.message_types_by_name['NaiveSGDParameter'] = _NAIVESGDPARAMETER +DESCRIPTOR.message_types_by_name['SummarySGDParameter'] = _SUMMARYSGDPARAMETER +DESCRIPTOR.message_types_by_name['MovingAverageRuleParameter'] = _MOVINGAVERAGERULEPARAMETER +DESCRIPTOR.message_types_by_name['PsResponseMessage'] = _PSRESPONSEMESSAGE +DESCRIPTOR.message_types_by_name['FsClientParameter'] = _FSCLIENTPARAMETER +DESCRIPTOR.enum_types_by_name['TableType'] = _TABLETYPE +DESCRIPTOR.enum_types_by_name['PsCmdID'] = _PSCMDID + +PSParameter = _reflection.GeneratedProtocolMessageType('PSParameter', (_message.Message,), dict( + DESCRIPTOR = _PSPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PSParameter) + )) +_sym_db.RegisterMessage(PSParameter) + +WorkerParameter = _reflection.GeneratedProtocolMessageType('WorkerParameter', (_message.Message,), dict( + DESCRIPTOR = _WORKERPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) + )) +_sym_db.RegisterMessage(WorkerParameter) + +ServerParameter = _reflection.GeneratedProtocolMessageType('ServerParameter', (_message.Message,), dict( + DESCRIPTOR = _SERVERPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerParameter) + )) +_sym_db.RegisterMessage(ServerParameter) + +DownpourWorkerParameter = _reflection.GeneratedProtocolMessageType('DownpourWorkerParameter', (_message.Message,), dict( + DESCRIPTOR = _DOWNPOURWORKERPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) + )) +_sym_db.RegisterMessage(DownpourWorkerParameter) + +DownpourTrainerParameter = _reflection.GeneratedProtocolMessageType('DownpourTrainerParameter', (_message.Message,), dict( + DESCRIPTOR = _DOWNPOURTRAINERPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) + )) +_sym_db.RegisterMessage(DownpourTrainerParameter) + +DenseTableParameter = _reflection.GeneratedProtocolMessageType('DenseTableParameter', (_message.Message,), dict( + DESCRIPTOR = _DENSETABLEPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) + )) 
+_sym_db.RegisterMessage(DenseTableParameter) + +SparseTableParameter = _reflection.GeneratedProtocolMessageType('SparseTableParameter', (_message.Message,), dict( + DESCRIPTOR = _SPARSETABLEPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) + )) +_sym_db.RegisterMessage(SparseTableParameter) + +DownpourServerParameter = _reflection.GeneratedProtocolMessageType('DownpourServerParameter', (_message.Message,), dict( + DESCRIPTOR = _DOWNPOURSERVERPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) + )) +_sym_db.RegisterMessage(DownpourServerParameter) + +ServerServiceParameter = _reflection.GeneratedProtocolMessageType('ServerServiceParameter', (_message.Message,), dict( + DESCRIPTOR = _SERVERSERVICEPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) + )) +_sym_db.RegisterMessage(ServerServiceParameter) + +TableParameter = _reflection.GeneratedProtocolMessageType('TableParameter', (_message.Message,), dict( + DESCRIPTOR = _TABLEPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableParameter) + )) +_sym_db.RegisterMessage(TableParameter) + +TableAccessorParameter = _reflection.GeneratedProtocolMessageType('TableAccessorParameter', (_message.Message,), dict( + DESCRIPTOR = _TABLEACCESSORPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) + )) +_sym_db.RegisterMessage(TableAccessorParameter) + +DownpourTableAccessorParameter = _reflection.GeneratedProtocolMessageType('DownpourTableAccessorParameter', (_message.Message,), dict( + DESCRIPTOR = _DOWNPOURTABLEACCESSORPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourTableAccessorParameter) + )) +_sym_db.RegisterMessage(DownpourTableAccessorParameter) + +TableAccessorSaveParameter = _reflection.GeneratedProtocolMessageType('TableAccessorSaveParameter', (_message.Message,), dict( + DESCRIPTOR = _TABLEACCESSORSAVEPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableAccessorSaveParameter) + )) +_sym_db.RegisterMessage(TableAccessorSaveParameter) + +PsRequestMessage = _reflection.GeneratedProtocolMessageType('PsRequestMessage', (_message.Message,), dict( + DESCRIPTOR = _PSREQUESTMESSAGE, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) + )) +_sym_db.RegisterMessage(PsRequestMessage) + +SparseSGDRuleParameter = _reflection.GeneratedProtocolMessageType('SparseSGDRuleParameter', (_message.Message,), dict( + DESCRIPTOR = _SPARSESGDRULEPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) + )) +_sym_db.RegisterMessage(SparseSGDRuleParameter) + +DenseSGDRuleParameter = _reflection.GeneratedProtocolMessageType('DenseSGDRuleParameter', (_message.Message,), dict( + DESCRIPTOR = _DENSESGDRULEPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) + )) +_sym_db.RegisterMessage(DenseSGDRuleParameter) + +AdamSGDParameter = _reflection.GeneratedProtocolMessageType('AdamSGDParameter', (_message.Message,), dict( + DESCRIPTOR = _ADAMSGDPARAMETER, + __module__ = 'ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) + )) +_sym_db.RegisterMessage(AdamSGDParameter) + +NaiveSGDParameter = _reflection.GeneratedProtocolMessageType('NaiveSGDParameter', (_message.Message,), dict( + DESCRIPTOR = 
_NAIVESGDPARAMETER,
+  __module__ = 'ps_pb2'
+  # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter)
+  ))
+_sym_db.RegisterMessage(NaiveSGDParameter)
+
+SummarySGDParameter = _reflection.GeneratedProtocolMessageType('SummarySGDParameter', (_message.Message,), dict(
+  DESCRIPTOR = _SUMMARYSGDPARAMETER,
+  __module__ = 'ps_pb2'
+  # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter)
+  ))
+_sym_db.RegisterMessage(SummarySGDParameter)
+
+MovingAverageRuleParameter = _reflection.GeneratedProtocolMessageType('MovingAverageRuleParameter', (_message.Message,), dict(
+  DESCRIPTOR = _MOVINGAVERAGERULEPARAMETER,
+  __module__ = 'ps_pb2'
+  # @@protoc_insertion_point(class_scope:paddle.MovingAverageRuleParameter)
+  ))
+_sym_db.RegisterMessage(MovingAverageRuleParameter)
+
+PsResponseMessage = _reflection.GeneratedProtocolMessageType('PsResponseMessage', (_message.Message,), dict(
+  DESCRIPTOR = _PSRESPONSEMESSAGE,
+  __module__ = 'ps_pb2'
+  # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage)
+  ))
+_sym_db.RegisterMessage(PsResponseMessage)
+
+FsClientParameter = _reflection.GeneratedProtocolMessageType('FsClientParameter', (_message.Message,), dict(
+  DESCRIPTOR = _FSCLIENTPARAMETER,
+  __module__ = 'ps_pb2'
+  # @@protoc_insertion_point(class_scope:paddle.FsClientParameter)
+  ))
+_sym_db.RegisterMessage(FsClientParameter)
+
+
+DESCRIPTOR.has_options = True
+DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\200\001\001'))
+# @@protoc_insertion_point(module_scope)

From 2301abc481bbcfce1f87a102c295df8eeb4ba6c4 Mon Sep 17 00:00:00 2001
From: heqiaozhi
Date: Tue, 4 Dec 2018 11:32:10 +0800
Subject: [PATCH 10/62] cc library add pslib

---
 paddle/fluid/framework/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 9f5631b87c..8556dcbc36 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -180,7 +180,7 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS
         graph build_strategy
         fast_threaded_ssa_graph_executor variable_helper)
 
-cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper)
+cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib)
 cc_test(data_feed_test SRCS data_feed_test.cc DEPS async_executor)
 cc_library(prune SRCS prune.cc DEPS framework_proto)

From 49130f9b8f41cda0ac50e5c57f4b033c260c7541 Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Tue, 4 Dec 2018 20:15:52 +0800
Subject: [PATCH 11/62] refine downpour sgd API and adapt to pslib proto desc

---
 paddle/fluid/framework/CMakeLists.txt       |  2 +-
 .../paddle/fluid/distribute_lookup_table.py | 18 ++++++++
 python/paddle/fluid/distributed/downpour.py | 19 +++++---
 python/paddle/fluid/distributed/node.py     | 45 +++++++++----------
 4 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 9f5631b87c..8556dcbc36 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -180,7 +180,7 @@ 
cc_library(parallel_executor SRCS parallel_executor.cc DEPS graph build_strategy fast_threaded_ssa_graph_executor variable_helper) -cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper) +cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib) cc_test(data_feed_test SRCS data_feed_test.cc DEPS async_executor) cc_library(prune SRCS prune.cc DEPS framework_proto) diff --git a/python/paddle/fluid/distribute_lookup_table.py b/python/paddle/fluid/distribute_lookup_table.py index 52d9ce75f8..a903257fa9 100644 --- a/python/paddle/fluid/distribute_lookup_table.py +++ b/python/paddle/fluid/distribute_lookup_table.py @@ -15,6 +15,24 @@ LOOKUP_TABLE_TYPE = "lookup_table" +def find_distributed_lookup_table_inputs(program, table_name): + local_vars = program.current_block().vars + inputs = [] + for op in program.global_block().ops: + if op.type == LOOKUP_TABLE_TYPE: + if table_name == op.input("W")[0]: + inputs.extend([local_vars[name] for name in op.input("Ids")]) + return inputs + +def find_distributed_lookup_table_outputs(program, table_name): + local_vars = program.current_block().vars + outputs = [] + for op in program.global_block().ops: + if op.type == LOOKUP_TABLE_TYPE: + if table_name == op.input("W")[0]: + outputs.extend([local_vars[name] for name in op.output("Out")]) + return outputs + def find_distributed_lookup_table(program): """ Find distribute lookup table in program. 
diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 551a471495..3fe4afdbff 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -3,6 +3,8 @@ from .node import DownpourWorker from ..backward import append_backward import ps_pb2 as pslib from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table +from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_inputs +from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_outputs from google.protobuf import text_format class DownpourSGD(object): @@ -12,21 +14,24 @@ class DownpourSGD(object): self.window_ = window def minimize(self, loss, startup_program=None, - parameter_list=None, no_grad_set=None, - prefetch_slots=None, prefetch_slots_emb=None): + parameter_list=None, no_grad_set=None): params_grads = sorted(append_backward(loss), key=lambda x:x[0].name) table_name = find_distributed_lookup_table(loss.block.program) + prefetch_slots = find_distributed_lookup_table_inputs( + loss.block.program, table_name) + prefetch_slots_emb = find_distributed_lookup_table_outputs( + loss.block.program, table_name) server = DownpourServer() worker = DownpourWorker(self.window_) - server.add_sparse_table(0, learning_rate, + server.add_sparse_table(0, self.learning_rate_, prefetch_slots, prefetch_slots_emb) - server.add_dense_table(1, learning_rate, params, grads) - worker.add_sparse_table(0, learning_rate, + server.add_dense_table(1, self.learning_rate_, params_grads[0], params_grads[1]) + worker.add_sparse_table(0, self.learning_rate_, prefetch_slots, prefetch_slots_emb) - worker.add_dense_table(1, learning_rate, params, grads) + worker.add_dense_table(1, self.learning_rate_, params_grads[0], params_grads[1]) ps_param = pslib.PSParameter() ps_param.server_param.CopyFrom(server.get_desc()) #ps_param.worker_param.CopyFrom(worker.get_desc()) worker_skipped_ops = ["lookup_table", "lookup_table_grad"] ps_param_str = text_format.MessageToString(ps_param) - return [ps_param_str, worker_skipped_ops] + return [ps_param_str, worker_skipped_ops, text_format.MessageToString(worker.get_desc())] diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index 3344bba137..7c9a76efb6 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -16,25 +16,26 @@ class DownpourServer(Server): self.server_ = pslib.ServerParameter() def add_sparse_table(self, table_id, learning_rate, - slot_key, slot_value_var, slot_grad_var): - #table = self.server_.downpour_table_param.add() + slot_key_vars, slot_value_var): table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id - table.type = PS_SPARSE_TABLE + table.type = pslib.PS_SPARSE_TABLE table.accessor.accessor_class = "DownpourFeatureValueAccessor" table.accessor.dense_sgd_param.adam.learning_rate = learning_rate - table.accessor.fea_dim = slot_value_var[0].shape[1] + table.accessor.fea_dim = abs(reduce(lambda x, y: x * y, + slot_value_var[0].shape, 1)) def add_dense_table(self, table_id, learning_rate, param_var, grad_var): - #table = self.server_.downpour_table_param.add() table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id - table.type = PS_DENSE_TABLE + table.type = pslib.PS_DENSE_TABLE table.accessor.accessor_class = "DownpourDenseValueAccessor" table.accessor.sparse_sgd_param.learning_rate = 
learning_rate - table.accessor.fea_dim = 1 - #table.accessor.fea_dim = reduce(lambda x, y: x.shape, 1 for x in param_var) + fea_dim = 0 + for param in param_var: + fea_dim += reduce(lambda x, y: x * y, param.shape, 1) + table.accessor.fea_dim = fea_dim def get_desc(self): return self.server_ @@ -43,28 +44,24 @@ class DownpourServer(Server): class DownpourWorker(Worker): def __init__(self, window): self.window = window - #self.worker_ = pslib.WorkerParameter().downpour_worker_param - #self.worker_ = pslib.WorkerParameter() self.worker_ = pslib.DownpourTrainerParameter() - #self.worker_.pull_dense_per_batch = window - #self.worker_.push_dense_per_batch = window - #self.worker_.downpour_worker_param.pull_dense_per_batch = window - #self.worker_.downpour_worker_param.push_dense_per_batch = window self.worker_.pull_dense_per_batch = window self.worker_.push_dense_per_batch = window - print(self.worker_) - def add_sparse_table(self, table_id, - slot_keys, slot_value_vars, slot_grad_vars): - #table = self.worker_.sparse_table.add() - table = self.worker_.downpour_worker_param.sparse_table.add() + def add_sparse_table(self, table_id, learning_rate, + slot_key_vars, slot_value_vars): + table = self.worker_.sparse_table.add() table.table_id = table_id - table.slot.extend(slot_keys) - self.worker_.extend([grad.name for grad in slot_grad_vars]) + table.slot_key.extend( + [var.name for var in slot_key_vars]) + table.slot_value.extend( + [var.name for var in slot_value_vars]) + table.slot_gradient.extend( + [var.name + "@GRAD" for var in slot_value_vars]) - def add_dense_table(self, table_id, param_vars, grad_vars): - #table = self.worker_.dense_table.add() - table = self.worker_.downpour_worker_param.dense_table.add() + def add_dense_table(self, table_id, learning_rate, + param_vars, grad_vars): + table = self.worker_.dense_table.add() table.table_id = table_id table.dense_variable_name.extend([p.name for p in param_vars]) table.dense_gradient_variable_name.extend([g.name for g in grad_vars]) From 419506f510d258fa858c75a05cdcaa780105deca Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Tue, 4 Dec 2018 22:23:01 +0800 Subject: [PATCH 12/62] refine for compile pslib.so --- cmake/external/pslib.cmake | 2 +- cmake/external/pslib_brpc.cmake | 2 +- paddle/fluid/CMakeLists.txt | 1 + paddle/fluid/framework/CMakeLists.txt | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake index 586f66d6fd..812af5efa2 100644 --- a/cmake/external/pslib.cmake +++ b/cmake/external/pslib.cmake @@ -66,7 +66,7 @@ ExternalProject_Add( CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT} ) -ADD_LIBRARY(pslib STATIC IMPORTED GLOBAL) +ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT}) LIST(APPEND external_project_dependencies pslib) diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake index 7b4beeae65..92019eef26 100644 --- a/cmake/external/pslib_brpc.cmake +++ b/cmake/external/pslib_brpc.cmake @@ -41,7 +41,7 @@ SET(PSLIB_BRPC_INSTALL_DIR ${PSLIB_BRPC_INSTALL_ROOT}/${PSLIB_BRPC_DST_DIR}) SET(PSLIB_BRPC_ROOT ${PSLIB_BRPC_INSTALL_DIR}) SET(PSLIB_BRPC_INC_DIR ${PSLIB_BRPC_ROOT}/include) SET(PSLIB_BRPC_LIB_DIR ${PSLIB_BRPC_ROOT}/lib) -SET(PSLIB_BRPC_LIB ${PSLIB_BRPC_LIB_DIR}/libps.so) +SET(PSLIB_BRPC_LIB ${PSLIB_BRPC_LIB_DIR}/libbrpc.a) SET(PSLIB_BRPC_IOMP_LIB ${PSLIB_BRPC_LIB_DIR}/libiomp5.so) #todo what is this SET(CMAKE_INSTALL_RPATH 
"${CMAKE_INSTALL_RPATH}" "${PSLIB_BRPC_ROOT}/lib") diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index 6b526f0103..d980b36d9b 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(memory) add_subdirectory(platform) add_subdirectory(framework) +#add_subdirectory(distributed) add_subdirectory(operators) add_subdirectory(string) add_subdirectory(recordio) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 8556dcbc36..6fdc73e93a 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -180,7 +180,7 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS graph build_strategy fast_threaded_ssa_graph_executor variable_helper) -cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib) +cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib) cc_test(data_feed_test SRCS data_feed_test.cc DEPS async_executor) cc_library(prune SRCS prune.cc DEPS framework_proto) From 06213b798116f7aadb2ab95f83931c10b67a5942 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Tue, 4 Dec 2018 23:06:42 +0800 Subject: [PATCH 13/62] add hadoop helper function for distributed training --- python/paddle/fluid/async_executor.py | 9 +- .../paddle/fluid/distribute_lookup_table.py | 6 +- python/paddle/fluid/distributed/downpour.py | 45 +++++-- python/paddle/fluid/distributed/helper.py | 24 ++++ python/paddle/fluid/distributed/node.py | 1 - python/paddle/fluid/distributed/ps_pb2.py | 118 ++++++++++-------- 6 files changed, 134 insertions(+), 69 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 2945e6e143..c5863eb9e0 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -150,8 +150,13 @@ class AsyncExecutor(object): data_feed.desc(), filelist, thread_num, fetch_var_names, debug) - def config_ps(self, dist_desc, host_sign_list, node_num, index): - self.executor.config_pslib(dist_desc, host_sign_list, node_num, index) + def config_distributed_nodes(self, dist_opt): + # get total rank + # get rank index + # get iplists + # get hadoop info + return + def start_server(self): self.executor.start_server() diff --git a/python/paddle/fluid/distribute_lookup_table.py b/python/paddle/fluid/distribute_lookup_table.py index a903257fa9..243d806c41 100644 --- a/python/paddle/fluid/distribute_lookup_table.py +++ b/python/paddle/fluid/distribute_lookup_table.py @@ -21,7 +21,8 @@ def find_distributed_lookup_table_inputs(program, table_name): for op in program.global_block().ops: if op.type == LOOKUP_TABLE_TYPE: if table_name == op.input("W")[0]: - inputs.extend([local_vars[name] for name in op.input("Ids")]) + inputs.extend( + [local_vars[name] for name in op.input("Ids")]) return inputs def find_distributed_lookup_table_outputs(program, table_name): @@ -30,7 +31,8 @@ def find_distributed_lookup_table_outputs(program, table_name): for op in program.global_block().ops: if op.type == LOOKUP_TABLE_TYPE: if table_name == op.input("W")[0]: - 
outputs.extend([local_vars[name] for name in op.output("Out")]) + outputs.extend( + [local_vars[name] for name in op.output("Out")]) return outputs def find_distributed_lookup_table(program): diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 3fe4afdbff..093792d5d6 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -8,30 +8,57 @@ from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_o from google.protobuf import text_format class DownpourSGD(object): + """ + Distributed optimizer of downpour stochastic gradient descent + Standard implementation of Google's Downpour SGD + in Large Scale Distributed Deep Networks + + Args: + learning_rate (float): the learning rate used to update parameters. \ + Can be a float value + Examples: + .. code-block:: python + + downpour_sgd = fluid.distributed.DownpourSGD(learning_rate=0.2) + downpour_sgd.minimize(cost) + """ def __init__(self, learning_rate=0.001, window=1): - # todo(guru4elephant): if optimizer is not None, will warning here + # todo(guru4elephant): add more optimizers here as argument + # todo(guru4elephant): make learning_rate as a variable self.learning_rate_ = learning_rate self.window_ = window - + self.type = "downpour" + def minimize(self, loss, startup_program=None, parameter_list=None, no_grad_set=None): - params_grads = sorted(append_backward(loss), key=lambda x:x[0].name) + params_grads = sorted(append_backward( + loss, parameter_list, no_grad_set), key=lambda x:x[0].name) table_name = find_distributed_lookup_table(loss.block.program) prefetch_slots = find_distributed_lookup_table_inputs( loss.block.program, table_name) prefetch_slots_emb = find_distributed_lookup_table_outputs( loss.block.program, table_name) server = DownpourServer() + # window is communication strategy worker = DownpourWorker(self.window_) - server.add_sparse_table(0, self.learning_rate_, + # Todo(guru4elephant): support multiple tables definitions + # currently support one big sparse table + sparse_table_index = 0 + # currently merge all dense parameters into one dense table + dense_table_index = 1 + server.add_sparse_table(sparse_table_index, self.learning_rate_, prefetch_slots, prefetch_slots_emb) - server.add_dense_table(1, self.learning_rate_, params_grads[0], params_grads[1]) - worker.add_sparse_table(0, self.learning_rate_, + server.add_dense_table(dense_table_index, self.learning_rate_, + params_grads[0], params_grads[1]) + worker.add_sparse_table(sparse_table_index, self.learning_rate_, prefetch_slots, prefetch_slots_emb) - worker.add_dense_table(1, self.learning_rate_, params_grads[0], params_grads[1]) + worker.add_dense_table(dense_table_index, self.learning_rate_, + params_grads[0], params_grads[1]) ps_param = pslib.PSParameter() ps_param.server_param.CopyFrom(server.get_desc()) - #ps_param.worker_param.CopyFrom(worker.get_desc()) + ps_param.worker_param.CopyFrom(worker.get_desc()) + # Todo(guru4elephant): figure out how to support more sparse parameters + # currently only support lookup_table worker_skipped_ops = ["lookup_table", "lookup_table_grad"] ps_param_str = text_format.MessageToString(ps_param) - return [ps_param_str, worker_skipped_ops, text_format.MessageToString(worker.get_desc())] + return [ps_param_str, worker_skipped_ops] diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py index 8e079b1e8d..12e2f7f197 100644 --- 
a/python/paddle/fluid/distributed/helper.py +++ b/python/paddle/fluid/distributed/helper.py @@ -1,5 +1,27 @@ from mpi4py import MPI +class FileSystem(object): + def __init__(self, fs_type="afs", + uri="afs://tianqi.afs.baidu.com:9902", + user=None, + passwd=None, + hadoop_bin="", + afs_conf=None): + assert user not None + assert passwd not None + assert hadoop_bin not None + fs_client = pslib.FsClientParameter() + if fs_type == "afs": + fs_client.fs_type = pslib.FsApiType.AFS + else: + fs_client.fs_type = pslib.FsApiType.HDFS + fs_client.uri = uri + fs_client.user = user + fs_client.passwd = passwd + fs_client.buffer_size = 0 + fs_client.afs_conf = afs_conf if not afs_conf else "" + + class MPIHelper(object): def __init__(self): self.comm = MPI.COMM_WORLD @@ -18,3 +40,5 @@ class MPIHelper(object): def get_hostname(self): import socket return socket.gethostname() + + diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index 7c9a76efb6..b96a15a32f 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -12,7 +12,6 @@ class Worker(object): class DownpourServer(Server): def __init__(self): - #self.server_ = pslib.ServerParameter().downpour_server_param self.server_ = pslib.ServerParameter() def add_sparse_table(self, table_id, learning_rate, diff --git a/python/paddle/fluid/distributed/ps_pb2.py b/python/paddle/fluid/distributed/ps_pb2.py index 355841aba8..0ef34d6e18 100644 --- a/python/paddle/fluid/distributed/ps_pb2.py +++ b/python/paddle/fluid/distributed/ps_pb2.py @@ -20,7 +20,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( name='ps.proto', package='paddle', syntax='proto2', - serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\xe4\x01\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xbc\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x02 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x03 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1c\n\x14pull_dense_per_batch\x18\x04 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x05 \x01(\x05\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\x91\x01\n\x16ServerServiceParameter\x12\x14\n\x0cserver_class\x18\x01 
\x01(\t\x12\x14\n\x0c\x63lient_class\x18\x02 \x01(\t\x12\x15\n\rservice_class\x18\x03 \x01(\t\x12\x19\n\x11start_server_port\x18\x04 \x01(\r\x12\x19\n\x11server_thread_num\x18\x05 \x01(\r\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 
\x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 \x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') + serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\x9e\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x01(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xbc\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1c\n\x14pull_dense_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\x91\x01\n\x16ServerServiceParameter\x12\x14\n\x0cserver_class\x18\x01 \x01(\t\x12\x14\n\x0c\x63lient_class\x18\x02 \x01(\t\x12\x15\n\rservice_class\x18\x03 \x01(\t\x12\x19\n\x11start_server_port\x18\x04 \x01(\r\x12\x19\n\x11server_thread_num\x18\x05 \x01(\r\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 
\x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 
\x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') ) _sym_db.RegisterFileDescriptor(DESCRIPTOR) @@ -41,8 +41,8 @@ _TABLETYPE = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3140, - serialized_end=3192, + serialized_start=3198, + serialized_end=3250, ) _sym_db.RegisterEnumDescriptor(_TABLETYPE) @@ -108,8 +108,8 @@ _PSCMDID = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3195, - serialized_end=3512, + serialized_start=3253, + serialized_end=3570, ) _sym_db.RegisterEnumDescriptor(_PSCMDID) @@ -148,8 +148,8 @@ _FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3108, - serialized_end=3138, + serialized_start=3166, + serialized_end=3196, ) _sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) @@ -197,7 +197,14 @@ _PSPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( - name='fs_client_param', full_name='paddle.PSParameter.fs_client_param', index=5, + name='trainer_param', full_name='paddle.PSParameter.trainer_param', index=5, + number=301, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='fs_client_param', full_name='paddle.PSParameter.fs_client_param', index=6, number=501, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, @@ -216,7 +223,7 @@ _PSPARAMETER = _descriptor.Descriptor( oneofs=[ ], serialized_start=21, - serialized_end=249, + serialized_end=307, ) @@ -246,8 +253,8 @@ _WORKERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=251, - serialized_end=332, + serialized_start=309, + serialized_end=390, ) @@ -277,8 +284,8 @@ _SERVERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=334, - serialized_end=415, + serialized_start=392, + serialized_end=473, ) @@ -308,8 +315,8 @@ _DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=417, - serialized_end=496, + serialized_start=475, + serialized_end=554, ) @@ -322,28 +329,28 @@ _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( fields=[ _descriptor.FieldDescriptor( name='dense_table', full_name='paddle.DownpourTrainerParameter.dense_table', index=0, - number=2, type=11, cpp_type=10, label=3, + number=1, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, 
options=None), _descriptor.FieldDescriptor( name='sparse_table', full_name='paddle.DownpourTrainerParameter.sparse_table', index=1, - number=3, type=11, cpp_type=10, label=3, + number=2, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='pull_dense_per_batch', full_name='paddle.DownpourTrainerParameter.pull_dense_per_batch', index=2, - number=4, type=5, cpp_type=1, label=1, + number=3, type=5, cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='push_dense_per_batch', full_name='paddle.DownpourTrainerParameter.push_dense_per_batch', index=3, - number=5, type=5, cpp_type=1, label=1, + number=4, type=5, cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, @@ -360,8 +367,8 @@ _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=499, - serialized_end=687, + serialized_start=557, + serialized_end=745, ) @@ -412,8 +419,8 @@ _DENSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=689, - serialized_end=812, + serialized_start=747, + serialized_end=870, ) @@ -471,8 +478,8 @@ _SPARSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=814, - serialized_end=936, + serialized_start=872, + serialized_end=994, ) @@ -509,8 +516,8 @@ _DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=939, - serialized_end=1073, + serialized_start=997, + serialized_end=1131, ) @@ -568,8 +575,8 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1076, - serialized_end=1221, + serialized_start=1134, + serialized_end=1279, ) @@ -634,8 +641,8 @@ _TABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1224, - serialized_end=1415, + serialized_start=1282, + serialized_end=1473, ) @@ -714,8 +721,8 @@ _TABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1418, - serialized_end=1787, + serialized_start=1476, + serialized_end=1845, ) @@ -787,8 +794,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1790, - serialized_end=1996, + serialized_start=1848, + serialized_end=2054, ) @@ -832,8 +839,8 @@ _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1998, - serialized_end=2081, + serialized_start=2056, + serialized_end=2139, ) @@ -891,8 +898,8 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2083, - serialized_end=2184, + serialized_start=2141, + serialized_end=2242, ) @@ -943,8 +950,8 @@ _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2186, - serialized_end=2305, + serialized_start=2244, + serialized_end=2363, ) @@ -1002,8 +1009,8 @@ _DENSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2308, - serialized_end=2533, + serialized_start=2366, + serialized_end=2591, ) @@ -1061,8 +1068,8 @@ _ADAMSGDPARAMETER = 
_descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2536,
-  serialized_end=2670,
+  serialized_start=2594,
+  serialized_end=2728,
 )
 
 
@@ -1099,8 +1106,8 @@ _NAIVESGDPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2672,
-  serialized_end=2738,
+  serialized_start=2730,
+  serialized_end=2796,
 )
 
 
@@ -1130,8 +1137,8 @@ _SUMMARYSGDPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2740,
-  serialized_end=2799,
+  serialized_start=2798,
+  serialized_end=2857,
 )
 
 
@@ -1161,8 +1168,8 @@ _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2801,
-  serialized_end=2847,
+  serialized_start=2859,
+  serialized_end=2905,
 )
 
 
@@ -1206,8 +1213,8 @@ _PSRESPONSEMESSAGE = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2849,
-  serialized_end=2922,
+  serialized_start=2907,
+  serialized_end=2980,
 )
 
 
@@ -1280,12 +1287,13 @@ _FSCLIENTPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2925,
-  serialized_end=3138,
+  serialized_start=2983,
+  serialized_end=3196,
 )
 
 _PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER
 _PSPARAMETER.fields_by_name['server_param'].message_type = _SERVERPARAMETER
+_PSPARAMETER.fields_by_name['trainer_param'].message_type = _DOWNPOURTRAINERPARAMETER
 _PSPARAMETER.fields_by_name['fs_client_param'].message_type = _FSCLIENTPARAMETER
 _WORKERPARAMETER.fields_by_name['downpour_worker_param'].message_type = _DOWNPOURWORKERPARAMETER
 _SERVERPARAMETER.fields_by_name['downpour_server_param'].message_type = _DOWNPOURSERVERPARAMETER
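
With trainer_param now wired into PSParameter, a trainer-side description
can be assembled and serialized the same way DownpourSGD.minimize does it.
A minimal sketch against the regenerated module (table ids and variable
names are hypothetical):

    import ps_pb2 as pslib
    from google.protobuf import text_format

    ps_param = pslib.PSParameter()
    # one dense table holding the non-embedding parameters
    dense = ps_param.trainer_param.dense_table.add()
    dense.table_id = 1
    dense.dense_variable_name.extend(["fc_0.w_0", "fc_0.b_0"])
    dense.dense_gradient_variable_name.extend(["fc_0.w_0@GRAD", "fc_0.b_0@GRAD"])
    # one big sparse table for the distributed embedding
    sparse = ps_param.trainer_param.sparse_table.add()
    sparse.table_id = 0
    sparse.slot_key.extend(["slot_6048"])
    sparse.slot_value.extend(["embed_6048"])
    sparse.slot_gradient.extend(["embed_6048@GRAD"])
    print(text_format.MessageToString(ps_param))
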
From d3ca359e445884ffca4b147607607517aad4791b Mon Sep 17 00:00:00 2001
From: heqiaozhi
Date: Wed, 5 Dec 2018 19:30:37 +0800
Subject: [PATCH 14/62] config init & adapt to interface

---
 paddle/fluid/framework/async_executor.cc      | 55 +++++++++++++++++--
 paddle/fluid/framework/async_executor.h       |  3 +-
 .../fluid/framework/executor_thread_worker.cc | 44 ++++++++------
 .../fluid/framework/executor_thread_worker.h  | 15 +++--
 4 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc
index 94ed8c2fca..292b05c588 100644
--- a/paddle/fluid/framework/async_executor.cc
+++ b/paddle/fluid/framework/async_executor.cc
@@ -67,21 +67,63 @@ void PrepareReaders(std::vector<std::shared_ptr<DataFeed>>& readers,  // NOLINT
 
 void AsyncExecutor::ConfigPslib(const std::string& dist_desc, std::vector<uint64_t>& host_sign_list, int node_num, int index) {
   _pslib_ptr = std::shared_ptr<paddle::distributed::PSlib>(new paddle::distributed::PSlib());
-  _pslib_ptr->init_and_config(dist_desc, host_sign_list, node_num, index);//TODO
+  _pslib_ptr->init_and_config(dist_desc, host_sign_list, node_num, index);//TODO done
 }
 
 void AsyncExecutor::StartServer() {
+  InitParamConfig();
   _pslib_ptr->run_server();
 }
 
+void AsyncExecutor::InitParamConfig() {
+  _param_config.fea_dim = _pslib_ptr->get_param()->trainer_param().sparse_table(0).feature_dim(); //TODO
+  _param_config.slot_dim = _param_config.fea_dim - 2; //TODO
+  _param_config.tmp_push_dense_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().pull_dense_per_batch());
+  _param_config.tmp_push_sparse_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().push_dense_per_batch());
+  //sparse
+  for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().sparse_table_size(); ++t) {
+    auto& table = _pslib_ptr->get_param()->trainer_param().sparse_table(t);
+    std::vector<std::string> tmp_sparse_variable_name;
+    for (int i = 0u; i < table.slot_value_size(); ++i) {
+      tmp_sparse_variable_name.push_back(table.slot_value(i));
+      _param_config.slot_alias_to_table[table.slot_value(i)] = table.table_id();
+    }
+    std::vector<std::string> tmp_sparse_gradient_variable_name;
+    for (auto i = 0u; i < table.slot_gradient_size(); ++i) {
+      tmp_sparse_gradient_variable_name.push_back(
+          table.slot_gradient(i));
+    }
+    _param_config.slot_input_vec[table.table_id()] = std::move(tmp_sparse_variable_name);
+    _param_config.gradient_var[table.table_id()] = std::move(tmp_sparse_gradient_variable_name);
+    _param_config.sparse_table_id.push_back(table.table_id());
+  }
+  //dense
+  for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().dense_table_size(); ++t) {
+    auto& table = _pslib_ptr->get_param()->trainer_param().dense_table(t);
+    std::vector<std::string> tmp_dense_variable_name;
+    for (int i = 0u; i < table.dense_variable_name_size(); ++i) {
+      tmp_dense_variable_name.push_back(table.dense_variable_name(i));
+    }
+    std::vector<std::string> tmp_dense_gradient_variable_name;
+    for (auto i = 0u; i < table.dense_gradient_variable_name_size(); ++i) {
+      tmp_dense_gradient_variable_name.push_back(
+          table.dense_gradient_variable_name(i));
+    }
+    _param_config.dense_variable_name[table.table_id()] = std::move(tmp_dense_variable_name);
+    _param_config.dense_gradient_variable_name[table.table_id()] = std::move(tmp_dense_gradient_variable_name);
+    _param_config.dense_table_id.push_back(table.table_id());
+    _param_config.dense_table_size.push_back(table.fea_dim()); //TODO
+  }
+}
+
 void AsyncExecutor::InitModel() {
   //TODO only rank = 0 do this
-  std::vector<int> all_dense_table_id; //TODO
-  all_dense_table_id.push_back(0);
-  for (auto table_id: all_dense_table_id) {
+  //std::vector<int> all_dense_table_id; //TODO
+  //all_dense_table_id.push_back(0); //done
+  for (auto table_id: _param_config.dense_table_id) {
     std::vector<paddle::ps::Region> regions;
-    std::vector<std::string> variables; //TODO
-    for (auto& t : variables) {
+    //std::vector<std::string> variables; //TODO
+    for (auto& t : _param_config.dense_variable_name[table_id]) {
       Variable* var = root_scope_->FindVar(t);
       CHECK(var != nullptr) << "var[" << t << "] not found";
       LoDTensor* tensor = var->GetMutable<LoDTensor>();
@@ -131,6 +173,7 @@ void AsyncExecutor::PrepareDenseThread() {
   param.training_thread_num = actual_thread_num;
   param.root_scope = root_scope_;
   //param.dense_params = &GlobalConfig::instance().dense_variable_name; //TODO
+  param.dense_params = &_param_config.dense_variable_name;
 
   _pull_dense_thread = std::shared_ptr<DensePullThread>(new DensePullThread(param));
 
diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h
index 67f4e5deee..21e4a66fce 100644
--- a/paddle/fluid/framework/async_executor.h
+++ b/paddle/fluid/framework/async_executor.h
@@ -68,7 +68,7 @@ class AsyncExecutor {
   void StartServer();
   void InitModel();
   void SaveModel(const std::string& path);
-
+  void InitParamConfig();
 private:
   void CreateThreads(ExecutorThreadWorker* worker,
                      const ProgramDesc& main_program,
@@ -86,6 +86,7 @@ class AsyncExecutor {
   AsyncWorkerParamConfig _param_config;
 private:
   int actual_thread_num;
+
 };
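
The per-table fea_dim that InitParamConfig records for a dense table is
simply the total element count of every variable assigned to that table,
matching the reduce-and-sum in node.py's add_dense_table. In Python terms
(a sketch, shapes hypothetical):

    from functools import reduce

    def dense_table_fea_dim(param_shapes):
        # one dense table holds every parameter flattened end to end
        return sum(reduce(lambda x, y: x * y, shape, 1)
                   for shape in param_shapes)

    # e.g. a 1024x64 weight plus a 64-wide bias -> 65600 floats
    assert dense_table_fea_dim([(1024, 64), (64,)]) == 65600
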
diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc
index 19d8818be7..f7c05e400d 100644
--- a/paddle/fluid/framework/executor_thread_worker.cc
+++ b/paddle/fluid/framework/executor_thread_worker.cc
@@ -382,33 +382,38 @@ void AsyncExecutorThreadWorker::BindingSlotVariableMemory() {
   }
   */
 }
-void AsyncExecutorThreadWorker::SetParamConfig(AsyncWorkerParamConfig* pc) {
-  _param_config = pc;
+
+void AsyncExecutorThreadWorker::SetParamConfig(AsyncWorkerParamConfig* param_config) {
+  _param_config = param_config;
 }
 
 void AsyncExecutorThreadWorker::PrepareParams() {
-  int table_id = 0; //TODO
-  PullSparse(table_id);
-  for (auto& t : _pull_sparse_status) {
-    t.wait();
-    auto status = t.get();
-    if (status != 0) {
-      LOG(ERROR) << "pull sparse failed, status[" << status << "]";
-      exit(-1);
+  //int table_id = 0; //TODO
+  for (auto table_id: _param_config->sparse_table_id) {
+    PullSparse(table_id);
+    for (auto& t : _pull_sparse_status) {
+      t.wait();
+      auto status = t.get();
+      if (status != 0) {
+        LOG(ERROR) << "pull sparse failed, status[" << status << "]";
+        exit(-1);
+      }
     }
   }
   _pull_sparse_status.resize(0);
-  FillSparse(table_id);
+  for (auto table_id: _param_config->sparse_table_id) {
+    FillSparse(table_id);
+  }
 }
 
 void AsyncExecutorThreadWorker::UpdateParams() {
-  //for (auto i = 0u; i < GlobalConfig::instance().dense_table_id.size(); ++i) {//TODO
-  for (int i = 0; i < 1; ++i) {
+  for (auto i: _param_config->sparse_table_id) {//TODO
+  //for (int i = 0; i < 1; ++i) {
     PushSparse(i);
   }
   //for (auto i = 0u; i < GlobalConfig::instance().dense_table_id.size(); ++i) {//TODO
-  for (int i = 1; i < 2; ++i) {
+  for (auto i: _param_config->dense_table_id) {
     PushDense(i);
   }
   int32_t tmp_push_dense_wait_times = _param_config->tmp_push_dense_wait_times; //TODO
@@ -437,14 +442,13 @@ void AsyncExecutorThreadWorker::UpdateParams() {
   }
 
   //for (auto dense_table_id : GlobalConfig::instance().dense_table_id) {//TODO
-  int dense_table_id = 1;
+  for (auto dense_table_id: _param_config->dense_table_id) {
     _pull_dense_thread->increase_thread_version(thread_id_, dense_table_id);
+  }
   //}
 }
 
 void AsyncExecutorThreadWorker::PushDense(int table_id) {
-  //auto table_id = GlobalConfig::instance().dense_table_id[table_id_index]; TODO
-
   std::vector<paddle::ps::Region> regions;
   //auto& variables = GlobalConfig::instance().dense_gradient_variable_name[table_id];
   std::vector<std::string> variables;
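
Everything in AsyncWorkerParamConfig is now keyed by table id, and the
worker routes each feed slot to its sparse table through
slot_alias_to_table. A Python sketch of that routing (slot names
hypothetical; slot 0 is the label and is never pushed):

    # table routing as a plain dict, mirroring slot_alias_to_table
    slot_alias_to_table = {"slot_6048": 0, "slot_6050": 0}

    def slots_for_table(feed_slots, table_id):
        return [(idx, name) for idx, name in enumerate(feed_slots)
                if idx > 0 and slot_alias_to_table.get(name) == table_id]
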
diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h
index 63f383cd47..4e3255a590 100644
--- a/paddle/fluid/framework/executor_thread_worker.h
+++ b/paddle/fluid/framework/executor_thread_worker.h
@@ -40,8 +40,14 @@ struct AsyncWorkerParamConfig {
   int32_t tmp_push_dense_wait_times;
   int32_t tmp_push_sparse_wait_times;
 
-  std::vector<std::string> slot_input_vec;  //6048slot 6050slot //name
-  std::vector<std::string> gradient_var;  //6048slot_embed
+  std::map<uint64_t, std::vector<std::string>> dense_variable_name;
+  std::map<uint64_t, std::vector<std::string>> dense_gradient_variable_name;
+  std::vector<uint64_t> dense_table_id;
+  std::vector<int32_t> dense_table_size;  // fea_dim for each dense table
+  std::vector<uint64_t> sparse_table_id;
+  std::map<uint64_t, std::vector<std::string>> slot_input_vec;  //6048slot 6050slot //name
+  std::map<uint64_t, std::vector<std::string>> gradient_var;  //6048slot_embed
+  std::unordered_map<std::string, uint64_t> slot_alias_to_table;  //TODO done
 };
 
 struct DensePullThreadParam {
@@ -148,7 +154,7 @@ class ExecutorThreadWorker {
   virtual void SetPSlibPtr(std::shared_ptr<paddle::distributed::PSlib> pslib_ptr);
   virtual void SetPullDenseThread(std::shared_ptr<DensePullThread> dpt) {};
   virtual void BindingSlotVariableMemory() {};
-  virtual void SetParamConfig(AsyncWorkerParamConfig* pc) {};
+  virtual void SetParamConfig(AsyncWorkerParamConfig* param_config) {};
 private:
   void CreateThreadScope(const framework::ProgramDesc& program);
   void CreateThreadOperators(const framework::ProgramDesc& program);
@@ -184,7 +190,7 @@ public:
   void SetPSlibPtr(std::shared_ptr<paddle::distributed::PSlib> pslib_ptr);
   void SetPullDenseThread(std::shared_ptr<DensePullThread> dpt);
   void BindingSlotVariableMemory();
-  void SetParamConfig(AsyncWorkerParamConfig* pc);
+  void SetParamConfig(AsyncWorkerParamConfig* param_config);
   void TrainFiles();
   void TrainOneNetwork();
   void PrepareParams();
@@ -209,7 +215,6 @@ private:
   std::map<uint64_t, std::vector<std::vector<float>>> _feature_value;
   std::map<uint64_t, std::vector<std::vector<float>>> _feature_push_value;
 
-  std::unordered_map<std::string, uint64_t> _slot_alias_to_table;  //TODO
 
   std::shared_ptr<paddle::distributed::PSlib> _pslib_ptr;

From 575ae7c6c3133df589cfe6c1a9d9e45e6bfc99c5 Mon Sep 17 00:00:00 2001
From: heqiaozhi
Date: Fri, 7 Dec 2018 14:30:10 +0800
Subject: [PATCH 15/62] refine pslib interface & fix some bugs

---
 CMakeLists.txt                                |  4 +-
 paddle/fluid/framework/async_executor.cc      | 29 +++++--
 paddle/fluid/framework/async_executor.h       |  6 +-
 .../fluid/framework/executor_thread_worker.cc | 31 ++++----
 paddle/fluid/pybind/async_executor_py.cc      |  4 +-
 python/paddle/fluid/async_executor.py         | 11 ++-
 python/paddle/fluid/distributed/downpour.py   |  2 +-
 python/paddle/fluid/distributed/ps_pb2.py     | 78 +++++++++----------
 8 files changed, 100 insertions(+), 65 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6fd8dd1dfa..5b5bf6c5b6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -217,7 +217,7 @@ include(cupti)
 include(external/gzstream)
 endif (NOT WIN32)
 include(external/libmct)
-#include(external/pslib_brpc)
+include(external/pslib_brpc)
 include(external/pslib)
 
 if(WITH_DISTRIBUTE)
@@ -280,7 +280,7 @@ set(EXTERNAL_LIBS
     zlib
     ${PYTHON_LIBRARIES}
     pslib
-    #pslib_brpc
+    pslib_brpc
     libmct
 )
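
The refined interface splits server and worker bring-up: a server node
initializes and serves, the collected server signs are exchanged across
nodes, and workers connect against the full list. A sketch of the intended
handshake with the Python wrappers added below (the desc path, rank and
node count, and the sign exchange are placeholders for whatever transport
the job uses, e.g. MPI):

    import paddle.fluid as fluid

    exe = fluid.AsyncExecutor()
    rank, nodes_cnt = 0, 2            # hypothetical job layout
    if rank == 0:                     # server node
        exe.init_server("ps_desc.prototxt", rank)
        sign = exe.start_server()     # returns this server's sign
        ips = [sign]                  # placeholder for an allgather exchange
        exe.gather_servers(ips, nodes_cnt)
    else:                             # trainer node
        ips = []                      # signs gathered from the servers
        exe.init_worker("ps_desc.prototxt", ips, nodes_cnt, rank)
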
diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc
index 292b05c588..7685883dd5 100644
--- a/paddle/fluid/framework/async_executor.cc
+++ b/paddle/fluid/framework/async_executor.cc
@@ -65,18 +65,35 @@ void PrepareReaders(std::vector<std::shared_ptr<DataFeed>>& readers,  // NOLINT
   readers[0]->SetFileList(filelist);
 }
 
-void AsyncExecutor::ConfigPslib(const std::string& dist_desc, std::vector<uint64_t>& host_sign_list, int node_num, int index) {
+void AsyncExecutor::InitServer(const std::string& dist_desc, int index) {
   _pslib_ptr = std::shared_ptr<paddle::distributed::PSlib>(new paddle::distributed::PSlib());
-  _pslib_ptr->init_and_config(dist_desc, host_sign_list, node_num, index);//TODO done
+  _pslib_ptr->init_server(dist_desc, index);//TODO done
+
+  InitParamConfig();
 }
 
-void AsyncExecutor::StartServer() {
+void AsyncExecutor::InitWorker(const std::string& dist_desc, std::vector<uint64_t>& host_sign_list, int node_num, int index) {
+  _pslib_ptr = std::shared_ptr<paddle::distributed::PSlib>(new paddle::distributed::PSlib());
+  _pslib_ptr->init_worker(dist_desc, host_sign_list.data(), node_num, index);//TODO done
+
   InitParamConfig();
-  _pslib_ptr->run_server();
+}
+
+uint64_t AsyncExecutor::StartServer() {
+  return _pslib_ptr->run_server();
+}
+
+void AsyncExecutor::GatherServers(std::vector<uint64_t>& host_sign_list, int node_num) {
+  _pslib_ptr->gather_servers(host_sign_list.data(), node_num);
 }
 
 void AsyncExecutor::InitParamConfig() {
-  _param_config.fea_dim = _pslib_ptr->get_param()->trainer_param().sparse_table(0).feature_dim(); //TODO
+  for (int i = 0; i < _pslib_ptr->get_param()->server_param().downpour_server_param().downpour_table_param_size(); ++i) {
+    if (_pslib_ptr->get_param()->server_param().downpour_server_param().downpour_table_param(i).table_class().find("SparseTable") != -1) {
+      _param_config.fea_dim = _pslib_ptr->get_param()->server_param().downpour_server_param().downpour_table_param(i).accessor().fea_dim(); //TODO
+      break;
+    }
+  }
   _param_config.slot_dim = _param_config.fea_dim - 2; //TODO
   _param_config.tmp_push_dense_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().pull_dense_per_batch());
   _param_config.tmp_push_sparse_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().push_dense_per_batch());
@@ -176,6 +193,7 @@ void AsyncExecutor::PrepareDenseThread() {
   param.dense_params = &_param_config.dense_variable_name;
 
   _pull_dense_thread = std::shared_ptr<DensePullThread>(new DensePullThread(param));
+  _pull_dense_thread->start();
 }
 
@@ -238,6 +256,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
                           fetch_var_names, root_scope_, thidx, debug);
   }
 
+  // start executing ops in multiple threads
   for (int thidx = 0; thidx < actual_thread_num; ++thidx) {
     threads.push_back(
diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h
index 21e4a66fce..90d6b46b2f 100644
--- a/paddle/fluid/framework/async_executor.h
+++ b/paddle/fluid/framework/async_executor.h
@@ -63,9 +63,11 @@ class AsyncExecutor {
                    const std::vector<std::string>& fetch_names,
                    const bool debug = false);
   //void ConfigPslib(const char* dist_desc, uint64_t* host_sign_list, int node_num, int index);
-  void ConfigPslib(const std::string& dist_desc, std::vector<uint64_t>& host_sign_list, int node_num, int index);
+  void InitServer(const std::string& dist_desc, int index);
+  void InitWorker(const std::string& dist_desc, std::vector<uint64_t>& host_sign_list, int node_num, int index);
   //void ConfigWorker() {}
-  void StartServer();
+  uint64_t StartServer();
+  void GatherServers(std::vector<uint64_t>& host_sign_list, int node_num);
   void InitModel();
   void SaveModel(const std::string& path);
-
   void InitParamConfig();
 private:
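
On the trainer side, embedding lookups no longer run as ops at all:
TrainOneNetwork below skips lookup_table/lookup_table_grad, whose work is
done by PullSparse/FillSparse/PushSparse against the parameter server
instead. The same skip list in Python terms (a sketch; `block` is any
fluid block, and the list matches worker_skipped_ops in downpour.py):

    worker_skipped_ops = ["lookup_table", "lookup_table_grad"]

    def ops_to_run(block):
        # everything except the table ops; the PS round-trip replaces them
        return [op for op in block.ops
                if op.type not in worker_skipped_ops]
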
diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc
index f7c05e400d..e0ee9c11c9 100644
--- a/paddle/fluid/framework/executor_thread_worker.cc
+++ b/paddle/fluid/framework/executor_thread_worker.cc
@@ -345,9 +345,12 @@ void AsyncExecutorThreadWorker::TrainOneNetwork() {
     if (op->Type().find("sgd") != std::string::npos) {
       continue;
     }
+    if (op->Type().find("lookup_table") != std::string::npos ||
+        op->Type().find("lookup_table_grad") != std::string::npos) {
+      continue;
+    }
     op->Run(*thread_scope_, place_);
   }
-
   UpdateParams();
 }
 
@@ -416,8 +419,8 @@ void AsyncExecutorThreadWorker::UpdateParams() {
   for (auto i: _param_config->dense_table_id) {
     PushDense(i);
   }
-  int32_t tmp_push_dense_wait_times = _param_config->tmp_push_dense_wait_times; //TODO
-  int32_t tmp_push_sparse_wait_times = _param_config->tmp_push_sparse_wait_times; //TODO
+  int32_t tmp_push_dense_wait_times = -1;//_param_config->tmp_push_dense_wait_times; //TODO
+  int32_t tmp_push_sparse_wait_times = -1;//_param_config->tmp_push_sparse_wait_times; //TODO
   static uint32_t push_dense_wait_times = static_cast<uint32_t>(tmp_push_dense_wait_times);
   static uint32_t push_sparse_wait_times = static_cast<uint32_t>(tmp_push_sparse_wait_times);
 
@@ -430,7 +433,6 @@ void AsyncExecutorThreadWorker::UpdateParams() {
   if (tmp_push_dense_wait_times == -1) {
     _push_dense_status.resize(0);
   }
-
   if (_push_sparse_status.size() >= push_sparse_wait_times) {
     for (auto& t : _push_sparse_status) {
       t.wait();
@@ -440,7 +442,6 @@ void AsyncExecutorThreadWorker::UpdateParams() {
   if (tmp_push_sparse_wait_times == -1) {
     _push_sparse_status.resize(0);
   }
-
   //for (auto dense_table_id : GlobalConfig::instance().dense_table_id) {//TODO
   for (auto dense_table_id: _param_config->dense_table_id) {
     _pull_dense_thread->increase_thread_version(thread_id_, dense_table_id);
@@ -451,8 +452,8 @@ void AsyncExecutorThreadWorker::UpdateParams() {
 
 void AsyncExecutorThreadWorker::PushDense(int table_id) {
   std::vector<paddle::ps::Region> regions;
   //auto& variables = GlobalConfig::instance().dense_gradient_variable_name[table_id];
-  std::vector<std::string> variables;
-  for (auto& t : variables) {
+  //std::vector<std::string> variables;
+  for (auto& t : _param_config->dense_gradient_variable_name[table_id]) {
     Variable* var = thread_scope_->FindVar(t);
     CHECK(var != nullptr) << "var[" << t << "] not found";
     LoDTensor* tensor = var->GetMutable<LoDTensor>();
@@ -469,7 +470,6 @@ void AsyncExecutorThreadWorker::PushDense(int table_id) {
 }
 
 void AsyncExecutorThreadWorker::PullSparse(int table_id) {
-
   auto& features = _features[table_id];
   auto& feature_value = _feature_value[table_id];
   auto fea_dim = _param_config->fea_dim; //TODO
   features.clear();
   features.resize(0);
   features.reserve(MAX_FEASIGN_NUM);
-
   const std::vector<std::string>& feed_vec = thread_reader_->GetUseSlotAlias();
   // slot_idx = 0 is label TODO
   for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) {
@@ -493,14 +492,14 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) {
       features.push_back(static_cast<uint64_t>(ids[i]));
     }
   }
-
   check_pull_push_memory(features, feature_value, fea_dim);
 
   std::vector<float*> pull_feature_value;
   for (auto i = 0u; i < features.size(); ++i) {
     pull_feature_value.push_back(feature_value[i].data());
   }
-
+  for (int i = 0; i < features.size(); ++i) {
+  }
   auto status = _pslib_ptr->_worker_ptr->pull_sparse(
       pull_feature_value.data(), table_id, features.data(), features.size());
   _pull_sparse_status.push_back(std::move(status));
@@ -529,7 +531,15 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) {
     LoDTensor* tensor = var->GetMutable<LoDTensor>();
     int64_t* ids = tensor->data<int64_t>();
     int len = tensor->numel();
-    Variable* var_emb = thread_scope_->FindVar(_param_config->slot_input_vec[slot_idx - 1]);
+    Variable* var_emb = thread_scope_->FindVar(_param_config->slot_input_vec[table_id][slot_idx - 1]);
     LoDTensor* tensor_emb = var_emb->GetMutable<LoDTensor>();
-    float* ptr = tensor_emb->data<float>();
+    float* ptr = tensor_emb->mutable_data<float>({len, slot_dim}, platform::CPUPlace());
+    memset(ptr, 0, sizeof(float) * len * slot_dim);
+    auto& tensor_lod = tensor->lod()[0];
+
+    LoD data_lod{tensor_lod};
+    tensor_emb->set_lod(data_lod);
+    //float* ptr = tensor_emb->data<float>();
 
     for (auto index = 0u; index < len; ++index){
       //if (_current_train_job.use_cvm_feature()) {
@@ -575,10 +579,10 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) {
   //}
   const std::vector<std::string>& feed_vec = thread_reader_->GetUseSlotAlias();
-
   // slot_idx = 0 is label TODO
   for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) {
-    if (_slot_alias_to_table[feed_vec[slot_idx]] != table_id) {
+    if (_param_config->slot_alias_to_table[feed_vec[slot_idx]] != table_id) {
       continue;
     }
-    Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[slot_idx - 1]);
+    Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[table_id][slot_idx - 1]);
     LoDTensor* g_tensor = g_var->GetMutable<LoDTensor>();
     //int count = g_tensor->numel();
     float* g = g_tensor->data<float>();
diff --git a/paddle/fluid/pybind/async_executor_py.cc b/paddle/fluid/pybind/async_executor_py.cc
index 63fd06224f..eca46fbad5 100644
--- 
a/paddle/fluid/pybind/async_executor_py.cc +++ b/paddle/fluid/pybind/async_executor_py.cc @@ -48,8 +48,10 @@ void BindAsyncExecutor(py::module* m) { new framework::AsyncExecutor(scope, place)); })) .def("run_from_files", &framework::AsyncExecutor::RunFromFile) - .def("config_pslib", &framework::AsyncExecutor::ConfigPslib) + .def("init_server", &framework::AsyncExecutor::InitServer) + .def("init_worker", &framework::AsyncExecutor::InitWorker) .def("start_server", &framework::AsyncExecutor::StartServer) + .def("gather_servers", &framework::AsyncExecutor::GatherServers) .def("init_model", &framework::AsyncExecutor::InitModel) .def("save_model", &framework::AsyncExecutor::SaveModel); } // end BindAsyncExecutor diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index c5863eb9e0..f667ff2424 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -158,8 +158,17 @@ class AsyncExecutor(object): return + def init_server(self, filename, index): + self.executor.init_server(filename, index) + + def init_worker(self, filename, ips, nodes_cnt, index): + self.executor.init_worker(filename, ips, nodes_cnt, index) + def start_server(self): - self.executor.start_server() + return self.executor.start_server() + + def gather_servers(self, ips, nodes_cnt): + self.executor.gather_servers(ips, nodes_cnt) def init_model(self): self.executor.init_model() diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 093792d5d6..3d940b62b0 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -56,7 +56,7 @@ class DownpourSGD(object): params_grads[0], params_grads[1]) ps_param = pslib.PSParameter() ps_param.server_param.CopyFrom(server.get_desc()) - ps_param.worker_param.CopyFrom(worker.get_desc()) + ps_param.trainer_param.CopyFrom(worker.get_desc()) # Todo(guru4elephant): figure out how to support more sparse parameters # currently only support lookup_table worker_skipped_ops = ["lookup_table", "lookup_table_grad"] diff --git a/python/paddle/fluid/distributed/ps_pb2.py b/python/paddle/fluid/distributed/ps_pb2.py index 0ef34d6e18..f33ec50f7d 100644 --- a/python/paddle/fluid/distributed/ps_pb2.py +++ b/python/paddle/fluid/distributed/ps_pb2.py @@ -20,7 +20,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( name='ps.proto', package='paddle', syntax='proto2', - serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\x9e\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x01(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xbc\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 
\x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1c\n\x14pull_dense_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\x91\x01\n\x16ServerServiceParameter\x12\x14\n\x0cserver_class\x18\x01 \x01(\t\x12\x14\n\x0c\x63lient_class\x18\x02 \x01(\t\x12\x15\n\rservice_class\x18\x03 \x01(\t\x12\x19\n\x11start_server_port\x18\x04 \x01(\r\x12\x19\n\x11server_thread_num\x18\x05 \x01(\r\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 
\x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 \x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') + serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\x9e\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x01(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xbc\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1c\n\x14pull_dense_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 
\x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\xd1\x01\n\x16ServerServiceParameter\x12(\n\x0cserver_class\x18\x01 \x01(\t:\x12\x41\x62\x61\x63usBrpcPsServer\x12(\n\x0c\x63lient_class\x18\x02 \x01(\t:\x12\x41\x62\x61\x63usBrpcPsClient\x12&\n\rservice_class\x18\x03 \x01(\t:\x0f\x41\x62\x61\x63usPsService\x12\x1c\n\x11start_server_port\x18\x04 \x01(\r:\x01\x30\x12\x1d\n\x11server_thread_num\x18\x05 \x01(\r:\x02\x31\x32\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 
\x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 \x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') ) _sym_db.RegisterFileDescriptor(DESCRIPTOR) @@ -41,8 +41,8 @@ _TABLETYPE = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3198, - serialized_end=3250, + serialized_start=3262, + serialized_end=3314, ) _sym_db.RegisterEnumDescriptor(_TABLETYPE) @@ -108,8 +108,8 @@ _PSCMDID = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3253, - serialized_end=3570, + serialized_start=3317, + serialized_end=3634, ) _sym_db.RegisterEnumDescriptor(_PSCMDID) @@ -148,8 +148,8 @@ _FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3166, - serialized_end=3196, + serialized_start=3230, + serialized_end=3260, ) _sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) @@ -531,35 +531,35 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor( _descriptor.FieldDescriptor( name='server_class', full_name='paddle.ServerServiceParameter.server_class', index=0, number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), + has_default_value=True, default_value=_b("AbacusBrpcPsServer").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='client_class', full_name='paddle.ServerServiceParameter.client_class', index=1, number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), + has_default_value=True, default_value=_b("AbacusBrpcPsClient").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='service_class', full_name='paddle.ServerServiceParameter.service_class', index=2, number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), + has_default_value=True, default_value=_b("AbacusPsService").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='start_server_port', full_name='paddle.ServerServiceParameter.start_server_port', index=3, number=4, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, + has_default_value=True, default_value=0, 
message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='server_thread_num', full_name='paddle.ServerServiceParameter.server_thread_num', index=4, number=5, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, + has_default_value=True, default_value=12, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), @@ -576,7 +576,7 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor( oneofs=[ ], serialized_start=1134, - serialized_end=1279, + serialized_end=1343, ) @@ -641,8 +641,8 @@ _TABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1282, - serialized_end=1473, + serialized_start=1346, + serialized_end=1537, ) @@ -721,8 +721,8 @@ _TABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1476, - serialized_end=1845, + serialized_start=1540, + serialized_end=1909, ) @@ -794,8 +794,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1848, - serialized_end=2054, + serialized_start=1912, + serialized_end=2118, ) @@ -839,8 +839,8 @@ _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2056, - serialized_end=2139, + serialized_start=2120, + serialized_end=2203, ) @@ -898,8 +898,8 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2141, - serialized_end=2242, + serialized_start=2205, + serialized_end=2306, ) @@ -950,8 +950,8 @@ _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2244, - serialized_end=2363, + serialized_start=2308, + serialized_end=2427, ) @@ -1009,8 +1009,8 @@ _DENSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2366, - serialized_end=2591, + serialized_start=2430, + serialized_end=2655, ) @@ -1068,8 +1068,8 @@ _ADAMSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2594, - serialized_end=2728, + serialized_start=2658, + serialized_end=2792, ) @@ -1106,8 +1106,8 @@ _NAIVESGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2730, - serialized_end=2796, + serialized_start=2794, + serialized_end=2860, ) @@ -1137,8 +1137,8 @@ _SUMMARYSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2798, - serialized_end=2857, + serialized_start=2862, + serialized_end=2921, ) @@ -1168,8 +1168,8 @@ _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2859, - serialized_end=2905, + serialized_start=2923, + serialized_end=2969, ) @@ -1213,8 +1213,8 @@ _PSRESPONSEMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2907, - serialized_end=2980, + serialized_start=2971, + serialized_end=3044, ) @@ -1287,8 +1287,8 @@ _FSCLIENTPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2983, - serialized_end=3196, + serialized_start=3047, + serialized_end=3260, ) _PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER From 6bc0efb489411bb1b3206db0cbb03951811fa988 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Mon, 10 Dec 2018 10:53:14 +0800 Subject: [PATCH 16/62] refine interface --- python/paddle/fluid/async_executor.py | 42 +++++++++++++------- 
python/paddle/fluid/distributed/downpour.py | 12 ++++-- python/paddle/fluid/distributed/helper.py | 30 ++++++++------ python/paddle/fluid/distributed/node.py | 44 +++++++++++++++++---- python/paddle/fluid/distributed/ps_pb2.py | 6 +-- 5 files changed, 93 insertions(+), 41 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index f667ff2424..3451d1edb5 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -24,6 +24,7 @@ from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format from . import io from .data_feed_desc import DataFeedDesc +from .distributed import ps_instance __all__ = ['AsyncExecutor'] @@ -85,6 +86,7 @@ class AsyncExecutor(object): scope = global_scope() self.executor = core.AsyncExecutor(scope, p) + self.instance = ps_instance.PaddlePSInstance("init_param", 1, 2) def run(self, program, data_feed, filelist, thread_num, fetch, debug=False): """ @@ -149,27 +151,39 @@ class AsyncExecutor(object): self.executor.run_from_files(program_desc, data_feed.desc(), filelist, thread_num, fetch_var_names, debug) + self.instance.barrier_all() def config_distributed_nodes(self, dist_opt): + # get total rank # get rank index # get iplists # get hadoop info - return - - - def init_server(self, filename, index): - self.executor.init_server(filename, index) - - def init_worker(self, filename, ips, nodes_cnt, index): - self.executor.init_worker(filename, ips, nodes_cnt, index) + pass + + def get_instance(self): + return self.instance + + def init_server(self, dist_desc): + self.executor.init_server(dist_desc, self.instance._rankid) + ip = self.executor.start_server() + self.instance.set_ip(ip) + self.instance.barrier_all() #wait all server start + ips = self.instance.gather_ips() + self.executor.gather_servers(ips, self.instance.get_node_cnt()) + self.instance.barrier_all() #wait all worker start + self.instance.barrier_all() #wait init model + self.instance.barrier_all() #wait worker do all things + + def init_worker(self, dist_desc): + self.instance.barrier_all() #wait all server start + ips = self.instance.gather_ips() + self.executor.init_worker(dist_desc, ips, self.instance.get_node_cnt(), self.instance._rankid) + self.instance.barrier_all() #wait all worker start + if self.instance.is_first_worker(): + self.executor.init_model() + self.instance.barrier_all() #wait init model - def start_server(self): - return self.executor.start_server() - - def gather_servers(self, ips, nodes_cnt): - self.executor.gather_servers(ips, nodes_cnt) - def init_model(self): self.executor.init_model() diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 3d940b62b0..654fa6fab6 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -46,14 +46,20 @@ class DownpourSGD(object): sparse_table_index = 0 # currently merge all dense parameters into one dense table dense_table_index = 1 + params = [] + grads = [] + for i in params_grads: + params.append(i[0]) + for i in params_grads: + grads.append(i[1]) server.add_sparse_table(sparse_table_index, self.learning_rate_, prefetch_slots, prefetch_slots_emb) server.add_dense_table(dense_table_index, self.learning_rate_, - params_grads[0], params_grads[1]) + params, grads) worker.add_sparse_table(sparse_table_index, self.learning_rate_, prefetch_slots, prefetch_slots_emb) worker.add_dense_table(dense_table_index, self.learning_rate_, - params_grads[0], 
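[Example] The init_server/init_worker pair above is a barrier-counting protocol: every barrier_all on the server path must have a matching barrier_all on the worker path, or some rank blocks forever. A runnable mpi4py sketch of the same choreography, reduced to three matched barriers (the function names and the rank-parity role split are illustrative; the real role logic lives in PaddlePSInstance):

    # run with e.g. `mpirun -np 4 python demo.py`
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    is_server = comm.Get_rank() % 2 == 0       # even ranks serve, odd ranks train

    def serve():
        ip = "10.0.0.%d" % comm.Get_rank()     # stand-in for start_server()
        comm.barrier()                         # 1: all servers are up
        comm.allgather(ip)                     # exchange endpoints
        comm.barrier()                         # 2: all workers connected
        comm.barrier()                         # 3: model initialized

    def train():
        comm.barrier()                         # 1: wait for the servers
        ips = comm.allgather(None)             # receive server endpoints
        comm.barrier()                         # 2: connected (init_worker done)
        if comm.Get_rank() == 1:               # "first worker" initializes
            pass                               # stand-in for init_model()
        comm.barrier()                         # 3: model initialized

    (serve if is_server else train)()

Dropping or adding a barrier on one side only is exactly the failure mode the matched comments (#wait all server start, #wait init model, ...) are guarding against.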
params_grads[1]) + params, grads) ps_param = pslib.PSParameter() ps_param.server_param.CopyFrom(server.get_desc()) ps_param.trainer_param.CopyFrom(worker.get_desc()) @@ -61,4 +67,4 @@ class DownpourSGD(object): # currently only support lookup_table worker_skipped_ops = ["lookup_table", "lookup_table_grad"] ps_param_str = text_format.MessageToString(ps_param) - return [ps_param_str, worker_skipped_ops] + return [ps_param, worker_skipped_ops] diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py index 12e2f7f197..4cc5eb2a92 100644 --- a/python/paddle/fluid/distributed/helper.py +++ b/python/paddle/fluid/distributed/helper.py @@ -1,4 +1,5 @@ from mpi4py import MPI +import ps_pb2 as pslib class FileSystem(object): def __init__(self, fs_type="afs", @@ -7,20 +8,23 @@ class FileSystem(object): passwd=None, hadoop_bin="", afs_conf=None): - assert user not None - assert passwd not None - assert hadoop_bin not None - fs_client = pslib.FsClientParameter() - if fs_type == "afs": - fs_client.fs_type = pslib.FsApiType.AFS - else: - fs_client.fs_type = pslib.FsApiType.HDFS - fs_client.uri = uri - fs_client.user = user - fs_client.passwd = passwd - fs_client.buffer_size = 0 - fs_client.afs_conf = afs_conf if not afs_conf else "" + assert user != None + assert passwd != None + assert hadoop_bin != None + self.fs_client = pslib.FsClientParameter() + #if fs_type == "afs": + # fs_client.fs_type = pslib.FsApiType.AFS + #else: + # fs_client.fs_type = pslib.FsApiType.HDFS + self.fs_client.uri = uri + self.fs_client.user = user + self.fs_client.passwd = passwd + #self.fs_client.buffer_size = 0 + self.fs_client.hadoop_bin = hadoop_bin + #self.fs_client.afs_conf = afs_conf if not afs_conf else "" + def get_desc(self): + return self.fs_client class MPIHelper(object): def __init__(self): diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index b96a15a32f..c245dc4db8 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -13,24 +13,52 @@ class Worker(object): class DownpourServer(Server): def __init__(self): self.server_ = pslib.ServerParameter() + self.server_.downpour_server_param.service_param.start_server_port = 0 + self.server_.downpour_server_param.service_param.server_class = "DownpourBrpcPsServer" + self.server_.downpour_server_param.service_param.client_class = "DownpourBrpcPsClient" + self.server_.downpour_server_param.service_param.service_class = "DownpourPsService" + self.server_.downpour_server_param.service_param.start_server_port = 0 + self.server_.downpour_server_param.service_param.server_thread_num = 12 def add_sparse_table(self, table_id, learning_rate, slot_key_vars, slot_value_var): table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id + table.table_class = "DownpourSparseTable" table.type = pslib.PS_SPARSE_TABLE table.accessor.accessor_class = "DownpourFeatureValueAccessor" - table.accessor.dense_sgd_param.adam.learning_rate = learning_rate - table.accessor.fea_dim = abs(reduce(lambda x, y: x * y, - slot_value_var[0].shape, 1)) + table.accessor.sparse_sgd_param.learning_rate = learning_rate + table.accessor.sparse_sgd_param.initial_g2sum = 3 + table.accessor.sparse_sgd_param.initial_range = 1e-4 + table.accessor.sparse_sgd_param.weight_bounds.extend([-10, 10]) + + table.accessor.embedx_dim = 8 + table.accessor.embedx_threshold = 5 + table.accessor.fea_dim = 11 + #table.accessor.fea_dim = abs(reduce(lambda x, y: x * y, + 
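[Example] The downpour.py hunk above splits params_grads with two separate passes over the same list; a single unzip is the usual idiom and behaves identically for any list of (param, grad) pairs:

    params_grads = [("fc_0.w_0", "fc_0.w_0@GRAD"), ("fc_0.b_0", "fc_0.b_0@GRAD")]
    params, grads = (list(t) for t in zip(*params_grads))
    assert params == ["fc_0.w_0", "fc_0.b_0"]
    assert grads == ["fc_0.w_0@GRAD", "fc_0.b_0@GRAD"]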
# slot_value_var[0].shape, 1)) + table.accessor.downpour_accessor_param.nonclk_coeff = 0.1 + table.accessor.downpour_accessor_param.click_coeff = 2 + table.accessor.downpour_accessor_param.base_threshold = 0.2 + table.accessor.downpour_accessor_param.delta_threshold = 0.15 + table.accessor.downpour_accessor_param.delta_keep_days = 31 + table.accessor.downpour_accessor_param.show_click_decay_rate = 0.999 + table.accessor.downpour_accessor_param.delete_threshold = 0.8 def add_dense_table(self, table_id, learning_rate, param_var, grad_var): table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id + table.table_class = "DownpourDenseTable" table.type = pslib.PS_DENSE_TABLE table.accessor.accessor_class = "DownpourDenseValueAccessor" - table.accessor.sparse_sgd_param.learning_rate = learning_rate + table.accessor.dense_sgd_param.name = "adam" + table.accessor.dense_sgd_param.adam.learning_rate = learning_rate + table.accessor.dense_sgd_param.adam.avg_decay_rate = 0.999993 + table.accessor.dense_sgd_param.adam.ada_decay_rate = 0.9999 + table.accessor.dense_sgd_param.adam.ada_epsilon = 1e-8 + table.accessor.dense_sgd_param.adam.mom_decay_rate = 0.99 + table.accessor.dense_sgd_param.naive.learning_rate = 0.0002 fea_dim = 0 for param in param_var: fea_dim += reduce(lambda x, y: x * y, param.shape, 1) @@ -44,8 +72,8 @@ class DownpourWorker(Worker): def __init__(self, window): self.window = window self.worker_ = pslib.DownpourTrainerParameter() - self.worker_.pull_dense_per_batch = window - self.worker_.push_dense_per_batch = window + #self.worker_.pull_dense_per_batch = window + #self.worker_.push_dense_per_batch = window def add_sparse_table(self, table_id, learning_rate, slot_key_vars, slot_value_vars): @@ -62,8 +90,8 @@ class DownpourWorker(Worker): param_vars, grad_vars): table = self.worker_.dense_table.add() table.table_id = table_id - table.dense_variable_name.extend([p.name for p in param_vars]) - table.dense_gradient_variable_name.extend([g.name for g in grad_vars]) + table.dense_variable_name.extend(filter(lambda x: x.find("embedding") == -1, [p.name for p in param_vars])) + table.dense_gradient_variable_name.extend(filter(lambda x: x.find("embedding") == -1, [g.name for g in grad_vars])) def get_desc(self): return self.worker_ diff --git a/python/paddle/fluid/distributed/ps_pb2.py b/python/paddle/fluid/distributed/ps_pb2.py index f33ec50f7d..b82c649e14 100644 --- a/python/paddle/fluid/distributed/ps_pb2.py +++ b/python/paddle/fluid/distributed/ps_pb2.py @@ -531,21 +531,21 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor( _descriptor.FieldDescriptor( name='server_class', full_name='paddle.ServerServiceParameter.server_class', index=0, number=1, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("AbacusBrpcPsServer").decode('utf-8'), + has_default_value=True, default_value=_b("DownpourBrpcPsServer").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='client_class', full_name='paddle.ServerServiceParameter.client_class', index=1, number=2, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("AbacusBrpcPsClient").decode('utf-8'), + has_default_value=True, default_value=_b("DownpourBrpcPsClient").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( name='service_class', 
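[Example] The filter calls above drop every variable whose name contains "embedding" from the dense table's variable lists, presumably because those parameters are handled through the sparse (lookup_table) table instead; PATCH 20 later applies the same rule to the server-side fea_dim sum. The substring test written as `find(...) == -1` reads more directly as `not in`:

    names = ["embedding_0.w_0", "fc_0.w_0", "fc_0.b_0", "fc_1.w_0"]
    dense = [n for n in names if "embedding" not in n]
    assert dense == ["fc_0.w_0", "fc_0.b_0", "fc_1.w_0"]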
full_name='paddle.ServerServiceParameter.service_class', index=2, number=3, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("AbacusPsService").decode('utf-8'), + has_default_value=True, default_value=_b("DownpourPsService").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), From 86e1044ab941d627362d0def4ad45a250178a736 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Mon, 10 Dec 2018 10:54:25 +0800 Subject: [PATCH 17/62] refine interface & add ps_instance --- .../paddle/fluid/distributed/ps_instance.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 python/paddle/fluid/distributed/ps_instance.py diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py new file mode 100644 index 0000000000..b4045327e1 --- /dev/null +++ b/python/paddle/fluid/distributed/ps_instance.py @@ -0,0 +1,108 @@ +#import paddle.fluid.distributed.helper as dist_helper +import helper as dist_helper +import sys +#from mpi4py import MPI + + +class PaddlePSInstance(object): + def __init__(self, init_param, server_worker_mode, proc_per_node): + self.dh = dist_helper.MPIHelper() + self._config = init_param + self._rankid = self.dh.get_rank() + self._server_worker_mode = server_worker_mode + self._proc_per_node = proc_per_node + self._nodes = self.dh.get_size() + + self._ip = 0 + self._worker_num = self._nodes * self._proc_per_node / 2 + self._server_num = self._nodes * self._proc_per_node / 2 + self._total_server_worker = self._worker_num + self._server_num + self._node_type = None #IDLE=-1, WORKER=1, SERVER=0 + self._set_nodetype() + self._comm = None + self._split_comm() + + + def _set_nodetype(self): + if self._server_worker_mode == 0: + if self._rankid < self._server_num: + self._node_type = 1 + elif self._rankid < self._total_server_worker: + self._node_type = 0 + else: + self._node_type = -1 + elif self._server_worker_mode == 1: + if self._rankid < self._total_server_worker: + if 0 == self._rankid % self._proc_per_node % 2: + self._node_type = 0 + else: + self._node_type = 1 + else: + self._node_type = -1; + else: + self._node_type = -1 + + #if self._rankid == 0: + #print "node type: ", self._node_type + + def _split_comm(self): + if self.is_server(): + self._comm = self.dh.comm.Split(self._node_type) + elif self.is_worker(): + self._comm = self.dh.comm.Split(self._node_type) + pass + + def get_worker_index(self): + if self._server_worker_mode == 0: + return self._rankid == self.server_num + else: + return self._rankid / self._proc_per_node + + def get_server_index(self): + if self._server_worker_mode == 0: + return self.rank_id + else: + return self.rank_id / self._proc_per_node + + def is_worker(self): + return self._node_type == 1 + + def is_server(self): + return self._node_type == 0 + + def is_first_worker(self): + return self.is_worker() and 0 == self.get_worker_index() + + def set_ip(self, ip): + self._ip = ip + + def gather_ips(self): + self._ips = self.dh.comm.allgather(self._ip) + return self._ips + + def get_node_cnt(self): + return self._nodes + + def barrier_all(self): + #print self._rankid, "begin" + #sys.stdout.flush() + self.dh.comm.barrier() + #print self._rankid, "end" + + def barrier_worker(self): + if self.is_worker(): + #print "worker: ", self._rankid, "begin" + #sys.stdout.flush() + self._comm.barrier() + #print "worker: ", self._rankid, "end" + pass + + def finalize(self): + pass + + +if __name__ == "__main__": + 
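[Example] Two accessors in ps_instance.py above reference attributes that are never defined: get_worker_index (mode 0) reads self.server_num and uses == where an index is expected, and get_server_index reads self.rank_id; the instance only defines _server_num and _rankid, so both raise AttributeError when server_worker_mode == 0. (The __main__ demo below also passes four arguments to the three-parameter constructor.) Corrected bodies following the mode-0 layout in _set_nodetype, which puts workers in ranks [0, _server_num) and servers after them; this is one plausible reading of the intent, not the upstream fix:

    def get_worker_index(self):
        if self._server_worker_mode == 0:
            return self._rankid                  # workers occupy the low ranks
        return self._rankid // self._proc_per_node

    def get_server_index(self):
        if self._server_worker_mode == 0:
            return self._rankid - self._worker_num
        return self._rankid // self._proc_per_node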
instance = PaddlePSInstance(1, 1, 2, 50) + instance.barrier_all() + #print "-----" + #instance.barrier_worker() From 8e3fe2d7355c09a3dde09bcbf63971ff3bfe169d Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Mon, 10 Dec 2018 18:57:57 +0800 Subject: [PATCH 18/62] add skip op --- paddle/fluid/framework/async_executor.cc | 8 ++++++-- paddle/fluid/framework/executor_thread_worker.cc | 15 ++++++++++----- paddle/fluid/framework/executor_thread_worker.h | 2 ++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index 7685883dd5..f96ff436da 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -95,8 +95,12 @@ void AsyncExecutor::InitParamConfig() { } } _param_config.slot_dim = _param_config.fea_dim - 2; //TODO - _param_config.tmp_push_dense_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().pull_dense_per_batch()); - _param_config.tmp_push_sparse_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().push_dense_per_batch()); + _param_config.tmp_push_dense_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().push_dense_per_batch()); + _param_config.tmp_push_sparse_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().push_sparse_per_batch()); + + for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().skip_op_size(); ++t) { + _param_config.skip_op.push_back(_pslib_ptr->get_param()->trainer_param().skip_op(t)); + } //sparse for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().sparse_table_size(); ++t) { auto& table = _pslib_ptr->get_param()->trainer_param().sparse_table(t); diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index e0ee9c11c9..d8320b422b 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -340,16 +340,21 @@ void AsyncExecutorThreadWorker::SetPullDenseThread(std::shared_ptrType().find("sgd") != std::string::npos) { continue; } - if (op->Type().find("lookup_table") != std::string::npos || - op->Type().find("lookup_table_grad") != std::string::npos) { - continue; + bool need_skip = false; + for (auto t = 0u; t < _param_config->skip_op.size(); ++t) { + if (op->Type().find(_param_config->skip_op[t]) != std::string::npos) { + need_skip = true; + break; + } + } + if (!need_skip) { + op->Run(*thread_scope_, place_); } - op->Run(*thread_scope_, place_); } UpdateParams(); } diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 4e3255a590..b3ee9dfaec 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -39,6 +39,8 @@ struct AsyncWorkerParamConfig { int fea_dim; int32_t tmp_push_dense_wait_times; int32_t tmp_push_sparse_wait_times; + + std::vector skip_op; std::map> dense_variable_name; std::map> dense_gradient_variable_name; From 016a06877578a6c862d5fd7eef3c1c75a71adc81 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Mon, 10 Dec 2018 21:34:37 +0800 Subject: [PATCH 19/62] stop server --- paddle/fluid/framework/async_executor.cc | 4 ++++ paddle/fluid/framework/async_executor.h | 1 + .../fluid/framework/executor_thread_worker.cc | 18 +++++++++--------- paddle/fluid/pybind/async_executor_py.cc | 1 + python/paddle/fluid/async_executor.py | 9 ++++++++- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git 
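[Example] PATCH 18 above replaces the hard-coded lookup_table checks in TrainFiles with a configurable skip list, matched by substring against each op type. A small Python model of that rule; note that because matching is substring-based, "lookup_table" alone already covers "lookup_table_grad":

    skip_op = ["lookup_table", "lookup_table_grad"]

    def should_skip(op_type):
        # mirrors op->Type().find(skip_op[t]) != std::string::npos
        return any(pat in op_type for pat in skip_op)

    assert should_skip("lookup_table")
    assert should_skip("lookup_table_grad")   # matched by the shorter pattern
    assert not should_skip("mul_grad")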
a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index f96ff436da..45a914b70e 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -83,6 +83,10 @@ uint64_t AsyncExecutor::StartServer() { return _pslib_ptr->run_server(); } +void AsyncExecutor::StopServer() { + _pslib_ptr->stop_server(); +} + void AsyncExecutor::GatherServers(std::vector& host_sign_list, int node_num) { _pslib_ptr->gather_servers(host_sign_list.data(), node_num); } diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index 90d6b46b2f..4b46126217 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -67,6 +67,7 @@ class AsyncExecutor { void InitWorker(const std::string& dist_desc, std::vector& host_sign_list, int node_num, int index); //void ConfigWorker() {} uint64_t StartServer(); + void StopServer(); void GatherServers(std::vector& host_sign_list, int node_num); void InitModel(); void SaveModel(const std::string& path); diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index d8320b422b..a0455b26ef 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -569,7 +569,6 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) { } void AsyncExecutorThreadWorker::PushSparse(int table_id) { - auto slot_dim = _param_config->slot_dim; //TODO auto fea_dim = _param_config->fea_dim;//_current_train_job.fea_dim();TODO auto& features = _features[table_id]; @@ -592,19 +591,20 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { } Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[table_id][slot_idx - 1]); LoDTensor* g_tensor = g_var->GetMutable(); - //int count = g_tensor->numel(); - float* g = g_tensor->data(); - /* - if (FLAGS_scale_sparse_gradient_with_batch_size) { - Eigen::Map g_mat(g, 1, tensor->numel()); - g_mat *= _batch_size; + if (g_tensor == NULL) { + LOG(ERROR) << "var[" << _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; + exit(-1); } - */ + float* g = g_tensor->data(); Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); LoDTensor* tensor = var->GetMutable(); + if (tensor == NULL) { + LOG(ERROR) << "var[" << feed_vec[slot_idx] << "] not found"; + exit(-1); + } int len = tensor->lod()[0].back(); - //assert(slot_dim * len == count); + assert(slot_dim * len == g_tensor->numel()); int64_t* ids = tensor->data(); for (auto id_idx = 0u; id_idx < len; ++id_idx){ if (ids[id_idx] == 0) { diff --git a/paddle/fluid/pybind/async_executor_py.cc b/paddle/fluid/pybind/async_executor_py.cc index eca46fbad5..8dfba0d269 100644 --- a/paddle/fluid/pybind/async_executor_py.cc +++ b/paddle/fluid/pybind/async_executor_py.cc @@ -51,6 +51,7 @@ void BindAsyncExecutor(py::module* m) { .def("init_server", &framework::AsyncExecutor::InitServer) .def("init_worker", &framework::AsyncExecutor::InitWorker) .def("start_server", &framework::AsyncExecutor::StartServer) + .def("stop_server", &framework::AsyncExecutor::StopServer) .def("gather_servers", &framework::AsyncExecutor::GatherServers) .def("init_model", &framework::AsyncExecutor::InitModel) .def("save_model", &framework::AsyncExecutor::SaveModel); diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 3451d1edb5..76fdb5b0e2 100644 --- a/python/paddle/fluid/async_executor.py +++ 
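[Example] The shutdown path added in PATCH 19 has the same matched-barrier shape as startup: every rank synchronizes, exactly one worker issues the stop RPC, and everyone synchronizes again before exiting. A reduced sketch of that rule (instance and executor stand in for the patched objects):

    def shutdown(instance, executor):
        instance.barrier_all()            # all ranks finished their work
        if instance.is_first_worker():
            executor.stop_server()        # a single stop RPC to the PS
        instance.barrier_all()            # servers are down; safe to exit

PATCH 21 later moves this sequence out of run_from_files into an explicit stop_server() method, so the caller decides when the job tears down rather than having every run call end the cluster.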
b/python/paddle/fluid/async_executor.py @@ -151,7 +151,10 @@ class AsyncExecutor(object): self.executor.run_from_files(program_desc, data_feed.desc(), filelist, thread_num, fetch_var_names, debug) - self.instance.barrier_all() + self.instance.barrier_all() #worker do all things + if self.instance.is_first_worker(): + self.executor.stop_server() + self.instance.barrier_all() #sync def config_distributed_nodes(self, dist_opt): @@ -164,6 +167,9 @@ class AsyncExecutor(object): def get_instance(self): return self.instance + #def stop_server(self): + # self.executor.stop_server() + def init_server(self, dist_desc): self.executor.init_server(dist_desc, self.instance._rankid) ip = self.executor.start_server() @@ -174,6 +180,7 @@ class AsyncExecutor(object): self.instance.barrier_all() #wait all worker start self.instance.barrier_all() #wait init model self.instance.barrier_all() #wait worker do all things + self.instance.barrier_all() #sync def init_worker(self, dist_desc): self.instance.barrier_all() #wait all server start From 60d71a9e2987941487f7f1e44d1e1850b41a1e3d Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Tue, 11 Dec 2018 11:12:49 +0800 Subject: [PATCH 20/62] skip op py file --- python/paddle/fluid/distributed/downpour.py | 1 + python/paddle/fluid/distributed/node.py | 2 +- python/paddle/fluid/distributed/ps_pb2.py | 93 +++++++++++---------- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 654fa6fab6..c1762dd768 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -66,5 +66,6 @@ class DownpourSGD(object): # Todo(guru4elephant): figure out how to support more sparse parameters # currently only support lookup_table worker_skipped_ops = ["lookup_table", "lookup_table_grad"] + ps_param.trainer_param.skip_op.extend(worker_skipped_ops) ps_param_str = text_format.MessageToString(ps_param) return [ps_param, worker_skipped_ops] diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index c245dc4db8..1f4aeeac73 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -60,7 +60,7 @@ class DownpourServer(Server): table.accessor.dense_sgd_param.adam.mom_decay_rate = 0.99 table.accessor.dense_sgd_param.naive.learning_rate = 0.0002 fea_dim = 0 - for param in param_var: + for param in filter(lambda x: x.name.find("embedding") == -1, param_var): fea_dim += reduce(lambda x, y: x * y, param.shape, 1) table.accessor.fea_dim = fea_dim diff --git a/python/paddle/fluid/distributed/ps_pb2.py b/python/paddle/fluid/distributed/ps_pb2.py index b82c649e14..978b18d0d5 100644 --- a/python/paddle/fluid/distributed/ps_pb2.py +++ b/python/paddle/fluid/distributed/ps_pb2.py @@ -20,7 +20,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( name='ps.proto', package='paddle', syntax='proto2', - serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\x9e\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x01(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 
\x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xbc\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1c\n\x14pull_dense_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\xd1\x01\n\x16ServerServiceParameter\x12(\n\x0cserver_class\x18\x01 \x01(\t:\x12\x41\x62\x61\x63usBrpcPsServer\x12(\n\x0c\x63lient_class\x18\x02 \x01(\t:\x12\x41\x62\x61\x63usBrpcPsClient\x12&\n\rservice_class\x18\x03 \x01(\t:\x0f\x41\x62\x61\x63usPsService\x12\x1c\n\x11start_server_port\x18\x04 \x01(\r:\x01\x30\x12\x1d\n\x11server_thread_num\x18\x05 \x01(\r:\x02\x31\x32\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 
\x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 \x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') + serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\x9e\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x01(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xce\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1d\n\x15push_sparse_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\x12\x0f\n\x07skip_op\x18\x05 
\x03(\t\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\xd7\x01\n\x16ServerServiceParameter\x12*\n\x0cserver_class\x18\x01 \x01(\t:\x14\x44ownpourBrpcPsServer\x12*\n\x0c\x63lient_class\x18\x02 \x01(\t:\x14\x44ownpourBrpcPsClient\x12(\n\rservice_class\x18\x03 \x01(\t:\x11\x44ownpourPsService\x12\x1c\n\x11start_server_port\x18\x04 \x01(\r:\x01\x30\x12\x1d\n\x11server_thread_num\x18\x05 \x01(\r:\x02\x31\x32\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 
\x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 \x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') ) _sym_db.RegisterFileDescriptor(DESCRIPTOR) @@ -41,8 +41,8 @@ _TABLETYPE = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3262, - serialized_end=3314, + serialized_start=3286, + serialized_end=3338, ) _sym_db.RegisterEnumDescriptor(_TABLETYPE) @@ -108,8 +108,8 @@ _PSCMDID = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3317, - serialized_end=3634, + serialized_start=3341, + serialized_end=3658, ) _sym_db.RegisterEnumDescriptor(_PSCMDID) @@ -148,8 +148,8 @@ _FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( ], containing_type=None, options=None, - serialized_start=3230, - serialized_end=3260, + serialized_start=3254, + serialized_end=3284, ) _sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) @@ -342,7 +342,7 @@ _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( is_extension=False, extension_scope=None, options=None), _descriptor.FieldDescriptor( - name='pull_dense_per_batch', full_name='paddle.DownpourTrainerParameter.pull_dense_per_batch', index=2, + name='push_sparse_per_batch', full_name='paddle.DownpourTrainerParameter.push_sparse_per_batch', index=2, number=3, type=5, cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, @@ -355,6 +355,13 @@ _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + _descriptor.FieldDescriptor( + name='skip_op', full_name='paddle.DownpourTrainerParameter.skip_op', index=4, + number=5, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], 
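[Example] Each proto change above forces a hand-maintained cascade through the generated file: the serialized_pb blob is rewritten and every serialized_start/serialized_end after the edited message shifts by the size delta (+24 bytes in this patch). Regenerating the module is far less error-prone than patching offsets by hand; a minimal sketch, assuming ps.proto sits in the current directory and grpcio-tools is installed:

    from grpc_tools import protoc

    # emits a fresh ps_pb2.py with a consistent blob and offsets
    ret = protoc.main(["protoc", "-I.", "--python_out=.", "ps.proto"])
    assert ret == 0, "protoc failed"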
extensions=[ ], @@ -368,7 +375,7 @@ _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( oneofs=[ ], serialized_start=557, - serialized_end=745, + serialized_end=763, ) @@ -419,8 +426,8 @@ _DENSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=747, - serialized_end=870, + serialized_start=765, + serialized_end=888, ) @@ -478,8 +485,8 @@ _SPARSETABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=872, - serialized_end=994, + serialized_start=890, + serialized_end=1012, ) @@ -516,8 +523,8 @@ _DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=997, - serialized_end=1131, + serialized_start=1015, + serialized_end=1149, ) @@ -575,8 +582,8 @@ _SERVERSERVICEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1134, - serialized_end=1343, + serialized_start=1152, + serialized_end=1367, ) @@ -641,8 +648,8 @@ _TABLEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1346, - serialized_end=1537, + serialized_start=1370, + serialized_end=1561, ) @@ -721,8 +728,8 @@ _TABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1540, - serialized_end=1909, + serialized_start=1564, + serialized_end=1933, ) @@ -794,8 +801,8 @@ _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=1912, - serialized_end=2118, + serialized_start=1936, + serialized_end=2142, ) @@ -839,8 +846,8 @@ _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2120, - serialized_end=2203, + serialized_start=2144, + serialized_end=2227, ) @@ -898,8 +905,8 @@ _PSREQUESTMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2205, - serialized_end=2306, + serialized_start=2229, + serialized_end=2330, ) @@ -950,8 +957,8 @@ _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2308, - serialized_end=2427, + serialized_start=2332, + serialized_end=2451, ) @@ -1009,8 +1016,8 @@ _DENSESGDRULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2430, - serialized_end=2655, + serialized_start=2454, + serialized_end=2679, ) @@ -1068,8 +1075,8 @@ _ADAMSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2658, - serialized_end=2792, + serialized_start=2682, + serialized_end=2816, ) @@ -1106,8 +1113,8 @@ _NAIVESGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2794, - serialized_end=2860, + serialized_start=2818, + serialized_end=2884, ) @@ -1137,8 +1144,8 @@ _SUMMARYSGDPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2862, - serialized_end=2921, + serialized_start=2886, + serialized_end=2945, ) @@ -1168,8 +1175,8 @@ _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2923, - serialized_end=2969, + serialized_start=2947, + serialized_end=2993, ) @@ -1213,8 +1220,8 @@ _PSRESPONSEMESSAGE = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=2971, - serialized_end=3044, + serialized_start=2995, + serialized_end=3068, ) @@ -1287,8 +1294,8 @@ _FSCLIENTPARAMETER = _descriptor.Descriptor( extension_ranges=[], oneofs=[ ], - serialized_start=3047, - serialized_end=3260, + serialized_start=3071, + 
serialized_end=3284, ) _PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER From 729684007d70cad38e9d34317748e3fedd477886 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Tue, 11 Dec 2018 14:44:02 +0800 Subject: [PATCH 21/62] stop server out of run from file --- python/paddle/fluid/async_executor.py | 14 +++++++------- python/paddle/fluid/distributed/ps_instance.py | 3 +-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 76fdb5b0e2..787a6a6b9e 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -86,7 +86,7 @@ class AsyncExecutor(object): scope = global_scope() self.executor = core.AsyncExecutor(scope, p) - self.instance = ps_instance.PaddlePSInstance("init_param", 1, 2) + self.instance = ps_instance.PaddlePSInstance(1, 2) def run(self, program, data_feed, filelist, thread_num, fetch, debug=False): """ @@ -151,10 +151,7 @@ class AsyncExecutor(object): self.executor.run_from_files(program_desc, data_feed.desc(), filelist, thread_num, fetch_var_names, debug) - self.instance.barrier_all() #worker do all things - if self.instance.is_first_worker(): - self.executor.stop_server() - self.instance.barrier_all() #sync + def config_distributed_nodes(self, dist_opt): @@ -167,8 +164,11 @@ class AsyncExecutor(object): def get_instance(self): return self.instance - #def stop_server(self): - # self.executor.stop_server() + def stop_server(self): + self.instance.barrier_all() #worker do all things + if self.instance.is_first_worker(): + self.executor.stop_server() + self.instance.barrier_all() #sync def init_server(self, dist_desc): self.executor.init_server(dist_desc, self.instance._rankid) diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py index b4045327e1..94e123c2ce 100644 --- a/python/paddle/fluid/distributed/ps_instance.py +++ b/python/paddle/fluid/distributed/ps_instance.py @@ -5,9 +5,8 @@ import sys class PaddlePSInstance(object): - def __init__(self, init_param, server_worker_mode, proc_per_node): + def __init__(self, server_worker_mode, proc_per_node): self.dh = dist_helper.MPIHelper() - self._config = init_param self._rankid = self.dh.get_rank() self._server_worker_mode = server_worker_mode self._proc_per_node = proc_per_node From 57ac412b98990ac1d946ad32de30b07a15d0a18f Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Tue, 11 Dec 2018 17:48:25 +0800 Subject: [PATCH 22/62] download data --- python/paddle/fluid/async_executor.py | 22 ++++++++++++++++++- python/paddle/fluid/contrib/utils/__init__.py | 4 ++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 787a6a6b9e..cce7ec5cca 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -25,6 +25,7 @@ from google.protobuf import text_format from . 
import io from .data_feed_desc import DataFeedDesc from .distributed import ps_instance +from .contrib.utils import hdfs_utils as hdfs __all__ = ['AsyncExecutor'] @@ -152,6 +153,22 @@ class AsyncExecutor(object): data_feed.desc(), filelist, thread_num, fetch_var_names, debug) + def download_data(self, afs_path, local_path, fs_default_name, ugi, process_num=12): + hadoop_home = "$HADOOP_HOME" + + configs = { + "fs.default.name": fs_default_name, + "hadoop.job.ugi": ugi + } + + client = hdfs.HDFSClient(hadoop_home, configs) + downloads = hdfs.multi_download( + client, + afs_path, + local_path, + self.instance.get_worker_index(), + self.instance.get_node_cnt() / 2, + multi_processes=process_num) def config_distributed_nodes(self, dist_opt): @@ -179,10 +196,11 @@ class AsyncExecutor(object): self.executor.gather_servers(ips, self.instance.get_node_cnt()) self.instance.barrier_all() #wait all worker start self.instance.barrier_all() #wait init model + self.instance.barrier_all() #wait for download_data self.instance.barrier_all() #wait worker do all things self.instance.barrier_all() #sync - def init_worker(self, dist_desc): + def init_worker(self, dist_desc, afs_path, local_path, fs_default_name, ugi): self.instance.barrier_all() #wait all server start ips = self.instance.gather_ips() self.executor.init_worker(dist_desc, ips, self.instance.get_node_cnt(), self.instance._rankid) @@ -190,6 +208,8 @@ class AsyncExecutor(object): if self.instance.is_first_worker(): self.executor.init_model() self.instance.barrier_all() #wait init model + self.download_data(afs_path, local_path, fs_default_name, ugi, process_num=12) + self.instance.barrier_all() #wait for download_data def init_model(self): self.executor.init_model() diff --git a/python/paddle/fluid/contrib/utils/__init__.py b/python/paddle/fluid/contrib/utils/__init__.py index 6e479bdc2b..2fe9f702f3 100644 --- a/python/paddle/fluid/contrib/utils/__init__.py +++ b/python/paddle/fluid/contrib/utils/__init__.py @@ -13,8 +13,8 @@ # limitations under the License. from __future__ import print_function -from . import lookup_table_utils -from .lookup_table_utils import * +#from . import lookup_table_utils +#from .lookup_table_utils import * from . 
import hdfs_utils from .hdfs_utils import * From 10ed9e0a6e3ab06e0b42172126bb8872828cbe60 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Tue, 11 Dec 2018 22:03:33 +0800 Subject: [PATCH 23/62] download & run & instance --- paddle/fluid/framework/async_executor.cc | 38 ++++++++++++++---------- paddle/fluid/framework/async_executor.h | 3 +- python/paddle/fluid/async_executor.py | 23 ++++++++------ 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index 45a914b70e..f0ca375f95 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -191,18 +191,19 @@ void AsyncExecutor::SaveModel(const std::string& path) { } } -void AsyncExecutor::PrepareDenseThread() { - DensePullThreadParam param; - param.ps_client = _pslib_ptr->_worker_ptr;; - param.threshold = 1;//GlobalConfig::instance().pull_dense_per_batch; //TODO - param.training_thread_num = actual_thread_num; - param.root_scope = root_scope_; - //param.dense_params = &GlobalConfig::instance().dense_variable_name; //TODO - param.dense_params = &_param_config.dense_variable_name; - - _pull_dense_thread = std::shared_ptr(new DensePullThread(param)); - _pull_dense_thread->start(); - +void AsyncExecutor::PrepareDenseThread(const std::string& mode) { + if (mode == "mpi") { + DensePullThreadParam param; + param.ps_client = _pslib_ptr->_worker_ptr;; + param.threshold = 1;//GlobalConfig::instance().pull_dense_per_batch; //TODO + param.training_thread_num = actual_thread_num; + param.root_scope = root_scope_; + //param.dense_params = &GlobalConfig::instance().dense_variable_name; //TODO + param.dense_params = &_param_config.dense_variable_name; + + _pull_dense_thread = std::shared_ptr(new DensePullThread(param)); + _pull_dense_thread->start(); + } } void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, @@ -210,6 +211,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, const std::vector& filelist, const int thread_num, const std::vector& fetch_var_names, + const std::string& mode, const bool debug) { std::vector threads; @@ -251,11 +253,15 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, // todo: should be factory method for creating datafeed std::vector> readers; PrepareReaders(readers, actual_thread_num, data_feed_desc, filelist); - PrepareDenseThread(); + PrepareDenseThread(mode); std::vector> workers; workers.resize(actual_thread_num); for (auto& worker : workers) { - worker.reset(new AsyncExecutorThreadWorker); + if (mode == "mpi") { + worker.reset(new AsyncExecutorThreadWorker); + } else { + worker.reset(new ExecutorThreadWorker); + } } // prepare thread resource here @@ -274,7 +280,9 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, for (auto& th : threads) { th.join(); } - _pull_dense_thread->stop(); + if (mode == "mpi") { + _pull_dense_thread->stop(); + } root_scope_->DropKids(); return; diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index 4b46126217..93010f8a9b 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -61,6 +61,7 @@ class AsyncExecutor { const std::vector& filelist, const int thread_num, const std::vector& fetch_names, + const std::string& mode, const bool debug = false); //void ConfigPslib(const char* dist_desc, uint64_t* host_sign_list, int node_num, int index); void InitServer(const std::string& dist_desc, int index); @@ -79,7 +80,7 
@@ class AsyncExecutor { const std::vector& fetch_var_names, Scope* root_scope, const int thread_index, const bool debug); - void PrepareDenseThread(); + void PrepareDenseThread(const std::string& mode); public: std::shared_ptr _pslib_ptr; std::shared_ptr _pull_dense_thread; diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index cce7ec5cca..e760d58fd2 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -87,9 +87,8 @@ class AsyncExecutor(object): scope = global_scope() self.executor = core.AsyncExecutor(scope, p) - self.instance = ps_instance.PaddlePSInstance(1, 2) - def run(self, program, data_feed, filelist, thread_num, fetch, debug=False): + def run(self, program, data_feed, filelist, thread_num, fetch, mode="", debug=False): """ Run program by this AsyncExecutor. Training dataset will be in filelist. Users can also inspect certain variables by naming them in parameter @@ -151,10 +150,11 @@ class AsyncExecutor(object): self.executor.run_from_files(program_desc, data_feed.desc(), filelist, thread_num, - fetch_var_names, debug) + fetch_var_names, mode, debug) def download_data(self, afs_path, local_path, fs_default_name, ugi, process_num=12): - hadoop_home = "$HADOOP_HOME" + #hadoop_home = "$HADOOP_HOME" + hadoop_home = "~/tools/hadoop-xingtian/hadoop/" configs = { "fs.default.name": fs_default_name, @@ -169,8 +169,11 @@ class AsyncExecutor(object): self.instance.get_worker_index(), self.instance.get_node_cnt() / 2, multi_processes=process_num) + self.instance.barrier_all() #wait for download_data #TODO only barriere worker - def config_distributed_nodes(self, dist_opt): + def config_distributed_nodes(self): + self.instance = ps_instance.PaddlePSInstance(1, 2) + return self.instance # get total rank # get rank index @@ -196,11 +199,15 @@ class AsyncExecutor(object): self.executor.gather_servers(ips, self.instance.get_node_cnt()) self.instance.barrier_all() #wait all worker start self.instance.barrier_all() #wait init model - self.instance.barrier_all() #wait for download_data + self.instance.barrier_all() #wait for download_data #TODO remove this after only barrier worker self.instance.barrier_all() #wait worker do all things self.instance.barrier_all() #sync - def init_worker(self, dist_desc, afs_path, local_path, fs_default_name, ugi): + def init_worker(self, dist_desc, startup_program): + place = core.CPUPlace() + executor = Executor(place) + executor.run(startup_program) + self.instance.barrier_all() #wait all server start ips = self.instance.gather_ips() self.executor.init_worker(dist_desc, ips, self.instance.get_node_cnt(), self.instance._rankid) @@ -208,8 +215,6 @@ class AsyncExecutor(object): if self.instance.is_first_worker(): self.executor.init_model() self.instance.barrier_all() #wait init model - self.download_data(afs_path, local_path, fs_default_name, ugi, process_num=12) - self.instance.barrier_all() #wait for download_data def init_model(self): self.executor.init_model() From 1500c8e621d4d356d27c1483f7068839f8fb66f6 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Wed, 12 Dec 2018 09:58:04 +0800 Subject: [PATCH 24/62] is instance is None --- python/paddle/fluid/async_executor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index e760d58fd2..2a6a11805e 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -87,6 +87,7 @@ class AsyncExecutor(object): 
scope = global_scope() self.executor = core.AsyncExecutor(scope, p) + self.instance = None def run(self, program, data_feed, filelist, thread_num, fetch, mode="", debug=False): """ @@ -154,6 +155,9 @@ class AsyncExecutor(object): def download_data(self, afs_path, local_path, fs_default_name, ugi, process_num=12): #hadoop_home = "$HADOOP_HOME" + if self.instance is None: + raise ValueError('instance is None, please run config_distributed_nodes init instance') + hadoop_home = "~/tools/hadoop-xingtian/hadoop/" configs = { @@ -182,15 +186,21 @@ class AsyncExecutor(object): pass def get_instance(self): + if self.instance is None: + raise ValueError('instance is None, please run config_distributed_nodes init instance') return self.instance def stop_server(self): + if self.instance is None: + raise ValueError('instance is None, please run config_distributed_nodes init instance') self.instance.barrier_all() #worker do all things if self.instance.is_first_worker(): self.executor.stop_server() self.instance.barrier_all() #sync def init_server(self, dist_desc): + if self.instance is None: + raise ValueError('instance is None, please run config_distributed_nodes init instance') self.executor.init_server(dist_desc, self.instance._rankid) ip = self.executor.start_server() self.instance.set_ip(ip) @@ -204,6 +214,8 @@ class AsyncExecutor(object): self.instance.barrier_all() #sync def init_worker(self, dist_desc, startup_program): + if self.instance is None: + raise ValueError('instance is None, please run config_distributed_nodes init instance') place = core.CPUPlace() executor = Executor(place) executor.run(startup_program) @@ -217,8 +229,12 @@ class AsyncExecutor(object): self.instance.barrier_all() #wait init model def init_model(self): + if self.instance is None: + raise ValueError('instance is None, please run config_distributed_nodes init instance') self.executor.init_model() def save_model(self, save_path): + if self.instance is None: + raise ValueError('instance is None, please run config_distributed_nodes init instance') self.executor.save_model(save_path) From faffc25c19cdc9504214a4c0c85aa131a44079de Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Wed, 12 Dec 2018 15:28:27 +0800 Subject: [PATCH 25/62] fix hadoop home bug & refine setup.py --- python/paddle/fluid/async_executor.py | 5 +---- python/setup.py.in | 2 ++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 2a6a11805e..b077e1be7e 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -153,13 +153,10 @@ class AsyncExecutor(object): data_feed.desc(), filelist, thread_num, fetch_var_names, mode, debug) - def download_data(self, afs_path, local_path, fs_default_name, ugi, process_num=12): - #hadoop_home = "$HADOOP_HOME" + def download_data(self, afs_path, local_path, fs_default_name, ugi, hadoop_home="$HADOOP_HOME", process_num=12): if self.instance is None: raise ValueError('instance is None, please run config_distributed_nodes init instance') - hadoop_home = "~/tools/hadoop-xingtian/hadoop/" - configs = { "fs.default.name": fs_default_name, "hadoop.job.ugi": ugi diff --git a/python/setup.py.in b/python/setup.py.in index 200b96ec54..9418804be2 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -103,8 +103,10 @@ packages=['paddle', 'paddle.fluid', 'paddle.fluid.proto', 'paddle.fluid.proto.profiler', + 'paddle.fluid.distributed', 'paddle.fluid.layers', 'paddle.fluid.contrib', + 
'paddle.fluid.contrib.utils', 'paddle.fluid.contrib.decoder', 'paddle.fluid.contrib.quantize', 'paddle.fluid.transpiler', From 009c7cf6ccf3f8ece6922d532df38cadd3ca5c84 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Wed, 12 Dec 2018 16:23:50 +0800 Subject: [PATCH 26/62] add finialize --- python/paddle/fluid/contrib/utils/__init__.py | 2 +- python/paddle/fluid/distributed/helper.py | 3 +++ python/paddle/fluid/distributed/ps_instance.py | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/utils/__init__.py b/python/paddle/fluid/contrib/utils/__init__.py index 2fe9f702f3..20b2cc381a 100644 --- a/python/paddle/fluid/contrib/utils/__init__.py +++ b/python/paddle/fluid/contrib/utils/__init__.py @@ -18,5 +18,5 @@ from __future__ import print_function from . import hdfs_utils from .hdfs_utils import * -__all__ = lookup_table_utils.__all__ +#__all__ = lookup_table_utils.__all__ __all__ = hdfs_utils.__all__ diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py index 4cc5eb2a92..1244b4c0ca 100644 --- a/python/paddle/fluid/distributed/helper.py +++ b/python/paddle/fluid/distributed/helper.py @@ -44,5 +44,8 @@ class MPIHelper(object): def get_hostname(self): import socket return socket.gethostname() + + def finalize(self): + MPI.Finalize() diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py index 94e123c2ce..dce5dfc5bd 100644 --- a/python/paddle/fluid/distributed/ps_instance.py +++ b/python/paddle/fluid/distributed/ps_instance.py @@ -97,6 +97,7 @@ class PaddlePSInstance(object): pass def finalize(self): + self.dh.finalize() pass From 2c1e986f22c7535ffd420d9370f79cf93bd5bf25 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Wed, 12 Dec 2018 19:21:31 +0800 Subject: [PATCH 27/62] barrier_all to barrier_worker --- python/paddle/fluid/async_executor.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index b077e1be7e..af42d2912f 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -170,7 +170,8 @@ class AsyncExecutor(object): self.instance.get_worker_index(), self.instance.get_node_cnt() / 2, multi_processes=process_num) - self.instance.barrier_all() #wait for download_data #TODO only barriere worker + #self.instance.barrier_all() #wait for download_data #TODO only barriere worker + self.instance.barrier_worker() #wait for download_data #TODO only barriere worker def config_distributed_nodes(self): self.instance = ps_instance.PaddlePSInstance(1, 2) @@ -187,13 +188,13 @@ class AsyncExecutor(object): raise ValueError('instance is None, please run config_distributed_nodes init instance') return self.instance - def stop_server(self): + def stop(self): if self.instance is None: raise ValueError('instance is None, please run config_distributed_nodes init instance') - self.instance.barrier_all() #worker do all things + self.instance.barrier_worker() #worker do all things if self.instance.is_first_worker(): self.executor.stop_server() - self.instance.barrier_all() #sync + self.instance.barrier_worker() #sync def init_server(self, dist_desc): if self.instance is None: @@ -205,10 +206,6 @@ class AsyncExecutor(object): ips = self.instance.gather_ips() self.executor.gather_servers(ips, self.instance.get_node_cnt()) self.instance.barrier_all() #wait all worker start - self.instance.barrier_all() #wait init model - 
self.instance.barrier_all() #wait for download_data #TODO remove this after only barrier worker - self.instance.barrier_all() #wait worker do all things - self.instance.barrier_all() #sync def init_worker(self, dist_desc, startup_program): if self.instance is None: @@ -223,7 +220,7 @@ class AsyncExecutor(object): self.instance.barrier_all() #wait all worker start if self.instance.is_first_worker(): self.executor.init_model() - self.instance.barrier_all() #wait init model + self.instance.barrier_worker() #wait init model def init_model(self): if self.instance is None: From 5d3ecbfdf503965cc66eda6f8c75849ae0546c1e Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Wed, 12 Dec 2018 19:48:30 +0800 Subject: [PATCH 28/62] fix hdfs bug --- python/paddle/fluid/contrib/utils/hdfs_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/utils/hdfs_utils.py b/python/paddle/fluid/contrib/utils/hdfs_utils.py index 251665d85e..ff1a2d3e4a 100644 --- a/python/paddle/fluid/contrib/utils/hdfs_utils.py +++ b/python/paddle/fluid/contrib/utils/hdfs_utils.py @@ -52,9 +52,10 @@ class HDFSClient(object): ret_code = 0 ret_out = None ret_err = None + whole_commands = " ".join(whole_commands) for x in range(retry_times + 1): proc = subprocess.Popen( - whole_commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + whole_commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) (output, errors) = proc.communicate() ret_code, ret_out, ret_err = proc.returncode, output, errors if ret_code: From 7bd16e3afad479a559fba9321581def2c5d90165 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Wed, 12 Dec 2018 21:47:51 +0800 Subject: [PATCH 29/62] fix some bug & add log --- paddle/fluid/framework/async_executor.cc | 2 +- .../fluid/framework/executor_thread_worker.cc | 28 +++++++++++++------ .../fluid/framework/executor_thread_worker.h | 2 +- python/paddle/fluid/async_executor.py | 3 +- .../paddle/fluid/contrib/utils/hdfs_utils.py | 5 +++- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index f0ca375f95..6efe5cafe7 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -111,7 +111,7 @@ void AsyncExecutor::InitParamConfig() { std::vector tmp_sparse_variable_name; for (int i = 0u; i < table.slot_value_size(); ++i) { tmp_sparse_variable_name.push_back(table.slot_value(i)); - _param_config.slot_alias_to_table[table.slot_value(i)] = table.table_id(); + _param_config.slot_alias_to_table[table.slot_key(i)] = table.table_id(); } std::vector tmp_sparse_gradient_variable_name; for (auto i = 0u; i < table.slot_gradient_size(); ++i) { diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index a0455b26ef..7004ecf23b 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -330,6 +330,7 @@ void AsyncExecutorThreadWorker::TrainFiles() { print_fetch_var(thread_scope_, fetch_var_names_[i]); } // end for (int i = 0...) 
} // end while () + LOG(ERROR) << "TRAIN DONE"; } void AsyncExecutorThreadWorker::SetPSlibPtr(std::shared_ptr pslib_ptr) { @@ -571,25 +572,30 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) { void AsyncExecutorThreadWorker::PushSparse(int table_id) { auto slot_dim = _param_config->slot_dim; //TODO auto fea_dim = _param_config->fea_dim;//_current_train_job.fea_dim();TODO - auto& features = _features[table_id]; + auto& features = _features[table_id]; + CHECK(features.size() < 1000000) << "features size:" << features.size(); //std::vector gradient_var; //auto& gradient_var = GlobalConfig::instance().input_gradient_variable_name; //TODO - auto& push_g = _feature_push_value[table_id]; + auto& push_g = _feature_push_value[table_id]; check_pull_push_memory(features, push_g, fea_dim); + CHECK(push_g.size() == features.size() + 1) << "push_g size:" << push_g.size() << " features size:" << features.size(); uint64_t fea_idx = 0u; - auto& fea_info = _fea_info[table_id]; //TODO + auto& fea_info = _fea_info[table_id]; int offset = 0; //if (!_current_train_job.use_cvm_feature()) { //TODO offset = 2; //} - const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); // slot_idx = 0 is label TODO for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { - if (_param_config->slot_alias_to_table[feed_vec[slot_idx]] != table_id) { + if (_param_config->slot_alias_to_table.find(feed_vec[slot_idx]) == _param_config->slot_alias_to_table.end()) { + LOG(ERROR) << "ERROR slot_idx:" << slot_idx << " name:" << feed_vec[slot_idx]; + } else if (_param_config->slot_alias_to_table[feed_vec[slot_idx]] != table_id) { + LOG(ERROR) << "ERROR continue"; continue; } - Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[table_id][slot_idx - 1]); + Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[table_id][slot_idx - 1]); + CHECK(g_var != nullptr) << "var[" << _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; LoDTensor* g_tensor = g_var->GetMutable(); if (g_tensor == NULL) { LOG(ERROR) << "var[" << _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; @@ -598,13 +604,16 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { float* g = g_tensor->data(); Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + CHECK(var != nullptr) << "var[" << feed_vec[slot_idx] << "] not found"; LoDTensor* tensor = var->GetMutable(); if (tensor == NULL) { LOG(ERROR) << "var[" << feed_vec[slot_idx] << "] not found"; exit(-1); } - int len = tensor->lod()[0].back(); - assert(slot_dim * len == g_tensor->numel()); + //int len = tensor->lod()[0].back(); + int len = tensor->numel(); + CHECK(slot_dim * len == g_tensor->numel()) << "len:" << len << " g_numel:" << g_tensor->numel(); + CHECK(len == tensor->numel()) << "len:" << len << "t_numel:" << tensor->numel(); int64_t* ids = tensor->data(); for (auto id_idx = 0u; id_idx < len; ++id_idx){ if (ids[id_idx] == 0) { @@ -613,12 +622,13 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { } memcpy(push_g[fea_idx].data() + offset, g, sizeof(float) * slot_dim); push_g[fea_idx][0] = 1.0f; + CHECK(fea_idx < fea_info.size()) << "fea_idx:" << fea_idx << " size:" << fea_info.size(); push_g[fea_idx][1] = static_cast(fea_info[fea_idx].label); g += slot_dim; fea_idx++; } } - assert(fea_idx == features.size()); + CHECK(fea_idx == features.size()) << "fea_idx:" << fea_idx << " features size:" << features.size(); CHECK(features.size() > 0); std::vector push_g_vec; diff --git 
a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index b3ee9dfaec..0c9a47690b 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -49,7 +49,7 @@ struct AsyncWorkerParamConfig { std::vector sparse_table_id; std::map> slot_input_vec; //6048slot 6050slot //name std::map> gradient_var; //6048slot_embed - std::unordered_map slot_alias_to_table; //TODO done + std::map slot_alias_to_table; //TODO done }; struct DensePullThreadParam { diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index af42d2912f..13d876e57b 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -153,7 +153,7 @@ class AsyncExecutor(object): data_feed.desc(), filelist, thread_num, fetch_var_names, mode, debug) - def download_data(self, afs_path, local_path, fs_default_name, ugi, hadoop_home="$HADOOP_HOME", process_num=12): + def download_data(self, afs_path, local_path, fs_default_name, ugi, file_cnt, hadoop_home="$HADOOP_HOME", process_num=12): if self.instance is None: raise ValueError('instance is None, please run config_distributed_nodes init instance') @@ -169,6 +169,7 @@ class AsyncExecutor(object): local_path, self.instance.get_worker_index(), self.instance.get_node_cnt() / 2, + file_cnt, multi_processes=process_num) #self.instance.barrier_all() #wait for download_data #TODO only barriere worker self.instance.barrier_worker() #wait for download_data #TODO only barriere worker diff --git a/python/paddle/fluid/contrib/utils/hdfs_utils.py b/python/paddle/fluid/contrib/utils/hdfs_utils.py index ff1a2d3e4a..42b4d7feab 100644 --- a/python/paddle/fluid/contrib/utils/hdfs_utils.py +++ b/python/paddle/fluid/contrib/utils/hdfs_utils.py @@ -427,6 +427,7 @@ def multi_download(client, local_path, trainer_id, trainers, + file_cnt, multi_processes=5): """ multi_download @@ -435,6 +436,7 @@ def multi_download(client, :param local_path: path on local :param trainer_id: current trainer id :param trainers: all trainers number + :param file_cnt: all file number :param multi_processes: the download data process at the same time, default=5 :return: None """ @@ -450,7 +452,7 @@ def multi_download(client, client.make_local_dirs(local_path) _logger.info("Make local dir {} successfully".format(local_path)) - all_need_download = client.lsr(hdfs_path, sort=True) + all_need_download = client.lsr(hdfs_path, sort=True)[:file_cnt] need_download = all_need_download[trainer_id::trainers] _logger.info("Get {} files From all {} files need to be download from {}". 
format(len(need_download), len(all_need_download), hdfs_path)) @@ -501,6 +503,7 @@ if __name__ == "__main__": "/home/xx/data1", 1, 5, + 100, multi_processes=5) multi_upload(client, "/user/com/train-25/model", "/home/xx/data1") From 06930531887547286f3c4ad096d1fd0794749867 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Tue, 11 Dec 2018 14:43:52 +0800 Subject: [PATCH 30/62] add liscence --- python/paddle/fluid/async_executor.py | 3 ++- python/paddle/fluid/distributed/downpour.py | 13 ++++++++++ python/paddle/fluid/distributed/helper.py | 14 +++++++++++ python/paddle/fluid/distributed/node.py | 13 ++++++++++ .../paddle/fluid/distributed/ps_instance.py | 24 ++++++++++--------- 5 files changed, 55 insertions(+), 12 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 13d876e57b..099805ac1b 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -76,7 +76,7 @@ class AsyncExecutor(object): Note: Only running on CPUPlace supported. """ - def __init__(self, place=None): + def __init__(self, place=None, run_mode=""): if place is None: place = core.CPUPlace() if not isinstance(place, core.CPUPlace): @@ -89,6 +89,7 @@ class AsyncExecutor(object): self.executor = core.AsyncExecutor(scope, p) self.instance = None + def run(self, program, data_feed, filelist, thread_num, fetch, mode="", debug=False): """ Run program by this AsyncExecutor. Training dataset will be in filelist. diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index c1762dd768..9ef9e14ccc 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -1,3 +1,16 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + from .node import DownpourServer from .node import DownpourWorker from ..backward import append_backward diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py index 1244b4c0ca..986525e5d8 100644 --- a/python/paddle/fluid/distributed/helper.py +++ b/python/paddle/fluid/distributed/helper.py @@ -1,3 +1,17 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from mpi4py import MPI import ps_pb2 as pslib diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index 1f4aeeac73..8755323006 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -1,3 +1,16 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + import ps_pb2 as pslib class Server(object): diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py index dce5dfc5bd..b93da053a3 100644 --- a/python/paddle/fluid/distributed/ps_instance.py +++ b/python/paddle/fluid/distributed/ps_instance.py @@ -1,8 +1,18 @@ -#import paddle.fluid.distributed.helper as dist_helper +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + import helper as dist_helper import sys -#from mpi4py import MPI - class PaddlePSInstance(object): def __init__(self, server_worker_mode, proc_per_node): @@ -83,17 +93,11 @@ class PaddlePSInstance(object): return self._nodes def barrier_all(self): - #print self._rankid, "begin" - #sys.stdout.flush() self.dh.comm.barrier() - #print self._rankid, "end" def barrier_worker(self): if self.is_worker(): - #print "worker: ", self._rankid, "begin" - #sys.stdout.flush() self._comm.barrier() - #print "worker: ", self._rankid, "end" pass def finalize(self): @@ -104,5 +108,3 @@ class PaddlePSInstance(object): if __name__ == "__main__": instance = PaddlePSInstance(1, 1, 2, 50) instance.barrier_all() - #print "-----" - #instance.barrier_worker() From 33ee5cad61383db6bc06681f9f1afa76492a5759 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Thu, 13 Dec 2018 10:33:06 +0800 Subject: [PATCH 31/62] format code style of executor_thread_worker.cc --- .../fluid/framework/executor_thread_worker.cc | 77 +++++++++---------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 7004ecf23b..86ac93be3e 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -417,48 +417,46 @@ void AsyncExecutorThreadWorker::PrepareParams() { } void AsyncExecutorThreadWorker::UpdateParams() { - for (auto i: _param_config->sparse_table_id) {//TODO - //for (int i = 0; i < 1; ++i) { - PushSparse(i); - } - //for (auto i = 0u; i < GlobalConfig::instance().dense_table_id.size(); ++i) {//TODO - for (auto i: _param_config->dense_table_id) { - PushDense(i); - } - int32_t 
tmp_push_dense_wait_times = -1;//_param_config->tmp_push_dense_wait_times; //TODO - int32_t tmp_push_sparse_wait_times = -1;//_param_config->tmp_push_sparse_wait_times; //TODO - static uint32_t push_dense_wait_times = static_cast(tmp_push_dense_wait_times); - static uint32_t push_sparse_wait_times = static_cast(tmp_push_sparse_wait_times); - - if (_push_dense_status.size() >= push_dense_wait_times) { - for (auto& t : _push_dense_status) { - t.wait(); - } - _push_dense_status.resize(0); - } - if (tmp_push_dense_wait_times == -1) { - _push_dense_status.resize(0); - } - if (_push_sparse_status.size() >= push_sparse_wait_times) { - for (auto& t : _push_sparse_status) { - t.wait(); - } - _push_sparse_status.resize(0); - } - if (tmp_push_sparse_wait_times == -1) { - _push_sparse_status.resize(0); - } - //for (auto dense_table_id : GlobalConfig::instance().dense_table_id) {//TODO - for (auto dense_table_id: _param_config->dense_table_id) { - _pull_dense_thread->increase_thread_version(thread_id_, dense_table_id); + for (auto i : _param_config->sparse_table_id) { + PushSparse(i); + } + for (auto i : _param_config->dense_table_id) { + PushDense(i); + } + // _param_config->tmp_push_dense_wait_times + int32_t tmp_push_dense_wait_times = -1; + // _param_config->tmp_push_sparse_wait_times + int32_t tmp_push_sparse_wait_times = -1; + static uint32_t push_dense_wait_times = + static_cast(tmp_push_dense_wait_times); + static uint32_t push_sparse_wait_times = + static_cast(tmp_push_sparse_wait_times); + + if (_push_dense_status.size() >= push_dense_wait_times) { + for (auto& t : _push_dense_status) { + t.wait(); + } + _push_dense_status.resize(0); + } + if (tmp_push_dense_wait_times == -1) { + _push_dense_status.resize(0); + } + if (_push_sparse_status.size() >= push_sparse_wait_times) { + for (auto& t : _push_sparse_status) { + t.wait(); } - //} + _push_sparse_status.resize(0); + } + if (tmp_push_sparse_wait_times == -1) { + _push_sparse_status.resize(0); + } + for (auto dense_table_id : _param_config->dense_table_id) { + _pull_dense_thread->increase_thread_version(thread_id_, dense_table_id); + } } void AsyncExecutorThreadWorker::PushDense(int table_id) { std::vector regions; - //auto& variables = GlobalConfig::instance().dense_gradient_variable_name[table_id]; - //std::vector variables; for (auto& t : _param_config->dense_gradient_variable_name[table_id]) { Variable* var = thread_scope_->FindVar(t); CHECK(var != nullptr) << "var[" << t << "] not found"; @@ -469,7 +467,8 @@ void AsyncExecutorThreadWorker::PushDense(int table_id) { regions.emplace_back(std::move(reg)); } - auto status = _pslib_ptr->_worker_ptr->push_dense(regions.data(), regions.size(), table_id); + auto status = _pslib_ptr->_worker_ptr->push_dense( + regions.data(), regions.size(), table_id); _push_dense_status.push_back(std::move(status)); } @@ -478,7 +477,7 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) { auto& features = _features[table_id]; auto& feature_value = _feature_value[table_id]; - auto fea_dim = _param_config->fea_dim; //TODO + auto fea_dim = _param_config->fea_dim; // slot id starts from 1 features.clear(); features.resize(0); From c71279bc697a101b9afe74a1e19fc9fb99195bd9 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Thu, 13 Dec 2018 11:23:16 +0800 Subject: [PATCH 32/62] refine code style for async_executor.h and async_executor.cc --- paddle/fluid/framework/async_executor.cc | 101 ++++++++++++++--------- paddle/fluid/framework/async_executor.h | 25 +++--- 2 files changed, 79 insertions(+), 47 deletions(-) 
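(Editor's note, kept here below the diffstat so `git am` still applies the patch unchanged: PATCHes 21/62 through 29/62 reshape the user-facing AsyncExecutor flow several times, so a minimal end-to-end driver is sketched once, here, for review. It uses only calls that appear in the diffs above -- config_distributed_nodes(), init_server()/init_worker(), download_data(), run(..., mode="mpi"), stop() and finalize(); the proto paths, AFS address, UGI string, file counts and thread counts are hypothetical placeholders, and the is_worker()/else-server split is inferred from PaddlePSInstance rather than shown verbatim in any hunk.)

import paddle.fluid as fluid

exe = fluid.AsyncExecutor()
instance = exe.config_distributed_nodes()  # builds PaddlePSInstance(1, 2) over MPI

# Hypothetical inputs; a real job derives these from its own model and config.
dist_desc = open("ps_param.prototxt").read()          # pslib PSParameter, text format
data_feed = fluid.DataFeedDesc("data_feed.prototxt")  # slot/feed description
filelist = ["./local_data/part-%03d" % i for i in range(10)]

if instance.is_worker():
    # Runs the startup program locally and registers with the servers; the
    # first worker also pushes the initial dense parameters (init_model).
    exe.init_worker(dist_desc, fluid.default_startup_program())
    exe.download_data("/app/train_data", "./local_data",
                      "afs://example.com:9902", "user,passwd",
                      file_cnt=100, process_num=12)
    exe.run(fluid.default_main_program(), data_feed, filelist,
            thread_num=10, fetch=[], mode="mpi")
    exe.stop()   # barrier_worker(), then the first worker stops the servers
else:
    exe.init_server(dist_desc)  # starts a pslib server and gathers peer IPs
instance.finalize()             # MPI.Finalize(), added in PATCH 26/62

The mode argument is what routes run() into the pslib path: per the PATCH 23/62 hunks, PrepareDenseThread() only spawns the dense-pull thread, and RunFromFile() only creates AsyncExecutorThreadWorker instead of the plain ExecutorThreadWorker, when mode == "mpi"; a single-node job keeps the old behaviour by leaving mode empty.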
diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index 6efe5cafe7..c62d62a5dc 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -66,15 +66,20 @@ void PrepareReaders(std::vector>& readers, // NOLINT } void AsyncExecutor::InitServer(const std::string& dist_desc, int index) { - _pslib_ptr = std::shared_ptr(new paddle::distributed::PSlib()); - _pslib_ptr->init_server(dist_desc, index);//TODO done - + _pslib_ptr = + std::shared_ptr( + new paddle::distributed::PSlib()); + _pslib_ptr->init_server(dist_desc, index); InitParamConfig(); } -void AsyncExecutor::InitWorker(const std::string& dist_desc, std::vector& host_sign_list, int node_num, int index) { - _pslib_ptr = std::shared_ptr(new paddle::distributed::PSlib()); - _pslib_ptr->init_worker(dist_desc, host_sign_list.data(), node_num, index);//TODO done +void AsyncExecutor::InitWorker(const std::string& dist_desc, + const std::vector& host_sign_list, + int node_num, int index) { + _pslib_ptr = std::shared_ptr( + new paddle::distributed::PSlib()); + _pslib_ptr->init_worker( + dist_desc, host_sign_list.data(), node_num, index); InitParamConfig(); } @@ -87,43 +92,65 @@ void AsyncExecutor::StopServer() { _pslib_ptr->stop_server(); } -void AsyncExecutor::GatherServers(std::vector& host_sign_list, int node_num) { +void AsyncExecutor::GatherServers( + std::vector& host_sign_list, int node_num) { _pslib_ptr->gather_servers(host_sign_list.data(), node_num); } void AsyncExecutor::InitParamConfig() { - for (int i = 0; i < _pslib_ptr->get_param()->server_param().downpour_server_param().downpour_table_param_size(); ++i) { - if (_pslib_ptr->get_param()->server_param().downpour_server_param().downpour_table_param(i).table_class().find("SparseTable") != -1) { - _param_config.fea_dim = _pslib_ptr->get_param()->server_param().downpour_server_param().downpour_table_param(i).accessor().fea_dim(); //TODO + for (int i = 0; i < + _pslib_ptr->get_param()->server_param().\ + downpour_server_param().\ + downpour_table_param_size(); + ++i) { + if (_pslib_ptr->get_param()->server_param().\ + downpour_server_param().downpour_table_param(i).\ + table_class().find("SparseTable") != -1) { + _param_config.fea_dim = _pslib_ptr->get_param()->server_param().\ + downpour_server_param().\ + downpour_table_param(i).\ + accessor().fea_dim(); break; } } - _param_config.slot_dim = _param_config.fea_dim - 2; //TODO - _param_config.tmp_push_dense_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().push_dense_per_batch()); - _param_config.tmp_push_sparse_wait_times = (int32_t)(_pslib_ptr->get_param()->trainer_param().push_sparse_per_batch()); - - for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().skip_op_size(); ++t) { - _param_config.skip_op.push_back(_pslib_ptr->get_param()->trainer_param().skip_op(t)); + _param_config.slot_dim = _param_config.fea_dim - 2; + _param_config.tmp_push_dense_wait_times = static_cast( + _pslib_ptr->get_param()->trainer_param().push_dense_per_batch()); + _param_config.tmp_push_sparse_wait_times = static_cast( + _pslib_ptr->get_param()->trainer_param().push_sparse_per_batch()); + + for (auto t = 0u; + t < _pslib_ptr->get_param()->trainer_param().skip_op_size(); + ++t) { + _param_config.skip_op.push_back( + _pslib_ptr->get_param()->trainer_param().skip_op(t)); } - //sparse - for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().sparse_table_size(); ++t) { + + for (auto t = 0u; + t < 
_pslib_ptr->get_param()->trainer_param().sparse_table_size(); + ++t) { auto& table = _pslib_ptr->get_param()->trainer_param().sparse_table(t); std::vector tmp_sparse_variable_name; for (int i = 0u; i < table.slot_value_size(); ++i) { tmp_sparse_variable_name.push_back(table.slot_value(i)); - _param_config.slot_alias_to_table[table.slot_key(i)] = table.table_id(); + _param_config.slot_alias_to_table[table.slot_key(i)] = + table.table_id(); } std::vector tmp_sparse_gradient_variable_name; for (auto i = 0u; i < table.slot_gradient_size(); ++i) { tmp_sparse_gradient_variable_name.push_back( table.slot_gradient(i)); } - _param_config.slot_input_vec[table.table_id()] = std::move(tmp_sparse_variable_name); - _param_config.gradient_var[table.table_id()] = std::move(tmp_sparse_gradient_variable_name); + _param_config.slot_input_vec[table.table_id()] = + std::move(tmp_sparse_variable_name); + _param_config.gradient_var[table.table_id()] = + std::move(tmp_sparse_gradient_variable_name); _param_config.sparse_table_id.push_back(table.table_id()); } - //dense - for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().dense_table_size(); ++t) { + + for (auto t = 0u; + t < _pslib_ptr->get_param()->trainer_param().dense_table_size(); + ++t) { auto& table = _pslib_ptr->get_param()->trainer_param().dense_table(t); std::vector tmp_dense_variable_name; for (int i = 0u; i < table.dense_variable_name_size(); ++i) { @@ -134,20 +161,18 @@ void AsyncExecutor::InitParamConfig() { tmp_dense_gradient_variable_name.push_back( table.dense_gradient_variable_name(i)); } - _param_config.dense_variable_name[table.table_id()] = std::move(tmp_dense_variable_name); - _param_config.dense_gradient_variable_name[table.table_id()] = std::move(tmp_dense_gradient_variable_name); + _param_config.dense_variable_name[table.table_id()] = + std::move(tmp_dense_variable_name); + _param_config.dense_gradient_variable_name[table.table_id()] = + std::move(tmp_dense_gradient_variable_name); _param_config.dense_table_id.push_back(table.table_id()); - _param_config.dense_table_size.push_back(table.fea_dim()); //TODO + _param_config.dense_table_size.push_back(table.fea_dim()); } } void AsyncExecutor::InitModel() { - //TODO only rank = 0 do this - //std::vector all_dense_table_id; //TODO - //all_dense_table_id.push_back(0); //done - for (auto table_id: _param_config.dense_table_id) { + for (auto table_id : _param_config.dense_table_id) { std::vector regions; - //std::vector variables; //TODO for (auto& t : _param_config.dense_variable_name[table_id]) { Variable* var = root_scope_->FindVar(t); CHECK(var != nullptr) << "var[" << t << "] not found"; @@ -169,13 +194,15 @@ void AsyncExecutor::InitModel() { regions.emplace_back(std::move(reg)); } - auto push_status = _pslib_ptr->_worker_ptr->push_dense_param(regions.data(), regions.size(), table_id); + auto push_status = + _pslib_ptr->_worker_ptr->push_dense_param( + regions.data(), regions.size(), table_id); push_status.wait(); auto status = push_status.get(); if (status != 0) { LOG(FATAL) << "push dense param failed, status[" << status << "]"; exit(-1); - } + } } } @@ -185,7 +212,7 @@ void AsyncExecutor::SaveModel(const std::string& path) { ret = _pslib_ptr->_worker_ptr->save(path, 0); ret.wait(); int32_t feasign_cnt = ret.get(); - if (feasign_cnt == -1) { // TODO should be feasign_cnt < 0, because server bug + if (feasign_cnt == -1) { // (colourful-tree) TODO should be feasign_cnt < 0 LOG(FATAL) << "save model failed"; exit(-1); } @@ -195,13 +222,13 @@ void 
AsyncExecutor::PrepareDenseThread(const std::string& mode) { if (mode == "mpi") { DensePullThreadParam param; param.ps_client = _pslib_ptr->_worker_ptr;; - param.threshold = 1;//GlobalConfig::instance().pull_dense_per_batch; //TODO + param.threshold = 1; param.training_thread_num = actual_thread_num; param.root_scope = root_scope_; - //param.dense_params = &GlobalConfig::instance().dense_variable_name; //TODO param.dense_params = &_param_config.dense_variable_name; - _pull_dense_thread = std::shared_ptr(new DensePullThread(param)); + _pull_dense_thread = std::shared_ptr( + new DensePullThread(param)); _pull_dense_thread->start(); } } diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index 93010f8a9b..184566dd39 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include #include #include // NOLINT @@ -22,8 +23,7 @@ limitations under the License. */ #include // NOLINT #include #include -#include //local_random_engine -#include //local_random_engine +#include // local_random_engine #include "paddle/fluid/framework/data_feed.pb.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor_thread_worker.h" @@ -43,9 +43,10 @@ inline std::default_random_engine& local_random_engine() { struct engine_wrapper_t { std::default_random_engine engine; engine_wrapper_t() { - static std::atomic x(0); - std::seed_seq sseq = {x++, x++, x++, (unsigned long)(current_realtime() * 1000)}; - engine.seed(sseq); + static std::atomic x(0); + std::seed_seq sseq = {x++, x++, x++, + static_cast(current_realtime() * 1000)}; + engine.seed(sseq); } }; thread_local engine_wrapper_t r; @@ -61,18 +62,20 @@ class AsyncExecutor { const std::vector& filelist, const int thread_num, const std::vector& fetch_names, - const std::string& mode, + const std::string& mode, const bool debug = false); - //void ConfigPslib(const char* dist_desc, uint64_t* host_sign_list, int node_num, int index); void InitServer(const std::string& dist_desc, int index); - void InitWorker(const std::string& dist_desc, std::vector& host_sign_list, int node_num, int index); - //void ConfigWorker() {} + void InitWorker( + const std::string& dist_desc, + const std::vector& host_sign_list, + int node_num, int index); uint64_t StartServer(); void StopServer(); - void GatherServers(std::vector& host_sign_list, int node_num); + void GatherServers(const std::vector& host_sign_list, int node_num); void InitModel(); void SaveModel(const std::string& path); void InitParamConfig(); + private: void CreateThreads(ExecutorThreadWorker* worker, const ProgramDesc& main_program, @@ -81,6 +84,7 @@ class AsyncExecutor { Scope* root_scope, const int thread_index, const bool debug); void PrepareDenseThread(const std::string& mode); + public: std::shared_ptr _pslib_ptr; std::shared_ptr _pull_dense_thread; @@ -88,6 +92,7 @@ class AsyncExecutor { platform::Place place_; AsyncWorkerParamConfig _param_config; + private: int actual_thread_num; From 3c01cdeff0e11108f816b5f1abe5d71b3e8d153f Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Thu, 13 Dec 2018 11:33:21 +0800 Subject: [PATCH 33/62] refine executor_thread_worker.cc & executor_thread_worker.h code style --- .../fluid/framework/executor_thread_worker.cc | 86 ++++--------------- .../fluid/framework/executor_thread_worker.h | 2 - 2 files changed, 15 insertions(+), 73 deletions(-) diff --git 
a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 86ac93be3e..592a416d6d 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -303,7 +303,7 @@ void ExecutorThreadWorker::SetRootScope(Scope* g_scope) { root_scope_ = g_scope; } -//AsyncExecutor +// AsyncExecutor void AsyncExecutorThreadWorker::TrainFiles() { SetDevice(); @@ -330,7 +330,6 @@ void AsyncExecutorThreadWorker::TrainFiles() { print_fetch_var(thread_scope_, fetch_var_names_[i]); } // end for (int i = 0...) } // end while () - LOG(ERROR) << "TRAIN DONE"; } void AsyncExecutorThreadWorker::SetPSlibPtr(std::shared_ptr pslib_ptr) { @@ -360,44 +359,12 @@ void AsyncExecutorThreadWorker::TrainOneNetwork() { UpdateParams(); } -void AsyncExecutorThreadWorker::BindingSlotVariableMemory() { - /* - std::vector ins_slot_offset(batch_size + 1, 0); - for (auto i = 1u; i <= batch_size; ++i) { - ins_slot_offset[i] += ins_slot_offset[i - 1] + slot_dim; - } - - std::vector tensor_lod(batch_size + 1, 0); - for (auto i = 1u; i <= batch_size; ++i) { - tensor_lod[i] += tensor_lod[i - 1] + 1; - } - - auto& used_slots = reader->get_use_slot_alias(); - slot_input_vec.resize(used_slots.size() - 1); - for (auto slot_idx = 1u; slot_idx < used_slots.size(); ++slot_idx) { - auto var = slot_input_variable_name[slot_idx]; - - auto v = thread_scope->FindVar(var); - CHECK(v != nullptr) << "var[" << var << "] not found"; - - LoDTensor* tensor = v->GetMutable(); - float* tensor_ptr = tensor->mutable_data({batch_size, slot_dim}, platform::CPUPlace()); - memset(tensor_ptr, 0, sizeof(float) * ins_slot_offset.back()); - - LoD data_lod{tensor_lod}; - tensor->set_lod(data_lod); - - slot_input_vec[slot_idx - 1].reset(tensor); - } - */ -} void AsyncExecutorThreadWorker::SetParamConfig(AsyncWorkerParamConfig* param_config) { _param_config = param_config; } void AsyncExecutorThreadWorker::PrepareParams() { - //int table_id = 0; //TODO for (auto table_id: _param_config->sparse_table_id) { PullSparse(table_id); for (auto& t : _pull_sparse_status) { @@ -423,9 +390,7 @@ void AsyncExecutorThreadWorker::UpdateParams() { for (auto i : _param_config->dense_table_id) { PushDense(i); } - // _param_config->tmp_push_dense_wait_times int32_t tmp_push_dense_wait_times = -1; - // _param_config->tmp_push_sparse_wait_times int32_t tmp_push_sparse_wait_times = -1; static uint32_t push_dense_wait_times = static_cast(tmp_push_dense_wait_times); @@ -509,17 +474,15 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) { pull_feature_value.data(), table_id, features.data(), features.size()); _pull_sparse_status.push_back(std::move(status)); - //to save time auto& push_g = _feature_push_value[table_id]; check_pull_push_memory(features, push_g, fea_dim); - //binding_slot_embed_with_concat(); TODO - collect_feasign_info(table_id); //TODO + collect_feasign_info(table_id); } void AsyncExecutorThreadWorker::FillSparse(int table_id) { - auto slot_dim = _param_config->slot_dim; // TODO - auto fea_dim = _param_config->fea_dim; //TODO + auto slot_dim = _param_config->slot_dim; + auto fea_dim = _param_config->fea_dim; auto& features = _features[table_id]; auto& fea_value = _feature_value[table_id]; @@ -544,53 +507,35 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) { LoD data_lod{tensor_lod}; tensor_emb->set_lod(data_lod); - //float* ptr = tensor_emb->data(); for (auto index = 0u; index < len; ++index){ - //if (_current_train_job.use_cvm_feature()) { - // if (ids[index] == 
0u) { - // memcpy(ptr + slot_dim * index, init_value.data(), sizeof(float) * slot_dim); - // continue; - // } - // memcpy(ptr + slot_dim * index, fea_value[fea_idx].data(), sizeof(float) * slot_dim); - // (ptr + slot_dim * index)[0] = log((ptr + slot_dim * index)[0] + 1); - // (ptr + slot_dim * index)[1] = log((ptr + slot_dim * index)[1] + 1) - (ptr + slot_dim * index)[0]; - // fea_idx++; - //} else { - if (ids[index] == 0u) { - memcpy(ptr + slot_dim * index, init_value.data() + 2, sizeof(float) * slot_dim); - continue; - } - memcpy(ptr + slot_dim * index, fea_value[fea_idx].data() + 2, sizeof(float) * slot_dim); - fea_idx++; - //} + if (ids[index] == 0u) { + memcpy(ptr + slot_dim * index, init_value.data() + 2, sizeof(float) * slot_dim); + continue; + } + memcpy(ptr + slot_dim * index, fea_value[fea_idx].data() + 2, sizeof(float) * slot_dim); + fea_idx++; } } } void AsyncExecutorThreadWorker::PushSparse(int table_id) { - auto slot_dim = _param_config->slot_dim; //TODO - auto fea_dim = _param_config->fea_dim;//_current_train_job.fea_dim();TODO + auto slot_dim = _param_config->slot_dim; + auto fea_dim = _param_config->fea_dim; auto& features = _features[table_id]; - CHECK(features.size() < 1000000) << "features size:" << features.size(); - //std::vector gradient_var; - //auto& gradient_var = GlobalConfig::instance().input_gradient_variable_name; //TODO + CHECK(features.size() < 1000000) << "features size is too big, may be wrong:" << features.size(); auto& push_g = _feature_push_value[table_id]; check_pull_push_memory(features, push_g, fea_dim); CHECK(push_g.size() == features.size() + 1) << "push_g size:" << push_g.size() << " features size:" << features.size(); uint64_t fea_idx = 0u; auto& fea_info = _fea_info[table_id]; - int offset = 0; - //if (!_current_train_job.use_cvm_feature()) { //TODO - offset = 2; - //} + int offset = 2; const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); - // slot_idx = 0 is label TODO + // slot_idx = 0 is label for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { if (_param_config->slot_alias_to_table.find(feed_vec[slot_idx]) == _param_config->slot_alias_to_table.end()) { LOG(ERROR) << "ERROR slot_idx:" << slot_idx << " name:" << feed_vec[slot_idx]; } else if (_param_config->slot_alias_to_table[feed_vec[slot_idx]] != table_id) { - LOG(ERROR) << "ERROR continue"; continue; } Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[table_id][slot_idx - 1]); @@ -609,7 +554,6 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { LOG(ERROR) << "var[" << feed_vec[slot_idx] << "] not found"; exit(-1); } - //int len = tensor->lod()[0].back(); int len = tensor->numel(); CHECK(slot_dim * len == g_tensor->numel()) << "len:" << len << " g_numel:" << g_tensor->numel(); CHECK(len == tensor->numel()) << "len:" << len << "t_numel:" << tensor->numel(); diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 0c9a47690b..4e9c2622b0 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -155,7 +155,6 @@ class ExecutorThreadWorker { void SetFetchVarNames(const std::vector& fetch_var_names); virtual void SetPSlibPtr(std::shared_ptr pslib_ptr); virtual void SetPullDenseThread(std::shared_ptr dpt) {}; - virtual void BindingSlotVariableMemory() {}; virtual void SetParamConfig(AsyncWorkerParamConfig* param_config) {}; private: void CreateThreadScope(const framework::ProgramDesc& program); @@ -191,7 +190,6 @@ public: 
virtual ~AsyncExecutorThreadWorker() {} void SetPSlibPtr(std::shared_ptr pslib_ptr); void SetPullDenseThread(std::shared_ptr dpt); - void BindingSlotVariableMemory(); void SetParamConfig(AsyncWorkerParamConfig* param_config); void TrainFiles(); void TrainOneNetwork(); From c4cb4142916c92d82b3e0924206aac25db4b8758 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Thu, 13 Dec 2018 11:54:17 +0800 Subject: [PATCH 34/62] refine pslib.cmake url to public --- cmake/external/pslib.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake index 812af5efa2..4d4dc195aa 100644 --- a/cmake/external/pslib.cmake +++ b/cmake/external/pslib.cmake @@ -29,8 +29,8 @@ INCLUDE(ExternalProject) SET(PSLIB_PROJECT "extern_pslib") IF((NOT DEFINED PSLIB_VER) OR (NOT DEFINED PSLIB_URL)) MESSAGE(STATUS "use pre defined download url") - SET(PSLIB_VER "pslib" CACHE STRING "" FORCE) #todo pslib version - SET(PSLIB_URL "http://bjyz-heqiaozhi-dev-new.epc.baidu.com:8000/${PSLIB_VER}.tar.gz" CACHE STRING "" FORCE) #todo pslib url + SET(PSLIB_VER "0.1.0" CACHE STRING "" FORCE) + SET(PSLIB_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_VER}/pslib.tar.gz" CACHE STRING "" FORCE) ENDIF() MESSAGE(STATUS "PSLIB_VER: ${PSLIB_VER}, PSLIB_URL: ${PSLIB_URL}") SET(PSLIB_SOURCE_DIR "${THIRD_PARTY_PATH}/pslib") From c59cdf3a243e104992ec2cde1e36cb38d452feb4 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Thu, 13 Dec 2018 12:12:21 +0800 Subject: [PATCH 35/62] refine executor_thread_worker.h and executor_thread_worker.cc code style --- .../fluid/framework/executor_thread_worker.cc | 364 +++++++++--------- .../fluid/framework/executor_thread_worker.h | 92 +++-- 2 files changed, 243 insertions(+), 213 deletions(-) diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 592a416d6d..412f4a2b6e 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -58,7 +58,8 @@ bool DensePullThread::check_update_param(uint64_t table_id) { { std::lock_guard lock(_mutex_for_version); auto& version = _training_versions[table_id]; - _current_version[table_id] = *(std::min_element(version.begin(), version.end())); + _current_version[table_id] = + *(std::min_element(version.begin(), version.end())); } if (_current_version[table_id] - _last_versions[table_id] < _threshold) { return false; @@ -93,7 +94,8 @@ void DensePullThread::wait_all() { t.wait(); auto status = t.get(); if (status != 0) { - LOG(WARNING) << "pull dense failed times:" << ++_pull_dense_fail_times; + LOG(WARNING) << "pull dense failed times:" << + ++_pull_dense_fail_times; } } @@ -105,7 +107,8 @@ void DensePullThread::wait_all() { _pull_dense_status.resize(0); } -void DensePullThread::increase_thread_version(int thread_id, uint64_t table_id) { +void DensePullThread::increase_thread_version( + int thread_id, uint64_t table_id) { std::lock_guard lock(_mutex_for_version); _training_versions[table_id][thread_id]++; } @@ -169,10 +172,6 @@ void ExecutorThreadWorker::SetFetchVarNames( fetch_var_names.end()); } -void ExecutorThreadWorker::SetPSlibPtr(std::shared_ptr pslib_ptr) { - -} - void ExecutorThreadWorker::SetDevice() { #if defined _WIN32 || defined __APPLE__ @@ -332,10 +331,12 @@ void AsyncExecutorThreadWorker::TrainFiles() { } // end while () } -void AsyncExecutorThreadWorker::SetPSlibPtr(std::shared_ptr pslib_ptr) { +void AsyncExecutorThreadWorker::SetPSlibPtr( + std::shared_ptr 
pslib_ptr) { _pslib_ptr = pslib_ptr; } -void AsyncExecutorThreadWorker::SetPullDenseThread(std::shared_ptr dpt) { +void AsyncExecutorThreadWorker::SetPullDenseThread( + std::shared_ptr dpt) { _pull_dense_thread = dpt; } void AsyncExecutorThreadWorker::TrainOneNetwork() { @@ -347,7 +348,8 @@ void AsyncExecutorThreadWorker::TrainOneNetwork() { } bool need_skip = false; for (auto t = 0u; t < _param_config->skip_op.size(); ++t) { - if (op->Type().find(_param_config->skip_op[t]) != std::string::npos) { + if (op->Type().find(_param_config->skip_op[t]) != + std::string::npos) { need_skip = true; break; } @@ -359,13 +361,13 @@ void AsyncExecutorThreadWorker::TrainOneNetwork() { UpdateParams(); } - -void AsyncExecutorThreadWorker::SetParamConfig(AsyncWorkerParamConfig* param_config) { +void AsyncExecutorThreadWorker::SetParamConfig( + AsyncWorkerParamConfig* param_config) { _param_config = param_config; } void AsyncExecutorThreadWorker::PrepareParams() { - for (auto table_id: _param_config->sparse_table_id) { + for (auto table_id : _param_config->sparse_table_id) { PullSparse(table_id); for (auto& t : _pull_sparse_status) { t.wait(); @@ -378,7 +380,7 @@ void AsyncExecutorThreadWorker::PrepareParams() { } _pull_sparse_status.resize(0); - for (auto table_id: _param_config->sparse_table_id) { + for (auto table_id : _param_config->sparse_table_id) { FillSparse(table_id); } } @@ -440,180 +442,198 @@ void AsyncExecutorThreadWorker::PushDense(int table_id) { void AsyncExecutorThreadWorker::PullSparse(int table_id) { - auto& features = _features[table_id]; - auto& feature_value = _feature_value[table_id]; - auto fea_dim = _param_config->fea_dim; - // slot id starts from 1 - features.clear(); - features.resize(0); - features.reserve(MAX_FEASIGN_NUM); - const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); - // slot_idx = 0 is label TODO - for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { - Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); - LoDTensor* tensor = var->GetMutable(); - int64_t* ids = tensor->data(); - int len = tensor->numel(); - for (auto i = 0u; i < len; ++i) { - //todo: current trick - filter feasign=use_slot_mod(bug: datafeed fill use_slot_mod for empty slot) - if (ids[i] == 0u) { - continue; - } - features.push_back(static_cast(ids[i])); - } - } - check_pull_push_memory(features, feature_value, fea_dim); - - std::vector pull_feature_value; - for (auto i = 0u; i < features.size(); ++i) { - pull_feature_value.push_back(feature_value[i].data()); - } - for (int i = 0; i < features.size(); ++i) { + auto& features = _features[table_id]; + auto& feature_value = _feature_value[table_id]; + auto fea_dim = _param_config->fea_dim; + // slot id starts from 1 + features.clear(); + features.resize(0); + features.reserve(MAX_FEASIGN_NUM); + const std::vector& feed_vec = + thread_reader_->GetUseSlotAlias(); + // slot_idx = 0 is label TODO + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + LoDTensor* tensor = var->GetMutable(); + int64_t* ids = tensor->data(); + int len = tensor->numel(); + for (auto i = 0u; i < len; ++i) { + // todo(colourful-tree): current trick - filter feasign=use_slot_mod( + // bug: datafeed fill use_slot_mod for empty slot) + if (ids[i] == 0u) { + continue; + } + features.push_back(static_cast(ids[i])); } - auto status = _pslib_ptr->_worker_ptr->pull_sparse( - pull_feature_value.data(), table_id, features.data(), features.size()); - 
_pull_sparse_status.push_back(std::move(status)); - - auto& push_g = _feature_push_value[table_id]; - check_pull_push_memory(features, push_g, fea_dim); - - collect_feasign_info(table_id); + } + check_pull_push_memory(features, feature_value, fea_dim); + + std::vector pull_feature_value; + for (auto i = 0u; i < features.size(); ++i) { + pull_feature_value.push_back(feature_value[i].data()); + } + + auto status = _pslib_ptr->_worker_ptr->pull_sparse( + pull_feature_value.data(), table_id, features.data(), features.size()); + _pull_sparse_status.push_back(std::move(status)); + + auto& push_g = _feature_push_value[table_id]; + check_pull_push_memory(features, push_g, fea_dim); + + collect_feasign_info(table_id); } void AsyncExecutorThreadWorker::FillSparse(int table_id) { - auto slot_dim = _param_config->slot_dim; - auto fea_dim = _param_config->fea_dim; - auto& features = _features[table_id]; - auto& fea_value = _feature_value[table_id]; - - CHECK(features.size() > 0) << "feature size check failed"; - - auto fea_idx = 0u; - - std::vector init_value(fea_dim); - - const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); - // slot_idx = 0 is label TODO - for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { - Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); - LoDTensor* tensor = var->GetMutable(); - int64_t* ids = tensor->data(); - int len = tensor->numel(); - Variable* var_emb = thread_scope_->FindVar(_param_config->slot_input_vec[table_id][slot_idx - 1]); - LoDTensor* tensor_emb = var_emb->GetMutable(); - float* ptr = tensor_emb->mutable_data({len, slot_dim}, platform::CPUPlace()); - memset(ptr, 0, sizeof(float) * len * slot_dim); - auto& tensor_lod = tensor->lod()[0]; - - LoD data_lod{tensor_lod}; - tensor_emb->set_lod(data_lod); - - for (auto index = 0u; index < len; ++index){ - if (ids[index] == 0u) { - memcpy(ptr + slot_dim * index, init_value.data() + 2, sizeof(float) * slot_dim); - continue; - } - memcpy(ptr + slot_dim * index, fea_value[fea_idx].data() + 2, sizeof(float) * slot_dim); - fea_idx++; - } + auto slot_dim = _param_config->slot_dim; + auto fea_dim = _param_config->fea_dim; + auto& features = _features[table_id]; + auto& fea_value = _feature_value[table_id]; + + CHECK(features.size() > 0) << "feature size check failed"; + + auto fea_idx = 0u; + + std::vector init_value(fea_dim); + + const std::vector& feed_vec = + thread_reader_->GetUseSlotAlias(); + // slot_idx = 0 is label TODO + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + LoDTensor* tensor = var->GetMutable(); + int64_t* ids = tensor->data(); + int len = tensor->numel(); + Variable* var_emb = thread_scope_->FindVar( + _param_config->slot_input_vec[table_id][slot_idx - 1]); + LoDTensor* tensor_emb = var_emb->GetMutable(); + float* ptr = tensor_emb->mutable_data( + {len, slot_dim}, platform::CPUPlace()); + memset(ptr, 0, sizeof(float) * len * slot_dim); + auto& tensor_lod = tensor->lod()[0]; + + LoD data_lod{tensor_lod}; + tensor_emb->set_lod(data_lod); + + for (auto index = 0u; index < len; ++index) { + if (ids[index] == 0u) { + memcpy(ptr + slot_dim * index, + init_value.data() + 2, sizeof(float) * slot_dim); + continue; + } + memcpy(ptr + slot_dim * index, + fea_value[fea_idx].data() + 2, sizeof(float) * slot_dim); + fea_idx++; } + } } void AsyncExecutorThreadWorker::PushSparse(int table_id) { - auto slot_dim = _param_config->slot_dim; - auto fea_dim = _param_config->fea_dim; - auto& features = 
_features[table_id]; - CHECK(features.size() < 1000000) << "features size is too big, may be wrong:" << features.size(); - auto& push_g = _feature_push_value[table_id]; - check_pull_push_memory(features, push_g, fea_dim); - CHECK(push_g.size() == features.size() + 1) << "push_g size:" << push_g.size() << " features size:" << features.size(); - uint64_t fea_idx = 0u; - auto& fea_info = _fea_info[table_id]; - int offset = 2; - const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); - // slot_idx = 0 is label - for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { - if (_param_config->slot_alias_to_table.find(feed_vec[slot_idx]) == _param_config->slot_alias_to_table.end()) { - LOG(ERROR) << "ERROR slot_idx:" << slot_idx << " name:" << feed_vec[slot_idx]; - } else if (_param_config->slot_alias_to_table[feed_vec[slot_idx]] != table_id) { - continue; - } - Variable* g_var = thread_scope_->FindVar(_param_config->gradient_var[table_id][slot_idx - 1]); - CHECK(g_var != nullptr) << "var[" << _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; - LoDTensor* g_tensor = g_var->GetMutable(); - if (g_tensor == NULL) { - LOG(ERROR) << "var[" << _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; - exit(-1); - } - float* g = g_tensor->data(); - - Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); - CHECK(var != nullptr) << "var[" << feed_vec[slot_idx] << "] not found"; - LoDTensor* tensor = var->GetMutable(); - if (tensor == NULL) { - LOG(ERROR) << "var[" << feed_vec[slot_idx] << "] not found"; - exit(-1); - } - int len = tensor->numel(); - CHECK(slot_dim * len == g_tensor->numel()) << "len:" << len << " g_numel:" << g_tensor->numel(); - CHECK(len == tensor->numel()) << "len:" << len << "t_numel:" << tensor->numel(); - int64_t* ids = tensor->data(); - for (auto id_idx = 0u; id_idx < len; ++id_idx){ - if (ids[id_idx] == 0) { - g += slot_dim; - continue; - } - memcpy(push_g[fea_idx].data() + offset, g, sizeof(float) * slot_dim); - push_g[fea_idx][0] = 1.0f; - CHECK(fea_idx < fea_info.size()) << "fea_idx:" << fea_idx << " size:" << fea_info.size(); - push_g[fea_idx][1] = static_cast(fea_info[fea_idx].label); - g += slot_dim; - fea_idx++; - } + auto slot_dim = _param_config->slot_dim; + auto fea_dim = _param_config->fea_dim; + auto& features = _features[table_id]; + auto& push_g = _feature_push_value[table_id]; + check_pull_push_memory(features, push_g, fea_dim); + CHECK(push_g.size() == features.size() + 1) << + "push_g size:" << push_g.size() << " features size:" << features.size(); + uint64_t fea_idx = 0u; + auto& fea_info = _fea_info[table_id]; + int offset = 2; + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); + // slot_idx = 0 is label + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + if (_param_config->slot_alias_to_table.find( + feed_vec[slot_idx]) == _param_config->slot_alias_to_table.end()) { + LOG(ERROR) << "ERROR slot_idx:" << slot_idx << + " name:" << feed_vec[slot_idx]; + } else if ( + _param_config->slot_alias_to_table[feed_vec[slot_idx]] != table_id) { + continue; } - CHECK(fea_idx == features.size()) << "fea_idx:" << fea_idx << " features size:" << features.size(); - CHECK(features.size() > 0); - - std::vector push_g_vec; - for (auto i = 0u; i < features.size(); ++i) { - push_g_vec.push_back(push_g[i].data()); + Variable* g_var = thread_scope_->FindVar( + _param_config->gradient_var[table_id][slot_idx - 1]); + CHECK(g_var != nullptr) << "var[" << + 
_param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; + LoDTensor* g_tensor = g_var->GetMutable(); + if (g_tensor == NULL) { + LOG(ERROR) << "var[" << + _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; + exit(-1); + } + float* g = g_tensor->data(); + + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); + CHECK(var != nullptr) << "var[" << feed_vec[slot_idx] << "] not found"; + LoDTensor* tensor = var->GetMutable(); + if (tensor == NULL) { + LOG(ERROR) << "var[" << feed_vec[slot_idx] << "] not found"; + exit(-1); + } + int len = tensor->numel(); + CHECK(slot_dim * len == g_tensor->numel()) << + "len:" << len << " g_numel:" << g_tensor->numel(); + CHECK(len == tensor->numel()) << "len:" << + len << "t_numel:" << tensor->numel(); + int64_t* ids = tensor->data(); + for (auto id_idx = 0u; id_idx < len; ++id_idx) { + if (ids[id_idx] == 0) { + g += slot_dim; + continue; + } + memcpy(push_g[fea_idx].data() + offset, + g, sizeof(float) * slot_dim); + push_g[fea_idx][0] = 1.0f; + CHECK(fea_idx < fea_info.size()) << "fea_idx:" << + fea_idx << " size:" << fea_info.size(); + push_g[fea_idx][1] = static_cast(fea_info[fea_idx].label); + g += slot_dim; + fea_idx++; } - auto status = _pslib_ptr->_worker_ptr->push_sparse( - table_id, features.data(), (const float**)push_g_vec.data(), features.size()); - _push_sparse_status.push_back(std::move(status)); + } + CHECK(fea_idx == features.size()) << "fea_idx:" << + fea_idx << " features size:" << features.size(); + CHECK_GT(features.size(), 0); + + std::vector push_g_vec; + for (auto i = 0u; i < features.size(); ++i) { + push_g_vec.push_back(push_g[i].data()); + } + auto status = _pslib_ptr->_worker_ptr->push_sparse( + table_id, features.data(), + (const float**)push_g_vec.data(), features.size()); + _push_sparse_status.push_back(std::move(status)); } void AsyncExecutorThreadWorker::collect_feasign_info( - int table_id) { - auto& fea_info = _fea_info[table_id]; - auto& feature = _features[table_id]; - fea_info.resize(feature.size()); - - const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); - Variable* var = thread_scope_->FindVar(feed_vec[0]); + int table_id) { + auto& fea_info = _fea_info[table_id]; + auto& feature = _features[table_id]; + fea_info.resize(feature.size()); + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); + Variable* var = thread_scope_->FindVar(feed_vec[0]); + LoDTensor* tensor = var->GetMutable(); + int64_t* label = tensor->data(); + + int global_index = 0; + for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); LoDTensor* tensor = var->GetMutable(); - int64_t* label = tensor->data(); - - int global_index = 0; - for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { - Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); - LoDTensor* tensor = var->GetMutable(); - int64_t* ids = tensor->data(); - - int fea_idx = 0; - for (auto ins_idx = 1u; ins_idx < tensor->lod()[0].size(); ++ins_idx) { - for (; fea_idx < tensor->lod()[0][ins_idx]; ++fea_idx) { - if (ids[fea_idx] == 0u) { - continue; - } - FeasignInfo info{slot_idx, ins_idx, label[ins_idx - 1]}; - - fea_info[global_index++] = std::move(info); - } + int64_t* ids = tensor->data(); + + int fea_idx = 0; + for (auto ins_idx = 1u; ins_idx < tensor->lod()[0].size(); ++ins_idx) { + for (; fea_idx < tensor->lod()[0][ins_idx]; ++fea_idx) { + if (ids[fea_idx] == 0u) { + continue; } + FeasignInfo info{slot_idx, ins_idx, label[ins_idx - 1]}; 
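// Note: this is an inference from the surrounding hunk, not code or
// commentary from the commit itself. feed_vec[0] carries the
// per-instance labels, so every non-zero feasign of instance ins_idx is
// tagged with label[ins_idx - 1]. PushSparse above consumes the same
// layout: push_g[fea_idx][0] is set to a constant 1.0f (presumably a
// show/weight slot), push_g[fea_idx][1] receives this label, and the
// slot gradient itself is copied starting at offset 2.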
+ + fea_info[global_index++] = std::move(info); + } } - CHECK(global_index == feature.size()) << "expect fea info size:" << feature.size() - << " real:" << global_index; + } + CHECK(global_index == feature.size()) << + "expect fea info size:" << feature.size() + << " real:" << global_index; } void AsyncExecutorThreadWorker::check_pull_push_memory( diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 4e9c2622b0..b6c4f950ec 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -35,21 +35,22 @@ const static uint32_t MAX_FEASIGN_NUM = 1000 * 100 * 100; void CreateTensor(Variable* var, proto::VarType::Type var_type); struct AsyncWorkerParamConfig { - int slot_dim; - int fea_dim; - int32_t tmp_push_dense_wait_times; - int32_t tmp_push_sparse_wait_times; - - std::vector skip_op; - - std::map> dense_variable_name; - std::map> dense_gradient_variable_name; - std::vector dense_table_id; - std::vector dense_table_size; // fea_dim for each dense table - std::vector sparse_table_id; - std::map> slot_input_vec; //6048slot 6050slot //name - std::map> gradient_var; //6048slot_embed - std::map slot_alias_to_table; //TODO done + int slot_dim; + int fea_dim; + int32_t tmp_push_dense_wait_times; + int32_t tmp_push_sparse_wait_times; + + std::vector skip_op; + + std::map> dense_variable_name; + std::map> dense_gradient_variable_name; + std::vector dense_table_id; + // fea_dim for each dense table + std::vector dense_table_size; + std::vector sparse_table_id; + std::map> slot_input_vec; + std::map> gradient_var; + std::map slot_alias_to_table; }; struct DensePullThreadParam { @@ -62,8 +63,8 @@ struct DensePullThreadParam { }; class DensePullThread { -public: - DensePullThread(DensePullThreadParam& param) : + public: + explicit DensePullThread(const DensePullThreadParam& param) : _running(false) { _ps_client = param.ps_client; _threshold = param.threshold; @@ -96,11 +97,11 @@ public: void pull_dense2(uint64_t table_id); void wait_all(); -private: + private: void run(); bool check_update_param(uint64_t table_id); -private: + private: std::shared_ptr _ps_client; int _thread_num; int _threshold; @@ -153,9 +154,13 @@ class ExecutorThreadWorker { virtual void TrainFiles(); // set fetch variable names from python interface assigned by users void SetFetchVarNames(const std::vector& fetch_var_names); - virtual void SetPSlibPtr(std::shared_ptr pslib_ptr); - virtual void SetPullDenseThread(std::shared_ptr dpt) {}; - virtual void SetParamConfig(AsyncWorkerParamConfig* param_config) {}; + virtual void SetPSlibPtr( + std::shared_ptr pslib_ptr); + virtual void SetPullDenseThread( + std::shared_ptr dpt) {} + virtual void SetParamConfig( + AsyncWorkerParamConfig * param_config) {} + private: void CreateThreadScope(const framework::ProgramDesc& program); void CreateThreadOperators(const framework::ProgramDesc& program); @@ -178,32 +183,37 @@ class ExecutorThreadWorker { Scope* root_scope_; // a thread scope, father scope is global score which is shared Scope* thread_scope_; - //private: std::vector fetch_var_names_; std::vector> fetch_values_; bool debug_; }; class AsyncExecutorThreadWorker: public ExecutorThreadWorker { -public: - AsyncExecutorThreadWorker(){}; - virtual ~AsyncExecutorThreadWorker() {} - void SetPSlibPtr(std::shared_ptr pslib_ptr); - void SetPullDenseThread(std::shared_ptr dpt); - void SetParamConfig(AsyncWorkerParamConfig* param_config); - void TrainFiles(); - void TrainOneNetwork(); 
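// A minimal usage sketch, assuming only the setters and train methods
// declared in this class; none of these lines are part of the patch:
//
//   AsyncExecutorThreadWorker worker;
//   worker.SetPSlibPtr(pslib_ptr);             // shared PS client handle
//   worker.SetPullDenseThread(pull_dense_thread);
//   worker.SetParamConfig(&param_config);      // table ids, slot_dim, fea_dim
//   worker.TrainFiles();                       // runs TrainOneNetwork() per batch
//
// This mirrors the order in which AsyncExecutor::CreateThreads applies
// them in a later commit of this series.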
- void PrepareParams(); - void UpdateParams(); - void PullSparse(int table_id); - void FillSparse(int table_id); - void PushSparse(int table_id); - void PushDense(int table_id); - - void check_pull_push_memory(std::vector& features, std::vector& push_g, int dim); - void check_pull_push_memory(std::vector& features, std::vector>& push_g, int dim); + public: + AsyncExecutorThreadWorker() {} + virtual ~AsyncExecutorThreadWorker() {} + void SetPSlibPtr(std::shared_ptr pslib_ptr); + void SetPullDenseThread(std::shared_ptr dpt); + void SetParamConfig(AsyncWorkerParamConfig* param_config); + void TrainFiles(); + void TrainOneNetwork(); + void PrepareParams(); + void UpdateParams(); + void PullSparse(int table_id); + void FillSparse(int table_id); + void PushSparse(int table_id); + void PushDense(int table_id); + + void check_pull_push_memory( + const std::vector& features, + std::vector& push_g, + int dim); + void check_pull_push_memory(const std::vector& features, + std::vector>& push_g, + int dim); void collect_feasign_info(int table_id); -private: + + private: struct FeasignInfo { uint32_t slot; uint32_t ins; From 2912d5311bccc3b89dd32a0e80f48be41ba7d1bc Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Thu, 13 Dec 2018 13:21:39 +0800 Subject: [PATCH 36/62] fix code style bug & change pslib.cmake & change Cmakelist adapt pslib --- CMakeLists.txt | 19 +++++++++++++------ cmake/external/pslib.cmake | 11 ++++++----- paddle/fluid/framework/async_executor.cc | 7 +++---- paddle/fluid/framework/async_executor.h | 4 ++-- paddle/fluid/framework/data_feed.cc | 1 + .../fluid/framework/executor_thread_worker.cc | 4 ++-- .../fluid/framework/executor_thread_worker.h | 2 +- 7 files changed, 28 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b5bf6c5b6..c3b4349c8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,7 @@ option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(WITH_DISTRIBUTE "Compile with distributed support" OFF) +option(WITH_PSLIB "Compile with pslib support" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) @@ -216,9 +217,12 @@ include(external/warpctc) # download, build, install warpctc include(cupti) include(external/gzstream) endif (NOT WIN32) -include(external/libmct) -include(external/pslib_brpc) -include(external/pslib) + +if(WITH_PSLIB) + include(external/libmct) + include(external/pslib_brpc) + include(external/pslib) +endif() if(WITH_DISTRIBUTE) if(WITH_GRPC) @@ -279,11 +283,14 @@ set(EXTERNAL_LIBS protobuf zlib ${PYTHON_LIBRARIES} - pslib - pslib_brpc - libmct ) +if(WITH_PSLIB) + list(APPEND EXTERNAL_LIBS pslib) + list(APPEND EXTERNAL_LIBS pslib_brpc) + list(APPEND EXTERNAL_LIBS libmct) +endif(WITH_PSLIB) + if(WITH_AMD_GPU) find_package(HIP) include(hip) diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake index 4d4dc195aa..3b495d78e2 100644 --- a/cmake/external/pslib.cmake +++ b/cmake/external/pslib.cmake @@ -30,9 +30,10 @@ SET(PSLIB_PROJECT "extern_pslib") IF((NOT DEFINED PSLIB_VER) OR (NOT DEFINED PSLIB_URL)) MESSAGE(STATUS "use pre defined download url") SET(PSLIB_VER "0.1.0" CACHE STRING "" FORCE) - SET(PSLIB_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_VER}/pslib.tar.gz" CACHE STRING "" FORCE) 
+ SET(PSLIB_NAME "pslib" CACHE STRING "" FORCE) + SET(PSLIB_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_VER}/${PSLIB_NAME}.tar.gz" CACHE STRING "" FORCE) ENDIF() -MESSAGE(STATUS "PSLIB_VER: ${PSLIB_VER}, PSLIB_URL: ${PSLIB_URL}") +MESSAGE(STATUS "PSLIB_NAME: ${PSLIB_NAME}, PSLIB_URL: ${PSLIB_URL}") SET(PSLIB_SOURCE_DIR "${THIRD_PARTY_PATH}/pslib") SET(PSLIB_DOWNLOAD_DIR "${PSLIB_SOURCE_DIR}/src/${PSLIB_PROJECT}") SET(PSLIB_DST_DIR "pslib") @@ -50,7 +51,7 @@ INCLUDE_DIRECTORIES(${PSLIB_INC_DIR}) FILE(WRITE ${PSLIB_DOWNLOAD_DIR}/CMakeLists.txt "PROJECT(PSLIB)\n" "cmake_minimum_required(VERSION 3.0)\n" - "install(DIRECTORY ${PSLIB_VER}/include ${PSLIB_VER}/lib \n" + "install(DIRECTORY ${PSLIB_NAME}/include ${PSLIB_NAME}/lib \n" " DESTINATION ${PSLIB_DST_DIR})\n") ExternalProject_Add( @@ -58,8 +59,8 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${PSLIB_SOURCE_DIR} DOWNLOAD_DIR ${PSLIB_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_URL} -c -q -O ${PSLIB_VER}.tar.gz - && tar zxvf ${PSLIB_VER}.tar.gz + DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_URL} -c -q -O ${PSLIB_NAME}.tar.gz + && tar zxvf ${PSLIB_NAME}.tar.gz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT} diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index c62d62a5dc..8231aff142 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -50,7 +50,6 @@ void AsyncExecutor::CreateThreads( worker->BindingDataFeedMemory(); worker->SetPSlibPtr(_pslib_ptr); worker->SetPullDenseThread(_pull_dense_thread); - worker->BindingSlotVariableMemory(); worker->SetParamConfig(&_param_config); } @@ -79,7 +78,7 @@ void AsyncExecutor::InitWorker(const std::string& dist_desc, _pslib_ptr = std::shared_ptr( new paddle::distributed::PSlib()); _pslib_ptr->init_worker( - dist_desc, host_sign_list.data(), node_num, index); + dist_desc, (uint64_t*)(host_sign_list.data()), node_num, index); InitParamConfig(); } @@ -93,8 +92,8 @@ void AsyncExecutor::StopServer() { } void AsyncExecutor::GatherServers( - std::vector& host_sign_list, int node_num) { - _pslib_ptr->gather_servers(host_sign_list.data(), node_num); + const std::vector& host_sign_list, int node_num) { + _pslib_ptr->gather_servers((uint64_t*)(host_sign_list.data()), node_num); } void AsyncExecutor::InitParamConfig() { diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index 184566dd39..16540c2df2 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -43,9 +43,9 @@ inline std::default_random_engine& local_random_engine() { struct engine_wrapper_t { std::default_random_engine engine; engine_wrapper_t() { - static std::atomic x(0); + static std::atomic x(0); std::seed_seq sseq = {x++, x++, x++, - static_cast(current_realtime() * 1000)}; + static_cast(current_realtime() * 1000)}; engine.seed(sseq); } }; diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index 851c7eda89..54a00f8ccf 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -68,6 +68,7 @@ bool DataFeed::PickOneFile(std::string* filename) { return false; } *filename = filelist_[file_idx_++]; + LOG(ERROR) << "pick file:" << *filename; return true; } diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 
412f4a2b6e..df15a4d293 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -637,7 +637,7 @@ void AsyncExecutorThreadWorker::collect_feasign_info( } void AsyncExecutorThreadWorker::check_pull_push_memory( - std::vector& features, + const std::vector& features, std::vector>& push_g, int dim) { push_g.resize(features.size() + 1); @@ -647,7 +647,7 @@ void AsyncExecutorThreadWorker::check_pull_push_memory( } void AsyncExecutorThreadWorker::check_pull_push_memory( - std::vector& features, + const std::vector& features, std::vector& push_g, int dim) { if (features.size() > push_g.size()) { diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index b6c4f950ec..93373b1d2e 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -155,7 +155,7 @@ class ExecutorThreadWorker { // set fetch variable names from python interface assigned by users void SetFetchVarNames(const std::vector& fetch_var_names); virtual void SetPSlibPtr( - std::shared_ptr pslib_ptr); + std::shared_ptr pslib_ptr) {}; virtual void SetPullDenseThread( std::shared_ptr dpt) {} virtual void SetParamConfig( From f81957a7531d7cdb4e4f0a96c0d0f5f8752c92b7 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Thu, 13 Dec 2018 15:12:37 +0800 Subject: [PATCH 37/62] refine cmake for pslib & pre_define --- CMakeLists.txt | 2 +- cmake/configure.cmake | 4 ++++ cmake/external/libmct.cmake | 13 +++++++------ cmake/external/pslib_brpc.cmake | 15 ++++++++------- paddle/fluid/framework/CMakeLists.txt | 7 ++++++- paddle/fluid/framework/async_executor.cc | 14 ++++++++++++++ paddle/fluid/framework/async_executor.h | 11 +++++++---- paddle/fluid/framework/executor_thread_worker.cc | 6 +++++- paddle/fluid/framework/executor_thread_worker.h | 12 ++++++++++-- paddle/fluid/pybind/async_executor_py.cc | 11 +++++++++++ 10 files changed, 73 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c3b4349c8c..68eb8718ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,7 +222,7 @@ if(WITH_PSLIB) include(external/libmct) include(external/pslib_brpc) include(external/pslib) -endif() +endif(WITH_PSLIB) if(WITH_DISTRIBUTE) if(WITH_GRPC) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 4e17ddee73..03076c44c3 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -84,6 +84,10 @@ if(NOT WITH_GOLANG) add_definitions(-DPADDLE_WITHOUT_GOLANG) endif(NOT WITH_GOLANG) +if(WITH_PSLIB) + add_definitions(-DPADDLE_WITH_PSLIB) +endif() + if(WITH_GPU) add_definitions(-DPADDLE_WITH_CUDA) diff --git a/cmake/external/libmct.cmake b/cmake/external/libmct.cmake index 239183cb6d..27cff8cfb6 100644 --- a/cmake/external/libmct.cmake +++ b/cmake/external/libmct.cmake @@ -29,10 +29,11 @@ INCLUDE(ExternalProject) SET(LIBMCT_PROJECT "extern_libmct") IF((NOT DEFINED LIBMCT_VER) OR (NOT DEFINED LIBMCT_URL)) MESSAGE(STATUS "use pre defined download url") - SET(LIBMCT_VER "libmct" CACHE STRING "" FORCE) #todo libmct version - SET(LIBMCT_URL "http://bjyz-heqiaozhi-dev-new.epc.baidu.com:8000/${LIBMCT_VER}.tar.gz" CACHE STRING "" FORCE) #todo libmct url + SET(LIBMCT_VER "0.1.0" CACHE STRING "" FORCE) + SET(LIBMCT_NAME "libmct" CACHE STRING "" FORCE) + SET(LIBMCT_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${LIBMCT_VER}/${LIBMCT_NAME}.tar.gz" CACHE STRING "" FORCE) ENDIF() -MESSAGE(STATUS "LIBMCT_VER: ${LIBMCT_VER}, LIBMCT_URL: ${LIBMCT_URL}") 
+MESSAGE(STATUS "LIBMCT_NAME: ${LIBMCT_NAME}, LIBMCT_URL: ${LIBMCT_URL}") SET(LIBMCT_SOURCE_DIR "${THIRD_PARTY_PATH}/libmct") SET(LIBMCT_DOWNLOAD_DIR "${LIBMCT_SOURCE_DIR}/src/${LIBMCT_PROJECT}") SET(LIBMCT_DST_DIR "libmct") @@ -47,7 +48,7 @@ INCLUDE_DIRECTORIES(${LIBMCT_INC_DIR}) FILE(WRITE ${LIBMCT_DOWNLOAD_DIR}/CMakeLists.txt "PROJECT(LIBMCT)\n" "cmake_minimum_required(VERSION 3.0)\n" - "install(DIRECTORY ${LIBMCT_VER}/include ${LIBMCT_VER}/lib \n" + "install(DIRECTORY ${LIBMCT_NAME}/include ${LIBMCT_NAME}/lib \n" " DESTINATION ${LIBMCT_DST_DIR})\n") ExternalProject_Add( @@ -55,8 +56,8 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${LIBMCT_SOURCE_DIR} DOWNLOAD_DIR ${LIBMCT_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${LIBMCT_URL} -c -q -O ${LIBMCT_VER}.tar.gz - && tar zxvf ${LIBMCT_VER}.tar.gz + DOWNLOAD_COMMAND wget --no-check-certificate ${LIBMCT_URL} -c -q -O ${LIBMCT_NAME}.tar.gz + && tar zxvf ${LIBMCT_NAME}.tar.gz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT} diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake index 92019eef26..7ff5a8aca1 100644 --- a/cmake/external/pslib_brpc.cmake +++ b/cmake/external/pslib_brpc.cmake @@ -27,12 +27,13 @@ ENDIF() INCLUDE(ExternalProject) SET(PSLIB_BRPC_PROJECT "extern_pslib_brpc") -IF((NOT DEFINED PSLIB_BRPC_VER) OR (NOT DEFINED PSLIB_BRPC_URL)) +IF((NOT DEFINED PSLIB_BRPC_NAME) OR (NOT DEFINED PSLIB_BRPC_URL)) MESSAGE(STATUS "use pre defined download url") - SET(PSLIB_BRPC_VER "pslib_brpc" CACHE STRING "" FORCE) #todo pslib version - SET(PSLIB_BRPC_URL "http://bjyz-heqiaozhi-dev-new.epc.baidu.com:8000/${PSLIB_BRPC_VER}.tar.gz" CACHE STRING "" FORCE) #todo pslib_brpc url + SET(PSLIB_BRPC_VER "0.1.0" CACHE STRING "" FORCE) + SET(PSLIB_BRPC_NAME "pslib_brpc" CACHE STRING "" FORCE) + SET(PSLIB_BRPC_URL "https://raw.githubusercontent.com/PaddlePaddle/Fleet/release/${PSLIB_BRPC_VER}/${PSLIB_BRPC_NAME}.tar.gz" CACHE STRING "" FORCE) ENDIF() -MESSAGE(STATUS "PSLIB_BRPC_VER: ${PSLIB_BRPC_VER}, PSLIB_BRPC_URL: ${PSLIB_BRPC_URL}") +MESSAGE(STATUS "PSLIB_BRPC_NAME: ${PSLIB_BRPC_NAME}, PSLIB_BRPC_URL: ${PSLIB_BRPC_URL}") SET(PSLIB_BRPC_SOURCE_DIR "${THIRD_PARTY_PATH}/pslib_brpc") SET(PSLIB_BRPC_DOWNLOAD_DIR "${PSLIB_BRPC_SOURCE_DIR}/src/${PSLIB_BRPC_PROJECT}") SET(PSLIB_BRPC_DST_DIR "pslib_brpc") @@ -50,7 +51,7 @@ INCLUDE_DIRECTORIES(${PSLIB_BRPC_INC_DIR}) FILE(WRITE ${PSLIB_BRPC_DOWNLOAD_DIR}/CMakeLists.txt "PROJECT(PSLIB_BRPC)\n" "cmake_minimum_required(VERSION 3.0)\n" - "install(DIRECTORY ${PSLIB_BRPC_VER}/include ${PSLIB_BRPC_VER}/lib \n" + "install(DIRECTORY ${PSLIB_BRPC_NAME}/include ${PSLIB_BRPC_NAME}/lib \n" " DESTINATION ${PSLIB_BRPC_DST_DIR})\n") ExternalProject_Add( @@ -58,8 +59,8 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${PSLIB_BRPC_SOURCE_DIR} DOWNLOAD_DIR ${PSLIB_BRPC_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_BRPC_URL} -c -q -O ${PSLIB_BRPC_VER}.tar.gz - && tar zxvf ${PSLIB_BRPC_VER}.tar.gz + DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_BRPC_URL} -c -q -O ${PSLIB_BRPC_NAME}.tar.gz + && tar zxvf ${PSLIB_BRPC_NAME}.tar.gz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT} diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 6fdc73e93a..f3d66cd883 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -180,7 +180,12 @@ cc_library(parallel_executor SRCS 
parallel_executor.cc DEPS graph build_strategy fast_threaded_ssa_graph_executor variable_helper) -cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib) +if(WITH_PSLIB) + cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib) +else() + cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper) +endif(WITH_PSLIB) + cc_test(data_feed_test SRCS data_feed_test.cc DEPS async_executor) cc_library(prune SRCS prune.cc DEPS framework_proto) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index 8231aff142..fe6488f4b6 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -29,7 +29,9 @@ limitations under the License. */ #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/pybind/pybind.h" +#ifdef PADDLE_WITH_PSLIB #include "pslib.h" +#endif namespace paddle { namespace framework { @@ -48,9 +50,11 @@ void AsyncExecutor::CreateThreads( worker->SetDataFeed(reader); worker->SetFetchVarNames(fetch_var_names); worker->BindingDataFeedMemory(); +#ifdef PADDLE_WITH_PSLIB worker->SetPSlibPtr(_pslib_ptr); worker->SetPullDenseThread(_pull_dense_thread); worker->SetParamConfig(&_param_config); +#endif } void PrepareReaders(std::vector>& readers, // NOLINT @@ -64,6 +68,7 @@ void PrepareReaders(std::vector>& readers, // NOLINT readers[0]->SetFileList(filelist); } +#ifdef PADDLE_WITH_PSLIB void AsyncExecutor::InitServer(const std::string& dist_desc, int index) { _pslib_ptr = std::shared_ptr( @@ -231,6 +236,7 @@ void AsyncExecutor::PrepareDenseThread(const std::string& mode) { _pull_dense_thread->start(); } } +#endif void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, const std::string& data_feed_desc_str, @@ -279,15 +285,21 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, // todo: should be factory method for creating datafeed std::vector> readers; PrepareReaders(readers, actual_thread_num, data_feed_desc, filelist); +#ifdef PADDLE_WITH_PSLIB PrepareDenseThread(mode); +#endif std::vector> workers; workers.resize(actual_thread_num); for (auto& worker : workers) { +#ifdef PADDLE_WITH_PSLIB if (mode == "mpi") { worker.reset(new AsyncExecutorThreadWorker); } else { worker.reset(new ExecutorThreadWorker); } +#else + worker.reset(new ExecutorThreadWorker); +#endif } // prepare thread resource here @@ -306,9 +318,11 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, for (auto& th : threads) { th.join(); } +#ifdef PADDLE_WITH_PSLIB if (mode == "mpi") { _pull_dense_thread->stop(); } +#endif root_scope_->DropKids(); return; diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index 16540c2df2..d6f16d9133 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -64,6 +64,7 @@ class AsyncExecutor { const 
std::vector& fetch_names, const std::string& mode, const bool debug = false); +#ifdef PADDLE_WITH_PSLIB void InitServer(const std::string& dist_desc, int index); void InitWorker( const std::string& dist_desc, @@ -75,7 +76,7 @@ class AsyncExecutor { void InitModel(); void SaveModel(const std::string& path); void InitParamConfig(); - +#endif private: void CreateThreads(ExecutorThreadWorker* worker, const ProgramDesc& main_program, @@ -83,16 +84,18 @@ class AsyncExecutor { const std::vector& fetch_var_names, Scope* root_scope, const int thread_index, const bool debug); +#ifdef PADDLE_WITH_PSLIB void PrepareDenseThread(const std::string& mode); - +#endif public: +#ifdef PADDLE_WITH_PSLIB std::shared_ptr _pslib_ptr; std::shared_ptr _pull_dense_thread; + AsyncWorkerParamConfig _param_config; +#endif Scope* root_scope_; platform::Place place_; - AsyncWorkerParamConfig _param_config; - private: int actual_thread_num; diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index df15a4d293..a58c269220 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -31,6 +31,7 @@ limitations under the License. */ namespace paddle { namespace framework { +#ifdef PADDLE_WITH_PSLIB int DensePullThread::start() { _running = true; _t = std::thread(&DensePullThread::run, this); @@ -112,7 +113,8 @@ void DensePullThread::increase_thread_version( std::lock_guard lock(_mutex_for_version); _training_versions[table_id][thread_id]++; } - +#endif + void ExecutorThreadWorker::CreateThreadOperators(const ProgramDesc& program) { auto& block = program.Block(0); op_names_.clear(); @@ -302,6 +304,7 @@ void ExecutorThreadWorker::SetRootScope(Scope* g_scope) { root_scope_ = g_scope; } +#ifdef PADDLE_WITH_PSLIB // AsyncExecutor void AsyncExecutorThreadWorker::TrainFiles() { SetDevice(); @@ -659,6 +662,7 @@ void AsyncExecutorThreadWorker::check_pull_push_memory( } } } +#endif } // einit_modelnd namespace framework } // end namespace paddle diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 93373b1d2e..c23eb09470 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -25,14 +25,16 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#ifdef PADDLE_WITH_PSLIB #include "pslib.h" +#endif namespace paddle { namespace framework { -const static uint32_t MAX_FEASIGN_NUM = 1000 * 100 * 100; - void CreateTensor(Variable* var, proto::VarType::Type var_type); +#ifdef PADDLE_WITH_PSLIB +const static uint32_t MAX_FEASIGN_NUM = 1000 * 100 * 100; struct AsyncWorkerParamConfig { int slot_dim; @@ -130,6 +132,8 @@ class DensePullThread { float _total_batch_num = 0; }; +#endif + class ExecutorThreadWorker { public: ExecutorThreadWorker() @@ -154,12 +158,14 @@ class ExecutorThreadWorker { virtual void TrainFiles(); // set fetch variable names from python interface assigned by users void SetFetchVarNames(const std::vector& fetch_var_names); +#ifdef PADDLE_WITH_PSLIB virtual void SetPSlibPtr( std::shared_ptr pslib_ptr) {}; virtual void SetPullDenseThread( std::shared_ptr dpt) {} virtual void SetParamConfig( AsyncWorkerParamConfig * param_config) {} +#endif private: void CreateThreadScope(const framework::ProgramDesc& program); @@ -188,6 +194,7 @@ class ExecutorThreadWorker { bool debug_; }; +#ifdef PADDLE_WITH_PSLIB class AsyncExecutorThreadWorker: public ExecutorThreadWorker { public: AsyncExecutorThreadWorker() {} @@ -238,6 +245,7 @@ class AsyncExecutorThreadWorker: public ExecutorThreadWorker { AsyncWorkerParamConfig* _param_config; }; +#endif } // namespace framework } // namespace paddle diff --git a/paddle/fluid/pybind/async_executor_py.cc b/paddle/fluid/pybind/async_executor_py.cc index 8dfba0d269..71a0e256e4 100644 --- a/paddle/fluid/pybind/async_executor_py.cc +++ b/paddle/fluid/pybind/async_executor_py.cc @@ -41,6 +41,7 @@ namespace pd = paddle::framework; namespace paddle { namespace pybind { using set_name_func = void (pd::DataFeedDesc::*)(const std::string&); +#ifdef PADDLE_WITH_PSLIB void BindAsyncExecutor(py::module* m) { py::class_(*m, "AsyncExecutor") .def(py::init([](framework::Scope* scope, const platform::Place& place) { @@ -56,5 +57,15 @@ void BindAsyncExecutor(py::module* m) { .def("init_model", &framework::AsyncExecutor::InitModel) .def("save_model", &framework::AsyncExecutor::SaveModel); } // end BindAsyncExecutor +#else +void BindAsyncExecutor(py::module* m) { + py::class_(*m, "AsyncExecutor") + .def(py::init([](framework::Scope* scope, const platform::Place& place) { + return std::unique_ptr( + new framework::AsyncExecutor(scope, place)); + })) + .def("run_from_files", &framework::AsyncExecutor::RunFromFile) +} // end BindAsyncExecutor +#endif } // end namespace pybind } // end namespace paddle From 95b887c4f26c794e2b01daa5c97b32582de7c56a Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Thu, 13 Dec 2018 15:30:31 +0800 Subject: [PATCH 38/62] remove commit --- paddle/fluid/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index d980b36d9b..6b526f0103 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -1,7 +1,6 @@ add_subdirectory(memory) add_subdirectory(platform) add_subdirectory(framework) -#add_subdirectory(distributed) add_subdirectory(operators) add_subdirectory(string) add_subdirectory(recordio) From c9b799896e6b78a4248cd8c9288ab6adacf628ad Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Thu, 13 Dec 2018 15:33:45 +0800 Subject: [PATCH 39/62] fix tag in async_executor --- paddle/fluid/framework/async_executor.cc | 238 ++++++++--------- paddle/fluid/framework/async_executor.h | 
5 +- .../fluid/framework/executor_thread_worker.cc | 249 +++++++++--------- .../fluid/framework/executor_thread_worker.h | 178 ++++++------- 4 files changed, 336 insertions(+), 334 deletions(-) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index fe6488f4b6..0fe7f3bd5c 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -102,139 +102,139 @@ void AsyncExecutor::GatherServers( } void AsyncExecutor::InitParamConfig() { - for (int i = 0; i < - _pslib_ptr->get_param()->server_param().\ - downpour_server_param().\ - downpour_table_param_size(); - ++i) { - if (_pslib_ptr->get_param()->server_param().\ - downpour_server_param().downpour_table_param(i).\ - table_class().find("SparseTable") != -1) { - _param_config.fea_dim = _pslib_ptr->get_param()->server_param().\ - downpour_server_param().\ - downpour_table_param(i).\ - accessor().fea_dim(); - break; - } + for (int i = 0; i < + _pslib_ptr->get_param()->server_param(). \ + downpour_server_param(). \ + downpour_table_param_size(); + ++i) { + if (_pslib_ptr->get_param()->server_param(). \ + downpour_server_param().downpour_table_param(i). \ + table_class().find("SparseTable") != -1) { + _param_config.fea_dim = _pslib_ptr->get_param()->server_param(). \ + downpour_server_param(). \ + downpour_table_param(i). \ + accessor().fea_dim(); + break; } - _param_config.slot_dim = _param_config.fea_dim - 2; - _param_config.tmp_push_dense_wait_times = static_cast( - _pslib_ptr->get_param()->trainer_param().push_dense_per_batch()); - _param_config.tmp_push_sparse_wait_times = static_cast( - _pslib_ptr->get_param()->trainer_param().push_sparse_per_batch()); - - for (auto t = 0u; - t < _pslib_ptr->get_param()->trainer_param().skip_op_size(); - ++t) { - _param_config.skip_op.push_back( - _pslib_ptr->get_param()->trainer_param().skip_op(t)); + } + _param_config.slot_dim = _param_config.fea_dim - 2; + _param_config.tmp_push_dense_wait_times = static_cast( + _pslib_ptr->get_param()->trainer_param().push_dense_per_batch()); + _param_config.tmp_push_sparse_wait_times = static_cast( + _pslib_ptr->get_param()->trainer_param().push_sparse_per_batch()); + + for (auto t = 0u; + t < _pslib_ptr->get_param()->trainer_param().skip_op_size(); + ++t) { + _param_config.skip_op.push_back( + _pslib_ptr->get_param()->trainer_param().skip_op(t)); + } + + for (auto t = 0u; + t < _pslib_ptr->get_param()->trainer_param().sparse_table_size(); + ++t) { + auto& table = _pslib_ptr->get_param()->trainer_param().sparse_table(t); + std::vector tmp_sparse_variable_name; + for (int i = 0u; i < table.slot_value_size(); ++i) { + tmp_sparse_variable_name.push_back(table.slot_value(i)); + _param_config.slot_alias_to_table[table.slot_key(i)] = + table.table_id(); } - - for (auto t = 0u; - t < _pslib_ptr->get_param()->trainer_param().sparse_table_size(); - ++t) { - auto& table = _pslib_ptr->get_param()->trainer_param().sparse_table(t); - std::vector tmp_sparse_variable_name; - for (int i = 0u; i < table.slot_value_size(); ++i) { - tmp_sparse_variable_name.push_back(table.slot_value(i)); - _param_config.slot_alias_to_table[table.slot_key(i)] = - table.table_id(); - } - std::vector tmp_sparse_gradient_variable_name; - for (auto i = 0u; i < table.slot_gradient_size(); ++i) { - tmp_sparse_gradient_variable_name.push_back( - table.slot_gradient(i)); - } - _param_config.slot_input_vec[table.table_id()] = - std::move(tmp_sparse_variable_name); - _param_config.gradient_var[table.table_id()] = - 
std::move(tmp_sparse_gradient_variable_name); - _param_config.sparse_table_id.push_back(table.table_id()); + std::vector tmp_sparse_gradient_variable_name; + for (auto i = 0u; i < table.slot_gradient_size(); ++i) { + tmp_sparse_gradient_variable_name.push_back( + table.slot_gradient(i)); } - - for (auto t = 0u; - t < _pslib_ptr->get_param()->trainer_param().dense_table_size(); - ++t) { - auto& table = _pslib_ptr->get_param()->trainer_param().dense_table(t); - std::vector tmp_dense_variable_name; - for (int i = 0u; i < table.dense_variable_name_size(); ++i) { - tmp_dense_variable_name.push_back(table.dense_variable_name(i)); - } - std::vector tmp_dense_gradient_variable_name; - for (auto i = 0u; i < table.dense_gradient_variable_name_size(); ++i) { - tmp_dense_gradient_variable_name.push_back( - table.dense_gradient_variable_name(i)); - } - _param_config.dense_variable_name[table.table_id()] = - std::move(tmp_dense_variable_name); - _param_config.dense_gradient_variable_name[table.table_id()] = - std::move(tmp_dense_gradient_variable_name); - _param_config.dense_table_id.push_back(table.table_id()); - _param_config.dense_table_size.push_back(table.fea_dim()); + _param_config.slot_input_vec[table.table_id()] = + std::move(tmp_sparse_variable_name); + _param_config.gradient_var[table.table_id()] = + std::move(tmp_sparse_gradient_variable_name); + _param_config.sparse_table_id.push_back(table.table_id()); + } + + for (auto t = 0u; + t < _pslib_ptr->get_param()->trainer_param().dense_table_size(); + ++t) { + auto& table = _pslib_ptr->get_param()->trainer_param().dense_table(t); + std::vector tmp_dense_variable_name; + for (int i = 0u; i < table.dense_variable_name_size(); ++i) { + tmp_dense_variable_name.push_back(table.dense_variable_name(i)); + } + std::vector tmp_dense_gradient_variable_name; + for (auto i = 0u; i < table.dense_gradient_variable_name_size(); ++i) { + tmp_dense_gradient_variable_name.push_back( + table.dense_gradient_variable_name(i)); } + _param_config.dense_variable_name[table.table_id()] = + std::move(tmp_dense_variable_name); + _param_config.dense_gradient_variable_name[table.table_id()] = + std::move(tmp_dense_gradient_variable_name); + _param_config.dense_table_id.push_back(table.table_id()); + _param_config.dense_table_size.push_back(table.fea_dim()); + } } void AsyncExecutor::InitModel() { - for (auto table_id : _param_config.dense_table_id) { - std::vector regions; - for (auto& t : _param_config.dense_variable_name[table_id]) { - Variable* var = root_scope_->FindVar(t); - CHECK(var != nullptr) << "var[" << t << "] not found"; - LoDTensor* tensor = var->GetMutable(); - - float* g = tensor->data(); - CHECK(g != nullptr) << "var[" << t << "] value not initialized"; - - float init_range = 0.2; - int rown = tensor->dims()[0]; - init_range /= sqrt(rown); - - std::normal_distribution ndistr(0.0, 1.0); - for (auto i = 0u; i < tensor->numel(); ++i) { - g[i] = ndistr(local_random_engine()) * init_range; - } - - paddle::ps::Region reg(g, tensor->numel()); - regions.emplace_back(std::move(reg)); - } - - auto push_status = - _pslib_ptr->_worker_ptr->push_dense_param( - regions.data(), regions.size(), table_id); - push_status.wait(); - auto status = push_status.get(); - if (status != 0) { - LOG(FATAL) << "push dense param failed, status[" << status << "]"; - exit(-1); - } + for (auto table_id : _param_config.dense_table_id) { + std::vector regions; + for (auto& t : _param_config.dense_variable_name[table_id]) { + Variable* var = root_scope_->FindVar(t); + CHECK(var != nullptr) << 
"var[" << t << "] not found"; + LoDTensor* tensor = var->GetMutable(); + + float* g = tensor->data(); + CHECK(g != nullptr) << "var[" << t << "] value not initialized"; + + float init_range = 0.2; + int rown = tensor->dims()[0]; + init_range /= sqrt(rown); + + std::normal_distribution ndistr(0.0, 1.0); + for (auto i = 0u; i < tensor->numel(); ++i) { + g[i] = ndistr(local_random_engine()) * init_range; + } + + paddle::ps::Region reg(g, tensor->numel()); + regions.emplace_back(std::move(reg)); } + + auto push_status = + _pslib_ptr->_worker_ptr->push_dense_param( + regions.data(), regions.size(), table_id); + push_status.wait(); + auto status = push_status.get(); + if (status != 0) { + LOG(FATAL) << "push dense param failed, status[" << status << "]"; + exit(-1); + } + } } void AsyncExecutor::SaveModel(const std::string& path) { - auto ret = _pslib_ptr->_worker_ptr->flush(); - ret.wait(); - ret = _pslib_ptr->_worker_ptr->save(path, 0); - ret.wait(); - int32_t feasign_cnt = ret.get(); - if (feasign_cnt == -1) { // (colourful-tree) TODO should be feasign_cnt < 0 - LOG(FATAL) << "save model failed"; - exit(-1); - } + auto ret = _pslib_ptr->_worker_ptr->flush(); + ret.wait(); + ret = _pslib_ptr->_worker_ptr->save(path, 0); + ret.wait(); + int32_t feasign_cnt = ret.get(); + if (feasign_cnt == -1) { // (colourful-tree) TODO should be feasign_cnt < 0 + LOG(FATAL) << "save model failed"; + exit(-1); + } } void AsyncExecutor::PrepareDenseThread(const std::string& mode) { - if (mode == "mpi") { - DensePullThreadParam param; - param.ps_client = _pslib_ptr->_worker_ptr;; - param.threshold = 1; - param.training_thread_num = actual_thread_num; - param.root_scope = root_scope_; - param.dense_params = &_param_config.dense_variable_name; - - _pull_dense_thread = std::shared_ptr( - new DensePullThread(param)); - _pull_dense_thread->start(); - } + if (mode == "mpi") { + DensePullThreadParam param; + param.ps_client = _pslib_ptr->_worker_ptr;; + param.threshold = 1; + param.training_thread_num = actual_thread_num; + param.root_scope = root_scope_; + param.dense_params = &_param_config.dense_variable_name; + + _pull_dense_thread = std::shared_ptr( + new DensePullThread(param)); + _pull_dense_thread->start(); + } } #endif diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index d6f16d9133..1264212641 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -45,7 +45,8 @@ inline std::default_random_engine& local_random_engine() { engine_wrapper_t() { static std::atomic x(0); std::seed_seq sseq = {x++, x++, x++, - static_cast(current_realtime() * 1000)}; + static_cast( + current_realtime() * 1000)}; engine.seed(sseq); } }; @@ -77,6 +78,7 @@ class AsyncExecutor { void SaveModel(const std::string& path); void InitParamConfig(); #endif + private: void CreateThreads(ExecutorThreadWorker* worker, const ProgramDesc& main_program, @@ -87,6 +89,7 @@ class AsyncExecutor { #ifdef PADDLE_WITH_PSLIB void PrepareDenseThread(const std::string& mode); #endif + public: #ifdef PADDLE_WITH_PSLIB std::shared_ptr _pslib_ptr; diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index a58c269220..59679842bc 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -33,87 +33,87 @@ namespace framework { #ifdef PADDLE_WITH_PSLIB int DensePullThread::start() { - _running = true; - _t = std::thread(&DensePullThread::run, this); - 
return 0;
+  _running = true;
+  _t = std::thread(&DensePullThread::run, this);
+  return 0;
 }
 
 void DensePullThread::run() {
-    while (_running) {
-        _pull_dense_status.resize(0);
-        for (auto& t : _dense_variable_name) {
-            if (check_update_param(t.first)) {
-                auto status = pull_dense(t.first);
-                _pull_dense_status.emplace_back(std::move(status));
-                reset_thread_version(t.first);
-            }
-        }
-        if (_pull_dense_status.size() != 0) {
-            wait_all();
-        }
-
-        usleep(_sleep_time_ms * 1000);
+  while (_running) {
+    _pull_dense_status.resize(0);
+    for (auto& t : _dense_variable_name) {
+      if (check_update_param(t.first)) {
+        auto status = pull_dense(t.first);
+        _pull_dense_status.emplace_back(std::move(status));
+        reset_thread_version(t.first);
+      }
+    }
+    if (_pull_dense_status.size() != 0) {
+      wait_all();
     }
+
+    usleep(_sleep_time_ms * 1000);
+  }
 }
 
 bool DensePullThread::check_update_param(uint64_t table_id) {
-    {
-        std::lock_guard<std::mutex> lock(_mutex_for_version);
-        auto& version = _training_versions[table_id];
-        _current_version[table_id] =
-            *(std::min_element(version.begin(), version.end()));
-    }
-    if (_current_version[table_id] - _last_versions[table_id] < _threshold) {
-        return false;
-    }
-    return true;
+  {
+    std::lock_guard<std::mutex> lock(_mutex_for_version);
+    auto& version = _training_versions[table_id];
+    _current_version[table_id] =
+        *(std::min_element(version.begin(), version.end()));
+  }
+  if (_current_version[table_id] - _last_versions[table_id] < _threshold) {
+    return false;
+  }
+  return true;
 }
 
 void DensePullThread::reset_thread_version(uint64_t table_id) {
-    std::lock_guard<std::mutex> lock(_mutex_for_version);
-    _last_versions[table_id] = _current_version[table_id];
+  std::lock_guard<std::mutex> lock(_mutex_for_version);
+  _last_versions[table_id] = _current_version[table_id];
 }
 
 std::future<int32_t> DensePullThread::pull_dense(uint64_t table_id) {
-    auto& regions = _regions[table_id];
-    regions.clear();
-    auto& variables = _dense_variable_name[table_id];
-    regions.resize(variables.size());
-
-    for (auto i = 0u; i < variables.size(); ++i) {
-        auto& t = variables[i];
-        Variable* var = _root_scope->FindVar(t);
-        LoDTensor* tensor = var->GetMutable<LoDTensor>();
-
-        float* w = tensor->data<float>();
-        paddle::ps::Region reg(w, tensor->numel());
-        regions[i] = std::move(reg);
-    }
-    return _ps_client->pull_dense(regions.data(), regions.size(), table_id);
+  auto& regions = _regions[table_id];
+  regions.clear();
+  auto& variables = _dense_variable_name[table_id];
+  regions.resize(variables.size());
+
+  for (auto i = 0u; i < variables.size(); ++i) {
+    auto& t = variables[i];
+    Variable* var = _root_scope->FindVar(t);
+    LoDTensor* tensor = var->GetMutable<LoDTensor>();
+
+    float* w = tensor->data<float>();
+    paddle::ps::Region reg(w, tensor->numel());
+    regions[i] = std::move(reg);
+  }
+  return _ps_client->pull_dense(regions.data(), regions.size(), table_id);
 }
 
 void DensePullThread::wait_all() {
-    for (auto& t : _pull_dense_status) {
-        t.wait();
-        auto status = t.get();
-        if (status != 0) {
-            LOG(WARNING) << "pull dense failed times:" <<
-                ++_pull_dense_fail_times;
-        }
+  for (auto& t : _pull_dense_status) {
+    t.wait();
+    auto status = t.get();
+    if (status != 0) {
+      LOG(WARNING) << "pull dense failed times:" <<
+          ++_pull_dense_fail_times;
     }
-
-    if (_pull_dense_fail_times > 20) {
-        LOG(FATAL) << "pull dense failed times more than 20 times";
-        exit(-1);
-    }
-
-    _pull_dense_status.resize(0);
+  }
+
+  if (_pull_dense_fail_times > 20) {
+    LOG(FATAL) << "pull dense failed times more than 20 times";
+    exit(-1);
+  }
+
+  _pull_dense_status.resize(0);
 }
 
 void DensePullThread::increase_thread_version(
     int thread_id, uint64_t table_id) {
-    std::lock_guard<std::mutex> lock(_mutex_for_version);
-    _training_versions[table_id][thread_id]++;
+  std::lock_guard<std::mutex> lock(_mutex_for_version);
+  _training_versions[table_id][thread_id]++;
 }
-#endif 
+#endif
 
 void ExecutorThreadWorker::CreateThreadOperators(const ProgramDesc& program) {
   auto& block = program.Block(0);
@@ -336,56 +336,56 @@ void AsyncExecutorThreadWorker::TrainFiles() {
 
 void AsyncExecutorThreadWorker::SetPSlibPtr(
     std::shared_ptr<paddle::distributed::PSlib> pslib_ptr) {
-    _pslib_ptr = pslib_ptr;
+  _pslib_ptr = pslib_ptr;
 }
 
 void AsyncExecutorThreadWorker::SetPullDenseThread(
     std::shared_ptr<DensePullThread> dpt) {
-    _pull_dense_thread = dpt;
+  _pull_dense_thread = dpt;
 }
 
 void AsyncExecutorThreadWorker::TrainOneNetwork() {
-    PrepareParams();
-
-    for (auto& op : ops_) {
-        if (op->Type().find("sgd") != std::string::npos) {
-            continue;
-        }
-        bool need_skip = false;
-        for (auto t = 0u; t < _param_config->skip_op.size(); ++t) {
-            if (op->Type().find(_param_config->skip_op[t]) !=
-                std::string::npos) {
-                need_skip = true;
-                break;
-            }
-        }
-        if (!need_skip) {
-            op->Run(*thread_scope_, place_);
-        }
+  PrepareParams();
+
+  for (auto& op : ops_) {
+    if (op->Type().find("sgd") != std::string::npos) {
+      continue;
+    }
+    bool need_skip = false;
+    for (auto t = 0u; t < _param_config->skip_op.size(); ++t) {
+      if (op->Type().find(_param_config->skip_op[t]) !=
+          std::string::npos) {
+        need_skip = true;
+        break;
+      }
+    }
+    if (!need_skip) {
+      op->Run(*thread_scope_, place_);
     }
-    UpdateParams();
+  }
+  UpdateParams();
 }
 
 void AsyncExecutorThreadWorker::SetParamConfig(
     AsyncWorkerParamConfig* param_config) {
-    _param_config = param_config;
+  _param_config = param_config;
 }
 
 void AsyncExecutorThreadWorker::PrepareParams() {
-    for (auto table_id : _param_config->sparse_table_id) {
-        PullSparse(table_id);
-        for (auto& t : _pull_sparse_status) {
-            t.wait();
-            auto status = t.get();
-            if (status != 0) {
-                LOG(ERROR) << "pull sparse failed, status[" << status << "]";
-                exit(-1);
-            }
-        }
+  for (auto table_id : _param_config->sparse_table_id) {
+    PullSparse(table_id);
+    for (auto& t : _pull_sparse_status) {
+      t.wait();
+      auto status = t.get();
+      if (status != 0) {
+        LOG(ERROR) << "pull sparse failed, status[" << status << "]";
+        exit(-1);
+      }
     }
-    _pull_sparse_status.resize(0);
+  }
+  _pull_sparse_status.resize(0);
 
-    for (auto table_id : _param_config->sparse_table_id) {
-        FillSparse(table_id);
-    }
+  for (auto table_id : _param_config->sparse_table_id) {
+    FillSparse(table_id);
+  }
 }
 
 void AsyncExecutorThreadWorker::UpdateParams() {
@@ -426,21 +426,20 @@ void AsyncExecutorThreadWorker::UpdateParams() {
 }
 
 void AsyncExecutorThreadWorker::PushDense(int table_id) {
-    std::vector<paddle::ps::Region> regions;
-    for (auto& t : _param_config->dense_gradient_variable_name[table_id]) {
-        Variable* var = thread_scope_->FindVar(t);
-        CHECK(var != nullptr) << "var[" << t << "] not found";
-        LoDTensor* tensor = var->GetMutable<LoDTensor>();
-        int count = tensor->numel();
-        float* g = tensor->data<float>();
-        paddle::ps::Region reg(g, count);
-        regions.emplace_back(std::move(reg));
-    }
-
-    auto status = _pslib_ptr->_worker_ptr->push_dense(
-        regions.data(), regions.size(), table_id);
-    _push_dense_status.push_back(std::move(status));
-
+  std::vector<paddle::ps::Region> regions;
+  for (auto& t : _param_config->dense_gradient_variable_name[table_id]) {
+    Variable* var = thread_scope_->FindVar(t);
+    CHECK(var != nullptr) << "var[" << t << "] not found";
+    LoDTensor* tensor = var->GetMutable<LoDTensor>();
+    int count = tensor->numel();
+    float* g = tensor->data<float>();
+    paddle::ps::Region reg(g, count);
+    regions.emplace_back(std::move(reg));
+  }
+
+  auto status = _pslib_ptr->_worker_ptr->push_dense(
+      regions.data(), regions.size(), table_id);
+  _push_dense_status.push_back(std::move(status));
 }
 
 void AsyncExecutorThreadWorker::PullSparse(int table_id) {
@@ -643,24 +642,24 @@ void AsyncExecutorThreadWorker::check_pull_push_memory(
     const std::vector<uint64_t>& features,
     std::vector<std::vector<float>>& push_g,
     int dim) {
-    push_g.resize(features.size() + 1);
-    for (auto& t : push_g) {
-        t.resize(dim);
-    }
+  push_g.resize(features.size() + 1);
+  for (auto& t : push_g) {
+    t.resize(dim);
+  }
 }
 
 void AsyncExecutorThreadWorker::check_pull_push_memory(
-        const std::vector<uint64_t>& features,
-        std::vector<float*>& push_g,
-        int dim) {
-    if (features.size() > push_g.size()) {
-        push_g.reserve(features.size() + 1);
-        auto size = features.size() - push_g.size() + 1;
-        for (auto i = 0u; i < size; ++i) {
-            float* ptr = new float[dim];
-            push_g.push_back(ptr);
-        }
+    const std::vector<uint64_t>& features,
+    std::vector<float*>& push_g,
+    int dim) {
+  if (features.size() > push_g.size()) {
+    push_g.reserve(features.size() + 1);
+    auto size = features.size() - push_g.size() + 1;
+    for (auto i = 0u; i < size; ++i) {
+      float* ptr = new float[dim];
+      push_g.push_back(ptr);
     }
+  }
 }
 #endif
 
diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h
index c23eb09470..20410b4c06 100644
--- a/paddle/fluid/framework/executor_thread_worker.h
+++ b/paddle/fluid/framework/executor_thread_worker.h
@@ -67,79 +67,79 @@ struct DensePullThreadParam {
 class DensePullThread {
  public:
  explicit DensePullThread(const DensePullThreadParam& param) :
-      _running(false) {
-      _ps_client = param.ps_client;
-      _threshold = param.threshold;
-      _thread_num = param.training_thread_num;
-      _root_scope = param.root_scope;
-      _sleep_time_ms = param.sleep_time_ms;
-
-      for (auto& t : *param.dense_params) {
-          _dense_variable_name[t.first].insert(
-              _dense_variable_name[t.first].end(),
-              t.second.begin(), t.second.end());
-          _training_versions[t.first].resize(_thread_num, 0);
-          _last_versions[t.first] = 0;
-          _current_version[t.first] = 0;
-      }
+      _running(false) {
+    _ps_client = param.ps_client;
+    _threshold = param.threshold;
+    _thread_num = param.training_thread_num;
+    _root_scope = param.root_scope;
+    _sleep_time_ms = param.sleep_time_ms;
+
+    for (auto& t : *param.dense_params) {
+      _dense_variable_name[t.first].insert(
+          _dense_variable_name[t.first].end(),
+          t.second.begin(), t.second.end());
+      _training_versions[t.first].resize(_thread_num, 0);
+      _last_versions[t.first] = 0;
+      _current_version[t.first] = 0;
    }
-
-    int start();
-
-    void stop() {
-        if (_running) {
-            _running = false;
-            _t.join();
-        }
+  }
+
+  int start();
+
+  void stop() {
+    if (_running) {
+      _running = false;
+      _t.join();
    }
-
-    void increase_thread_version(int thread_id, uint64_t table_id);
-    void reset_thread_version(uint64_t table_id);
-    std::future<int32_t> pull_dense(uint64_t table_id);
-    void pull_dense2(uint64_t table_id);
-    void wait_all();
-
+  }
+
+  void increase_thread_version(int thread_id, uint64_t table_id);
+  void reset_thread_version(uint64_t table_id);
+  std::future<int32_t> pull_dense(uint64_t table_id);
+  void pull_dense2(uint64_t table_id);
+  void wait_all();
+
  private:
-    void run();
-    bool check_update_param(uint64_t table_id);
-
+  void run();
+  bool check_update_param(uint64_t table_id);
+
  private:
-    std::shared_ptr<paddle::ps::PSClient> _ps_client;
-    int _thread_num;
-    int _threshold;
-    int _sleep_time_ms;
-    Scope* _root_scope;
-    bool _running;
-
-    std::map<uint64_t, uint64_t> _last_versions;
-    std::map<uint64_t, uint64_t> _current_version;
-    std::mutex _mutex_for_version;
-    std::map<uint64_t, std::vector<uint64_t>> _training_versions;
-    std::map<uint64_t, std::vector<std::string>> _dense_variable_name;
-
-    std::thread _t;
-
-    std::vector<::std::future<int32_t>> _pull_dense_status;
-
-    std::map<uint64_t, std::vector<paddle::ps::Region>> _regions;
-    uint32_t _pull_dense_fail_times = 0;
-
-    std::vector<float> _base_norm_param;
-    std::vector<float> _mean;
-    std::vector<float> _scale;
-    float _squared_sum_epsilon = 1e-4;
-    std::mutex _mutex_for_mean_scale;
-
-    float _total_batch_num = 0;
+  std::shared_ptr<paddle::ps::PSClient> _ps_client;
+  int _thread_num;
+  int _threshold;
+  int _sleep_time_ms;
+  Scope* _root_scope;
+  bool _running;
+
+  std::map<uint64_t, uint64_t> _last_versions;
+  std::map<uint64_t, uint64_t> _current_version;
+  std::mutex _mutex_for_version;
+  std::map<uint64_t, std::vector<uint64_t>> _training_versions;
+  std::map<uint64_t, std::vector<std::string>> _dense_variable_name;
+
+  std::thread _t;
+
+  std::vector<::std::future<int32_t>> _pull_dense_status;
+
+  std::map<uint64_t, std::vector<paddle::ps::Region>> _regions;
+  uint32_t _pull_dense_fail_times = 0;
+
+  std::vector<float> _base_norm_param;
+  std::vector<float> _mean;
+  std::vector<float> _scale;
+  float _squared_sum_epsilon = 1e-4;
+  std::mutex _mutex_for_mean_scale;
+
+  float _total_batch_num = 0;
 };
 #endif
 
 class ExecutorThreadWorker {
  public:
-  ExecutorThreadWorker()
-      : thread_id_(-1), root_scope_(NULL), thread_scope_(NULL), debug_(false) {}
+ExecutorThreadWorker()
+    : thread_id_(-1), root_scope_(NULL), thread_scope_(NULL), debug_(false) {}
   virtual ~ExecutorThreadWorker() {}
-
+
   void CreateThreadResource(const framework::ProgramDesc& program,
                             const paddle::platform::Place& place);
   void SetThreadId(int tid);
@@ -160,7 +160,7 @@ class ExecutorThreadWorker {
   void SetFetchVarNames(const std::vector<std::string>& fetch_var_names);
 #ifdef PADDLE_WITH_PSLIB
   virtual void SetPSlibPtr(
-      std::shared_ptr<paddle::distributed::PSlib> pslib_ptr) {};
+      std::shared_ptr<paddle::distributed::PSlib> pslib_ptr) {}
   virtual void SetPullDenseThread(
       std::shared_ptr<DensePullThread> dpt) {}
   virtual void SetParamConfig(
@@ -218,32 +218,32 @@ class AsyncExecutorThreadWorker: public ExecutorThreadWorker {
   void check_pull_push_memory(const std::vector<uint64_t>& features,
                               std::vector<std::vector<float>>& push_g,
                               int dim);
-    void collect_feasign_info(int table_id);
-
+  void collect_feasign_info(int table_id);
+
  private:
-    struct FeasignInfo {
-        uint32_t slot;
-        uint32_t ins;
-        int64_t label;
-    };
-
-    std::map<uint64_t, std::vector<uint64_t>> _features;
-    std::map<uint64_t, std::vector<FeasignInfo>> _fea_info;
-    std::map<uint64_t, std::vector<std::vector<float>>> _feature_value;
-    std::map<uint64_t, std::vector<std::vector<float>>> _feature_push_value;
-
-
-    std::shared_ptr<paddle::distributed::PSlib> _pslib_ptr;
-
-    std::shared_ptr<DensePullThread> _pull_dense_thread;
-
-    std::vector<::std::future<int32_t>> _pull_sparse_status;
-    std::vector<::std::future<int32_t>> _pull_dense_status;
-    std::vector<::std::future<int32_t>> _push_sparse_status;
-    std::vector<::std::future<int32_t>> _push_dense_status;
-
-    AsyncWorkerParamConfig* _param_config;
-
+  struct FeasignInfo {
+    uint32_t slot;
+    uint32_t ins;
+    int64_t label;
+  };
+
+  std::map<uint64_t, std::vector<uint64_t>> _features;
+  std::map<uint64_t, std::vector<FeasignInfo>> _fea_info;
+  std::map<uint64_t, std::vector<std::vector<float>>> _feature_value;
+  std::map<uint64_t, std::vector<std::vector<float>>> _feature_push_value;
+
+
+  std::shared_ptr<paddle::distributed::PSlib> _pslib_ptr;
+
+  std::shared_ptr<DensePullThread> _pull_dense_thread;
+
+  std::vector<::std::future<int32_t>> _pull_sparse_status;
+  std::vector<::std::future<int32_t>> _pull_dense_status;
+  std::vector<::std::future<int32_t>> _push_sparse_status;
+  std::vector<::std::future<int32_t>> _push_dense_status;
+
+  AsyncWorkerParamConfig* _param_config;
+
 };
 #endif
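
Note on the mechanism above: DensePullThread decouples dense-parameter refresh
from training. Trainer threads advance per-thread counters in
_training_versions (via increase_thread_version, presumably once per
minibatch), and the background puller re-fetches dense tables only after the
slowest thread has moved _threshold steps past the last pulled version
(check_update_param / reset_thread_version). A minimal standalone sketch of
that version gating for a single table follows; TrainerVersions and its
members are illustrative names, not Paddle or pslib APIs.

    #include <algorithm>
    #include <cstdint>
    #include <mutex>
    #include <vector>

    // Sketch only: mirrors the increase_thread_version /
    // check_update_param / reset_thread_version protocol above.
    class TrainerVersions {
     public:
      TrainerVersions(int thread_num, uint64_t threshold)
          : versions_(thread_num, 0), threshold_(threshold) {}

      // Called by each training thread as it makes progress.
      void Increase(int thread_id) {
        std::lock_guard<std::mutex> lock(mu_);
        versions_[thread_id]++;
      }

      // Called by the puller loop: pull only when the slowest thread has
      // advanced threshold_ steps beyond the last pulled version.
      bool ShouldPull() {
        std::lock_guard<std::mutex> lock(mu_);
        uint64_t current =
            *std::min_element(versions_.begin(), versions_.end());
        if (current - last_pulled_ < threshold_) return false;
        last_pulled_ = current;  // reset, as reset_thread_version does
        return true;
      }

     private:
      std::mutex mu_;
      std::vector<uint64_t> versions_;
      uint64_t last_pulled_ = 0;
      uint64_t threshold_;
    };

Gating on the minimum across threads keeps a straggler from pulling stale
parameters out from under faster threads more often than _threshold allows.
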
From d839bd0dd4ffecaa061aed32684a1a0b09f28d30 Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Thu, 13 Dec 2018 19:14:35 +0800
Subject: [PATCH 40/62] simple commit

---
 paddle/fluid/framework/async_executor.h | 52 +++++++++++--------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h
index 1264212641..a82e941559 100644
--- a/paddle/fluid/framework/async_executor.h
+++ b/paddle/fluid/framework/async_executor.h
@@ -17,13 +17,13 @@ limitations under the License. */
 #include <time.h>
 #include <map>
 #include <memory>
-#include <mutex>  // NOLINT
+#include <mutex>   // NOLINT
+#include <random>  // local_random_engine
 #include <set>
 #include <string>
 #include <thread>  // NOLINT
 #include <typeinfo>
 #include <vector>
-#include <random>  // local_random_engine
 #include "paddle/fluid/framework/data_feed.pb.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/executor_thread_worker.h"
@@ -34,24 +34,23 @@ namespace paddle {
 namespace framework {
 
 inline double current_realtime() {
-    struct timespec tp;
-    clock_gettime(CLOCK_REALTIME, &tp);
-    return tp.tv_sec + tp.tv_nsec * 1e-9;
+  struct timespec tp;
+  clock_gettime(CLOCK_REALTIME, &tp);
+  return tp.tv_sec + tp.tv_nsec * 1e-9;
 }
 
 inline std::default_random_engine& local_random_engine() {
-    struct engine_wrapper_t {
-        std::default_random_engine engine;
-        engine_wrapper_t() {
-            static std::atomic<uint64_t> x(0);
-            std::seed_seq sseq = {x++, x++, x++,
-                static_cast<uint64_t>(
-                    current_realtime() * 1000)};
-            engine.seed(sseq);
-        }
-    };
-    thread_local engine_wrapper_t r;
-    return r.engine;
+  struct engine_wrapper_t {
+    std::default_random_engine engine;
+    engine_wrapper_t() {
+      static std::atomic<uint64_t> x(0);
+      std::seed_seq sseq = {x++, x++, x++,
+                            static_cast<uint64_t>(current_realtime() * 1000)};
+      engine.seed(sseq);
+    }
+  };
+  thread_local engine_wrapper_t r;
+  return r.engine;
 }
 
 class AsyncExecutor {
@@ -63,14 +62,12 @@ class AsyncExecutor {
                    const std::vector<std::string>& filelist,
                    const int thread_num,
                    const std::vector<std::string>& fetch_names,
-                   const std::string& mode,
-                   const bool debug = false);
+                   const std::string& mode, const bool debug = false);
 #ifdef PADDLE_WITH_PSLIB
   void InitServer(const std::string& dist_desc, int index);
-  void InitWorker(
-      const std::string& dist_desc,
-      const std::vector<uint64_t>& host_sign_list,
-      int node_num, int index);
+  void InitWorker(const std::string& dist_desc,
+                  const std::vector<uint64_t>& host_sign_list, int node_num,
+                  int index);
   uint64_t StartServer();
   void StopServer();
   void GatherServers(const std::vector<uint64_t>& host_sign_list, int node_num);
@@ -92,19 +89,16 @@ class AsyncExecutor {
 
 public:
 #ifdef PADDLE_WITH_PSLIB
-    std::shared_ptr<paddle::distributed::PSlib> _pslib_ptr;
-    std::shared_ptr<DensePullThread> _pull_dense_thread;
+  std::shared_ptr<paddle::distributed::PSlib> _pslib_ptr;
+  std::shared_ptr<DensePullThread> _pull_dense_thread;
   AsyncWorkerParamConfig _param_config;
 #endif
   Scope* root_scope_;
   platform::Place place_;
-
+
 private:
  int actual_thread_num;
-
 };
-
-
 }  // namespace framework
 }  // namespace paddle

From f6c30863295de7d3a989c21d7d8e1427c888c301 Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Thu, 13 Dec 2018 19:44:46 +0800
Subject: [PATCH 41/62] add copyright checker to ps_pb2.py

---
 python/paddle/fluid/distributed/ps_pb2.py | 3490 +++++++++++++--------
 1 file changed, 2140 insertions(+), 1350 deletions(-)

diff --git a/python/paddle/fluid/distributed/ps_pb2.py b/python/paddle/fluid/distributed/ps_pb2.py
index 978b18d0d5..0d226c4d59 100644
--- a/python/paddle/fluid/distributed/ps_pb2.py
+++ b/python/paddle/fluid/distributed/ps_pb2.py
@@ -1,8 +1,20 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and # Generated by the protocol buffer compiler. DO NOT EDIT! # source: ps.proto import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1')) from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message @@ -13,104 +25,115 @@ from google.protobuf import descriptor_pb2 _sym_db = _symbol_database.Default() - - - DESCRIPTOR = _descriptor.FileDescriptor( - name='ps.proto', - package='paddle', - syntax='proto2', - serialized_pb=_b('\n\x08ps.proto\x12\x06paddle\"\x9e\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x01(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xce\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1d\n\x15push_sparse_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\x12\x0f\n\x07skip_op\x18\x05 \x03(\t\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\xd7\x01\n\x16ServerServiceParameter\x12*\n\x0cserver_class\x18\x01 \x01(\t:\x14\x44ownpourBrpcPsServer\x12*\n\x0c\x63lient_class\x18\x02 \x01(\t:\x14\x44ownpourBrpcPsClient\x12(\n\rservice_class\x18\x03 \x01(\t:\x11\x44ownpourPsService\x12\x1c\n\x11start_server_port\x18\x04 \x01(\r:\x01\x30\x12\x1d\n\x11server_thread_num\x18\x05 \x01(\r:\x02\x31\x32\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 
\x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 
\x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01') -) + name='ps.proto', + package='paddle', + syntax='proto2', + serialized_pb=_b( + '\n\x08ps.proto\x12\x06paddle\"\x9e\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x01(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xce\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1d\n\x15push_sparse_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\x12\x0f\n\x07skip_op\x18\x05 \x03(\t\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\xd7\x01\n\x16ServerServiceParameter\x12*\n\x0cserver_class\x18\x01 \x01(\t:\x14\x44ownpourBrpcPsServer\x12*\n\x0c\x63lient_class\x18\x02 \x01(\t:\x14\x44ownpourBrpcPsClient\x12(\n\rservice_class\x18\x03 \x01(\t:\x11\x44ownpourPsService\x12\x1c\n\x11start_server_port\x18\x04 \x01(\r:\x01\x30\x12\x1d\n\x11server_thread_num\x18\x05 \x01(\r:\x02\x31\x32\"\xbf\x01\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x12\n\nshared_num\x18\x03 \x01(\x04\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 \x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 
\x01(\x08:\x05\x66\x61lse\"\xf1\x02\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r\x12\x12\n\nembedx_dim\x18\x05 \x01(\r\x12\x18\n\x10\x65mbedx_threshold\x18\x06 \x01(\r\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\"\xce\x01\n\x1e\x44ownpourTableAccessorParameter\x12\x14\n\x0cnonclk_coeff\x18\x01 \x01(\x02\x12\x13\n\x0b\x63lick_coeff\x18\x02 \x01(\x02\x12\x16\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02\x12\x17\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02\x12\x17\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02\x12\x1d\n\x15show_click_decay_rate\x18\x06 \x01(\x02\x12\x18\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"w\n\x16SparseSGDRuleParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x15\n\rinitial_g2sum\x18\x02 \x01(\x01\x12\x18\n\rinitial_range\x18\x03 \x01(\x01:\x01\x30\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\x86\x01\n\x10\x41\x64\x61mSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\x12\x16\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01\x12\x13\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01\x12\x16\n\x0emom_decay_rate\x18\x05 \x01(\x01\"B\n\x11NaiveSGDParameter\x12\x15\n\rlearning_rate\x18\x01 \x01(\x01\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 
\x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xbd\x02\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x03\x80\x01\x01' + )) _sym_db.RegisterFileDescriptor(DESCRIPTOR) _TABLETYPE = _descriptor.EnumDescriptor( - name='TableType', - full_name='paddle.TableType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='PS_SPARSE_TABLE', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_DENSE_TABLE', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=3286, - serialized_end=3338, -) + name='TableType', + full_name='paddle.TableType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='PS_SPARSE_TABLE', index=0, number=0, options=None, type=None), + _descriptor.EnumValueDescriptor( + name='PS_DENSE_TABLE', index=1, number=1, options=None, type=None), + ], + containing_type=None, + options=None, + serialized_start=3286, + serialized_end=3338, ) _sym_db.RegisterEnumDescriptor(_TABLETYPE) TableType = enum_type_wrapper.EnumTypeWrapper(_TABLETYPE) _PSCMDID = _descriptor.EnumDescriptor( - name='PsCmdID', - full_name='paddle.PsCmdID', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='PS_PULL_DENSE_TABLE', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_TABLE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PULL_SPARSE_TABLE', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_SPARSE_TABLE', index=3, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SHRINK_TABLE', index=4, number=4, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ONE_TABLE', index=5, number=5, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ALL_TABLE', index=6, number=6, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ONE_TABLE', index=7, number=7, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ALL_TABLE', index=8, number=8, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ONE_TABLE', index=9, number=9, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ALL_TABLE', index=10, number=10, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_PARAM', index=11, number=11, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PS_STOP_SERVER', index=12, number=12, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=3341, - serialized_end=3658, -) + name='PsCmdID', + 
full_name='paddle.PsCmdID', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='PS_PULL_DENSE_TABLE', + index=0, + number=0, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PUSH_DENSE_TABLE', + index=1, + number=1, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PULL_SPARSE_TABLE', + index=2, + number=2, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PUSH_SPARSE_TABLE', + index=3, + number=3, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_SHRINK_TABLE', index=4, number=4, options=None, type=None), + _descriptor.EnumValueDescriptor( + name='PS_SAVE_ONE_TABLE', + index=5, + number=5, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_SAVE_ALL_TABLE', + index=6, + number=6, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_LOAD_ONE_TABLE', + index=7, + number=7, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_LOAD_ALL_TABLE', + index=8, + number=8, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_CLEAR_ONE_TABLE', + index=9, + number=9, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_CLEAR_ALL_TABLE', + index=10, + number=10, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_PUSH_DENSE_PARAM', + index=11, + number=11, + options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='PS_STOP_SERVER', index=12, number=12, options=None, + type=None), + ], + containing_type=None, + options=None, + serialized_start=3341, + serialized_end=3658, ) _sym_db.RegisterEnumDescriptor(_PSCMDID) PsCmdID = enum_type_wrapper.EnumTypeWrapper(_PSCMDID) @@ -130,1377 +153,2144 @@ PS_CLEAR_ALL_TABLE = 10 PS_PUSH_DENSE_PARAM = 11 PS_STOP_SERVER = 12 - _FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( - name='FsApiType', - full_name='paddle.FsClientParameter.FsApiType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='HDFS', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='AFS', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=3254, - serialized_end=3284, -) + name='FsApiType', + full_name='paddle.FsClientParameter.FsApiType', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='HDFS', index=0, number=0, options=None, type=None), + _descriptor.EnumValueDescriptor( + name='AFS', index=1, number=1, options=None, type=None), + ], + containing_type=None, + options=None, + serialized_start=3254, + serialized_end=3284, ) _sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) - _PSPARAMETER = _descriptor.Descriptor( - name='PSParameter', - full_name='paddle.PSParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='worker_class', full_name='paddle.PSParameter.worker_class', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='server_class', full_name='paddle.PSParameter.server_class', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='instance_class', full_name='paddle.PSParameter.instance_class', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='worker_param', full_name='paddle.PSParameter.worker_param', index=3, - number=101, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='server_param', full_name='paddle.PSParameter.server_param', index=4, - number=102, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='trainer_param', full_name='paddle.PSParameter.trainer_param', index=5, - number=301, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='fs_client_param', full_name='paddle.PSParameter.fs_client_param', index=6, - number=501, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=21, - serialized_end=307, -) - + name='PSParameter', + full_name='paddle.PSParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='worker_class', + full_name='paddle.PSParameter.worker_class', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='server_class', + full_name='paddle.PSParameter.server_class', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='instance_class', + full_name='paddle.PSParameter.instance_class', + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='worker_param', + full_name='paddle.PSParameter.worker_param', + index=3, + number=101, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='server_param', + full_name='paddle.PSParameter.server_param', + 
index=4, + number=102, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='trainer_param', + full_name='paddle.PSParameter.trainer_param', + index=5, + number=301, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='fs_client_param', + full_name='paddle.PSParameter.fs_client_param', + index=6, + number=501, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=21, + serialized_end=307, ) _WORKERPARAMETER = _descriptor.Descriptor( - name='WorkerParameter', - full_name='paddle.WorkerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_worker_param', full_name='paddle.WorkerParameter.downpour_worker_param', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=309, - serialized_end=390, -) - + name='WorkerParameter', + full_name='paddle.WorkerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_worker_param', + full_name='paddle.WorkerParameter.downpour_worker_param', + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=309, + serialized_end=390, ) _SERVERPARAMETER = _descriptor.Descriptor( - name='ServerParameter', - full_name='paddle.ServerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_server_param', full_name='paddle.ServerParameter.downpour_server_param', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=392, - serialized_end=473, -) - + name='ServerParameter', + full_name='paddle.ServerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_server_param', + full_name='paddle.ServerParameter.downpour_server_param', + index=0, + number=1, + 
type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=392, + serialized_end=473, ) _DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( - name='DownpourWorkerParameter', - full_name='paddle.DownpourWorkerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_table_param', full_name='paddle.DownpourWorkerParameter.downpour_table_param', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=475, - serialized_end=554, -) - + name='DownpourWorkerParameter', + full_name='paddle.DownpourWorkerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_table_param', + full_name='paddle.DownpourWorkerParameter.downpour_table_param', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=475, + serialized_end=554, ) _DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( - name='DownpourTrainerParameter', - full_name='paddle.DownpourTrainerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='dense_table', full_name='paddle.DownpourTrainerParameter.dense_table', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='sparse_table', full_name='paddle.DownpourTrainerParameter.sparse_table', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='push_sparse_per_batch', full_name='paddle.DownpourTrainerParameter.push_sparse_per_batch', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='push_dense_per_batch', full_name='paddle.DownpourTrainerParameter.push_dense_per_batch', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='skip_op', full_name='paddle.DownpourTrainerParameter.skip_op', index=4, - number=5, 
type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=557, - serialized_end=763, -) - + name='DownpourTrainerParameter', + full_name='paddle.DownpourTrainerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='dense_table', + full_name='paddle.DownpourTrainerParameter.dense_table', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='sparse_table', + full_name='paddle.DownpourTrainerParameter.sparse_table', + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='push_sparse_per_batch', + full_name='paddle.DownpourTrainerParameter.push_sparse_per_batch', + index=2, + number=3, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='push_dense_per_batch', + full_name='paddle.DownpourTrainerParameter.push_dense_per_batch', + index=3, + number=4, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='skip_op', + full_name='paddle.DownpourTrainerParameter.skip_op', + index=4, + number=5, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=557, + serialized_end=763, ) _DENSETABLEPARAMETER = _descriptor.Descriptor( - name='DenseTableParameter', - full_name='paddle.DenseTableParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='table_id', full_name='paddle.DenseTableParameter.table_id', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dense_variable_name', full_name='paddle.DenseTableParameter.dense_variable_name', index=1, - number=2, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dense_gradient_variable_name', full_name='paddle.DenseTableParameter.dense_gradient_variable_name', index=2, - number=3, type=9, cpp_type=9, label=3, - 
has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='fea_dim', full_name='paddle.DenseTableParameter.fea_dim', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=765, - serialized_end=888, -) - + name='DenseTableParameter', + full_name='paddle.DenseTableParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_id', + full_name='paddle.DenseTableParameter.table_id', + index=0, + number=1, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dense_variable_name', + full_name='paddle.DenseTableParameter.dense_variable_name', + index=1, + number=2, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dense_gradient_variable_name', + full_name='paddle.DenseTableParameter.dense_gradient_variable_name', + index=2, + number=3, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='fea_dim', + full_name='paddle.DenseTableParameter.fea_dim', + index=3, + number=4, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=765, + serialized_end=888, ) _SPARSETABLEPARAMETER = _descriptor.Descriptor( - name='SparseTableParameter', - full_name='paddle.SparseTableParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='table_id', full_name='paddle.SparseTableParameter.table_id', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='feature_dim', full_name='paddle.SparseTableParameter.feature_dim', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='slot_key', full_name='paddle.SparseTableParameter.slot_key', index=2, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=None), - _descriptor.FieldDescriptor( - name='slot_value', full_name='paddle.SparseTableParameter.slot_value', index=3, - number=4, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='slot_gradient', full_name='paddle.SparseTableParameter.slot_gradient', index=4, - number=5, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=890, - serialized_end=1012, -) - + name='SparseTableParameter', + full_name='paddle.SparseTableParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_id', + full_name='paddle.SparseTableParameter.table_id', + index=0, + number=1, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='feature_dim', + full_name='paddle.SparseTableParameter.feature_dim', + index=1, + number=2, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='slot_key', + full_name='paddle.SparseTableParameter.slot_key', + index=2, + number=3, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='slot_value', + full_name='paddle.SparseTableParameter.slot_value', + index=3, + number=4, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='slot_gradient', + full_name='paddle.SparseTableParameter.slot_gradient', + index=4, + number=5, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=890, + serialized_end=1012, ) _DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( - name='DownpourServerParameter', - full_name='paddle.DownpourServerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_table_param', full_name='paddle.DownpourServerParameter.downpour_table_param', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='service_param', 
full_name='paddle.DownpourServerParameter.service_param', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1015, - serialized_end=1149, -) - + name='DownpourServerParameter', + full_name='paddle.DownpourServerParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='downpour_table_param', + full_name='paddle.DownpourServerParameter.downpour_table_param', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='service_param', + full_name='paddle.DownpourServerParameter.service_param', + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=1015, + serialized_end=1149, ) _SERVERSERVICEPARAMETER = _descriptor.Descriptor( - name='ServerServiceParameter', - full_name='paddle.ServerServiceParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='server_class', full_name='paddle.ServerServiceParameter.server_class', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("DownpourBrpcPsServer").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='client_class', full_name='paddle.ServerServiceParameter.client_class', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("DownpourBrpcPsClient").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='service_class', full_name='paddle.ServerServiceParameter.service_class', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("DownpourPsService").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='start_server_port', full_name='paddle.ServerServiceParameter.start_server_port', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='server_thread_num', full_name='paddle.ServerServiceParameter.server_thread_num', index=4, - number=5, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=12, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - 
nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1152, - serialized_end=1367, -) - + name='ServerServiceParameter', + full_name='paddle.ServerServiceParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='server_class', + full_name='paddle.ServerServiceParameter.server_class', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=True, + default_value=_b("DownpourBrpcPsServer").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='client_class', + full_name='paddle.ServerServiceParameter.client_class', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=True, + default_value=_b("DownpourBrpcPsClient").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='service_class', + full_name='paddle.ServerServiceParameter.service_class', + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=True, + default_value=_b("DownpourPsService").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='start_server_port', + full_name='paddle.ServerServiceParameter.start_server_port', + index=3, + number=4, + type=13, + cpp_type=3, + label=1, + has_default_value=True, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='server_thread_num', + full_name='paddle.ServerServiceParameter.server_thread_num', + index=4, + number=5, + type=13, + cpp_type=3, + label=1, + has_default_value=True, + default_value=12, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=1152, + serialized_end=1367, ) _TABLEPARAMETER = _descriptor.Descriptor( - name='TableParameter', - full_name='paddle.TableParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='table_id', full_name='paddle.TableParameter.table_id', index=0, - number=1, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='table_class', full_name='paddle.TableParameter.table_class', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='shared_num', full_name='paddle.TableParameter.shared_num', index=2, - number=3, type=4, cpp_type=4, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - 
name='accessor', full_name='paddle.TableParameter.accessor', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='paddle.TableParameter.type', index=4, - number=5, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='compress_in_save', full_name='paddle.TableParameter.compress_in_save', index=5, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1370, - serialized_end=1561, -) - + name='TableParameter', + full_name='paddle.TableParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='table_id', + full_name='paddle.TableParameter.table_id', + index=0, + number=1, + type=4, + cpp_type=4, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='table_class', + full_name='paddle.TableParameter.table_class', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='shared_num', + full_name='paddle.TableParameter.shared_num', + index=2, + number=3, + type=4, + cpp_type=4, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='accessor', + full_name='paddle.TableParameter.accessor', + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='type', + full_name='paddle.TableParameter.type', + index=4, + number=5, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='compress_in_save', + full_name='paddle.TableParameter.compress_in_save', + index=5, + number=6, + type=8, + cpp_type=7, + label=1, + has_default_value=True, + default_value=False, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=1370, + serialized_end=1561, ) _TABLEACCESSORPARAMETER = _descriptor.Descriptor( - name='TableAccessorParameter', - 
full_name='paddle.TableAccessorParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='accessor_class', full_name='paddle.TableAccessorParameter.accessor_class', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='sparse_sgd_param', full_name='paddle.TableAccessorParameter.sparse_sgd_param', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dense_sgd_param', full_name='paddle.TableAccessorParameter.dense_sgd_param', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='fea_dim', full_name='paddle.TableAccessorParameter.fea_dim', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='embedx_dim', full_name='paddle.TableAccessorParameter.embedx_dim', index=4, - number=5, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='embedx_threshold', full_name='paddle.TableAccessorParameter.embedx_threshold', index=5, - number=6, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='downpour_accessor_param', full_name='paddle.TableAccessorParameter.downpour_accessor_param', index=6, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='table_accessor_save_param', full_name='paddle.TableAccessorParameter.table_accessor_save_param', index=7, - number=8, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1564, - serialized_end=1933, -) - + name='TableAccessorParameter', + full_name='paddle.TableAccessorParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='accessor_class', + full_name='paddle.TableAccessorParameter.accessor_class', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + 
_descriptor.FieldDescriptor( + name='sparse_sgd_param', + full_name='paddle.TableAccessorParameter.sparse_sgd_param', + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='dense_sgd_param', + full_name='paddle.TableAccessorParameter.dense_sgd_param', + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='fea_dim', + full_name='paddle.TableAccessorParameter.fea_dim', + index=3, + number=4, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='embedx_dim', + full_name='paddle.TableAccessorParameter.embedx_dim', + index=4, + number=5, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='embedx_threshold', + full_name='paddle.TableAccessorParameter.embedx_threshold', + index=5, + number=6, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='downpour_accessor_param', + full_name='paddle.TableAccessorParameter.downpour_accessor_param', + index=6, + number=7, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='table_accessor_save_param', + full_name='paddle.TableAccessorParameter.table_accessor_save_param', + index=7, + number=8, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=1564, + serialized_end=1933, ) _DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( - name='DownpourTableAccessorParameter', - full_name='paddle.DownpourTableAccessorParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='nonclk_coeff', full_name='paddle.DownpourTableAccessorParameter.nonclk_coeff', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='click_coeff', full_name='paddle.DownpourTableAccessorParameter.click_coeff', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=None), - _descriptor.FieldDescriptor( - name='base_threshold', full_name='paddle.DownpourTableAccessorParameter.base_threshold', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='delta_threshold', full_name='paddle.DownpourTableAccessorParameter.delta_threshold', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='delta_keep_days', full_name='paddle.DownpourTableAccessorParameter.delta_keep_days', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='show_click_decay_rate', full_name='paddle.DownpourTableAccessorParameter.show_click_decay_rate', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='delete_threshold', full_name='paddle.DownpourTableAccessorParameter.delete_threshold', index=6, - number=7, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1936, - serialized_end=2142, -) - + name='DownpourTableAccessorParameter', + full_name='paddle.DownpourTableAccessorParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='nonclk_coeff', + full_name='paddle.DownpourTableAccessorParameter.nonclk_coeff', + index=0, + number=1, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='click_coeff', + full_name='paddle.DownpourTableAccessorParameter.click_coeff', + index=1, + number=2, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='base_threshold', + full_name='paddle.DownpourTableAccessorParameter.base_threshold', + index=2, + number=3, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='delta_threshold', + full_name='paddle.DownpourTableAccessorParameter.delta_threshold', + index=3, + number=4, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + 
options=None), + _descriptor.FieldDescriptor( + name='delta_keep_days', + full_name='paddle.DownpourTableAccessorParameter.delta_keep_days', + index=4, + number=5, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='show_click_decay_rate', + full_name='paddle.DownpourTableAccessorParameter.show_click_decay_rate', + index=5, + number=6, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='delete_threshold', + full_name='paddle.DownpourTableAccessorParameter.delete_threshold', + index=6, + number=7, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=1936, + serialized_end=2142, ) _TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( - name='TableAccessorSaveParameter', - full_name='paddle.TableAccessorSaveParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='param', full_name='paddle.TableAccessorSaveParameter.param', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='converter', full_name='paddle.TableAccessorSaveParameter.converter', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='deconverter', full_name='paddle.TableAccessorSaveParameter.deconverter', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2144, - serialized_end=2227, -) - + name='TableAccessorSaveParameter', + full_name='paddle.TableAccessorSaveParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='param', + full_name='paddle.TableAccessorSaveParameter.param', + index=0, + number=1, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='converter', + full_name='paddle.TableAccessorSaveParameter.converter', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + 
is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='deconverter', + full_name='paddle.TableAccessorSaveParameter.deconverter', + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2144, + serialized_end=2227, ) _PSREQUESTMESSAGE = _descriptor.Descriptor( - name='PsRequestMessage', - full_name='paddle.PsRequestMessage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='cmd_id', full_name='paddle.PsRequestMessage.cmd_id', index=0, - number=1, type=13, cpp_type=3, label=2, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='table_id', full_name='paddle.PsRequestMessage.table_id', index=1, - number=2, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='params', full_name='paddle.PsRequestMessage.params', index=2, - number=3, type=12, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='client_id', full_name='paddle.PsRequestMessage.client_id', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='data', full_name='paddle.PsRequestMessage.data', index=4, - number=5, type=12, cpp_type=9, label=1, - has_default_value=False, default_value=_b(""), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2229, - serialized_end=2330, -) - + name='PsRequestMessage', + full_name='paddle.PsRequestMessage', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='cmd_id', + full_name='paddle.PsRequestMessage.cmd_id', + index=0, + number=1, + type=13, + cpp_type=3, + label=2, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='table_id', + full_name='paddle.PsRequestMessage.table_id', + index=1, + number=2, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='params', + full_name='paddle.PsRequestMessage.params', + index=2, + number=3, + type=12, + cpp_type=9, + label=3, + has_default_value=False, + 
default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='client_id', + full_name='paddle.PsRequestMessage.client_id', + index=3, + number=4, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='data', + full_name='paddle.PsRequestMessage.data', + index=4, + number=5, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2229, + serialized_end=2330, ) _SPARSESGDRULEPARAMETER = _descriptor.Descriptor( - name='SparseSGDRuleParameter', - full_name='paddle.SparseSGDRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', full_name='paddle.SparseSGDRuleParameter.learning_rate', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='initial_g2sum', full_name='paddle.SparseSGDRuleParameter.initial_g2sum', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='initial_range', full_name='paddle.SparseSGDRuleParameter.initial_range', index=2, - number=3, type=1, cpp_type=5, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_bounds', full_name='paddle.SparseSGDRuleParameter.weight_bounds', index=3, - number=4, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2332, - serialized_end=2451, -) - + name='SparseSGDRuleParameter', + full_name='paddle.SparseSGDRuleParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='learning_rate', + full_name='paddle.SparseSGDRuleParameter.learning_rate', + index=0, + number=1, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='initial_g2sum', + full_name='paddle.SparseSGDRuleParameter.initial_g2sum', + index=1, + number=2, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + 
extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='initial_range', + full_name='paddle.SparseSGDRuleParameter.initial_range', + index=2, + number=3, + type=1, + cpp_type=5, + label=1, + has_default_value=True, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='weight_bounds', + full_name='paddle.SparseSGDRuleParameter.weight_bounds', + index=3, + number=4, + type=2, + cpp_type=6, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2332, + serialized_end=2451, ) _DENSESGDRULEPARAMETER = _descriptor.Descriptor( - name='DenseSGDRuleParameter', - full_name='paddle.DenseSGDRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='paddle.DenseSGDRuleParameter.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='adam', full_name='paddle.DenseSGDRuleParameter.adam', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='naive', full_name='paddle.DenseSGDRuleParameter.naive', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='summary', full_name='paddle.DenseSGDRuleParameter.summary', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='moving_average', full_name='paddle.DenseSGDRuleParameter.moving_average', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2454, - serialized_end=2679, -) - + name='DenseSGDRuleParameter', + full_name='paddle.DenseSGDRuleParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', + full_name='paddle.DenseSGDRuleParameter.name', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='adam', + full_name='paddle.DenseSGDRuleParameter.adam', + index=1, + 
number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='naive', + full_name='paddle.DenseSGDRuleParameter.naive', + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='summary', + full_name='paddle.DenseSGDRuleParameter.summary', + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='moving_average', + full_name='paddle.DenseSGDRuleParameter.moving_average', + index=4, + number=5, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2454, + serialized_end=2679, ) _ADAMSGDPARAMETER = _descriptor.Descriptor( - name='AdamSGDParameter', - full_name='paddle.AdamSGDParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', full_name='paddle.AdamSGDParameter.learning_rate', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='avg_decay_rate', full_name='paddle.AdamSGDParameter.avg_decay_rate', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='ada_decay_rate', full_name='paddle.AdamSGDParameter.ada_decay_rate', index=2, - number=3, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='ada_epsilon', full_name='paddle.AdamSGDParameter.ada_epsilon', index=3, - number=4, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mom_decay_rate', full_name='paddle.AdamSGDParameter.mom_decay_rate', index=4, - number=5, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2682, - serialized_end=2816, -) - + name='AdamSGDParameter', + full_name='paddle.AdamSGDParameter', + 
filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='learning_rate', + full_name='paddle.AdamSGDParameter.learning_rate', + index=0, + number=1, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='avg_decay_rate', + full_name='paddle.AdamSGDParameter.avg_decay_rate', + index=1, + number=2, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='ada_decay_rate', + full_name='paddle.AdamSGDParameter.ada_decay_rate', + index=2, + number=3, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='ada_epsilon', + full_name='paddle.AdamSGDParameter.ada_epsilon', + index=3, + number=4, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='mom_decay_rate', + full_name='paddle.AdamSGDParameter.mom_decay_rate', + index=4, + number=5, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2682, + serialized_end=2816, ) _NAIVESGDPARAMETER = _descriptor.Descriptor( - name='NaiveSGDParameter', - full_name='paddle.NaiveSGDParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', full_name='paddle.NaiveSGDParameter.learning_rate', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='avg_decay_rate', full_name='paddle.NaiveSGDParameter.avg_decay_rate', index=1, - number=2, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2818, - serialized_end=2884, -) - + name='NaiveSGDParameter', + full_name='paddle.NaiveSGDParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='learning_rate', + full_name='paddle.NaiveSGDParameter.learning_rate', + index=0, + number=1, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + 
_descriptor.FieldDescriptor( + name='avg_decay_rate', + full_name='paddle.NaiveSGDParameter.avg_decay_rate', + index=1, + number=2, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2818, + serialized_end=2884, ) _SUMMARYSGDPARAMETER = _descriptor.Descriptor( - name='SummarySGDParameter', - full_name='paddle.SummarySGDParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='summary_decay_rate', full_name='paddle.SummarySGDParameter.summary_decay_rate', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=True, default_value=float(0.999999), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2886, - serialized_end=2945, -) - + name='SummarySGDParameter', + full_name='paddle.SummarySGDParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='summary_decay_rate', + full_name='paddle.SummarySGDParameter.summary_decay_rate', + index=0, + number=1, + type=1, + cpp_type=5, + label=1, + has_default_value=True, + default_value=float(0.999999), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2886, + serialized_end=2945, ) _MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( - name='MovingAverageRuleParameter', - full_name='paddle.MovingAverageRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='momentum', full_name='paddle.MovingAverageRuleParameter.momentum', index=0, - number=1, type=1, cpp_type=5, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2947, - serialized_end=2993, -) - + name='MovingAverageRuleParameter', + full_name='paddle.MovingAverageRuleParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='momentum', + full_name='paddle.MovingAverageRuleParameter.momentum', + index=0, + number=1, + type=1, + cpp_type=5, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2947, + serialized_end=2993, ) _PSRESPONSEMESSAGE = _descriptor.Descriptor( - name='PsResponseMessage', - 
full_name='paddle.PsResponseMessage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='err_code', full_name='paddle.PsResponseMessage.err_code', index=0, - number=1, type=5, cpp_type=1, label=2, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='err_msg', full_name='paddle.PsResponseMessage.err_msg', index=1, - number=2, type=9, cpp_type=9, label=2, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='data', full_name='paddle.PsResponseMessage.data', index=2, - number=3, type=12, cpp_type=9, label=1, - has_default_value=False, default_value=_b(""), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2995, - serialized_end=3068, -) - + name='PsResponseMessage', + full_name='paddle.PsResponseMessage', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='err_code', + full_name='paddle.PsResponseMessage.err_code', + index=0, + number=1, + type=5, + cpp_type=1, + label=2, + has_default_value=True, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='err_msg', + full_name='paddle.PsResponseMessage.err_msg', + index=1, + number=2, + type=9, + cpp_type=9, + label=2, + has_default_value=True, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='data', + full_name='paddle.PsResponseMessage.data', + index=2, + number=3, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=2995, + serialized_end=3068, ) _FSCLIENTPARAMETER = _descriptor.Descriptor( - name='FsClientParameter', - full_name='paddle.FsClientParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='fs_type', full_name='paddle.FsClientParameter.fs_type', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='uri', full_name='paddle.FsClientParameter.uri', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='user', full_name='paddle.FsClientParameter.user', index=2, - number=3, type=9, 
cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='passwd', full_name='paddle.FsClientParameter.passwd', index=3, - number=4, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='buffer_size', full_name='paddle.FsClientParameter.buffer_size', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='hadoop_bin', full_name='paddle.FsClientParameter.hadoop_bin', index=5, - number=51, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='afs_conf', full_name='paddle.FsClientParameter.afs_conf', index=6, - number=101, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _FSCLIENTPARAMETER_FSAPITYPE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=3071, - serialized_end=3284, -) + name='FsClientParameter', + full_name='paddle.FsClientParameter', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='fs_type', + full_name='paddle.FsClientParameter.fs_type', + index=0, + number=1, + type=14, + cpp_type=8, + label=1, + has_default_value=True, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='uri', + full_name='paddle.FsClientParameter.uri', + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='user', + full_name='paddle.FsClientParameter.user', + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='passwd', + full_name='paddle.FsClientParameter.passwd', + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='buffer_size', + full_name='paddle.FsClientParameter.buffer_size', + index=4, + number=5, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, 
+ extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='hadoop_bin', + full_name='paddle.FsClientParameter.hadoop_bin', + index=5, + number=51, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + _descriptor.FieldDescriptor( + name='afs_conf', + full_name='paddle.FsClientParameter.afs_conf', + index=6, + number=101, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + options=None), + ], + extensions=[], + nested_types=[], + enum_types=[_FSCLIENTPARAMETER_FSAPITYPE, ], + options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=3071, + serialized_end=3284, ) _PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER _PSPARAMETER.fields_by_name['server_param'].message_type = _SERVERPARAMETER -_PSPARAMETER.fields_by_name['trainer_param'].message_type = _DOWNPOURTRAINERPARAMETER +_PSPARAMETER.fields_by_name[ + 'trainer_param'].message_type = _DOWNPOURTRAINERPARAMETER _PSPARAMETER.fields_by_name['fs_client_param'].message_type = _FSCLIENTPARAMETER -_WORKERPARAMETER.fields_by_name['downpour_worker_param'].message_type = _DOWNPOURWORKERPARAMETER -_SERVERPARAMETER.fields_by_name['downpour_server_param'].message_type = _DOWNPOURSERVERPARAMETER -_DOWNPOURWORKERPARAMETER.fields_by_name['downpour_table_param'].message_type = _TABLEPARAMETER -_DOWNPOURTRAINERPARAMETER.fields_by_name['dense_table'].message_type = _DENSETABLEPARAMETER -_DOWNPOURTRAINERPARAMETER.fields_by_name['sparse_table'].message_type = _SPARSETABLEPARAMETER -_DOWNPOURSERVERPARAMETER.fields_by_name['downpour_table_param'].message_type = _TABLEPARAMETER -_DOWNPOURSERVERPARAMETER.fields_by_name['service_param'].message_type = _SERVERSERVICEPARAMETER -_TABLEPARAMETER.fields_by_name['accessor'].message_type = _TABLEACCESSORPARAMETER +_WORKERPARAMETER.fields_by_name[ + 'downpour_worker_param'].message_type = _DOWNPOURWORKERPARAMETER +_SERVERPARAMETER.fields_by_name[ + 'downpour_server_param'].message_type = _DOWNPOURSERVERPARAMETER +_DOWNPOURWORKERPARAMETER.fields_by_name[ + 'downpour_table_param'].message_type = _TABLEPARAMETER +_DOWNPOURTRAINERPARAMETER.fields_by_name[ + 'dense_table'].message_type = _DENSETABLEPARAMETER +_DOWNPOURTRAINERPARAMETER.fields_by_name[ + 'sparse_table'].message_type = _SPARSETABLEPARAMETER +_DOWNPOURSERVERPARAMETER.fields_by_name[ + 'downpour_table_param'].message_type = _TABLEPARAMETER +_DOWNPOURSERVERPARAMETER.fields_by_name[ + 'service_param'].message_type = _SERVERSERVICEPARAMETER +_TABLEPARAMETER.fields_by_name[ + 'accessor'].message_type = _TABLEACCESSORPARAMETER _TABLEPARAMETER.fields_by_name['type'].enum_type = _TABLETYPE -_TABLEACCESSORPARAMETER.fields_by_name['sparse_sgd_param'].message_type = _SPARSESGDRULEPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name['dense_sgd_param'].message_type = _DENSESGDRULEPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name['downpour_accessor_param'].message_type = _DOWNPOURTABLEACCESSORPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name['table_accessor_save_param'].message_type = _TABLEACCESSORSAVEPARAMETER +_TABLEACCESSORPARAMETER.fields_by_name[ + 'sparse_sgd_param'].message_type = _SPARSESGDRULEPARAMETER +_TABLEACCESSORPARAMETER.fields_by_name[ + 
'dense_sgd_param'].message_type = _DENSESGDRULEPARAMETER +_TABLEACCESSORPARAMETER.fields_by_name[ + 'downpour_accessor_param'].message_type = _DOWNPOURTABLEACCESSORPARAMETER +_TABLEACCESSORPARAMETER.fields_by_name[ + 'table_accessor_save_param'].message_type = _TABLEACCESSORSAVEPARAMETER _DENSESGDRULEPARAMETER.fields_by_name['adam'].message_type = _ADAMSGDPARAMETER _DENSESGDRULEPARAMETER.fields_by_name['naive'].message_type = _NAIVESGDPARAMETER -_DENSESGDRULEPARAMETER.fields_by_name['summary'].message_type = _SUMMARYSGDPARAMETER -_DENSESGDRULEPARAMETER.fields_by_name['moving_average'].message_type = _MOVINGAVERAGERULEPARAMETER -_FSCLIENTPARAMETER.fields_by_name['fs_type'].enum_type = _FSCLIENTPARAMETER_FSAPITYPE +_DENSESGDRULEPARAMETER.fields_by_name[ + 'summary'].message_type = _SUMMARYSGDPARAMETER +_DENSESGDRULEPARAMETER.fields_by_name[ + 'moving_average'].message_type = _MOVINGAVERAGERULEPARAMETER +_FSCLIENTPARAMETER.fields_by_name[ + 'fs_type'].enum_type = _FSCLIENTPARAMETER_FSAPITYPE _FSCLIENTPARAMETER_FSAPITYPE.containing_type = _FSCLIENTPARAMETER DESCRIPTOR.message_types_by_name['PSParameter'] = _PSPARAMETER DESCRIPTOR.message_types_by_name['WorkerParameter'] = _WORKERPARAMETER DESCRIPTOR.message_types_by_name['ServerParameter'] = _SERVERPARAMETER -DESCRIPTOR.message_types_by_name['DownpourWorkerParameter'] = _DOWNPOURWORKERPARAMETER -DESCRIPTOR.message_types_by_name['DownpourTrainerParameter'] = _DOWNPOURTRAINERPARAMETER +DESCRIPTOR.message_types_by_name[ + 'DownpourWorkerParameter'] = _DOWNPOURWORKERPARAMETER +DESCRIPTOR.message_types_by_name[ + 'DownpourTrainerParameter'] = _DOWNPOURTRAINERPARAMETER DESCRIPTOR.message_types_by_name['DenseTableParameter'] = _DENSETABLEPARAMETER DESCRIPTOR.message_types_by_name['SparseTableParameter'] = _SPARSETABLEPARAMETER -DESCRIPTOR.message_types_by_name['DownpourServerParameter'] = _DOWNPOURSERVERPARAMETER -DESCRIPTOR.message_types_by_name['ServerServiceParameter'] = _SERVERSERVICEPARAMETER +DESCRIPTOR.message_types_by_name[ + 'DownpourServerParameter'] = _DOWNPOURSERVERPARAMETER +DESCRIPTOR.message_types_by_name[ + 'ServerServiceParameter'] = _SERVERSERVICEPARAMETER DESCRIPTOR.message_types_by_name['TableParameter'] = _TABLEPARAMETER -DESCRIPTOR.message_types_by_name['TableAccessorParameter'] = _TABLEACCESSORPARAMETER -DESCRIPTOR.message_types_by_name['DownpourTableAccessorParameter'] = _DOWNPOURTABLEACCESSORPARAMETER -DESCRIPTOR.message_types_by_name['TableAccessorSaveParameter'] = _TABLEACCESSORSAVEPARAMETER +DESCRIPTOR.message_types_by_name[ + 'TableAccessorParameter'] = _TABLEACCESSORPARAMETER +DESCRIPTOR.message_types_by_name[ + 'DownpourTableAccessorParameter'] = _DOWNPOURTABLEACCESSORPARAMETER +DESCRIPTOR.message_types_by_name[ + 'TableAccessorSaveParameter'] = _TABLEACCESSORSAVEPARAMETER DESCRIPTOR.message_types_by_name['PsRequestMessage'] = _PSREQUESTMESSAGE -DESCRIPTOR.message_types_by_name['SparseSGDRuleParameter'] = _SPARSESGDRULEPARAMETER -DESCRIPTOR.message_types_by_name['DenseSGDRuleParameter'] = _DENSESGDRULEPARAMETER +DESCRIPTOR.message_types_by_name[ + 'SparseSGDRuleParameter'] = _SPARSESGDRULEPARAMETER +DESCRIPTOR.message_types_by_name[ + 'DenseSGDRuleParameter'] = _DENSESGDRULEPARAMETER DESCRIPTOR.message_types_by_name['AdamSGDParameter'] = _ADAMSGDPARAMETER DESCRIPTOR.message_types_by_name['NaiveSGDParameter'] = _NAIVESGDPARAMETER DESCRIPTOR.message_types_by_name['SummarySGDParameter'] = _SUMMARYSGDPARAMETER -DESCRIPTOR.message_types_by_name['MovingAverageRuleParameter'] = _MOVINGAVERAGERULEPARAMETER 
+DESCRIPTOR.message_types_by_name[ + 'MovingAverageRuleParameter'] = _MOVINGAVERAGERULEPARAMETER DESCRIPTOR.message_types_by_name['PsResponseMessage'] = _PSRESPONSEMESSAGE DESCRIPTOR.message_types_by_name['FsClientParameter'] = _FSCLIENTPARAMETER DESCRIPTOR.enum_types_by_name['TableType'] = _TABLETYPE DESCRIPTOR.enum_types_by_name['PsCmdID'] = _PSCMDID -PSParameter = _reflection.GeneratedProtocolMessageType('PSParameter', (_message.Message,), dict( - DESCRIPTOR = _PSPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PSParameter) - )) +PSParameter = _reflection.GeneratedProtocolMessageType( + 'PSParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_PSPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PSParameter) + )) _sym_db.RegisterMessage(PSParameter) -WorkerParameter = _reflection.GeneratedProtocolMessageType('WorkerParameter', (_message.Message,), dict( - DESCRIPTOR = _WORKERPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) - )) +WorkerParameter = _reflection.GeneratedProtocolMessageType( + 'WorkerParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_WORKERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) + )) _sym_db.RegisterMessage(WorkerParameter) -ServerParameter = _reflection.GeneratedProtocolMessageType('ServerParameter', (_message.Message,), dict( - DESCRIPTOR = _SERVERPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ServerParameter) - )) +ServerParameter = _reflection.GeneratedProtocolMessageType( + 'ServerParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_SERVERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerParameter) + )) _sym_db.RegisterMessage(ServerParameter) -DownpourWorkerParameter = _reflection.GeneratedProtocolMessageType('DownpourWorkerParameter', (_message.Message,), dict( - DESCRIPTOR = _DOWNPOURWORKERPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) - )) +DownpourWorkerParameter = _reflection.GeneratedProtocolMessageType( + 'DownpourWorkerParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_DOWNPOURWORKERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) + )) _sym_db.RegisterMessage(DownpourWorkerParameter) -DownpourTrainerParameter = _reflection.GeneratedProtocolMessageType('DownpourTrainerParameter', (_message.Message,), dict( - DESCRIPTOR = _DOWNPOURTRAINERPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) - )) +DownpourTrainerParameter = _reflection.GeneratedProtocolMessageType( + 'DownpourTrainerParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_DOWNPOURTRAINERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) + )) _sym_db.RegisterMessage(DownpourTrainerParameter) -DenseTableParameter = _reflection.GeneratedProtocolMessageType('DenseTableParameter', (_message.Message,), dict( - DESCRIPTOR = _DENSETABLEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) - )) +DenseTableParameter = _reflection.GeneratedProtocolMessageType( + 'DenseTableParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_DENSETABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) + )) 
_sym_db.RegisterMessage(DenseTableParameter) -SparseTableParameter = _reflection.GeneratedProtocolMessageType('SparseTableParameter', (_message.Message,), dict( - DESCRIPTOR = _SPARSETABLEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) - )) +SparseTableParameter = _reflection.GeneratedProtocolMessageType( + 'SparseTableParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_SPARSETABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) + )) _sym_db.RegisterMessage(SparseTableParameter) -DownpourServerParameter = _reflection.GeneratedProtocolMessageType('DownpourServerParameter', (_message.Message,), dict( - DESCRIPTOR = _DOWNPOURSERVERPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) - )) +DownpourServerParameter = _reflection.GeneratedProtocolMessageType( + 'DownpourServerParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_DOWNPOURSERVERPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) + )) _sym_db.RegisterMessage(DownpourServerParameter) -ServerServiceParameter = _reflection.GeneratedProtocolMessageType('ServerServiceParameter', (_message.Message,), dict( - DESCRIPTOR = _SERVERSERVICEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) - )) +ServerServiceParameter = _reflection.GeneratedProtocolMessageType( + 'ServerServiceParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_SERVERSERVICEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) + )) _sym_db.RegisterMessage(ServerServiceParameter) -TableParameter = _reflection.GeneratedProtocolMessageType('TableParameter', (_message.Message,), dict( - DESCRIPTOR = _TABLEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableParameter) - )) +TableParameter = _reflection.GeneratedProtocolMessageType( + 'TableParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_TABLEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableParameter) + )) _sym_db.RegisterMessage(TableParameter) -TableAccessorParameter = _reflection.GeneratedProtocolMessageType('TableAccessorParameter', (_message.Message,), dict( - DESCRIPTOR = _TABLEACCESSORPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) - )) +TableAccessorParameter = _reflection.GeneratedProtocolMessageType( + 'TableAccessorParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_TABLEACCESSORPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) + )) _sym_db.RegisterMessage(TableAccessorParameter) -DownpourTableAccessorParameter = _reflection.GeneratedProtocolMessageType('DownpourTableAccessorParameter', (_message.Message,), dict( - DESCRIPTOR = _DOWNPOURTABLEACCESSORPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourTableAccessorParameter) - )) +DownpourTableAccessorParameter = _reflection.GeneratedProtocolMessageType( + 'DownpourTableAccessorParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_DOWNPOURTABLEACCESSORPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DownpourTableAccessorParameter) + )) _sym_db.RegisterMessage(DownpourTableAccessorParameter) -TableAccessorSaveParameter = 
_reflection.GeneratedProtocolMessageType('TableAccessorSaveParameter', (_message.Message,), dict( - DESCRIPTOR = _TABLEACCESSORSAVEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableAccessorSaveParameter) - )) +TableAccessorSaveParameter = _reflection.GeneratedProtocolMessageType( + 'TableAccessorSaveParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_TABLEACCESSORSAVEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.TableAccessorSaveParameter) + )) _sym_db.RegisterMessage(TableAccessorSaveParameter) -PsRequestMessage = _reflection.GeneratedProtocolMessageType('PsRequestMessage', (_message.Message,), dict( - DESCRIPTOR = _PSREQUESTMESSAGE, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) - )) +PsRequestMessage = _reflection.GeneratedProtocolMessageType( + 'PsRequestMessage', + (_message.Message, ), + dict( + DESCRIPTOR=_PSREQUESTMESSAGE, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) + )) _sym_db.RegisterMessage(PsRequestMessage) -SparseSGDRuleParameter = _reflection.GeneratedProtocolMessageType('SparseSGDRuleParameter', (_message.Message,), dict( - DESCRIPTOR = _SPARSESGDRULEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) - )) +SparseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( + 'SparseSGDRuleParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_SPARSESGDRULEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) + )) _sym_db.RegisterMessage(SparseSGDRuleParameter) -DenseSGDRuleParameter = _reflection.GeneratedProtocolMessageType('DenseSGDRuleParameter', (_message.Message,), dict( - DESCRIPTOR = _DENSESGDRULEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) - )) +DenseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( + 'DenseSGDRuleParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_DENSESGDRULEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) + )) _sym_db.RegisterMessage(DenseSGDRuleParameter) -AdamSGDParameter = _reflection.GeneratedProtocolMessageType('AdamSGDParameter', (_message.Message,), dict( - DESCRIPTOR = _ADAMSGDPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) - )) +AdamSGDParameter = _reflection.GeneratedProtocolMessageType( + 'AdamSGDParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_ADAMSGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) + )) _sym_db.RegisterMessage(AdamSGDParameter) -NaiveSGDParameter = _reflection.GeneratedProtocolMessageType('NaiveSGDParameter', (_message.Message,), dict( - DESCRIPTOR = _NAIVESGDPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter) - )) +NaiveSGDParameter = _reflection.GeneratedProtocolMessageType( + 'NaiveSGDParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_NAIVESGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter) + )) _sym_db.RegisterMessage(NaiveSGDParameter) -SummarySGDParameter = _reflection.GeneratedProtocolMessageType('SummarySGDParameter', (_message.Message,), dict( - DESCRIPTOR = _SUMMARYSGDPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter) - )) 
+SummarySGDParameter = _reflection.GeneratedProtocolMessageType( + 'SummarySGDParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_SUMMARYSGDPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter) + )) _sym_db.RegisterMessage(SummarySGDParameter) -MovingAverageRuleParameter = _reflection.GeneratedProtocolMessageType('MovingAverageRuleParameter', (_message.Message,), dict( - DESCRIPTOR = _MOVINGAVERAGERULEPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.MovingAverageRuleParameter) - )) +MovingAverageRuleParameter = _reflection.GeneratedProtocolMessageType( + 'MovingAverageRuleParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_MOVINGAVERAGERULEPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.MovingAverageRuleParameter) + )) _sym_db.RegisterMessage(MovingAverageRuleParameter) -PsResponseMessage = _reflection.GeneratedProtocolMessageType('PsResponseMessage', (_message.Message,), dict( - DESCRIPTOR = _PSRESPONSEMESSAGE, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage) - )) +PsResponseMessage = _reflection.GeneratedProtocolMessageType( + 'PsResponseMessage', + (_message.Message, ), + dict( + DESCRIPTOR=_PSRESPONSEMESSAGE, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage) + )) _sym_db.RegisterMessage(PsResponseMessage) -FsClientParameter = _reflection.GeneratedProtocolMessageType('FsClientParameter', (_message.Message,), dict( - DESCRIPTOR = _FSCLIENTPARAMETER, - __module__ = 'ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.FsClientParameter) - )) +FsClientParameter = _reflection.GeneratedProtocolMessageType( + 'FsClientParameter', + (_message.Message, ), + dict( + DESCRIPTOR=_FSCLIENTPARAMETER, + __module__='ps_pb2' + # @@protoc_insertion_point(class_scope:paddle.FsClientParameter) + )) _sym_db.RegisterMessage(FsClientParameter) - DESCRIPTOR.has_options = True -DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\200\001\001')) +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), + _b('\200\001\001')) # @@protoc_insertion_point(module_scope) From e52bb816e36b5df53c1608f3aada655b21d11ab5 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Thu, 13 Dec 2018 20:16:47 +0800 Subject: [PATCH 42/62] add copyright to __init__.py in distributed folder --- paddle/fluid/pybind/async_executor_py.cc | 16 ++++++++-------- python/paddle/fluid/distributed/__init__.py | 12 ++++++++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/pybind/async_executor_py.cc b/paddle/fluid/pybind/async_executor_py.cc index 71a0e256e4..222c128c66 100644 --- a/paddle/fluid/pybind/async_executor_py.cc +++ b/paddle/fluid/pybind/async_executor_py.cc @@ -49,13 +49,13 @@ void BindAsyncExecutor(py::module* m) { new framework::AsyncExecutor(scope, place)); })) .def("run_from_files", &framework::AsyncExecutor::RunFromFile) - .def("init_server", &framework::AsyncExecutor::InitServer) - .def("init_worker", &framework::AsyncExecutor::InitWorker) - .def("start_server", &framework::AsyncExecutor::StartServer) - .def("stop_server", &framework::AsyncExecutor::StopServer) - .def("gather_servers", &framework::AsyncExecutor::GatherServers) - .def("init_model", &framework::AsyncExecutor::InitModel) - .def("save_model", &framework::AsyncExecutor::SaveModel); + .def("init_server", &framework::AsyncExecutor::InitServer) + .def("init_worker", 
&framework::AsyncExecutor::InitWorker) + .def("start_server", &framework::AsyncExecutor::StartServer) + .def("stop_server", &framework::AsyncExecutor::StopServer) + .def("gather_servers", &framework::AsyncExecutor::GatherServers) + .def("init_model", &framework::AsyncExecutor::InitModel) + .def("save_model", &framework::AsyncExecutor::SaveModel); } // end BindAsyncExecutor #else void BindAsyncExecutor(py::module* m) { @@ -64,7 +64,7 @@ void BindAsyncExecutor(py::module* m) { return std::unique_ptr( new framework::AsyncExecutor(scope, place)); })) - .def("run_from_files", &framework::AsyncExecutor::RunFromFile) + .def("run_from_files", &framework::AsyncExecutor::RunFromFile); } // end BindAsyncExecutor #endif } // end namespace pybind diff --git a/python/paddle/fluid/distributed/__init__.py b/python/paddle/fluid/distributed/__init__.py index e69de29bb2..cd609c5040 100644 --- a/python/paddle/fluid/distributed/__init__.py +++ b/python/paddle/fluid/distributed/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and From 854ee964e81b2907ca15f201c60e941703f7a909 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Thu, 13 Dec 2018 22:41:08 +0800 Subject: [PATCH 43/62] add doc string for async_executor.py --- python/paddle/fluid/async_executor.py | 150 +++++++++++++++++++------- 1 file changed, 111 insertions(+), 39 deletions(-) diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py index 099805ac1b..fe2e9b8f12 100644 --- a/python/paddle/fluid/async_executor.py +++ b/python/paddle/fluid/async_executor.py @@ -89,8 +89,14 @@ class AsyncExecutor(object): self.executor = core.AsyncExecutor(scope, p) self.instance = None - - def run(self, program, data_feed, filelist, thread_num, fetch, mode="", debug=False): + def run(self, + program, + data_feed, + filelist, + thread_num, + fetch, + mode="", + debug=False): """ Run program by this AsyncExecutor. Training dataset will be in filelist. Users can also inspect certain variables by naming them in parameter @@ -110,6 +116,7 @@ class AsyncExecutor(object): thread_num(int): number of concurrent training threads. 
See :code:`Note` for how to set this properly fetch(str|list): the var name or a list of var names to inspect + mode(str): run mode of this interface debug(bool): When set to True, fetch vars will be printed to standard output after each minibatch @@ -154,83 +161,148 @@ data_feed.desc(), filelist, thread_num, fetch_var_names, mode, debug) - def download_data(self, afs_path, local_path, fs_default_name, ugi, file_cnt, hadoop_home="$HADOOP_HOME", process_num=12): + def download_data(self, + afs_path, + local_path, + fs_default_name, + ugi, + file_cnt, + hadoop_home="$HADOOP_HOME", + process_num=12): + """ + download_data is a default download method for distributed training + so that users do not have to implement downloading by hand + + Example: + >>> exe = fluid.AsyncExecutor() + >>> exe.download_data("/xxx/xxx/xx/", + >>> "./data", "afs:// + >>> xxx.xxx.xxx.xxx:9901", "xxx,yyy") + Args: + afs_path(str): afs_path defined by users + local_path(str): download data path + fs_default_name(str): file system server address + ugi(str): hadoop ugi + file_cnt(int): a user can specify file number for debugging + hadoop_home(str): hadoop home path + process_num(int): number of download processes + """ if self.instance is None: - raise ValueError('instance is None, please run config_distributed_nodes init instance') - - configs = { - "fs.default.name": fs_default_name, - "hadoop.job.ugi": ugi - } + raise ValueError('instance is None, please run ' + 'config_distributed_nodes init instance') + + configs = {"fs.default.name": fs_default_name, "hadoop.job.ugi": ugi} client = hdfs.HDFSClient(hadoop_home, configs) downloads = hdfs.multi_download( client, - afs_path, - local_path, + afs_path, + local_path, self.instance.get_worker_index(), self.instance.get_node_cnt() / 2, file_cnt, multi_processes=process_num) - #self.instance.barrier_all() #wait for download_data #TODO only barriere worker - self.instance.barrier_worker() #wait for download_data #TODO only barriere worker - - def config_distributed_nodes(self): - self.instance = ps_instance.PaddlePSInstance(1, 2) - return self.instance - - # get total rank - # get rank index - # get iplists - # get hadoop info - pass + self.instance.barrier_worker() #wait for download_data def get_instance(self): + """ + get the current node's instance so that users can do operations + in a distributed setting + """ if self.instance is None: - raise ValueError('instance is None, please run config_distributed_nodes init instance') + raise ValueError( + 'instance is None, please run config_distributed_nodes init instance' + ) + return self.instance + + def config_distributed_nodes(self): + """ + a user who needs to run the distributed async executor + should call this first to do the global configuration so that + information about the current process can be obtained + """ + self.instance = ps_instance.PaddlePSInstance(1, 2) return self.instance def stop(self): + """ + at the end of the process, users should call stop to stop the servers + and to barrier all workers + """ if self.instance is None: - raise ValueError('instance is None, please run config_distributed_nodes init instance') - self.instance.barrier_worker() #worker do all things + raise ValueError( + 'instance is None, please run config_distributed_nodes init instance' + ) + self.instance.barrier_worker() #worker do all things if self.instance.is_first_worker(): self.executor.stop_server() - self.instance.barrier_worker() #sync + self.instance.barrier_worker() #sync
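# A hedged sketch (not part of the patch) of how the distributed helpers
# above are meant to be called together on a node; the AFS address, ugi,
# paths and counts below are made-up placeholders.
#
#     import paddle.fluid as fluid
#
#     exe = fluid.AsyncExecutor()
#     instance = exe.config_distributed_nodes()  # MPI decides server/worker role
#     if instance.is_worker():
#         exe.download_data("/app/some/afs/dir/", "./local_data",
#                           "afs://example.afs.com:9902", "user,passwd",
#                           file_cnt=100)  # files are sharded across workers
#     exe.stop()  # first worker stops the server, then workers barrier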
def init_server(self, dist_desc): + """ + initialize the server of the current node if the current process is a server + Args: + dist_desc(str): a protobuf string that describes + how to init a worker and a server + """ if self.instance is None: - raise ValueError('instance is None, please run config_distributed_nodes init instance') + raise ValueError( + 'instance is None, please run config_distributed_nodes init instance' + ) self.executor.init_server(dist_desc, self.instance._rankid) ip = self.executor.start_server() self.instance.set_ip(ip) - self.instance.barrier_all() #wait all server start + self.instance.barrier_all() #wait all server start ips = self.instance.gather_ips() self.executor.gather_servers(ips, self.instance.get_node_cnt()) - self.instance.barrier_all() #wait all worker start + self.instance.barrier_all() #wait all worker start def init_worker(self, dist_desc, startup_program): + """ + initialize the worker of the current node if the current process is a worker + Args: + dist_desc(str): a protobuf string that describes + how to init a worker and a server + startup_program(fluid.Program): startup program of the current process + """ if self.instance is None: - raise ValueError('instance is None, please run config_distributed_nodes init instance') + raise ValueError( + 'instance is None, please run config_distributed_nodes init instance' + ) place = core.CPUPlace() executor = Executor(place) executor.run(startup_program) - self.instance.barrier_all() #wait all server start + self.instance.barrier_all() #wait all server start ips = self.instance.gather_ips() - self.executor.init_worker(dist_desc, ips, self.instance.get_node_cnt(), self.instance._rankid) - self.instance.barrier_all() #wait all worker start + self.executor.init_worker(dist_desc, ips, + self.instance.get_node_cnt(), + self.instance._rankid) + self.instance.barrier_all() #wait all worker start if self.instance.is_first_worker(): self.executor.init_model() - self.instance.barrier_worker() #wait init model - + self.instance.barrier_worker() #wait init model + def init_model(self): + """ + init_model command that can be invoked from one of the workers + model parameters are initialized in servers + """ if self.instance is None: - raise ValueError('instance is None, please run config_distributed_nodes init instance') + raise ValueError( + 'instance is None, please run config_distributed_nodes init instance' + ) self.executor.init_model() def save_model(self, save_path): + """ + save_model command that can be invoked from one of the workers + model parameters are saved in servers and uploaded to the save_path of the file system + Args: + save_path(str): path to the file system + """ if self.instance is None: - raise ValueError('instance is None, please run config_distributed_nodes init instance') + raise ValueError( + 'instance is None, please run config_distributed_nodes init instance' + ) self.executor.save_model(save_path) -
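Taken together, these methods implement a simple bootstrap protocol: servers start and publish their ips, workers gather them, and the first worker initializes and later saves the model. A minimal sketch under the assumption that ps_param, startup_program, main_program, data_feed and filelist are already defined (the proto string is produced as in the next patch; the save path is a placeholder):

    import paddle.fluid as fluid
    from google.protobuf import text_format

    exe = fluid.AsyncExecutor()
    instance = exe.config_distributed_nodes()
    dist_desc = text_format.MessageToString(ps_param)
    if instance.is_server():
        exe.init_server(dist_desc)   # start pslib server, gather peer ips
    elif instance.is_worker():
        exe.init_worker(dist_desc, startup_program)
        exe.run(main_program, data_feed, filelist,
                thread_num=10, fetch=[], mode="mpi")
        if instance.is_first_worker():
            exe.save_model("afs:/user/models/ctr")
    exe.stop()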
From 3759600019f206794d5852bbbc74fd959337cf3d Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Thu, 13 Dec 2018 23:01:53 +0800 Subject: [PATCH 44/62] add doc string for downpour.py and distribute_lookup_table.py --- .../paddle/fluid/distribute_lookup_table.py | 32 ++++++++++++--- python/paddle/fluid/distributed/downpour.py | 41 ++++++++++++----- 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/python/paddle/fluid/distribute_lookup_table.py b/python/paddle/fluid/distribute_lookup_table.py index 243d806c41..74824f6832 100644 --- a/python/paddle/fluid/distribute_lookup_table.py +++ b/python/paddle/fluid/distribute_lookup_table.py @@ -16,31 +16,51 @@ LOOKUP_TABLE_TYPE = "lookup_table" def find_distributed_lookup_table_inputs(program, table_name): + """ + Find the input variables of the distributed lookup table in the program. + We only support one distributed table now. + Args: + program(Program): given program, locate distributed lookup table + table_name(str): given table name that is found beforehand + Returns: + inputs + """ local_vars = program.current_block().vars inputs = [] for op in program.global_block().ops: if op.type == LOOKUP_TABLE_TYPE: if table_name == op.input("W")[0]: - inputs.extend( - [local_vars[name] for name in op.input("Ids")]) + inputs.extend([local_vars[name] for name in op.input("Ids")]) return inputs + def find_distributed_lookup_table_outputs(program, table_name): + """ + Find the output variables of the distributed lookup table in the program. + We only support one distributed table now. + Args: + program(Program): given program, locate distributed lookup table + table_name(str): given table name that is found beforehand + Returns: + outputs + """ local_vars = program.current_block().vars outputs = [] for op in program.global_block().ops: if op.type == LOOKUP_TABLE_TYPE: if table_name == op.input("W")[0]: - outputs.extend( - [local_vars[name] for name in op.output("Out")]) + outputs.extend([local_vars[name] for name in op.output("Out")]) return outputs + def find_distributed_lookup_table(program): """ Find the distributed lookup table in the program. We only support one distributed table now. - :param program: - :return: table_name or None + Args: + program(Program): given program, locate distributed lookup table + Returns: + table_name or None """ table_name = None diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 9ef9e14ccc..87dfab92c5 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -20,6 +20,7 @@ from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_i from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table_outputs from google.protobuf import text_format + class DownpourSGD(object): """ Distributed optimizer of downpour stochastic gradient descent Args: learning_rate (float): the learning rate used to update parameters. \ Can be a float value Examples: downpour_sgd = fluid.distributed.DownpourSGD(learning_rate=0.2) downpour_sgd.minimize(cost) """ + def __init__(self, learning_rate=0.001, window=1): # todo(guru4elephant): add more optimizers here as argument # todo(guru4elephant): make learning_rate as a variable self.learning_rate_ = learning_rate self.window_ = window self.type = "downpour" - - def minimize(self, loss, startup_program=None, - parameter_list=None, no_grad_set=None): - params_grads = sorted(append_backward( - loss, parameter_list, no_grad_set), key=lambda x:x[0].name) + + def minimize(self, + loss, + startup_program=None, + parameter_list=None, + no_grad_set=None): + """ + DownpourSGD is a distributed optimizer, so + a user can call minimize to generate backward + operators and optimization operators within the minimize function + Args: + loss(Variable): loss variable defined by the user + startup_program(Program): startup program that is defined by the user + parameter_list(str list): parameter names defined by users + no_grad_set(set): a set of variables that are defined by users + so that these variables do not need gradient computation + Returns: + [ps_param, worker_skipped_ops] + ps_param: parameter server protobuf desc + worker_skipped_ops: operator names that need + to be skipped during execution + """ + params_grads = sorted( + append_backward(loss, parameter_list, no_grad_set), + key=lambda x: x[0].name) table_name = find_distributed_lookup_table(loss.block.program) prefetch_slots = find_distributed_lookup_table_inputs( loss.block.program, table_name) @@ -67,12 +89,12 @@ class DownpourSGD(object): grads.append(i[1]) server.add_sparse_table(sparse_table_index, self.learning_rate_, prefetch_slots, prefetch_slots_emb) - server.add_dense_table(dense_table_index, self.learning_rate_, - params, grads) + server.add_dense_table(dense_table_index, self.learning_rate_, params, + grads) worker.add_sparse_table(sparse_table_index, self.learning_rate_, prefetch_slots, prefetch_slots_emb) - worker.add_dense_table(dense_table_index, self.learning_rate_, - params, grads) + worker.add_dense_table(dense_table_index, self.learning_rate_, params, + grads) ps_param = pslib.PSParameter() ps_param.server_param.CopyFrom(server.get_desc()) ps_param.trainer_param.CopyFrom(worker.get_desc()) @@ -80,5 +102,4 @@ class DownpourSGD(object): # currently only support lookup_table worker_skipped_ops = ["lookup_table", "lookup_table_grad"] ps_param.trainer_param.skip_op.extend(worker_skipped_ops) - ps_param_str = text_format.MessageToString(ps_param) return [ps_param, worker_skipped_ops]
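minimize is the entry point that stitches these pieces together: it appends the backward pass, locates the distributed lookup table, and emits matching server and worker table configurations. A hedged sketch of the intended call pattern (the network builder is a placeholder, not part of the patch):

    import paddle.fluid as fluid
    from google.protobuf import text_format

    avg_cost = build_ctr_network()   # placeholder: any net with a lookup_table
    downpour = fluid.distributed.DownpourSGD(learning_rate=0.001, window=1)
    ps_param, worker_skipped_ops = downpour.minimize(avg_cost)
    # serialize ps_param into the dist_desc string that init_server /
    # init_worker in the previous patch consume
    dist_desc = text_format.MessageToString(ps_param)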
From caa6b596775380b568ff934c24d4c641652e8fcc Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Fri, 14 Dec 2018 11:43:23 +0800 Subject: [PATCH 45/62] add hdfs_utils & helper & node doc --- .../paddle/fluid/contrib/utils/hdfs_utils.py | 163 +++++++++++++----- python/paddle/fluid/distributed/helper.py | 34 +++- python/paddle/fluid/distributed/node.py | 113 +++++++++--- 3 files changed, 238 insertions(+), 72 deletions(-) diff --git a/python/paddle/fluid/contrib/utils/hdfs_utils.py b/python/paddle/fluid/contrib/utils/hdfs_utils.py index 42b4d7feab..baea57ccce 100644 --- a/python/paddle/fluid/contrib/utils/hdfs_utils.py +++ b/python/paddle/fluid/contrib/utils/hdfs_utils.py @@ -32,6 +32,28 @@ _logger.setLevel(logging.INFO) class HDFSClient(object): + """ + A tool for HDFS + + Args: + hadoop_home (string): hadoop_home + configs (dict): hadoop config, it is a dict that must contain \ + the keys "fs.default.name" and "hadoop.job.ugi" + Examples: + hadoop_home = "/home/client/hadoop-client/hadoop/" + + configs = { + "fs.default.name": "hdfs://xxx.hadoop.com:54310", + "hadoop.job.ugi": "hello,hello123" + } + + client = HDFSClient(hadoop_home, configs) + + client.ls("/user/com/train-25") + files = client.lsr("/user/com/train-25/models") + """ + def __init__(self, hadoop_home, configs): self.pre_commands = [] hadoop_bin = '%s/bin/hadoop' % hadoop_home @@ -55,7 +77,10 @@ class HDFSClient(object): whole_commands = " ".join(whole_commands) for x in range(retry_times + 1): proc = subprocess.Popen( - whole_commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + whole_commands, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True) (output, errors) = proc.communicate() ret_code, ret_out, ret_err = proc.returncode, output, errors if ret_code: @@ -69,10 +94,12 @@ class HDFSClient(object): def upload(self, hdfs_path, local_path, overwrite=False, retry_times=5): """ upload the local file to hdfs - args: - local_file_path: the local file path - remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) - return: + Args: + hdfs_path: hdfs path, target path + local_path: local file path, source path + overwrite: will overwrite the original file + retry_times: max number of retries for the upload + Returns: True or False """ assert hdfs_path is not None
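# A hedged usage sketch for the upload/download pair documented in this
# class (paths, hadoop_home and the config values are placeholders):
#
#     client = HDFSClient("/home/client/hadoop-client/hadoop/",
#                         {"fs.default.name": "hdfs://xxx.hadoop.com:54310",
#                          "hadoop.job.ugi": "hello,hello123"})
#     if client.upload("/user/com/train-25/input", "./train.txt",
#                      overwrite=True, retry_times=5):
#         client.download("/user/com/train-25/input", "./train_copy.txt")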
@@ -115,10 +142,12 @@ class HDFSClient(object): def download(self, hdfs_path, local_path, overwrite=False, unzip=False): """ download from hdfs - args: - local_file_path: the local file path - remote_file_path: remote dir on hdfs - return: + Args: + hdfs_path: hdfs path, source path + local_path: local file path, target path + overwrite: will remove the original file and overwrite it. + unzip: ignore this param + Returns: True or False """ _logger.info('Downloading %r to %r.', hdfs_path, local_path) @@ -160,11 +189,11 @@ class HDFSClient(object): def is_exist(self, hdfs_path=None): """ whether the remote hdfs path exists? - args: - remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) + Args: + hdfs_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) fs_name: The default values are the same as in the job configuration fs_ugi: The default values are the same as in the job configuration - return: + Returns: True or False """ exist_cmd = ['-test', '-e', hdfs_path] @@ -183,11 +212,11 @@ class HDFSClient(object): def is_dir(self, hdfs_path=None): """ whether the remote hdfs path is a directory? - args: + Args: remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) fs_name: The default values are the same as in the job configuration fs_ugi: The default values are the same as in the job configuration - return: + Returns: True or False """ @@ -207,15 +236,17 @@ class HDFSClient(object): return True def delete(self, hdfs_path): - """Remove a file or directory from HDFS. - - :param hdfs_path: HDFS path. - :param recursive: Recursively delete files and directories. By default, - this method will raise an :class:`HdfsError` if trying to delete a - non-empty directory. + """ + Remove a file or directory from HDFS. - This function returns `True` if the deletion was successful and `False` if - no file or directory previously existed at `hdfs_path`. + Args: + param hdfs_path: HDFS path. + param recursive: Recursively delete files and directories. By default, + this method will raise an :class:`HdfsError` if trying to delete a + non-empty directory. + Returns: + This function returns `True` if the deletion was successful and `False` if + no file or directory previously existed at `hdfs_path`. """ _logger.info('Deleting %r.', hdfs_path) @@ -241,14 +272,17 @@ class HDFSClient(object): return True def rename(self, hdfs_src_path, hdfs_dst_path, overwrite=False): - """Move a file or folder. - - :param hdfs_src_path: Source path. - :param hdfs_dst_path: Destination path. If the path already exists and is - a directory, the source will be moved into it. If the path exists and is - a file, or if a parent destination directory is missing, this method will - raise an :class:`HdfsError`. - + """ + Rename a file or folder. + Args: + :param hdfs_src_path: Source path. + :param hdfs_dst_path: Destination path. If the path already exists and is + a directory, the source will be moved into it. If the path exists and is + a file, or if a parent destination directory is missing, this method will + raise an :class:`HdfsError`. + Returns: + This function returns `True` if the rename was successful and `False` if + the rename failed. """ assert hdfs_src_path is not None assert hdfs_dst_path is not None
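# A small hedged sketch of the delete/rename semantics described above
# (paths are placeholders):
#
#     if client.is_exist("/user/com/tmp/old"):
#         # returns False if a parent dir is missing or the move fails
#         client.rename("/user/com/tmp/old", "/user/com/tmp/new")
#     # returns False when nothing existed at the path
#     client.delete("/user/com/tmp/stale")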
@@ -274,6 +308,11 @@ class HDFSClient(object): @staticmethod def make_local_dirs(local_path): + """ + create a local directory, the same as mkdir + Args: + local_path: local path in which the directory will be created. + """ try: os.makedirs(local_path) except OSError as e: @@ -282,9 +321,11 @@ class HDFSClient(object): def makedirs(self, hdfs_path): """Create a remote directory, recursively if necessary. - - :param hdfs_path: Remote path. Intermediate directories will be created - appropriately. + Args: + :param hdfs_path: Remote path. Intermediate directories will be created + appropriately. + Returns: + True if making the directories succeeded, False otherwise. """ _logger.info('Creating directories to %r.', hdfs_path) assert hdfs_path is not None @@ -304,6 +345,13 @@ class HDFSClient(object): return True def ls(self, hdfs_path): + """ + ls a hdfs_path. + Args: + :param hdfs_path: hdfs_path to be listed. + Returns: + This function returns a `list` that contains all files in the hdfs_path. + """ assert hdfs_path is not None if not self.is_exist(hdfs_path): @@ -329,6 +377,14 @@ class HDFSClient(object): return ret_lines def lsr(self, hdfs_path, only_file=True, sort=True): + """ + ls a hdfs_path, sorted by time. + Args: + :param hdfs_path: hdfs_path to be listed. + Returns: + This function returns a `list` that contains all files sorted by time in the hdfs_path. + """ + def sort_by_time(v1, v2): v1_time = datetime.strptime(v1[1], '%Y-%m-%d %H:%M') v2_time = datetime.strptime(v2[1], '%Y-%m-%d %H:%M') @@ -372,12 +428,15 @@ def multi_upload(client, multi_processes=5, overwrite=False): """ - :param overwrite: will overwrite hdfs file or not - :param multi_processes: the upload data process at the same time, default=5 - :param client: instance of HDFSClient - :param hdfs_path: path on hdfs - :param local_path: path on local - :return: + Upload files to hdfs. + Args: + :param overwrite: will overwrite the hdfs file or not + :param multi_processes: the number of upload processes at the same time, default=5 + :param client: instance of HDFSClient + :param hdfs_path: path on hdfs + :param local_path: path on local + Returns: + None + """ def __subprocess_upload(datas): @@ -387,6 +446,13 @@ def multi_upload(client, client.upload(hdfs_re_path, data, overwrite, retry_times=5) def get_local_files(path): + """ + Get all local files + Args: + path: local file path + Returns: + A list that contains all files in the path. + """ rlist = [] if not os.path.isdir(path): @@ -431,14 +497,17 @@ def multi_download(client, multi_processes=5): """ multi_download - :param client: instance of HDFSClient - :param hdfs_path: path on hdfs - :param local_path: path on local - :param trainer_id: current trainer id - :param trainers: all trainers number - :param file_cnt: all file number - :param multi_processes: the download data process at the same time, default=5 - :return: None + Args: + :param client: instance of HDFSClient + :param hdfs_path: path on hdfs + :param local_path: path on local + :param trainer_id: current trainer id + :param trainers: all trainers number + :param file_cnt: all file number + :param multi_processes: the number of download processes at the same time, default=5 + Returns: + A list of the downloaded files. """ def __subprocess_download(datas):
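AsyncExecutor.download_data in the earlier patch is a thin wrapper over multi_download; a hedged sketch of calling it directly (paths and counts are placeholders, and instance is a PaddlePSInstance as used there):

    # shard file_cnt files under hdfs_path across trainers; each trainer
    # keeps the slice matching its trainer_id
    downloaded = multi_download(
        client,
        "/app/data/20181201",          # hdfs_path
        "./local_data",                # local_path
        instance.get_worker_index(),   # trainer_id
        instance.get_node_cnt() / 2,   # trainers: half the processes are workers
        100,                           # file_cnt
        multi_processes=5)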
diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py index 986525e5d8..ca6dd5dabf 100644 --- a/python/paddle/fluid/distributed/helper.py +++ b/python/paddle/fluid/distributed/helper.py @@ -15,13 +15,26 @@ from mpi4py import MPI import ps_pb2 as pslib + class FileSystem(object): - def __init__(self, fs_type="afs", + """ + A file system that supports the async_executor hadoop client desc. + + Args: + fs_type (string): file system type, for example "afs" + user (string): hadoop param + passwd (string): hadoop param + hadoop_bin (string): hadoop param + Examples: + fs = FileSystem() + """ + + def __init__(self, + fs_type="afs", uri="afs://tianqi.afs.baidu.com:9902", user=None, passwd=None, - hadoop_bin="", - afs_conf=None): + hadoop_bin=""): assert user != None assert passwd != None assert hadoop_bin != None @@ -38,9 +51,22 @@ class FileSystem(object): #self.fs_client.afs_conf = afs_conf if not afs_conf else "" def get_desc(self): + """ + get hadoop desc. + """ return self.fs_client + class MPIHelper(object): + """ + MPIHelper is a wrapper of mpi4py that supports get_rank, get_size, etc. + Args: + No params + Examples: + mh = MPIHelper() + mh.get_ip() + """ + def __init__(self): self.comm = MPI.COMM_WORLD @@ -61,5 +87,3 @@ class MPIHelper(object): def finalize(self): MPI.Finalize() - - diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index 8755323006..117da9cff8 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -13,17 +13,34 @@ import ps_pb2 as pslib + class Server(object): + """ + A Server basic class. + """ + def __init__(self): pass class Worker(object): + """ + A Worker basic class. + """ + def __init__(self): pass class DownpourServer(Server): + """ + DownpourServer class is used to generate the server program_desc + Args: + server: it is pslib.ServerParameter() + Examples: + server = DownpourServer() + """ + def __init__(self): self.server_ = pslib.ServerParameter() self.server_.downpour_server_param.service_param.start_server_port = 0 @@ -33,8 +50,18 @@ class DownpourServer(Server): self.server_.downpour_server_param.service_param.start_server_port = 0 self.server_.downpour_server_param.service_param.server_thread_num = 12 - def add_sparse_table(self, table_id, learning_rate, - slot_key_vars, slot_value_var): + def add_sparse_table(self, table_id, learning_rate, slot_key_vars, + slot_value_var): + """ + Args: + table_id(int): id of the sparse params table + learning_rate(float): the learning rate used to update parameters. \ + Can be a float value + slot_key_vars(string): slot key id + slot_value_var(string): slot key value after embedding + Returns: + return None + """ table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id table.table_class = "DownpourSparseTable" table.type = pslib.PS_SPARSE_TABLE table.accessor.accessor_class = "DownpourFeatureValueAccessor" table.accessor.sparse_sgd_param.learning_rate = learning_rate table.accessor.sparse_sgd_param.initial_g2sum = 3 table.accessor.sparse_sgd_param.initial_range = 1e-4 table.accessor.sparse_sgd_param.weight_bounds.extend([-10, 10]) - + table.accessor.embedx_dim = 8 table.accessor.embedx_threshold = 5 - table.accessor.fea_dim = 11 + table.accessor.fea_dim = 11 #table.accessor.fea_dim = abs(reduce(lambda x, y: x * y, # slot_value_var[0].shape, 1)) table.accessor.downpour_accessor_param.nonclk_coeff = 0.1 table.accessor.downpour_accessor_param.click_coeff = 2 table.accessor.downpour_accessor_param.base_threshold = 0.2 table.accessor.downpour_accessor_param.delta_threshold = 0.15 table.accessor.downpour_accessor_param.delta_keep_days = 31 table.accessor.downpour_accessor_param.show_click_decay_rate = 0.999 table.accessor.downpour_accessor_param.delete_threshold = 0.8 - def add_dense_table(self, table_id, learning_rate, - param_var, grad_var): + def add_dense_table(self, table_id, learning_rate, param_var, grad_var): + """ + Args: + table_id(int): id of the dense params table + learning_rate(float): the learning rate used to update parameters. \ + Can be a float value + param_var(list): all dense params. it is a list. + grad_var(list): all dense grad params. it is a list.
+ Returns: + return None + """ table = self.server_.downpour_server_param.downpour_table_param.add() table.table_id = table_id table.table_class = "DownpourDenseTable" table.type = pslib.PS_DENSE_TABLE table.accessor.accessor_class = "DownpourDenseValueAccessor" - table.accessor.dense_sgd_param.name = "adam" + table.accessor.dense_sgd_param.name = "adam" table.accessor.dense_sgd_param.adam.learning_rate = learning_rate - table.accessor.dense_sgd_param.adam.avg_decay_rate = 0.999993 - table.accessor.dense_sgd_param.adam.ada_decay_rate = 0.9999 + table.accessor.dense_sgd_param.adam.avg_decay_rate = 0.999993 + table.accessor.dense_sgd_param.adam.ada_decay_rate = 0.9999 table.accessor.dense_sgd_param.adam.ada_epsilon = 1e-8 table.accessor.dense_sgd_param.adam.mom_decay_rate = 0.99 table.accessor.dense_sgd_param.naive.learning_rate = 0.0002 fea_dim = 0 - for param in filter(lambda x: x.name.find("embedding") == -1, param_var): + for param in filter(lambda x: x.name.find("embedding") == -1, + param_var): fea_dim += reduce(lambda x, y: x * y, param.shape, 1) table.accessor.fea_dim = fea_dim def get_desc(self): + """ + Return downpour server program_desc + """ return self.server_ class DownpourWorker(Worker): + """ + DownpourWorker class is used to generate worker program_desc + Args: + window (int): push params frequency + worker: it is pslib.DownpourTrainerParameter + Examples: + worker = DownpourWorker(1) + """ + def __init__(self, window): self.window = window self.worker_ = pslib.DownpourTrainerParameter() #self.worker_.pull_dense_per_batch = window #self.worker_.push_dense_per_batch = window - def add_sparse_table(self, table_id, learning_rate, - slot_key_vars, slot_value_vars): + def add_sparse_table(self, table_id, learning_rate, slot_key_vars, + slot_value_vars): + """ + Args: + table_id(int): id of sparse params table + learning_rate(float): the learning rate used to update parameters. \ + Can be a float value + slot_key_vars(string): slot key id + slot_value_var(string): slot key value after embedding + Returns: + return None + """ table = self.worker_.sparse_table.add() table.table_id = table_id - table.slot_key.extend( - [var.name for var in slot_key_vars]) - table.slot_value.extend( - [var.name for var in slot_value_vars]) + table.slot_key.extend([var.name for var in slot_key_vars]) + table.slot_value.extend([var.name for var in slot_value_vars]) table.slot_gradient.extend( [var.name + "@GRAD" for var in slot_value_vars]) - def add_dense_table(self, table_id, learning_rate, - param_vars, grad_vars): + def add_dense_table(self, table_id, learning_rate, param_vars, grad_vars): + """ + Args: + table_id(int): id of sparse params table + learning_rate(float): the learning rate used to update parameters. \ + Can be a float value + param_var(list): all dense param. it is a list. + grad_var(list): all dense grad parm it is a list. 
+ Returns: + return None + """ table = self.worker_.dense_table.add() table.table_id = table_id - table.dense_variable_name.extend(filter(lambda x: x.find("embedding") == -1, [p.name for p in param_vars])) - table.dense_gradient_variable_name.extend(filter(lambda x: x.find("embedding") == -1, [g.name for g in grad_vars])) + table.dense_variable_name.extend( + filter(lambda x: x.find("embedding") == -1, + [p.name for p in param_vars])) + table.dense_gradient_variable_name.extend( + filter(lambda x: x.find("embedding") == -1, + [g.name for g in grad_vars])) def get_desc(self): + """ + Return downpour worker program_desc + """ return self.worker_ From 35ce6ac2e6f1d71da55da74e49212fdbb2a61e79 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Fri, 14 Dec 2018 13:18:28 +0800 Subject: [PATCH 46/62] add ps_instance doc --- .../paddle/fluid/distributed/ps_instance.py | 58 ++++++++++++++++--- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py index b93da053a3..6b44d0cd16 100644 --- a/python/paddle/fluid/distributed/ps_instance.py +++ b/python/paddle/fluid/distributed/ps_instance.py @@ -14,27 +14,36 @@ import helper as dist_helper import sys + class PaddlePSInstance(object): + """ + PaddlePSInstance class is used to generate A instance of server or worker + Args: + server_worker_mode: is a value 0 or 1, default is 1 + proc_per_node: process per node, default is 2 + Examples: + instance = PaddlePSInstance(1, 2) + """ + def __init__(self, server_worker_mode, proc_per_node): self.dh = dist_helper.MPIHelper() self._rankid = self.dh.get_rank() self._server_worker_mode = server_worker_mode self._proc_per_node = proc_per_node self._nodes = self.dh.get_size() - + self._ip = 0 self._worker_num = self._nodes * self._proc_per_node / 2 self._server_num = self._nodes * self._proc_per_node / 2 self._total_server_worker = self._worker_num + self._server_num - self._node_type = None #IDLE=-1, WORKER=1, SERVER=0 + self._node_type = None #IDLE=-1, WORKER=1, SERVER=0 self._set_nodetype() self._comm = None self._split_comm() - def _set_nodetype(self): if self._server_worker_mode == 0: - if self._rankid < self._server_num: + if self._rankid < self._server_num: self._node_type = 1 elif self._rankid < self._total_server_worker: self._node_type = 0 @@ -46,13 +55,13 @@ class PaddlePSInstance(object): self._node_type = 0 else: self._node_type = 1 - else: - self._node_type = -1; + else: + self._node_type = -1 else: self._node_type = -1 - + #if self._rankid == 0: - #print "node type: ", self._node_type + #print "node type: ", self._node_type def _split_comm(self): if self.is_server(): @@ -62,45 +71,78 @@ class PaddlePSInstance(object): pass def get_worker_index(self): + """ + Return worker index + """ if self._server_worker_mode == 0: return self._rankid == self.server_num else: return self._rankid / self._proc_per_node def get_server_index(self): + """ + Return server index + """ if self._server_worker_mode == 0: return self.rank_id else: return self.rank_id / self._proc_per_node def is_worker(self): + """ + Return instance is worker or not + """ return self._node_type == 1 def is_server(self): + """ + Return instance is server or not + """ return self._node_type == 0 def is_first_worker(self): + """ + Return instance is first worker or not + """ return self.is_worker() and 0 == self.get_worker_index() def set_ip(self, ip): + """ + set server ip + """ self._ip = ip def gather_ips(self): + """ + Return all servers and workers 
ip throught mpi allgather + """ self._ips = self.dh.comm.allgather(self._ip) return self._ips def get_node_cnt(self): + """ + Return node cnt + """ return self._nodes def barrier_all(self): + """ + barrier workers and servers + """ self.dh.comm.barrier() def barrier_worker(self): + """ + barrier workers + """ if self.is_worker(): self._comm.barrier() pass def finalize(self): + """ + MPI finalize + """ self.dh.finalize() pass From bafd823666bde1098cf07eb23d406bc9780c7b28 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Fri, 14 Dec 2018 13:18:28 +0800 Subject: [PATCH 47/62] test --- .../paddle/fluid/distributed/ps_instance.py | 58 ++++++++++++++++--- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py index b93da053a3..6b44d0cd16 100644 --- a/python/paddle/fluid/distributed/ps_instance.py +++ b/python/paddle/fluid/distributed/ps_instance.py @@ -14,27 +14,36 @@ import helper as dist_helper import sys + class PaddlePSInstance(object): + """ + PaddlePSInstance class is used to generate A instance of server or worker + Args: + server_worker_mode: is a value 0 or 1, default is 1 + proc_per_node: process per node, default is 2 + Examples: + instance = PaddlePSInstance(1, 2) + """ + def __init__(self, server_worker_mode, proc_per_node): self.dh = dist_helper.MPIHelper() self._rankid = self.dh.get_rank() self._server_worker_mode = server_worker_mode self._proc_per_node = proc_per_node self._nodes = self.dh.get_size() - + self._ip = 0 self._worker_num = self._nodes * self._proc_per_node / 2 self._server_num = self._nodes * self._proc_per_node / 2 self._total_server_worker = self._worker_num + self._server_num - self._node_type = None #IDLE=-1, WORKER=1, SERVER=0 + self._node_type = None #IDLE=-1, WORKER=1, SERVER=0 self._set_nodetype() self._comm = None self._split_comm() - def _set_nodetype(self): if self._server_worker_mode == 0: - if self._rankid < self._server_num: + if self._rankid < self._server_num: self._node_type = 1 elif self._rankid < self._total_server_worker: self._node_type = 0 @@ -46,13 +55,13 @@ class PaddlePSInstance(object): self._node_type = 0 else: self._node_type = 1 - else: - self._node_type = -1; + else: + self._node_type = -1 else: self._node_type = -1 - + #if self._rankid == 0: - #print "node type: ", self._node_type + #print "node type: ", self._node_type def _split_comm(self): if self.is_server(): @@ -62,45 +71,78 @@ class PaddlePSInstance(object): pass def get_worker_index(self): + """ + Return worker index + """ if self._server_worker_mode == 0: return self._rankid == self.server_num else: return self._rankid / self._proc_per_node def get_server_index(self): + """ + Return server index + """ if self._server_worker_mode == 0: return self.rank_id else: return self.rank_id / self._proc_per_node def is_worker(self): + """ + Return instance is worker or not + """ return self._node_type == 1 def is_server(self): + """ + Return instance is server or not + """ return self._node_type == 0 def is_first_worker(self): + """ + Return instance is first worker or not + """ return self.is_worker() and 0 == self.get_worker_index() def set_ip(self, ip): + """ + set server ip + """ self._ip = ip def gather_ips(self): + """ + Return all servers and workers ip throught mpi allgather + """ self._ips = self.dh.comm.allgather(self._ip) return self._ips def get_node_cnt(self): + """ + Return node cnt + """ return self._nodes def barrier_all(self): + """ + barrier workers and servers + """ 
self.dh.comm.barrier() def barrier_worker(self): + """ + barrier workers + """ if self.is_worker(): self._comm.barrier() pass def finalize(self): + """ + MPI finalize + """ self.dh.finalize() pass From bd1c1724aabc2b1d2d30ae6ac159df297b6c7f54 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Fri, 14 Dec 2018 13:18:28 +0800 Subject: [PATCH 48/62] add ps_instance doc --- paddle/fluid/framework/CMakeLists.txt | 15 +- paddle/fluid/framework/async_executor.cc | 114 ++++++------ .../fluid/framework/executor_thread_worker.cc | 165 +++++++++--------- .../fluid/framework/executor_thread_worker.h | 124 ++++++------- 4 files changed, 199 insertions(+), 219 deletions(-) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index f3d66cd883..ab237f768a 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -1,6 +1,6 @@ -# windows treat symbolic file as a real file, which is different with unix -# We create a hidden file and compile it instead of origin source file. +#windows treat symbolic file as a real file, which is different with unix +#We create a hidden file and compile it instead of origin source file. function(windows_symbolic TARGET) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) @@ -11,7 +11,7 @@ function(windows_symbolic TARGET) message(FATAL " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.") endif() - # only copy the xx.cu to .xx.cu when the content are modified +#only copy the xx.cu to.xx.cu when the content are modified set(copy_flag 1) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu) file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc SOURCE_STR) @@ -32,7 +32,7 @@ endfunction() add_subdirectory(ir) add_subdirectory(details) -# ddim lib +#ddim lib proto_library(framework_proto SRCS framework.proto) proto_library(async_executor_proto SRCS data_feed.proto) @@ -89,8 +89,8 @@ nv_test(data_device_transform_test SRCS data_device_transform_test.cu if(WITH_GPU) if (WIN32) - # windows treat symbolic file as a real file, which is different with unix - # We create a hidden file and compile it instead of origin source file. +#windows treat symbolic file as a real file, which is different with unix +#We create a hidden file and compile it instead of origin source file. windows_symbolic(hidden_file SRCS data_type_transform.cu) nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor) add_dependencies(data_type_transform hidden_file) @@ -137,7 +137,8 @@ cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto) -# Generate an empty __init__.py to make framework_py_proto as a valid python module. +#Generate an empty \ + __init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) if (NOT WIN32) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index 0fe7f3bd5c..e2756cafa2 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -30,7 +30,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/pybind/pybind.h" #ifdef PADDLE_WITH_PSLIB -#include "pslib.h" +#include <pslib.h> #endif namespace paddle { @@ -70,50 +70,52 @@ void PrepareReaders(std::vector<std::shared_ptr<DataFeed>>& readers, // NOLINT #ifdef PADDLE_WITH_PSLIB void AsyncExecutor::InitServer(const std::string& dist_desc, int index) { - _pslib_ptr = - std::shared_ptr<paddle::distributed::PSlib>( - new paddle::distributed::PSlib()); - _pslib_ptr->init_server(dist_desc, index); - InitParamConfig(); + _pslib_ptr = std::shared_ptr<paddle::distributed::PSlib>( + new paddle::distributed::PSlib()); + _pslib_ptr->init_server(dist_desc, index); + InitParamConfig(); } void AsyncExecutor::InitWorker(const std::string& dist_desc, const std::vector<uint64_t>& host_sign_list, int node_num, int index) { - _pslib_ptr = std::shared_ptr<paddle::distributed::PSlib>( - new paddle::distributed::PSlib()); - _pslib_ptr->init_worker( - dist_desc, (uint64_t*)(host_sign_list.data()), node_num, index); + _pslib_ptr = std::shared_ptr<paddle::distributed::PSlib>( + new paddle::distributed::PSlib()); + _pslib_ptr->init_worker(dist_desc, + static_cast<uint64_t*>(host_sign_list.data()), + node_num, index); - InitParamConfig(); + InitParamConfig(); } -uint64_t AsyncExecutor::StartServer() { - return _pslib_ptr->run_server(); -} +uint64_t AsyncExecutor::StartServer() { return _pslib_ptr->run_server(); } -void AsyncExecutor::StopServer() { - _pslib_ptr->stop_server(); -} +void AsyncExecutor::StopServer() { _pslib_ptr->stop_server(); } -void AsyncExecutor::GatherServers( - const std::vector<uint64_t>& host_sign_list, int node_num) { - _pslib_ptr->gather_servers((uint64_t*)(host_sign_list.data()), node_num); +void AsyncExecutor::GatherServers(const std::vector<uint64_t>& host_sign_list, + int node_num) { + _pslib_ptr->gather_servers(static_cast<uint64_t*>(host_sign_list.data()), + node_num); } void AsyncExecutor::InitParamConfig() { - for (int i = 0; i < - _pslib_ptr->get_param()->server_param(). \ - downpour_server_param(). \ - downpour_table_param_size(); + for (int i = 0; i < _pslib_ptr->get_param() + ->server_param() + .downpour_server_param() + .downpour_table_param_size(); ++i) { - if (_pslib_ptr->get_param()->server_param(). \ - downpour_server_param().downpour_table_param(i). \ - table_class().find("SparseTable") != -1) { - _param_config.fea_dim = _pslib_ptr->get_param()->server_param(). \ - downpour_server_param(). \ - downpour_table_param(i). 
\ - accessor().fea_dim(); + if (_pslib_ptr->get_param() + ->server_param() + .downpour_server_param() + .downpour_table_param(i) + .table_class() + .find("SparseTable") != -1) { + _param_config.fea_dim = _pslib_ptr->get_param() + ->server_param() + .downpour_server_param() + .downpour_table_param(i) + .accessor() + .fea_dim(); break; } } @@ -122,28 +124,24 @@ void AsyncExecutor::InitParamConfig() { _pslib_ptr->get_param()->trainer_param().push_dense_per_batch()); _param_config.tmp_push_sparse_wait_times = static_cast( _pslib_ptr->get_param()->trainer_param().push_sparse_per_batch()); - - for (auto t = 0u; - t < _pslib_ptr->get_param()->trainer_param().skip_op_size(); + + for (auto t = 0u; t < _pslib_ptr->get_param()->trainer_param().skip_op_size(); ++t) { _param_config.skip_op.push_back( _pslib_ptr->get_param()->trainer_param().skip_op(t)); } - + for (auto t = 0u; - t < _pslib_ptr->get_param()->trainer_param().sparse_table_size(); - ++t) { + t < _pslib_ptr->get_param()->trainer_param().sparse_table_size(); ++t) { auto& table = _pslib_ptr->get_param()->trainer_param().sparse_table(t); std::vector tmp_sparse_variable_name; for (int i = 0u; i < table.slot_value_size(); ++i) { tmp_sparse_variable_name.push_back(table.slot_value(i)); - _param_config.slot_alias_to_table[table.slot_key(i)] = - table.table_id(); + _param_config.slot_alias_to_table[table.slot_key(i)] = table.table_id(); } std::vector tmp_sparse_gradient_variable_name; for (auto i = 0u; i < table.slot_gradient_size(); ++i) { - tmp_sparse_gradient_variable_name.push_back( - table.slot_gradient(i)); + tmp_sparse_gradient_variable_name.push_back(table.slot_gradient(i)); } _param_config.slot_input_vec[table.table_id()] = std::move(tmp_sparse_variable_name); @@ -151,10 +149,9 @@ void AsyncExecutor::InitParamConfig() { std::move(tmp_sparse_gradient_variable_name); _param_config.sparse_table_id.push_back(table.table_id()); } - + for (auto t = 0u; - t < _pslib_ptr->get_param()->trainer_param().dense_table_size(); - ++t) { + t < _pslib_ptr->get_param()->trainer_param().dense_table_size(); ++t) { auto& table = _pslib_ptr->get_param()->trainer_param().dense_table(t); std::vector tmp_dense_variable_name; for (int i = 0u; i < table.dense_variable_name_size(); ++i) { @@ -181,26 +178,25 @@ void AsyncExecutor::InitModel() { Variable* var = root_scope_->FindVar(t); CHECK(var != nullptr) << "var[" << t << "] not found"; LoDTensor* tensor = var->GetMutable(); - + float* g = tensor->data(); CHECK(g != nullptr) << "var[" << t << "] value not initialized"; float init_range = 0.2; int rown = tensor->dims()[0]; init_range /= sqrt(rown); - + std::normal_distribution ndistr(0.0, 1.0); for (auto i = 0u; i < tensor->numel(); ++i) { g[i] = ndistr(local_random_engine()) * init_range; } - + paddle::ps::Region reg(g, tensor->numel()); regions.emplace_back(std::move(reg)); } - - auto push_status = - _pslib_ptr->_worker_ptr->push_dense_param( - regions.data(), regions.size(), table_id); + + auto push_status = _pslib_ptr->_worker_ptr->push_dense_param( + regions.data(), regions.size(), table_id); push_status.wait(); auto status = push_status.get(); if (status != 0) { @@ -225,14 +221,14 @@ void AsyncExecutor::SaveModel(const std::string& path) { void AsyncExecutor::PrepareDenseThread(const std::string& mode) { if (mode == "mpi") { DensePullThreadParam param; - param.ps_client = _pslib_ptr->_worker_ptr;; + param.ps_client = _pslib_ptr->_worker_ptr; param.threshold = 1; param.training_thread_num = actual_thread_num; param.root_scope = root_scope_; param.dense_params = 
&_param_config.dense_variable_name; - - _pull_dense_thread = std::shared_ptr( - new DensePullThread(param)); + + _pull_dense_thread = + std::shared_ptr(new DensePullThread(param)); _pull_dense_thread->start(); } } @@ -243,8 +239,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, const std::vector& filelist, const int thread_num, const std::vector& fetch_var_names, - const std::string& mode, - const bool debug) { + const std::string& mode, const bool debug) { std::vector threads; auto& block = main_program.Block(0); @@ -293,9 +288,9 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, for (auto& worker : workers) { #ifdef PADDLE_WITH_PSLIB if (mode == "mpi") { - worker.reset(new AsyncExecutorThreadWorker); + worker.reset(new AsyncExecutorThreadWorker); } else { - worker.reset(new ExecutorThreadWorker); + worker.reset(new ExecutorThreadWorker); } #else worker.reset(new ExecutorThreadWorker); @@ -308,7 +303,6 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, fetch_var_names, root_scope_, thidx, debug); } - // start executing ops in multiple threads for (int thidx = 0; thidx < actual_thread_num; ++thidx) { threads.push_back( diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 59679842bc..a945562926 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/executor_thread_worker.h" +#include #include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/message.h" #include "google/protobuf/text_format.h" @@ -51,7 +52,7 @@ void DensePullThread::run() { if (_pull_dense_status.size() != 0) { wait_all(); } - + usleep(_sleep_time_ms * 1000); } } @@ -77,12 +78,12 @@ std::future DensePullThread::pull_dense(uint64_t table_id) { regions.clear(); auto& variables = _dense_variable_name[table_id]; regions.resize(variables.size()); - + for (auto i = 0u; i < variables.size(); ++i) { auto& t = variables[i]; Variable* var = _root_scope->FindVar(t); LoDTensor* tensor = var->GetMutable(); - + float* w = tensor->data(); paddle::ps::Region reg(w, tensor->numel()); regions[i] = std::move(reg); @@ -95,21 +96,20 @@ void DensePullThread::wait_all() { t.wait(); auto status = t.get(); if (status != 0) { - LOG(WARNING) << "pull dense failed times:" << - ++_pull_dense_fail_times; + LOG(WARNING) << "pull dense failed times:" << ++_pull_dense_fail_times; } } - + if (_pull_dense_fail_times > 20) { LOG(FATAL) << "pull dense failed times more than 20 times"; exit(-1); } - + _pull_dense_status.resize(0); } -void DensePullThread::increase_thread_version( - int thread_id, uint64_t table_id) { +void DensePullThread::increase_thread_version(int thread_id, + uint64_t table_id) { std::lock_guard lock(_mutex_for_version); _training_versions[table_id][thread_id]++; } @@ -174,7 +174,6 @@ void ExecutorThreadWorker::SetFetchVarNames( fetch_var_names.end()); } - void ExecutorThreadWorker::SetDevice() { #if defined _WIN32 || defined __APPLE__ return; @@ -344,15 +343,14 @@ void AsyncExecutorThreadWorker::SetPullDenseThread( } void AsyncExecutorThreadWorker::TrainOneNetwork() { PrepareParams(); - + for (auto& op : ops_) { if (op->Type().find("sgd") != std::string::npos) { continue; } bool need_skip = false; for (auto t = 0u; t < _param_config->skip_op.size(); ++t) { - if 
(op->Type().find(_param_config->skip_op[t]) != - std::string::npos) { + if (op->Type().find(_param_config->skip_op[t]) != std::string::npos) { need_skip = true; break; } @@ -436,14 +434,13 @@ void AsyncExecutorThreadWorker::PushDense(int table_id) { paddle::ps::Region reg(g, count); regions.emplace_back(std::move(reg)); } - - auto status = _pslib_ptr->_worker_ptr->push_dense( - regions.data(), regions.size(), table_id); + + auto status = _pslib_ptr->_worker_ptr->push_dense(regions.data(), + regions.size(), table_id); _push_dense_status.push_back(std::move(status)); } void AsyncExecutorThreadWorker::PullSparse(int table_id) { - auto& features = _features[table_id]; auto& feature_value = _feature_value[table_id]; auto fea_dim = _param_config->fea_dim; @@ -451,8 +448,7 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) { features.clear(); features.resize(0); features.reserve(MAX_FEASIGN_NUM); - const std::vector& feed_vec = - thread_reader_->GetUseSlotAlias(); + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); // slot_idx = 0 is label TODO for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); @@ -468,20 +464,20 @@ void AsyncExecutorThreadWorker::PullSparse(int table_id) { features.push_back(static_cast(ids[i])); } } - check_pull_push_memory(features, feature_value, fea_dim); - + check_pull_push_memory(features, &feature_value, fea_dim); + std::vector pull_feature_value; for (auto i = 0u; i < features.size(); ++i) { pull_feature_value.push_back(feature_value[i].data()); } - + auto status = _pslib_ptr->_worker_ptr->pull_sparse( pull_feature_value.data(), table_id, features.data(), features.size()); _pull_sparse_status.push_back(std::move(status)); - + auto& push_g = _feature_push_value[table_id]; - check_pull_push_memory(features, push_g, fea_dim); - + check_pull_push_memory(features, &push_g, fea_dim); + collect_feasign_info(table_id); } @@ -490,15 +486,14 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) { auto fea_dim = _param_config->fea_dim; auto& features = _features[table_id]; auto& fea_value = _feature_value[table_id]; - + CHECK(features.size() > 0) << "feature size check failed"; - + auto fea_idx = 0u; - + std::vector init_value(fea_dim); - - const std::vector& feed_vec = - thread_reader_->GetUseSlotAlias(); + + const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); // slot_idx = 0 is label TODO for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); @@ -508,22 +503,22 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) { Variable* var_emb = thread_scope_->FindVar( _param_config->slot_input_vec[table_id][slot_idx - 1]); LoDTensor* tensor_emb = var_emb->GetMutable(); - float* ptr = tensor_emb->mutable_data( - {len, slot_dim}, platform::CPUPlace()); + float* ptr = + tensor_emb->mutable_data({len, slot_dim}, platform::CPUPlace()); memset(ptr, 0, sizeof(float) * len * slot_dim); auto& tensor_lod = tensor->lod()[0]; - + LoD data_lod{tensor_lod}; tensor_emb->set_lod(data_lod); - + for (auto index = 0u; index < len; ++index) { if (ids[index] == 0u) { - memcpy(ptr + slot_dim * index, - init_value.data() + 2, sizeof(float) * slot_dim); + memcpy(ptr + slot_dim * index, init_value.data() + 2, + sizeof(float) * slot_dim); continue; } - memcpy(ptr + slot_dim * index, - fea_value[fea_idx].data() + 2, sizeof(float) * slot_dim); + memcpy(ptr + slot_dim * index, fea_value[fea_idx].data() + 2, + sizeof(float) 
* slot_dim); fea_idx++; } } @@ -534,35 +529,38 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { auto fea_dim = _param_config->fea_dim; auto& features = _features[table_id]; auto& push_g = _feature_push_value[table_id]; - check_pull_push_memory(features, push_g, fea_dim); - CHECK(push_g.size() == features.size() + 1) << - "push_g size:" << push_g.size() << " features size:" << features.size(); + check_pull_push_memory(features, &push_g, fea_dim); + CHECK(push_g.size() == features.size() + 1) + << "push_g size:" << push_g.size() + << " features size:" << features.size(); uint64_t fea_idx = 0u; auto& fea_info = _fea_info[table_id]; int offset = 2; const std::vector& feed_vec = thread_reader_->GetUseSlotAlias(); - // slot_idx = 0 is label + // slot_idx = 0 is label for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) { - if (_param_config->slot_alias_to_table.find( - feed_vec[slot_idx]) == _param_config->slot_alias_to_table.end()) { - LOG(ERROR) << "ERROR slot_idx:" << slot_idx << - " name:" << feed_vec[slot_idx]; - } else if ( - _param_config->slot_alias_to_table[feed_vec[slot_idx]] != table_id) { + if (_param_config->slot_alias_to_table.find(feed_vec[slot_idx]) == + _param_config->slot_alias_to_table.end()) { + LOG(ERROR) << "ERROR slot_idx:" << slot_idx + << " name:" << feed_vec[slot_idx]; + } else if (_param_config->slot_alias_to_table[feed_vec[slot_idx]] != + table_id) { continue; } Variable* g_var = thread_scope_->FindVar( _param_config->gradient_var[table_id][slot_idx - 1]); - CHECK(g_var != nullptr) << "var[" << - _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; + CHECK(g_var != nullptr) + << "var[" << _param_config->gradient_var[table_id][slot_idx - 1] + << "] not found"; LoDTensor* g_tensor = g_var->GetMutable(); if (g_tensor == NULL) { - LOG(ERROR) << "var[" << - _param_config->gradient_var[table_id][slot_idx - 1] << "] not found"; + LOG(ERROR) << "var[" + << _param_config->gradient_var[table_id][slot_idx - 1] + << "] not found"; exit(-1); } float* g = g_tensor->data(); - + Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]); CHECK(var != nullptr) << "var[" << feed_vec[slot_idx] << "] not found"; LoDTensor* tensor = var->GetMutable(); @@ -571,42 +569,40 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { exit(-1); } int len = tensor->numel(); - CHECK(slot_dim * len == g_tensor->numel()) << - "len:" << len << " g_numel:" << g_tensor->numel(); - CHECK(len == tensor->numel()) << "len:" << - len << "t_numel:" << tensor->numel(); + CHECK(slot_dim * len == g_tensor->numel()) + << "len:" << len << " g_numel:" << g_tensor->numel(); + CHECK(len == tensor->numel()) << "len:" << len + << "t_numel:" << tensor->numel(); int64_t* ids = tensor->data(); for (auto id_idx = 0u; id_idx < len; ++id_idx) { if (ids[id_idx] == 0) { g += slot_dim; continue; } - memcpy(push_g[fea_idx].data() + offset, - g, sizeof(float) * slot_dim); + memcpy(push_g[fea_idx].data() + offset, g, sizeof(float) * slot_dim); push_g[fea_idx][0] = 1.0f; - CHECK(fea_idx < fea_info.size()) << "fea_idx:" << - fea_idx << " size:" << fea_info.size(); + CHECK(fea_idx < fea_info.size()) << "fea_idx:" << fea_idx + << " size:" << fea_info.size(); push_g[fea_idx][1] = static_cast(fea_info[fea_idx].label); g += slot_dim; fea_idx++; } } - CHECK(fea_idx == features.size()) << "fea_idx:" << - fea_idx << " features size:" << features.size(); + CHECK(fea_idx == features.size()) << "fea_idx:" << fea_idx + << " features size:" << features.size(); CHECK_GT(features.size(), 0); - + 
  std::vector<float*> push_g_vec;
   for (auto i = 0u; i < features.size(); ++i) {
     push_g_vec.push_back(push_g[i].data());
   }
   auto status = _pslib_ptr->_worker_ptr->push_sparse(
-      table_id, features.data(),
-      (const float**)push_g_vec.data(), features.size());
+      table_id, features.data(), (const float**)push_g_vec.data(),
+      features.size());
   _push_sparse_status.push_back(std::move(status));
 }
 
-void AsyncExecutorThreadWorker::collect_feasign_info(
-    int table_id) {
+void AsyncExecutorThreadWorker::collect_feasign_info(int table_id) {
   auto& fea_info = _fea_info[table_id];
   auto& feature = _features[table_id];
   fea_info.resize(feature.size());
@@ -614,13 +610,13 @@ void AsyncExecutorThreadWorker::collect_feasign_info(
   Variable* var = thread_scope_->FindVar(feed_vec[0]);
   LoDTensor* tensor = var->GetMutable<LoDTensor>();
   int64_t* label = tensor->data<int64_t>();
-
+
   int global_index = 0;
   for (auto slot_idx = 1u; slot_idx < feed_vec.size(); ++slot_idx) {
     Variable* var = thread_scope_->FindVar(feed_vec[slot_idx]);
     LoDTensor* tensor = var->GetMutable<LoDTensor>();
     int64_t* ids = tensor->data<int64_t>();
-
+
     int fea_idx = 0;
     for (auto ins_idx = 1u; ins_idx < tensor->lod()[0].size(); ++ins_idx) {
       for (; fea_idx < tensor->lod()[0][ins_idx]; ++fea_idx) {
@@ -628,36 +624,33 @@ void AsyncExecutorThreadWorker::collect_feasign_info(
         continue;
       }
       FeasignInfo info{slot_idx, ins_idx, label[ins_idx - 1]};
-
+
       fea_info[global_index++] = std::move(info);
     }
   }
 }
-  CHECK(global_index == feature.size()) <<
-    "expect fea info size:" << feature.size()
-    << " real:" << global_index;
+  CHECK(global_index == feature.size())
+      << "expect fea info size:" << feature.size() << " real:" << global_index;
 }
 
 void AsyncExecutorThreadWorker::check_pull_push_memory(
-    const std::vector<uint64_t>& features,
-    std::vector<std::vector<float>>& push_g,
-    int dim) {
-  push_g.resize(features.size() + 1);
-  for (auto& t : push_g) {
+    const std::vector<uint64_t>& features,
+    std::vector<std::vector<float>>* push_g, int dim) {
+  push_g->resize(features.size() + 1);
+  for (auto& t : *push_g) {
     t.resize(dim);
   }
 }
 
 void AsyncExecutorThreadWorker::check_pull_push_memory(
-    const std::vector<uint64_t>& features,
-    std::vector<float*>& push_g,
+    const std::vector<uint64_t>& features, std::vector<float*>* push_g,
     int dim) {
-  if (features.size() > push_g.size()) {
-    push_g.reserve(features.size() + 1);
-    auto size = features.size() - push_g.size() + 1;
+  if (features.size() > push_g->size()) {
+    push_g->reserve(features.size() + 1);
+    auto size = features.size() - push_g->size() + 1;
     for (auto i = 0u; i < size; ++i) {
       float* ptr = new float[dim];
-      push_g.push_back(ptr);
+      push_g->push_back(ptr);
     }
   }
 }
diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h
index 20410b4c06..30b81ad880 100644
--- a/paddle/fluid/framework/executor_thread_worker.h
+++ b/paddle/fluid/framework/executor_thread_worker.h
@@ -26,7 +26,7 @@ limitations under the License.
*/ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #ifdef PADDLE_WITH_PSLIB -#include "pslib.h" +#include #endif namespace paddle { @@ -34,75 +34,74 @@ namespace framework { void CreateTensor(Variable* var, proto::VarType::Type var_type); #ifdef PADDLE_WITH_PSLIB -const static uint32_t MAX_FEASIGN_NUM = 1000 * 100 * 100; +static const uint32_t MAX_FEASIGN_NUM = 1000 * 100 * 100; struct AsyncWorkerParamConfig { int slot_dim; int fea_dim; int32_t tmp_push_dense_wait_times; int32_t tmp_push_sparse_wait_times; - + std::vector skip_op; - + std::map> dense_variable_name; std::map> dense_gradient_variable_name; - std::vector dense_table_id; + std::vector dense_table_id; // fea_dim for each dense table - std::vector dense_table_size; - std::vector sparse_table_id; + std::vector dense_table_size; + std::vector sparse_table_id; std::map> slot_input_vec; std::map> gradient_var; std::map slot_alias_to_table; }; struct DensePullThreadParam { - std::shared_ptr ps_client; - int threshold; - int training_thread_num; - Scope* root_scope; - std::map>* dense_params; - int sleep_time_ms = 2; + std::shared_ptr ps_client; + int threshold; + int training_thread_num; + Scope* root_scope; + std::map>* dense_params; + int sleep_time_ms = 2; }; class DensePullThread { public: - explicit DensePullThread(const DensePullThreadParam& param) : - _running(false) { + explicit DensePullThread(const DensePullThreadParam& param) + : _running(false) { _ps_client = param.ps_client; _threshold = param.threshold; _thread_num = param.training_thread_num; _root_scope = param.root_scope; _sleep_time_ms = param.sleep_time_ms; - + for (auto& t : *param.dense_params) { - _dense_variable_name[t.first].insert( - _dense_variable_name[t.first].end(), - t.second.begin(), t.second.end()); + _dense_variable_name[t.first].insert(_dense_variable_name[t.first].end(), + t.second.begin(), t.second.end()); _training_versions[t.first].resize(_thread_num, 0); _last_versions[t.first] = 0; _current_version[t.first] = 0; } } - + int start(); - + void stop() { if (_running) { _running = false; _t.join(); } } - + void increase_thread_version(int thread_id, uint64_t table_id); void reset_thread_version(uint64_t table_id); std::future pull_dense(uint64_t table_id); void pull_dense2(uint64_t table_id); void wait_all(); - + private: void run(); bool check_update_param(uint64_t table_id); - + private: std::shared_ptr _ps_client; int _thread_num; @@ -113,33 +112,33 @@ class DensePullThread { std::map _last_versions; std::map _current_version; - std::mutex _mutex_for_version; + std::mutex _mutex_for_version; std::map> _training_versions; std::map> _dense_variable_name; - + std::thread _t; - + std::vector<::std::future> _pull_dense_status; - + std::map> _regions; - uint32_t _pull_dense_fail_times = 0; - - std::vector _base_norm_param; - std::vector _mean; - std::vector _scale; + uint32_t _pull_dense_fail_times = 0; + + std::vector _base_norm_param; + std::vector _mean; + std::vector _scale; float _squared_sum_epsilon = 1e-4; std::mutex _mutex_for_mean_scale; - + float _total_batch_num = 0; }; #endif class ExecutorThreadWorker { public: -ExecutorThreadWorker() - : thread_id_(-1), root_scope_(NULL), thread_scope_(NULL), debug_(false) {} + ExecutorThreadWorker() + : thread_id_(-1), root_scope_(NULL), thread_scope_(NULL), debug_(false) {} virtual ~ExecutorThreadWorker() {} - + void CreateThreadResource(const framework::ProgramDesc& program, const paddle::platform::Place& place); void SetThreadId(int tid); @@ -161,10 +160,8 
@@ ExecutorThreadWorker() #ifdef PADDLE_WITH_PSLIB virtual void SetPSlibPtr( std::shared_ptr pslib_ptr) {} - virtual void SetPullDenseThread( - std::shared_ptr dpt) {} - virtual void SetParamConfig( - AsyncWorkerParamConfig * param_config) {} + virtual void SetPullDenseThread(std::shared_ptr dpt) {} + virtual void SetParamConfig(AsyncWorkerParamConfig* param_config) {} #endif private: @@ -195,7 +192,7 @@ ExecutorThreadWorker() }; #ifdef PADDLE_WITH_PSLIB -class AsyncExecutorThreadWorker: public ExecutorThreadWorker { +class AsyncExecutorThreadWorker : public ExecutorThreadWorker { public: AsyncExecutorThreadWorker() {} virtual ~AsyncExecutorThreadWorker() {} @@ -210,40 +207,35 @@ class AsyncExecutorThreadWorker: public ExecutorThreadWorker { void FillSparse(int table_id); void PushSparse(int table_id); void PushDense(int table_id); - - void check_pull_push_memory( - const std::vector& features, - std::vector& push_g, - int dim); + void check_pull_push_memory(const std::vector& features, - std::vector>& push_g, - int dim); + std::vector* push_g, int dim); + void check_pull_push_memory(const std::vector& features, + std::vector>* push_g, int dim); void collect_feasign_info(int table_id); - + private: struct FeasignInfo { uint32_t slot; uint32_t ins; int64_t label; }; - - std::map> _features; - std::map> _fea_info; + + std::map> _features; + std::map> _fea_info; std::map>> _feature_value; std::map>> _feature_push_value; - - - std::shared_ptr _pslib_ptr; - - std::shared_ptr _pull_dense_thread; - - std::vector<::std::future> _pull_sparse_status; - std::vector<::std::future> _pull_dense_status; - std::vector<::std::future> _push_sparse_status; - std::vector<::std::future> _push_dense_status; - - AsyncWorkerParamConfig* _param_config; - + + std::shared_ptr _pslib_ptr; + + std::shared_ptr _pull_dense_thread; + + std::vector<::std::future> _pull_sparse_status; + std::vector<::std::future> _pull_dense_status; + std::vector<::std::future> _push_sparse_status; + std::vector<::std::future> _push_dense_status; + + AsyncWorkerParamConfig* _param_config; }; #endif From 58110921bd84af43ed08a955c515aadd1558bac3 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Fri, 14 Dec 2018 14:16:43 +0800 Subject: [PATCH 49/62] fix CMakeList bug --- paddle/fluid/framework/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index ab237f768a..3575080c99 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -138,7 +138,7 @@ nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto) #Generate an empty \ - __init__.py to make framework_py_proto as a valid python module. + #__init__.py to make framework_py_proto as a valid python module. 
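An aside on the check_pull_push_memory declarations reworked in the executor_thread_worker.h hunk above: the Google C++ style these cleanup patches apply forbids non-const reference parameters, so mutable outputs move to pointers and every call site spells out &push_g. A minimal sketch of the convention, with element types assumed from the .cc call sites (uint64_t feature signs, one dim-float gradient buffer per feature plus one spare):

    #include <cstdint>
    #include <vector>

    // Out-parameter passed by pointer (Google style): callers must write
    // check_pull_push_memory(features, &push_g, dim), making mutation visible.
    void check_pull_push_memory(const std::vector<uint64_t>& features,
                                std::vector<std::vector<float>>* push_g,
                                int dim) {
      push_g->resize(features.size() + 1);  // one buffer per feature, plus a spare
      for (auto& buf : *push_g) {
        buf.resize(dim);  // dim floats per gradient buffer
      }
    }

The matching call-site change appears in the executor_thread_worker.cc hunks earlier, where check_pull_push_memory(features, &feature_value, fea_dim) replaces the reference-taking form.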
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) if (NOT WIN32) From 09d669ba40aa900920dea84eb07aa868c44831b0 Mon Sep 17 00:00:00 2001 From: heqiaozhi Date: Fri, 14 Dec 2018 14:16:43 +0800 Subject: [PATCH 50/62] fix static_cast to const_cast --- paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/framework/async_executor.cc | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index ab237f768a..3575080c99 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -138,7 +138,7 @@ nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto) #Generate an empty \ - __init__.py to make framework_py_proto as a valid python module. + #__init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) if (NOT WIN32) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index e2756cafa2..ee3c5e01f8 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -81,9 +81,8 @@ void AsyncExecutor::InitWorker(const std::string& dist_desc, int node_num, int index) { _pslib_ptr = std::shared_ptr( new paddle::distributed::PSlib()); - _pslib_ptr->init_worker(dist_desc, - static_cast(host_sign_list.data()), - node_num, index); + _pslib_ptr->init_worker( + dist_desc, const_cast(host_sign_list.data()), node_num, index); InitParamConfig(); } @@ -94,7 +93,7 @@ void AsyncExecutor::StopServer() { _pslib_ptr->stop_server(); } void AsyncExecutor::GatherServers(const std::vector& host_sign_list, int node_num) { - _pslib_ptr->gather_servers(static_cast(host_sign_list.data()), + _pslib_ptr->gather_servers(const_cast(host_sign_list.data()), node_num); } From f2b92d77b59f6bcb55f33ee69d640b9b9b77c348 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Sun, 16 Dec 2018 20:02:44 +0800 Subject: [PATCH 51/62] remove clock time in WIN32 mode --- paddle/fluid/framework/async_executor.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index a82e941559..95c8472b2f 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -34,9 +34,13 @@ namespace paddle { namespace framework { inline double current_realtime() { +#if !defined(_WIN32) struct timespec tp; clock_gettime(CLOCK_REALTIME, &tp); return tp.tv_sec + tp.tv_nsec * 1e-9; +#else + return 0.0; +#endif } inline std::default_random_engine& local_random_engine() { From 66522046ad9c8659b80dc3be6d0c50c3d56f17fa Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Sun, 16 Dec 2018 20:02:44 +0800 Subject: [PATCH 52/62] remove clock time in WIN32 mode test=develop --- paddle/fluid/framework/async_executor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index 95c8472b2f..7accc4cb57 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -39,7 +39,7 @@ inline double current_realtime() { clock_gettime(CLOCK_REALTIME, &tp); return tp.tv_sec + tp.tv_nsec * 1e-9; #else 
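The current_realtime() hunks here compile the POSIX-only clock_gettime(CLOCK_REALTIME, ...) call out under _WIN32 and fall back to a constant; in this header the value only seeds the local random engine, so the Windows build loses seed entropy rather than correctness. A portable alternative, which is not what these patches do, would be std::chrono; a sketch follows, and the rest of the hunk continues below it:

    #include <chrono>

    // Hypothetical portable replacement for current_realtime(): std::chrono is
    // available on Windows too, so no POSIX clock_gettime() call is needed.
    inline double current_realtime() {
      const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
      return std::chrono::duration<double>(since_epoch).count();  // seconds
    }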
-  return 0.0;
+  return 0;
 #endif
 }

From 4c0a769d1d70f2d4f86f3369d65eb2fb6bd6981f Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Sun, 16 Dec 2018 20:16:14 +0800
Subject: [PATCH 53/62] avoid clock time in WIN32 mode

test=develop
---
 paddle/fluid/framework/async_executor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h
index 7accc4cb57..95c8472b2f 100644
--- a/paddle/fluid/framework/async_executor.h
+++ b/paddle/fluid/framework/async_executor.h
@@ -39,7 +39,7 @@ inline double current_realtime() {
   clock_gettime(CLOCK_REALTIME, &tp);
   return tp.tv_sec + tp.tv_nsec * 1e-9;
 #else
-  return 0;
+  return 0.0;
 #endif
 }

From 8a6b53a4943f671b456a2ab20d85e62c83f56ed6 Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Sun, 16 Dec 2018 20:16:14 +0800
Subject: [PATCH 54/62] avoid clock time in WIN32 mode

test=develop
---
 python/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/requirements.txt b/python/requirements.txt
index 5d64674fe0..36313333b2 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -9,4 +9,4 @@ Pillow
 nltk>=3.2.2
 graphviz
 six
-mpi4py=3.0.0
+mpi4py==3.0.0

From 29c772663a7905b64fec66e452d77cd6b7ec9449 Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Mon, 17 Dec 2018 00:12:03 +0800
Subject: [PATCH 55/62] refine import path for ps_instance.py

test=develop
---
 python/paddle/fluid/distributed/ps_instance.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py
index 6b44d0cd16..91f53102b6 100644
--- a/python/paddle/fluid/distributed/ps_instance.py
+++ b/python/paddle/fluid/distributed/ps_instance.py
@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and

-import helper as dist_helper
-import sys
+from .helper import MPIHelper


 class PaddlePSInstance(object):
@@ -26,7 +25,7 @@ class PaddlePSInstance(object):
     """

     def __init__(self, server_worker_mode, proc_per_node):
-        self.dh = dist_helper.MPIHelper()
+        self.dh = MPIHelper()
         self._rankid = self.dh.get_rank()
         self._server_worker_mode = server_worker_mode
         self._proc_per_node = proc_per_node

From 178c47c074ba0c1294dd8e0e8f38faa0a5e17ab3 Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Mon, 17 Dec 2018 00:12:03 +0800
Subject: [PATCH 56/62] refine import path for ps_instance.py

test=develop
---
 python/paddle/fluid/distributed/helper.py | 7 ++++---
 python/requirements.txt | 1 -
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py
index ca6dd5dabf..999c8d77b8 100644
--- a/python/paddle/fluid/distributed/helper.py
+++ b/python/paddle/fluid/distributed/helper.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from mpi4py import MPI
 import ps_pb2 as pslib


@@ -59,7 +58,7 @@ class FileSystem(object):

 class MPIHelper(object):
     """
-    MPIHelper is a wrapper of mpi4py, supprot get_rank get_size etc.
+    MPIHelper is a wrapper of mpi4py, support get_rank get_size etc.
Args: No params Examples: @@ -68,7 +67,9 @@ class MPIHelper(object): """ def __init__(self): + from mpi4py import MPI self.comm = MPI.COMM_WORLD + self.MPI = MPI def get_rank(self): return self.comm.Get_rank() @@ -86,4 +87,4 @@ class MPIHelper(object): return socket.gethostname() def finalize(self): - MPI.Finalize() + self.MPI.Finalize() diff --git a/python/requirements.txt b/python/requirements.txt index 36313333b2..2f81d85df0 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -9,4 +9,3 @@ Pillow nltk>=3.2.2 graphviz six -mpi4py==3.0.0 From 43028f655d44eb524fc988a1645b993cefd08e6a Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Mon, 17 Dec 2018 00:12:03 +0800 Subject: [PATCH 57/62] refine import path for ps_instance.py test=develop --- python/paddle/fluid/distributed/helper.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py index 999c8d77b8..cdde5403cd 100644 --- a/python/paddle/fluid/distributed/helper.py +++ b/python/paddle/fluid/distributed/helper.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import ps_pb2 as pslib - class FileSystem(object): """ @@ -37,6 +35,7 @@ class FileSystem(object): assert user != None assert passwd != None assert hadoop_bin != None + import ps_pb2 as pslib self.fs_client = pslib.FsClientParameter() #if fs_type == "afs": # fs_client.fs_type = pslib.FsApiType.AFS From 921b7f452a2a4bf26f3aa288365401c35719903f Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Mon, 17 Dec 2018 10:24:46 +0800 Subject: [PATCH 58/62] add API.spec test=develop --- paddle/fluid/API.spec | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 26113ee7e9..e156945147 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -38,7 +38,15 @@ paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], va paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None) paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.AsyncExecutor.config_distributed_nodes ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.AsyncExecutor.download_data ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)) +paddle.fluid.AsyncExecutor.get_instance ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.AsyncExecutor.init_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.AsyncExecutor.init_server ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None) +paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'debug'], varargs=None, keywords=None, defaults=(False,)) +paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None) +paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, 
keywords=None, defaults=None) paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)) paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) From 5553c0b0da5884a062e5b7b136c30eb12a7d4d6b Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Mon, 17 Dec 2018 10:24:46 +0800 Subject: [PATCH 59/62] add API.spec test=develop --- paddle/fluid/API.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e156945147..fe2ee3f98d 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -37,14 +37,14 @@ paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=Non paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None) paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None) paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')) paddle.fluid.AsyncExecutor.config_distributed_nodes ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.download_data ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)) paddle.fluid.AsyncExecutor.get_instance ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.init_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.init_server ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'debug'], varargs=None, keywords=None, defaults=(False,)) +paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)) paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)) From bc4f16ca6f4bcb938683086c0eb325366729cd25 Mon Sep 17 00:00:00 2001 From: dongdaxiang Date: Mon, 17 Dec 2018 10:24:46 +0800 Subject: [PATCH 60/62] remove some comments --- paddle/fluid/API.spec | 4 ++-- python/paddle/fluid/distributed/helper.py | 6 +----- python/paddle/fluid/distributed/node.py | 4 ---- python/paddle/fluid/distributed/ps_instance.py | 3 --- 
4 files changed, 3 insertions(+), 14 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e156945147..fe2ee3f98d 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -37,14 +37,14 @@ paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=Non paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None) paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None) paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')) paddle.fluid.AsyncExecutor.config_distributed_nodes ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.download_data ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)) paddle.fluid.AsyncExecutor.get_instance ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.init_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.init_server ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'debug'], varargs=None, keywords=None, defaults=(False,)) +paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)) paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None) paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)) diff --git a/python/paddle/fluid/distributed/helper.py b/python/paddle/fluid/distributed/helper.py index cdde5403cd..06d3d0315c 100644 --- a/python/paddle/fluid/distributed/helper.py +++ b/python/paddle/fluid/distributed/helper.py @@ -28,7 +28,7 @@ class FileSystem(object): def __init__(self, fs_type="afs", - uri="afs://tianqi.afs.baidu.com:9902", + uri="afs://xx", user=None, passwd=None, hadoop_bin=""): @@ -37,10 +37,6 @@ class FileSystem(object): assert hadoop_bin != None import ps_pb2 as pslib self.fs_client = pslib.FsClientParameter() - #if fs_type == "afs": - # fs_client.fs_type = pslib.FsApiType.AFS - #else: - # fs_client.fs_type = pslib.FsApiType.HDFS self.fs_client.uri = uri self.fs_client.user = user self.fs_client.passwd = passwd diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index 117da9cff8..41e0d64e0b 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -75,8 +75,6 @@ class DownpourServer(Server): table.accessor.embedx_dim = 8 
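A note on the accessor numbers in this node.py hunk before it resumes (embedx_dim = 8 above; embedx_threshold = 5 and fea_dim = 11 just below): they pair with the hard-coded offset = 2 in the FillSparse/PushSparse hunks earlier, where every value pulled from a sparse table carries two leading floats of accessor metadata ahead of the per-slot embedding, and every pushed gradient mirrors that layout (push_g[fea_idx][0] = 1.0f, push_g[fea_idx][1] = label). The patches never spell out the breakdown of the 11 floats, so the show/click reading of the two header slots is an assumption; a sketch of the copy convention as the C++ code uses it:

    #include <cstring>
    #include <vector>

    // Two metadata floats (assumed show/click statistics) precede the embedding.
    constexpr int kHeaderFloats = 2;

    // FillSparse-style copy: skip the header, take slot_dim embedding floats.
    void copy_embedding(const std::vector<float>& fea_value, float* dst,
                        int slot_dim) {
      std::memcpy(dst, fea_value.data() + kHeaderFloats, sizeof(float) * slot_dim);
    }

    // PushSparse-style fill: the gradient goes after the header, stats in front.
    void fill_gradient(std::vector<float>* push_g, const float* grad,
                       int slot_dim, float label) {
      std::memcpy(push_g->data() + kHeaderFloats, grad, sizeof(float) * slot_dim);
      (*push_g)[0] = 1.0f;   // per-instance show count
      (*push_g)[1] = label;  // click/label signal
    }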
         table.accessor.embedx_threshold = 5
         table.accessor.fea_dim = 11
-        #table.accessor.fea_dim = abs(reduce(lambda x, y: x * y,
-        #    slot_value_var[0].shape, 1))
         table.accessor.downpour_accessor_param.nonclk_coeff = 0.1
         table.accessor.downpour_accessor_param.click_coeff = 2
         table.accessor.downpour_accessor_param.base_threshold = 0.2
@@ -134,8 +132,6 @@ class DownpourWorker(Worker):
     def __init__(self, window):
         self.window = window
         self.worker_ = pslib.DownpourTrainerParameter()
-        #self.worker_.pull_dense_per_batch = window
-        #self.worker_.push_dense_per_batch = window

     def add_sparse_table(self, table_id, learning_rate, slot_key_vars,
                          slot_value_vars):
diff --git a/python/paddle/fluid/distributed/ps_instance.py b/python/paddle/fluid/distributed/ps_instance.py
index 91f53102b6..d3ce3ce693 100644
--- a/python/paddle/fluid/distributed/ps_instance.py
+++ b/python/paddle/fluid/distributed/ps_instance.py
@@ -59,9 +59,6 @@ class PaddlePSInstance(object):
         else:
             self._node_type = -1

-        #if self._rankid == 0:
-        #print "node type: ", self._node_type
-
     def _split_comm(self):
         if self.is_server():
             self._comm = self.dh.comm.Split(self._node_type)

From 5f0358add9767390b8bc329c97236f8d72ce758e Mon Sep 17 00:00:00 2001
From: heqiaozhi
Date: Mon, 17 Dec 2018 16:18:10 +0800
Subject: [PATCH 61/62] async_executor stop add barrier_all & finalize

---
 python/paddle/fluid/async_executor.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py
index bd32138651..3181654feb 100644
--- a/python/paddle/fluid/async_executor.py
+++ b/python/paddle/fluid/async_executor.py
@@ -237,6 +237,8 @@ class AsyncExecutor(object):
         if self.instance.is_first_worker():
             self.executor.stop_server()
         self.instance.barrier_worker()  #sync
+        self.instance.barrier_all()
+        self.instance.finalize()

     def init_server(self, dist_desc):
         """

From cbc7208399e980687e7bd51102d3e84907353fba Mon Sep 17 00:00:00 2001
From: heqiaozhi
Date: Tue, 18 Dec 2018 10:15:07 +0800
Subject: [PATCH 62/62] fix doc

test=develop
---
 python/paddle/fluid/async_executor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/async_executor.py b/python/paddle/fluid/async_executor.py
index 3181654feb..4ca6a5170e 100644
--- a/python/paddle/fluid/async_executor.py
+++ b/python/paddle/fluid/async_executor.py
@@ -301,7 +301,7 @@ class AsyncExecutor(object):
         save_model command that can be invoked from one of the worker
         model parameters are saved in servers and upload to save_path of file system
         Args:
-            save_path(str): path to file system
+            save_path(str): save path to file system
         """
         if self.instance is None:
             raise ValueError(
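Patch 61 above closes a teardown gap in AsyncExecutor.stop(): workers already synchronized through barrier_worker() after the first worker called stop_server(), but server ranks never joined a final barrier and MPI was never finalized. With barrier_all() and finalize() added, every rank leaves together and MPI shuts down exactly once. A rough MPI-level sketch of that ordering, where stop_server is a hypothetical stand-in for executor.stop_server() and a single world communicator approximates the instance's separate worker/all barriers:

    #include <mpi.h>

    // Sketch of the shutdown ordering patch 61 enforces. stop_server is a
    // hypothetical callback standing in for the first worker's
    // executor.stop_server(); all other ranks pass a no-op.
    void stop(bool is_first_worker, void (*stop_server)()) {
      if (is_first_worker) {
        stop_server();              // exactly one rank tells the PS to exit
      }
      MPI_Barrier(MPI_COMM_WORLD);  // barrier_all: no rank races ahead of teardown
      MPI_Finalize();               // finalize: MPI released once, by every rank
    }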