commit 99dc60642d (parent 29a0bc83e0)
@@ -0,0 +1,21 @@
# Design Doc: Remote Parameter Updater for Cluster Train
For an overview of distributed training, please refer to the [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater, which uses the parameter server cclient ([The Client Library of Parameter Server Design Doc](pserver_client.md)) to manage and update parameters.
## Parameter Updater
The parameter updater is used by the trainer to manage and update parameters. There are two main kinds of parameter updater, local and remote; since this design is for cluster training, we will only discuss the remote parameter updater here.
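
To make the shared surface concrete, here is a minimal sketch of how a trainer drives either kind of updater through the common `ParameterUpdater` interface (the method names are the ones implemented later in this PR; `TrainerSketch` is a hypothetical stand-in for the real trainer):

```cpp
// Hypothetical sketch: the trainer only sees the ParameterUpdater
// interface, so local and remote updaters are interchangeable.
class TrainerSketch {
public:
  explicit TrainerSketch(ParameterUpdater* updater) : updater_(updater) {}

  void trainOneBatch(int64_t batchSize, real cost) {
    updater_->startBatch(batchSize);  // prepare for this batch
    // ... forward/backward pass fills the PARAMETER_GRADIENT buffers ...
    updater_->finishBatch(cost);      // local: update in-process;
                                      // remote: exchange with the pservers
  }

private:
  ParameterUpdater* updater_;  // a local updater or NewRemoteParameterUpdater
};
```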
### Remote Parameter Updater
The remote parameter updater manages parameters through a remote parameter server, using the client library that communicates with the pserver ([The Client Library of Parameter Server Design Doc](pserver_client.md)).
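
As a rough sketch, a full round trip through the cclient looks like the following (every `paddle_*` call is taken from the `NewRemoteParameterUpdater` implementation later in this PR; error handling is omitted, and `names`/`params`/`grads` are assumed to be laid out the way `initNewParameter()` lays them out):

```cpp
#include <string>
#include "libpaddle_pserver_cclient.h"  // generated cclient header

void pserverRoundTripSketch(const std::string& spec, int trainerId,
                            char** names, paddle_parameter** params,
                            paddle_parameter** grads, int n) {
  client c = paddle_new_pserver_client((char*)spec.c_str(), trainerId);

  if (paddle_begin_init_params(c)) {
    // Exactly one trainer wins the right to initialize the parameters...
    for (int i = 0; i < n; ++i) {
      paddle_init_param(c, *params[i], NULL, 0);
    }
    paddle_finish_init_params(c);
  } else {
    // ...all other trainers fetch the initialized values instead.
    paddle_get_params(c, names, params, n);
  }

  // Per batch: push gradients, then pull the freshly updated parameters.
  paddle_send_grads(c, *grads, n);
  paddle_get_params(c, names, params, n);

  paddle_pserver_client_release(c);
}
```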
In the PaddlePaddle Python V2 API, the trainer is implemented in Python; it holds an instance of a parameter updater and calls its functions directly. In this design, we will also expose the API of `RemoteParameterUpdater` to Python with swig.
#### Sparse Remote Parameter Updater
Since we will only implement dense parameter management for now, the mechanism for sparse parameters will be discussed in the next stage.
### Interface Design
TBD
@@ -1,11 +1,16 @@
 cmake_minimum_required(VERSION 3.0)
 
-include_directories(${CMAKE_BINARY_DIR})
-
 add_executable(main main.c)
-add_dependencies(main client)
+add_dependencies(main paddle_pserver_cclient)
 
 if(APPLE)
   set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
 endif()
-target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a)
+
+if(PROJ_ROOT)
+  include_directories(${CMAKE_BINARY_DIR}/go/pserver/cclient/)
+  target_link_libraries(main ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a pthread)
+else(PROJ_ROOT)
+  include_directories(${CMAKE_BINARY_DIR})
+  target_link_libraries(main ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread)
+endif(PROJ_ROOT)
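
Note: `libpaddle_pserver_cclient.a` is presumably the C archive produced by building the Go cclient with `go build -buildmode=c-archive`; the explicit `pthread` link is there because the embedded Go runtime needs POSIX threads.
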
@@ -0,0 +1,60 @@
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing


def main():
    # init
    paddle.init(use_gpu=False, trainer_count=1, trainer_id=1)

    # network config
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
    y_predict = paddle.layer.fc(input=x,
                                param_attr=paddle.attr.Param(name='w'),
                                size=1,
                                act=paddle.activation.Linear(),
                                bias_attr=paddle.attr.Param(name='b'))
    y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
    cost = paddle.layer.mse_cost(input=y_predict, label=y)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    optimizer = paddle.optimizer.Momentum(momentum=0)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer,
                                 is_local=False,
                                 pserver_spec="localhost:3000")

    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)

        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding={'x': 0,
                             'y': 1})
                print "Test %d, %.2f" % (event.pass_id, result.cost)

    # training
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
        feeding={'x': 0,
                 'y': 1},
        event_handler=event_handler,
        num_passes=30)


if __name__ == '__main__':
    main()
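
Note that this example assumes a parameter server is already listening at `localhost:3000` (the `pserver_spec` handed to the trainer); with `is_local=False` the trainer is expected to construct the remote parameter updater rather than a local one.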
@@ -0,0 +1,88 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "NewRemoteParameterUpdater.h"
#include "Trainer.h"
#include "paddle/utils/Stat.h"

DECLARE_int32(trainer_id);
DECLARE_string(save_dir);

namespace paddle {
NewRemoteParameterUpdater::NewRemoteParameterUpdater(
    const OptimizationConfig &config, const std::string pserverSpec)
    : pserverSpec_(pserverSpec) {}

void NewRemoteParameterUpdater::init(
    const std::vector<ParameterPtr> &parameters) {
  ParameterUpdater::init(parameters);
  LOG(INFO) << "NewRemoteParameterUpdater init in";

  for (auto &para : parameters_) {
    para->getBuf(PARAMETER_VALUE)->zeroMem();
    para->getBuf(PARAMETER_GRADIENT)->zeroMem();
  }

  // create the parameter server client.
  parameterClient_ =
      paddle_new_pserver_client((char *)pserverSpec_.c_str(), FLAGS_trainer_id);

  // init names_ for getting parameters through the pserver cclient
  names_ = (char **)malloc(parameterSize() * sizeof(char *));
  for (int i = 0; i < parameterSize(); ++i) {
    names_[i] = (char *)parameters_[i]->getName().c_str();
  }

  // init the new parameters and gradients.
  initNewParameter(newParameters_, PARAMETER_VALUE);
  initNewParameter(newGradients_, PARAMETER_GRADIENT);

  // init parameters: one trainer gets the opportunity to initialize the
  // parameters and send them to the parameter server; the others get the
  // initialized parameters from the parameter server.
  if (paddle_begin_init_params(parameterClient_)) {
    LOG(INFO) << "paddle_begin_init_params start";
    for (int i = 0; i < parameterSize(); ++i) {
      paddle_init_param(parameterClient_, *newParameters_[i], NULL, 0);
    }
    paddle_finish_init_params(parameterClient_);
    LOG(INFO) << "paddle_begin_init_params done";
  } else {
    paddle_get_params(
        parameterClient_, names_, newParameters_, (int)parameters_.size());
  }

  LOG(INFO) << "NewRemoteParameterUpdater initialized";
}

void NewRemoteParameterUpdater::updateImpl(Parameter *para) {}

void NewRemoteParameterUpdater::finishBatch(real cost) {
  LOG(INFO) << "finishBatch in, cost: " << cost;

  // send gradients to the parameter server.
  paddle_send_grads(parameterClient_, *newGradients_, parameterSize());
  // get the updated parameters from the parameter server.
  paddle_get_params(parameterClient_, names_, newParameters_, parameterSize());

  // clear gradients after updating the parameters.
  for (auto &para : parameters_) {
    para->getBuf(PARAMETER_GRADIENT)->zeroMem();
  }
}

void NewRemoteParameterUpdater::startPass() {}

bool NewRemoteParameterUpdater::finishPass() { return true; }
}  // namespace paddle
@@ -0,0 +1,105 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <functional>
#include <thread>
#include "ParameterUpdater.h"
#include "libpaddle_pserver_cclient.h"
#include "paddle/pserver/ParameterClient2.h"
#include "paddle/utils/Queue.h"
#include "paddle/utils/Util.h"

namespace paddle {

/**
 * New remote parameter updater for dense parameters that uses the cclient
 * written in go.
 */
class NewRemoteParameterUpdater : public ParameterUpdater {
public:
  NewRemoteParameterUpdater(const OptimizationConfig& config,
                            const std::string pserverSpec);
  ~NewRemoteParameterUpdater() {
    if (newGradients_) {
      paddle_pserver_client_release(parameterClient_);
    }
  }

  /**
   * initialize the internal parameter client and itself.
   */
  virtual void init(const std::vector<ParameterPtr>& parameters);
  /**
   * @brief start batch
   *
   * @note one batch training exhibits stateful feature to help
   * to do performance tuning, sgd optimization if necessary.
   */
  virtual PassType startBatch(int64_t batchSize) { return PASS_TRAIN; }

  /**
   * send parameters to pservers and get returned parameters
   * from all pservers if necessary.
   */
  virtual void finishBatch(real cost);
  virtual void startPass();
  virtual bool finishPass();

  int parameterSize() { return (int)parameters_.size(); }

  /**
   * init the parameter structs used by the paddle pserver cclient.
   * @param new_paras the array of cclient parameter structs to fill in
   * @param type which buffer (value or gradient) the structs should expose
   */
  void initNewParameter(paddle_parameter**& new_paras, ParameterType type) {
    new_paras =
        (paddle_parameter**)malloc(sizeof(paddle_parameter*) * parameterSize());
    for (int i = 0; i < parameterSize(); ++i) {
      new_paras[i] = (paddle_parameter*)malloc(sizeof(paddle_parameter));
      memset(new_paras[i], 0, sizeof(paddle_parameter));
    }

    for (int i = 0; i < parameterSize(); ++i) {
      ParameterPtr para = parameters_[i];
      new_paras[i]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32;
      new_paras[i]->name = (char*)para->getName().c_str();
      new_paras[i]->content =
          (unsigned char*)(para->getBuf(type).get()->getData());
      new_paras[i]->content_len = (int)para->getBuf(type).get()->getSize();
    }
  }

protected:
  /**
   * work need to do after finishBatch
   */
  virtual void updateImpl(Parameter* para);

protected:
  /// internal parameter client object for exchanging data with the pserver
  client parameterClient_ = -1;
  /// the parameters for the new pserver client
  paddle_parameter** newParameters_;
  /// the gradients for the new pserver client
  paddle_parameter** newGradients_;
  /// the names for the new parameters.
  char** names_;
  /// the specification of the parameter servers, e.g. "host1:port,host2:port"
  std::string pserverSpec_;
};

}  // namespace paddle