Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into crop_layer

cblas_new
wanghaoshuang 8 years ago
commit d378e0a0cd

@ -192,9 +192,9 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main -lstdc++ -lm)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
add_test(${TARGET_NAME} ${TARGET_NAME})
add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif()
endfunction(cc_test)
@ -281,10 +281,11 @@ function(go_library TARGET_NAME)
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# FIXME: link path
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND rm "${${TARGET_NAME}_LIB_PATH}"
# Golang build source code
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH} GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${${TARGET_NAME}_LIB_PATH}"
"./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}"
# must run under GOPATH
@ -299,11 +300,13 @@ function(go_binary TARGET_NAME)
cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# FIXME: link path
add_custom_command(OUTPUT ${TARGET_NAME}_timestamp
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH}
GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
-o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
"./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}"
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
# TODO: don't know what ${TARGET_NAME}_link does
add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)

@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
go_binary(master SRC master.go)
# Build the `master` Go binary. go_binary() reads its Go entry file from the
# SRCS keyword (its build command expands ${go_binary_SRCS}); DEPS names the
# targets the binary's custom target depends on.
go_binary(master SRCS master.go DEPS paddle_go_optimizer)

@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
go_binary(pserver SRCS pserver.go)
go_binary(pserver SRCS pserver.go DEPS paddle_go_optimizer)

@ -1,5 +1,6 @@
cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf)
go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer)
if(WITH_TESTING)
add_subdirectory(test)
# TODO: add unit test
#add_subdirectory(test)
endif()

@ -1,2 +1,2 @@
cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient)
cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer)
add_style_check_target(test_cclient test_cclient.c)

@ -1,8 +1,7 @@
package pserver
// #cgo CFLAGS: -I ../../
// //FIXME: ldflags contain "build" path
// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
// #cgo LDFLAGS: -lpaddle_go_optimizer -lstdc++ -lm
// #include "paddle/optimizer/optimizer.h"
// #include <stdlib.h>
// #include <string.h>

@ -66,6 +66,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
paddle_trainer_lib
paddle_network
paddle_parameter
paddle_optimizer
paddle_math
paddle_utils
paddle_proto

@ -25,6 +25,10 @@ namespace paddle {
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = average_{for each instance in this sequence}{input[i]}
* If stride_ > 0:
* Output: a shortened sequence. Stride is the step size by which we slide a
* window upon the input sequence, and the average pooling
* operation is then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check input sequence must have sub-sequence
* Output: output size is the number of input sub-sequences

@ -36,6 +36,16 @@ MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
}
/**
 * Initialize the cross-channel normalization layer.
 *
 * Runs the base Layer initialization, reads the channel count from the norm
 * configuration of input 0, and wraps parameter 0 in the scale_ weight, which
 * is constructed with dimensions (channels_, 1).
 *
 * @param layerMap      forwarded unchanged to Layer::init.
 * @param parameterMap  forwarded unchanged to Layer::init.
 * @return false if the base Layer::init reports failure, true otherwise.
 */
bool CrossChannelNormLayer::init(const LayerMap& layerMap,
                                 const ParameterMap& parameterMap) {
  // Do not continue configuring the layer if the base class failed to set up.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }
  // The scale weight below is backed by parameter 0, so it must exist.
  CHECK(parameters_[0]);
  const NormConfig& conf = config_.inputs(0).norm_conf();
  channels_ = conf.channels();
  scale_.reset(new Weight(channels_, 1, parameters_[0]));
  return true;
}
void CrossChannelNormLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr inV = getInputValue(0);
@ -51,9 +61,7 @@ void CrossChannelNormLayer::forward(PassType passType) {
Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);
normBuffer_->zeroMem();
// add eps to avoid overflow
normBuffer_->addScalar(*normBuffer_, 1e-6);
inV->square2(*dataBuffer_);
for (size_t i = 0; i < batchSize; i++) {
const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
@ -63,6 +71,8 @@ void CrossChannelNormLayer::forward(PassType passType) {
// compute norm.
spatialBuffer_->sumCols(*dataTmp, 1, 0);
// add eps so the norm is never zero (it is used as a divisor)
spatialBuffer_->add(1e-6);
spatialBuffer_->sqrt2(*spatialBuffer_);
normTmp->copyFrom(*spatialBuffer_);
outVTmp->copyFrom(*inVTmp);
@ -82,6 +92,9 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
size_t dataDim = inG->getWidth();
size_t spatialDim = dataDim / channels_;
MatrixPtr inGBuffer;
Matrix::resizeOrCreate(inGBuffer, channels_, spatialDim, false, useGpu_);
dataBuffer_->dotMul(*outG, *outV);
Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
@ -100,22 +113,24 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
scaleDiff_->add(*channelBuffer_, 1.);
sampleBuffer_->dotMul(*inVTmp, *outGTmp);
spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.);
// scale the grad
inGTmp->copyFrom(*inVTmp);
inGTmp->mulRowVector(*spatialBuffer_);
inGBuffer->copyFrom(*inVTmp);
inGBuffer->mulRowVector(*spatialBuffer_);
// divide by square of norm
spatialBuffer_->dotMul(*normTmp, *normTmp);
inGTmp->divRowVector(*spatialBuffer_);
inGBuffer->divRowVector(*spatialBuffer_);
// subtract
inGTmp->add(*outGTmp, -1, 1);
inGBuffer->add(*outGTmp, -1, 1);
// divide by norm
inGTmp->divRowVector(*normTmp);
inGBuffer->divRowVector(*normTmp);
// scale the diff
inGTmp->mulColVector(*scale_->getW());
inGBuffer->mulColVector(*scale_->getW());
inGTmp->add(*inGBuffer);
}
// update scale
if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
if (scale_->getWGrad()) scale_->getWGrad()->add(*scaleDiff_);
scale_->getParameterPtr()->incUpdate(callback);
}

@ -26,6 +26,10 @@ namespace paddle {
* If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = max_{for each instance in this sequence}{input[i]}
* If stride_ > 0:
* Output: a shortened sequence. Stride is the step size by which we slide a
* window upon the input sequence, and the max pooling operation is
* then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check input sequence must have sub-sequence
* Output: output size is the number of input sub-sequences

@ -56,14 +56,4 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
return true;
}
bool CrossChannelNormLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
CHECK(parameters_[0]);
const NormConfig& conf = config_.inputs(0).norm_conf();
channels_ = conf.channels();
scale_.reset(new Weight(channels_, 1, parameters_[0]));
return true;
}
} // namespace paddle

@ -26,10 +26,9 @@ namespace paddle {
* If SequenceLevel = kNonseq:
* Output: a sequence containing only the last instance of the input sequence
* If stride_ > 0:
* Output: a shorten sequence. The operation of getting last instance of a
* sequence is independently performed on every slice of the input
* sequence, which is obtained by sliding a window with the window
* size set to stride_.
* Output: a shortened sequence. Stride is the step size by which we slide a
* window upon the input sequence, and the get-last-instance
* operation is then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check input sequence must have sub-sequence
* Output: a sequence containing only the last instance of each sub-sequence
@ -73,8 +72,7 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
void SequenceLastInstanceLayer::forward(PassType passType) {
SequencePoolLayer::forward(passType);
auto starts = (stride_ > 0) ? stridePositions_->getData()
: startPositions_->getData(false);
auto starts = startPositions_->getData(false);
MatrixPtr inputValue = getInputValue(0);
MatrixPtr outputValue = getOutputValue();

@ -72,9 +72,8 @@ void SequencePoolLayer::forward(PassType passType) {
if (stride_ > 0) {
CHECK_EQ(input.hasSubseq(), 0UL)
<< "sequence stride pooling is invalid for hasSubseq now";
output_.poolSequenceWithStride(
input, stride_, &stridePositions_, reversed_);
newBatchSize_ = stridePositions_->getSize() - 1;
output_.poolSequenceWithStride(input, stride_, &startPositions_, reversed_);
newBatchSize_ = startPositions_->getSize() - 1;
}
resetOutput(newBatchSize_, dim);

@ -28,8 +28,9 @@ namespace paddle {
* sequence}{input[i]}
* If stride_ > 0:
* Check input sequence must not have sub-sequence
* Output: a shorten sequence, pooling is performed upon a small local
* area
* Output: a shortened sequence. Stride is the step size by which we slide
* a window upon the input sequence, and the pooling operation
* is then applied to each interval independently.
* If SequenceLevel = kSeq:
* Check input sequence must have sub-sequence
* Output: output size is the number of input sub-sequences
@ -47,8 +48,6 @@ protected:
size_t newBatchSize_;
ICpuGpuVectorPtr startPositions_;
int stride_;
// Store the start position of each window.
IVectorPtr stridePositions_;
// Whether the input sequence is reversed or not.
bool reversed_ = false;

@ -465,7 +465,6 @@ void initTestLayer(TestConfig testConf,
ParameterConfig paraConfig) {
paraConfig.set_name(paraName);
paraConfig.set_size(paraSize);
paraConfig.set_initial_std(1);
paraConfig.set_is_static(isStatic);
auto para =
std::make_shared<Parameter>(paraConfig, FLAGS_use_gpu, initialize);
@ -499,6 +498,9 @@ void initTestLayer(TestConfig testConf,
paraConfig.add_dims((*layerMap)[input.input_layer_name()]->getSize());
paraConfig.add_dims(testConf.layerConfig.size());
}
CHECK_GE(testConf.paramInitialStd, 0);
paraConfig.set_initial_mean(testConf.paramInitialMean);
paraConfig.set_initial_std(testConf.paramInitialStd);
initParameter(paraName, paraSize, inputDef.isStatic, false, paraConfig);
}
}

@ -125,12 +125,16 @@ struct TestConfig {
LayerConfig layerConfig;
std::vector<InputDef> inputDefs;
size_t biasSize;
real paramInitialMean;
real paramInitialStd;
bool testAccumulate;
bool testState;
bool staticBias;
bool testBatchState;
TestConfig()
: biasSize(0),
paramInitialMean(0.0),
paramInitialStd(1.0),
testAccumulate(true),
testState(false),
staticBias(false),

@ -845,8 +845,12 @@ void testDegradeLayer(bool hasSubseq,
TEST(Layer, MaxLayer) {
testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
testDegradeLayer(false,
"max",
"non-seq",
5); // seq max to a shorten seq, stride window = 5
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
}
TEST(Layer, SequenceLastInstanceLayer) {
@ -868,6 +872,10 @@ TEST(Layer, SequenceLastInstanceLayer) {
TEST(Layer, AverageLayer) {
testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq
testDegradeLayer(false,
"average",
"non-seq",
5); // seq average to a shorten seq, stride window = 5
testDegradeLayer(
true, "average", "non-seq", -1); // hasSubseq average to non-seq
testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq
@ -1661,6 +1669,8 @@ TEST(Layer, PadLayer) {
TEST(Layer, CrossChannelNormLayer) {
TestConfig config;
config.paramInitialMean = 1.;
config.paramInitialStd = 0.;
config.layerConfig.set_type("norm");
config.layerConfig.set_size(100);
LayerInputConfig* input = config.layerConfig.add_inputs();
@ -1674,7 +1684,7 @@ TEST(Layer, CrossChannelNormLayer) {
config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});
for (auto useGpu : {false, true}) {
testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false);
}
}

@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) {
void Argument::poolSequenceWithStride(const Argument& input,
size_t stride,
IVectorPtr* stridePostions,
ICpuGpuVectorPtr* stridePostions,
bool reversed) {
// If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
// then sequenceStartPositions = [0, 2, 3, 4, 7].
@ -598,8 +598,8 @@ void Argument::poolSequenceWithStride(const Argument& input,
stridePos.emplace_back(starts[numSequences]);
int size = stridePos.size();
CHECK_EQ(size - 1, tgtBuf[numSequences]);
IVector::resizeOrCreate(*stridePostions, size, false);
(*stridePostions)->copyFrom(stridePos.data(), size);
ICpuGpuVector::resizeOrCreate(*stridePostions, size, false);
(*stridePostions)->getMutableVector(false)->copyFrom(stridePos.data(), size);
}
void Argument::getValueString(

@ -299,7 +299,7 @@ struct Argument {
*/
void poolSequenceWithStride(const Argument& input,
size_t stride,
IVectorPtr* stridePositions,
ICpuGpuVectorPtr* stridePositions,
bool reversed = false);
/**
* @brief getValueString will return the argument's output in string. There

@ -31,7 +31,7 @@ TEST(Argument, poolSequenceWithStride) {
int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};
for (auto reversed : {false, true}) {
IVectorPtr stridePositions;
ICpuGpuVectorPtr stridePositions;
output.poolSequenceWithStride(
input, 5 /* stride */, &stridePositions, reversed);
@ -45,7 +45,7 @@ TEST(Argument, poolSequenceWithStride) {
CHECK_EQ(stridePositions->getSize(), 8UL);
auto result = reversed ? strideResultReversed : strideResult;
for (int i = 0; i < 8; i++) {
CHECK_EQ(stridePositions->getData()[i], result[i]);
CHECK_EQ(stridePositions->getData(false)[i], result[i]);
}
}
}

@ -172,53 +172,3 @@ TEST_F(CommonTest, syncThreadPool) {
EXPECT_EQ((int)0, nums[i]);
}
}
TEST_F(CommonTest, barrierStat) {
const int threadNum = 10;
SyncThreadPool pool(threadNum);
#define TEST_BARRIER_RANDOM(statName, numConnThreads, ...) \
pool.exec([&](int tid, size_t numThreads) { \
struct timeval time; \
gettimeofday(&time, nullptr); \
uint64_t usec = timeToMicroSecond(time); \
std::srand(usec); \
auto value = std::rand() % 100000; \
usleep(value); \
REGISTER_SLOW_NODES_PROBE( \
globalStat, statName, numConnThreads, tid, __VA_ARGS__); \
});
for (auto i = 0; i < 10; i++) {
TEST_BARRIER_RANDOM("synThreadBarrier1", threadNum);
TEST_BARRIER_RANDOM("synThreadBarrier2", threadNum);
}
globalStat.printAllStatus();
globalStat.reset();
for (auto i = 0; i < 10; i++) {
TEST_BARRIER_RANDOM("synThreadBarrier3", threadNum, "tag0");
TEST_BARRIER_RANDOM("synThreadBarrier4", threadNum, "tag1");
}
globalStat.printAllStatus();
globalStat.reset();
// use it to test accurate barrier gap
#define TEST_BARRIER(statName, numConnThreads, ...) \
pool.exec([&](int tid, size_t numThreads) { \
usleep(tid * 10000); \
REGISTER_SLOW_NODES_PROBE( \
globalStat, statName, numConnThreads, tid, __VA_ARGS__); \
});
for (auto i = 0; i < 10; i++) {
TEST_BARRIER("synThreadBarrier3", threadNum, "tag0");
TEST_BARRIER("synThreadBarrier4", threadNum, "tag1");
}
globalStat.printAllStatus();
globalStat.reset();
}

@ -142,7 +142,7 @@ SocketServer::SocketServer(const std::string &addr, int port, int rdmaCpu)
}
/// trigger to initialize RDMA lib
PCHECK(RdmaClientDaemons::get()) << "initilizate RDMA failed\n";
CHECK(RdmaClientDaemons::get()) << "initilizate RDMA failed\n";
}
SocketServer::~SocketServer() {
@ -168,7 +168,7 @@ void SocketServer::tcpServer() {
/// First call to socket() function
socket_ = socket(AF_INET, SOCK_STREAM, 0);
PCHECK(socket_ >= 0) << "ERROR opening socket";
CHECK(socket_ >= 0) << "ERROR opening socket";
/// Initialize socket structure
bzero((char *)&serv_addr, sizeof(serv_addr));
@ -176,7 +176,7 @@ void SocketServer::tcpServer() {
serv_addr.sin_port = htons(port_);
if (!addr_.empty()) {
server = gethostbyname(addr_.c_str());
PCHECK(server) << "ERROR, no such host: " << addr_;
CHECK(server) << "ERROR, no such host: " << addr_;
bcopy((char *)server->h_addr,
(char *)&serv_addr.sin_addr.s_addr,
server->h_length);
@ -187,7 +187,7 @@ void SocketServer::tcpServer() {
setOption(socket_);
/// Now bind the host address using bind() call.
PCHECK(bind(socket_, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) >= 0)
CHECK(bind(socket_, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) >= 0)
<< "ERROR on binding " << addr_;
/// Now start listening for the clients, here process will
@ -201,7 +201,7 @@ void SocketServer::tcpServer() {
if (stopping_) {
break;
}
PCHECK(newsockfd >= 0) << "ERROR on accept";
CHECK(newsockfd >= 0) << "ERROR on accept";
constexpr int kPeerNameLen = 128;
char peerName[kPeerNameLen];
CHECK(inet_ntop(AF_INET, &cli_addr.sin_addr, peerName, kPeerNameLen));
@ -227,14 +227,14 @@ void SocketServer::rdmaServer() {
/// First call to socket() function
rdmaSocket_ = rdma::ssocket(rdmaCpu_);
PCHECK(rdmaSocket_) << "ERROR opening RDMA socket";
CHECK(rdmaSocket_) << "ERROR opening RDMA socket";
PCHECK(rdma::bind(rdmaSocket_, rdmaUri_.c_str()) == 0)
CHECK(rdma::bind(rdmaSocket_, rdmaUri_.c_str()) == 0)
<< "ERROR bind RDMA socket";
/// Now start listening for the clients, here process will
/// go in sleep mode and will wait for the incoming connection
PCHECK(rdma::listen(rdmaSocket_) == 0) << "ERROR listen RDMA socket";
CHECK(rdma::listen(rdmaSocket_) == 0) << "ERROR listen RDMA socket";
while (true) {
/// Accept actual connection from the client
@ -242,7 +242,7 @@ void SocketServer::rdmaServer() {
if (stopping_) {
break;
}
PCHECK(newsock) << "ERROR on accept";
CHECK(newsock) << "ERROR on accept";
constexpr int kPeerNameLen = 128;
char peerName[kPeerNameLen];
@ -290,7 +290,7 @@ RdmaClientDaemons::RdmaClientDaemons() {
onlineCpus_ = rdma::numCpus();
for (auto i = 0; i < onlineCpus_; i++) {
socket = rdma::csocket(i);
PCHECK(socket) << "ERROR open client socket daemon";
CHECK(socket) << "ERROR open client socket daemon";
rdmaClientSocket_.push_back(socket);
}
@ -355,7 +355,7 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
/// Create a socket point
int sockfd = socket(AF_INET, SOCK_STREAM, 0);
PCHECK(sockfd >= 0) << "ERROR opening socket";
CHECK(sockfd >= 0) << "ERROR opening socket";
#if defined(__OSX__) || defined(__APPLE__)
server = getipnodebyname(serverAddr.c_str(), AF_INET, AI_DEFAULT, &errRet);
@ -396,8 +396,8 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
}
std::this_thread::sleep_for(std::chrono::seconds(1));
} else {
PCHECK(errno != 0) << "ERROR connecting to " << serverAddr << ":"
<< serverPort << "errorno: " << errno;
CHECK(errno != 0) << "ERROR connecting to " << serverAddr << ":"
<< serverPort << "errorno: " << errno;
}
} while (errno == ECONNREFUSED);
@ -426,7 +426,7 @@ void SocketClient::RdmaClient(const std::string &serverAddr, int serverPort) {
/// connect to server with socket daemon
sock = rdma::connect(socketDaemon_, rdmaUri.c_str());
PCHECK(sock) << "ERROR connect to server" << rdmaUri;
CHECK(sock) << "ERROR connect to server" << rdmaUri;
std::vector<std::string> seg;
str::split(rdmaUri, '/', &seg);

File diff suppressed because it is too large Load Diff

@ -298,24 +298,6 @@ protected:
/// barrier performance tuning sync-sgd required
std::atomic<int64_t> batchId_;
/// the beginning of addGradient without network overhead
ThreadLocal<struct timeval> addGradBegin_;
/**
* tuning barrier performance
* to better control log for sparse and dense parameter,
* we use different log entities for different parameterServer
* objects.
* it will output lots of performance stats to perceive the
* overhead of network, fluctuation of computation from
* forwardbackward and network, computation from optimization
* at pserver end, barrier overhead, etc. to understand tuning
* data, focus on the synchronization between addGradient and
* doOperation which indirectly call op_SGD operation controlled
* by remote updater controller
*/
std::unique_ptr<StatSet> statSet_;
public:
struct Buffer {
real* base;
@ -325,7 +307,6 @@ public:
protected:
/// async gradient commit control
bool asyncGrdientCommitCheckAndStat(const SendParameterRequest& request);
void printAsyncGradientCommitStatAndReset();
public:
/// disable default parameter for overloading
@ -710,36 +691,6 @@ public:
void op_load(const Operation& operation, OperationResult* result);
void op_save(const Operation& operation, OperationResult* result);
/**
* @brief output log in at the middle stage of training
*
* @note flush log histroy and state at the end for sgd
*/
void tuningSgdMidOutput();
/**
* @brief output log in at the end stage of training
*
* @note flush log histroy and state at the end for sgd. it will also
* flush some stateful stat for next pass.
*/
void tuningSgdFinished();
/**
* @brief output log in at the middle stage of training
*
* @note flush log histroy and state at the end for async-sgd.
* it will log some performance log if some lagged node are found
*/
void tuningAsyncsgdMidOutput();
/**
* @brief output log in at the end stage of training
*
* @note flush log histroy and state at the end for async-sgd.
*/
void tuningAsyncsgdFinished();
};
} // namespace paddle

@ -51,7 +51,7 @@ size_t SocketChannel::read(void* buf, size_t size) {
else
len = rdma::read(rdmaSocket_, (char*)buf + total, size - total);
PCHECK(len >= 0) << " peer=" << peerName_;
CHECK(len >= 0) << " peer=" << peerName_;
if (len <= 0) {
return total;
}
@ -69,7 +69,7 @@ size_t SocketChannel::write(const void* buf, size_t size) {
else
len = rdma::write(rdmaSocket_, (char*)buf + total, size - total);
PCHECK(len >= 0) << " peer=" << peerName_;
CHECK(len >= 0) << " peer=" << peerName_;
if (len <= 0) {
return total;
}
@ -98,10 +98,10 @@ static size_t readwritev(IOFunc iofunc,
while (size < total) {
ssize_t len =
iofunc(socket, &iovs[curIov], std::min(iovcnt - curIov, maxiovs));
PCHECK(len > 0) << " peer=" << peerName << " curIov=" << curIov
<< " iovCnt=" << iovcnt
<< " iovs[curIov].base=" << iovs[curIov].iov_base
<< " iovs[curIov].iov_len=" << iovs[curIov].iov_len;
CHECK(len > 0) << " peer=" << peerName << " curIov=" << curIov
<< " iovCnt=" << iovcnt
<< " iovs[curIov].base=" << iovs[curIov].iov_base
<< " iovs[curIov].iov_len=" << iovs[curIov].iov_len;
size += len;
/// restore iovs[curIov] to the original value
@ -183,7 +183,7 @@ void SocketChannel::writeMessage(const std::vector<struct iovec>& userIovs) {
header.totalLength += iov.iov_len;
}
PCHECK(writev(iovs) == (size_t)header.totalLength);
CHECK(writev(iovs) == (size_t)header.totalLength);
}
std::unique_ptr<MsgReader> SocketChannel::readMessage() {
@ -194,7 +194,7 @@ std::unique_ptr<MsgReader> SocketChannel::readMessage() {
return nullptr;
}
PCHECK(len == sizeof(header));
CHECK(len == sizeof(header));
std::unique_ptr<MsgReader> msgReader(new MsgReader(this, header.numIovs));
@ -209,7 +209,7 @@ std::unique_ptr<MsgReader> SocketChannel::readMessage() {
MsgReader::MsgReader(SocketChannel* channel, size_t numBlocks)
: channel_(channel), blockLengths_(numBlocks), currentBlockIndex_(0) {
size_t size = numBlocks * sizeof(blockLengths_[0]);
PCHECK(channel_->read(&blockLengths_[0], size) == size);
CHECK(channel_->read(&blockLengths_[0], size) == size);
}
void MsgReader::readBlocks(const std::vector<void*>& bufs) {
@ -223,12 +223,12 @@ void MsgReader::readBlocks(const std::vector<void*>& bufs) {
++currentBlockIndex_;
}
PCHECK(channel_->readv(&iovs) == totalLength);
CHECK(channel_->readv(&iovs) == totalLength);
}
void MsgReader::readNextBlock(void* buf) {
CHECK_LT(currentBlockIndex_, blockLengths_.size());
PCHECK(channel_->read(buf, getNextBlockLength()) == getNextBlockLength());
CHECK(channel_->read(buf, getNextBlockLength()) == getNextBlockLength());
++currentBlockIndex_;
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save