Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into improve_pruning

gangliao-patch-1
zlx 8 years ago
commit 15bf6e05b5

@ -47,6 +47,7 @@ option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
@ -107,6 +108,7 @@ include(configure) # add paddle env configuration
include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient")
set(EXTERNAL_LIBS
${GFLAGS_LIBRARIES}
@ -126,9 +128,12 @@ endif(WITH_GPU)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(go/master/c)
add_subdirectory(python)
add_subdirectory(go/pserver/cclient)
if(WITH_GOLANG)
#TODO (add go/master/c back when fixed)
add_subdirectory(go/pserver/cclient)
endif(WITH_GOLANG)
if(WITH_DOC)
add_subdirectory(doc)

@ -40,6 +40,10 @@ if(NOT CMAKE_CROSSCOMPILING)
endif()
endif()
if(NOT WITH_GOLANG)
add_definitions(-DPADDLE_WITHOUT_GOLANG)
endif(NOT WITH_GOLANG)
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)

File diff suppressed because it is too large Load Diff

@ -84,6 +84,7 @@ function(link_paddle_exe TARGET_NAME)
paddle_parameter
paddle_proto
paddle_cuda
paddle_optimizer
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}

@ -11,13 +11,4 @@ include(flags)
go_library(paddle_pserver_cclient STATIC)
if(PROJ_ROOT)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.h ${PROJ_ROOT}/paddle/trainer/
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.a ${PROJ_ROOT}/paddle/trainer/
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS paddle_pserver_cclient)
add_custom_target(paddle_pserver_cclient_lib ALL DEPENDS ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a)
endif(PROJ_ROOT)
add_subdirectory(test)

@ -8,6 +8,7 @@ add_subdirectory(gserver)
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(scripts)
add_subdirectory(optimizer)
add_subdirectory(strings)
# Do not build go directory until go cmake is working smoothly.
@ -19,8 +20,8 @@ find_package(Boost QUIET)
if(Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_subdirectory(majel)
add_subdirectory(platform)
add_subdirectory(framework)
endif()
if(WITH_C_API)

@ -16,7 +16,7 @@ set(API_HEADER
Internal.h)
add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api gen_proto_cpp paddle_pserver_cclient_lib)
add_dependencies(paddle_api gen_proto_cpp paddle_trainer_lib)
INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle)

@ -842,7 +842,8 @@ public:
int passCount,
bool useSparseUpdater);
static ParameterUpdater* createNewRemoteUpdater(
OptimizationConfig* config, const std::string pserverSpec);
OptimizationConfig* config,
const std::string pserverSpec) throw(UnsupportError);
~ParameterUpdater();
/**

@ -15,7 +15,9 @@ limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#ifndef PADDLE_WITHOUT_GOLANG
#include "paddle/trainer/NewRemoteParameterUpdater.h"
#endif
#include "paddle/trainer/RemoteParameterUpdater.h"
#include "paddle/trainer/ThreadParameterUpdater.h"
@ -30,11 +32,16 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater(
}
ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
OptimizationConfig *config, const std::string pserverSpec) {
OptimizationConfig *config,
const std::string pserverSpec) throw(UnsupportError) {
#ifndef PADDLE_WITHOUT_GOLANG
auto updater = new ParameterUpdater();
updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
config->m->getConfig(), pserverSpec));
return updater;
#else
throw UnsupportError();
#endif
}
ParameterUpdater *ParameterUpdater::createRemoteUpdater(

@ -0,0 +1,5 @@
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...

@ -0,0 +1,4 @@
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)

@ -1,6 +1,7 @@
#include "paddle/majel/ddim.h"
#include "paddle/framework/ddim.h"
namespace majel {
namespace paddle {
namespace framework {
///@cond HIDDEN
@ -66,7 +67,7 @@ DDim make_ddim(const std::vector<int>& dims) {
///@cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors
class DynamicMutableIndexer : public boost::static_visitor<int&> {
public:
public:
DynamicMutableIndexer(int idx) : idx_(idx) {}
template <int D>
@ -74,12 +75,12 @@ public:
return dim[idx_];
}
private:
private:
int idx_;
};
class DynamicConstIndexer : public boost::static_visitor<int> {
public:
public:
DynamicConstIndexer(int idx) : idx_(idx) {}
template <int D>
@ -87,7 +88,7 @@ public:
return dim[idx_];
}
private:
private:
int idx_;
};
@ -213,10 +214,11 @@ struct DDimPrinter : boost::static_visitor<void> {
///\endcond
std::ostream& operator<<(std::ostream& os, const majel::DDim& ddim) {
std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
DDimPrinter printer(os);
boost::apply_visitor(printer, ddim);
return os;
}
} // namespace majel
} // namespace framework
} // namespace paddle

@ -5,20 +5,14 @@
#include <stdexcept>
#include <vector>
#include "paddle/majel/dim.h"
#include "paddle/framework/dim.h"
namespace majel {
namespace paddle {
namespace framework {
namespace {
typedef boost::variant<Dim<1>,
Dim<2>,
Dim<3>,
Dim<4>,
Dim<5>,
Dim<6>,
Dim<7>,
Dim<8>,
Dim<9>>
typedef boost::variant<Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>,
Dim<8>, Dim<9>>
DDimVar;
}
@ -95,14 +89,15 @@ ssize_t product(const DDim& ddim);
int arity(const DDim& ddim);
std::ostream& operator<<(std::ostream&, const majel::DDim&);
std::ostream& operator<<(std::ostream&, const DDim&);
} // namespace majel
} // namespace framework
} // namespace paddle
namespace boost {
template <typename T>
T get(const majel::DDim& in) {
T get(const paddle::framework::DDim& in) {
return boost::get<T>(in.var);
}

@ -4,18 +4,18 @@
#include <vector>
#include "gtest/gtest.h"
#include "paddle/majel/ddim.h"
#include "paddle/framework/ddim.h"
TEST(DDim, Equality) {
// construct a DDim from an initialization list
majel::DDim ddim = majel::make_ddim({9, 1, 5});
paddle::framework::DDim ddim = paddle::framework::make_ddim({9, 1, 5});
EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5);
// construct a DDim from a vector
std::vector<int> vec({9, 1, 5});
majel::DDim vddim = majel::make_ddim(vec);
paddle::framework::DDim vddim = paddle::framework::make_ddim(vec);
EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5);
@ -23,43 +23,43 @@ TEST(DDim, Equality) {
// mutate a DDim
ddim[1] = 2;
EXPECT_EQ(ddim[1], 2);
majel::set(ddim, 0, 6);
EXPECT_EQ(majel::get(ddim, 0), 6);
paddle::framework::set(ddim, 0, 6);
EXPECT_EQ(paddle::framework::get(ddim, 0), 6);
// vectorize a DDim
std::vector<int> res_vec = majel::vectorize(vddim);
std::vector<int> res_vec = paddle::framework::vectorize(vddim);
EXPECT_EQ(res_vec[0], 9);
EXPECT_EQ(res_vec[1], 1);
EXPECT_EQ(res_vec[2], 5);
majel::Dim<3> d(3, 2, 1);
res_vec = majel::vectorize(majel::DDim(d));
paddle::framework::Dim<3> d(3, 2, 1);
res_vec = paddle::framework::vectorize(paddle::framework::DDim(d));
EXPECT_EQ(res_vec[0], 3);
EXPECT_EQ(res_vec[1], 2);
EXPECT_EQ(res_vec[2], 1);
// add two DDims
majel::DDim ddim_sum = ddim + vddim;
paddle::framework::DDim ddim_sum = ddim + vddim;
EXPECT_EQ(ddim_sum[0], 15);
EXPECT_EQ(ddim_sum[1], 3);
EXPECT_EQ(ddim_sum[2], 10);
// multiply two DDims
majel::DDim ddim_mul = ddim * vddim;
paddle::framework::DDim ddim_mul = ddim * vddim;
EXPECT_EQ(ddim_mul[0], 54);
EXPECT_EQ(ddim_mul[1], 2);
EXPECT_EQ(ddim_mul[2], 25);
// arity of a DDim
EXPECT_EQ(majel::arity(ddim), 3);
EXPECT_EQ(paddle::framework::arity(ddim), 3);
// product of a DDim
EXPECT_EQ(majel::product(vddim), 45);
EXPECT_EQ(paddle::framework::product(vddim), 45);
}
TEST(DDim, Print) {
// print a DDim
std::stringstream ss;
majel::DDim ddim = majel::make_ddim({2, 3, 4});
paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 3, 4});
ss << ddim;
EXPECT_EQ("2, 3, 4", ss.str());
}

@ -5,10 +5,11 @@
#include <stdexcept>
#include <type_traits>
#include "paddle/majel/detail/cuda_assert.h"
#include "paddle/majel/detail/hostdevice.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/hostdevice.h"
namespace majel {
namespace paddle {
namespace framework {
// Statically sized, statically indexed dimension
template <int i>
@ -74,7 +75,7 @@ struct Dim<1> {
throw std::invalid_argument("Index out of range.");
}
#else
MAJEL_ASSERT(idx < size.head);
PADDLE_ASSERT(idx < size.head);
#endif
}
@ -131,7 +132,7 @@ HOSTDEVICE int& indexer(Dim<D>& dim, int idx) {
throw std::invalid_argument("Tried to access a negative dimension");
}
#else
MAJEL_ASSERT(idx >= 0);
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
@ -146,7 +147,7 @@ HOSTDEVICE int& indexer<1>(Dim<1>& dim, int idx) {
throw std::invalid_argument("Invalid index");
}
#else
MAJEL_ASSERT(idx == 0);
PADDLE_ASSERT(idx == 0);
#endif
return dim.head;
}
@ -158,7 +159,7 @@ HOSTDEVICE int indexer(const Dim<D>& dim, int idx) {
throw std::invalid_argument("Tried to access a negative dimension");
}
#else
MAJEL_ASSERT(idx >= 0);
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
@ -173,7 +174,7 @@ HOSTDEVICE int indexer<1>(const Dim<1>& dim, int idx) {
throw std::invalid_argument("Invalid index");
}
#else
MAJEL_ASSERT(idx == 0);
PADDLE_ASSERT(idx == 0);
#endif
return dim.head;
}
@ -411,7 +412,7 @@ HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
// XXX For some reason, overloading fails to resolve this correctly
template <int i>
typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) {
std::ostream& os, const Dim<i>& d) {
os << d.head << ", " << d.tail;
return os;
}
@ -420,7 +421,7 @@ typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
// XXX I wish this could be an overload instead of a template
template <int i>
typename std::enable_if<(i == 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) {
std::ostream& os, const Dim<i>& d) {
os << d.head;
return os;
}
@ -448,4 +449,5 @@ HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
return result;
}
} // namespace majel
} // namespace framework
} // namespace paddle

@ -0,0 +1,128 @@
#include <thrust/device_vector.h>
#include <sstream>
#include "paddle/framework/dim.h"
#include "gtest/gtest.h"
__global__ void test(paddle::framework::Dim<2>* o) {
o[0] = paddle::framework::make_dim(5, 6);
}
__global__ void dyn_idx_gpu(int* o) {
auto d = paddle::framework::make_dim(5, 6);
o[0] = d[1];
}
TEST(Dim, Equality) {
// construct a Dim on the CPU
auto a = paddle::framework::make_dim(3, 4);
EXPECT_EQ(paddle::framework::get<0>(a), 3);
EXPECT_EQ(paddle::framework::get<1>(a), 4);
// construct a Dim on the GPU
thrust::device_vector<paddle::framework::Dim<2>> t(2);
test<<<1,1>>>(thrust::raw_pointer_cast(t.data()));
a = t[0];
EXPECT_EQ(paddle::framework::get<0>(a), 5);
EXPECT_EQ(paddle::framework::get<1>(a), 6);
// linearization
auto b = paddle::framework::make_dim(7, 8);
EXPECT_EQ(paddle::framework::linearize(a, b), 83);
// product
EXPECT_EQ(paddle::framework::product(a), 30);
// mutate a Dim
paddle::framework::get<1>(b) = 10;
EXPECT_EQ(paddle::framework::get<0>(b), 7);
EXPECT_EQ(paddle::framework::get<1>(b), 10);
// dynamic access
paddle::framework::get(b, 0) = 8;
b[1] = 11;
EXPECT_EQ(paddle::framework::get<0>(b), 8);
EXPECT_EQ(paddle::framework::get<1>(b), 11);
EXPECT_EQ(paddle::framework::get(b, 0), 8);
EXPECT_EQ(b[1], 11);
// dynamic access on GPU
thrust::device_vector<int> r(1);
dyn_idx_gpu<<<1,1>>>(thrust::raw_pointer_cast(r.data()));
int res = r[0];
EXPECT_EQ(res, 6);
// ex_prefix_mul
paddle::framework::Dim<3> c = paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 3);
EXPECT_EQ(paddle::framework::get<2>(c), 12);
// contiguous_strides
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 1, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 0);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 10, 1));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 10);
EXPECT_EQ(paddle::framework::get<2>(c), 0);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(1, 10, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 0);
EXPECT_EQ(paddle::framework::get<1>(c), 1);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(2, 3, 4));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 2);
EXPECT_EQ(paddle::framework::get<2>(c), 6);
// generate from an index
auto size = paddle::framework::make_dim(4, 5, 2);
c = paddle::framework::Dim<3>(14, size);
EXPECT_EQ(paddle::framework::get<0>(c), 2);
EXPECT_EQ(paddle::framework::get<1>(c), 3);
EXPECT_EQ(paddle::framework::get<2>(c), 0);
c = paddle::framework::Dim<3>(25, size);
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 1);
EXPECT_EQ(paddle::framework::get<2>(c), 1);
}
TEST(Dim, Bool) {
auto a = paddle::framework::make_dim(3, 4);
auto b = paddle::framework::make_dim(5, 6);
auto c = paddle::framework::make_dim(3, 4);
// in_bounds check
EXPECT_TRUE(paddle::framework::contained(a, b));
EXPECT_FALSE(paddle::framework::contained(b, a));
// comparison
EXPECT_TRUE(a == a);
EXPECT_FALSE(a == b);
EXPECT_TRUE(a == c);
// contiguous check
int x = 4, y = 5, z = 2;
paddle::framework::Dim<3> sizef(x, y, z);
paddle::framework::Dim<3> stridea(1, x, x*y);
paddle::framework::Dim<3> strideb(2, 2*x, 2*x*y);
paddle::framework::Dim<3> stridec(1, x, 2*x*y);
EXPECT_TRUE(paddle::framework::contiguous(sizef, stridea));
EXPECT_FALSE(paddle::framework::contiguous(sizef, strideb));
EXPECT_FALSE(paddle::framework::contiguous(sizef, stridec));
}
TEST(Dim, Print) {
{
std::stringstream ss;
auto a = paddle::framework::make_dim(2, 3);
ss << a;
EXPECT_EQ(ss.str(), "2, 3");
}
{
std::stringstream ss;
ss << paddle::framework::make_dim(8);
EXPECT_EQ(ss.str(), "8");
}
}

@ -68,14 +68,12 @@ public:
numOutputs_ = 1;
}
virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {}
// input can be INPUT and INPUT_GRAD
// filter can be FILTER and FILTER_GRAD
// output can be OUTPUT and OUTPUT_GRAD
void check(const TensorShape& input,
const TensorShape& filter,
const TensorShape& output) {
void checkShape(const TensorShape& input,
const TensorShape& filter,
const TensorShape& output) {
// inputs and outputs arguments should be 4-dimensional.
CHECK_EQ(input.ndims(), (size_t)4);
CHECK_EQ(output.ndims(), (size_t)4);

@ -117,15 +117,23 @@ public:
ConvFunctionBase::init(config);
}
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
// TODO(hedaoyuan): Need to define some index macros,
// to avoid useing 0 and 1.
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
check(input, filter, output);
real beta;
if (outputs[0].getArgType() == ADD_TO) {
@ -209,16 +217,24 @@ public:
ConvFunctionBase::init(config);
}
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
// Since the implementation of Col2ImFunctor is ADD_TO,
// this function only supports ADD_TO mode.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
check(input, filter, output);
size_t batchSize = input[0];
size_t inputChannels = input[1];
@ -295,13 +311,21 @@ public:
ConvFunctionBase::init(config);
}
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
check(input, filter, output);
real beta;
if (outputs[0].getArgType() == ADD_TO) {

@ -54,8 +54,8 @@ public:
T inValue;
const int inH = inStartH + fH;
const int inW = inStartW + fW;
if ((inH >= 0 && inH < inputHeight) &&
(inW >= 0 && inW < inputWidth)) {
if ((inH >= 0 && inH < (int)inputHeight) &&
(inW >= 0 && inW < (int)inputWidth)) {
size_t offsetInput =
batch * inputChannels * inputHeight * inputWidth +
inC * inputHeight * inputWidth + inH * inputWidth + inW;
@ -90,14 +90,19 @@ public:
ConvFunctionBase::init(config);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
check(input, filter, output);
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
check(inputs, outputs);
size_t batchSize = inputs[0].shape()[0];
size_t inputChannels = inputs[0].shape()[1];

@ -284,6 +284,16 @@ public:
}
protected:
std::vector<Argument::SeqInfo> commonSeqInfo_;
ICpuGpuVectorPtr sequenceStartPositions_;
void calcSequenceStartPositions();
void checkInputConsistency(int inlinkId,
const std::vector<Argument::SeqInfo>& seqInfo);
void reorganizeInput(PassType passType);
void reorganizeOutput(PassType passType);
void connectFrames(PassType passType);
void calcNumSequencesAtEachStep();
void resizeOrCreateFrames(int numFrames);
void resizeBootFrame(int numSequences);
@ -295,8 +305,7 @@ protected:
std::string linkName;
LayerPtr inLayer;
std::vector<LayerPtr> agents; // Scatter Agents to reform batch input
bool hasSubseq;
Argument outArg; // scatter output argument
Argument outArg; // scatter output argument
};
std::vector<InFrameLine> inFrameLines_;
@ -318,7 +327,6 @@ protected:
std::vector<LayerPtr> agents;
std::vector<LayerPtr> scatterAgents; // scatter agent used by beam search
Argument outArg; // scatter output argument
bool is_sequence;
// Different memoryFrameLine have different element as follows
IVectorPtr allIds; // scattered id of realLayer
ICpuGpuVectorPtr
@ -330,22 +338,27 @@ protected:
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
// The original positions in the original batch
IVectorPtr allIds; // scattered id of realLayer [batchSize]
// index of allIds for each step [maxSequenceLength_]
// idIndex[i] is the total length of the first i sequences
std::vector<int> idIndex;
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
std::vector<Info> info_;
std::vector<Info> info_; // for input
// numSeqs_[i] is the number sequences which is longer than i (for sequence
// data) or has more than i subsequences (for subsequence data)
// Equivalently, numSeqs_[i] is the number of sequences at step i;
std::vector<int> numSeqs_;
std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
// the id of inlink which share info with outlinks
int targetInfoInlinkId_;
void checkOutputConsistency(OutFrameLine& outFrameLine);
/* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation
@ -354,6 +367,28 @@ protected:
void createInFrameInfo(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_nonseq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_seq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_subseq(int inlinks_id,
const Argument& input,
PassType passType);
void createOutFrameInfo(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_seq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_subseq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
@ -386,9 +421,7 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences(subseq)in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_;
int maxSequenceLength_; // Max top-level length
bool useGpu_;
bool stopBeamSearch_;

@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realHeight = realOutput.getBatchSize();
CHECK_LE(numSamples_, realHeight);
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realHeight) {
if (realOutput.ids) {
output_.ids =
IVector::create(realOutput.ids->getData(), numSamples_, useGpu_);
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
if (realOutput.hasSeq()) {
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
output_.subArgFrom(realOutput,
/* offset */ 0,
numRows,
getSize(),
useGpu_,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_.subArgFrom(
realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
}
}
void SequenceAgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
CHECK(!realOutput.ids) << "Not supported";
output_.subArgFrom(realOutput,
/* offset */ 0,
numRows,
getSize(),
useGpu_,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_ = realOutput;
}
}
REGISTER_LAYER(sequence_agent, SequenceAgentLayer);
bool GatherAgentLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
CHECK_EQ(config_.inputs_size(), 0);
@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
return true;
}
void GatherAgentLayer::copyIdAndSequenceInfo(const Argument& input,
const IVectorPtr& ids,
const std::vector<int>& idIndex) {
output_.sequenceStartPositions = input.sequenceStartPositions;
output_.subSequenceStartPositions = input.subSequenceStartPositions;
realLayers_.clear();
void GatherAgentLayer::copyIdAndSequenceInfo(
ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& ids,
const std::vector<int>& idIndex) {
output_.sequenceStartPositions = sequenceStartPositions;
output_.subSequenceStartPositions = subSequenceStartPositions;
allIds_ = ids;
idIndex_ = idIndex;
}
void GatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
forwardIds(passType);
forwardValue(passType);
}
void GatherAgentLayer::forwardValue(PassType passType) {
MatrixPtr valueReal = realLayers_[0]->getOutputValue();
if (!valueReal) return;
int height = allIds_->getSize();
int width = this->getSize();
@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
int width = this->getSize();
if (realOutArg_.value || realOutArg_.ids) {
if (realOutArg_.hasSeq()) {
forwardSequence(passType);
} else if (realOutArg_.value || realOutArg_.ids) {
output_.subArgFrom(
realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
} else { // used in generation
@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
if (realGrad) {
// for agent in inFrameLines and memoryFrameLines,
// only first scatterAgentLayer should do addToRows in backward
if (idIndex_ == 0) {
if (handleBackward_) {
outputGrad->addToRows(*realGrad, *ids_);
}
}
@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER(gather_agent, GatherAgentLayer);
REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
void SequenceGatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
void GatherAgentLayer::forwardIds(PassType passType) {
int height = 0;
int* starts = output_.subSequenceStartPositions->getMutableData(false);
IVectorPtr idReal = realLayers_[0]->getOutputLabel();
if (idReal) {
if (!idReal) return;
if (output_.subSequenceStartPositions) {
int* starts = output_.subSequenceStartPositions->getMutableData(false);
// Gather generator.idsVec
// if is beam search generation result. Get first result.
if (idReal->getData()[idReal->getSize() - 1] == -1) {
@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
->copyFrom(*realLayers_[i]->getOutputLabel());
}
} else {
// Gather output.value, same as GatherAgentLayer
CHECK(output_.subSequenceStartPositions);
GatherAgentLayer::forward(passType);
LOG(FATAL) << "Not implemented";
}
}
void SequenceScatterAgentLayer::forward(PassType passType) {
void ScatterAgentLayer::forwardSequence(PassType passType) {
Layer::forward(passType);
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
/* seqStart */ seqStartPosIndex_,
/* seqSize */ numSequences_);
} else {
// Putting the generation logic here is really an ugly hack!
// used in generation
int height = 0;
size_t numSequences = ids_->getSize();
@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
}
REGISTER_LAYER(sequence_gather_agent, SequenceGatherAgentLayer);
REGISTER_LAYER(sequence_scatter_agent, SequenceScatterAgentLayer);
} // namespace paddle

@ -49,18 +49,6 @@ public:
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* like AgentLayer, but use first *numSamples* sequences
*/
class SequenceAgentLayer : public AgentLayer {
public:
explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {}
~SequenceAgentLayer() {}
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* Like AgentLayer, but it can gather many real layers. Each real
* layer give a few rows of a sequence, after gather all real layers,
@ -83,7 +71,10 @@ public:
const ParameterMap& parameterMap) override;
// call before addRealLayer
void copyIdAndSequenceInfo(const Argument& input,
void clearRealLayers() { realLayers_.clear(); }
void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& allIds,
const std::vector<int>& idIndex);
@ -92,24 +83,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like GatherAgentLayer, but select a few sequence in real layer.
* *ids* in addRealLayer() are the ids of selected sequence.
* It's used to reorder sequence output.
*/
class SequenceGatherAgentLayer : public GatherAgentLayer {
public:
explicit SequenceGatherAgentLayer(const LayerConfig& config)
: GatherAgentLayer(config) {}
virtual ~SequenceGatherAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
// same as GatherAgentLayer
GatherAgentLayer::backward(callback);
}
void forwardValue(PassType passType);
void forwardIds(PassType passType);
};
/**
@ -129,6 +104,11 @@ protected:
int idSize_;
int seqStartPosIndex_;
int numSequences_; // number of sequences in this scatterAgentLayer
bool handleBackward_;
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
@ -147,19 +127,15 @@ public:
* false(default) in ScatterAgentLayer, and
* true in SequenceScatterAgentLayer.
*/
void setRealLayer(LayerPtr layer,
const std::vector<int>& ids,
bool copyId = false) {
void setRealLayer(LayerPtr layer, const std::vector<int>& ids) {
realLayer_ = layer;
IVector::resizeOrCreate(ids_, ids.size(), useGpu_);
ids_->copyFrom(ids.data(), ids.size());
if (copyId) {
if (useGpu_) {
IVector::resizeOrCreate(cpuIds_, ids.size(), false);
cpuIds_->copyFrom(ids.data(), ids.size());
} else {
cpuIds_ = ids_;
}
if (useGpu_) {
IVector::resizeOrCreate(cpuIds_, ids.size(), false);
cpuIds_->copyFrom(ids.data(), ids.size());
} else {
cpuIds_ = ids_;
}
}
@ -169,12 +145,14 @@ public:
const Argument& outArg,
const IVectorPtr& ids,
int idIndex,
int idSize) {
int idSize,
bool handleBackward) {
realLayer_ = layer;
realOutArg_ = outArg;
ids_ = ids;
idIndex_ = idIndex;
idSize_ = idSize;
handleBackward_ = handleBackward;
}
void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
@ -187,28 +165,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like ScatterAgentLayer, but select a few sequence in real layer.
* *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
* selected sequence. It's used to reorder sequence input.
*/
class SequenceScatterAgentLayer : public ScatterAgentLayer {
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit SequenceScatterAgentLayer(const LayerConfig& config)
: ScatterAgentLayer(config) {}
virtual ~SequenceScatterAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
ScatterAgentLayer::backward(callback);
}
void forwardSequence(PassType passType);
};
} // namespace paddle

@ -40,6 +40,7 @@ namespace paddle {
class FeatureMapExpandLayer : public Layer {
private:
int numFilters_;
bool asRowVector_;
public:
explicit FeatureMapExpandLayer(const LayerConfig& config) : Layer(config) {}
@ -62,6 +63,7 @@ bool FeatureMapExpandLayer::init(const LayerMap& layerMap,
CHECK_EQ(inputLayers_.size(), 1UL);
numFilters_ = config_.num_filters();
asRowVector_ = config_.user_arg() != "as_col_vec";
return true;
}
@ -76,16 +78,30 @@ void FeatureMapExpandLayer::forward(PassType passType) {
{
AsyncGpuBlock asyncGpuBlock;
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
outVTmp->addRowVector(*inVTmp);
if (asRowVector_) {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
outVTmp->addRowVector(*inVTmp);
}
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
imgSize,
numFilters_,
false,
useGpu_);
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, imgSize, 1, false, useGpu_);
outVTmp->addColVector(*inVTmp);
}
}
}
/* activation */ {
@ -102,24 +118,38 @@ void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
MatrixPtr outGrad = getOutputGrad();
size_t batchSize = getInput(0).getBatchSize();
int imgSize = inGrad->getWidth();
/* Do activation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
{
AsyncGpuBlock asyncGpuBlock;
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
inGradTmp->collectBias(*outGradTmp, 1);
if (asRowVector_) {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
inGradTmp->collectBias(*outGradTmp, 1);
}
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
imgSize,
numFilters_,
false,
useGpu_);
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_);
inGradTmp->sumRows(*outGradTmp, 1, 1);
}
}
}
/* Do derivation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
}
} // namespace paddle.

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save