diff --git a/.gitignore b/.gitignore index 1512c1438e..7480bd53a4 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,7 @@ third_party/ cmake-build-* # generated while compiling -python/paddle/v2/framework/core.so +python/paddle/v2/fluid/core.so paddle/pybind/pybind.h CMakeFiles cmake_install.cmake diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 6b56d9ec8d..89c1f48eda 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "hl_base.h" /** - * @brief Maximum pool forward. + * @brief Maximum pool forward with Mask output. * * @param[in] frameCnt batch size of input image. * @param[in] inputData input data. @@ -35,7 +35,7 @@ limitations under the License. */ * @param[in] paddingW padding width. * @param[out] tgtData output data. * @param[in] tgtStride stride between output data samples. - * + * @param[out] maskData the location indices of select max data. */ extern void hl_maxpool_forward(const int frameCnt, const real* inputData, @@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, - const int tgtStride); + const int tgtStride, + real* maskData = NULL); /** * @brief Maximum pool backward. diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index a76dbf0b65..968ed4840f 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, - const int tgtStride) {} + const int tgtStride, + real* MaskData) {} inline void hl_maxpool_backward(const int frameCnt, const real* inputData, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 58674febdc..3699b1e8ae 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads, const int offsetH, const int offsetW, real* tgtData, - const int tgtStride) { + const int tgtStride, + real* maskData) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int pw = index % pooledW; @@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads, hstart = max(hstart, 0); wstart = max(wstart, 0); real maxval = -FLT_MAX; + int max_index = -1; inputData += (frameNum * channels + c) * height * width; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[h * width + w]) - maxval = inputData[h * width + w]; + if (maxval < inputData[h * width + w]) { + max_index = h * width + w; + maxval = inputData[max_index]; + } } } int tgtIndex = index % (pooledW * pooledH * channels) + frameNum * tgtStride; tgtData[tgtIndex] = maxval; + if (maskData != NULL) { + maskData[tgtIndex] = max_index; + } } } @@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, - const int tgtStride) { + const int tgtStride, + real* maskData) { int num_kernels = pooledH * pooledW * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; dim3 threads(1024, 1); @@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt, paddingH, paddingW, tgtData, - tgtStride); + tgtStride, + maskData); CHECK_SYNC("hl_maxpool_forward failed"); } diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 913cd0f81e..b3b9c45ded 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -377,6 +377,12 @@ std::vector> MakeOpGrad( return grad_op_descs; } +static BlockDescBind* CreateStepBlock( + ProgramDescBind& program_desc, + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var, + int step_block_idx); + std::vector> MakeBlockBackward( ProgramDescBind& program_desc, int block_idx, std::unordered_set* no_grad_vars, @@ -392,13 +398,13 @@ std::vector> MakeBlockBackward( if ((*it)->Type() == "recurrent") { int step_block_idx = (*it)->GetBlockAttr("step_block"); - auto backward_block_op_descs = MakeBlockBackward( - program_desc, step_block_idx, no_grad_vars, grad_to_var); + BlockDescBind* backward_block = CreateStepBlock( + program_desc, no_grad_vars, grad_to_var, step_block_idx); + op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); + } else if ((*it)->Type() == "conditional_block") { BlockDescBind* backward_block = - program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx)); - for (auto& ptr : backward_block_op_descs) { - backward_block->AppendAllocatedOp(std::move(ptr)); - } + CreateStepBlock(program_desc, no_grad_vars, grad_to_var, + (*it)->GetBlockAttr("block")); op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); } else { op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var); @@ -449,6 +455,21 @@ std::vector> MakeBlockBackward( return backward_descs; } +static BlockDescBind* CreateStepBlock( + ProgramDescBind& program_desc, + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var, + int step_block_idx) { + auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx, + no_grad_vars, grad_to_var); + BlockDescBind* backward_block = + program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx)); + for (auto& ptr : backward_block_op_descs) { + backward_block->AppendAllocatedOp(move(ptr)); + } + return backward_block; +} + ParamGradInfoMap AppendBackward( ProgramDescBind& program_desc, const VarDescBind& target, const std::unordered_set& no_grad_vars) { diff --git a/paddle/framework/var_type.h b/paddle/framework/var_type.h index d060196bb2..0f19870bec 100644 --- a/paddle/framework/var_type.h +++ b/paddle/framework/var_type.h @@ -27,10 +27,32 @@ inline VarDesc::VarType ToVarType(std::type_index type) { return VarDesc_VarType_LOD_RANK_TABLE; } else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) { return VarDesc_VarType_LOD_TENSOR_ARRAY; + } else if (type.hash_code() == typeid(SelectedRows).hash_code()) { + return VarDesc_VarType_SELECTED_ROWS; } else { PADDLE_THROW("ToVarType:Unsupported type %s", type.name()); } } +template +inline void VisitVarType(const Variable& var, Visitor visitor) { + switch (ToVarType(var.Type())) { + case VarDesc_VarType_LOD_TENSOR: + visitor(var.Get()); + return; + case VarDesc_VarType_LOD_RANK_TABLE: + visitor(var.Get()); + return; + case VarDesc_VarType_LOD_TENSOR_ARRAY: + visitor(var.Get()); + return; + case VarDesc_VarType_SELECTED_ROWS: + visitor(var.Get()); + return; + default: + PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type())); + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/function/ConvOp.h b/paddle/function/ConvOp.h index baf78bc6c8..062ea25a11 100644 --- a/paddle/function/ConvOp.h +++ b/paddle/function/ConvOp.h @@ -61,6 +61,7 @@ public: // function arguments strides_ = config.get>("strides"); paddings_ = config.get>("paddings"); + dilations_ = config.get>("dilations"); groups_ = config.get("groups"); // number of inputs and outputs @@ -118,6 +119,7 @@ protected: std::vector strides_; std::vector paddings_; + std::vector dilations_; /// Group size, refer to grouped convolution in /// Alex Krizhevsky's paper: when group=2, the first half of the @@ -133,6 +135,10 @@ protected: inline int paddingW() const { return paddings_[1]; } + inline int dilationH() const { return dilations_[0]; } + + inline int dilationW() const { return dilations_[1]; } + // A temporary memory in convolution calculation. MemoryHandlePtr memory_; diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h index cb02a96d0d..d8d3c792df 100644 --- a/paddle/function/ConvOpTest.h +++ b/paddle/function/ConvOpTest.h @@ -79,45 +79,59 @@ void Convolution(const std::string& conv1, if (outputChannels < inputChannels) continue; for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; + for (size_t dilation : {1, 3}) { + if (padding >= filterSize) break; + size_t filterS = (filterSize - 1) * dilation + 1; - // NNPACK only supports stride = 1 if batchSize > 1 - if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && - batchSize > 1 && stride > 1) - break; + if (inputSize + 2 * padding < filterS) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize << " stride=" << stride - << " padding=" << padding; + if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" || + conv1 == "NNPACKConv-CPU" || + conv2 == "NNPACKConv-CPU") && + dilation > 1) + break; - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", (size_t)1) - .set("algo", (std::string) "auto")); + // NNPACK only supports stride = 1 if batchSize > 1 + if ((conv1 == "NNPACKConv-CPU" || + conv2 == "NNPACKConv-CPU") && + batchSize > 1 && stride > 1) + break; - TensorShape input{ - batchSize, inputChannels, inputSize, inputSize}; - TensorShape filter{ - outputChannels, inputChannels, filterSize, filterSize}; - TensorShape output{ - batchSize, outputChannels, outputSize, outputSize}; + size_t outputSize = + (inputSize - filterS + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize + << " stride=" << stride << " padding=" << padding; - function(test, input, filter, output); + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + std::vector dilations = {dilation, dilation}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("dilations", dilations) + .set("groups", (size_t)1) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{ + outputChannels, inputChannels, filterSize, filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } } } } @@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1, for (size_t outputChannels : {7}) { size_t stride = 1; size_t padding = 0; + size_t dilation = 1; size_t outputHeight = (inputHeight - filterHeight + 2 * padding + stride) / stride; @@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1, std::vector paddings = {padding, padding}; std::vector strides = {stride, stride}; + std::vector dilations = {dilation, dilation}; Compare2Function test( conv1, conv2, @@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1, .set("paddings", paddings) .set("strides", strides) .set("groups", (size_t)1) + .set("dilations", dilations) .set("algo", (std::string) "auto")); TensorShape input{ @@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1, std::vector paddings = {padding, padding}; std::vector strides = {stride, stride}; + std::vector dilations = {1, 1}; size_t groups = inputChannels; Compare2Function test( conv1, @@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1, .set("paddings", paddings) .set("strides", strides) .set("groups", groups) + .set("dilations", dilations) .set("algo", (std::string) "auto")); TensorShape input{ diff --git a/paddle/function/GemmConvOp.cpp b/paddle/function/GemmConvOp.cpp index bdb56ddac3..8d34eee886 100644 --- a/paddle/function/GemmConvOp.cpp +++ b/paddle/function/GemmConvOp.cpp @@ -100,7 +100,9 @@ public: strideH(), strideW(), paddingH(), - paddingW()); + paddingW(), + dilationH(), + dilationW()); } else { colData = inputData + g * inputOffset; } @@ -223,7 +225,9 @@ public: strideH(), strideW(), paddingH(), - paddingW()); + paddingW(), + dilationH(), + dilationW()); } } inputGrad += inputChannels * inputHeight * inputWidth; @@ -310,7 +314,9 @@ public: strideH(), strideW(), paddingH(), - paddingW()); + paddingW(), + dilationH(), + dilationW()); } else { colData = inputData + g * inputOffset; } diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h index 1e0cff436f..0c37fc9724 100644 --- a/paddle/function/Im2Col.h +++ b/paddle/function/Im2Col.h @@ -78,7 +78,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth); + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1); }; template @@ -91,7 +93,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth); + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1); }; } // namespace paddle diff --git a/paddle/function/Im2ColOp.cpp b/paddle/function/Im2ColOp.cpp index b7d1eb1ede..f864d42f80 100644 --- a/paddle/function/Im2ColOp.cpp +++ b/paddle/function/Im2ColOp.cpp @@ -31,7 +31,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -47,8 +49,8 @@ public: int c_im = c / filterWidth / filterHeight; for (int h = 0; h < outputHeight; ++h) { for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset; - int imColIdx = w * strideWidth + wOffset; + int imRowIdx = h * strideHeight + hOffset * dilationHeight; + int imColIdx = w * strideWidth + wOffset * dilationWidth; if ((imRowIdx - paddingHeight) < 0 || (imRowIdx - paddingHeight) >= inputHeight || (imColIdx - paddingWidth) < 0 || @@ -81,7 +83,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -97,8 +101,8 @@ public: int c_im = c / filterWidth / filterHeight; for (int h = 0; h < outputHeight; ++h) { for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset; - int imColIdx = w * strideWidth + wOffset; + int imRowIdx = h * strideHeight + hOffset * dilationHeight; + int imColIdx = w * strideWidth + wOffset * dilationWidth; if ((imRowIdx - paddingHeight) >= 0 && (imRowIdx - paddingHeight) < inputHeight && (imColIdx - paddingWidth) >= 0 && @@ -134,7 +138,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -147,9 +153,10 @@ public: for (int channel = 0; channel < inputChannels; ++channel) { for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterW = 0; filterW < filterWidth; ++filterW) { - int imRowOffset = - outputH * strideHeight + filterH - paddingHeight; - int imColOffset = outputW * strideWidth + filterW - paddingWidth; + int imRowOffset = outputH * strideHeight + + filterH * dilationHeight - paddingHeight; + int imColOffset = outputW * strideWidth + + filterW * dilationWidth - paddingWidth; int colDataOffset = (((outputH * outputWidth + outputW) * inputChannels + channel) * @@ -189,7 +196,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight = 1, + int dilationWidth = 1) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -202,9 +211,10 @@ public: for (int channel = 0; channel < inputChannels; ++channel) { for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterW = 0; filterW < filterWidth; ++filterW) { - int imRowOffset = - outputH * strideHeight + filterH - paddingHeight; - int imColOffset = outputW * strideWidth + filterW - paddingWidth; + int imRowOffset = outputH * strideHeight + + filterH * dilationHeight - paddingHeight; + int imColOffset = outputW * strideWidth + + filterW * dilationWidth - paddingWidth; int colDataOffset = (((outputH * outputWidth + outputW) * inputChannels + channel) * diff --git a/paddle/function/Im2ColOpGpu.cu b/paddle/function/Im2ColOpGpu.cu index bd98610498..71da11b955 100644 --- a/paddle/function/Im2ColOpGpu.cu +++ b/paddle/function/Im2ColOpGpu.cu @@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im, int strideW, int paddingH, int paddingW, + int dilationH, + int dilationW, int height_col, int width_col, T* data_col) { @@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im, data_col += (channel_out * height_col + h_out) * width_col + w_out; for (int i = 0; i < blockH; ++i) { for (int j = 0; j < blockW; ++j) { - int rIdx = int(h_in + i); - int cIdx = int(w_in + j); + int rIdx = int(h_in + i * dilationH); + int cIdx = int(w_in + j * dilationW); if ((rIdx - (int)paddingH) >= (int)height || (rIdx - (int)paddingH) < 0 || (cIdx - (int)paddingW) >= (int)width || @@ -77,7 +79,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -102,6 +106,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth, colData); @@ -121,6 +127,8 @@ __global__ void col2im(size_t n, size_t strideW, size_t paddingH, size_t paddingW, + size_t dilationH, + size_t dilationW, size_t height_col, size_t width_col, T* data_im) { @@ -131,23 +139,34 @@ __global__ void col2im(size_t n, int w = int(index % width); int h = int((index / width) % height); int c = int(index / (width * height)); + int filterH = (blockH - 1) * dilationH + 1; + int filterW = (blockW - 1) * dilationW + 1; + if ((w - (int)paddingW) >= 0 && (w - (int)paddingW) < (width - 2 * paddingW) && (h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) { // compute the start and end of the output int w_col_start = - (w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1; + (w < (int)filterW) ? 0 : (w - int(filterW)) / (int)strideW + 1; int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col)); int h_col_start = - (h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1; + (h < (int)filterH) ? 0 : (h - (int)filterH) / (int)strideH + 1; int h_col_end = min(int(h / strideH + 1), int(height_col)); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { // the col location: [c * width * height + h_out, w_out] - int c_col = int(c * blockH * blockW) + - (h - h_col * (int)strideH) * (int)blockW + - (w - w_col * (int)strideW); - val += data_col[(c_col * height_col + h_col) * width_col + w_col]; + int h_k = (h - h_col * strideH); + int w_k = (w - w_col * strideW); + if (h_k % dilationH == 0 && w_k % dilationW == 0) { + h_k /= dilationH; + w_k /= dilationW; + int c_col = + (((c * blockH + h_k) * blockW + w_k) * height_col + h_col) * + width_col + + w_col; + val += data_col[c_col]; + } } } h -= paddingH; @@ -173,7 +192,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -205,6 +226,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth, imData); @@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData, int strideWidth, int paddingHeight, int paddingWidth, + int dilationHeight, + int dilationWidth, int outputHeight, int outputWidth) { int swId = blockIdx.x; @@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData, channelId += blockDim.z) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = idx + swId * strideWidth - paddingWidth; - int heightOffset = idy + shId * strideHeight - paddingHeight; + int widthOffset = + idx * dilationHeight + swId * strideWidth - paddingWidth; + int heightOffset = + idy * dilationWidth + shId * strideHeight - paddingHeight; int imOffset = widthOffset + heightOffset * inputWidth + channelId * inputHeight * inputWidth; @@ -273,7 +300,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -312,6 +341,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth); CHECK_SYNC("Im2ColFunctor GPU failed"); @@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData, int strideWidth, int paddingHeight, int paddingWidth, + int dilationHeight, + int dilationWidth, int outputHeight, int outputWidth) { int swId = blockIdx.x; @@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData, channelId += blockDim.z) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = idx + swId * strideWidth - paddingWidth; - int heightOffset = idy + shId * strideHeight - paddingHeight; + int widthOffset = + idx * dilationWidth + swId * strideWidth - paddingWidth; + int heightOffset = + idy * dilationHeight + shId * strideHeight - paddingHeight; int imOffset = widthOffset + heightOffset * inputWidth + channelId * inputHeight * inputWidth; @@ -372,7 +407,9 @@ public: int strideHeight, int strideWidth, int paddingHeight, - int paddingWidth) { + int paddingWidth, + int dilationHeight, + int dilationWidth) { int inputChannels = imShape[0]; int inputHeight = imShape[1]; int inputWidth = imShape[2]; @@ -411,6 +448,8 @@ public: strideWidth, paddingHeight, paddingWidth, + dilationHeight, + dilationWidth, outputHeight, outputWidth); CHECK_SYNC("Col2ImFunctor GPU failed"); diff --git a/paddle/function/Im2ColTest.cpp b/paddle/function/Im2ColTest.cpp index a0a01a5fc7..1f085538d8 100644 --- a/paddle/function/Im2ColTest.cpp +++ b/paddle/function/Im2ColTest.cpp @@ -29,82 +29,98 @@ void TestIm2ColFunctor() { for (size_t filterWidth : {3, 7}) { for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { - if (inputHeight <= filterHeight || inputWidth <= filterWidth) - break; - if (padding >= filterHeight || padding >= filterWidth) break; - size_t outputHeight = - (inputHeight - filterHeight + 2 * padding + stride) / - stride; - size_t outputWidth = - (inputWidth - filterWidth + 2 * padding + stride) / stride; - - TensorShape imShape = - TensorShape({channels, inputHeight, inputWidth}); - TensorShape colShape1 = TensorShape({channels, - filterHeight, - filterWidth, - outputHeight, - outputWidth}); - TensorShape colShape2 = TensorShape({outputHeight, - outputWidth, - channels, - filterHeight, - filterWidth}); - - size_t height = channels * filterHeight * filterWidth; - size_t width = outputHeight * outputWidth; - VectorPtr input1 = Vector::create(imShape.getElements(), false); - VectorPtr input2 = Vector::create(imShape.getElements(), false); - MatrixPtr output1 = Matrix::create(height, width, false, false); - MatrixPtr output2 = Matrix::create(width, height, false, false); - input1->uniform(0.001, 1); - input2->copyFrom(*input1); - - Im2ColFunctor im2Col1; - Im2ColFunctor im2Col2; - im2Col1(input1->getData(), - imShape, - output1->getData(), - colShape1, - stride, - stride, - padding, - padding); - im2Col2(input2->getData(), - imShape, - output2->getData(), - colShape2, - stride, - stride, - padding, - padding); - - // The transposition of the result of ColFormat == kCFO - // is equal to the result of ColFormat == kOCF. - MatrixPtr test; - output2->transpose(test, true); - autotest::TensorCheckErr(*output1, *test); - - Col2ImFunctor col2Im1; - Col2ImFunctor col2Im2; - col2Im1(input1->getData(), - imShape, - output1->getData(), - colShape1, - stride, - stride, - padding, - padding); - col2Im2(input2->getData(), - imShape, - output2->getData(), - colShape2, - stride, - stride, - padding, - padding); - - autotest::TensorCheckErr(*input1, *input2); + for (size_t dilation : {1, 3}) { + size_t filterSizeH = (filterHeight - 1) * dilation + 1; + size_t filterSizeW = (filterWidth - 1) * dilation + 1; + if (inputHeight + 2 * padding < filterSizeH || + inputWidth + 2 * padding < filterSizeW) + break; + if (padding >= filterSizeH || padding >= filterSizeW) break; + size_t outputHeight = + (inputHeight - filterSizeH + 2 * padding) / stride + 1; + size_t outputWidth = + (inputWidth - filterSizeW + 2 * padding) / stride + 1; + + TensorShape imShape = + TensorShape({channels, inputHeight, inputWidth}); + TensorShape colShape1 = TensorShape({channels, + filterHeight, + filterWidth, + outputHeight, + outputWidth}); + TensorShape colShape2 = TensorShape({outputHeight, + outputWidth, + channels, + filterHeight, + filterWidth}); + + size_t height = channels * filterHeight * filterWidth; + size_t width = outputHeight * outputWidth; + VectorPtr input1 = + Vector::create(imShape.getElements(), false); + VectorPtr input2 = + Vector::create(imShape.getElements(), false); + MatrixPtr output1 = + Matrix::create(height, width, false, false); + MatrixPtr output2 = + Matrix::create(width, height, false, false); + input1->uniform(0.001, 1); + input2->copyFrom(*input1); + + Im2ColFunctor im2Col1; + Im2ColFunctor im2Col2; + im2Col1(input1->getData(), + imShape, + output1->getData(), + colShape1, + stride, + stride, + padding, + padding, + dilation, + dilation); + im2Col2(input2->getData(), + imShape, + output2->getData(), + colShape2, + stride, + stride, + padding, + padding, + dilation, + dilation); + + // The transposition of the result of ColFormat == kCFO + // is equal to the result of ColFormat == kOCF. + MatrixPtr test; + output2->transpose(test, true); + autotest::TensorCheckErr(*output1, *test); + + Col2ImFunctor col2Im1; + Col2ImFunctor col2Im2; + + col2Im1(input1->getData(), + imShape, + output1->getData(), + colShape1, + stride, + stride, + padding, + padding, + dilation, + dilation); + col2Im2(input2->getData(), + imShape, + output2->getData(), + colShape2, + stride, + stride, + padding, + padding, + dilation, + dilation); + autotest::TensorCheckErr(*input1, *input2); + } } } } diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 48dfcb49a4..7ff0c73721 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, for (int i = 0; i < config_.inputs_size(); i++) { std::vector paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector strides = {(size_t)strideY_[i], (size_t)stride_[i]}; + std::vector dilations = {(size_t)dilationY_[i], + (size_t)dilation_[i]}; + + bool useDilation = ((size_t)dilationY_[i] > 1 || (size_t)dilation_[i] > 1); // Convolution Layer uses the GemmConv function by default. convType = "GemmConv"; @@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, #if defined(__ARM_NEON__) || defined(__ARM_NEON) if ((filterSize_[i] == filterSizeY_[i]) && (filterSize_[i] == 3 || filterSize_[i] == 4) && - (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) { + (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2) && + !useDilation) { convType = "NeonDepthwiseConv"; } #endif } - if (FLAGS_use_nnpack && !isDeconv_) { + if (FLAGS_use_nnpack && !isDeconv_ && !useDilation) { createFunction(forward_, "NNPACKConv", FuncConfig() @@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, FuncConfig() .set("paddings", paddings) .set("strides", strides) + .set("dilations", dilations) .set("groups", (size_t)groups_[i])); createFunction(backward_, @@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, FuncConfig() .set("paddings", paddings) .set("strides", strides) + .set("dilations", dilations) .set("groups", (size_t)groups_[i])); createFunction(backward_, @@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, FuncConfig() .set("paddings", paddings) .set("strides", strides) + .set("dilations", dilations) .set("groups", (size_t)groups_[i])); } } diff --git a/paddle/gserver/layers/MaxPoolWithMaskLayer.cpp b/paddle/gserver/layers/MaxPoolWithMaskLayer.cpp new file mode 100644 index 0000000000..d810a58d9a --- /dev/null +++ b/paddle/gserver/layers/MaxPoolWithMaskLayer.cpp @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MaxPoolWithMaskLayer.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +bool MaxPoolWithMaskLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + PoolLayer::init(layerMap, parameterMap); + setOutput("mask", &mask_); + return true; +} + +size_t MaxPoolWithMaskLayer::getSize() { + CHECK_EQ(inputLayers_.size(), 1UL); + size_t layerSize = 0; + + outputY_ = outputSize(imgSizeY_, + sizeY_, + confPaddingY_, + strideY_, + /* caffeMode */ false); + outputX_ = outputSize(imgSize_, + sizeX_, + confPadding_, + stride_, + /* caffeMode */ false); + + layerSize = outputX_ * outputY_ * channels_; + getOutput().setFrameHeight(outputY_); + getOutput().setFrameWidth(outputX_); + + return layerSize; +} + +void MaxPoolWithMaskLayer::forward(PassType passType) { + size_t size = getSize(); + MatrixPtr inputV = inputLayers_[0]->getOutputValue(); + int batchSize = inputV->getHeight(); + resetOutput(batchSize, size); + + MatrixPtr outV = getOutputValue(); + CHECK_EQ(size, outV->getWidth()); + + resetSpecifyOutput(mask_, + batchSize, + size, + /* isValueClean */ false, + /* isGradClean */ true); + + MatrixPtr maskV = mask_.value; + outV->maxPoolForward(*inputV, + imgSizeY_, + imgSize_, + channels_, + sizeX_, + sizeY_, + strideY_, + stride_, + outputY_, + outputX_, + confPaddingY_, + confPadding_, + maskV); +} + +void MaxPoolWithMaskLayer::backward(const UpdateCallback& callback) { + (void)callback; + if (NULL == getInputGrad(0)) { + return; + } + + MatrixPtr outGrad = getOutputGrad(); + MatrixPtr inputV = inputLayers_[0]->getOutputValue(); + MatrixPtr outV = getOutputValue(); + MatrixPtr inputGrad = inputLayers_[0]->getOutputGrad(); + + inputGrad->maxPoolBackward(*inputV, + imgSizeY_, + imgSize_, + *outGrad, + *outV, + sizeX_, + sizeY_, + strideY_, + stride_, + outputY_, + outputX_, + 1, + 1, + confPaddingY_, + confPadding_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MaxPoolWithMaskLayer.h b/paddle/gserver/layers/MaxPoolWithMaskLayer.h new file mode 100644 index 0000000000..e0174add9d --- /dev/null +++ b/paddle/gserver/layers/MaxPoolWithMaskLayer.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "PoolLayer.h" +#include "paddle/math/Matrix.h" + +namespace paddle { +/** + * @brief Basic parent layer of different kinds of pooling + */ +class MaxPoolWithMaskLayer : public PoolLayer { +protected: + Argument mask_; + +public: + explicit MaxPoolWithMaskLayer(const LayerConfig& config) + : PoolLayer(config) {} + + size_t getSize(); + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; +}; +} // namespace paddle diff --git a/paddle/gserver/layers/PoolLayer.cpp b/paddle/gserver/layers/PoolLayer.cpp index 7b932d5a76..87613a96c5 100644 --- a/paddle/gserver/layers/PoolLayer.cpp +++ b/paddle/gserver/layers/PoolLayer.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "PoolLayer.h" +#include "MaxPoolWithMaskLayer.h" #include "PoolProjectionLayer.h" #include "paddle/utils/Logging.h" #ifdef PADDLE_WITH_CUDA @@ -44,7 +45,6 @@ bool PoolLayer::init(const LayerMap& layerMap, strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - return true; } @@ -57,6 +57,8 @@ Layer* PoolLayer::create(const LayerConfig& config) { } else if (CudnnPoolLayer::typeCheck(pool)) { return new CudnnPoolLayer(config); #endif + } else if (pool == "max-pool-with-mask") { + return new MaxPoolWithMaskLayer(config); } else { LOG(FATAL) << "Unknown pool type: " << pool; return nullptr; diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 99cfddb0cf..35d4b12d3d 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -98,7 +98,7 @@ void ROIPoolLayer::forward(PassType passType) { size_t roiStartH = round(bottomROIs[2] * spatialScale_); size_t roiEndW = round(bottomROIs[3] * spatialScale_); size_t roiEndH = round(bottomROIs[4] * spatialScale_); - CHECK_GE(roiBatchIdx, 0); + CHECK_GE(roiBatchIdx, 0UL); CHECK_LT(roiBatchIdx, batchSize); size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL); size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL); diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index aa94ee406e..e24d423b9e 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -24,6 +24,7 @@ gserver_test(test_ConvUnify) gserver_test(test_BatchNorm) gserver_test(test_KmaxSeqScore) gserver_test(test_Expand) +gserver_test(test_MaxPoolingWithMaskOutput) ########## test_Mkldnn layers and activations ########## if(WITH_MKLDNN) diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index fcbcb5b0f1..3517d293e3 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - int dilation = 1; + int dilation = 2; if (type == "cudnn_conv") { #if CUDNN_VERSION >= 6000 dilation = 2; @@ -1234,6 +1234,7 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { TEST(Layer, PoolLayer) { testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); + testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ false); #ifdef PADDLE_WITH_CUDA testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); @@ -1242,6 +1243,7 @@ TEST(Layer, PoolLayer) { testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ true); #endif } diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index a0e039c2a3..a859e34c89 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -297,7 +297,7 @@ static void getAddtoConfig(TestConfig& cfg, } void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) { - CHECK_GE(nInputs, 1); + CHECK_GE(nInputs, 1UL); TestConfig dnnConfig; getAddtoConfig(dnnConfig, pm, nInputs); dnnConfig.layerConfig.set_type("mkldnn_addto"); diff --git a/paddle/gserver/tests/test_MaxPoolingWithMaskOutput.cpp b/paddle/gserver/tests/test_MaxPoolingWithMaskOutput.cpp new file mode 100644 index 0000000000..16438886df --- /dev/null +++ b/paddle/gserver/tests/test_MaxPoolingWithMaskOutput.cpp @@ -0,0 +1,117 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include + +#include "LayerGradUtil.h" +#include "paddle/math/MathUtils.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; + +void setPoolConfig(TestConfig* config, + PoolConfig* pool, + const string& poolType) { + (*config).biasSize = 0; + (*config).layerConfig.set_type("pool"); + (*config).layerConfig.set_num_filters(1); + + int kw = 3, kh = 3; + int pw = 0, ph = 0; + int sw = 2, sh = 2; + pool->set_pool_type(poolType); + pool->set_channels(1); + pool->set_size_x(kw); + pool->set_size_y(kh); + pool->set_start(0); + pool->set_padding(pw); + pool->set_padding_y(ph); + pool->set_stride(sw); + pool->set_stride_y(sh); + + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); + pool->set_output_x(ow); + pool->set_output_y(oh); +} + +void doOneMaxPoolingWithMaskOutputTest(MatrixPtr& inputMat, + const string& poolType, + bool use_gpu, + MatrixPtr& maskMat) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 25, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = input->mutable_pool_conf(); + + pool->set_img_size(5); + pool->set_img_size_y(5); + setPoolConfig(&config, pool, poolType); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + config.layerConfig.set_name("MaxPoolWithMask"); + + std::vector dataLayers; + LayerMap layerMap; + vector datas; + + initDataLayer(config, + &dataLayers, + &datas, + &layerMap, + "MaxPoolWithMask", + 1, + false, + use_gpu); + + dataLayers[0]->getOutputValue()->copyFrom(*inputMat); + + FLAGS_use_gpu = use_gpu; + std::vector parameters; + LayerPtr maxPoolingWithMaskOutputLayer; + initTestLayer(config, &layerMap, ¶meters, &maxPoolingWithMaskOutputLayer); + maxPoolingWithMaskOutputLayer->forward(PASS_GC); + + checkMatrixEqual(maxPoolingWithMaskOutputLayer->getOutput("mask").value, + maskMat); +} + +TEST(Layer, maxPoolingWithMaskOutputLayerFwd) { + bool useGpu = false; + MatrixPtr inputMat; + MatrixPtr maskMat; + real inputData[] = {0.1, 0.1, 0.5, 0.5, 1.1, 0.2, 0.2, 0.6, 0.1, + 0.1, 0.3, 0.3, 0.7, 0.1, 0.1, 0.4, 0.4, 0.8, + 0.8, 0.1, 1.0, 2.0, 3.0, 0.0, 9.0}; + real maskData[] = {12, 4, 22, 24}; + + inputMat = Matrix::create(1, 25, false, useGpu); + maskMat = Matrix::create(1, 4, false, useGpu); + inputMat->setData(inputData); + maskMat->setData(maskData); + doOneMaxPoolingWithMaskOutputTest( + inputMat, "max-pool-with-mask", useGpu, maskMat); +#ifdef PADDLE_WITH_CUDA + useGpu = true; + inputMat = Matrix::create(1, 25, false, useGpu); + maskMat = Matrix::create(1, 4, false, useGpu); + inputMat->copyFrom(inputData, 25); + maskMat->copyFrom(maskData, 4); + doOneMaxPoolingWithMaskOutputTest( + inputMat, "max-pool-with-mask", useGpu, maskMat); +#endif +} diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index c3e34d5309..41ee508967 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1028,15 +1028,23 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW) { + size_t paddingW, + MatrixPtr maskMatP) { CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; real* inputData = inputMat.getData(); + real* maskData = NULL; size_t frameNum = inputMat.getHeight(); CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth()); CHECK(height_ == inputMat.getHeight()); CHECK(width_ == outputH * outputW * channels); + if (maskMatP != NULL) { + CHECK(maskMatP->useGpu_ == true) << "Matrix type are not equal"; + CHECK(outputH * outputW * channels == maskMatP->getWidth()); + maskData = maskMatP->getData(); + } + hl_maxpool_forward(frameNum, inputData, channels, @@ -1051,7 +1059,8 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, paddingH, paddingW, data_, - getStride()); + getStride(), + maskData); } void GpuMatrix::maxPoolBackward(Matrix& inputMat, @@ -1973,9 +1982,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW) { + size_t paddingW, + MatrixPtr maskMatP) { real* inputData = inputMat.getData(); real* outData = data_; + real* maskData = NULL; size_t num = inputMat.getHeight(); size_t inLength = imgSizeH * imgSizeW; size_t outLength = outputH * outputW; @@ -1984,6 +1995,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, CHECK_EQ(channels * outLength, this->getWidth()); size_t outStride = getStride(); + if (maskMatP != NULL) { + maskData = maskMatP->getData(); + CHECK_EQ(channels * outLength, maskMatP->getWidth()); + } + /* initialize the data_ */ for (size_t i = 0; i < height_; i++) { for (size_t j = 0; j < width_; j++) { @@ -2005,10 +2021,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, int wstart = pw * strideW - paddingW; int wend = std::min(wstart + sizeX, imgSizeW); wstart = std::max(wstart, 0); - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - outData[ph * outputW + pw] = std::max( - outData[ph * outputW + pw], inputData[h * imgSizeW + w]); + if (maskData == NULL) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[ph * outputW + pw] = std::max( + outData[ph * outputW + pw], inputData[h * imgSizeW + w]); + } + } + } else { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + if (outData[ph * outputW + pw] < inputData[h * imgSizeW + w]) { + outData[ph * outputW + pw] = inputData[h * imgSizeW + w]; + maskData[ph * outputW + pw] = h * imgSizeW + w; + } + } } } } @@ -2016,6 +2043,8 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, // compute offset inputData += inLength; outData += outLength; + + if (maskData != NULL) maskData += outLength; } } } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 44180bca8b..d252d64225 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -861,7 +861,8 @@ public: /** * Pooling forward operation, pick out the largest element - * in the sizeX of value + * in the sizeX of value, if the maskMatP is not NULL, it will + * also caculate the location indices. */ virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH, @@ -874,7 +875,8 @@ public: size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW) { + size_t paddingW, + MatrixPtr maskMatP = NULL) { LOG(FATAL) << "Not implemeted"; } @@ -1426,7 +1428,8 @@ public: size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW); + size_t paddingW, + MatrixPtr maskMatP); void maxPoolBackward(Matrix& image, size_t imgSizeH, @@ -1697,7 +1700,8 @@ public: size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW); + size_t paddingW, + MatrixPtr maskMatP); void maxPoolBackward(Matrix& image, size_t imgSizeH, diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 29ce44c233..709f7de2e4 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -214,6 +214,7 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) +cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc rnn/recurrent_op_utils.cc diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc new file mode 100644 index 0000000000..609e915b93 --- /dev/null +++ b/paddle/operators/assign_op.cc @@ -0,0 +1,138 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/data_type.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/var_type.h" + +namespace paddle { +namespace operators { +class AssignFunctor { + public: + AssignFunctor(framework::Variable *out, + const platform::DeviceContext &dev_ctx) + : out_(out), dev_ctx_(dev_ctx) {} + + void operator()(const framework::LoDTensor &lod_tensor) const { + auto &out_tensor = *out_->GetMutable(); + copy_tensor(lod_tensor, &out_tensor); + } + + void operator()(const framework::LoDTensorArray &array) const { + auto &out_array = *out_->GetMutable(); + out_array.resize(array.size()); + for (size_t i = 0; i < array.size(); ++i) { + copy_tensor(array[i], &out_array[i]); + } + } + + void operator()(const framework::SelectedRows &rows) const { + framework::SelectedRows &out_rows = + *out_->GetMutable(); + out_rows.set_rows(rows.rows()); + out_rows.set_height(rows.height()); + auto &t = rows.value(); + out_rows.mutable_value()->CopyFrom(t, t.place(), dev_ctx_); + } + + template + void operator()(const T &v) const { + PADDLE_THROW("Not support type for assign op %s", typeid(T).name()); + } + + private: + void copy_tensor(const framework::LoDTensor &lod_tensor, + framework::LoDTensor *out) const { + auto &out_tensor = *out; + out_tensor.CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_); + out_tensor.set_lod(lod_tensor.lod()); + } + + framework::Variable *out_; + const platform::DeviceContext &dev_ctx_; +}; + +class AssignOp : public framework::OperatorBase { + public: + AssignOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto *x = scope.FindVar(Input("X")); + if (x == nullptr) { + return; + } + auto *out = scope.FindVar(Output("Out")); + PADDLE_ENFORCE( + out != nullptr, + "The Output(Out) should not be null if the Input(X) is set."); + framework::VisitVarType(*x, AssignFunctor(out, dev_ctx)); + } +}; + +class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + AssignOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor, SelectedRows or LoDTensorArray) The input variable " + "could be LoDTensor, SelectedRows or LoDTensorArray.") + .AsDispensable(); + AddOutput("Out", + "(LoDTensor, SelectedRows or LoDTensorArray) The type of output " + "is the same as input X."); + AddComment(R"DOC(Assign Operator + +Out = X, when type in [LoDTensor/SelectedRows/LoDTensorArray] +raise error if the type is not listed above. +)DOC"); + } +}; + +class AssignInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + if (context->HasInput("X")) { + auto type = context->GetInputsVarType("X")[0]; + if (type == framework::VarDesc_VarType_SELECTED_ROWS || + type == framework::VarDesc_VarType_LOD_TENSOR) { + context->SetOutputDim("Out", context->GetInputDim("X")); + } + } + } +}; + +class AssignGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *op = new framework::OpDescBind(); + op->SetType("assign"); + op->SetInput("X", OutputGrad("Out")); + op->SetOutput("Out", InputGrad("X")); + return std::unique_ptr(op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(assign, ops::AssignOp, ops::AssignGradMaker, + ops::AssignInferShape, ops::AssignOpProtoMaker); diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc new file mode 100644 index 0000000000..3904a97d58 --- /dev/null +++ b/paddle/operators/beam_search_decode_op.cc @@ -0,0 +1,111 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/beam_search_decode_op.h" + +namespace paddle { +namespace operators { + +class BeamSearchDecodeOp : public framework::OperatorBase { + public: + BeamSearchDecodeOp(const std::string& type, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { + framework::ExecutionContext ctx(*this, scope, dev_ctx); + + const LoDTensorArray* ids = ctx.Input("Ids"); + const LoDTensorArray* scores = ctx.Input("Scores"); + const size_t step_num = ids->size(); + PADDLE_ENFORCE_GT(step_num, 0UL, + "beam search steps should be larger than 0"); + const size_t source_num = ids->at(0).lod().at(0).size() - 1; + PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0"); + + for (size_t i = 0; i < step_num; ++i) { + PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL, + "Level of LodTensor should be 2"); + } + + // prepare output + LoDTensor* sentenceIds = ctx.Output("SentenceIds"); + LoDTensor* sentenceScores = ctx.Output("SentenceScores"); + + BeamSearchDecoder beam_search_decoder; + beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds, + sentenceScores); + } +}; + +class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + BeamSearchDecodeOpProtoMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Ids", + "(LodTensorArray)" + "score of the candidate words in each step"); + AddInput("Scores", + "(LodTensorArray)" + "score of the candidate words in each step"); + AddOutput("SentenceIds", + "(LodTensor)" + "All possible result sentences of word ids"); + AddOutput("SentenceScores", + "(LodTensor)" + "All possible result sentences of word scores"); + AddComment(R"DOC( +Pack the result of Beam search op into SentenceIds and SentenceScores. +)DOC"); + } +}; + +class BeamSearchDecodeInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext* context) const override { + PADDLE_ENFORCE(context->HasInput("Ids"), + "BeamSearchDecodeOp must has input Ids"); + PADDLE_ENFORCE(context->HasInput("Scores"), + "BeamSearchDecodeOp must has input Scores"); + PADDLE_ENFORCE(context->HasOutput("SentenceIds"), + "BeamSearchDecodeOp must has output SentenceIds"); + PADDLE_ENFORCE(context->HasOutput("SentenceScores"), + "BeamSearchDecodeOp must has output SentenceScores"); + } +}; + +class BeamSearchDecodeInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDescBind& op_desc, + framework::BlockDescBind* block) const override { + for (auto& o : op_desc.Output("SentenceIds")) { + block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR); + } + for (auto& o : op_desc.Output("SentenceScores")) { + block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR); + } + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp, + paddle::operators::BeamSearchDecodeOpProtoMaker, + paddle::operators::BeamSearchDecodeInferShape, + paddle::operators::BeamSearchDecodeInferVarType, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/operators/beam_search_decode_op.h b/paddle/operators/beam_search_decode_op.h new file mode 100644 index 0000000000..0f007ec22f --- /dev/null +++ b/paddle/operators/beam_search_decode_op.h @@ -0,0 +1,280 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/lod_tensor_array.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; +using LoDTensorArray = framework::LoDTensorArray; + +// all the lod have 2 levels. +// The First is source level, the second is sentence level. +// source level describe how many candidate words for this source. +// sentence level describe these candidates belong to which prefix +const size_t kSourceLevel = 0; +const size_t kSentenceLevel = 1; + +template +struct BeamNode { + BeamNode(int64_t word_id, T score) : word_id_(word_id), score_(score) {} + + ~BeamNode() { + if (parent_) { + parent_->DropKid(this); + if (parent_->kids_.size() == 0UL) { + delete parent_; + } + } + VLOG(3) << "Delete BeamNode root with word_id:" << this->word_id_; + } + + void AppendTo(BeamNode* parent) { + parent_ = parent; + parent->kids_.insert(this); + } + + void DropKid(BeamNode* kid) { kids_.erase(kid); } + + BeamNode* parent_ = nullptr; + std::unordered_set kids_; + int64_t word_id_; + T score_; +}; + +template +using BeamNodeVector = std::vector>>; + +template +struct Sentence { + std::vector word_ids; + std::vector scores; +}; + +template +using SentenceVector = std::vector>; + +template +struct BeamSearchDecoder { + /** + * make a BeamNode and all it's related prefix BeanNode into a Sentence. + */ + Sentence MakeSentence(const BeamNode* node) const; + + /** + * Param: + * cur_ids: LoDTensor of One step for word ID + * cur_scores: LoDTensor of One Step for word score + * prefixes_list: prefixes for each source sentence. + * sentence_vector_list: result sentence_vector for each source sentence. + * Return: + * a new prefixes list for each source of current step + */ + std::vector> PackTwoSteps( + const LoDTensor& cur_ids, const LoDTensor& cur_scores, + std::vector>& prefixes_list, + std::vector>* sentence_vector_list) const; + + /** + * convert the result sentence_vector for each source sentence into two + * LodTensor. + * One is all candidate sentences with word id, one is all candidate sentences + * with word score. + * Param: + * sentence_vector_list: sentence_vector for each source sentence. + * id_tensor: result LoDTensor for sentences of id. + * score_tensor: result LoDTensor for sentences of score. + */ + void ConvertSentenceVectorToLodTensor( + std::vector> sentence_vector_list, LoDTensor* id_tensor, + LoDTensor* score_tensor) const; + + /** + * Pack all steps of id/score LodTensor into sentence LoDTensor + * it's main logic is: + * ```python + * prefix + * result_sentence + * result_lod_tensor + * + * for (step in steps): + * prefix = PackTwoSteps(prefix, step, &result_sentence) + * ConvertSentenceVectorToLodTensor(result_sentence, &result_lod_tensor) + * ``` + */ + void PackAllSteps(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, LoDTensor* id_tensor, + LoDTensor* score_tensor) const; +}; + +template +Sentence BeamSearchDecoder::MakeSentence(const BeamNode* node) const { + Sentence sentence; + while (node != nullptr) { + sentence.word_ids.emplace_back(node->word_id_); + sentence.scores.emplace_back(node->score_); + node = node->parent_; + } + + std::reverse(std::begin(sentence.word_ids), std::end(sentence.word_ids)); + std::reverse(std::begin(sentence.scores), std::end(sentence.scores)); + + return sentence; +} + +template +std::vector> BeamSearchDecoder::PackTwoSteps( + const LoDTensor& cur_ids, const LoDTensor& cur_scores, + std::vector>& prefixes_list, + std::vector>* sentence_vector_list) const { + std::vector> result; + + for (size_t src_idx = 0; src_idx < cur_ids.lod()[kSourceLevel].size() - 1; + ++src_idx) { + size_t src_start = cur_ids.lod().at(kSourceLevel)[src_idx]; + size_t src_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1]; + + BeamNodeVector beam_nodes; + + // if prefixes size is 0, it means this is the first step. In this step, + // all candidate id is the start of candidate sentences. + if (prefixes_list.empty()) { + PADDLE_ENFORCE_EQ(cur_ids.lod().at(kSourceLevel).back(), + cur_ids.lod().at(kSentenceLevel).back(), + "in the first step"); + for (size_t id_idx = src_start; id_idx < src_end; ++id_idx) { + beam_nodes.push_back(std::unique_ptr>(new BeamNode( + cur_ids.data()[id_idx], cur_scores.data()[id_idx]))); + } + } else { + BeamNodeVector& prefixes = prefixes_list[src_idx]; + SentenceVector& sentence_vector = (*sentence_vector_list)[src_idx]; + + PADDLE_ENFORCE_EQ(src_end - src_start, prefixes.size(), + "prefix and candidate set number should be the same"); + + auto candidate_offset = cur_ids.lod()[kSentenceLevel]; + for (size_t prefix_idx = 0; prefix_idx < prefixes.size(); ++prefix_idx) { + std::unique_ptr>& prefix = prefixes[prefix_idx]; + size_t candidate_start = candidate_offset[src_start + prefix_idx]; + size_t candidate_end = candidate_offset[src_start + prefix_idx + 1]; + if (candidate_start == candidate_end) { + VLOG(3) << "this sentence has no more candidate, " + "add to result sentence and rm it from beam tree"; + sentence_vector.push_back(MakeSentence(prefix.get())); + prefix.reset(); + } else { + for (size_t candidate_idx = candidate_start; + candidate_idx < candidate_end; ++candidate_idx) { + auto* candidate = + new BeamNode(cur_ids.data()[candidate_idx], + cur_scores.data()[candidate_idx]); + candidate->AppendTo(prefix.get()); + beam_nodes.push_back(std::unique_ptr>(candidate)); + } + prefix.release(); + } + } + } + result.push_back(std::move(beam_nodes)); + } + return result; +} + +template +void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( + std::vector> sentence_vector_list, LoDTensor* id_tensor, + LoDTensor* score_tensor) const { + size_t src_num = sentence_vector_list.size(); + + PADDLE_ENFORCE_NE(src_num, 0, "src_num should not be 0"); + + std::vector source_level_lod = {0}; + std::vector sentence_level_lod = {0}; + std::vector id_data; + std::vector score_data; + + for (size_t src_idx = 0; src_idx < src_num; ++src_idx) { + for (Sentence& sentence : sentence_vector_list[src_idx]) { + id_data.insert(id_data.end(), sentence.word_ids.begin(), + sentence.word_ids.end()); + score_data.insert(score_data.end(), sentence.scores.begin(), + sentence.scores.end()); + sentence_level_lod.push_back(sentence_level_lod.back() + + sentence.word_ids.size()); + } + source_level_lod.push_back(source_level_lod.back() + + sentence_vector_list[src_idx].size()); + } + + auto cpu_place = new paddle::platform::CPUPlace(); + paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place); + + framework::LoD lod; + lod.push_back(source_level_lod); + lod.push_back(sentence_level_lod); + + id_tensor->set_lod(lod); + id_tensor->Resize({static_cast(id_data.size())}); + id_tensor->mutable_data(paddle::platform::CPUPlace()); + id_tensor->CopyFromVector(id_data, cpu_ctx); + + score_tensor->set_lod(lod); + score_tensor->Resize({static_cast(score_data.size())}); + score_tensor->mutable_data(paddle::platform::CPUPlace()); + score_tensor->CopyFromVector(score_data, cpu_ctx); +} + +template +void BeamSearchDecoder::PackAllSteps(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, + LoDTensor* id_tensor, + LoDTensor* score_tensor) const { + PADDLE_ENFORCE(!step_ids.empty(), "step num should be larger than 0"); + PADDLE_ENFORCE_EQ(step_ids.size(), step_scores.size(), + "step_ids and step_scores should be the same"); + const size_t step_num = step_ids.size(); + const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1; + + PADDLE_ENFORCE_GT(src_num, 0UL, "source num should be larger than 0"); + + // previous prefixes for each step, + // the init length is 0, means this is the first step. + std::vector> beamnode_vector_list(0); + std::vector> sentence_vector_list(src_num); + + // pack all steps for one batch first, then another batch + for (size_t step_id = 0; step_id < step_num; ++step_id) { + beamnode_vector_list = + PackTwoSteps(step_ids.at(step_id), step_scores.at(step_id), + beamnode_vector_list, &sentence_vector_list); + } + // append last beam_node to result + for (size_t src_idx = 0; src_idx < src_num; ++src_idx) { + for (auto& beam_node : beamnode_vector_list.at(src_idx)) { + sentence_vector_list[src_idx].push_back(MakeSentence(beam_node.get())); + beam_node.reset(); + } + } + + ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor, + score_tensor); +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/beam_search_decode_op_test.cc b/paddle/operators/beam_search_decode_op_test.cc new file mode 100644 index 0000000000..5ac23991f3 --- /dev/null +++ b/paddle/operators/beam_search_decode_op_test.cc @@ -0,0 +1,221 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/beam_search_decode_op.h" +#include "gtest/gtest.h" + +using CPUPlace = paddle::platform::CPUPlace; +using LoD = paddle::framework::LoD; +using LoDTensor = paddle::framework::LoDTensor; +using LoDTensorArray = paddle::framework::LoDTensorArray; + +template +using BeamNode = paddle::operators::BeamNode; +template +using BeamSearchDecoder = paddle::operators::BeamSearchDecoder; +template +using Sentence = paddle::operators::Sentence; +template +using BeamNodeVector = paddle::operators::BeamNodeVector; +template +using SentenceVector = paddle::operators::SentenceVector; + +namespace paddle { +namespace test { + +void GenerateExample(const std::vector& level_0, + const std::vector& level_1, + const std::vector& data, LoDTensorArray* ids, + LoDTensorArray* scores) { + PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1, + "source level is used to describe candidate set"); + PADDLE_ENFORCE_EQ(level_1.back(), data.size(), + "the lowest level is used to describe data" + ", so it's last element should be data length"); + + CPUPlace place; + + LoD lod; + lod.push_back(level_0); + lod.push_back(level_1); + + // Ids + LoDTensor tensor_id; + tensor_id.set_lod(lod); + tensor_id.Resize({static_cast(data.size())}); + // malloc memory + int64_t* id_ptr = tensor_id.mutable_data(place); + for (size_t i = 0; i < data.size(); ++i) { + id_ptr[i] = static_cast(data.at(i)); + } + + // Scores + LoDTensor tensor_score; + tensor_score.set_lod(lod); + tensor_score.Resize({static_cast(data.size())}); + // malloc memory + float* score_ptr = tensor_score.mutable_data(place); + for (size_t i = 0; i < data.size(); ++i) { + score_ptr[i] = static_cast(data.at(i)); + } + + ids->push_back(tensor_id); + scores->push_back(tensor_score); +} + +} // namespace test +} // namespace paddle + +TEST(BeamSearchDecodeOp, DeleteBeamNode) { + auto* root = new BeamNode(0, 0); + auto* b1 = new BeamNode(1, 1); + auto* b2 = new BeamNode(2, 2); + auto* b3 = new BeamNode(3, 3); + + b1->AppendTo(root); + b2->AppendTo(root); + b3->AppendTo(b1); + + delete b3; + delete b2; +} + +TEST(BeamSearchDecodeOp, MakeSentence) { + auto* root = new BeamNode(0, 0); + auto* b1 = new BeamNode(1, 1); + auto* end = new BeamNode(2, 2); + b1->AppendTo(root); + end->AppendTo(b1); + + BeamSearchDecoder helper; + Sentence sentence = helper.MakeSentence(end); + delete end; + + std::vector expect_ids = {0, 1, 2}; + ASSERT_EQ(sentence.word_ids, expect_ids); + + std::vector expect_scores = {0, 1, 2}; + ASSERT_EQ(sentence.scores, expect_scores); +} + +TEST(BeamSearchDecodeOp, PackTwoStepsFistStep) { + CPUPlace place; + + LoDTensorArray ids; + LoDTensorArray scores; + + paddle::test::GenerateExample( + std::vector{0, 2, 6}, std::vector{0, 1, 2, 3, 4, 5, 6}, + std::vector{1, 2, 3, 4, 5, 6}, &ids, &scores); + + std::vector> beamnode_vector_list; + std::vector> sentence_vector_list( + 2, SentenceVector()); + + BeamSearchDecoder helper; + beamnode_vector_list = helper.PackTwoSteps( + ids[0], scores[0], beamnode_vector_list, &sentence_vector_list); + ASSERT_EQ(beamnode_vector_list.size(), 2UL); + ASSERT_EQ(beamnode_vector_list[0].size(), 2UL); + ASSERT_EQ(beamnode_vector_list[1].size(), 4UL); +} + +TEST(BeamSearchDecodeOp, PackTwoSteps) { + CPUPlace place; + + // first source has three prefix + BeamNodeVector source0_prefixes; + source0_prefixes.push_back( + std::unique_ptr>(new BeamNode(1, 1))); + source0_prefixes.push_back( + std::unique_ptr>(new BeamNode(0, 0))); + source0_prefixes.push_back( + std::unique_ptr>(new BeamNode(3, 3))); + + // second source has two prefix + BeamNodeVector source1_prefixes; + source1_prefixes.push_back( + std::unique_ptr>(new BeamNode(4, 4))); + source1_prefixes.push_back( + std::unique_ptr>(new BeamNode(5, 5))); + + std::vector> beamnode_vector_list; + std::vector> sentence_vector_list( + 2, SentenceVector()); + + beamnode_vector_list.push_back(std::move(source0_prefixes)); + beamnode_vector_list.push_back(std::move(source1_prefixes)); + + // generate data for one step + LoDTensorArray ids; + LoDTensorArray scores; + + paddle::test::GenerateExample(std::vector{0, 3, 5}, + std::vector{0, 1, 1, 3, 4, 5}, + std::vector{0, 1, 2, 3, 4}, &ids, &scores); + + BeamSearchDecoder helper1; + beamnode_vector_list = helper1.PackTwoSteps( + ids[0], scores[0], beamnode_vector_list, &sentence_vector_list); + + ASSERT_EQ(sentence_vector_list[0].size(), 1UL); + ASSERT_EQ(sentence_vector_list[1].size(), 0UL); + ASSERT_EQ(beamnode_vector_list[0].size(), 3UL); + ASSERT_EQ(beamnode_vector_list[1].size(), 2UL); +} + +TEST(BeamSearchDecodeOp, PackAllSteps) { + CPUPlace place; + + // we will constuct a sample data with 3 steps and 2 source sentences + LoDTensorArray ids; + LoDTensorArray scores; + + paddle::test::GenerateExample( + std::vector{0, 3, 6}, std::vector{0, 1, 2, 3, 4, 5, 6}, + std::vector{1, 2, 3, 4, 5, 6}, &ids, &scores); + paddle::test::GenerateExample( + std::vector{0, 3, 6}, std::vector{0, 1, 1, 3, 5, 5, 6}, + std::vector{0, 1, 2, 3, 4, 5}, &ids, &scores); + paddle::test::GenerateExample(std::vector{0, 3, 6}, + std::vector{0, 0, 1, 2, 3, 4, 5}, + std::vector{0, 1, 2, 3, 4}, &ids, &scores); + + ASSERT_EQ(ids.size(), 3UL); + ASSERT_EQ(scores.size(), 3UL); + + BeamSearchDecoder helper; + + LoDTensor id_tensor; + LoDTensor score_tensor; + helper.PackAllSteps(ids, scores, &id_tensor, &score_tensor); + + LoD lod = id_tensor.lod(); + std::vector expect_source_lod = {0, 4, 8}; + EXPECT_EQ(lod[0], expect_source_lod); + std::vector expect_sentence_lod = {0, 1, 3, 6, 9, 10, 13, 16, 19}; + EXPECT_EQ(lod[1], expect_sentence_lod); + // 2| 1, 0| 3, 1, 0| 3, 2, 1| 5| 4, 3, 2| 4, 4, 3| 6, 5, 4 + std::vector expect_data = {2, 1, 0, 3, 1, 0, 3, 2, 1, 5, + 4, 3, 2, 4, 4, 3, 6, 5, 4}; + ASSERT_EQ(id_tensor.dims()[0], static_cast(expect_data.size())); + for (size_t i = 0; i < expect_data.size(); ++i) { + ASSERT_EQ(id_tensor.data()[i], + static_cast(expect_data[i])); + } + for (int64_t i = 0; i < id_tensor.dims()[0]; ++i) { + ASSERT_EQ(score_tensor.data()[i], + static_cast(id_tensor.data()[i])); + } +} diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc new file mode 100644 index 0000000000..c65ba7eb26 --- /dev/null +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -0,0 +1,159 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/bilinear_tensor_product_op.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class BilinearTensorProductOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(Weight) should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null."); + auto x_dims = ctx->GetInputDim("X"); + auto y_dims = ctx->GetInputDim("Y"); + auto weight_dims = ctx->GetInputDim("Weight"); + + PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input(X) must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input(Y) must be a 2D Tensor."); + PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL, + "The input(Weight) must be a 3D tensor."); + PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0], + "The first dimension(batch_size) of input(X) must be " + "equal to the first dimension of the input(Y)."); + PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1], + "The second dimension of input(X) must be equal to " + "the second dimension of the input(Weight)."); + PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2], + "The second dimension of input(Y) must be equal to " + "the third dimension of the input(Weight)."); + + if (ctx->HasInput("Bias")) { + auto bias_dims = ctx->GetInputDim("Bias"); + PADDLE_ENFORCE(bias_dims.size() == 2UL && bias_dims[0] == 1UL, + "The Input(Bias) must be a 2-D tensor with " + "the 2nd dimension fixed to 1 (a row vector)."); + PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0], + "The second dimension of input(Bias) must be equal " + "to the first dimension of the input(Weight)."); + } + + ctx->SetOutputDim("Out", {x_dims[0], weight_dims[0]}); + ctx->ShareLoD("X", /*->*/ "Out"); + } +}; + +class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { + public: + BilinearTensorProductOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The first input of bilinear_tensor_product operator."); + AddInput("Y", "The second input of bilinear_tensor_product operator."); + AddInput("Weight", + "The learnable parameters of bilinear_tensor_product operator."); + AddInput("Bias", "The learnable bias of bilinear_tensor_product operator.") + .AsDispensable(); + AddOutput("Out", "The output of bilinear_tensor_product operator."); + AddComment(R"DOC( +Bilinear Tensor Product operator. +Given input X and Y, a 3D tensor weight, and bias. Each column of the +output is computed by one slice i = 1, . . . , k of the tensor: + + M = (X W_i) \cdot Y + Out_i = \sum_i {M_i} + Bias_i + +)DOC"); + } +}; + +class BilinearTensorProductOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(Weight) should not be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null."); + auto x_dims = ctx->GetInputDim("X"); + auto y_dims = ctx->GetInputDim("Y"); + auto weight_dims = ctx->GetInputDim("Weight"); + auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); + + PADDLE_ENFORCE_EQ(out_dims.size(), 2UL, + "The input(Out@GRAD) must be a 2D Tensor."); + PADDLE_ENFORCE_EQ( + x_dims[0], out_dims[0], + "The first dimension(batch_size) of input(Out@GRAD) must be " + "equal to the first dimension of the Input(X)."); + PADDLE_ENFORCE_EQ( + weight_dims[0], out_dims[1], + "The second dimension of input(Out@GRAD) must be equal to " + "the third dimension of the Input(Weight)."); + + if (ctx->HasInput("Bias")) { + auto bias_dims = ctx->GetInputDim("Bias"); + PADDLE_ENFORCE_EQ( + bias_dims[1], out_dims[1], + "The second dimension of input(Out@GRAD) must be equal to " + "the second dimension of the Input(Bias)."); + auto bias_grad_name = framework::GradVarName("Bias"); + if (ctx->HasOutput(bias_grad_name)) + ctx->SetOutputDim(bias_grad_name, bias_dims); + } + + auto x_grad_name = framework::GradVarName("X"); + auto y_grad_name = framework::GradVarName("Y"); + auto weight_grad_name = framework::GradVarName("Weight"); + + if (ctx->HasOutput(x_grad_name)) { + ctx->SetOutputDim(x_grad_name, x_dims); + } + if (ctx->HasOutput(y_grad_name)) { + ctx->SetOutputDim(y_grad_name, y_dims); + } + if (ctx->HasOutput(weight_grad_name)) { + ctx->SetOutputDim(weight_grad_name, weight_dims); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp, + ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad, + ops::BilinearTensorProductOpGrad); +REGISTER_OP_CPU_KERNEL( + bilinear_tensor_product, + ops::BilinearTensorProductKernel, + ops::BilinearTensorProductKernel); +REGISTER_OP_CPU_KERNEL( + bilinear_tensor_product_grad, + ops::BilinearTensorProductGradKernel, + ops::BilinearTensorProductGradKernel); diff --git a/paddle/operators/bilinear_tensor_product_op.cu b/paddle/operators/bilinear_tensor_product_op.cu new file mode 100644 index 0000000000..858d2668d0 --- /dev/null +++ b/paddle/operators/bilinear_tensor_product_op.cu @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/bilinear_tensor_product_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + bilinear_tensor_product, + ops::BilinearTensorProductKernel, + ops::BilinearTensorProductKernel); +REGISTER_OP_GPU_KERNEL( + bilinear_tensor_product_grad, + ops::BilinearTensorProductGradKernel, + ops::BilinearTensorProductGradKernel); diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h new file mode 100644 index 0000000000..ffa4f43a32 --- /dev/null +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -0,0 +1,184 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +template +using EigenMatrix = framework::EigenMatrix; + +template +class BilinearTensorProductKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* weight = ctx.Input("Weight"); + auto* bias = ctx.Input("Bias"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + + auto y_mat = EigenMatrix::From(*y); + auto output_mat = EigenMatrix::From(*out); + + auto batch_size = x->dims()[0]; + auto weight_dims = weight->dims(); + int out_dim = weight_dims[0]; + auto x_dim = weight_dims[1]; + auto y_dim = weight_dims[2]; + auto place = ctx.GetEigenDevice(); + + // Create the intermediate variable to caculate the result of + // Input(X) multiplied by Input(Weight_i), the formula is: + // left_mul = X Weight_i. + Tensor left_mul; + left_mul.mutable_data(framework::make_ddim({batch_size, y_dim}), + ctx.GetPlace()); + auto left_mul_mat = EigenMatrix::From(left_mul); + + for (int i = 0; i < out_dim; ++i) { + auto output_col_vec = output_mat.chip(i, 1); + Tensor weight_mat = + weight->Slice(i, i + 1).Resize(framework::make_ddim({x_dim, y_dim})); + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, + batch_size, y_dim, x_dim, 1, x->data(), + weight_mat.data(), 0, left_mul.data()); + output_col_vec.device(place) = + (left_mul_mat * y_mat).sum(Eigen::DSizes(1)); + } + if (bias) { + auto bias_vec = EigenMatrix::From(*bias); + Eigen::DSizes bcast(batch_size, 1); + output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat; + } + } +}; + +template +class BilinearTensorProductGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + const Tensor* x = ctx.Input("X"); + const Tensor* y = ctx.Input("Y"); + const Tensor* weight = ctx.Input("Weight"); + Tensor* d_x = ctx.Output(framework::GradVarName("X")); + Tensor* d_y = ctx.Output(framework::GradVarName("Y")); + Tensor* d_weight = ctx.Output(framework::GradVarName("Weight")); + Tensor* d_bias = ctx.Output(framework::GradVarName("Bias")); + const Tensor* d_out = ctx.Input(framework::GradVarName("Out")); + + auto batch_size = x->dims()[0]; + auto weight_dims = weight->dims(); + int out_dim = weight_dims[0]; + auto x_dim = weight_dims[1]; + auto y_dim = weight_dims[2]; + + auto x_mat = EigenMatrix::From(*x); + auto y_mat = EigenMatrix::From(*y); + auto d_out_mat = EigenMatrix::From(*d_out); + auto place = ctx.GetEigenDevice(); + + // Create the intermediate variable to caculate the Output(Y@Grad). + Tensor x_scale; + x_scale.mutable_data(framework::make_ddim({batch_size, x_dim}), + ctx.GetPlace()); + auto x_scale_mat = EigenMatrix::From(x_scale); + + // Create the intermediate variable to caculate the Output(X@Grad). + Tensor y_scale; + y_scale.mutable_data(framework::make_ddim({batch_size, y_dim}), + ctx.GetPlace()); + auto y_scale_mat = EigenMatrix::From(y_scale); + + math::SetConstant set_zero; + + // Set Output(X@Grad) be zero. + if (d_x) { + d_x->mutable_data(ctx.GetPlace()); + set_zero(ctx.device_context(), d_x, static_cast(0)); + } + + // Set Output(Y@Grad) be zero. + if (d_y) { + d_y->mutable_data(ctx.GetPlace()); + set_zero(ctx.device_context(), d_y, static_cast(0)); + } + + // Caculate the Output(X@Grad) and Output(Y@Grad). + if (d_x || d_y) { + Eigen::DSizes bcast_for_x(1, y_dim); + Eigen::DSizes bcast_for_y(1, x_dim); + for (int i = 0; i < out_dim; ++i) { + Tensor weight_i = weight->Slice(i, i + 1).Resize( + framework::make_ddim({x_dim, y_dim})); + auto output_vec = d_out_mat.chip(i, 1); + if (d_x) { + y_scale_mat.device(place) = + output_vec.reshape(Eigen::DSizes(batch_size, 1)) + .broadcast(bcast_for_x) * + y_mat; + math::gemm(ctx.device_context(), CblasNoTrans, CblasTrans, + batch_size, x_dim, y_dim, 1, y_scale.data(), + weight_i.data(), 1, d_x->data()); + } + if (d_y) { + x_scale_mat.device(place) = + output_vec.reshape(Eigen::DSizes(batch_size, 1)) + .broadcast(bcast_for_y) * + x_mat; + math::gemm(ctx.device_context(), CblasNoTrans, CblasNoTrans, + batch_size, y_dim, x_dim, 1, x_scale.data(), + weight_i.data(), 1, d_y->data()); + } + } + } + + // Caculate the gradient of Input(Weight). + if (d_weight) { + d_weight->mutable_data(ctx.GetPlace()); + Eigen::DSizes bcast_for_weight(1, x_dim); + for (int i = 0; i < out_dim; ++i) { + Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize( + framework::make_ddim({x_dim, y_dim})); + auto output_vec = d_out_mat.chip(i, 1); + x_scale_mat.device(place) = + output_vec.reshape(Eigen::DSizes(batch_size, 1)) + .broadcast(bcast_for_weight) * + x_mat; + math::gemm(ctx.device_context(), CblasTrans, CblasNoTrans, + x_dim, y_dim, batch_size, 1, x_scale.data(), + y->data(), 0, d_weight_i.data()); + } + } + + // Caculate the gradient of Input(Bias). + if (d_bias) { + d_bias->mutable_data(ctx.GetPlace()); + auto d_bias_mat = EigenMatrix::From(*d_bias); + d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes(0)); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/conditional_block_op.cc b/paddle/operators/conditional_block_op.cc new file mode 100644 index 0000000000..d5b124682d --- /dev/null +++ b/paddle/operators/conditional_block_op.cc @@ -0,0 +1,197 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ +#include +#include "paddle/framework/executor.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +class ConditionalOp : public framework::OperatorBase { + public: + ConditionalOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + protected: + std::vector InputTensors( + const framework::Scope &scope) const { + std::vector retv; + auto xs = Inputs("X"); + retv.resize(xs.size(), nullptr); + std::transform( + xs.begin(), xs.end(), retv.begin(), + [&scope](const std::string &var_name) -> const framework::LoDTensor * { + auto *var = scope.FindVar(var_name); + PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", var_name); + return &var->Get(); + }); + return retv; + } +}; + +class ConditionalBlockOp : public ConditionalOp { + public: + ConditionalBlockOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : ConditionalOp(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto xs = InputTensors(scope); + bool need_run = std::all_of( + xs.begin(), xs.end(), + [](const framework::LoDTensor *t) { return t->numel() != 0; }); + + if (need_run) { + auto *scope_var = scope.FindVar(Output("Scope")); + PADDLE_ENFORCE(scope_var != nullptr, "Must set scope"); + auto *scopes = scope_var->GetMutable>(); + scopes->resize(1); + scopes->front() = &scope.NewScope(); + auto &cur_scope = *scopes->front(); + + auto *block = Attr("block"); + framework::Executor exec(dev_ctx); + exec.Run(*block->Program(), &cur_scope, block->ID(), false); + } + } +}; + +class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + ConditionalBlockOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "The conditional variable of this operator. If X is empty, the " + "whole sub-block will not be executed.") + .AsDuplicable(); + AddInput("Params", "The input variables of the sub-block.").AsDuplicable(); + AddOutput("Out", "The output variables of the sub-block.").AsDuplicable(); + AddOutput("Scope", + "(std::vector) The step scope of conditional block. To " + "unify the conditional block, rnn and while op, the type of " + "scope is std::vector"); + AddAttr( + "block", "The step block of conditional block operator"); + AddComment(R"DOC(Conditional block operator + +Run the sub-block if X is not empty. Params is the other inputs and Out is the +outputs of the sub-block. +)DOC"); + } +}; + +class ConditionalBlockGradOp : public ConditionalOp { + public: + ConditionalBlockGradOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : ConditionalOp(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto xs = this->InputTensors(scope); + bool need_run = std::all_of( + xs.begin(), xs.end(), + [](const framework::LoDTensor *t) { return t->numel() != 0; }); + + if (need_run) { + auto *scope_var = scope.FindVar(Input("Scope")); + PADDLE_ENFORCE(scope_var != nullptr, "Must set scope"); + auto &scopes = scope_var->Get>(); + framework::Scope &cur_scope = *scopes[0]; + + auto *block = Attr("block"); + framework::Executor exec(dev_ctx); + exec.Run(*block->Program(), &cur_scope, block->ID(), false); + + AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("Params"), + Outputs(framework::GradVarName("Params"))); + + AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("X"), + Outputs(framework::GradVarName("X"))); + } + } + + private: + void AssignLocalGradientToGlobal( + const platform::DeviceContext &dev_ctx, const framework::Scope &cur_scope, + const std::vector &p_names, + const std::vector &pg_names) const { + for (size_t i = 0; i < p_names.size(); ++i) { + auto out_grad_name = pg_names[i]; + auto in_grad_name = framework::GradVarName(p_names[i]); + auto *in_var = cur_scope.FindVar(in_grad_name); + if (in_var == nullptr) { + continue; + } + auto new_in_grad_name = cur_scope.Rename(in_grad_name); + auto assign = + framework::OpRegistry::CreateOp("assign", {{"X", {new_in_grad_name}}}, + {{"Out", {out_grad_name}}}, {}); + assign->Run(cur_scope, dev_ctx); + cur_scope.Rename(new_in_grad_name, in_grad_name); + } + } +}; + +class ConditionalBlockGradInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInputs("X")); + if (context->HasInputs("Params")) { + PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("Params"))); + context->SetOutputsDim(framework::GradVarName("Params"), + context->GetInputsDim("Params")); + } + PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("X"))); + context->SetOutputsDim(framework::GradVarName("X"), + context->GetInputsDim("X")); + } +}; + +class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto grad_op = new framework::OpDescBind(); + grad_op->SetType("conditional_block_grad"); + grad_op->SetInput("X", Input("X")); + grad_op->SetInput("Params", Input("Params")); + grad_op->SetInput("Out", Output("Out")); + grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + grad_op->SetInput("Scope", Output("Scope")); + grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + grad_op->SetOutput(framework::GradVarName("Params"), InputGrad("Params")); + grad_op->SetBlockAttr("block", *this->grad_block_[0]); + return std::unique_ptr(grad_op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(conditional_block, ops::ConditionalBlockOp, + ops::ConditionalBlockOpProtoMaker, + ops::ConditionalBlockGradMaker); +REGISTER_OPERATOR(conditional_block_grad, ops::ConditionalBlockGradOp, + ops::ConditionalBlockGradInferShape); diff --git a/paddle/operators/l1_norm_op.h b/paddle/operators/l1_norm_op.h index de459818ad..3c60dc3dc7 100644 --- a/paddle/operators/l1_norm_op.h +++ b/paddle/operators/l1_norm_op.h @@ -29,7 +29,7 @@ class L1NormKernel : public framework::OpKernel { Out->mutable_data(context.GetPlace()); auto x = framework::EigenVector::Flatten(*X); - auto out = framework::EigenVector::Flatten(*Out); + auto out = framework::EigenScalar::From(*Out); auto place = context.GetEigenDevice(); out.device(place) = x.abs().sum(); diff --git a/paddle/operators/lod_reset_op.cc b/paddle/operators/lod_reset_op.cc new file mode 100644 index 0000000000..32831cb1e2 --- /dev/null +++ b/paddle/operators/lod_reset_op.cc @@ -0,0 +1,120 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/lod_reset_op.h" + +namespace paddle { +namespace operators { + +class LoDResetOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + // input check + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of LoDResetOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of LoDResetOp should not be null."); + // If target LoD is not set form Input(), then it must be set from Attr(). + if (!ctx->HasInput("TargetLoD")) { + auto level0 = ctx->Attrs().Get>("target_lod"); + PADDLE_ENFORCE(level0.size() > 1, + "Target LoD is not found, should be set to be a valid one " + "through Input() or Attr()."); + } + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class LoDResetOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LoDResetOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(LoDTensor) The input tensor of lod_reset operator."); + AddInput("TargetLoD", + "(Tensor, optional) The target level 0 LoD from Input().") + .AsDispensable(); + AddOutput("Out", "(LoDTensor) The output tensor of lod_reset operator."); + AddAttr>("target_lod", + "The target level 0 LoD from Attr().") + .SetDefault(std::vector{}); + AddComment(R"DOC(LoDReset operator + +Reset LoD of Input(X) into a new one specified by Input(TargetLoD) or +Attr(target_lod), or set LoD for Input(X) if it doesn't have one. +Currently the lod_reset operator only supports the reset of level 0 LoD. +At least one of Input(TargetLoD) and Attr(target_lod) must be set, +and if both of them are set, Input(TargetLoD) will be chosen as the +target LoD. + +An example: +Given a float LoDTensor X with shape (6, 1), its transpose form represents + + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + +with LoD = [[0, 2, 5, 6]] and the three (transposed) sequences look like + + [1.0, 2.0], [3.0, 4.0, 5.0], [6.0]. + +If target LoD = [0, 4, 6], the lod_reset operator will reset the LoD and +the sequences that the LoDTensor Output(Out) contains becomes: + + [1.0, 2.0, 3.0, 4.0], [5.0, 6.0]. + +)DOC"); + } +}; + +class LoDResetGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) shouldn't be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, lod_reset_grad, + ops::LoDResetGradOp); +REGISTER_OP_CPU_KERNEL(lod_reset, + ops::LoDResetKernel, + ops::LoDResetKernel); +REGISTER_OP_CPU_KERNEL( + lod_reset_grad, ops::LoDResetGradKernel, + ops::LoDResetGradKernel); diff --git a/paddle/operators/lod_reset_op.cu b/paddle/operators/lod_reset_op.cu new file mode 100644 index 0000000000..5244a17c3a --- /dev/null +++ b/paddle/operators/lod_reset_op.cu @@ -0,0 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/lod_reset_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(lod_reset, + ops::LoDResetKernel, + ops::LoDResetKernel); +REGISTER_OP_GPU_KERNEL( + lod_reset_grad, ops::LoDResetGradKernel, + ops::LoDResetGradKernel); diff --git a/paddle/operators/lod_reset_op.h b/paddle/operators/lod_reset_op.h new file mode 100644 index 0000000000..2bb916ccee --- /dev/null +++ b/paddle/operators/lod_reset_op.h @@ -0,0 +1,78 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class LoDResetKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto* lod_t = ctx.Input("TargetLoD"); + + std::vector level0; + if (lod_t) { + auto* lod = lod_t->data(); + if (platform::is_gpu_place(ctx.GetPlace())) { + framework::Tensor lod_cpu; + lod_cpu.CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context()); + lod = lod_cpu.data(); + } + level0 = std::vector(lod, lod + lod_t->numel()); + } else { + level0 = ctx.Attr>("target_lod"); + } + + PADDLE_ENFORCE(level0.size() > 1UL, + "The size of target LoD should be greater than 1."); + PADDLE_ENFORCE(level0[0] == 0, + "Target LoD should be a vector starting from 0."); + PADDLE_ENFORCE(level0.back() == in->dims()[0], + "Target LoD should be a vector end with the " + "first dimension of Input(X)."); + for (size_t i = 0; i < level0.size() - 1; ++i) { + PADDLE_ENFORCE(level0[i + 1] > level0[i], + "Target LoD should be an ascending vector."); + } + + out->ShareDataWith(*in); + // cast level0 to size_t + std::vector ulevel0(level0.size(), 0); + std::transform(level0.begin(), level0.end(), ulevel0.begin(), + [](int a) { return static_cast(a); }); + framework::LoD target_lod; + target_lod.push_back(ulevel0); + out->set_lod(target_lod); + } +}; + +template +class LoDResetGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + + d_x->ShareDataWith(*d_out); + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/pooling.cc b/paddle/operators/math/pooling.cc index 50cfb88bb5..ead89e146f 100644 --- a/paddle/operators/math/pooling.cc +++ b/paddle/operators/math/pooling.cc @@ -27,15 +27,15 @@ template class Pool2dFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - std::vector& ksize, std::vector& strides, - std::vector& paddings, PoolProcess pool_process) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + PoolProcess pool_process, framework::Tensor* output) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; - const int output_channels = output.dims()[1]; - const int output_height = output.dims()[2]; - const int output_width = output.dims()[3]; + const int output_channels = output->dims()[1]; + const int output_height = output->dims()[2]; + const int output_width = output->dims()[3]; const int ksize_height = ksize[0]; const int ksize_width = ksize[1]; const int stride_height = strides[0]; @@ -47,7 +47,7 @@ class Pool2dFunctor { const int output_stride = output_height * output_width; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -87,11 +87,12 @@ template class Pool2dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, std::vector& strides, std::vector& paddings, - PoolProcess pool_grad_process) { + PoolProcess pool_grad_process, + framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; @@ -110,7 +111,7 @@ class Pool2dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -154,10 +155,11 @@ template class MaxPool2dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; @@ -176,7 +178,7 @@ class MaxPool2dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -240,17 +242,17 @@ template class Pool3dFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - std::vector& ksize, std::vector& strides, - std::vector& paddings, PoolProcess pool_process) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + PoolProcess pool_process, framework::Tensor* output) { const int batch_size = input.dims()[0]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; const int input_width = input.dims()[4]; - const int output_channels = output.dims()[1]; - const int output_depth = output.dims()[2]; - const int output_height = output.dims()[3]; - const int output_width = output.dims()[4]; + const int output_channels = output->dims()[1]; + const int output_depth = output->dims()[2]; + const int output_height = output->dims()[3]; + const int output_width = output->dims()[4]; const int ksize_depth = ksize[0]; const int ksize_height = ksize[1]; const int ksize_width = ksize[2]; @@ -265,7 +267,7 @@ class Pool3dFunctor { const int output_stride = output_depth * output_height * output_width; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -315,11 +317,12 @@ template class Pool3dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, std::vector& strides, std::vector& paddings, - PoolProcess pool_grad_process) { + PoolProcess pool_grad_process, + framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; @@ -343,7 +346,7 @@ class Pool3dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -398,10 +401,11 @@ template class MaxPool3dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; @@ -425,7 +429,7 @@ class MaxPool3dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -498,15 +502,15 @@ template class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + framework::Tensor* output, framework::Tensor* mask) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; - const int output_channels = output.dims()[1]; - const int output_height = output.dims()[2]; - const int output_width = output.dims()[3]; + const int output_channels = output->dims()[1]; + const int output_height = output->dims()[2]; + const int output_width = output->dims()[3]; const int ksize_height = ksize[0]; const int ksize_width = ksize[1]; const int stride_height = strides[0]; @@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor { const int output_stride = output_height * output_width; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); - T* mask_data = mask.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); + T* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -563,13 +567,13 @@ template class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& input_grad, const framework::Tensor& output_grad, const framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { - const int batch_size = input_grad.dims()[0]; - const int input_height = input_grad.dims()[2]; - const int input_width = input_grad.dims()[3]; + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { + const int batch_size = input_grad->dims()[0]; + const int input_height = input_grad->dims()[2]; + const int input_width = input_grad->dims()[3]; const int output_channels = output_grad.dims()[1]; const int output_height = output_grad.dims()[2]; const int output_width = output_grad.dims()[3]; @@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor { const T* mask_data = mask.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { @@ -612,17 +616,17 @@ template class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + framework::Tensor* output, framework::Tensor* mask) { const int batch_size = input.dims()[0]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; const int input_width = input.dims()[4]; - const int output_channels = output.dims()[1]; - const int output_depth = output.dims()[2]; - const int output_height = output.dims()[3]; - const int output_width = output.dims()[4]; + const int output_channels = output->dims()[1]; + const int output_depth = output->dims()[2]; + const int output_height = output->dims()[3]; + const int output_width = output->dims()[4]; const int ksize_depth = ksize[0]; const int ksize_height = ksize[1]; const int ksize_width = ksize[2]; @@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor { const int output_stride = output_depth * output_height * output_width; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); - T* mask_data = mask.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); + T* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -691,14 +695,14 @@ template class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& input_grad, const framework::Tensor& output_grad, const framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { - const int batch_size = input_grad.dims()[0]; - const int input_depth = input_grad.dims()[2]; - const int input_height = input_grad.dims()[3]; - const int input_width = input_grad.dims()[4]; + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { + const int batch_size = input_grad->dims()[0]; + const int input_depth = input_grad->dims()[2]; + const int input_height = input_grad->dims()[3]; + const int input_width = input_grad->dims()[4]; const int output_channels = output_grad.dims()[1]; const int output_depth = output_grad.dims()[2]; const int output_height = output_grad.dims()[3]; @@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor { const T* mask_data = mask.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { diff --git a/paddle/operators/math/pooling.cu b/paddle/operators/math/pooling.cu index 736327f4b7..6d1138ad50 100644 --- a/paddle/operators/math/pooling.cu +++ b/paddle/operators/math/pooling.cu @@ -21,13 +21,13 @@ namespace math { template __global__ void KernelPool2D(const int nthreads, const T* input_data, - T* output_data, const int channels, - const int input_height, const int input_width, - const int output_height, const int output_width, - const int ksize_height, const int ksize_width, - const int stride_height, const int stride_width, - const int padding_height, const int padding_width, - PoolProcess pool_process) { + const int channels, const int input_height, + const int input_width, const int output_height, + const int output_width, const int ksize_height, + const int ksize_width, const int stride_height, + const int stride_width, const int padding_height, + const int padding_width, PoolProcess pool_process, + T* output_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, template __global__ void KernelPool2DGrad( const int nthreads, const T* input_data, const T* output_data, - const T* output_grad, T* input_grad, const int channels, - const int input_height, const int input_width, const int output_height, - const int output_width, const int ksize_height, const int ksize_width, - const int stride_height, const int stride_width, const int padding_height, - const int padding_width, PoolProcess pool_process) { + const T* output_grad, const int channels, const int input_height, + const int input_width, const int output_height, const int output_width, + const int ksize_height, const int ksize_width, const int stride_height, + const int stride_width, const int padding_height, const int padding_width, + PoolProcess pool_process, T* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int offsetW = index % input_width + padding_width; @@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad( template __global__ void KernelMaxPool2DGrad( const int nthreads, const T* input_data, const T* output_data, - const T* output_grad, T* input_grad, const int channels, - const int input_height, const int input_width, const int output_height, - const int output_width, const int ksize_height, const int ksize_width, - const int stride_height, const int stride_width, const int padding_height, - const int padding_width) { + const T* output_grad, const int channels, const int input_height, + const int input_width, const int output_height, const int output_width, + const int ksize_height, const int ksize_width, const int stride_height, + const int stride_width, const int padding_height, const int padding_width, + T* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -158,16 +158,16 @@ template class Pool2dFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - std::vector& ksize, std::vector& strides, - std::vector& paddings, PoolProcess pool_process) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + PoolProcess pool_process, framework::Tensor* output) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; - const int output_channels = output.dims()[1]; - const int output_height = output.dims()[2]; - const int output_width = output.dims()[3]; + const int output_channels = output->dims()[1]; + const int output_height = output->dims()[2]; + const int output_width = output->dims()[3]; const int ksize_height = ksize[0]; const int ksize_width = ksize[1]; const int stride_height = strides[0]; @@ -176,7 +176,7 @@ class Pool2dFunctor { const int padding_width = paddings[1]; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_height * output_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -187,11 +187,10 @@ class Pool2dFunctor { PoolProcess, T><<(context) - .stream()>>>(nthreads, input_data, output_data, input_channels, - input_height, input_width, output_height, - output_width, ksize_height, ksize_width, - stride_height, stride_width, padding_height, - padding_width, pool_process); + .stream()>>>( + nthreads, input_data, input_channels, input_height, input_width, + output_height, output_width, ksize_height, ksize_width, stride_height, + stride_width, padding_height, padding_width, pool_process, output_data); } }; @@ -204,11 +203,11 @@ template class Pool2dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, std::vector& strides, std::vector& paddings, - PoolProcess pool_process) { + PoolProcess pool_process, framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_height = input.dims()[2]; @@ -225,7 +224,7 @@ class Pool2dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -237,10 +236,10 @@ class Pool2dGradFunctor { T><<(context) .stream()>>>( - nthreads, input_data, output_data, output_grad_data, input_grad_data, - input_channels, input_height, input_width, output_height, output_width, - ksize_height, ksize_width, stride_height, stride_width, padding_height, - padding_width, pool_process); + nthreads, input_data, output_data, output_grad_data, input_channels, + input_height, input_width, output_height, output_width, ksize_height, + ksize_width, stride_height, stride_width, padding_height, padding_width, + pool_process, input_grad_data); } }; @@ -253,10 +252,11 @@ template class MaxPool2dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_height = input.dims()[2]; @@ -274,7 +274,7 @@ class MaxPool2dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_height * output_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -285,10 +285,10 @@ class MaxPool2dGradFunctor { T><<(context) .stream()>>>( - nthreads, input_data, output_data, output_grad_data, input_grad_data, - input_channels, input_height, input_width, output_height, output_width, - ksize_height, ksize_width, stride_height, stride_width, padding_height, - padding_width); + nthreads, input_data, output_data, output_grad_data, input_channels, + input_height, input_width, output_height, output_width, ksize_height, + ksize_width, stride_height, stride_width, padding_height, padding_width, + input_grad_data); } }; @@ -313,14 +313,16 @@ template class Pool2dGradFunctor< platform::GPUPlace, paddle::operators::math::AvgPoolGrad, double>; template -__global__ void KernelPool3D( - const int nthreads, const T* input_data, T* output_data, const int channels, - const int input_depth, const int input_height, const int input_width, - const int output_depth, const int output_height, const int output_width, - const int ksize_depth, const int ksize_height, const int ksize_width, - const int stride_depth, const int stride_height, const int stride_width, - const int padding_depth, const int padding_height, const int padding_width, - PoolProcess pool_process) { +__global__ void KernelPool3D(const int nthreads, const T* input_data, + const int channels, const int input_depth, + const int input_height, const int input_width, + const int output_depth, const int output_height, + const int output_width, const int ksize_depth, + const int ksize_height, const int ksize_width, + const int stride_depth, const int stride_height, + const int stride_width, const int padding_depth, + const int padding_height, const int padding_width, + PoolProcess pool_process, T* output_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -358,13 +360,13 @@ __global__ void KernelPool3D( template __global__ void KernelPool3DGrad( const int nthreads, const T* input_data, const T* output_data, - const T* output_grad, T* input_grad, const int channels, - const int input_depth, const int input_height, const int input_width, - const int output_depth, const int output_height, const int output_width, - const int ksize_depth, const int ksize_height, const int ksize_width, - const int stride_depth, const int stride_height, const int stride_width, - const int padding_depth, const int padding_height, const int padding_width, - PoolProcess pool_process) { + const T* output_grad, const int channels, const int input_depth, + const int input_height, const int input_width, const int output_depth, + const int output_height, const int output_width, const int ksize_depth, + const int ksize_height, const int ksize_width, const int stride_depth, + const int stride_height, const int stride_width, const int padding_depth, + const int padding_height, const int padding_width, PoolProcess pool_process, + T* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int offsetW = index % input_width + padding_width; @@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad( template __global__ void KernelMaxPool3DGrad( const int nthreads, const T* input_data, const T* output_data, - const T* output_grad, T* input_grad, const int channels, - const int input_depth, const int input_height, const int input_width, - const int output_depth, const int output_height, const int output_width, - const int ksize_depth, const int ksize_height, const int ksize_width, - const int stride_depth, const int stride_height, const int stride_width, - const int padding_depth, const int padding_height, - const int padding_width) { + const T* output_grad, const int channels, const int input_depth, + const int input_height, const int input_width, const int output_depth, + const int output_height, const int output_width, const int ksize_depth, + const int ksize_height, const int ksize_width, const int stride_depth, + const int stride_height, const int stride_width, const int padding_depth, + const int padding_height, const int padding_width, T* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -480,18 +481,18 @@ template class Pool3dFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - std::vector& ksize, std::vector& strides, - std::vector& paddings, PoolProcess pool_process) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + PoolProcess pool_process, framework::Tensor* output) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; const int input_width = input.dims()[4]; - const int output_channels = output.dims()[1]; - const int output_depth = output.dims()[2]; - const int output_height = output.dims()[3]; - const int output_width = output.dims()[4]; + const int output_channels = output->dims()[1]; + const int output_depth = output->dims()[2]; + const int output_height = output->dims()[3]; + const int output_width = output->dims()[4]; const int ksize_depth = ksize[0]; const int ksize_height = ksize[1]; const int ksize_width = ksize[2]; @@ -503,7 +504,7 @@ class Pool3dFunctor { const int padding_width = paddings[2]; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_depth * output_height * output_width; @@ -516,11 +517,11 @@ class Pool3dFunctor { T><<(context) .stream()>>>( - nthreads, input_data, output_data, input_channels, input_depth, - input_height, input_width, output_depth, output_height, output_width, - ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, - stride_width, padding_depth, padding_height, padding_width, - pool_process); + nthreads, input_data, input_channels, input_depth, input_height, + input_width, output_depth, output_height, output_width, ksize_depth, + ksize_height, ksize_width, stride_depth, stride_height, stride_width, + padding_depth, padding_height, padding_width, pool_process, + output_data); } }; @@ -533,11 +534,11 @@ template class Pool3dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, std::vector& strides, std::vector& paddings, - PoolProcess pool_process) { + PoolProcess pool_process, framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_depth = input.dims()[2]; @@ -560,7 +561,7 @@ class Pool3dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_depth * input_height * input_width; @@ -573,11 +574,11 @@ class Pool3dGradFunctor { T><<(context) .stream()>>>( - nthreads, input_data, output_data, output_grad_data, input_grad_data, - input_channels, input_depth, input_height, input_width, output_depth, - output_height, output_width, ksize_depth, ksize_height, ksize_width, - stride_depth, stride_height, stride_width, padding_depth, - padding_height, padding_width, pool_process); + nthreads, input_data, output_data, output_grad_data, input_channels, + input_depth, input_height, input_width, output_depth, output_height, + output_width, ksize_depth, ksize_height, ksize_width, stride_depth, + stride_height, stride_width, padding_depth, padding_height, + padding_width, pool_process, input_grad_data); } }; @@ -590,10 +591,11 @@ template class MaxPool3dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_depth = input.dims()[2]; @@ -616,7 +618,7 @@ class MaxPool3dGradFunctor { const T* input_data = input.data(); const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_depth * output_height * output_width; @@ -628,11 +630,11 @@ class MaxPool3dGradFunctor { T><<(context) .stream()>>>( - nthreads, input_data, output_data, output_grad_data, input_grad_data, - input_channels, input_depth, input_height, input_width, output_depth, - output_height, output_width, ksize_depth, ksize_height, ksize_width, - stride_depth, stride_height, stride_width, padding_depth, - padding_height, padding_width); + nthreads, input_data, output_data, output_grad_data, input_channels, + input_depth, input_height, input_width, output_depth, output_height, + output_width, ksize_depth, ksize_height, ksize_width, stride_depth, + stride_height, stride_width, padding_depth, padding_height, + padding_width, input_grad_data); } }; @@ -658,11 +660,11 @@ template class Pool3dGradFunctor< template __global__ void KernelMaxPool2dWithIdx( - const int nthreads, const T* input_data, T* output_data, T* mask_data, - const int channels, const int input_height, const int input_width, - const int output_height, const int output_width, const int ksize_height, - const int ksize_width, const int stride_height, const int stride_width, - const int padding_height, const int padding_width) { + const int nthreads, const T* input_data, const int channels, + const int input_height, const int input_width, const int output_height, + const int output_width, const int ksize_height, const int ksize_width, + const int stride_height, const int stride_width, const int padding_height, + const int padding_width, T* output_data, T* mask_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx( template __global__ void KernelMaxPool2DWithIdxGrad( - const int nthreads, T* input_grad, const T* output_grad, const T* mask_data, + const int nthreads, const T* output_grad, const T* mask_data, const int channels, const int input_height, const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, - const int padding_height, const int padding_width) { + const int padding_height, const int padding_width, T* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ -748,16 +750,16 @@ template class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + framework::Tensor* output, framework::Tensor* mask) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; - const int output_channels = output.dims()[1]; - const int output_height = output.dims()[2]; - const int output_width = output.dims()[3]; + const int output_channels = output->dims()[1]; + const int output_height = output->dims()[2]; + const int output_width = output->dims()[3]; const int ksize_height = ksize[0]; const int ksize_width = ksize[1]; const int stride_height = strides[0]; @@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor { const int padding_width = paddings[1]; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); - T* mask_data = mask.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); + T* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_height * output_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor { KernelMaxPool2dWithIdx< T><<(context) - .stream()>>>(nthreads, input_data, output_data, mask_data, - input_channels, input_height, input_width, - output_height, output_width, ksize_height, - ksize_width, stride_height, stride_width, - padding_height, padding_width); + .stream()>>>( + nthreads, input_data, input_channels, input_height, input_width, + output_height, output_width, ksize_height, ksize_width, stride_height, + stride_width, padding_height, padding_width, output_data, mask_data); } }; @@ -794,14 +795,14 @@ template class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& input_grad, const framework::Tensor& output_grad, const framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { - const int batch_size = input_grad.dims()[0]; - const int input_channels = input_grad.dims()[1]; - const int input_height = input_grad.dims()[2]; - const int input_width = input_grad.dims()[3]; + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { + const int batch_size = input_grad->dims()[0]; + const int input_channels = input_grad->dims()[1]; + const int input_height = input_grad->dims()[2]; + const int input_width = input_grad->dims()[3]; const int output_height = output_grad.dims()[2]; const int output_width = output_grad.dims()[3]; const int ksize_height = ksize[0]; @@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor { const T* mask_data = mask.data(); const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor { KernelMaxPool2DWithIdxGrad< T><<(context) - .stream()>>>(nthreads, input_grad_data, output_grad_data, - mask_data, input_channels, input_height, - input_width, output_height, output_width, - ksize_height, ksize_width, stride_height, - stride_width, padding_height, padding_width); + .stream()>>>(nthreads, output_grad_data, mask_data, + input_channels, input_height, input_width, + output_height, output_width, ksize_height, + ksize_width, stride_height, stride_width, + padding_height, padding_width, input_grad_data); } }; @@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor; template __global__ void KernelMaxPool3DWithIdx( - const int nthreads, const T* input_data, T* output_data, T* mask_data, - const int channels, const int input_depth, const int input_height, - const int input_width, const int output_depth, const int output_height, - const int output_width, const int ksize_depth, const int ksize_height, - const int ksize_width, const int stride_depth, const int stride_height, - const int stride_width, const int padding_depth, const int padding_height, - const int padding_width) { + const int nthreads, const T* input_data, const int channels, + const int input_depth, const int input_height, const int input_width, + const int output_depth, const int output_height, const int output_width, + const int ksize_depth, const int ksize_height, const int ksize_width, + const int stride_depth, const int stride_height, const int stride_width, + const int padding_depth, const int padding_height, const int padding_width, + T* output_data, T* mask_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx( template __global__ void KernelMaxPool3DWithIdxGrad( - const int nthreads, T* input_grad, const T* output_grad, const T* mask, - const int channels, const int input_depth, const int input_height, - const int input_width, const int output_depth, const int output_height, - const int output_width, const int ksize_depth, const int ksize_height, - const int ksize_width, const int stride_depth, const int stride_height, - const int stride_width, const int padding_depth, const int padding_height, - const int padding_width) { + const int nthreads, const T* output_grad, const T* mask, const int channels, + const int input_depth, const int input_height, const int input_width, + const int output_depth, const int output_height, const int output_width, + const int ksize_depth, const int ksize_height, const int ksize_width, + const int stride_depth, const int stride_height, const int stride_width, + const int padding_depth, const int padding_height, const int padding_width, + T* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ -952,18 +953,18 @@ template class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + framework::Tensor* output, framework::Tensor* mask) { const int batch_size = input.dims()[0]; const int input_channels = input.dims()[1]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; const int input_width = input.dims()[4]; - const int output_channels = output.dims()[1]; - const int output_depth = output.dims()[2]; - const int output_height = output.dims()[3]; - const int output_width = output.dims()[4]; + const int output_channels = output->dims()[1]; + const int output_depth = output->dims()[2]; + const int output_height = output->dims()[3]; + const int output_width = output->dims()[4]; const int ksize_depth = ksize[0]; const int ksize_height = ksize[1]; const int ksize_width = ksize[2]; @@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor { const int padding_width = paddings[2]; const T* input_data = input.data(); - T* output_data = output.mutable_data(context.GetPlace()); - T* mask_data = mask.mutable_data(context.GetPlace()); + T* output_data = output->mutable_data(context.GetPlace()); + T* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_depth * output_height * output_width; @@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor { T><<(context) .stream()>>>( - nthreads, input_data, output_data, mask_data, input_channels, - input_depth, input_height, input_width, output_depth, output_height, - output_width, ksize_depth, ksize_height, ksize_width, stride_depth, - stride_height, stride_width, padding_depth, padding_height, - padding_width); + nthreads, input_data, input_channels, input_depth, input_height, + input_width, output_depth, output_height, output_width, ksize_depth, + ksize_height, ksize_width, stride_depth, stride_height, stride_width, + padding_depth, padding_height, padding_width, output_data, mask_data); } }; @@ -1005,15 +1005,15 @@ template class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& input_grad, const framework::Tensor& output_grad, const framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings) { - const int batch_size = input_grad.dims()[0]; - const int input_channels = input_grad.dims()[1]; - const int input_depth = input_grad.dims()[2]; - const int input_height = input_grad.dims()[3]; - const int input_width = input_grad.dims()[4]; + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad) { + const int batch_size = input_grad->dims()[0]; + const int input_channels = input_grad->dims()[1]; + const int input_depth = input_grad->dims()[2]; + const int input_height = input_grad->dims()[3]; + const int input_width = input_grad->dims()[4]; const int output_depth = output_grad.dims()[2]; const int output_height = output_grad.dims()[3]; const int output_width = output_grad.dims()[4]; @@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor { const T* output_grad_data = output_grad.data(); const T* mask_data = mask.data(); - T* input_grad_data = input_grad.mutable_data(context.GetPlace()); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_depth * input_height * input_width; @@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor { T><<(context) .stream()>>>( - nthreads, input_grad_data, output_grad_data, mask_data, input_channels, - input_depth, input_height, input_width, output_depth, output_height, - output_width, ksize_depth, ksize_height, ksize_width, stride_depth, - stride_height, stride_width, padding_depth, padding_height, - padding_width); + nthreads, output_grad_data, mask_data, input_channels, input_depth, + input_height, input_width, output_depth, output_height, output_width, + ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, + stride_width, padding_depth, padding_height, padding_width, + input_grad_data); } }; diff --git a/paddle/operators/math/pooling.h b/paddle/operators/math/pooling.h index c50c57b5c5..f6719e1e62 100644 --- a/paddle/operators/math/pooling.h +++ b/paddle/operators/math/pooling.h @@ -88,60 +88,62 @@ template class Pool2dFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - std::vector& ksize, std::vector& strides, - std::vector& paddings, PoolProcess pool_compute); + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + PoolProcess pool_compute, framework::Tensor* output); }; template class Pool2dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, std::vector& strides, std::vector& paddings, - PoolProcess pool_compute); + PoolProcess pool_compute, framework::Tensor* input_grad); }; template class MaxPool2dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, - std::vector& strides, std::vector& paddings); + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad); }; template class Pool3dFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - std::vector& ksize, std::vector& strides, - std::vector& paddings, PoolProcess pool_compute); + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + PoolProcess pool_compute, framework::Tensor* output); }; template class Pool3dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, std::vector& strides, std::vector& paddings, - PoolProcess pool_compute); + PoolProcess pool_compute, framework::Tensor* input_grad); }; template class MaxPool3dGradFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& input_grad, + const framework::Tensor& input, const framework::Tensor& output, const framework::Tensor& output_grad, std::vector& ksize, - std::vector& strides, std::vector& paddings); + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad); }; /* @@ -155,38 +157,38 @@ template class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings); + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + framework::Tensor* output, framework::Tensor* mask); }; template class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& input_grad, const framework::Tensor& output_grad, const framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings); + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad); }; template class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& input, framework::Tensor& output, - framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings); + const framework::Tensor& input, std::vector& ksize, + std::vector& strides, std::vector& paddings, + framework::Tensor* output, framework::Tensor* mask); }; template class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& input_grad, const framework::Tensor& output_grad, const framework::Tensor& mask, std::vector& ksize, - std::vector& strides, std::vector& paddings); + std::vector& strides, std::vector& paddings, + framework::Tensor* input_grad); }; } // namespace math diff --git a/paddle/operators/matmul_op.h b/paddle/operators/matmul_op.h index 5ce30740c9..4f565946d5 100644 --- a/paddle/operators/matmul_op.h +++ b/paddle/operators/matmul_op.h @@ -74,11 +74,10 @@ Tensor CombineBatchAndN(const framework::ExecutionContext& context, Tensor output; auto in_dims = input.dims(); if (in_dims.size() == 3) { - output.Resize(in_dims); + output.Resize({in_dims[1], in_dims[0], in_dims[2]}); output.mutable_data(context.GetPlace()); EigenTranspose(context, input, output, {1, 0, 2}); - std::vector out_dims = {in_dims[1], in_dims[0] * in_dims[2]}; - output.Resize(make_ddim(out_dims)); + output.Resize({in_dims[1], in_dims[0] * in_dims[2]}); } else { output.ShareDataWith(input); } diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc new file mode 100644 index 0000000000..80460c4769 --- /dev/null +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -0,0 +1,182 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" + +namespace paddle { +namespace operators { + +using LoD = framework::LoD; + +class MergeLoDTensorOp : public framework::OperatorBase { + public: + MergeLoDTensorOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto &x = scope.FindVar(Input("X"))->Get(); + auto &mask = scope.FindVar(Input("Mask"))->Get(); + auto &in_true = scope.FindVar(Input("InTrue"))->Get(); + auto &in_false = + scope.FindVar(Input("InFalse"))->Get(); + auto *out = + scope.FindVar(Output("Out"))->GetMutable(); + auto level = static_cast(Attr("level")); + + auto &mask_dim = mask.dims(); + + std::unique_ptr cpu_mask{new framework::LoDTensor()}; + if (platform::is_cpu_place(mask.place())) { + cpu_mask->ShareDataWith(mask); + } else if (platform::is_gpu_place(mask.place())) { +#ifdef PADDLE_WITH_CUDA + cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); +#else + PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); +#endif + } + auto *mask_data = cpu_mask->data(); + + int rank = in_true.dims().size(); + platform::Place place = in_true.place(); + std::type_index data_type = in_true.type(); + framework::DDim in_true_dims = + framework::slice_ddim(in_true.dims(), 1, rank); + + int64_t batch_size = in_true.dims()[0] + in_false.dims()[0]; + + auto in_true_dim_vec = framework::vectorize(in_true_dims); + in_true_dim_vec.insert(in_true_dim_vec.begin(), batch_size); + + framework::DDim out_dims = framework::make_ddim(in_true_dim_vec); + out->Resize(out_dims); + out->mutable_data(place, data_type); + + auto *out_lod = out->mutable_lod(); + out_lod->clear(); + size_t out_offset = 0; + + // Build LoDTensor `out` + + size_t in_true_idx = 0; + size_t in_false_idx = 0; + for (size_t i = 0; i < static_cast(mask_dim[0]); i++) { + const framework::LoDTensor *input = nullptr; + size_t *in_idx = nullptr; + if (static_cast(mask_data[i]) == 0) { + input = &in_false; + in_idx = &in_false_idx; + } else { + input = &in_true; + in_idx = &in_true_idx; + } + auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( + input->lod(), *in_idx, (*in_idx) + 1, 0); + auto &lod_length = lod_and_offset.first; + + framework::AppendLoD(out_lod, lod_length); + + size_t start_offset = lod_and_offset.second.first; + size_t end_offset = lod_and_offset.second.second; + + PADDLE_ENFORCE_GE(end_offset, start_offset); + size_t len = end_offset - start_offset; + if (len == 0) { + continue; + } + out->Slice(out_offset, out_offset + len) + .CopyFrom(input->Slice(start_offset, end_offset), place, dev_ctx); + out_offset += len; + (*in_idx) += 1; + } + + for (size_t i = 0; i < level; i++) { + out_lod->insert(out_lod->begin(), x.lod()[i]); + } + } +}; + +class MergeLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + MergeLoDTensorOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "The input LoDTensor, contains complete lod information to " + "construct the output"); + AddInput("Mask", "A bool column vector which mask the input"); + AddInput("InTrue", "The True branch to be merged"); + AddInput("InFalse", "The False branch to be merged"); + AddOutput("Out", "The merged output LoDTensor"); + AddAttr("level", "(int) the specific lod level to rank.") + .SetDefault(0) + .EqualGreaterThan(0); + AddComment( + R"DOC( + Merge True and False branches of LoDTensor into a single Output, + with a mask at certain lod level. X is used to obtain complete + lod information. Please refer to SplitLoDTensorOp.)DOC"); + } +}; + +class MergeLoDTensorInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("X"), + "MergeLoDTensorOp must has input X."); + PADDLE_ENFORCE(context->HasInput("Mask"), + "MergeLoDTensorOp must has input Mask."); + PADDLE_ENFORCE(context->HasInput("InTrue"), + "MergeLoDTensorOp must has input InTrue."); + PADDLE_ENFORCE(context->HasInput("InFalse"), + "MergeLoDTensorOp must has input InFalse."); + PADDLE_ENFORCE(context->HasOutput("Out"), + "MergeLoDTensorOp must has output Out"); + + auto mask_dim = context->GetInputDim("Mask"); + PADDLE_ENFORCE_EQ(mask_dim.size(), 2); + PADDLE_ENFORCE_EQ(mask_dim[1], 1); + + context->SetOutputDim("Out", context->GetInputDim("InTrue")); + } +}; + +class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDescBind(); + grad_op->SetType("split_lod_tensor"); + grad_op->SetInput("X", OutputGrad("Out")); + grad_op->SetInput("Mask", Input("Mask")); + grad_op->SetOutput("OutTrue", InputGrad("InTrue")); + grad_op->SetOutput("OutFalse", InputGrad("InFalse")); + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(merge_lod_tensor, ops::MergeLoDTensorOp, + ops::MergeLoDTensorOpProtoMaker, + ops::MergeLoDTensorInferShape, ops::MergeLoDTensorGradMaker); diff --git a/paddle/operators/pool_op.h b/paddle/operators/pool_op.h index 4da1941ab5..63492a89e8 100644 --- a/paddle/operators/pool_op.h +++ b/paddle/operators/pool_op.h @@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel { Place, paddle::operators::math::MaxPool, T> pool2d_forward; paddle::operators::math::MaxPool pool_process; - pool2d_forward(context.device_context(), *in_x, *out, ksize, strides, - paddings, pool_process); + pool2d_forward(context.device_context(), *in_x, ksize, strides, + paddings, pool_process, out); } else if (pooling_type == "avg") { paddle::operators::math::Pool2dFunctor< Place, paddle::operators::math::AvgPool, T> pool2d_forward; paddle::operators::math::AvgPool pool_process; - pool2d_forward(context.device_context(), *in_x, *out, ksize, strides, - paddings, pool_process); + pool2d_forward(context.device_context(), *in_x, ksize, strides, + paddings, pool_process, out); } } break; case 3: { @@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel { Place, paddle::operators::math::MaxPool, T> pool3d_forward; paddle::operators::math::MaxPool pool_process; - pool3d_forward(context.device_context(), *in_x, *out, ksize, strides, - paddings, pool_process); + pool3d_forward(context.device_context(), *in_x, ksize, strides, + paddings, pool_process, out); } else if (pooling_type == "avg") { paddle::operators::math::Pool3dFunctor< Place, paddle::operators::math::AvgPool, T> pool3d_forward; paddle::operators::math::AvgPool pool_process; - pool3d_forward(context.device_context(), *in_x, *out, ksize, strides, - paddings, pool_process); + pool3d_forward(context.device_context(), *in_x, ksize, strides, + paddings, pool_process, out); } } break; default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } @@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel { if (pooling_type == "max") { paddle::operators::math::MaxPool2dGradFunctor pool2d_backward; - pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out, - *out_grad, ksize, strides, paddings); + pool2d_backward(context.device_context(), *in_x, *out, *out_grad, + ksize, strides, paddings, in_x_grad); } else if (pooling_type == "avg") { paddle::operators::math::Pool2dGradFunctor< Place, paddle::operators::math::AvgPoolGrad, T> pool2d_backward; paddle::operators::math::AvgPoolGrad pool_process; - pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out, - *out_grad, ksize, strides, paddings, pool_process); + pool2d_backward(context.device_context(), *in_x, *out, *out_grad, + ksize, strides, paddings, pool_process, in_x_grad); } } break; case 3: { if (pooling_type == "max") { paddle::operators::math::MaxPool3dGradFunctor pool3d_backward; - pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out, - *out_grad, ksize, strides, paddings); + pool3d_backward(context.device_context(), *in_x, *out, *out_grad, + ksize, strides, paddings, in_x_grad); } else if (pooling_type == "avg") { paddle::operators::math::Pool3dGradFunctor< Place, paddle::operators::math::AvgPoolGrad, T> pool3d_backward; paddle::operators::math::AvgPoolGrad pool_process; - pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out, - *out_grad, ksize, strides, paddings, pool_process); + pool3d_backward(context.device_context(), *in_x, *out, *out_grad, + ksize, strides, paddings, pool_process, in_x_grad); } } break; default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } diff --git a/paddle/operators/pool_with_index_op.h b/paddle/operators/pool_with_index_op.h index ea37de84ab..c0e3b117dc 100644 --- a/paddle/operators/pool_with_index_op.h +++ b/paddle/operators/pool_with_index_op.h @@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel { case 2: { paddle::operators::math::MaxPool2dWithIndexFunctor pool2d_forward; - pool2d_forward(context.device_context(), *in_x, *out, *mask, ksize, - strides, paddings); + pool2d_forward(context.device_context(), *in_x, ksize, strides, + paddings, out, mask); } break; case 3: { paddle::operators::math::MaxPool3dWithIndexFunctor pool3d_forward; - pool3d_forward(context.device_context(), *in_x, *out, *mask, ksize, - strides, paddings); + pool3d_forward(context.device_context(), *in_x, ksize, strides, + paddings, out, mask); } break; default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } } @@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel { case 2: { paddle::operators::math::MaxPool2dWithIndexGradFunctor pool2d_backward; - pool2d_backward(context.device_context(), *in_x_grad, *out_grad, - *mask, ksize, strides, paddings); + pool2d_backward(context.device_context(), *out_grad, *mask, ksize, + strides, paddings, in_x_grad); } break; case 3: { paddle::operators::math::MaxPool3dWithIndexGradFunctor pool3d_backward; - pool3d_backward(context.device_context(), *in_x_grad, *out_grad, - *mask, ksize, strides, paddings); + pool3d_backward(context.device_context(), *out_grad, *mask, ksize, + strides, paddings, in_x_grad); } break; default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } } diff --git a/paddle/operators/sequence_concat_op.cc b/paddle/operators/sequence_concat_op.cc index db737bed7a..d1de0b4447 100644 --- a/paddle/operators/sequence_concat_op.cc +++ b/paddle/operators/sequence_concat_op.cc @@ -47,7 +47,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", - "(vector) Input is a vector of LoDTensor, " + "(LodTensorArray) Input is a vector of LoDTensor, " "each of which is a variable-length sequence or nested sequence.") .AsDuplicable(); AddOutput("Out", diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h index 2b8a25c241..7f136d8cf0 100644 --- a/paddle/operators/sequence_pool_op.h +++ b/paddle/operators/sequence_pool_op.h @@ -126,6 +126,7 @@ class SequencePoolGradKernel : public framework::OpKernel { int64_t h = static_cast(lod[i + 1] - lod[i]); auto in_g_e = EigenMatrix::From(in_g_t, {h, w}); auto out_g_e = EigenMatrix::From(out_g_t, {1, w}); + auto out_g_e_v = EigenVector::Flatten(out_g_t); Eigen::DSizes bcast(h, 1); if (pooltype == "AVERAGE") { @@ -136,9 +137,9 @@ class SequencePoolGradKernel : public framework::OpKernel { in_g_e.device(place) = (out_g_e / std::sqrt(static_cast(h))).broadcast(bcast); } else if (pooltype == "LAST") { - in_g_e.chip(h - 1, 0).device(place) = out_g_e; + in_g_e.chip(h - 1, 0).device(place) = out_g_e_v; } else if (pooltype == "FIRST") { - in_g_e.chip(0, 0).device(place) = out_g_e; + in_g_e.chip(0, 0).device(place) = out_g_e_v; } else { PADDLE_THROW("unsupported pooling pooltype"); } diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc new file mode 100644 index 0000000000..db635f2ba0 --- /dev/null +++ b/paddle/operators/split_lod_tensor_op.cc @@ -0,0 +1,186 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" + +namespace paddle { +namespace operators { + +struct CopyRange { + size_t begin; + size_t end; +}; + +using LoD = framework::LoD; + +class SplitLoDTensorOp : public framework::OperatorBase { + public: + SplitLoDTensorOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto &x = scope.FindVar(Input("X"))->Get(); + auto &mask = scope.FindVar(Input("Mask"))->Get(); + auto *out_true = + scope.FindVar(Output("OutTrue"))->GetMutable(); + auto *out_false = + scope.FindVar(Output("OutFalse"))->GetMutable(); + auto level = static_cast(Attr("level")); + auto &x_lod = x.lod(); + auto &mask_dim = mask.dims(); + + std::unique_ptr cpu_mask{new framework::LoDTensor()}; + if (platform::is_cpu_place(mask.place())) { + cpu_mask->ShareDataWith(mask); + } else if (platform::is_gpu_place(mask.place())) { +#ifdef PADDLE_WITH_CUDA + cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); +#else + PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); +#endif + } + auto *mask_data = cpu_mask->data(); + + std::vector> copy_ranges(mask_dim[0]); + + // set out_true/out_false lod + for (size_t t = 0; t < 2; t++) { + LoD *lod = nullptr; + if (t == 0) { + lod = out_false->mutable_lod(); + } else { + lod = out_true->mutable_lod(); + } + lod->clear(); + for (size_t i = 0; i < static_cast(mask_dim[0]); i++) { + if (static_cast(mask_data[i]) == t) { + size_t start_idx = i; + auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset( + x_lod, start_idx, start_idx + 1, level); + + auto &lod_length = lod_and_offset.first; + framework::AppendLoD(lod, lod_length); + + size_t start_offset = lod_and_offset.second.first; + size_t end_offset = lod_and_offset.second.second; + copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset}); + } + } + } + + for (size_t t = 0; t < 2; ++t) { + framework::LoDTensor *out; + if (t == 0) { + out = out_false; + } else { + out = out_true; + } + auto &ranges = copy_ranges[t]; + size_t height = std::accumulate( + ranges.begin(), ranges.end(), 0UL, + [](size_t a, const CopyRange &b) { return a + b.end - b.begin; }); + auto x_dim = x.dims(); + x_dim[0] = static_cast(height); + out->Resize(x_dim); + out->mutable_data(x.place(), x.type()); + size_t offset = 0; + for (auto &each_range : ranges) { + size_t len = each_range.end - each_range.begin; + if (len == 0) { + continue; + } + // out[offset: offset+len] = x[each_range.begin: each_range.end] + out->Slice(static_cast(offset), static_cast(offset + len)) + .CopyFrom(x.Slice(static_cast(each_range.begin), + static_cast(each_range.end)), + x.place(), dev_ctx); + offset += len; + } + } + } +}; + +class SplitLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + SplitLoDTensorOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input LoDTensor"); + AddInput("Mask", "A bool column vector which mask the input"); + AddOutput("OutTrue", "True branch of input LoDTensor"); + AddOutput("OutFalse", "False branch of input LoDTensor"); + AddAttr("level", "(int) the specific lod level to split.") + .SetDefault(0) + .EqualGreaterThan(0); + AddComment( + R"DOC( + Split a LoDTensor with a Mask at certain level. The input LoDTensor + has 3 sequence at certain lod level. The Mask is a bool column vector, + such as [0, 1, 0] at the same level. The first and third sequence will + be send to False Output LoDTensor; whereas the second sequence will + be send to True Output LoDTensor. Please refer to MergeLoDTensorOp.)DOC"); + } +}; + +class SplitLoDTensorInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("X"), + "SplitLoDTensorOp must has input X."); + PADDLE_ENFORCE(context->HasInput("Mask"), + "SplitLoDTensorOp must has input Mask."); + PADDLE_ENFORCE(context->HasOutput("OutTrue"), + "SplitLoDTensorOp must has output OutTrue."); + PADDLE_ENFORCE(context->HasOutput("OutFalse"), + "SplitLoDTensorOp must has output OutFalse."); + + auto mask_dim = context->GetInputDim("Mask"); + PADDLE_ENFORCE_EQ(mask_dim.size(), 2); + PADDLE_ENFORCE_EQ(mask_dim[1], 1); + + context->SetOutputDim("OutTrue", context->GetInputDim("X")); + context->SetOutputDim("OutFalse", context->GetInputDim("X")); + } +}; + +class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDescBind(); + grad_op->SetType("merge_lod_tensor"); + grad_op->SetInput("InTrue", OutputGrad("OutTrue")); + grad_op->SetInput("InFalse", OutputGrad("OutFalse")); + grad_op->SetInput("Mask", Input("Mask")); + grad_op->SetInput("X", Input("X")); + grad_op->SetOutput("Out", InputGrad("X")); + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(split_lod_tensor, ops::SplitLoDTensorOp, + ops::SplitLoDTensorOpProtoMaker, + ops::SplitLoDTensorInferShape, + ops::SplitLoDTensorArrayGradMaker); diff --git a/paddle/operators/squared_l2_norm_op.h b/paddle/operators/squared_l2_norm_op.h index c8d37ac40c..48d7b1c2d5 100644 --- a/paddle/operators/squared_l2_norm_op.h +++ b/paddle/operators/squared_l2_norm_op.h @@ -29,7 +29,7 @@ class SquaredL2NormKernel : public framework::OpKernel { Out->mutable_data(context.GetPlace()); auto x = framework::EigenVector::Flatten(*X); - auto out = framework::EigenVector::Flatten(*Out); + auto out = framework::EigenScalar::From(*Out); auto place = context.GetEigenDevice(); out.device(place) = x.square().sum(); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 0f906e0e47..3d8d3f1d2f 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -42,6 +42,9 @@ limitations under the License. */ #include "paddle/platform/gpu_info.h" #endif +// disable auto conversion to list in Python +PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray); + namespace paddle { namespace pybind { static size_t UniqueIntegerGenerator(const std::string &prefix) { diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 32578ad779..c8632295a2 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -37,10 +37,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) -add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so - COMMAND cmake -E copy $ ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so + COMMAND cmake -E copy $ ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so DEPENDS paddle_pybind) -add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so) +add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so) add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp @@ -66,7 +66,7 @@ if (WITH_TESTING) add_subdirectory(paddle/v2/tests) add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/plot/tests) - add_subdirectory(paddle/v2/framework/tests) + add_subdirectory(paddle/v2/fluid/tests) endif() endif() install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 43b83b4823..725a5fd60d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None): #caffe_mode: compute the output size using floor instead of ceil, # which is consistent of caffe and CuDNN's convention. -def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): - output = (2 * padding + img_size - filter_size) / float(stride) +def cnn_output_size(img_size, + filter_size, + padding, + stride, + caffe_mode, + dilation=1): + filter_s = (filter_size - 1) * dilation + 1 + output = (2 * padding + img_size - filter_s) / float(stride) if caffe_mode: return 1 + int(math.floor(output)) else: @@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): #calcualte image_size based on output_size for de-convolution (ConvTransLayer). #It is the reverse function of cnn_output_size -def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): - img_size = (output_size - 1) * stride + filter_size - 2 * padding +def cnn_image_size(output_size, + filter_size, + padding, + stride, + caffe_mode, + dilation=1): + filter_s = (filter_size - 1) * dilation + 1 + img_size = (output_size - 1) * stride + filter_s - 2 * padding if not caffe_mode: img_size = img_size + 1 return img_size @@ -1253,9 +1265,9 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf): def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in [ - 'max-projection', 'avg-projection', 'cudnn-max-pool', 'cudnn-avg-pool' - ], "pool-type %s is not in " - "['max-projection', 'avg-projection', " + 'max-projection', 'avg-projection', 'max-pool-with-mask', 'cudnn-max-pool', 'cudnn-avg-pool' + ], "pool-type %s is not in " \ + "['max-projection', 'avg-projection', 'max-pool-with-mask'," \ "'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type) pool_conf.channels = pool.channels @@ -1376,6 +1388,12 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.stride_y = conv.stride_y conv_conf.groups = conv.groups conv_conf.caffe_mode = conv.caffe_mode + if not conv.dilation: + conv.dilation = 1 + conv.dilation_y = 1 + else: + conv_conf.dilation = conv.dilation + conv_conf.dilation_y = conv.dilation_y if not trans: conv_conf.filter_channels = conv.channels / conv.groups @@ -1383,20 +1401,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): get_img_size(input_layer_name, conv.channels) conv_conf.output_x = cnn_output_size( conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) + conv_conf.stride, conv_conf.caffe_mode, conv.dilation) conv_conf.output_y = cnn_output_size( conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) else: conv_conf.filter_channels = num_filters / conv.groups conv_conf.output_x, conv_conf.output_y = \ get_img_size(input_layer_name, conv.channels) conv_conf.img_size = cnn_image_size( conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, - conv_conf.stride, conv_conf.caffe_mode) + conv_conf.stride, conv_conf.caffe_mode, conv.dilation) conv_conf.img_size_y = cnn_image_size( conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, - conv_conf.stride_y, conv_conf.caffe_mode) + conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y) #caffe_mode: compute the output size using floor instead of ceil, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 5ace7598dc..469a5466c1 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -20,7 +20,7 @@ from paddle.trainer.config_parser import * from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation from .evaluators import * -from .poolings import MaxPooling, AvgPooling, BasePoolingType, \ +from .poolings import MaxPooling, AvgPooling, MaxWithMaskPooling, BasePoolingType, \ CudnnAvgPooling, CudnnMaxPooling from .attrs import * from .default_decorators import * @@ -2571,7 +2571,9 @@ def img_conv_layer(input, if layer_type: if dilation > 1 or dilation_y > 1: - assert layer_type in ["cudnn_conv", "cudnn_convt"] + assert layer_type in [ + "cudnn_conv", "cudnn_convt", "exconv", "exconvt" + ] if trans: assert layer_type in ["exconvt", "cudnn_convt"] else: @@ -2699,9 +2701,9 @@ def img_pool_layer(input, elif isinstance(pool_type, AvgPooling): pool_type.name = 'avg' - assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, + assert type(pool_type) in [AvgPooling, MaxPooling, MaxWithMaskPooling, CudnnAvgPooling, CudnnMaxPooling], \ - "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" + "only (Cudnn)AvgPooling, (Cudnn)MaxPooling, MaxWithMaskPooling are supported" type_name = pool_type.name + '-projection' \ if ( @@ -3592,10 +3594,9 @@ def lstm_step_layer(input, :type gate_act: BaseActivation :param state_act: State Activation Type. TanhActivation is the default. :type state_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. - :type bias_attr: ParameterAttribute | None | bool | Any + :param bias_attr: The parameter attribute for bias. If this parameter is + set to True or None, the bias is initialized to zero. + :type bias_attr: ParameterAttribute | None | True :param layer_attr: layer's extra attribute. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. @@ -3650,9 +3651,10 @@ def gru_step_layer(input, :param name: The name of this layer. It is optional. :param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :type gate_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. + :param bias_attr: The parameter attribute for bias. If this parameter is set to + False or an object whose type is not ParameterAttribute, no bias + is defined. If this parameter is set to True, + the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param param_attr: the parameter_attribute for transforming the output_mem from previous step. @@ -3712,9 +3714,10 @@ def gru_step_naive_layer(input, :type act: BaseActivation :param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :type gate_act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. + :param bias_attr: The parameter attribute for bias. If this parameter is set to + False or an object whose type is not ParameterAttribute, no bias + is defined. If this parameter is set to True, + the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param param_attr: :param layer_attr: @@ -3844,9 +3847,10 @@ def recurrent_layer(input, :type input: LayerOutput :param act: Activation type. TanhActivation is the default. :type act: BaseActivation - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. + :param bias_attr: The parameter attribute for bias. If this parameter is set to + False or an object whose type is not ParameterAttribute, + no bias is defined. If the parameter is set to True, + the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param param_attr: parameter attribute. :type param_attr: ParameterAttribute @@ -4836,9 +4840,10 @@ def tensor_layer(a, :type act: BaseActivation :param param_attr: The Parameter Attribute. :type param_attr: ParameterAttribute - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. + :param bias_attr: The parameter attribute for bias. If this parameter is set to + False or an object whose type is not ParameterAttribute, + no bias is defined. If this parameter is set to True, + the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute | None @@ -4900,9 +4905,10 @@ def selective_fc_layer(input, :type act: BaseActivation :param param_attr: The Parameter Attribute. :type param_attr: ParameterAttribute - :param bias_attr: The bias attribute. If the parameter is set to False or an object - whose type is not ParameterAttribute, no bias is defined. If the - parameter is set to True, the bias is initialized to zero. + :param bias_attr: The parameter attribute for bias. If this parameter is set to + False or an object whose type is not ParameterAttribute, + no bias is defined. If this parameter is set to True, + the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param layer_attr: Extra Layer config. :type layer_attr: ExtraLayerAttribute | None @@ -5585,10 +5591,10 @@ def nce_layer(input, to the num_classes. Each member of the list defines the probability of a class given input x. :type neg_distribution: list | tuple | collections.Sequence | None - :param bias_attr: The attribute for bias. If this parameter is set False or - any object whose type is not ParameterAttribute, no bias - is added. If this parameter is set True, the bias is - initialized to zero. + :param bias_attr: The parameter attribute for bias. If this parameter is set to + False or an object whose type is not ParameterAttribute, + no bias is defined. If this parameter is set to True, + the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any :param layer_attr: Extra Layer Attribute. :type layer_attr: ExtraLayerAttribute @@ -6519,9 +6525,9 @@ def gated_unit_layer(input, :param gate_param_attr: The parameter attribute of the gate. See ParameterAttribute for details. :type gate_param_attr: ParameterAttribute - :param gate_bias_attr: The bias attribute of the gate. If the parameter is set to False or + :param gate_bias_attr: The bias attribute of the gate. If this parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. - If the parameter is set to True, the bias is initialized to zero. + If this parameter is set to True, the bias is initialized to zero. :type gate_bias_attr: ParameterAttribute | bool | None | Any :param inproj_attr: Extra layer attributes of the projection. See ExtraLayerAttribute for details. @@ -6529,9 +6535,9 @@ def gated_unit_layer(input, :param inproj_param_attr: The parameter attribute of the projection. See ParameterAttribute for details. :type inproj_param_attr: ParameterAttribute - :param inproj_bias_attr: The bias attribute of the projection. If the parameter is set to False + :param inproj_bias_attr: The bias attribute of the projection. If this parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. - If the parameter is set to True, the bias is initialized to zero. + If this parameter is set to True, the bias is initialized to zero. :type inproj_bias_attr: ParameterAttribute | bool | None | Any :param layer_attr: Extra layer attribute of the product. See ExtraLayerAttribute for details. diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 3821d075cb..d323d34c3f 100644 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -681,34 +681,42 @@ def lstmemory_unit(input, state_act=TanhActivation()) - :param input: input layer. + :param input: Input layer. :type input: LayerOutput - :param out_memory: output of previous time step + :param out_memory: The output of previous time step. :type out_memory: LayerOutput | None - :param name: lstmemory unit name. + :param name: The lstmemory unit name. :type name: basestring - :param size: lstmemory unit size. + :param size: The lstmemory unit size. :type size: int - :param param_attr: parameter attribute, None means default attribute. + :param param_attr: The parameter attribute for the weights in + input to hidden projection. + None means default attribute. :type param_attr: ParameterAttribute - :param act: last activiation type of lstm. + :param act: The last activiation type of lstm. :type act: BaseActivation - :param gate_act: gate activiation type of lstm. + :param gate_act: The gate activiation type of lstm. :type gate_act: BaseActivation - :param state_act: state activiation type of lstm. + :param state_act: The state activiation type of lstm. :type state_act: BaseActivation - :param input_proj_bias_attr: bias attribute for input to hidden projection. - False means no bias, None means default bias. - :type input_proj_bias_attr: ParameterAttribute|False|None - :param input_proj_layer_attr: extra layer attribute for input to hidden - projection of the LSTM unit, such as dropout, error clipping. + :param input_proj_bias_attr: The parameter attribute for the bias in + input to hidden projection. + False or None means no bias. + If this parameter is set to True, + the bias is initialized to zero. + :type input_proj_bias_attr: ParameterAttribute|bool|None + :param input_proj_layer_attr: The extra layer attribute for + input to hidden projection of the LSTM unit, + such as dropout, error clipping. :type input_proj_layer_attr: ExtraLayerAttribute - :param lstm_bias_attr: bias parameter attribute of lstm layer. - False means no bias, None means default bias. - :type lstm_bias_attr: ParameterAttribute|False|None - :param lstm_layer_attr: extra attribute of lstm layer. + :param lstm_bias_attr: The parameter attribute for the bias in lstm layer. + False or None means no bias. + If this parameter is set to True, + the bias is initialized to zero. + :type lstm_bias_attr: ParameterAttribute|True|None + :param lstm_layer_attr: The extra attribute of lstm layer. :type lstm_layer_attr: ExtraLayerAttribute - :return: lstmemory unit name. + :return: The lstmemory unit name. :rtype: LayerOutput """ if size is None: @@ -786,34 +794,42 @@ def lstmemory_group(input, gate_act=SigmoidActivation(), state_act=TanhActivation()) - :param input: input layer. + :param input: Input layer. :type input: LayerOutput - :param size: lstmemory group size. + :param size: The lstmemory group size. :type size: int - :param name: name of lstmemory group. + :param name: The name of lstmemory group. :type name: basestring - :param out_memory: output of previous time step. + :param out_memory: The output of previous time step. :type out_memory: LayerOutput | None - :param reverse: process the input in a reverse order or not. + :param reverse: Process the input in a reverse order or not. :type reverse: bool - :param param_attr: parameter attribute, None means default attribute. + :param param_attr: The parameter attribute for the weights in + input to hidden projection. + None means default attribute. :type param_attr: ParameterAttribute - :param act: last activiation type of lstm. + :param act: The last activiation type of lstm. :type act: BaseActivation - :param gate_act: gate activiation type of lstm. + :param gate_act: The gate activiation type of lstm. :type gate_act: BaseActivation - :param state_act: state activiation type of lstm. + :param state_act: The state activiation type of lstm. :type state_act: BaseActivation - :param lstm_bias_attr: bias parameter attribute of lstm layer. - False means no bias, None means default bias. - :type lstm_bias_attr: ParameterAttribute|False|None - :param input_proj_bias_attr: bias attribute for input to hidden projection. - False means no bias, None means default bias. - :type input_proj_bias_attr: ParameterAttribute|False|None - :param input_proj_layer_attr: extra layer attribute for input to hidden - projection of the LSTM unit, such as dropout, error clipping. + :param input_proj_bias_attr: The parameter attribute for the bias in + input to hidden projection. + False or None means no bias. + If this parameter is set to True, + the bias is initialized to zero. + :type input_proj_bias_attr: ParameterAttribute|bool|None + :param input_proj_layer_attr: The extra layer attribute for + input to hidden projection of the LSTM unit, + such as dropout, error clipping. :type input_proj_layer_attr: ExtraLayerAttribute - :param lstm_layer_attr: lstm layer's extra attribute. + :param lstm_bias_attr: The parameter attribute for the bias in lstm layer. + False or None means no bias. + If this parameter is set to True, + the bias is initialized to zero. + :type lstm_bias_attr: ParameterAttribute|True|None + :param lstm_layer_attr: The extra attribute of lstm layer. :type lstm_layer_attr: ExtraLayerAttribute :return: the lstmemory group. :rtype: LayerOutput diff --git a/python/paddle/trainer_config_helpers/poolings.py b/python/paddle/trainer_config_helpers/poolings.py index 0c38a8dce5..f45616551b 100644 --- a/python/paddle/trainer_config_helpers/poolings.py +++ b/python/paddle/trainer_config_helpers/poolings.py @@ -15,8 +15,8 @@ """ __all__ = [ - "BasePoolingType", "MaxPooling", "AvgPooling", "CudnnMaxPooling", - "CudnnAvgPooling", "SumPooling", "SquareRootNPooling" + "BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling", + "CudnnMaxPooling", "CudnnAvgPooling", "SumPooling", "SquareRootNPooling" ] @@ -55,6 +55,19 @@ class MaxPooling(BasePoolingType): self.output_max_index = output_max_index +class MaxWithMaskPooling(BasePoolingType): + """ + MaxWithMask pooling. + + Not only return the very large values for each dimension in sequence or time steps, + but also the location indices of found maxinum values. + + """ + + def __init__(self): + BasePoolingType.__init__(self, "max-pool-with-mask") + + class CudnnMaxPooling(BasePoolingType): """ Cudnn max pooling only support GPU. Return the maxinum value in the diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr index 5ddf6052df..b14121e82c 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -28,6 +28,8 @@ layers { stride_y: 1 output_y: 227 img_size_y: 256 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index c0252b945b..c7a487a112 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -28,6 +28,8 @@ layers { stride_y: 1 output_y: 227 img_size_y: 256 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr index fd5224ca55..25ec632375 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bilinear_interp.protostr @@ -28,6 +28,8 @@ layers { stride_y: 1 output_y: 48 img_size_y: 48 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr index 03f4f3a31d..39dc487146 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_maxout.protostr @@ -30,6 +30,8 @@ layers { stride_y: 1 output_y: 48 img_size_y: 48 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" @@ -105,6 +107,8 @@ layers { stride_y: 1 output_y: 24 img_size_y: 24 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_1__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr index 15c6ab4dc8..d5d6d31a17 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pad.protostr @@ -30,6 +30,8 @@ layers { stride_y: 1 output_y: 48 img_size_y: 48 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr index f1bc65b3ae..0ec88aa998 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -36,6 +36,8 @@ layers { stride_y: 1 output_y: 14 img_size_y: 14 + dilation: 1 + dilation_y: 1 } } bias_parameter_name: "___conv_0__.wbias" diff --git a/python/paddle/v2/framework/.gitignore b/python/paddle/v2/fluid/.gitignore similarity index 100% rename from python/paddle/v2/framework/.gitignore rename to python/paddle/v2/fluid/.gitignore diff --git a/python/paddle/v2/framework/__init__.py b/python/paddle/v2/fluid/__init__.py similarity index 100% rename from python/paddle/v2/framework/__init__.py rename to python/paddle/v2/fluid/__init__.py diff --git a/python/paddle/v2/framework/backward.py b/python/paddle/v2/fluid/backward.py similarity index 97% rename from python/paddle/v2/framework/backward.py rename to python/paddle/v2/fluid/backward.py index 678efd5d20..f188582178 100644 --- a/python/paddle/v2/framework/backward.py +++ b/python/paddle/v2/fluid/backward.py @@ -1,4 +1,4 @@ -from paddle.v2.framework import framework as framework +from paddle.v2.fluid import framework as framework __all__ = ['append_backward_ops'] diff --git a/python/paddle/v2/framework/default_scope_funcs.py b/python/paddle/v2/fluid/default_scope_funcs.py similarity index 92% rename from python/paddle/v2/framework/default_scope_funcs.py rename to python/paddle/v2/fluid/default_scope_funcs.py index c07f9a6ab9..60c6165b6b 100644 --- a/python/paddle/v2/framework/default_scope_funcs.py +++ b/python/paddle/v2/fluid/default_scope_funcs.py @@ -13,7 +13,7 @@ A `scoped_function` will take a `function` as input. That function will be invoked in a new local scope. """ -import paddle.v2.framework.core +import paddle.v2.fluid.core import threading __tl_scope__ = threading.local() @@ -27,13 +27,13 @@ __all__ = [ def get_cur_scope(): """ Get current scope. - :rtype: paddle.v2.framework.core.Scope + :rtype: paddle.v2.fluid.core.Scope """ cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None) if cur_scope_stack is None: __tl_scope__.cur_scope = list() if len(__tl_scope__.cur_scope) == 0: - __tl_scope__.cur_scope.append(paddle.v2.framework.core.Scope()) + __tl_scope__.cur_scope.append(paddle.v2.fluid.core.Scope()) return __tl_scope__.cur_scope[-1] diff --git a/python/paddle/v2/framework/evaluator.py b/python/paddle/v2/fluid/evaluator.py similarity index 94% rename from python/paddle/v2/framework/evaluator.py rename to python/paddle/v2/fluid/evaluator.py index 254dd5f1a3..180d0135ff 100644 --- a/python/paddle/v2/framework/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -1,6 +1,6 @@ -import paddle.v2.framework.op as op +import paddle.v2.fluid.op as op import numpy as np -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core def avg_accumulate(accumulated_var, per_eval, num_batches, place): @@ -22,7 +22,7 @@ class Evaluator(object): NOTE: default run on CPUPlace(), running on GPUPlace doesn't improve performance much. :param scope: the scope instance contains the input. - :type scope: paddle.v2.framework.core.scope + :type scope: paddle.v2.fluid.core.scope :param operator: operator name for caculating the evaluation for each mini-batch. :type operator: string :param input: output variable name of forward network. diff --git a/python/paddle/v2/framework/executor.py b/python/paddle/v2/fluid/executor.py similarity index 94% rename from python/paddle/v2/framework/executor.py rename to python/paddle/v2/fluid/executor.py index f5c833190e..ed1c2c06da 100644 --- a/python/paddle/v2/framework/executor.py +++ b/python/paddle/v2/fluid/executor.py @@ -1,5 +1,5 @@ -import paddle.v2.framework.core as core -from paddle.v2.framework.framework import Block, Program, g_main_program +import paddle.v2.fluid.core as core +from paddle.v2.fluid.framework import Block, Program, g_main_program g_scope = core.Scope() diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/fluid/framework.py similarity index 99% rename from python/paddle/v2/framework/framework.py rename to python/paddle/v2/fluid/framework.py index b9db2707c0..e2587b4f74 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -1,5 +1,5 @@ -import paddle.v2.framework.core as core -import paddle.v2.framework.proto.framework_pb2 as framework_pb2 +import paddle.v2.fluid.core as core +import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 import collections import numpy as np import copy @@ -285,7 +285,7 @@ class Operator(object): self.desc.check_attrs() no_kernel_op_set = { 'feed', 'fetch', 'save', 'load', 'recurrent', - 'rnn_memory_helper_grad', 'while' + 'rnn_memory_helper_grad', 'conditional_block', 'while' } if type not in no_kernel_op_set: self.desc.infer_var_type(self.block.desc) diff --git a/python/paddle/v2/framework/initializer.py b/python/paddle/v2/fluid/initializer.py similarity index 99% rename from python/paddle/v2/framework/initializer.py rename to python/paddle/v2/fluid/initializer.py index 98a87bfa86..ded144ecd5 100644 --- a/python/paddle/v2/framework/initializer.py +++ b/python/paddle/v2/fluid/initializer.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.framework as framework +import paddle.v2.fluid.framework as framework import numpy as np __all__ = [ diff --git a/python/paddle/v2/framework/io.py b/python/paddle/v2/fluid/io.py similarity index 98% rename from python/paddle/v2/framework/io.py rename to python/paddle/v2/fluid/io.py index 5c247904a3..394a171c67 100644 --- a/python/paddle/v2/framework/io.py +++ b/python/paddle/v2/fluid/io.py @@ -1,7 +1,7 @@ import os import cPickle as pickle -from paddle.v2.framework.framework import Program, Parameter, g_main_program, \ +from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \ Variable __all__ = [ diff --git a/python/paddle/v2/framework/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py similarity index 98% rename from python/paddle/v2/framework/layer_helper.py rename to python/paddle/v2/fluid/layer_helper.py index 552976185d..9dc3c119ea 100644 --- a/python/paddle/v2/framework/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -1,9 +1,9 @@ import copy import itertools -from paddle.v2.framework.framework import Variable, g_main_program, \ +from paddle.v2.fluid.framework import Variable, g_main_program, \ g_startup_program, unique_name, Program -from paddle.v2.framework.initializer import ConstantInitializer, \ +from paddle.v2.fluid.initializer import ConstantInitializer, \ UniformInitializer, XavierInitializer diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/fluid/layers.py similarity index 89% rename from python/paddle/v2/framework/layers.py rename to python/paddle/v2/fluid/layers.py index fe3c86febb..8a1aa1c42d 100644 --- a/python/paddle/v2/framework/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -1,17 +1,17 @@ -import paddle.v2.framework.core as core -import paddle.v2.framework.proto.framework_pb2 as framework_pb2 -from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \ +import paddle.v2.fluid.core as core +import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 +from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \ Operator -from paddle.v2.framework.initializer import ConstantInitializer, \ +from paddle.v2.fluid.initializer import ConstantInitializer, \ NormalInitializer -from paddle.v2.framework.layer_helper import LayerHelper, unique_name +from paddle.v2.fluid.layer_helper import LayerHelper, unique_name import re import cStringIO __all__ = [ 'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat', 'StaticRNN', 'cast', 'sequence_conv', 'sequence_pool', 'sums', 'cos_sim', - 'batch_norm', 'accuracy' + 'batch_norm', 'accuracy', 'split_lod_tensor' ] @@ -226,6 +226,11 @@ def data(name, stop_gradient=stop_gradient) +def create_tensor(dtype, name=None, main_program=None): + helper = LayerHelper("create_tensor", **locals()) + return helper.create_variable(name=helper.name, dtype=dtype) + + def _convert_(name): """ Formatting. @@ -451,6 +456,56 @@ def sums(input, main_program=None, startup_program=None): return out +def assign(input, output, main_program=None): + helper = LayerHelper('assign', **locals()) + helper.append_op( + type='scale', + inputs={'X': [input]}, + outputs={'Out': [output]}, + attrs={'scale': 1.0}) + return output + + +def split_lod_tensor(input, + mask, + level, + main_program=None, + startup_program=None): + helper = LayerHelper('split_lod_tensor', **locals()) + out_true = helper.create_tmp_variable(dtype=input.data_type) + out_false = helper.create_tmp_variable(dtype=input.data_type) + helper.append_op( + type='split_lod_tensor', + inputs={ + 'X': input, + 'Mask': mask, + }, + outputs={'OutTrue': out_true, + 'OutFalse': out_false}, + attrs={'level': level}) + return out_true, out_false + + +def merge_lod_tensor(in_true, + in_false, + x, + mask, + level, + main_program=None, + startup_program=None): + helper = LayerHelper('merge_lod_tensor', **locals()) + out = helper.create_tmp_variable(dtype=x.data_type) + helper.append_op( + type='merge_lod_tensor', + inputs={'X': x, + 'Mask': mask, + 'InTrue': in_true, + 'InFalse': in_false}, + outputs={'Out': out}, + attrs={'level': level}) + return out + + def cos_sim(X, Y, **kwargs): """ This function performs the cosine similarity between two tensors @@ -784,6 +839,23 @@ def batch_norm(input, return helper.append_activation(batch_norm_out) +def beam_search_decode(ids, scores, main_program=None, startup_program=None): + helper = LayerHelper('beam_search_decode', **locals()) + sentence_ids = helper.create_tmp_variable(dtype=ids.data_type) + sentence_scores = helper.create_tmp_variable(dtype=ids.data_type) + + helper.append_op( + type="beam_search_decode", + inputs={"Ids": ids, + "Scores": scores}, + outputs={ + "SentenceIds": sentence_ids, + "SentenceScores": sentence_scores + }) + + return sentence_ids, sentence_scores + + class BlockGuard(object): """ BlockGuard class. @@ -1375,3 +1447,73 @@ def array_length(array, main_program=None): helper.append_op( type='lod_array_length', inputs={'X': [array]}, outputs={'Out': [tmp]}) return tmp + + +class ConditionalBlockGuard(BlockGuard): + def __init__(self, block): + if not isinstance(block, ConditionalBlock): + raise TypeError("block should be conditional block") + super(ConditionalBlockGuard, self).__init__(block.helper.main_program) + self.block = block + + def __enter__(self): + return super(ConditionalBlockGuard, self).__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + self.block.complete() + return super(ConditionalBlockGuard, self).__exit__(exc_type, exc_val, + exc_tb) + + +class ConditionalBlock(object): + def __init__(self, inputs, name=None, main_program=None): + for each_input in inputs: + if not isinstance(each_input, Variable): + raise TypeError("Each input should be variable") + self.inputs = inputs + self.helper = LayerHelper( + 'conditional_block', name=name, main_program=main_program) + + def block(self): + return ConditionalBlockGuard(self) + + def complete(self): + inside_block = self.helper.main_program.current_block() + parent_block = self.helper.main_program.block(inside_block.parent_idx) + + intermediate = set() + params = set() + + for each_op in inside_block.ops: + assert isinstance(each_op, Operator) + for iname in each_op.input_names: + for in_var_name in each_op.input(iname): + if in_var_name not in intermediate: + params.add(in_var_name) + + for oname in each_op.output_names: + for out_var_name in each_op.output(oname): + intermediate.add(out_var_name) + input_set = set([ipt.name for ipt in self.inputs]) + + param_list = [ + parent_block.var(each_name) for each_name in params + if each_name not in input_set + ] + + out_list = [ + parent_block.var(var_name) for var_name in parent_block.vars + if var_name not in intermediate + ] + + step_scope = parent_block.create_var( + type=core.VarDesc.VarType.STEP_SCOPES) + parent_block.append_op( + type='conditional_block', + inputs={ + 'X': self.inputs, + 'Params': param_list, + }, + outputs={'Out': out_list, + 'Scope': [step_scope]}, + attrs={'block': inside_block}) diff --git a/python/paddle/v2/framework/net_drawer.py b/python/paddle/v2/fluid/net_drawer.py similarity index 96% rename from python/paddle/v2/framework/net_drawer.py rename to python/paddle/v2/fluid/net_drawer.py index 045e267c25..17ad547c2b 100644 --- a/python/paddle/v2/framework/net_drawer.py +++ b/python/paddle/v2/fluid/net_drawer.py @@ -3,8 +3,8 @@ import json import logging from collections import defaultdict -import paddle.v2.framework.core as core -import paddle.v2.framework.proto.framework_pb2 as framework_pb2 +import paddle.v2.fluid.core as core +import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/python/paddle/v2/framework/nets.py b/python/paddle/v2/fluid/nets.py similarity index 98% rename from python/paddle/v2/framework/nets.py rename to python/paddle/v2/fluid/nets.py index 725d2fa7f5..5e14ca594b 100644 --- a/python/paddle/v2/framework/nets.py +++ b/python/paddle/v2/fluid/nets.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.layers as layers +import paddle.v2.fluid.layers as layers __all__ = ["simple_img_conv_pool", "sequence_conv_pool"] diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/fluid/op.py similarity index 98% rename from python/paddle/v2/framework/op.py rename to python/paddle/v2/fluid/op.py index bc771a964a..5828803497 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/fluid/op.py @@ -1,5 +1,5 @@ -import paddle.v2.framework.core as core -import paddle.v2.framework.proto.framework_pb2 as framework_pb2 +import paddle.v2.fluid.core as core +import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 def get_all_op_protos(): diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/fluid/optimizer.py similarity index 98% rename from python/paddle/v2/framework/optimizer.py rename to python/paddle/v2/fluid/optimizer.py index f06c0fb98d..4252a6f085 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -1,11 +1,11 @@ from collections import defaultdict -import paddle.v2.framework.framework as framework -from paddle.v2.framework.framework import unique_name, Program -from paddle.v2.framework.backward import append_backward_ops -from paddle.v2.framework.initializer import ConstantInitializer -from paddle.v2.framework.regularizer import append_regularization_ops -from paddle.v2.framework.layer_helper import LayerHelper +import paddle.v2.fluid.framework as framework +from paddle.v2.fluid.framework import unique_name, Program +from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.initializer import ConstantInitializer +from paddle.v2.fluid.regularizer import append_regularization_ops +from paddle.v2.fluid.layer_helper import LayerHelper __all__ = [ 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', diff --git a/python/paddle/v2/framework/regularizer.py b/python/paddle/v2/fluid/regularizer.py similarity index 98% rename from python/paddle/v2/framework/regularizer.py rename to python/paddle/v2/fluid/regularizer.py index 5111ac5566..098cd0dd64 100644 --- a/python/paddle/v2/framework/regularizer.py +++ b/python/paddle/v2/fluid/regularizer.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.framework as framework +import paddle.v2.fluid.framework as framework __all__ = [ 'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer' diff --git a/python/paddle/v2/framework/tests/.gitignore b/python/paddle/v2/fluid/tests/.gitignore similarity index 100% rename from python/paddle/v2/framework/tests/.gitignore rename to python/paddle/v2/fluid/tests/.gitignore diff --git a/python/paddle/v2/fluid/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/CMakeLists.txt new file mode 100644 index 0000000000..e795627bfe --- /dev/null +++ b/python/paddle/v2/fluid/tests/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() + +add_subdirectory(book) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/book/CMakeLists.txt similarity index 100% rename from python/paddle/v2/framework/tests/CMakeLists.txt rename to python/paddle/v2/fluid/tests/book/CMakeLists.txt diff --git a/python/paddle/v2/framework/tests/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py similarity index 87% rename from python/paddle/v2/framework/tests/test_fit_a_line.py rename to python/paddle/v2/fluid/tests/book/test_fit_a_line.py index 6e09b88dca..5ef963bffa 100644 --- a/python/paddle/v2/framework/tests/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -1,11 +1,11 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import Program -from paddle.v2.framework.io import save_persistables, load_persistables -from paddle.v2.framework.executor import Executor +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.io import save_persistables, load_persistables +from paddle.v2.fluid.executor import Executor import numpy as np diff --git a/python/paddle/v2/framework/tests/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py similarity index 95% rename from python/paddle/v2/framework/tests/test_image_classification_train.py rename to python/paddle/v2/fluid/tests/book/test_image_classification_train.py index a4165da970..e253b8d27f 100644 --- a/python/paddle/v2/framework/tests/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -1,12 +1,12 @@ import numpy as np import paddle.v2 as paddle -import paddle.v2.framework.core as core -import paddle.v2.framework.layers as layers -import paddle.v2.framework.nets as nets -import paddle.v2.framework.optimizer as optimizer -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.framework import g_startup_program, g_main_program -from paddle.v2.framework.initializer import XavierInitializer +import paddle.v2.fluid.core as core +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets +import paddle.v2.fluid.optimizer as optimizer +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.framework import g_startup_program, g_main_program +from paddle.v2.fluid.initializer import XavierInitializer def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py similarity index 90% rename from python/paddle/v2/framework/tests/test_recognize_digits_conv.py rename to python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index 66c629eb42..2b72312541 100644 --- a/python/paddle/v2/framework/tests/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -1,11 +1,11 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.nets as nets -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import Program -from paddle.v2.framework.executor import Executor +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor import numpy as np diff --git a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py similarity index 88% rename from python/paddle/v2/framework/tests/test_recognize_digits_mlp.py rename to python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index 076cf88216..2e1a9f236b 100644 --- a/python/paddle/v2/framework/tests/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -1,12 +1,12 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer - -from paddle.v2.framework.framework import Program -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.regularizer import L2DecayRegularizer -from paddle.v2.framework.initializer import UniformInitializer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer + +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.regularizer import L2DecayRegularizer +from paddle.v2.fluid.initializer import UniformInitializer import numpy as np diff --git a/python/paddle/v2/framework/tests/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py similarity index 97% rename from python/paddle/v2/framework/tests/test_recommender_system.py rename to python/paddle/v2/fluid/tests/book/test_recommender_system.py index 31562b4391..4708dfe3e9 100644 --- a/python/paddle/v2/framework/tests/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -1,11 +1,11 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.nets as nets -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import Program -from paddle.v2.framework.executor import Executor +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor import numpy as np diff --git a/python/paddle/v2/framework/tests/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py similarity index 90% rename from python/paddle/v2/framework/tests/test_understand_sentiment_conv.py rename to python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index eb377e9264..dc4b63da9b 100644 --- a/python/paddle/v2/framework/tests/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -1,11 +1,11 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.nets as nets -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import Program, g_main_program, g_startup_program -from paddle.v2.framework.executor import Executor +from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program +from paddle.v2.fluid.executor import Executor import numpy as np diff --git a/python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py similarity index 91% rename from python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py rename to python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index 2457c71e1a..6d507f4c8e 100644 --- a/python/paddle/v2/framework/tests/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -1,11 +1,11 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.nets as nets -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import Program, g_main_program, g_startup_program -from paddle.v2.framework.executor import Executor +from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program +from paddle.v2.fluid.executor import Executor import numpy as np diff --git a/python/paddle/v2/framework/tests/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py similarity index 92% rename from python/paddle/v2/framework/tests/test_understand_sentiment_lstm.py rename to python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index 26cbd01bc0..848dcce974 100644 --- a/python/paddle/v2/framework/tests/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -1,10 +1,10 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import g_main_program, g_startup_program -from paddle.v2.framework.executor import Executor +from paddle.v2.fluid.framework import g_main_program, g_startup_program +from paddle.v2.fluid.executor import Executor import numpy as np diff --git a/python/paddle/v2/framework/tests/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py similarity index 95% rename from python/paddle/v2/framework/tests/test_word2vec.py rename to python/paddle/v2/fluid/tests/book/test_word2vec.py index cb9fc2ab62..054dbd5a3d 100644 --- a/python/paddle/v2/framework/tests/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -1,10 +1,10 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import Program -from paddle.v2.framework.executor import Executor +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor import numpy as np diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py similarity index 98% rename from python/paddle/v2/framework/tests/op_test.py rename to python/paddle/v2/fluid/tests/op_test.py index 4a269341a4..90269e308a 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -2,12 +2,12 @@ import unittest import numpy as np import random import itertools -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import collections -from paddle.v2.framework.backward import append_backward_ops -from paddle.v2.framework.op import Operator -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.framework import Program, OpProtoHolder +from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.op import Operator +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.framework import Program, OpProtoHolder def randomize_probability(batch_size, class_num, dtype='float32'): diff --git a/python/paddle/v2/framework/tests/test_accuracy_op.py b/python/paddle/v2/fluid/tests/test_accuracy_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_accuracy_op.py rename to python/paddle/v2/fluid/tests/test_accuracy_op.py diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_activation_op.py rename to python/paddle/v2/fluid/tests/test_activation_op.py diff --git a/python/paddle/v2/framework/tests/test_adadelta_op.py b/python/paddle/v2/fluid/tests/test_adadelta_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_adadelta_op.py rename to python/paddle/v2/fluid/tests/test_adadelta_op.py diff --git a/python/paddle/v2/framework/tests/test_adagrad_op.py b/python/paddle/v2/fluid/tests/test_adagrad_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_adagrad_op.py rename to python/paddle/v2/fluid/tests/test_adagrad_op.py diff --git a/python/paddle/v2/framework/tests/test_adam_op.py b/python/paddle/v2/fluid/tests/test_adam_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_adam_op.py rename to python/paddle/v2/fluid/tests/test_adam_op.py diff --git a/python/paddle/v2/framework/tests/test_adamax_op.py b/python/paddle/v2/fluid/tests/test_adamax_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_adamax_op.py rename to python/paddle/v2/fluid/tests/test_adamax_op.py diff --git a/python/paddle/v2/framework/tests/test_array_read_write_op.py b/python/paddle/v2/fluid/tests/test_array_read_write_op.py similarity index 91% rename from python/paddle/v2/framework/tests/test_array_read_write_op.py rename to python/paddle/v2/fluid/tests/test_array_read_write_op.py index 79e9938216..e019a4e15f 100644 --- a/python/paddle/v2/framework/tests/test_array_read_write_op.py +++ b/python/paddle/v2/fluid/tests/test_array_read_write_op.py @@ -1,9 +1,9 @@ import unittest -import paddle.v2.framework.core as core -import paddle.v2.framework.layers as layers -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.backward import append_backward_ops -from paddle.v2.framework.framework import g_main_program +import paddle.v2.fluid.core as core +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.framework import g_main_program import numpy diff --git a/python/paddle/v2/fluid/tests/test_assign_op.py b/python/paddle/v2/fluid/tests/test_assign_op.py new file mode 100644 index 0000000000..1b0c145f1a --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_assign_op.py @@ -0,0 +1,21 @@ +import op_test +import numpy +import unittest + + +class TestAssignOp(op_test.OpTest): + def setUp(self): + self.op_type = "assign" + x = numpy.random.random(size=(100, 10)) + self.inputs = {'X': x} + self.outputs = {'Out': x} + + def test_forward(self): + self.check_output() + + def test_backward(self): + self.check_grad(['X'], 'Out') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_auc_op.py b/python/paddle/v2/fluid/tests/test_auc_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_auc_op.py rename to python/paddle/v2/fluid/tests/test_auc_op.py diff --git a/python/paddle/v2/framework/tests/test_batch_norm_op.py b/python/paddle/v2/fluid/tests/test_batch_norm_op.py similarity index 99% rename from python/paddle/v2/framework/tests/test_batch_norm_op.py rename to python/paddle/v2/fluid/tests/test_batch_norm_op.py index dee339f43c..71f9599e0d 100644 --- a/python/paddle/v2/framework/tests/test_batch_norm_op.py +++ b/python/paddle/v2/fluid/tests/test_batch_norm_op.py @@ -1,8 +1,8 @@ import unittest import numpy as np from op_test import OpTest -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator def grad_var_name(var_name): diff --git a/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py b/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py new file mode 100644 index 0000000000..080ca43b82 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py @@ -0,0 +1,37 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestBilinearTensorProductOp(OpTest): + def setUp(self): + self.op_type = "bilinear_tensor_product" + batch_size = 6 + size0 = 3 + size1 = 4 + size2 = 5 + a = np.random.random((batch_size, size0)).astype("float32") + b = np.random.random((batch_size, size1)).astype("float32") + w = np.random.random((size2, size0, size1)).astype("float32") + bias = np.random.random((1, size2)).astype("float32") + output = np.zeros((batch_size, size2)).astype("float32") + for i in range(size2): + w_i = w[i, :, :] + output[:, i] = np.sum(np.matmul(a, w_i) * b, axis=1) + self.inputs = { + 'X': a, + 'Y': b, + 'Weight': w, + 'Bias': bias, + } + self.outputs = {'Out': output + bias} + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y', 'Weight', 'Bias'], 'Out') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_cast_op.py b/python/paddle/v2/fluid/tests/test_cast_op.py similarity index 93% rename from python/paddle/v2/framework/tests/test_cast_op.py rename to python/paddle/v2/fluid/tests/test_cast_op.py index 52ee71a8a4..0c4b631065 100644 --- a/python/paddle/v2/framework/tests/test_cast_op.py +++ b/python/paddle/v2/fluid/tests/test_cast_op.py @@ -1,7 +1,7 @@ import op_test import unittest import numpy as np -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core class TestCastOp(op_test.OpTest): diff --git a/python/paddle/v2/framework/tests/test_chunk_eval_op.py b/python/paddle/v2/fluid/tests/test_chunk_eval_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_chunk_eval_op.py rename to python/paddle/v2/fluid/tests/test_chunk_eval_op.py diff --git a/python/paddle/v2/framework/tests/test_clip_by_norm_op.py b/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_clip_by_norm_op.py rename to python/paddle/v2/fluid/tests/test_clip_by_norm_op.py diff --git a/python/paddle/v2/framework/tests/test_clip_op.py b/python/paddle/v2/fluid/tests/test_clip_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_clip_op.py rename to python/paddle/v2/fluid/tests/test_clip_op.py diff --git a/python/paddle/v2/framework/tests/test_compare_op.py b/python/paddle/v2/fluid/tests/test_compare_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_compare_op.py rename to python/paddle/v2/fluid/tests/test_compare_op.py diff --git a/python/paddle/v2/framework/tests/test_concat_op.py b/python/paddle/v2/fluid/tests/test_concat_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_concat_op.py rename to python/paddle/v2/fluid/tests/test_concat_op.py diff --git a/python/paddle/v2/framework/tests/test_cond_op.py b/python/paddle/v2/fluid/tests/test_cond_op.py similarity index 97% rename from python/paddle/v2/framework/tests/test_cond_op.py rename to python/paddle/v2/fluid/tests/test_cond_op.py index 09a3f5dc97..9d1df44b90 100644 --- a/python/paddle/v2/framework/tests/test_cond_op.py +++ b/python/paddle/v2/fluid/tests/test_cond_op.py @@ -1,8 +1,8 @@ import logging -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import unittest import numpy as np -from paddle.v2.framework.op import Operator, CondOp +from paddle.v2.fluid.op import Operator, CondOp class PySimpleCond(object): diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py new file mode 100644 index 0000000000..293803f004 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_conditional_block.py @@ -0,0 +1,40 @@ +import unittest +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.core as core +from paddle.v2.fluid.framework import g_startup_program, g_main_program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.backward import append_backward_ops +import numpy + + +class ConditionalBlock(unittest.TestCase): + def test_forward(self): + data = layers.data(name='X', shape=[1], data_type='float32') + data.stop_gradient = False + cond = layers.ConditionalBlock(inputs=[data]) + out = layers.create_tensor(dtype='float32') + with cond.block(): + hidden = layers.fc(input=data, size=10) + layers.assign(hidden, out) + + cpu = core.CPUPlace() + exe = Executor(cpu) + exe.run(g_startup_program) + + x = core.LoDTensor() + x.set(numpy.random.random(size=(10, 1)).astype('float32'), cpu) + + outs = map(numpy.array, exe.run(feed={'X': x}, fetch_list=[out]))[0] + print outs + loss = layers.mean(x=out) + append_backward_ops(loss=loss) + outs = map(numpy.array, + exe.run(feed={'X': x}, + fetch_list=[ + g_main_program.block(0).var(data.name + "@GRAD") + ]))[0] + print outs + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_conv2d_op.py rename to python/paddle/v2/fluid/tests/test_conv2d_op.py diff --git a/python/paddle/v2/framework/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_conv2d_transpose_op.py rename to python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py diff --git a/python/paddle/v2/framework/tests/test_conv3d_op.py b/python/paddle/v2/fluid/tests/test_conv3d_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_conv3d_op.py rename to python/paddle/v2/fluid/tests/test_conv3d_op.py diff --git a/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_conv3d_transpose_op.py rename to python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py diff --git a/python/paddle/v2/framework/tests/test_conv_shift_op.py b/python/paddle/v2/fluid/tests/test_conv_shift_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_conv_shift_op.py rename to python/paddle/v2/fluid/tests/test_conv_shift_op.py diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/fluid/tests/test_cos_sim_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_cos_sim_op.py rename to python/paddle/v2/fluid/tests/test_cos_sim_op.py diff --git a/python/paddle/v2/framework/tests/test_create_op_doc_string.py b/python/paddle/v2/fluid/tests/test_create_op_doc_string.py similarity index 80% rename from python/paddle/v2/framework/tests/test_create_op_doc_string.py rename to python/paddle/v2/fluid/tests/test_create_op_doc_string.py index d21e96df2a..42b6f7a361 100644 --- a/python/paddle/v2/framework/tests/test_create_op_doc_string.py +++ b/python/paddle/v2/fluid/tests/test_create_op_doc_string.py @@ -1,5 +1,5 @@ import unittest -import paddle.v2.framework.layers as layers +import paddle.v2.fluid.layers as layers class TestDocString(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_crf_decoding_op.py b/python/paddle/v2/fluid/tests/test_crf_decoding_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_crf_decoding_op.py rename to python/paddle/v2/fluid/tests/test_crf_decoding_op.py diff --git a/python/paddle/v2/framework/tests/test_crop_op.py b/python/paddle/v2/fluid/tests/test_crop_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_crop_op.py rename to python/paddle/v2/fluid/tests/test_crop_op.py diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/fluid/tests/test_cross_entropy_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_cross_entropy_op.py rename to python/paddle/v2/fluid/tests/test_cross_entropy_op.py diff --git a/python/paddle/v2/framework/tests/test_decayed_adagrad_op.py b/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_decayed_adagrad_op.py rename to python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py diff --git a/python/paddle/v2/framework/tests/test_default_scope_funcs.py b/python/paddle/v2/fluid/tests/test_default_scope_funcs.py similarity index 94% rename from python/paddle/v2/framework/tests/test_default_scope_funcs.py rename to python/paddle/v2/fluid/tests/test_default_scope_funcs.py index 09a9850d05..738e69529e 100644 --- a/python/paddle/v2/framework/tests/test_default_scope_funcs.py +++ b/python/paddle/v2/fluid/tests/test_default_scope_funcs.py @@ -1,4 +1,4 @@ -from paddle.v2.framework.default_scope_funcs import * +from paddle.v2.fluid.default_scope_funcs import * import unittest diff --git a/python/paddle/v2/framework/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_dropout_op.py rename to python/paddle/v2/fluid/tests/test_dropout_op.py diff --git a/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py b/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py similarity index 98% rename from python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py rename to python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py index 70af9dbc49..c2d8b48ea9 100644 --- a/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py +++ b/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py @@ -1,7 +1,7 @@ import logging -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import unittest -from paddle.v2.framework.op import Operator, DynamicRecurrentOp +from paddle.v2.fluid.op import Operator, DynamicRecurrentOp import numpy as np # for siplicity, just one level LoD diff --git a/python/paddle/v2/framework/tests/test_elementwise_add_op.py b/python/paddle/v2/fluid/tests/test_elementwise_add_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_elementwise_add_op.py rename to python/paddle/v2/fluid/tests/test_elementwise_add_op.py diff --git a/python/paddle/v2/framework/tests/test_elementwise_div_op.py b/python/paddle/v2/fluid/tests/test_elementwise_div_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_elementwise_div_op.py rename to python/paddle/v2/fluid/tests/test_elementwise_div_op.py diff --git a/python/paddle/v2/framework/tests/test_elementwise_mul_op.py b/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_elementwise_mul_op.py rename to python/paddle/v2/fluid/tests/test_elementwise_mul_op.py diff --git a/python/paddle/v2/framework/tests/test_elementwise_sub_op.py b/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_elementwise_sub_op.py rename to python/paddle/v2/fluid/tests/test_elementwise_sub_op.py diff --git a/python/paddle/v2/framework/tests/test_evaluator.py b/python/paddle/v2/fluid/tests/test_evaluator.py similarity index 92% rename from python/paddle/v2/framework/tests/test_evaluator.py rename to python/paddle/v2/fluid/tests/test_evaluator.py index 37dbfbc06b..1d51205b70 100644 --- a/python/paddle/v2/framework/tests/test_evaluator.py +++ b/python/paddle/v2/fluid/tests/test_evaluator.py @@ -1,6 +1,6 @@ -from paddle.v2.framework.evaluator import Evaluator -from paddle.v2.framework.op import Operator -import paddle.v2.framework.core as core +from paddle.v2.fluid.evaluator import Evaluator +from paddle.v2.fluid.op import Operator +import paddle.v2.fluid.core as core import unittest import op_test import numpy as np diff --git a/python/paddle/v2/framework/tests/test_exception.py b/python/paddle/v2/fluid/tests/test_exception.py similarity index 89% rename from python/paddle/v2/framework/tests/test_exception.py rename to python/paddle/v2/fluid/tests/test_exception.py index 5ae048817c..b871f40c4a 100644 --- a/python/paddle/v2/framework/tests/test_exception.py +++ b/python/paddle/v2/fluid/tests/test_exception.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import unittest diff --git a/python/paddle/v2/framework/tests/test_executor_and_mul.py b/python/paddle/v2/fluid/tests/test_executor_and_mul.py similarity index 83% rename from python/paddle/v2/framework/tests/test_executor_and_mul.py rename to python/paddle/v2/fluid/tests/test_executor_and_mul.py index c885cfbebd..709250d0c8 100644 --- a/python/paddle/v2/framework/tests/test_executor_and_mul.py +++ b/python/paddle/v2/fluid/tests/test_executor_and_mul.py @@ -1,8 +1,8 @@ import unittest -from paddle.v2.framework.layers import mul, data -import paddle.v2.framework.core as core -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.framework import g_main_program +from paddle.v2.fluid.layers import mul, data +import paddle.v2.fluid.core as core +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.framework import g_main_program import numpy diff --git a/python/paddle/v2/framework/tests/test_expand_op.py b/python/paddle/v2/fluid/tests/test_expand_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_expand_op.py rename to python/paddle/v2/fluid/tests/test_expand_op.py diff --git a/python/paddle/v2/framework/tests/test_feed_fetch_method.py b/python/paddle/v2/fluid/tests/test_feed_fetch_method.py similarity index 95% rename from python/paddle/v2/framework/tests/test_feed_fetch_method.py rename to python/paddle/v2/fluid/tests/test_feed_fetch_method.py index fbd659ece0..178c85b0dd 100644 --- a/python/paddle/v2/framework/tests/test_feed_fetch_method.py +++ b/python/paddle/v2/fluid/tests/test_feed_fetch_method.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import unittest import numpy as np diff --git a/python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py b/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py rename to python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py diff --git a/python/paddle/v2/framework/tests/test_fill_constant_op.py b/python/paddle/v2/fluid/tests/test_fill_constant_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_fill_constant_op.py rename to python/paddle/v2/fluid/tests/test_fill_constant_op.py diff --git a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py b/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_fill_zeros_like_op.py rename to python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py diff --git a/python/paddle/v2/framework/tests/test_framework_debug_str.py b/python/paddle/v2/fluid/tests/test_framework_debug_str.py similarity index 85% rename from python/paddle/v2/framework/tests/test_framework_debug_str.py rename to python/paddle/v2/fluid/tests/test_framework_debug_str.py index 8fdf8f9117..a4cbabdb36 100644 --- a/python/paddle/v2/framework/tests/test_framework_debug_str.py +++ b/python/paddle/v2/fluid/tests/test_framework_debug_str.py @@ -1,5 +1,5 @@ import unittest -from paddle.v2.framework.framework import Program +from paddle.v2.fluid.framework import Program class TestDebugStringFramework(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/fluid/tests/test_gather_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_gather_op.py rename to python/paddle/v2/fluid/tests/test_gather_op.py diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py similarity index 91% rename from python/paddle/v2/framework/tests/test_gaussian_random_op.py rename to python/paddle/v2/fluid/tests/test_gaussian_random_op.py index 0dc7e091a5..627ab4e235 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py @@ -1,6 +1,6 @@ import unittest -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator import numpy diff --git a/python/paddle/v2/framework/tests/test_gru_op.py b/python/paddle/v2/fluid/tests/test_gru_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_gru_op.py rename to python/paddle/v2/fluid/tests/test_gru_op.py diff --git a/python/paddle/v2/framework/tests/test_gru_unit_op.py b/python/paddle/v2/fluid/tests/test_gru_unit_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_gru_unit_op.py rename to python/paddle/v2/fluid/tests/test_gru_unit_op.py diff --git a/python/paddle/v2/framework/tests/test_huber_loss_op.py b/python/paddle/v2/fluid/tests/test_huber_loss_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_huber_loss_op.py rename to python/paddle/v2/fluid/tests/test_huber_loss_op.py diff --git a/python/paddle/v2/framework/tests/test_image_classification_layer.py b/python/paddle/v2/fluid/tests/test_image_classification_layer.py similarity index 95% rename from python/paddle/v2/framework/tests/test_image_classification_layer.py rename to python/paddle/v2/fluid/tests/test_image_classification_layer.py index b1a267ec32..bf5444107f 100644 --- a/python/paddle/v2/framework/tests/test_image_classification_layer.py +++ b/python/paddle/v2/fluid/tests/test_image_classification_layer.py @@ -1,8 +1,8 @@ import unittest -import paddle.v2.framework.layers as layers -import paddle.v2.framework.nets as nets -from paddle.v2.framework.framework import Program +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets +from paddle.v2.fluid.framework import Program def conv_block(input, diff --git a/python/paddle/v2/framework/tests/test_infer_shape.py b/python/paddle/v2/fluid/tests/test_infer_shape.py similarity index 98% rename from python/paddle/v2/framework/tests/test_infer_shape.py rename to python/paddle/v2/fluid/tests/test_infer_shape.py index 2b2995f5e2..9f6695ce02 100644 --- a/python/paddle/v2/framework/tests/test_infer_shape.py +++ b/python/paddle/v2/fluid/tests/test_infer_shape.py @@ -1,6 +1,6 @@ import unittest -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core class TestInferShape(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_inference_model_io.py b/python/paddle/v2/fluid/tests/test_inference_model_io.py similarity index 90% rename from python/paddle/v2/framework/tests/test_inference_model_io.py rename to python/paddle/v2/fluid/tests/test_inference_model_io.py index 48984f86a1..98b95713b7 100644 --- a/python/paddle/v2/framework/tests/test_inference_model_io.py +++ b/python/paddle/v2/fluid/tests/test_inference_model_io.py @@ -1,11 +1,11 @@ import paddle.v2 as paddle -import paddle.v2.framework.layers as layers -import paddle.v2.framework.core as core -import paddle.v2.framework.optimizer as optimizer +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.core as core +import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.framework.framework import Program -from paddle.v2.framework.io import save_inference_model, load_inference_model -import paddle.v2.framework.executor as executor +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.io import save_inference_model, load_inference_model +import paddle.v2.fluid.executor as executor import unittest import numpy as np diff --git a/python/paddle/v2/framework/tests/test_initializer.py b/python/paddle/v2/fluid/tests/test_initializer.py similarity index 98% rename from python/paddle/v2/framework/tests/test_initializer.py rename to python/paddle/v2/fluid/tests/test_initializer.py index bd4d2e39d7..f2eb79b209 100644 --- a/python/paddle/v2/framework/tests/test_initializer.py +++ b/python/paddle/v2/fluid/tests/test_initializer.py @@ -1,8 +1,8 @@ import numpy as np import unittest -import paddle.v2.framework.framework as framework -import paddle.v2.framework.initializer as initializer +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.initializer as initializer DELTA = 0.00001 diff --git a/python/paddle/v2/framework/tests/test_l1_norm_op.py b/python/paddle/v2/fluid/tests/test_l1_norm_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_l1_norm_op.py rename to python/paddle/v2/fluid/tests/test_l1_norm_op.py diff --git a/python/paddle/v2/framework/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py similarity index 97% rename from python/paddle/v2/framework/tests/test_layers.py rename to python/paddle/v2/fluid/tests/test_layers.py index b42af5ea45..3d18e7ce3a 100644 --- a/python/paddle/v2/framework/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -1,7 +1,7 @@ -import paddle.v2.framework.layers as layers -import paddle.v2.framework.nets as nets -from paddle.v2.framework.framework import Program -import paddle.v2.framework.core as core +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets +from paddle.v2.fluid.framework import Program +import paddle.v2.fluid.core as core import unittest diff --git a/python/paddle/v2/framework/tests/test_linear_chain_crf_op.py b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_linear_chain_crf_op.py rename to python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py diff --git a/python/paddle/v2/framework/tests/test_lod_array_length_op.py b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py similarity index 79% rename from python/paddle/v2/framework/tests/test_lod_array_length_op.py rename to python/paddle/v2/fluid/tests/test_lod_array_length_op.py index af2b4d705e..a01ae83772 100644 --- a/python/paddle/v2/framework/tests/test_lod_array_length_op.py +++ b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py @@ -1,7 +1,7 @@ import unittest -import paddle.v2.framework.layers as layers -from paddle.v2.framework.executor import Executor -import paddle.v2.framework.core as core +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.executor import Executor +import paddle.v2.fluid.core as core import numpy diff --git a/python/paddle/v2/framework/tests/test_lod_rank_table.py b/python/paddle/v2/fluid/tests/test_lod_rank_table.py similarity index 78% rename from python/paddle/v2/framework/tests/test_lod_rank_table.py rename to python/paddle/v2/fluid/tests/test_lod_rank_table.py index 408145c10f..bbc11930b9 100644 --- a/python/paddle/v2/framework/tests/test_lod_rank_table.py +++ b/python/paddle/v2/fluid/tests/test_lod_rank_table.py @@ -1,7 +1,7 @@ -from paddle.v2.framework.layers import lod_rank_table, data -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.framework import g_main_program -import paddle.v2.framework.core as core +from paddle.v2.fluid.layers import lod_rank_table, data +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.framework import g_main_program +import paddle.v2.fluid.core as core import numpy import unittest diff --git a/python/paddle/v2/fluid/tests/test_lod_reset_op.py b/python/paddle/v2/fluid/tests/test_lod_reset_op.py new file mode 100644 index 0000000000..652ccecfa4 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_lod_reset_op.py @@ -0,0 +1,64 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestLodResetOpByAttr(OpTest): + def setUp(self): + self.op_type = "lod_reset" + x = np.random.random((10, 20)).astype("float32") + lod = [[0, 3, 5, 10]] + target_lod_0 = [0, 7, 10] + self.inputs = {'X': (x, lod)} + self.attrs = {'target_lod': target_lod_0} + self.outputs = {'Out': (x, [target_lod_0])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +class TestLodResetOpByInput(OpTest): + def setUp(self): + self.op_type = "lod_reset" + x = np.random.random((10, 20)).astype("float32") + lod = [[0, 3, 5, 10]] + target_lod_0 = [0, 4, 7, 10] + self.inputs = { + 'X': (x, lod), + 'TargetLoD': np.array([target_lod_0]).astype('int32') + } + self.outputs = {'Out': (x, [target_lod_0])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out", no_grad_set=set("TargetLoD")) + + +class TestLodResetOpBoth(OpTest): + def setUp(self): + self.op_type = "lod_reset" + x = np.random.random((10, 20)).astype("float32") + lod = [[0, 3, 5, 10]] + target_lod_0_attr = [0, 7, 10] + target_lod_0_in = [0, 4, 7, 10] + self.inputs = { + 'X': (x, lod), + 'TargetLoD': np.array(target_lod_0_in).astype('int32') + } + self.attrs = {'target_lod': target_lod_0_attr} + self.outputs = {'Out': (x, [target_lod_0_in])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out", no_grad_set=set("TargetLoD")) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_lod_tensor_array.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array.py similarity index 96% rename from python/paddle/v2/framework/tests/test_lod_tensor_array.py rename to python/paddle/v2/fluid/tests/test_lod_tensor_array.py index a433bcf622..d6d3e23fd8 100644 --- a/python/paddle/v2/framework/tests/test_lod_tensor_array.py +++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array.py @@ -1,5 +1,5 @@ import unittest -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import numpy diff --git a/python/paddle/v2/framework/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py similarity index 96% rename from python/paddle/v2/framework/tests/test_lod_tensor_array_ops.py rename to python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py index e9713666b3..b18cb6b49f 100644 --- a/python/paddle/v2/framework/tests/test_lod_tensor_array_ops.py +++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py @@ -1,10 +1,10 @@ import unittest -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import numpy -import paddle.v2.framework.layers as layers -from paddle.v2.framework.framework import Program -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.backward import append_backward_ops +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.backward import append_backward_ops class TestCPULoDTensorArrayOps(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_lookup_table_op.py b/python/paddle/v2/fluid/tests/test_lookup_table_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_lookup_table_op.py rename to python/paddle/v2/fluid/tests/test_lookup_table_op.py diff --git a/python/paddle/v2/framework/tests/test_lrn_op.py b/python/paddle/v2/fluid/tests/test_lrn_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_lrn_op.py rename to python/paddle/v2/fluid/tests/test_lrn_op.py diff --git a/python/paddle/v2/framework/tests/test_lstm_op.py b/python/paddle/v2/fluid/tests/test_lstm_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_lstm_op.py rename to python/paddle/v2/fluid/tests/test_lstm_op.py diff --git a/python/paddle/v2/framework/tests/test_lstm_unit_op.py b/python/paddle/v2/fluid/tests/test_lstm_unit_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_lstm_unit_op.py rename to python/paddle/v2/fluid/tests/test_lstm_unit_op.py diff --git a/python/paddle/v2/framework/tests/test_margin_rank_loss_op.py b/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_margin_rank_loss_op.py rename to python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py diff --git a/python/paddle/v2/framework/tests/test_matmul_op.py b/python/paddle/v2/fluid/tests/test_matmul_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_matmul_op.py rename to python/paddle/v2/fluid/tests/test_matmul_op.py diff --git a/python/paddle/v2/framework/tests/test_mean_op.py b/python/paddle/v2/fluid/tests/test_mean_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_mean_op.py rename to python/paddle/v2/fluid/tests/test_mean_op.py diff --git a/python/paddle/v2/framework/tests/test_minus_op.py b/python/paddle/v2/fluid/tests/test_minus_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_minus_op.py rename to python/paddle/v2/fluid/tests/test_minus_op.py diff --git a/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py b/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_modified_huber_loss_op.py rename to python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py diff --git a/python/paddle/v2/framework/tests/test_momentum_op.py b/python/paddle/v2/fluid/tests/test_momentum_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_momentum_op.py rename to python/paddle/v2/fluid/tests/test_momentum_op.py diff --git a/python/paddle/v2/framework/tests/test_mul_op.py b/python/paddle/v2/fluid/tests/test_mul_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_mul_op.py rename to python/paddle/v2/fluid/tests/test_mul_op.py diff --git a/python/paddle/v2/framework/tests/test_multiplex_op.py b/python/paddle/v2/fluid/tests/test_multiplex_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_multiplex_op.py rename to python/paddle/v2/fluid/tests/test_multiplex_op.py diff --git a/python/paddle/v2/framework/tests/test_nccl_init_op.py b/python/paddle/v2/fluid/tests/test_nccl_init_op.py similarity index 91% rename from python/paddle/v2/framework/tests/test_nccl_init_op.py rename to python/paddle/v2/fluid/tests/test_nccl_init_op.py index 054909fdf5..a536800ccd 100644 --- a/python/paddle/v2/framework/tests/test_nccl_init_op.py +++ b/python/paddle/v2/fluid/tests/test_nccl_init_op.py @@ -1,8 +1,8 @@ import unittest, os import numpy as np import paddle.v2 as paddle -from paddle.v2.framework.op import Operator -import paddle.v2.framework.core as core +from paddle.v2.fluid.op import Operator +import paddle.v2.fluid.core as core from op_test import OpTest, create_op, set_input if not core.is_compile_gpu(): diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/fluid/tests/test_net.py similarity index 93% rename from python/paddle/v2/framework/tests/test_net.py rename to python/paddle/v2/fluid/tests/test_net.py index 8503257feb..318df08a9e 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/fluid/tests/test_net.py @@ -1,5 +1,5 @@ -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator import unittest diff --git a/python/paddle/v2/framework/tests/test_op_support_gpu.py b/python/paddle/v2/fluid/tests/test_op_support_gpu.py similarity index 84% rename from python/paddle/v2/framework/tests/test_op_support_gpu.py rename to python/paddle/v2/fluid/tests/test_op_support_gpu.py index dd36c666c4..a0eb4bd5fd 100644 --- a/python/paddle/v2/framework/tests/test_op_support_gpu.py +++ b/python/paddle/v2/fluid/tests/test_op_support_gpu.py @@ -1,5 +1,5 @@ import unittest -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core class TestOpSupportGPU(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_operator.py b/python/paddle/v2/fluid/tests/test_operator.py similarity index 97% rename from python/paddle/v2/framework/tests/test_operator.py rename to python/paddle/v2/fluid/tests/test_operator.py index 98f6b2f5ee..4aa022ef90 100644 --- a/python/paddle/v2/framework/tests/test_operator.py +++ b/python/paddle/v2/fluid/tests/test_operator.py @@ -1,7 +1,7 @@ import unittest -import paddle.v2.framework.op as op -import paddle.v2.framework.core as core -import paddle.v2.framework.proto.framework_pb2 as framework_pb2 +import paddle.v2.fluid.op as op +import paddle.v2.fluid.core as core +import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 class TestGetAllProtos(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_operator_desc.py b/python/paddle/v2/fluid/tests/test_operator_desc.py similarity index 96% rename from python/paddle/v2/framework/tests/test_operator_desc.py rename to python/paddle/v2/fluid/tests/test_operator_desc.py index a0bc4e0b91..e8362d2e9c 100644 --- a/python/paddle/v2/framework/tests/test_operator_desc.py +++ b/python/paddle/v2/fluid/tests/test_operator_desc.py @@ -1,6 +1,6 @@ import unittest -from paddle.v2.framework.framework import Variable, Program, g_main_program -import paddle.v2.framework.core as core +from paddle.v2.fluid.framework import Variable, Program, g_main_program +import paddle.v2.fluid.core as core class TestOperator(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py similarity index 98% rename from python/paddle/v2/framework/tests/test_optimizer.py rename to python/paddle/v2/fluid/tests/test_optimizer.py index a39e740260..0ebf7cdf20 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/fluid/tests/test_optimizer.py @@ -1,8 +1,8 @@ import unittest -import paddle.v2.framework.framework as framework -import paddle.v2.framework.optimizer as optimizer -from paddle.v2.framework.backward import append_backward_ops +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.optimizer as optimizer +from paddle.v2.fluid.backward import append_backward_ops class TestOptimizer(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_pad_op.py b/python/paddle/v2/fluid/tests/test_pad_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_pad_op.py rename to python/paddle/v2/fluid/tests/test_pad_op.py diff --git a/python/paddle/v2/framework/tests/test_parameter.py b/python/paddle/v2/fluid/tests/test_parameter.py similarity index 87% rename from python/paddle/v2/framework/tests/test_parameter.py rename to python/paddle/v2/fluid/tests/test_parameter.py index f04eb4cf27..71a1bd2aaf 100644 --- a/python/paddle/v2/framework/tests/test_parameter.py +++ b/python/paddle/v2/fluid/tests/test_parameter.py @@ -1,6 +1,6 @@ import unittest -from paddle.v2.framework.framework import g_main_program -import paddle.v2.framework.core as core +from paddle.v2.fluid.framework import g_main_program +import paddle.v2.fluid.core as core class TestParameter(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_pool2d_op.py b/python/paddle/v2/fluid/tests/test_pool2d_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_pool2d_op.py rename to python/paddle/v2/fluid/tests/test_pool2d_op.py diff --git a/python/paddle/v2/framework/tests/test_pool3d_op.py b/python/paddle/v2/fluid/tests/test_pool3d_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_pool3d_op.py rename to python/paddle/v2/fluid/tests/test_pool3d_op.py diff --git a/python/paddle/v2/framework/tests/test_pool_max_op.py b/python/paddle/v2/fluid/tests/test_pool_max_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_pool_max_op.py rename to python/paddle/v2/fluid/tests/test_pool_max_op.py diff --git a/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py b/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_positive_negative_pair_op.py rename to python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py diff --git a/python/paddle/v2/framework/tests/test_precision_recall_op.py b/python/paddle/v2/fluid/tests/test_precision_recall_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_precision_recall_op.py rename to python/paddle/v2/fluid/tests/test_precision_recall_op.py diff --git a/python/paddle/v2/framework/tests/test_prelu_op.py b/python/paddle/v2/fluid/tests/test_prelu_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_prelu_op.py rename to python/paddle/v2/fluid/tests/test_prelu_op.py diff --git a/python/paddle/v2/framework/tests/test_program.py b/python/paddle/v2/fluid/tests/test_program.py similarity index 96% rename from python/paddle/v2/framework/tests/test_program.py rename to python/paddle/v2/fluid/tests/test_program.py index 7be67b6614..ef2daf6916 100644 --- a/python/paddle/v2/framework/tests/test_program.py +++ b/python/paddle/v2/fluid/tests/test_program.py @@ -1,8 +1,8 @@ import unittest -import paddle.v2.framework.core as core -from paddle.v2.framework.framework import Program -from paddle.v2.framework.framework import g_main_program +import paddle.v2.fluid.core as core +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.framework import g_main_program class TestProgram(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_protobuf.py b/python/paddle/v2/fluid/tests/test_protobuf.py similarity index 92% rename from python/paddle/v2/framework/tests/test_protobuf.py rename to python/paddle/v2/fluid/tests/test_protobuf.py index 848a396b3b..e064374176 100644 --- a/python/paddle/v2/framework/tests/test_protobuf.py +++ b/python/paddle/v2/fluid/tests/test_protobuf.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.proto.framework_pb2 as framework_pb2 +import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 import unittest diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/fluid/tests/test_protobuf_descs.py similarity index 99% rename from python/paddle/v2/framework/tests/test_protobuf_descs.py rename to python/paddle/v2/fluid/tests/test_protobuf_descs.py index 2fd3d5d165..098a9802df 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/fluid/tests/test_protobuf_descs.py @@ -1,5 +1,5 @@ import unittest -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core class TestOpDesc(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_proximal_adagrad_op.py b/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_proximal_adagrad_op.py rename to python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py diff --git a/python/paddle/v2/framework/tests/test_proximal_gd_op.py b/python/paddle/v2/fluid/tests/test_proximal_gd_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_proximal_gd_op.py rename to python/paddle/v2/fluid/tests/test_proximal_gd_op.py diff --git a/python/paddle/v2/framework/tests/test_rank_loss_op.py b/python/paddle/v2/fluid/tests/test_rank_loss_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_rank_loss_op.py rename to python/paddle/v2/fluid/tests/test_rank_loss_op.py diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py similarity index 98% rename from python/paddle/v2/framework/tests/test_recurrent_op.py rename to python/paddle/v2/fluid/tests/test_recurrent_op.py index 16100429dd..b623d12318 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py @@ -1,11 +1,11 @@ import unittest -import paddle.v2.framework.layers as layers -from paddle.v2.framework.framework import Program -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.backward import append_backward_ops +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.backward import append_backward_ops import numpy as np -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core class PyRNNBase(object): diff --git a/python/paddle/v2/framework/tests/test_reduce_op.py b/python/paddle/v2/fluid/tests/test_reduce_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_reduce_op.py rename to python/paddle/v2/fluid/tests/test_reduce_op.py diff --git a/python/paddle/v2/framework/tests/test_regularizer.py b/python/paddle/v2/fluid/tests/test_regularizer.py similarity index 92% rename from python/paddle/v2/framework/tests/test_regularizer.py rename to python/paddle/v2/fluid/tests/test_regularizer.py index b21dceb584..f5d1eb3b96 100644 --- a/python/paddle/v2/framework/tests/test_regularizer.py +++ b/python/paddle/v2/fluid/tests/test_regularizer.py @@ -1,9 +1,9 @@ import unittest -import paddle.v2.framework.framework as framework -import paddle.v2.framework.optimizer as optimizer -import paddle.v2.framework.regularizer as regularizer -from paddle.v2.framework.backward import append_backward_ops +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.optimizer as optimizer +import paddle.v2.fluid.regularizer as regularizer +from paddle.v2.fluid.backward import append_backward_ops class TestL2DecayRegularizer(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_reshape_op.py b/python/paddle/v2/fluid/tests/test_reshape_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_reshape_op.py rename to python/paddle/v2/fluid/tests/test_reshape_op.py diff --git a/python/paddle/v2/framework/tests/test_rmsprop_op.py b/python/paddle/v2/fluid/tests/test_rmsprop_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_rmsprop_op.py rename to python/paddle/v2/fluid/tests/test_rmsprop_op.py diff --git a/python/paddle/v2/framework/tests/test_rnn_memory_helper_op.py b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py similarity index 95% rename from python/paddle/v2/framework/tests/test_rnn_memory_helper_op.py rename to python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py index 731beff17c..a3cba92504 100644 --- a/python/paddle/v2/framework/tests/test_rnn_memory_helper_op.py +++ b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py @@ -1,10 +1,10 @@ import unittest -from paddle.v2.framework.framework import Program -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.backward import append_backward_ops +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.backward import append_backward_ops import numpy as np -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core def create_tensor(np_data, place): diff --git a/python/paddle/v2/framework/tests/test_scale_op.py b/python/paddle/v2/fluid/tests/test_scale_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_scale_op.py rename to python/paddle/v2/fluid/tests/test_scale_op.py diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/fluid/tests/test_scatter_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_scatter_op.py rename to python/paddle/v2/fluid/tests/test_scatter_op.py diff --git a/python/paddle/v2/framework/tests/test_scope.py b/python/paddle/v2/fluid/tests/test_scope.py similarity index 81% rename from python/paddle/v2/framework/tests/test_scope.py rename to python/paddle/v2/fluid/tests/test_scope.py index 1474365479..e4857b590a 100644 --- a/python/paddle/v2/framework/tests/test_scope.py +++ b/python/paddle/v2/fluid/tests/test_scope.py @@ -1,22 +1,22 @@ -import paddle.v2.framework.core +import paddle.v2.fluid.core import unittest class TestScope(unittest.TestCase): def test_create_destroy(self): - paddle_c = paddle.v2.framework.core + paddle_c = paddle.v2.fluid.core scope = paddle_c.Scope() self.assertIsNotNone(scope) scope_with_parent = scope.new_scope() self.assertIsNotNone(scope_with_parent) def test_none_variable(self): - paddle_c = paddle.v2.framework.core + paddle_c = paddle.v2.fluid.core scope = paddle_c.Scope() self.assertIsNone(scope.find_var("test")) def test_create_var_get_var(self): - paddle_c = paddle.v2.framework.core + paddle_c = paddle.v2.fluid.core scope = paddle_c.Scope() var_a = scope.var("var_a") self.assertIsNotNone(var_a) @@ -25,7 +25,7 @@ class TestScope(unittest.TestCase): self.assertIsNotNone(scope2.find_var('var_a')) def test_var_get_int(self): - paddle_c = paddle.v2.framework.core + paddle_c = paddle.v2.fluid.core scope = paddle_c.Scope() var = scope.var("test_int") var.set_int(10) diff --git a/python/paddle/v2/framework/tests/test_selected_rows.py b/python/paddle/v2/fluid/tests/test_selected_rows.py similarity index 96% rename from python/paddle/v2/framework/tests/test_selected_rows.py rename to python/paddle/v2/fluid/tests/test_selected_rows.py index e8a930cb08..93daf37aa2 100644 --- a/python/paddle/v2/framework/tests/test_selected_rows.py +++ b/python/paddle/v2/fluid/tests/test_selected_rows.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import unittest import numpy as np diff --git a/python/paddle/v2/framework/tests/test_seq_concat_op.py b/python/paddle/v2/fluid/tests/test_seq_concat_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_seq_concat_op.py rename to python/paddle/v2/fluid/tests/test_seq_concat_op.py diff --git a/python/paddle/v2/framework/tests/test_seq_conv.py b/python/paddle/v2/fluid/tests/test_seq_conv.py similarity index 100% rename from python/paddle/v2/framework/tests/test_seq_conv.py rename to python/paddle/v2/fluid/tests/test_seq_conv.py diff --git a/python/paddle/v2/framework/tests/test_seq_expand.py b/python/paddle/v2/fluid/tests/test_seq_expand.py similarity index 100% rename from python/paddle/v2/framework/tests/test_seq_expand.py rename to python/paddle/v2/fluid/tests/test_seq_expand.py diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/fluid/tests/test_seq_pool.py similarity index 100% rename from python/paddle/v2/framework/tests/test_seq_pool.py rename to python/paddle/v2/fluid/tests/test_seq_pool.py diff --git a/python/paddle/v2/framework/tests/test_sequence_softmax_op.py b/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_sequence_softmax_op.py rename to python/paddle/v2/fluid/tests/test_sequence_softmax_op.py diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/fluid/tests/test_sgd_op.py similarity index 97% rename from python/paddle/v2/framework/tests/test_sgd_op.py rename to python/paddle/v2/fluid/tests/test_sgd_op.py index 01262bba4d..ca05a381f0 100644 --- a/python/paddle/v2/framework/tests/test_sgd_op.py +++ b/python/paddle/v2/fluid/tests/test_sgd_op.py @@ -1,7 +1,7 @@ import unittest import numpy as np -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator from op_test import OpTest diff --git a/python/paddle/v2/framework/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py similarity index 86% rename from python/paddle/v2/framework/tests/test_shrink_rnn_memory.py rename to python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py index 2090455b96..1a3b88e18e 100644 --- a/python/paddle/v2/framework/tests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py @@ -1,9 +1,9 @@ import unittest -import paddle.v2.framework.core as core -from paddle.v2.framework.executor import Executor -import paddle.v2.framework.layers as layers -from paddle.v2.framework.backward import append_backward_ops -from paddle.v2.framework.framework import g_main_program +import paddle.v2.fluid.core as core +from paddle.v2.fluid.executor import Executor +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.framework import g_main_program import numpy diff --git a/python/paddle/v2/framework/tests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_sigmoid_cross_entropy_with_logits_op.py rename to python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py diff --git a/python/paddle/v2/framework/tests/test_sign_op.py b/python/paddle/v2/fluid/tests/test_sign_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_sign_op.py rename to python/paddle/v2/fluid/tests/test_sign_op.py diff --git a/python/paddle/v2/framework/tests/test_smooth_l1_loss_op.py b/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_smooth_l1_loss_op.py rename to python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/fluid/tests/test_softmax_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_softmax_op.py rename to python/paddle/v2/fluid/tests/test_softmax_op.py diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py rename to python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py diff --git a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py new file mode 100644 index 0000000000..3aed83b2ea --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py @@ -0,0 +1,181 @@ +import unittest +import paddle.v2.fluid.core as core +import numpy as np +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.backward import append_backward_ops + + +class TestCPULoDTensorArrayOps(unittest.TestCase): + def place(self): + return core.CPUPlace() + + def test_split_and_merge_lod_tensor_no_lod(self): + tensor = core.LoDTensor() + tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) + + mask_np = np.array([0, 0, 1, 1, 1, 1, 0, 0, 0, 0]).astype('bool') + mask_np = np.expand_dims(mask_np, axis=1) + + mask = core.LoDTensor() + mask.set(mask_np, self.place()) + + expect_true_tensor = np.array([2, 3, 4, 5]).astype('int32') + expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1) + expect_true = core.LoDTensor() + expect_true.set(expect_true_tensor, self.place()) + + expect_false_tensor = np.array([0, 1, 6, 7, 8, 9]).astype('int32') + expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1) + + expect_false = core.LoDTensor() + expect_false.set(expect_false_tensor, self.place()) + + self.main( + tensor=tensor, + mask=mask, + expect_true=expect_true, + expect_false=expect_false, + expect_out=tensor) + + def test_split_and_merge_lod_tensor_level_0(self): + tensor = core.LoDTensor() + tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 3, 9, 10]]) + + mask_np = np.array([0, 1, 0]).astype('bool') + mask_np = np.expand_dims(mask_np, axis=1) + + mask = core.LoDTensor() + mask.set(mask_np, self.place()) + + expect_true_tensor = np.array([3, 4, 5, 6, 7, 8]).astype('int32') + expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1) + expect_true = core.LoDTensor() + expect_true.set(expect_true_tensor, self.place()) + expect_true.set_lod([[0, 6]]) + + expect_false_tensor = np.array([0, 1, 2, 9]).astype('int32') + expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1) + expect_false_lod = [[0, 3, 4]] + + expect_false = core.LoDTensor() + expect_false.set(expect_false_tensor, self.place()) + expect_false.set_lod(expect_false_lod) + + self.main( + tensor=tensor, + mask=mask, + expect_true=expect_true, + expect_false=expect_false, + expect_out=tensor) + + def main(self, tensor, mask, expect_true, expect_false, expect_out, + level=0): + place = self.place() + program = Program() + x = layers.data(name='x', shape=[1], main_program=program) + x.persistable = True + + y = layers.data(name='y', shape=[1], main_program=program) + y.persistable = True + + out_true, out_false = layers.split_lod_tensor( + input=x, mask=y, level=level, main_program=program) + out_true.persistable = True + out_false.persistable = True + + out = layers.merge_lod_tensor( + in_true=out_true, + in_false=out_false, + mask=y, + x=x, + level=level, + main_program=program) + + out.persistable = True + + exe = Executor(place) + scope = core.Scope() + exe.run(program, feed={'x': tensor, 'y': mask}, scope=scope) + + var_true = scope.find_var(out_true.name).get_tensor() + + var_false = scope.find_var(out_false.name).get_tensor() + + var_out = scope.find_var(out.name).get_tensor() + + self.check_tensor_same(var_true, expect_true) + self.check_tensor_same(var_false, expect_false) + self.check_tensor_same(var_out, expect_out) + + def check_tensor_same(self, actual, expect): + self.assertTrue(np.allclose(np.array(actual), np.array(expect))) + self.assertEqual(actual.lod(), expect.lod()) + + +class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): + def test_grad(self): + place = core.CPUPlace() + program = Program() + + x = layers.data( + name='x', + shape=[1], + data_type='float32', + main_program=program, + stop_gradient=False) + y = layers.data( + name='y', + shape=[1], + data_type='bool', + main_program=program, + stop_gradient=False) + + level = 0 + + out_true, out_false = layers.split_lod_tensor( + input=x, mask=y, level=level, main_program=program) + out = layers.merge_lod_tensor( + in_true=out_true, + in_false=out_false, + mask=y, + x=x, + level=level, + main_program=program) + mean = layers.mean(x=out, main_program=program) + + append_backward_ops(mean) + + tensor = core.LoDTensor() + tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) + tensor.set_lod([[0, 3, 9, 10]]) + + mask_np = np.array([0, 1, 0]).astype('bool') + mask_np = np.expand_dims(mask_np, axis=1) + + mask = core.LoDTensor() + mask.set(mask_np, place) + + exe = Executor(place) + scope = core.Scope() + + g_vars = program.global_block().var(x.name + "@GRAD") + g_out = [ + item.sum() + for item in map(np.array, + exe.run(program, + feed={'x': tensor, + 'y': mask}, + fetch_list=[g_vars], + scope=scope)) + ] + + g_out_sum = np.array(g_out).sum() + + self.assertAlmostEqual(1.0, g_out_sum, delta=0.1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_split_op.py b/python/paddle/v2/fluid/tests/test_split_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_split_op.py rename to python/paddle/v2/fluid/tests/test_split_op.py diff --git a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py b/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_squared_l2_distance_op.py rename to python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py diff --git a/python/paddle/v2/framework/tests/test_squared_l2_norm_op.py b/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_squared_l2_norm_op.py rename to python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py diff --git a/python/paddle/v2/framework/tests/test_sum_op.py b/python/paddle/v2/fluid/tests/test_sum_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_sum_op.py rename to python/paddle/v2/fluid/tests/test_sum_op.py diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/fluid/tests/test_tensor.py similarity index 98% rename from python/paddle/v2/framework/tests/test_tensor.py rename to python/paddle/v2/fluid/tests/test_tensor.py index e0cd2fa8aa..9f870d9eb3 100644 --- a/python/paddle/v2/framework/tests/test_tensor.py +++ b/python/paddle/v2/fluid/tests/test_tensor.py @@ -1,4 +1,4 @@ -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import unittest import numpy diff --git a/python/paddle/v2/framework/tests/test_tensor_array.py b/python/paddle/v2/fluid/tests/test_tensor_array.py similarity index 98% rename from python/paddle/v2/framework/tests/test_tensor_array.py rename to python/paddle/v2/fluid/tests/test_tensor_array.py index 50b3e09162..d6929ba16e 100644 --- a/python/paddle/v2/framework/tests/test_tensor_array.py +++ b/python/paddle/v2/fluid/tests/test_tensor_array.py @@ -1,5 +1,5 @@ import logging -import paddle.v2.framework.core as core +import paddle.v2.fluid.core as core import unittest import numpy as np diff --git a/python/paddle/v2/framework/tests/test_top_k_op.py b/python/paddle/v2/fluid/tests/test_top_k_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_top_k_op.py rename to python/paddle/v2/fluid/tests/test_top_k_op.py diff --git a/python/paddle/v2/framework/tests/test_transpose_op.py b/python/paddle/v2/fluid/tests/test_transpose_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_transpose_op.py rename to python/paddle/v2/fluid/tests/test_transpose_op.py diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/fluid/tests/test_uniform_random_op.py similarity index 90% rename from python/paddle/v2/framework/tests/test_uniform_random_op.py rename to python/paddle/v2/fluid/tests/test_uniform_random_op.py index ded777105e..f736dfb2e8 100644 --- a/python/paddle/v2/framework/tests/test_uniform_random_op.py +++ b/python/paddle/v2/fluid/tests/test_uniform_random_op.py @@ -1,6 +1,6 @@ import unittest -from paddle.v2.framework.op import Operator -import paddle.v2.framework.core as core +from paddle.v2.fluid.op import Operator +import paddle.v2.fluid.core as core import numpy diff --git a/python/paddle/v2/framework/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py similarity index 93% rename from python/paddle/v2/framework/tests/test_variable.py rename to python/paddle/v2/fluid/tests/test_variable.py index 03115f10a5..a3e60a7517 100644 --- a/python/paddle/v2/framework/tests/test_variable.py +++ b/python/paddle/v2/fluid/tests/test_variable.py @@ -1,6 +1,6 @@ import unittest -from paddle.v2.framework.framework import Variable, g_main_program, Program -import paddle.v2.framework.core as core +from paddle.v2.fluid.framework import Variable, g_main_program, Program +import paddle.v2.fluid.core as core import numpy as np diff --git a/python/paddle/v2/framework/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py similarity index 94% rename from python/paddle/v2/framework/tests/test_while_op.py rename to python/paddle/v2/fluid/tests/test_while_op.py index 1c344eae49..0f01acb3b9 100644 --- a/python/paddle/v2/framework/tests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/test_while_op.py @@ -1,7 +1,7 @@ import unittest -import paddle.v2.framework.layers as layers -from paddle.v2.framework.executor import Executor -import paddle.v2.framework.core as core +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.executor import Executor +import paddle.v2.fluid.core as core import numpy diff --git a/python/paddle/v2/framework/tests/test_beam_search_decode_op.py b/python/paddle/v2/framework/tests/test_beam_search_decode_op.py new file mode 100644 index 0000000000..e9f180bbae --- /dev/null +++ b/python/paddle/v2/framework/tests/test_beam_search_decode_op.py @@ -0,0 +1,75 @@ +import unittest + +import numpy as np +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator + + +class TestBeamSearchDecodeOp(unittest.TestCase): + def setUp(self): + self.scope = core.Scope() + self.cpu_place = core.CPUPlace() + + def append_lod_tensor(self, tensor_array, lod, data): + lod_tensor = core.LoDTensor() + lod_tensor.set_lod(lod) + lod_tensor.set(data, self.cpu_place) + tensor_array.append(lod_tensor) + + def test_get_set(self): + ids = self.scope.var("ids").get_lod_tensor_array() + self.append_lod_tensor( + ids, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], + np.array( + [1, 2, 3, 4, 5, 6], dtype="int64")) + self.append_lod_tensor( + ids, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], + np.array( + [0, 1, 2, 3, 4, 5], dtype="int64")) + self.append_lod_tensor( + ids, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], + np.array( + [0, 1, 2, 3, 4], dtype="int64")) + + scores = self.scope.var("scores").get_lod_tensor_array() + self.append_lod_tensor( + scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], + np.array( + [1, 2, 3, 4, 5, 6], dtype="float32")) + self.append_lod_tensor( + scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], + np.array( + [0, 1, 2, 3, 4, 5], dtype="float32")) + self.append_lod_tensor( + scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], + np.array( + [0, 1, 2, 3, 4], dtype="float32")) + + sentence_ids = self.scope.var("sentence_ids").get_tensor() + sentence_scores = self.scope.var("sentence_scores").get_tensor() + + beam_search_decode_op = Operator( + "beam_search_decode", + # inputs + Ids="ids", + Scores="scores", + # outputs + SentenceIds="sentence_ids", + SentenceScores="sentence_scores") + + ctx = core.DeviceContext.create(self.cpu_place) + beam_search_decode_op.run(self.scope, ctx) + + expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]] + self.assertEqual(sentence_ids.lod(), expected_lod) + self.assertEqual(sentence_scores.lod(), expected_lod) + + expected_data = np.array( + [2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64") + self.assertTrue(np.array_equal(np.array(sentence_ids), expected_data)) + self.assertTrue( + np.array_equal(np.array(sentence_scores), expected_data)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/setup.py.in b/python/setup.py.in index 5348c2d8d7..fe91df10da 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -13,8 +13,8 @@ packages=['paddle', 'paddle.v2.reader', 'paddle.v2.master', 'paddle.v2.plot', - 'paddle.v2.framework', - 'paddle.v2.framework.proto', + 'paddle.v2.fluid', + 'paddle.v2.fluid.proto', 'py_paddle'] with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: @@ -44,14 +44,14 @@ setup(name='paddlepaddle', ext_modules=[Extension('_foo', ['stub.cc'])], package_data={ 'paddle.v2.master': ['libpaddle_master.so'], - 'paddle.v2.framework': ['core.so'], + 'paddle.v2.fluid': ['core.so'], 'py_paddle':['*.py','_swig_paddle.so'] }, package_dir={ '': '${CMAKE_CURRENT_SOURCE_DIR}', - # The paddle.v2.framework.proto will be generated while compiling. + # The paddle.v2.fluid.proto will be generated while compiling. # So that package points to other directory. - 'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework', + 'paddle.v2.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/framework', 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' }, scripts=paddle_bins,