Merge branch 'develop' into feature/change_op_creation

fixstartbug
Yu Yang 8 years ago
commit a1e16bb5d3

@ -24,7 +24,7 @@
description: Format files with ClangFormat.
entry: clang-format -i
language: system
files: \.(c|cc|cxx|cpp|h|hpp|hxx)$
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
hooks:

@ -36,8 +36,8 @@ include(simd)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND})
option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF)
option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
@ -55,7 +55,6 @@ option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(UNITTEST_USE_VIRTUALENV "Python unittest with virtualenv" ON)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)

@ -27,13 +27,16 @@ RUN apt-get update && \
git python-pip python-dev openssh-server bison \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-numpy python-matplotlib gcc-4.8 g++-4.8 \
python-matplotlib gcc-4.8 g++-4.8 \
automake locales clang-format-3.8 swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev \
net-tools && \
apt-get clean -y
# paddle uses numpy.flip, which was introduced in numpy 1.12.0
RUN pip --no-cache-dir install 'numpy>=1.12.0'
# Install Go and glide
RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \
tar -C /usr/local -xzf go.tgz && \

@ -56,11 +56,14 @@ macro(add_style_check_target TARGET_NAME)
# cpplint code style
get_filename_component(base_filename ${filename} NAME)
set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint)
add_custom_command(TARGET ${TARGET_NAME} PRE_BUILD
add_custom_command(OUTPUT ${CUR_GEN} PRE_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}"
"--write-success=${CUR_GEN}" ${filename}
DEPENDS ${filename} ${PROJ_ROOT}/paddle/scripts/cpplint.py
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${base_filename}.cpplint DEPENDS ${CUR_GEN})
add_dependencies(${TARGET_NAME} ${base_filename}.cpplint)
endif()
endforeach()
endif()

@ -118,7 +118,6 @@ endfunction()
macro(add_unittest_without_exec TARGET_NAME)
add_executable(${TARGET_NAME} ${ARGN})
link_paddle_test(${TARGET_NAME})
add_style_check_target(${TARGET_NAME} ${ARGN})
endmacro()
# add_unittest
@ -150,19 +149,12 @@ endfunction()
# Create a python unittest using run_python_tests.sh,
# which takes care of making correct running environment
function(add_python_test TEST_NAME)
if (UNITTEST_USE_VIRTUALENV)
add_test(NAME ${TEST_NAME}
COMMAND env PADDLE_PACKAGE_DIR=${PADDLE_PYTHON_PACKAGE_DIR}
bash ${PROJ_ROOT}/paddle/scripts/run_python_tests.sh ${ARGN}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
else()
foreach(arg ${ARGN})
get_filename_component(py_fn ${arg} NAME_WE)
set(TRG_NAME ${TEST_NAME}_${py_fn})
add_test(NAME ${TRG_NAME}
COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR}
python2 ${arg}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endforeach()
endif()
foreach(arg ${ARGN})
get_filename_component(py_fn ${arg} NAME_WE)
set(TRG_NAME ${TEST_NAME}_${py_fn})
add_test(NAME ${TRG_NAME}
COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR}
python2 ${arg}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endforeach()
endfunction()

@ -12,17 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_batch_transpose.h"
#include "hl_base.h"
#include "hl_batch_transpose.h"
const int TILE_DIM = 64;
const int BLOCK_ROWS = 16;
// No bank-conflict transpose for a batch of data.
__global__ void batchTransposeNoBankConflicts(real* odata,
const real* idata,
int numSamples, int width,
int height) {
__global__ void batchTransposeNoBankConflicts(
real* odata, const real* idata, int numSamples, int width, int height) {
__shared__ float tile[TILE_DIM][TILE_DIM + 1];
const int x = blockIdx.x * TILE_DIM + threadIdx.x;
@ -50,12 +48,12 @@ __global__ void batchTransposeNoBankConflicts(real* odata,
newX] = tile[threadIdx.x][j];
}
void batchTranspose(const real* input, real* output, int width, int height,
int batchSize) {
void batchTranspose(
const real* input, real* output, int width, int height, int batchSize) {
dim3 dimBlock(TILE_DIM, BLOCK_ROWS, 1);
dim3 dimGrid(DIVUP(width, TILE_DIM), DIVUP(height, TILE_DIM), batchSize);
batchTransposeNoBankConflicts<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(output, input, batchSize, width, height);
batchTransposeNoBankConflicts<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>(
output, input, batchSize, width, height);
CHECK_SYNC("batchTranspose failed!");
}
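
Usage sketch for batchTranspose (not part of this commit, which only re-wraps the declarations to the new clang-format style). It assumes `real` is float and uses the plain CUDA runtime for the device buffers rather than Paddle's hl_* helpers:

#include <cuda_runtime.h>
#include <vector>

typedef float real;  // assumption: Paddle built with single precision
// Declared in hl_batch_transpose.h inside the Paddle tree.
void batchTranspose(const real* input, real* output, int width, int height,
                    int batchSize);

int main() {
  const int width = 128, height = 64, batchSize = 4;
  const size_t bytes = size_t(width) * height * batchSize * sizeof(real);

  std::vector<real> host(width * height * batchSize, 1.0f);
  real *d_in = nullptr, *d_out = nullptr;
  cudaMalloc(&d_in, bytes);
  cudaMalloc(&d_out, bytes);
  cudaMemcpy(d_in, host.data(), bytes, cudaMemcpyHostToDevice);

  // Transposes each width x height sample of the batch independently.
  batchTranspose(d_in, d_out, width, height, batchSize);

  cudaMemcpy(host.data(), d_out, bytes, cudaMemcpyDeviceToHost);
  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}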

Six file diffs suppressed because they are too large.

@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cmath>
#include <stdlib.h>
#include "hl_cuda.h"
#include "hl_time.h"
#include <cmath>
#include "hl_base.h"
#include "hl_cuda.h"
#include "hl_perturbation_util.cuh"
#include "hl_time.h"
#define _USE_MATH_DEFINES
@ -30,10 +29,16 @@ limitations under the License. */
* centerX, centerY: translation.
* sourceX, sourceY: output coordinates in the original image.
*/
__device__ void getTranformCoord(int x, int y, real theta, real scale,
real tgtCenter, real imgCenter,
real centerR, real centerC,
int* sourceX, int* sourceY) {
__device__ void getTranformCoord(int x,
int y,
real theta,
real scale,
real tgtCenter,
real imgCenter,
real centerR,
real centerC,
int* sourceX,
int* sourceY) {
real H[4] = {cosf(-theta), -sinf(-theta), sinf(-theta), cosf(-theta)};
// compute coordinates in the rotated and scaled image
@ -57,11 +62,17 @@ __device__ void getTranformCoord(int x, int y, real theta, real scale,
* created by Wei Xu (genome), converted by Jiang Wang
*/
__global__ void kSamplingPatches(const real* imgs, real* targets,
int imgSize, int tgtSize, const int channels,
int samplingRate, const real* thetas,
const real* scales, const int* centerRs,
const int* centerCs, const real padValue,
__global__ void kSamplingPatches(const real* imgs,
real* targets,
int imgSize,
int tgtSize,
const int channels,
int samplingRate,
const real* thetas,
const real* scales,
const int* centerRs,
const int* centerCs,
const real padValue,
const int numImages) {
const int caseIdx = blockIdx.x * 4 + threadIdx.x;
const int pxIdx = blockIdx.y * 128 + threadIdx.y;
@ -80,8 +91,15 @@ __global__ void kSamplingPatches(const real* imgs, real* targets,
const int pxY = pxIdx / tgtSize;
int srcPxX, srcPxY;
getTranformCoord(pxX, pxY, thetas[imgIdx], scales[imgIdx], tgtCenter,
imgCenter, centerCs[caseIdx], centerRs[caseIdx], &srcPxX,
getTranformCoord(pxX,
pxY,
thetas[imgIdx],
scales[imgIdx],
tgtCenter,
imgCenter,
centerCs[caseIdx],
centerRs[caseIdx],
&srcPxX,
&srcPxY);
imgs += (imgIdx * imgPixels + srcPxY * imgSize + srcPxX) * channels;
@ -100,10 +118,15 @@ __global__ void kSamplingPatches(const real* imgs, real* targets,
*
* created by Wei Xu
*/
void hl_generate_disturb_params(real*& gpuAngle, real*& gpuScaleRatio,
int*& gpuCenterR, int*& gpuCenterC,
int numImages, int imgSize, real rotateAngle,
real scaleRatio, int samplingRate,
void hl_generate_disturb_params(real*& gpuAngle,
real*& gpuScaleRatio,
int*& gpuCenterR,
int*& gpuCenterC,
int numImages,
int imgSize,
real rotateAngle,
real scaleRatio,
int samplingRate,
bool isTrain) {
// The number of output samples.
int numPatches = numImages * samplingRate;
@ -123,7 +146,8 @@ void hl_generate_disturb_params(real*& gpuAngle, real*& gpuScaleRatio,
for (int i = 0; i < numImages; i++) {
r_angle[i] =
(rotateAngle * M_PI / 180.0) * (rand() / (RAND_MAX + 1.0) // NOLINT
- 0.5);
-
0.5);
s_ratio[i] =
1 + (rand() / (RAND_MAX + 1.0) - 0.5) * scaleRatio; // NOLINT
}
@ -140,8 +164,10 @@ void hl_generate_disturb_params(real*& gpuAngle, real*& gpuScaleRatio,
int pxY =
(int)(real(imgSize - 1) * rand() / (RAND_MAX + 1.0)); // NOLINT
const real H[4] = {cos(-r_angle[i]), -sin(-r_angle[i]),
sin(-r_angle[i]), cos(-r_angle[i])};
const real H[4] = {cos(-r_angle[i]),
-sin(-r_angle[i]),
sin(-r_angle[i]),
cos(-r_angle[i])};
real x = pxX - imgCenter;
real y = pxY - imgCenter;
real xx = H[0] * x + H[1] * y;
@ -185,9 +211,12 @@ void hl_generate_disturb_params(real*& gpuAngle, real*& gpuScaleRatio,
delete[] center_c;
}
void hl_conv_random_disturb_with_params(const real* images, int imgSize,
int tgtSize, int channels,
int numImages, int samplingRate,
void hl_conv_random_disturb_with_params(const real* images,
int imgSize,
int tgtSize,
int channels,
int numImages,
int samplingRate,
const real* gpuRotationAngle,
const real* gpuScaleRatio,
const int* gpuCenterR,
@ -202,29 +231,59 @@ void hl_conv_random_disturb_with_params(const real* images, int imgSize,
dim3 threadsPerBlock(4, 128);
dim3 numBlocks(DIVUP(numPatches, 4), DIVUP(targetSize, 128));
kSamplingPatches <<<numBlocks, threadsPerBlock>>>
(images, target, imgSize, tgtSize, channels, samplingRate,
gpuRotationAngle, gpuScaleRatio, gpuCenterR, gpuCenterC,
paddingValue, numImages);
kSamplingPatches<<<numBlocks, threadsPerBlock>>>(images,
target,
imgSize,
tgtSize,
channels,
samplingRate,
gpuRotationAngle,
gpuScaleRatio,
gpuCenterR,
gpuCenterC,
paddingValue,
numImages);
hl_device_synchronize();
}
void hl_conv_random_disturb(const real* images, int imgSize,
int tgtSize, int channels, int numImages,
real scaleRatio, real rotateAngle,
int samplingRate, real* gpu_r_angle,
real* gpu_s_ratio, int* gpu_center_r,
int* gpu_center_c, int paddingValue,
bool isTrain, real* targets) {
void hl_conv_random_disturb(const real* images,
int imgSize,
int tgtSize,
int channels,
int numImages,
real scaleRatio,
real rotateAngle,
int samplingRate,
real* gpu_r_angle,
real* gpu_s_ratio,
int* gpu_center_r,
int* gpu_center_c,
int paddingValue,
bool isTrain,
real* targets) {
// generate the random disturbance sequence and the sampling locations
hl_generate_disturb_params(gpu_r_angle, gpu_s_ratio, gpu_center_r,
gpu_center_c, numImages, imgSize, rotateAngle,
scaleRatio, samplingRate, isTrain);
hl_conv_random_disturb_with_params(
images, imgSize, tgtSize, channels, numImages,
samplingRate, gpu_r_angle, gpu_s_ratio,
gpu_center_r, gpu_center_r, paddingValue,
targets);
hl_generate_disturb_params(gpu_r_angle,
gpu_s_ratio,
gpu_center_r,
gpu_center_c,
numImages,
imgSize,
rotateAngle,
scaleRatio,
samplingRate,
isTrain);
hl_conv_random_disturb_with_params(images,
imgSize,
tgtSize,
channels,
numImages,
samplingRate,
gpu_r_angle,
gpu_s_ratio,
gpu_center_r,
gpu_center_r,
paddingValue,
targets);
}

@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "hl_device_functions.cuh"
#include "hl_cuda.h"
#include "hl_device_functions.cuh"
#include "paddle/utils/Logging.h"
template<int blockDimX, int blockDimY, int gridDimX, bool AddRow>
__global__ void KeMatrixAddRows(real* output, int ldo,
real* table, int ldt,
template <int blockDimX, int blockDimY, int gridDimX, bool AddRow>
__global__ void KeMatrixAddRows(real* output,
int ldo,
real* table,
int ldt,
int* ids,
int numSamples,
int tableSize,
@ -31,8 +32,8 @@ __global__ void KeMatrixAddRows(real* output, int ldo,
while (idy < numSamples) {
int tableId = ids[idy];
if ((0 <= tableId) && (tableId < tableSize)) {
real *out = output + idy * ldo;
real *tab = table + tableId * ldt;
real* out = output + idy * ldo;
real* tab = table + tableId * ldt;
for (int i = idx; i < dim; i += blockDimX) {
if (AddRow) {
paddle::paddleAtomicAdd(&tab[i], out[i]);
@ -45,8 +46,10 @@ __global__ void KeMatrixAddRows(real* output, int ldo,
}
}
void hl_matrix_select_rows(real* output, int ldo,
real* table, int ldt,
void hl_matrix_select_rows(real* output,
int ldo,
real* table,
int ldt,
int* ids,
int numSamples,
int tableSize,
@ -57,14 +60,16 @@ void hl_matrix_select_rows(real* output, int ldo,
dim3 threads(128, 8);
dim3 grid(8, 1);
KeMatrixAddRows<128, 8, 8, 0><<< grid, threads, 0, STREAM_DEFAULT >>>
(output, ldo, table, ldt, ids, numSamples, tableSize, dim);
KeMatrixAddRows<128, 8, 8, 0><<<grid, threads, 0, STREAM_DEFAULT>>>(
output, ldo, table, ldt, ids, numSamples, tableSize, dim);
CHECK_SYNC("hl_matrix_select_rows failed");
}
void hl_matrix_add_to_rows(real* table, int ldt,
real* input, int ldi,
void hl_matrix_add_to_rows(real* table,
int ldt,
real* input,
int ldi,
int* ids,
int numSamples,
int tableSize,
@ -75,16 +80,15 @@ void hl_matrix_add_to_rows(real* table, int ldt,
dim3 threads(128, 8);
dim3 grid(8, 1);
KeMatrixAddRows<128, 8, 8, 1><<< grid, threads, 0, STREAM_DEFAULT >>>
(input, ldi, table, ldt, ids, numSamples, tableSize, dim);
KeMatrixAddRows<128, 8, 8, 1><<<grid, threads, 0, STREAM_DEFAULT>>>(
input, ldi, table, ldt, ids, numSamples, tableSize, dim);
CHECK_SYNC("hl_matrix_add_to_rows failed");
}
template<class T, int blockDimX, int gridDimX>
__global__ void KeVectorSelect(T* dst, int sized,
const T* src, int sizes,
const int* ids, int sizei) {
template <class T, int blockDimX, int gridDimX>
__global__ void KeVectorSelect(
T* dst, int sized, const T* src, int sizes, const int* ids, int sizei) {
int idx = threadIdx.x + blockDimX * blockIdx.x;
while (idx < sizei) {
int index = ids[idx];
@ -95,9 +99,8 @@ __global__ void KeVectorSelect(T* dst, int sized,
}
template <class T>
void hl_vector_select_from(T* dst, int sized,
const T* src, int sizes,
const int* ids, int sizei) {
void hl_vector_select_from(
T* dst, int sized, const T* src, int sizes, const int* ids, int sizei) {
CHECK_NOTNULL(dst);
CHECK_NOTNULL(src);
CHECK_NOTNULL(ids);
@ -105,18 +108,17 @@ void hl_vector_select_from(T* dst, int sized,
dim3 threads(512, 1);
dim3 grid(8, 1);
KeVectorSelect<T, 512, 8><<< grid, threads, 0, STREAM_DEFAULT >>>
(dst, sized, src, sizes, ids, sizei);
KeVectorSelect<T, 512, 8><<<grid, threads, 0, STREAM_DEFAULT>>>(
dst, sized, src, sizes, ids, sizei);
CHECK_SYNC("hl_vector_select_from failed");
}
template
void hl_vector_select_from(real* dst, int sized,
const real* src, int sizes,
const int* ids, int sizei);
template
void hl_vector_select_from(int* dst, int sized,
const int* src, int sizes,
const int* ids, int sizei);
template void hl_vector_select_from(real* dst,
int sized,
const real* src,
int sizes,
const int* ids,
int sizei);
template void hl_vector_select_from(
int* dst, int sized, const int* src, int sizes, const int* ids, int sizei);

File diff suppressed because it is too large.

@ -12,13 +12,15 @@ cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(attr_type SRCS attr_type.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
proto_library(attribute_proto SRCS attribute.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attribute_proto)
proto_library(op_desc SRCS op_desc.proto DEPS attribute_proto)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope)
cc_library(attribute SRCS attribute.cc DEPS op_desc op_proto)
cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope attribute)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS op_proto operator)
@ -26,7 +28,7 @@ cc_library(op_registry SRCS op_registry.cc DEPS op_desc grad_op_builder)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op)
py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc.proto)
# Generate an empty __init__.py to make framework_py_proto as a valid python module.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init)

@ -0,0 +1,85 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/attribute.h"
#include <vector>
namespace paddle {
namespace framework {
template <>
AttrType AttrTypeID<int>() {
return INT;
}
template <>
AttrType AttrTypeID<float>() {
return FLOAT;
}
template <>
AttrType AttrTypeID<std::string>() {
return STRING;
}
template <>
AttrType AttrTypeID<std::vector<int>>() {
return INTS;
}
template <>
AttrType AttrTypeID<std::vector<float>>() {
return FLOATS;
}
template <>
AttrType AttrTypeID<std::vector<std::string>>() {
return STRINGS;
}
Attribute GetAttrValue(const AttrDesc& attr_desc) {
switch (attr_desc.type()) {
case paddle::framework::AttrType::INT: {
return attr_desc.i();
}
case paddle::framework::AttrType::FLOAT: {
return attr_desc.f();
}
case paddle::framework::AttrType::STRING: {
return attr_desc.s();
}
case paddle::framework::AttrType::INTS: {
std::vector<int> val(attr_desc.ints_size());
for (int i = 0; i < attr_desc.ints_size(); ++i) {
val[i] = attr_desc.ints(i);
}
return val;
}
case paddle::framework::AttrType::FLOATS: {
std::vector<float> val(attr_desc.floats_size());
for (int i = 0; i < attr_desc.floats_size(); ++i) {
val[i] = attr_desc.floats(i);
}
return val;
}
case paddle::framework::AttrType::STRINGS: {
std::vector<std::string> val(attr_desc.strings_size());
for (int i = 0; i < attr_desc.strings_size(); ++i) {
val[i] = attr_desc.strings(i);
}
return val;
}
}
PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !");
return boost::blank();
}
} // namespace framework
} // namespace paddle
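
Usage sketch for the new attribute helpers (hypothetical, not part of this commit). It builds an AttrDesc proto, converts it with GetAttrValue, and reads the value back out of the boost::variant; the setter names follow the AttrDesc fields in op_desc.proto below:

#include "paddle/framework/attribute.h"

namespace f = paddle::framework;

void AttributeExample() {
  f::AttrDesc desc;
  desc.set_name("scale");
  desc.set_type(f::AttrType::FLOAT);
  desc.set_f(3.0f);

  f::Attribute attr = f::GetAttrValue(desc);  // variant now holds a float
  float scale = boost::get<float>(attr);      // 3.0f

  // AttrTypeID<T>() maps a C++ type back to the proto enum, e.g.
  // AttrTypeID<float>() == FLOAT and AttrTypeID<std::vector<int>>() == INTS.
  (void)scale;
}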

@ -1,3 +1,17 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <boost/variant.hpp>
@ -6,6 +20,9 @@
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/framework/attribute.pb.h"
#include "paddle/framework/op_desc.pb.h"
#include "paddle/platform/enforce.h"
namespace paddle {
@ -14,13 +31,19 @@ namespace framework {
typedef boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>>
Attribute;
typedef std::unordered_map<std::string, Attribute> AttributeMap;
template <typename T>
AttrType AttrTypeID();
Attribute GetAttrValue(const AttrDesc& attr_desc);
// check whether a value(attribute) fit a certain limit
template <typename T>
class LargerThanChecker {
public:
LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
explicit LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
void operator()(T& value) const {
PADDLE_ENFORCE(value > lower_bound_, "larger_than check fail");
}
@ -35,7 +58,8 @@ class LargerThanChecker {
template <typename T>
class DefaultValueSetter {
public:
DefaultValueSetter(T default_value) : default_value_(default_value) {}
explicit DefaultValueSetter(T default_value)
: default_value_(default_value) {}
void operator()(T& value) const { value = default_value_; }
private:
@ -78,7 +102,8 @@ class TypedAttrChecker {
typedef std::function<void(T&)> ValueChecker;
public:
TypedAttrChecker(const std::string& attr_name) : attr_name_(attr_name) {}
explicit TypedAttrChecker(const std::string& attr_name)
: attr_name_(attr_name) {}
TypedAttrChecker& InEnum(const std::unordered_set<T>& range) {
value_checkers_.push_back(EnumInContainer<T>(range));

@ -12,17 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax="proto2";
syntax = "proto2";
package paddle.framework;
// Attribute Type for paddle's Op.
// Op contains many attributes. Each type of attributes could be different.
// The AttrType will be shared between AttrDesc and AttrProto.
enum AttrType {
INT = 0;
FLOAT = 1;
STRING = 2;
INTS = 3;
FLOATS = 4;
STRINGS = 5;
INT = 0;
FLOAT = 1;
STRING = 2;
INTS = 3;
FLOATS = 4;
STRINGS = 5;
}

@ -59,19 +59,17 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
// If all input gradients of forwarding operator do not need to calculate,
// just return an NOP. Not return null ptr because NOP does not take
// too much time for calculation, but it is useful for simplifying logic.
if (AllInSet(forwardOp.inputs_, OperatorBase::GRAD_VAR_SUFFIX(),
no_grad_names)) {
if (AllInSet(forwardOp.inputs_, kGradVarSuffix, no_grad_names)) {
return NOP();
}
// All output gradients of forwarding operator do not need to calculate.
// Then all input gradients cannot be computed at all, and we put them into
// `no_grad_names` set. Return an NOP.
if (AllInSet(forwardOp.outputs_, OperatorBase::GRAD_VAR_SUFFIX(),
no_grad_names)) {
if (AllInSet(forwardOp.outputs_, kGradVarSuffix, no_grad_names)) {
for (auto& name : forwardOp.inputs_) {
// Mark all input is not need
no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
no_grad_names.insert(name + kGradVarSuffix);
}
return NOP();
}
@ -134,9 +132,9 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp);
for (std::string& grad_input : grad_op->inputs_) {
if (no_grad_names.count(grad_input)) {
std::string prefix = grad_input.substr(
0, grad_input.size() - OperatorBase::GRAD_VAR_SUFFIX().size());
grad_input = prefix + OperatorBase::ZERO_VAR_SUFFIX();
std::string prefix =
grad_input.substr(0, grad_input.size() - kGradVarSuffix.size());
grad_input = prefix + kZeroVarSuffix;
// If part of input gradient of that operator is not calculated, fill
// zero variables to that input gradient.
@ -147,7 +145,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
for (std::string& grad_output : grad_op->outputs_) {
if (no_grad_names.count(grad_output)) {
grad_output = OperatorBase::EMPTY_VAR_NAME();
grad_output = kEmptyVarName;
}
}
@ -168,14 +166,14 @@ std::shared_ptr<OperatorBase> Backward(
std::unordered_set<std::string> no_grad_names;
no_grad_names.reserve(no_grad_vars.size());
no_grad_names.insert(OperatorBase::EMPTY_VAR_NAME() +
OperatorBase::GRAD_VAR_SUFFIX());
no_grad_names.insert(kEmptyVarName + kGradVarSuffix);
for (auto& name : no_grad_vars) {
no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
no_grad_names.insert(name + kGradVarSuffix);
}
size_t uid = 0;
return BackwardRecursive(forwardOp, no_grad_names, uid);
}
} // namespace framework
} // namespace paddle

@ -78,14 +78,14 @@ class FcOp : public ops::NetOp {
{Output("mul_result")}, {}));
auto b_name = Input("b");
std::string before_act = "mul_result";
if (b_name != EMPTY_VAR_NAME()) {
if (b_name != kEmptyVarName) {
AddOp(OpRegistry::CreateOp("rowwise_add", {Output("mul_result"), b_name},
{Output("add_result")}, {}));
before_act = "add_result";
} else {
auto out_varname = Output("add_result");
if (out_varname != EMPTY_VAR_NAME()) {
this->Rename(out_varname, EMPTY_VAR_NAME());
if (out_varname != kEmptyVarName) {
this->Rename(out_varname, kEmptyVarName);
}
}
@ -163,13 +163,12 @@ TEST(Backward, simple_op_grad) {
ASSERT_NE(fwd, nullptr);
auto gop = f::OpRegistry::CreateGradOp(*fwd);
ASSERT_EQ(4UL, gop->inputs_.size());
ASSERT_EQ(f::OperatorBase::EMPTY_VAR_NAME(), gop->inputs_[0]);
ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]);
ASSERT_EQ("rowwise_add_grad", gop->type_);
ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[0]);
ASSERT_EQ("b" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[1]);
ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]);
ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]);
ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(),
gop->Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX()));
ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix));
}
TEST(Backward, simple_op_not_need_grad) {
@ -177,7 +176,7 @@ TEST(Backward, simple_op_not_need_grad) {
ASSERT_NE(fwd, nullptr);
auto gop = f::Backward(*fwd, {"X"});
ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(),
"X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"X" + f::kGradVarSuffix),
gop->outputs_.end());
auto no_input_gop = f::Backward(*fwd, {"X", "b"});
@ -210,9 +209,9 @@ TEST(Backward, net_fc_backward_normal) {
}
TEST(Backward, net_fc_backward_not_have_b) {
std::shared_ptr<f::OperatorBase> fwd = f::OpRegistry::CreateOp(
"fc", {"X", "w", f::OperatorBase::EMPTY_VAR_NAME()},
{"mul_result", "add_result", "tmp"}, {});
std::shared_ptr<f::OperatorBase> fwd =
f::OpRegistry::CreateOp("fc", {"X", "w", f::kEmptyVarName},
{"mul_result", "add_result", "tmp"}, {});
ASSERT_NE(fwd, nullptr);
std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {});
ASSERT_TRUE(gop->IsNetOp());
@ -242,24 +241,21 @@ TEST(Backward, net_input_of_network_not_need_grad) {
std::unordered_set<std::string> all_output = std::unordered_set<std::string>(
bwd_net->outputs_.begin(), bwd_net->outputs_.end());
all_output.erase(f::OperatorBase::EMPTY_VAR_NAME());
all_output.erase(f::kEmptyVarName);
for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
ASSERT_NE(all_output.find(out + f::OperatorBase::GRAD_VAR_SUFFIX()),
all_output.end());
ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end());
}
// Not Generated X
ASSERT_EQ(all_output.find("X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
all_output.end());
ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end());
ASSERT_EQ(2UL, bwd_net->ops_.size());
ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get());
ASSERT_EQ(3UL, first_fc_grad->ops_.size());
ASSERT_EQ(
f::OperatorBase::EMPTY_VAR_NAME(),
first_fc_grad->ops_[2]->Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX()));
ASSERT_EQ(f::kEmptyVarName,
first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix));
}
TEST(Backward, net_shared_weight) {
@ -311,17 +307,15 @@ TEST(Backward, op_part_of_output_are_not_need) {
ASSERT_EQ(1UL, fill_zero.inputs_.size());
ASSERT_EQ("Z", fill_zero.inputs_[0]);
ASSERT_EQ(1UL, fill_zero.outputs_.size());
ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(), fill_zero.outputs_[0]);
ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]);
auto &d_many_out = *net->ops_[1];
ASSERT_EQ("many_output_op_grad", d_many_out.type_);
ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG
ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(),
d_many_out.Input("z" + f::OperatorBase::GRAD_VAR_SUFFIX()));
ASSERT_EQ("Y" + f::OperatorBase::GRAD_VAR_SUFFIX(),
d_many_out.Input("y" + f::OperatorBase::GRAD_VAR_SUFFIX()));
ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(),
d_many_out.Output("x" + f::OperatorBase::GRAD_VAR_SUFFIX()));
ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix));
ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix));
ASSERT_EQ("X" + f::kGradVarSuffix,
d_many_out.Output("x" + f::kGradVarSuffix));
}
TEST(Backward, op_part_of_input_are_not_need) {
@ -331,12 +325,10 @@ TEST(Backward, op_part_of_input_are_not_need) {
ASSERT_EQ(grad_mul.type_, "mul_grad");
ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL);
ASSERT_EQ(grad_mul.outputs_.size(), 2UL);
ASSERT_EQ(grad_mul.Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX()),
f::OperatorBase::EMPTY_VAR_NAME());
ASSERT_EQ(grad_mul.Output("B" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"b" + f::OperatorBase::GRAD_VAR_SUFFIX());
ASSERT_EQ(grad_mul.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"out" + f::OperatorBase::GRAD_VAR_SUFFIX());
ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName);
ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix);
ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix),
"out" + f::kGradVarSuffix);
ASSERT_EQ(grad_mul.Input("A"), "a");
ASSERT_EQ(grad_mul.Input("B"), "b");
ASSERT_EQ(grad_mul.Input("Out"), "out");
@ -368,23 +360,4 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
EXPECT_EQ(bwd_net->ops_[1]->outputs_.size(), 0UL);
EXPECT_EQ(bwd_net->ops_[2]->inputs_.size(), 0UL);
EXPECT_EQ(bwd_net->ops_[2]->outputs_.size(), 0UL);
/*
EXPECT_EQ(grad_fc.Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
f::OperatorBase::EMPTY_VAR_NAME());
EXPECT_EQ(grad_fc.Output("W" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"w3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Output("b" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"b3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Output("mul_result" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"mul_out3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"out3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_fc.Input("X"), "out2");
EXPECT_EQ(grad_fc.Input("W"), "w3");
EXPECT_EQ(grad_fc.Input("mul_result"), "mul_out3");
EXPECT_EQ(grad_fc.Input("add_result"), "tmp_out3");
EXPECT_EQ(grad_fc.Input("Out"), "out3");
*/
}

@ -56,8 +56,7 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op,
for (const auto& arg : src_arg_list) {
std::string src_name = arg.name();
std::string dst_name =
is_grad ? src_name + OperatorBase::GRAD_VAR_SUFFIX() : src_name;
std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name;
(*dst_op->in_out_idxs_)[dst_name] = idx++;
int src_arg_idx = src_op->in_out_idxs_->at(src_name);
int src_begin =
@ -65,10 +64,9 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op,
int src_end = src_format == nullptr ? src_arg_idx + 1
: src_format->at(src_arg_idx + 1);
for (int i = src_begin; i < src_end; ++i) {
std::string s = is_grad ? src_inout[i] + OperatorBase::GRAD_VAR_SUFFIX()
: arg.ignore_gradient()
? OperatorBase::EMPTY_VAR_NAME()
: src_inout[i];
std::string s =
is_grad ? src_inout[i] + kGradVarSuffix
: (arg.ignore_gradient() ? kEmptyVarName : src_inout[i]);
dst_inout.emplace_back(s);
}
if (dst_format != nullptr) {

@ -83,24 +83,21 @@ TEST(GradOpBuilder, MutiInOut) {
EXPECT_EQ(grad_test_op->Input("Out1"), "out1");
EXPECT_EQ(grad_test_op->Inputs("Out2_mult"),
std::vector<std::string>({"out2_1", "out2_2"}));
EXPECT_EQ(grad_test_op->Input("Out1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"out1" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(
grad_test_op->Inputs("Out2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
std::vector<std::string>(
{"out2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
"out2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix),
"out1" + f::kGradVarSuffix);
EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix),
std::vector<std::string>(
{"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix}));
ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"in1" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(
grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
std::vector<std::string>({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
"in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX(),
"in2_3" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
EXPECT_EQ(grad_test_op->Output("In3" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"in3" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix),
"in1" + f::kGradVarSuffix);
EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix),
std::vector<std::string>({"in2_1" + f::kGradVarSuffix,
"in2_2" + f::kGradVarSuffix,
"in2_3" + f::kGradVarSuffix}));
EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix),
"in3" + f::kGradVarSuffix);
}
TEST(GradOpBuilder, IOIgnoredInGradient) {
@ -116,30 +113,25 @@ TEST(GradOpBuilder, IOIgnoredInGradient) {
ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL);
EXPECT_EQ(grad_test_op->Input("In1"), "in1");
EXPECT_EQ(grad_test_op->Inputs("In2_mult"),
std::vector<std::string>({f::OperatorBase::EMPTY_VAR_NAME(),
f::OperatorBase::EMPTY_VAR_NAME()}));
std::vector<std::string>({f::kEmptyVarName, f::kEmptyVarName}));
EXPECT_EQ(grad_test_op->Inputs("In3_mult"),
std::vector<std::string>({"in3_1", "in3_2"}));
EXPECT_EQ(grad_test_op->Inputs("Out1_mult"),
std::vector<std::string>({"out1_1", "out1_2"}));
EXPECT_EQ(grad_test_op->Input("Out2"), f::OperatorBase::EMPTY_VAR_NAME());
EXPECT_EQ(
grad_test_op->Inputs("Out1_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
std::vector<std::string>(
{"out1_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
"out1_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
EXPECT_EQ(grad_test_op->Input("Out2" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"out2" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName);
EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix),
std::vector<std::string>(
{"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix}));
EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix),
"out2" + f::kGradVarSuffix);
ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
"in1" + f::OperatorBase::GRAD_VAR_SUFFIX());
EXPECT_EQ(
grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
std::vector<std::string>({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
"in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
EXPECT_EQ(
grad_test_op->Outputs("In3_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
std::vector<std::string>({"in3_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
"in3_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix),
"in1" + f::kGradVarSuffix);
EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix),
std::vector<std::string>(
{"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix}));
EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix),
std::vector<std::string>(
{"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix}));
}

@ -12,24 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax="proto2";
syntax = "proto2";
package paddle.framework;
import "attr_type.proto";
import "attribute.proto";
// AttrDesc is used to describe Attributes of an Operator. It contains
// name, type, and value of Attribute.
//
// e.g., for scale=3.0: name=scale, type=AttrType.FLOAT, value=3.0
message AttrDesc {
required string name = 1;
required AttrType type = 2;
optional int32 i = 3;
optional float f = 4;
optional string s = 5;
repeated int32 ints = 6;
repeated float floats = 7;
repeated string strings = 8;
required string name = 1;
required AttrType type = 2;
optional int32 i = 3;
optional float f = 4;
optional string s = 5;
repeated int32 ints = 6;
repeated float floats = 7;
repeated string strings = 8;
};
// Protocol Message to describe an Operator.
@ -42,15 +42,15 @@ message AttrDesc {
// 3rd-party language can build this proto message and call
// AddOp(const OpDesc& op_desc) of Paddle core to create an Operator.
message OpDesc {
// input names of this Operator.
repeated string inputs = 1;
// input names of this Operator.
repeated string inputs = 1;
// output names of this Operator.
repeated string outputs = 2;
// output names of this Operator.
repeated string outputs = 2;
// type of this Operator, such as "add", "sub", "fc".
required string type = 3;
// type of this Operator, such as "add", "sub", "fc".
required string type = 3;
// Attributes of this Operator. e.g., scale=3.0 in cosine op.
repeated AttrDesc attrs = 4;
// Attributes of this Operator. e.g., scale=3.0 in cosine op.
repeated AttrDesc attrs = 4;
};
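
As a hedged illustration of the message layout above (not part of the commit): an OpDesc for a cosine op with a single scale attribute could be filled in from C++ via the protobuf-generated API roughly like this; the op type name "cos_sim" is only a placeholder:

#include "paddle/framework/op_desc.pb.h"

paddle::framework::OpDesc MakeCosOpDesc() {
  paddle::framework::OpDesc op;
  op.set_type("cos_sim");  // placeholder op type
  op.add_inputs("X");
  op.add_inputs("Y");
  op.add_outputs("Out");

  // scale=3.0, matching the AttrDesc comment above.
  auto* attr = op.add_attrs();
  attr->set_name("scale");
  attr->set_type(paddle::framework::AttrType::FLOAT);
  attr->set_f(3.0f);
  return op;
}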

@ -15,100 +15,102 @@ limitations under the License. */
// Protocol Message for 3rd-party language binding.
//
// Paddle Python package will use `OpProto` to generate op creation methods.
// The op creation methods take user's input and generate `OpDesc` proto message,
// The op creation methods take user's input and generate `OpDesc` proto
// message,
// then pass `OpDesc` to C++ side and create Op pointer.
//
syntax="proto2";
syntax = "proto2";
package paddle.framework;
import "attr_type.proto";
import "attribute.proto";
// Attribute protocol message for 3rd-party language binding.
// It will store the Op support what attribute and what type.
message AttrProto {
// Supported attribute name. e.g. `scale` for cosine op.
required string name = 1;
// Supported attribute name. e.g. `scale` for cosine op.
required string name = 1;
// Supported attribute type.
required AttrType type = 2;
// Supported attribute type.
required AttrType type = 2;
// Supported attribute comments. It helps 3rd-party language generate doc-string.
required string comment = 3;
// Supported attribute comments. It helps 3rd-party language generate
// doc-string.
required string comment = 3;
// If that attribute is generated, it means the Paddle third language
// binding has responsibility to fill that attribute. End-User should
// not set that attribute.
optional bool generated = 4 [default=false];
// If that attribute is generated, it means the Paddle third language
// binding has responsibility to fill that attribute. End-User should
// not set that attribute.
optional bool generated = 4 [ default = false ];
}
// Input or output message for 3rd-party language binding.
// It contains parameter name and its comments.
message VarProto {
// Input or output name in that op creation function.
// e.g. `cos(a, b, output, ...)`, "a", "b", "output" are names.
required string name = 1;
// The comment for that input. It helps 3rd-party language generate doc-string.
required string comment = 2;
// Is that input/output could be a list or not.
// If so, that Op should write a attributed named `input_format` or
// `output_format`.
//
// e.g.
// If the op is a fc op, the inputs are `X`, `W`, `b`. The `X` and `W`
// could be multiple, so the multiple of `X` and `W` is True, and OpDesc
// will hold a attribute of them.
//
// The Op desc of same fc could be
// {
// "type": "fc",
// "input": ["X1", "X2", "W1", "W2", "b"],
// "output": "fc.out",
// "attrs" : {
// "input_format": [0, 2, 4, 5]
// }
// }
//
optional bool multiple = 3 [default=false];
// It marks that output is a temporary output. That output is not used by
// user, but used by other op internally as input. If other op is not use
// that output, it could be optimized early.
//
// Attribute temporary_index will be set in OpDesc if there is some
// outputs are temporary.
//
// output = [ "xxx.out1", "xxx.tmp", "xxx.out2"],
// attrs = {
// "temporary_index": [1]
// }
optional bool temporary = 4 [default=false];
// The gradient of operator can be ignored immediately
// e.g. operator AddOp, y = x1 + x2, the gradient of dy/dx1, dy/dx2
// can be ignored for the future optimized on graph.
optional bool ignore_gradient = 6;
// Input or output name in that op creation function.
// e.g. `cos(a, b, output, ...)`, "a", "b", "output" are names.
required string name = 1;
// The comment for that input. It helps 3rd-party language generate
// doc-string.
required string comment = 2;
// Is that input/output could be a list or not.
// If so, that Op should write a attributed named `input_format` or
// `output_format`.
//
// e.g.
// If the op is a fc op, the inputs are `X`, `W`, `b`. The `X` and `W`
// could be multiple, so the multiple of `X` and `W` is True, and OpDesc
// will hold a attribute of them.
//
// The Op desc of same fc could be
// {
// "type": "fc",
// "input": ["X1", "X2", "W1", "W2", "b"],
// "output": "fc.out",
// "attrs" : {
// "input_format": [0, 2, 4, 5]
// }
// }
//
optional bool multiple = 3 [ default = false ];
// It marks that output is a temporary output. That output is not used by
// user, but used by other op internally as input. If other op is not use
// that output, it could be optimized early.
//
// Attribute temporary_index will be set in OpDesc if there is some
// outputs are temporary.
//
// output = [ "xxx.out1", "xxx.tmp", "xxx.out2"],
// attrs = {
// "temporary_index": [1]
// }
optional bool temporary = 4 [ default = false ];
// The gradient of operator can be ignored immediately
// e.g. operator AddOp, y = x1 + x2, the gradient of dy/dx1, dy/dx2
// can be ignored for the future optimized on graph.
optional bool ignore_gradient = 6;
}
// Op protocol message for 3rd-party language binding.
// It contains all information for generating op creation method.
message OpProto {
// The input information to generate op creation method.
repeated VarProto inputs = 1;
// The input information to generate op creation method.
repeated VarProto inputs = 1;
// The output information to generate op creation method.
repeated VarProto outputs = 2;
// The output information to generate op creation method.
repeated VarProto outputs = 2;
// The attribute information to generate op creation method.
repeated AttrProto attrs = 3;
// The attribute information to generate op creation method.
repeated AttrProto attrs = 3;
// The comments for that Op. It helps 3rd-party language generate
// doc-string. The whole documentation of that Op is generated by comment,
// inputs, outputs, attrs together.
required string comment = 4;
// The type of that Op.
required string type = 5;
// The comments for that Op. It helps 3rd-party language generate
// doc-string. The whole documentation of that Op is generated by comment,
// inputs, outputs, attrs together.
required string comment = 4;
// The type of that Op.
required string type = 5;
}
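
A matching sketch for OpProto (hypothetical, mirroring the comments above): how the binding-facing schema of the same placeholder cosine op might be populated, using the VarProto/AttrProto fields defined in this file and assuming the generated header path mirrors op_desc.pb.h:

#include "paddle/framework/op_proto.pb.h"

paddle::framework::OpProto MakeCosOpProto() {
  paddle::framework::OpProto proto;
  proto.set_type("cos_sim");  // placeholder op type
  proto.set_comment("Cosine similarity of two inputs, scaled by `scale`.");

  auto* x = proto.add_inputs();
  x->set_name("X");
  x->set_comment("The first input of cosine op.");

  auto* out = proto.add_outputs();
  out->set_name("Out");
  out->set_comment("The output of cosine op.");

  auto* scale = proto.add_attrs();
  scale->set_name("scale");
  scale->set_type(paddle::framework::AttrType::FLOAT);
  scale->set_comment("Scale applied to the result.");
  return proto;
}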

Some files were not shown because too many files have changed in this diff.