Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into Add_conv3d_Python_API

7 years ago · cb7ca1c211
parent 9b13b4c0d2 d07d9535ca
commit cb7ca1c211
68 changed files with 2542 additions and 588 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -61,6 +61,7 @@ option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen"            OFF)
 option(WITH_ARM_FP16    "Use half precision support on armv8.2-a cpu"   OFF)
 option(WITH_FAST_BUNDLE_TEST    "Bundle tests that can be run in a single process together to reduce launch overhead"   OFF)
 option(WITH_CONTRIB     "Compile the third-party contributation"        OFF)
 option(WITH_ANAKIN      "Compile with Anakin library"                   OFF)
 option(WITH_GRPC     "Use grpc as the default rpc framework"            ${WITH_DISTRIBUTE})
 # CMAKE_BUILD_TYPE
@ -193,7 +194,10 @@ set(EXTERNAL_LIBS
 if(WITH_GPU)
    include(cuda)
    include(tensorrt)
-endif(WITH_GPU)
+    include(external/anakin)
 else()
  set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when GPU is set." FORCE)
 endif()
 if(WITH_AMD_GPU)
    find_package(HIP)
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@ -180,7 +180,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
        print_train_time(start_time, time.time(), num_samples)
        print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))),
        # evaluation
-        if not args.no_test and batch_acc:
+        if not args.no_test and batch_acc and not args.use_reader_op:
            pass_test_acc = test(exe, infer_prog, test_reader, feeder,
                                 batch_acc)
            print(", Test Accuracy: %f" % pass_test_acc)
@ -277,11 +277,12 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
            batch_id += 1
        print_train_time(start_time, time.time(), num_samples)
-        if not args.no_test and batch_acc:
+        if not args.no_test and batch_acc and not args.use_reader_op:
            # Record IO is not implemented for testing yet,
            # so skip the test pass when args.use_reader_op is set.
            test_acc = test(startup_exe, infer_prog, test_reader, feeder,
                            batch_acc)
            print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc))
        exit(0)
 def print_arguments(args):
--- a/benchmark/fluid/models/resnet.py
+++ b/benchmark/fluid/models/resnet.py
@ -199,7 +199,10 @@ def get_model(args):
    batched_train_reader = paddle.batch(
        paddle.reader.shuffle(
            train_reader, buf_size=5120),
-        batch_size=args.batch_size * args.gpus)
+        batch_size=args.batch_size * args.gpus,
-    batched_test_reader = paddle.batch(train_reader, batch_size=args.batch_size)
+        drop_last=True)
    batched_test_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
-    return avg_cost, inference_program, optimizer, batched_train_reader, batched_test_reader, batch_acc
+    return avg_cost, inference_program, optimizer, batched_train_reader,\
                   batched_test_reader, batch_acc
--- a/cmake/external/anakin.cmake
+++ b/cmake/external/anakin.cmake
@ -0,0 +1,42 @@
 # Skip the rest of this file unless Anakin support is enabled.
 if (NOT WITH_ANAKIN)
  return()
 endif()
 # Directory the Anakin release is downloaded and unpacked into. FORCE
 # overwrites any value already in the cache, so this path always follows
 # THIRD_PARTY_PATH.
 set(ANAKIN_INSTALL_DIR "${THIRD_PARTY_PATH}/install/anakin" CACHE PATH
  "Anakin install path." FORCE)
 # Header root and library directory both default to the install directory.
 # These are set without FORCE, so a value already present in the cache wins.
 set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header files")
 set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library")
 # Extra compiler flags for code that includes Anakin headers: the
 # -Wno-error=<name> entries stop those warnings from being treated as errors,
 # and -Wno-reorder turns the reorder warning off. This file only defines the
 # list; it does not attach the flags to any target itself.
 set(ANAKIN_COMPILE_EXTRA_FLAGS -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp)
 # Location of the release tarball fetched further down in this file.
 set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz")
 # Helper for consuming Anakin: its headers currently have to be reachable from
 # nearly every sub-directory, so this adds `root_dir` and every directory
 # below it to the include path of the current CMake directory scope.
 #
 # Arguments:
 #   root_dir - directory to start from; nothing happens when it is not an
 #              existing directory.
 #
 # Paths are quoted throughout so that directories containing spaces or
 # semicolons are handled as a single argument.
 function(fetch_include_recursively root_dir)
    if (NOT IS_DIRECTORY "${root_dir}")
        return()
    endif()
    include_directories("${root_dir}")
    # Entries are returned relative to root_dir and include sub-directories.
    file(GLOB child_entries RELATIVE "${root_dir}" "${root_dir}/*")
    foreach(child ${child_entries})
        if (IS_DIRECTORY "${root_dir}/${child}")
            fetch_include_recursively("${root_dir}/${child}")
        endif()
    endforeach()
 endfunction()
 # Download and unpack the prebuilt Anakin release at configure time.
 # The install directory is emptied first so that an archive left over from a
 # previous configure run is never reused.
 message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
 execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
 execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
 # `&&` instead of `;`: never run wget/tar in the wrong directory if `cd` fails.
 execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR} && wget -q ${ANAKIN_LIBRARY_URL}"
                 RESULT_VARIABLE ANAKIN_DOWNLOAD_RESULT)
 # Fail at configure time with a clear message rather than later with
 # missing headers or libraries.
 if (NOT "${ANAKIN_DOWNLOAD_RESULT}" STREQUAL "0")
    message(FATAL_ERROR "Failed to download Anakin from ${ANAKIN_LIBRARY_URL} (result: ${ANAKIN_DOWNLOAD_RESULT})")
 endif()
 execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR} && tar xzf anakin_release_simple.tar.gz"
                 RESULT_VARIABLE ANAKIN_EXTRACT_RESULT)
 if (NOT "${ANAKIN_EXTRACT_RESULT}" STREQUAL "0")
    message(FATAL_ERROR "Failed to extract anakin_release_simple.tar.gz in ${ANAKIN_INSTALL_DIR} (result: ${ANAKIN_EXTRACT_RESULT})")
 endif()
 # Expose the unpacked headers and libraries to the current directory scope.
 if (WITH_ANAKIN)
    message(STATUS "Anakin for inference is enabled")
    message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
    fetch_include_recursively(${ANAKIN_INCLUDE})
    link_directories(${ANAKIN_LIBRARY})
 endif()
--- a/doc/fluid/api/detection.rst
+++ b/doc/fluid/api/detection.rst
--- a/doc/fluid/api/gen_doc.sh
+++ b/doc/fluid/api/gen_doc.sh
@ -1,5 +1,5 @@
 #!/bin/bash
-python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst
+python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler > layers.rst
 for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
 do
--- a/doc/fluid/api/io.rst
+++ b/doc/fluid/api/io.rst
@ -59,21 +59,3 @@ get_inference_program
 ..  autofunction:: paddle.fluid.io.get_inference_program
    :noindex:
 save_checkpoint
 ---------------
 ..  autofunction:: paddle.fluid.io.save_checkpoint
    :noindex:
 load_checkpoint
 ---------------
 ..  autofunction:: paddle.fluid.io.load_checkpoint
    :noindex:
 clean_checkpoint
 ----------------
 ..  autofunction:: paddle.fluid.io.clean_checkpoint
    :noindex:
--- a/doc/fluid/api/layers.rst
+++ b/doc/fluid/api/layers.rst
@ -181,12 +181,6 @@ Print
 ..  autofunction:: paddle.fluid.layers.Print
    :noindex:
 is_empty
 --------
 ..  autofunction:: paddle.fluid.layers.is_empty
    :noindex:
 device
 ======
@ -261,19 +255,6 @@ double_buffer
 ..  autofunction:: paddle.fluid.layers.double_buffer
    :noindex:
 random_data_generator
 ---------------------
 ..  autofunction:: paddle.fluid.layers.random_data_generator
    :noindex:
 Preprocessor
 ------------
 ..  autoclass:: paddle.fluid.layers.Preprocessor
    :members:
    :noindex:
 nn
 ==
@ -632,30 +613,6 @@ roi_pool
 ..  autofunction:: paddle.fluid.layers.roi_pool
    :noindex:
 dice_loss
 ---------
 ..  autofunction:: paddle.fluid.layers.dice_loss
    :noindex:
 resize_bilinear
 ---------------
 ..  autofunction:: paddle.fluid.layers.resize_bilinear
    :noindex:
 gather
 ------
 ..  autofunction:: paddle.fluid.layers.gather
    :noindex:
 random_crop
 -----------
 ..  autofunction:: paddle.fluid.layers.random_crop
    :noindex:
 ops
 ===
@ -803,12 +760,6 @@ sum
 ..  autofunction:: paddle.fluid.layers.sum
    :noindex:
 shape
 -----
 ..  autofunction:: paddle.fluid.layers.shape
    :noindex:
 sigmoid
 -------
@ -1058,3 +1009,93 @@ zeros
 ..  autofunction:: paddle.fluid.layers.zeros
    :noindex:
 detection
 =========
 multi_box_head
 --------------
 ..  autofunction:: paddle.fluid.layers.multi_box_head
    :noindex:
 bipartite_match
 ---------------
 ..  autofunction:: paddle.fluid.layers.bipartite_match
    :noindex:
 target_assign
 -------------
 ..  autofunction:: paddle.fluid.layers.target_assign
    :noindex:
 detection_output
 ----------------
 ..  autofunction:: paddle.fluid.layers.detection_output
    :noindex:
 ssd_loss
 --------
 ..  autofunction:: paddle.fluid.layers.ssd_loss
    :noindex:
 detection_map
 -------------
 ..  autofunction:: paddle.fluid.layers.detection_map
    :noindex:
 iou_similarity
 --------------
 ..  autofunction:: paddle.fluid.layers.iou_similarity
    :noindex:
 box_coder
 ---------
 ..  autofunction:: paddle.fluid.layers.box_coder
    :noindex:
 learning_rate_scheduler
 =======================
 exponential_decay
 -----------------
 ..  autofunction:: paddle.fluid.layers.exponential_decay
    :noindex:
 natural_exp_decay
 -----------------
 ..  autofunction:: paddle.fluid.layers.natural_exp_decay
    :noindex:
 inverse_time_decay
 ------------------
 ..  autofunction:: paddle.fluid.layers.inverse_time_decay
    :noindex:
 polynomial_decay
 ----------------
 ..  autofunction:: paddle.fluid.layers.polynomial_decay
    :noindex:
 piecewise_decay
 ---------------
 ..  autofunction:: paddle.fluid.layers.piecewise_decay
    :noindex:
 noam_decay
 ----------
 ..  autofunction:: paddle.fluid.layers.noam_decay
    :noindex:
--- a/doc/fluid/api/optimizer.rst
+++ b/doc/fluid/api/optimizer.rst
@ -89,13 +89,6 @@ DecayedAdagradOptimizer
    :members:
    :noindex:
 RMSPropOptimizer
 ----------------
 ..  autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
    :members:
    :noindex:
 Adadelta
 --------
--- a/doc/fluid/api/profiler.rst
+++ b/doc/fluid/api/profiler.rst
@ -23,15 +23,3 @@ profiler
 ..  autofunction:: paddle.fluid.profiler.profiler
    :noindex:
 start_profiler
 --------------
 ..  autofunction:: paddle.fluid.profiler.start_profiler
    :noindex:
 stop_profiler
 -------------
 ..  autofunction:: paddle.fluid.profiler.stop_profiler
    :noindex:
--- a/doc/survey/dynamic_graph.md
+++ b/doc/survey/dynamic_graph.md
@ -171,7 +171,7 @@ Pytorch chooses immediate evaluation. It avoids ever materializing a "forward gr
 ## What can fluid learn from them?
-TBD
+Please refer to `paddle/contrib/dynamic/`.
 # Appendix
--- a/doc/v2/api/config/evaluators.rst
+++ b/doc/v2/api/config/evaluators.rst
@ -101,7 +101,7 @@ value_printer
    :noindex:
 Detection
-=====
+==========
 detection_map
 -------------
--- a/doc/v2/api/config/layer.rst
+++ b/doc/v2/api/config/layer.rst
--- a/doc/v2/api/index_en.rst
+++ b/doc/v2/api/index_en.rst
@ -8,4 +8,3 @@ API
    model_configs.rst
    data.rst
    run_logic.rst
    fluid/index.rst
--- a/doc/v2/build_and_install/pip_install_cn.rst
+++ b/doc/v2/build_and_install/pip_install_cn.rst
@ -60,6 +60,7 @@ paddlepaddle-gpu==0.11.0            使用CUDA 7.5和cuDNN 5编译的0.11.0版
    "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_"
    "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
    "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
    "cuda9.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
 .. _pip_dependency:
--- a/doc/v2/build_and_install/pip_install_en.rst
+++ b/doc/v2/build_and_install/pip_install_en.rst
@ -63,6 +63,7 @@ If the links below shows up the login form, just click "Log in as guest" to star
    "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__"
    "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
    "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
    "cuda9.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
 .. _pip_dependency:
--- a/paddle/contrib/CMakeLists.txt
+++ b/paddle/contrib/CMakeLists.txt
@ -14,3 +14,4 @@
 #
 add_subdirectory(inference)
 add_subdirectory(tape)
--- a/paddle/contrib/inference/CMakeLists.txt
+++ b/paddle/contrib/inference/CMakeLists.txt
@ -17,48 +17,9 @@ if(APPLE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
 endif(APPLE)
 set(ANAKIN_INCLUDE "" CACHE STRING "root of Anakin header files")
 set(ANAKIN_LIBRARY "" CACHE STRING "path of Anakin library")
 set(inference_deps paddle_inference_api paddle_fluid_api)
 # if anakin is set enable anakin api implementation
 if(ANAKIN_INCLUDE AND ANAKIN_LIBRARY)
    set(ANAKIN_FOUND ON)
 else()
    set(ANAKIN_FOUND OFF)
 endif()
 # Adds `root_dir` and every directory below it to the include path of the
 # current CMake directory scope.
 #
 # Arguments:
 #   root_dir - directory to start from; nothing happens when it is not an
 #              existing directory.
 #
 # Paths are quoted throughout so that directories containing spaces or
 # semicolons are handled as a single argument.
 function(fetch_include_recursively root_dir)
    if (NOT IS_DIRECTORY "${root_dir}")
        return()
    endif()
    include_directories("${root_dir}")
    # Entries are returned relative to root_dir and include sub-directories.
    file(GLOB child_entries RELATIVE "${root_dir}" "${root_dir}/*")
    foreach(child ${child_entries})
        if (IS_DIRECTORY "${root_dir}/${child}")
            fetch_include_recursively("${root_dir}/${child}")
        endif()
    endforeach()
 endfunction()
 if (ANAKIN_FOUND)
    # Anakin's code style doesn't follow google c style.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp")
    message(STATUS "Anakin for inference is enabled")
    message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
    fetch_include_recursively(${ANAKIN_INCLUDE})
    link_directories(${ANAKIN_LIBRARY})
    nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
    target_link_libraries(inference_anakin_api anakin anakin_saber_common)
    list(APPEND inference_deps inference_anakin_api)
 endif()
 function(inference_api_test TARGET_NAME)
    if (WITH_TESTING)
        set(options "")
@ -89,9 +50,17 @@ cc_test(test_paddle_inference_api
 inference_api_test(test_paddle_inference_api_impl
                    ARGS test_word2vec test_image_classification)
-if (ANAKIN_FOUND)
+if (WITH_ANAKIN)
    # Because Anakin does not have official library releases, and its protobuf and CUDA versions do not match Paddle's,
    # the Anakin library is not merged into our official inference library. To use the Anakin prediction API, one needs to
    # compile libinference_anakin_api.a and build against anakin.so.
    nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
    target_compile_options(inference_anakin_api BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
    target_link_libraries(inference_anakin_api anakin anakin_saber_common)
    cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
-    DEPS ${inference_deps})
+                                  ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
                                  DEPS inference_anakin_api)
    target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
 endif()
 if(WITH_TESTING)
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
@ -12,9 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <cuda.h>
 #include "paddle/contrib/inference/paddle_inference_api_anakin_engine.h"
 #include <cuda.h>
 namespace paddle {
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine.h
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine.h
@ -19,10 +19,9 @@ limitations under the License. */
 #pragma once
 // NOTE This header file do not have namespace.
 //#include <test/framework/net/paddle_api.h>
 #include "paddle/contrib/inference/paddle_inference_api.h"
 // from anakin
 #include "framework/core/net/net.h"
 #include "saber/saber_types.h"
--- a/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
+++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
@ -12,17 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 #include "gflags/gflags.h"
 #include "paddle/contrib/inference/paddle_inference_api.h"
 DEFINE_string(model, "", "Directory of the inference model.");
 namespace paddle {
 AnakinConfig GetConfig() {
  AnakinConfig config;
-  config.model_file = "./mobilenet_v2.anakin.bin";
+  config.model_file = FLAGS_model;
  config.device = 0;
  config.max_batch_size = 1;
  return config;
--- a/paddle/contrib/tape/CMakeLists.txt
+++ b/paddle/contrib/tape/CMakeLists.txt
@ -0,0 +1,25 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 # On Apple platforms, keep the pessimizing-move diagnostic from being
 # promoted to an error.
 if(APPLE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
 endif()
 # tape_variable is built from variable.cc; tape is built from tape.cc and
 # depends on tape_variable.
 cc_library(tape_variable
            SRCS variable.cc
            DEPS ${FLUID_CORE_MODULES})
 cc_library(tape
            SRCS tape.cc
            DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} tape_variable)
 # Unit test covering both libraries.
 cc_test(test_tape SRCS test_tape.cc DEPS tape tape_variable)
--- a/paddle/contrib/tape/README.md
+++ b/paddle/contrib/tape/README.md
--- a/paddle/contrib/tape/computation_graph.png
+++ b/paddle/contrib/tape/computation_graph.png
--- a/paddle/contrib/tape/function.h
+++ b/paddle/contrib/tape/function.h
@ -0,0 +1,131 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #pragma once
 #include <string>
 #include "paddle/contrib/tape/tape.h"
 #include "paddle/contrib/tape/variable.h"
 #include "paddle/fluid/framework/type_defs.h"
 namespace paddle {
 namespace tape {
 // Empty placeholder type; none of the classes visible in this header derive
 // from it or reference it.
 class Function {};
 // Callable that records an initializer op on the global tape.
 // The op name and its attributes are captured once at construction and reused
 // for every call.
 class Fill {
 public:
  // `initializer` is later passed as the first argument of Tape::AddOp
  // (presumably the op type, e.g. "fill_constant" as used elsewhere in this
  // header); `attrs` is passed as the op's attribute map.
  Fill(const std::string &initializer, const framework::AttributeMap &attrs)
      : initializer_(initializer), attrs_(attrs) {}
  // Adds one op to the global tape with an empty input map, `var` bound to
  // the "Out" slot, and the stored attributes. Only AddOp is called here;
  // this method does not run the tape.
  void operator()(VariableHandle var) {
    get_global_tape().AddOp(initializer_, {}, {{"Out", {var}}}, attrs_);
  }
 private:
  const std::string initializer_;
  const framework::AttributeMap attrs_;
 };
 // Callable that records a "mean" op on the global tape.
 class Mean {
 public:
  // Creates a fresh Variable (constructed with the string "mean"), adds a
  // "mean" op to the global tape with `var` bound to "X" and the new variable
  // bound to "Out", and returns the new variable's handle. Only AddOp is
  // called here; this method does not run the tape.
  VariableHandle operator()(VariableHandle var) {
    VariableHandle out(new Variable("mean"));
    get_global_tape().AddOp("mean", {{"X", {var}}}, {{"Out", {out}}}, {});
    return out;
  }
 };
 // Callable layer that records mul -> elementwise_add -> activation on the
 // global tape, using a weight and a bias variable owned by this object.
 // NOTE(review): std::vector is used below but <vector> is not among the
 // includes visible in this header — presumably pulled in transitively;
 // confirm.
 class Linear {
 public:
  // Creates the weight and bias variables and initializes them immediately.
  //   in_dim, out_dim - used as the "shape" attribute: {in_dim, out_dim} for
  //                     the weight and {out_dim} for the bias.
  //   act             - op name recorded as the last step of operator().
  Linear(int in_dim, int out_dim, const std::string &act)
      : w_(new Variable("LinearWeight")),
        b_(new Variable("LinearBias")),
        act_(act) {
    // A tape local to the constructor: the initialization ops are added to it
    // and run here via Forward(), not added to the global tape.
    Tape init_tape;
    std::string initializer = "fill_constant";
    framework::AttributeMap attrs;
    // Weight: FP32, shape {in_dim, out_dim}, every element set to 1.0f.
    attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
    attrs["shape"] = std::vector<int>{in_dim, out_dim};
    attrs["value"] = 1.0f;
    init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs);
    // Bias: the same attribute map is reused; only "shape" actually changes
    // ("dtype" and "value" are reassigned to the same values as above).
    attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
    attrs["shape"] = std::vector<int>{out_dim};
    attrs["value"] = 1.0f;
    init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs);
    init_tape.Forward();
  }
  // Records three ops on the global tape and returns the handle of the final
  // output. Each intermediate result is a new Variable constructed with the
  // string "linear". Only AddOp is called here; this method does not run the
  // tape.
  VariableHandle operator()(VariableHandle input) {
    // Step 1: "mul" with X = input, Y = weight.
    VariableHandle pre_bias(new Variable("linear"));
    get_global_tape().AddOp("mul",
                            {{"X", {input}}, {"Y", {w_}}},
                            {{"Out", {pre_bias}}},
                            {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
    // Step 2: "elementwise_add" of the bias, with attribute axis = 1.
    VariableHandle pre_act(new Variable("linear"));
    get_global_tape().AddOp("elementwise_add",
                            {{"X", {pre_bias}}, {"Y", {b_}}},
                            {{"Out", {pre_act}}},
                            {{"axis", 1}});
    // Step 3: the op named by act_, with no attributes.
    VariableHandle post_act(new Variable("linear"));
    get_global_tape().AddOp(
        act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {});
    return post_act;
  }
  // Returns the weight and bias handles, in that order.
  std::vector<VariableHandle> Params() { return {w_, b_}; }
 private:
  VariableHandle w_;
  VariableHandle b_;
  std::string act_;
 };
 // Callable that applies one "sgd" op to a single variable, using a learning
 // rate variable created at construction.
 class SGD {
 public:
  // Creates the learning-rate variable and fills it right away: a
  // "fill_constant" op (FP32, shape {1}, value = learning_rate) is added to a
  // tape local to the constructor and run via Forward().
  // NOTE(review): single-argument constructor is not marked `explicit`, so a
  // float converts to SGD implicitly — confirm whether that is intended.
  SGD(float learning_rate) : learning_rate_(new Variable("sgd")) {
    Tape init_tape;
    std::string initializer = "fill_constant";
    framework::AttributeMap attrs;
    attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
    attrs["shape"] = std::vector<int>{1};
    attrs["value"] = learning_rate;
    init_tape.AddOp(initializer, {}, {{"Out", {learning_rate_}}}, attrs);
    init_tape.Forward();
  }
  // Applies the update to `input`. Enforces that the global tape reports
  // HasBeenBackwarded() first, because the op reads input->Grad().
  void operator()(VariableHandle input) {
    PADDLE_ENFORCE(get_global_tape().HasBeenBackwarded(),
                   "optimization must happen after the backward");
    // The "sgd" op goes on a temporary tape and is run immediately via
    // Forward(); it is not added to the global tape. `input` is bound to both
    // "Param" and "ParamOut".
    Tape temp_tape;
    temp_tape.AddOp("sgd",
                    {{"Param", {input}},
                     {"LearningRate", {learning_rate_}},
                     {"Grad", {input->Grad()}}},
                    {{"ParamOut", {input}}},
                    {});
    temp_tape.Forward();
  }
 private:
  VariableHandle learning_rate_;
 };
 }
 }
--- a/Show More
+++ b/Show More