notest,test=model_benchmark

test benchmark ci
289 changed files with 3369 additions and 9292 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License

-cmake_minimum_required(VERSION 3.15)
-cmake_policy(VERSION 3.10)
+cmake_minimum_required(VERSION 3.10)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@ -39,6 +38,11 @@ endif()
 if (WITH_GPU  AND WITH_ASCEND)
    message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
 endif()
+# cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them.
+if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15))
+    message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. "
+       "You can use cmake 3.16 (recommended), 3.10, 3.11, 3.15 or 3.17. Please refer to the install document: https://cmake.org/install/")
+endif()

 if(WITH_GPU AND NOT APPLE)
    enable_language(CUDA)
@ -57,7 +61,6 @@ if(WITH_MUSL)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy")
 endif()

-
 if(WIN32)
    option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)

@ -121,7 +124,7 @@ if(WIN32)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838")

    foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
-        set(${flag_var} "${${flag_var}} /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")
+        string(APPEND ${flag_var} " /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")  # leading space keeps /ignore:4049 separate from the flags already in the variable
    endforeach(flag_var)

    if (WITH_WIN_DUMP_DBG)
--- a/README_cn.md
+++ b/README_cn.md
@ -1,4 +1,4 @@
-
+#testtest
 <p align="center">
 <img align="center" src="doc/imgs/logo.png", width=1600>
 <p>
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@ -74,7 +74,7 @@ endfunction()
 #   select_nvcc_arch_flags(out_variable)
 function(select_nvcc_arch_flags out_variable)
  # List of arch names
-  set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "All" "Manual")
+  set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual")
  set(archs_name_default "Auto")
  list(APPEND archs_names "Auto")

@ -108,8 +108,6 @@ function(select_nvcc_arch_flags out_variable)
    set(cuda_arch_bin "70")
  elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
    set(cuda_arch_bin "75")
-  elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
-    set(cuda_arch_bin "80")
  elseif(${CUDA_ARCH_NAME} STREQUAL "All")
    set(cuda_arch_bin ${paddle_known_gpu_archs})
  elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
@ -208,11 +206,14 @@ select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
 message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")

-# Set C++14 support
+# Set C++11 support
 set(CUDA_PROPAGATE_HOST_FLAGS OFF)
 # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
 # So, don't set these flags here.
-set(CMAKE_CUDA_STANDARD 14)
+if (NOT WIN32) # windows msvc2015 support c++11 natively.
+    # -std=c++11 and -fPIC are not recognized by msvc; -Xcompiler will be added by cmake.
+  set(CMAKE_CUDA_STANDARD 11)
+endif(NOT WIN32)

 # (Note) For windows, if delete /W[1-4], /W1 will be added defaultly and conflic with -w
 # So replace /W[1-4] with /W0
--- a/cmake/cudnn.cmake
+++ b/cmake/cudnn.cmake
@ -94,7 +94,7 @@ macro(find_cudnn_version cudnn_header_file)
                "${CUDNN_MAJOR_VERSION} * 1000 +
                 ${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")
            message(STATUS "Current cuDNN header is ${cudnn_header_file} "
-              "Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCHLEVEL_VERSION}. ")
+              "Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCHLEVEL_VERSION}. ")
        endif()
    endif()
 endmacro()
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@ -14,15 +14,11 @@

 INCLUDE(ExternalProject)

-IF(WITH_ROCM)
-    add_definitions(-DWARPCTC_WITH_HIP)
-ENDIF()
-
 SET(WARPCTC_PREFIX_DIR  ${THIRD_PARTY_PATH}/warpctc)
 SET(WARPCTC_SOURCE_DIR  ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
 SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
 set(WARPCTC_REPOSITORY  ${GIT_URL}/baidu-research/warp-ctc.git)
-set(WARPCTC_TAG         c690fc5755abbdbdc98ef78d51ec10a6748a8cd1)
+set(WARPCTC_TAG         95a461eddeabd51099ef059dcfada1117eb1bfb8)

 SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
    CACHE PATH "Warp-ctc Directory" FORCE)
@ -61,7 +57,6 @@ ExternalProject_Add(
                    -DCMAKE_CXX_FLAGS_DEBUG=$<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline>
                    -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
                    -DWITH_GPU=${WITH_GPU}
-                    -DWITH_ROCM=${WITH_ROCM}
                    -DWITH_OMP=${USE_OMP}
                    -DWITH_TORCH=OFF
                    -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@ -13,7 +13,7 @@ if(NOT XPU_SDK_ROOT)
  elseif(WITH_SUNWAY)
      SET(XPU_URL    "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
  else()
-      SET(XPU_URL    "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_03_30.tar.gz" CACHE STRING "" FORCE)
+      SET(XPU_URL    "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_02_27.tar.gz" CACHE STRING "" FORCE)
  endif()

  SET(XPU_SOURCE_DIR              "${THIRD_PARTY_PATH}/xpu")
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@ -4,10 +4,10 @@ include(CheckCCompilerFlag)
 include(CheckCXXSymbolExists)
 include(CheckTypeSize)

-function(CheckCompilerCXX14Flag)
+function(CheckCompilerCXX11Flag)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-        if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.4)
-            message(FATAL_ERROR "Unsupported GCC version. GCC >= 5.4 required.")
+        if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
+            message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
        elseif(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.2)
            message(WARNING "Found GCC ${CMAKE_CXX_COMPILER_VERSION} which is too high, recommended to use GCC 8.2")
        endif()
@ -20,15 +20,23 @@ function(CheckCompilerCXX14Flag)
                message(FATAL_ERROR "Unsupported AppleClang version. AppleClang >= 5.1 required.")
            endif()
        else()
-            if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.4)
-                message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.4 required.")
+            if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3)
+                message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.")
            endif()
        endif()
    endif()
 endfunction()

-CheckCompilerCXX14Flag()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+CheckCompilerCXX11Flag()
+if (WITH_GPU)  # GPU builds: the host C++ standard depends on the CUDA compiler version
+    if ("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 11.0)  # component-wise version compare; quoted so an empty value is not a syntax error
+       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+    else()
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+    endif()
+else()  # builds without WITH_GPU always use C++11
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+endif()
 # safe_set_flag
 #
 # Set a compile flag only if compiler is support
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@ -492,8 +492,10 @@ function(nv_library TARGET_NAME)
        message(FATAL "Please specify source file or library in nv_library.")
      endif()
    endif(nv_library_SRCS)
-    if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910))
-      set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
+    if (WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
+      if(${MSVC_VERSION} LESS_EQUAL 1900)
+        set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
+      endif()
    endif()
  endif()
 endfunction(nv_library)
@ -510,7 +512,7 @@ function(nv_binary TARGET_NAME)
      add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
      common_link(${TARGET_NAME})
    endif()
-    if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910))
+    if (WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
      set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
    endif()
  endif()
@ -537,7 +539,7 @@ function(nv_test TARGET_NAME)
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
    set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
-    if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910))
+    if (WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
      set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS})
    endif()
  endif()
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@ -198,9 +198,6 @@ copy(inference_lib_dist
 copy(inference_lib_dist
        SRCS  ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex128.h
        DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)
-copy(inference_lib_dist
-        SRCS  ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h
-        DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)

 # CAPI inference library for only inference
 set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" CACHE STRING
--- a/cmake/init.cmake
+++ b/cmake/init.cmake
@ -18,10 +18,6 @@ if(NOT WIN32)
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
    set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG")
 else()
-    # It has not been used now, it can specify CUDA compile flag manualy,
-    # its use is to remvoe /Zi to reduce GPU static library size. But it's dangerous
-    # because CUDA will update by nvidia, then error will occur.
-    # Now, it's used in CUDA:[10.0, 10.2]
    set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
 endif()

--- a/cmake/paddle_win.props
+++ b/cmake/paddle_win.props
@ -15,7 +15,7 @@
            <Warning>InheritFromHost</Warning>

            <BaseCommandLineTemplate>-ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]</BaseCommandLineTemplate>
-            <BuildCommandLineTemplate>--use-local-env $(CudaClVersion)</BuildCommandLineTemplate>
+            <BuildCommandLineTemplate>--use-local-env --cl-version $(CudaClVersion)</BuildCommandLineTemplate>
            <BuildDynamicCommandLineTemplate>[CodeGeneration]</BuildDynamicCommandLineTemplate>
            <CleanCommandLineTemplate>-clean</CleanCommandLineTemplate>
            <!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] $(CudaForceSynchronousPdbWrites) /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->
--- a/go/README_cn.md
+++ b/go/README_cn.md
@ -50,7 +50,6 @@ output_data := value.Interface().([][]float32)

 运行
 ```bash
-go mod init github.com/paddlepaddle
 export LD_LIBRARY_PATH=`pwd`/paddle_c/paddle/lib:$LD_LIBRARY_PATH
 go run ./demo/mobilenet.go
 ```
--- a/go/demo/mobilenet.go
+++ b/go/demo/mobilenet.go
@ -13,7 +13,7 @@
 // limitations under the License.
 package main

-import "github.com/paddlepaddle/paddle"
+import "../paddle"
 import "strings"
 import "io/ioutil"
 import "strconv"
--- a/go/paddle/common.go
+++ b/go/paddle/common.go
@ -15,7 +15,7 @@
 package paddle

 // #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
-// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
+// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
 // #include <stdbool.h>
 // #include <paddle_c_api.h>
 import "C"
--- a/go/paddle/config.go
+++ b/go/paddle/config.go
@ -15,7 +15,7 @@
 package paddle

 // #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
-// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
+// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
 // #include <stdbool.h>
 // #include <stdlib.h>
 // #include <paddle_c_api.h>
--- a/go/paddle/predictor.go
+++ b/go/paddle/predictor.go
@ -15,7 +15,7 @@
 package paddle

 // #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
-// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
+// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
 // #include <stdbool.h>
 // #include "paddle_c_api.h"
 import "C"
@ -88,7 +88,7 @@ func (predictor *Predictor) GetInputNames() []string {
 }

 func (predictor *Predictor) GetOutputNames() []string {
-	names := make([]string, predictor.GetOutputNum())
+	names := make([]string, predictor.GetOutputNum()) // sized by the output count; the loop below stores one output name per slot
 	for i := 0; i < len(names); i++ {
 		names[i] = predictor.GetOutputName(i)
 	}
--- a/go/paddle/tensor.go
+++ b/go/paddle/tensor.go
@ -15,7 +15,7 @@
 package paddle

 // #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
-// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
+// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
 // #include <stdbool.h>
 // #include <stdlib.h>
 // #include <string.h>
@ -209,7 +209,7 @@ func DecodeTensor(r *bytes.Reader, shape []int32, t reflect.Type, ptr reflect.Va
 		value := reflect.Indirect(ptr)
 		value.Set(reflect.MakeSlice(t, int(shape[0]), int(shape[0])))
 		if len(shape) == 1 && value.Len() > 0 {
-			switch value.Index(0).Kind() {
+			switch value.Index(0).Kind() { // Len() > 0 only guarantees that element 0 exists
 			case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
 				binary.Read(r, Endian(), value.Interface())
 				return
--- a/paddle/fluid/extension/include/ext_dispatch.h
+++ b/paddle/fluid/extension/include/ext_dispatch.h
@ -47,22 +47,6 @@ namespace paddle {
    }                                                                     \
  }()

-#define PD_DISPATCH_FLOATING_AND_HALF_TYPES(TYPE, NAME, ...)                   \
-  [&] {                                                                        \
-    const auto& __dtype__ = TYPE;                                              \
-    switch (__dtype__) {                                                       \
-      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float,           \
-                           __VA_ARGS__)                                        \
-      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double,          \
-                           __VA_ARGS__)                                        \
-      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT16, paddle::float16, \
-                           __VA_ARGS__)                                        \
-      default:                                                                 \
-        PD_THROW("function " #NAME " is not implemented for data type `",      \
-                 ::paddle::ToString(__dtype__), "`");                          \
-    }                                                                          \
-  }()
-
 ///////// Integral Dispatch Marco ///////////

 #define PD_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...)                           \
--- a/paddle/fluid/extension/include/ext_dtype.h
+++ b/paddle/fluid/extension/include/ext_dtype.h
@ -19,13 +19,11 @@ limitations under the License. */
 #include "complex128.h"     // NOLINT
 #include "complex64.h"      // NOLINT
 #include "ext_exception.h"  // NOLINT
-#include "float16.h"        // NOLINT

 namespace paddle {

 using complex64 = paddle::platform::complex64;
 using complex128 = paddle::platform::complex128;
-using float16 = paddle::platform::float16;

 enum class DataType {
  BOOL,
@ -34,7 +32,6 @@ enum class DataType {
  INT16,
  INT32,
  INT64,
-  FLOAT16,
  FLOAT32,
  FLOAT64,
  COMPLEX64,
@ -56,8 +53,6 @@ inline std::string ToString(DataType dtype) {
      return "int32_t";
    case DataType::INT64:
      return "int64_t";
-    case DataType::FLOAT16:
-      return "float16";
    case DataType::FLOAT32:
      return "float";
    case DataType::FLOAT64:
@ -78,7 +73,6 @@ inline std::string ToString(DataType dtype) {
  _(int16_t, DataType::INT16)       \
  _(int, DataType::INT32)           \
  _(int64_t, DataType::INT64)       \
-  _(float16, DataType::FLOAT16)     \
  _(float, DataType::FLOAT32)       \
  _(double, DataType::FLOAT64)      \
  _(complex64, DataType::COMPLEX64) \
--- a/paddle/fluid/extension/include/ext_op_meta_info.h
+++ b/paddle/fluid/extension/include/ext_op_meta_info.h
@ -204,68 +204,38 @@ struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
 // Record Op infershape core function
 using InferShapeFunc = std::vector<std::vector<int64_t>> (*)(
    const std::vector<std::vector<int64_t>>& input_shapes,
-    const std::vector<std::vector<std::vector<int64_t>>>& vec_input_shapes,
-    const std::vector<boost::any>& attrs);
+    const std::vector<std::vector<std::vector<int64_t>>>& vec_input_shapes);

-#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPE(input_type)              \
-  template <typename... Tail>                                                 \
-  struct InferShapeCallHelper<input_type, Tail...> {                          \
-    template <int in_idx, int vec_in_idx, int attr_idx,                       \
-              typename... PreviousArgs>                                       \
-    static Return InferShape(                                                 \
-        const std::vector<std::vector<int64_t>>& input_shapes,                \
-        const std::vector<std::vector<std::vector<int64_t>>>&                 \
-            vec_input_shapes,                                                 \
-        const std::vector<boost::any>& attrs, const PreviousArgs&... pargs) { \
-      input_type arg = input_shapes[in_idx];                                  \
-      return InferShapeCallHelper<Tail...>::template InferShape<              \
-          in_idx + 1, vec_in_idx, attr_idx>(input_shapes, vec_input_shapes,   \
-                                            attrs, pargs..., arg);            \
-    }                                                                         \
-  }
-
-#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPES(input_type)             \
-  template <typename... Tail>                                                 \
-  struct InferShapeCallHelper<input_type, Tail...> {                          \
-    template <int in_idx, int vec_in_idx, int attr_idx,                       \
-              typename... PreviousArgs>                                       \
-    static Return InferShape(                                                 \
-        const std::vector<std::vector<int64_t>>& input_shapes,                \
-        const std::vector<std::vector<std::vector<int64_t>>>&                 \
-            vec_input_shapes,                                                 \
-        const std::vector<boost::any>& attrs, const PreviousArgs&... pargs) { \
-      input_type arg = vec_input_shapes[vec_in_idx];                          \
-      return InferShapeCallHelper<Tail...>::template InferShape<              \
-          in_idx, vec_in_idx + 1, attr_idx>(input_shapes, vec_input_shapes,   \
-                                            attrs, pargs..., arg);            \
-    }                                                                         \
+#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPE(input_type)             \
+  template <typename... Tail>                                                \
+  struct InferShapeCallHelper<input_type, Tail...> {                         \
+    template <int in_idx, int vec_in_idx, typename... PreviousArgs>          \
+    static Return InferShape(                                                \
+        const std::vector<std::vector<int64_t>>& input_shapes,               \
+        const std::vector<std::vector<std::vector<int64_t>>>&                \
+            vec_input_shapes,                                                \
+        const PreviousArgs&... pargs) {                                      \
+      input_type arg = input_shapes[in_idx];                                 \
+      return InferShapeCallHelper<Tail...>::template InferShape<in_idx + 1,  \
+                                                                vec_in_idx>( \
+          input_shapes, vec_input_shapes, pargs..., arg);                    \
+    }                                                                        \
  }

-#define PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(attr_type)                \
-  template <typename... Tail>                                                 \
-  struct InferShapeCallHelper<attr_type, Tail...> {                           \
-    template <int in_idx, int vec_in_idx, int attr_idx,                       \
-              typename... PreviousArgs>                                       \
-    static Return InferShape(                                                 \
-        const std::vector<std::vector<int64_t>>& input_shapes,                \
-        const std::vector<std::vector<std::vector<int64_t>>>&                 \
-            vec_input_shapes,                                                 \
-        const std::vector<boost::any>& attrs, const PreviousArgs&... pargs) { \
-      try {                                                                   \
-        attr_type arg = boost::any_cast<attr_type>(attrs[attr_idx]);          \
-        return InferShapeCallHelper<Tail...>::template InferShape<            \
-            in_idx, vec_in_idx, attr_idx + 1>(input_shapes, vec_input_shapes, \
-                                              attrs, pargs..., arg);          \
-      } catch (boost::bad_any_cast&) {                                        \
-        PD_THROW(                                                             \
-            "Attribute cast error in custom operator InferShapeFn. "          \
-            "Expected " #attr_type                                            \
-            " value. InferShapeFn's attribute list must be exactly same as "  \
-            "Forward "                                                        \
-            "KernelFn's attribute list except std::vector<int64_t> "          \
-            "attribute.");                                                    \
-      }                                                                       \
-    }                                                                         \
+#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPES(input_type)           \
+  template <typename... Tail>                                               \
+  struct InferShapeCallHelper<input_type, Tail...> {                        \
+    template <int in_idx, int vec_in_idx, typename... PreviousArgs>         \
+    static Return InferShape(                                               \
+        const std::vector<std::vector<int64_t>>& input_shapes,              \
+        const std::vector<std::vector<std::vector<int64_t>>>&               \
+            vec_input_shapes,                                               \
+        const PreviousArgs&... pargs) {                                     \
+      input_type arg = vec_input_shapes[vec_in_idx];                        \
+      return InferShapeCallHelper<Tail...>::template InferShape<            \
+          in_idx, vec_in_idx + 1>(input_shapes, vec_input_shapes, pargs..., \
+                                  arg);                                     \
+    }                                                                       \
  }

 template <typename F, F f>
@ -275,10 +245,10 @@ template <typename Return, typename... Args, Return (*impl_fn)(Args...)>
 struct InferShapeFuncImpl<Return (*)(Args...), impl_fn> {
  static Return InferShape(
      const std::vector<std::vector<int64_t>>& input_shapes,
-      const std::vector<std::vector<std::vector<int64_t>>>& vec_input_shapes,
-      const std::vector<boost::any>& attrs) {
-    return InferShapeCallHelper<Args..., TypeTag<int>>::template InferShape<
-        0, 0, 0>(input_shapes, vec_input_shapes, attrs);
+      const std::vector<std::vector<std::vector<int64_t>>>& vec_input_shapes) {
+    return InferShapeCallHelper<Args..., TypeTag<int>>::template InferShape<0,
+                                                                            0>(
+        input_shapes, vec_input_shapes);
  }

 private:
@ -295,26 +265,14 @@ struct InferShapeFuncImpl<Return (*)(Args...), impl_fn> {
  PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPES(
      std::vector<std::vector<int64_t>>);

-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const bool&);
-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const int&);
-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const float&);
-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const int64_t&);
-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const std::string&);
-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const std::vector<int>&);
-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const std::vector<float>&);
-  PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(const std::vector<std::string>&);
-  // NOTE(chenweihang): InferShape can't support std::vector<int64_t> attr type,
-  // because the input type is std::vector<int64_t>, only can use one rule to
-  // parse std::vector<int64_t> parameter
-
  // end: base template
  template <typename T>
  struct InferShapeCallHelper<TypeTag<T>> {
-    template <int in_idx, int vec_in_idx, int attr_idx>
+    template <int in_idx, int vec_in_idx>
    static Return InferShape(
        const std::vector<std::vector<int64_t>>& input_shapes,
        const std::vector<std::vector<std::vector<int64_t>>>& vec_input_shapes,
-        const std::vector<boost::any>& attrs, const Args&... args) {
+        const Args&... args) {
      return impl_fn(args...);
    }
  };
--- a/paddle/fluid/extension/include/ext_tensor.h
+++ b/paddle/fluid/extension/include/ext_tensor.h
@ -52,9 +52,6 @@ class PD_DLL_DECL Tensor {
  /// \brief Construct a Tensor on target Place for CustomOp.
  /// Generally it's only used for user to create Tensor.
  explicit Tensor(const PlaceType& place);
-  /// \brief Construct a Tensor on target Place with shape for CustomOp.
-  /// Generally it's only used for user to create Tensor.
-  Tensor(const PlaceType& place, const std::vector<int64_t>& shape);
  /// \brief Reset the shape of the tensor.
  /// Generally it's only used for the input tensor.
  /// Reshape must be called before calling
--- a/paddle/fluid/extension/src/ext_tensor.cc
+++ b/paddle/fluid/extension/src/ext_tensor.cc
@ -22,7 +22,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/complex128.h"
 #include "paddle/fluid/platform/complex64.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/transform.h"

 namespace paddle {
@ -102,32 +101,13 @@ void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,

 void Tensor::reshape(const std::vector<int64_t> &shape) {
  GET_CASTED_TENSOR
-  auto new_dim = framework::make_ddim(shape);
-  if (tensor->numel() != framework::product(new_dim)) {
-    LOG(WARNING) << "Custom Op: Calling reshape to a new shape which is bigger "
-                    "or smaller"
-                 << "than original shape will not change your tensor's memory "
-                    "Please call"
-                 << "paddle::Tensor::mutable_data<T>() after to reallocate "
-                    "your tensor's size."
-                 << std::endl;
-  }
-  tensor->Resize(new_dim);
+  tensor->Resize(framework::make_ddim(shape));
 }

 Tensor::Tensor(const PlaceType &place)
    : tensor_(std::make_shared<framework::LoDTensor>()),
      place_(place),
      stream_(StreamWrapper()) {}
-
-Tensor::Tensor(const PlaceType &place, const std::vector<int64_t> &shape)
-    : tensor_(std::make_shared<framework::LoDTensor>()),
-      place_(place),
-      stream_(StreamWrapper()) {
-  GET_CASTED_TENSOR
-  tensor->Resize(framework::make_ddim(shape));
-}
-
 template <typename T>
 T *Tensor::mutable_data(const PlaceType &place) {
  place_ = place;
@ -190,8 +170,6 @@ DataType Tensor::type() const {
    return DataType::COMPLEX64;
  } else if (type == framework::proto::VarType::COMPLEX128) {
    return DataType::COMPLEX128;
-  } else if (type == framework::proto::VarType::FP16) {
-    return DataType::FLOAT16;
  }
  // TODO(JiabinYang) Support more dtype here
  return DataType::FLOAT32;
@ -251,8 +229,6 @@ template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex64>(
    const PlaceType &target_place) const;
 template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex128>(
    const PlaceType &target_place) const;
-template PD_DLL_DECL Tensor
-Tensor::copy_to<paddle::platform::float16>(const PlaceType &target_place) const;

 template PD_DLL_DECL float *Tensor::data<float>() const;
 template PD_DLL_DECL double *Tensor::data<double>() const;
@ -266,8 +242,6 @@ template PD_DLL_DECL paddle::platform::complex64 *
 Tensor::data<paddle::platform::complex64>() const;
 template PD_DLL_DECL paddle::platform::complex128 *
 Tensor::data<paddle::platform::complex128>() const;
-template PD_DLL_DECL paddle::platform::float16 *
-Tensor::data<paddle::platform::float16>() const;

 template PD_DLL_DECL float *Tensor::mutable_data<float>();
 template PD_DLL_DECL double *Tensor::mutable_data<double>();
@ -281,8 +255,6 @@ template PD_DLL_DECL paddle::platform::complex64 *
 Tensor::mutable_data<paddle::platform::complex64>();
 template PD_DLL_DECL paddle::platform::complex128 *
 Tensor::mutable_data<paddle::platform::complex128>();
-template PD_DLL_DECL paddle::platform::float16 *
-Tensor::mutable_data<paddle::platform::float16>();

 template PD_DLL_DECL float *Tensor::mutable_data<float>(const PlaceType &place);
 template PD_DLL_DECL double *Tensor::mutable_data<double>(
@ -302,8 +274,6 @@ template PD_DLL_DECL paddle::platform::complex64 *
 Tensor::mutable_data<paddle::platform::complex64>(const PlaceType &place);
 template PD_DLL_DECL paddle::platform::complex128 *
 Tensor::mutable_data<paddle::platform::complex128>(const PlaceType &place);
-template PD_DLL_DECL paddle::platform::float16 *
-Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);

 std::vector<int64_t> Tensor::shape() const {
  GET_CASTED_TENSOR
@ -374,11 +344,6 @@ Tensor Tensor::cast(const DataType &target_type) const {
                               CastDataType<paddle::platform::complex128>(
                                   *tensor, rlt_tensor_, ctx));
      break;
-    case framework::proto::VarType::FP16:
-      framework::VisitDataType(
-          dst_type,
-          CastDataType<paddle::platform::float16>(*tensor, rlt_tensor_, ctx));
-      break;
    // TODO(JiabinYang) Support more dtype here
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@ -360,11 +360,46 @@ set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_prot

 cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})

+# Old custom op extension mechanism related, will be removed in 2.1.0.
+# Builds a shared library from the framework sources listed below. The target
+# is called paddle_framework_shared, but OUTPUT_NAME makes the produced file
+# use the base name "paddle_framework".
+# layer.cc is located through PADDLE_SOURCE_DIR (set by the top-level
+# CMakeLists.txt) instead of CMAKE_SOURCE_DIR, so the path still resolves when
+# Paddle is not the top-level project of the build.
+cc_library(paddle_framework_shared
+    SHARED SRCS executor.cc operator.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/c/c_api.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc
+    DEPS ${FLUID_FRAMEWORK_MODULES})
+get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
+set_target_properties(paddle_framework_shared PROPERTIES OUTPUT_NAME paddle_framework)
+# NOTE(review): the keyword-less signature is kept on purpose; presumably
+# cc_library links its DEPS with the plain signature too, and CMake rejects
+# mixing plain and keyword signatures on one target -- confirm before changing.
+target_link_libraries(paddle_framework_shared ${os_dependency_modules})
+
+# Record, as internal cache entries, where the paddle_framework shared library
+# (and on Windows its import library) is expected to be found per platform.
+if(LINUX)
+  set(FLUID_FRAMEWORK_SHARED_LIB
+      "${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.so"
+      CACHE INTERNAL "Fluid framework lib")
+endif()
+
+if(WIN32)
+  # Default to the current binary directory (used as-is for Ninja); every other
+  # generator gets a subdirectory named after the build type.
+  # NOTE(review): CMAKE_BUILD_TYPE may be empty with multi-config generators --
+  # confirm the resulting path is what consumers expect.
+  set(paddle_framework_lib_path "${CMAKE_CURRENT_BINARY_DIR}")
+  if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
+    set(paddle_framework_lib_path
+        "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
+  endif()
+  set(FLUID_FRAMEWORK_IMPORT_LIB
+      "${paddle_framework_lib_path}/paddle_framework.lib"
+      CACHE INTERNAL "Fluid framework lib")
+  set(FLUID_FRAMEWORK_SHARED_LIB
+      "${paddle_framework_lib_path}/paddle_framework.dll"
+      CACHE INTERNAL "Fluid framework dll")
+endif()
+
+if(APPLE)
+  set(FLUID_FRAMEWORK_SHARED_LIB
+      "${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dylib"
+      CACHE INTERNAL "Fluid framework lib")
+endif()
 if(WITH_TESTING AND TEST selected_rows_test)
  set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
 endif()

-##### 2.0 New custom op extension mechanism related #####
+# New custom op extension mechanism related

 # if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
 set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)
--- a/paddle/fluid/framework/c/c_api.cc
+++ b/paddle/fluid/framework/c/c_api.cc
@ -0,0 +1,53 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/c/c_api.h"
+
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/enforce.h"
+
+extern "C" {
+
+// Returns a reference to the instance handed out by OpInfoMap::Instance().
+// Defined inside the enclosing extern "C" block, so the symbol has C linkage.
+paddle::framework::OpInfoMap &PD_GetOpInfoMap() {
+  paddle::framework::OpInfoMap &op_info_map =
+      paddle::framework::OpInfoMap::Instance();
+  return op_info_map;
+}
+
+// Forwards the caller-supplied `pool` to DeviceContextPool::SetPool.
+void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool) {
+  using paddle::platform::DeviceContextPool;
+  DeviceContextPool::SetPool(pool);
+}
+
+// Builds the grad op descs for `op_desc` with the grad op maker registered for
+// its type and returns each one's proto serialized into a string.
+//
+// The four arguments are passed through unchanged to the grad op maker.
+// Returns an empty vector when the op type has no grad op maker; otherwise one
+// string per generated grad op desc, in the order the maker produced them.
+// A failed serialization is reported through PADDLE_ENFORCE_EQ.
+std::vector<std::string> PD_GetGradOpDescStrs(
+    const paddle::framework::OpDesc &op_desc,
+    const std::unordered_set<std::string> &no_grad_set,
+    std::unordered_map<std::string, std::string> *grad_to_var,
+    const std::vector<paddle::framework::BlockDesc *> &grad_block) {
+  std::vector<std::string> serialized;
+  auto &info = PD_GetOpInfoMap().Get(op_desc.Type());
+  if (!info.grad_op_maker_) {
+    // Nothing to generate for this op type.
+    return serialized;
+  }
+
+  auto grad_descs =
+      info.grad_op_maker_(op_desc, no_grad_set, grad_to_var, grad_block);
+  serialized.reserve(grad_descs.size());
+  for (const auto &grad_desc : grad_descs) {
+    // Append an empty slot first, then serialize straight into it.
+    serialized.emplace_back();
+    PADDLE_ENFORCE_EQ(
+        grad_desc->Proto()->SerializePartialToString(&serialized.back()), true,
+        paddle::platform::errors::Unavailable(
+            "Cannot serialize operator desc message."));
+  }
+  return serialized;
+}
+
+}  // end extern "C"
--- a/paddle/fluid/framework/c/c_api.h
+++ b/paddle/fluid/framework/c/c_api.h
@ -0,0 +1,55 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/platform/device_context.h"
+
+namespace paddle {
+namespace framework {
+class OpInfoMap;
+}  // namespace framework
+namespace platform {
+class DeviceContextPool;
+}  // namespace platform
+}  // namespace paddle
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// C-API to get global OpInfo map.
+paddle::framework::OpInfoMap &PD_GetOpInfoMap();
+
+// C-API to init global DeviceContextPool from outside.
+void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool);
+
+// C-API to serialize the grad op protocol message to a binary string.
+std::vector<std::string> PD_GetGradOpDescStrs(
+    const paddle::framework::OpDesc &op_desc,
+    const std::unordered_set<std::string> &no_grad_set,
+    std::unordered_map<std::string, std::string> *grad_to_var,
+    const std::vector<paddle::framework::BlockDesc *> &grad_block);
+
+#ifdef __cplusplus
+}
+#endif
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
xiegegege	f343b5ab0e	notest,test=model_benchmark	5 years ago
xiegegege	c9461d1bd1	test benchmark ci	5 years ago