Compare commits

...

52 Commits

Author SHA1 Message Date
wuhuanzhou 587d99ae44
update compilation with C++14 (#31815)
5 years ago
tianshuo78520a b09c1ce09a
fix whl package push pypi (#31585)
5 years ago
Thunderbrook 393b3bd6b7
fix split core (#31892)
5 years ago
wuhuanzhou 3a95a0bc26
update cmake minimum version to 3.15 (#31807)
5 years ago
taixiurong 52b05baca3
fix some bugs in transformer training in xpu (#31918)
5 years ago
Wenyu 5394194e3a
support minus-int idx to LayerList (#31750)
5 years ago
furnace ef8323d49e
[ROCM] Add ROCm support for warpctc op (#31817)
5 years ago
Jiawei Wang 95f808c878
fix stack op grad nullptr (#31962)
5 years ago
liym27 57d4288ad4
[dynamic setitem] Fix bug of dynamic setitem: Decrease axes to do right broadcast (#31960)
5 years ago
石晓伟 0fa6c8a35c
fix a syntax error, test=develop (#31930)
5 years ago
Pei Yang 98e803e04f
map_matmul_to_mul_pass support 3dim (#31958)
5 years ago
wuhuanzhou a37a7f67e1
modify CI recommend information (#31395)
5 years ago
jakpiase 6dca7a1de7
Added int8 kernel for oneDNN LSTM op (#31894)
5 years ago
Pei Yang 14b7e3cf06
[Paddle-TRT] TRT inference support for BERT/Transformer in paddle 2.0 api (#31744)
5 years ago
Zhou Wei 245252b86e
fix bug when dtype of to_tensor is core.VarType (#31931)
5 years ago
Zhen Wang e1f931610e
Fix save/load error in imperative qat UT. (#31937)
5 years ago
Yiqun Liu e50bc2c2a6
Enhance cmake to support specifying CUDA_ARCH_NAME to Ampere. (#31923)
5 years ago
Zhou Wei 04a49b097e
[Custom OP]Remove old custom OP and reduce whl package volume (#31813)
5 years ago
wangguanzhong fe2848686b
add exclusive for test_conv2d_op, test=develop (#31936)
5 years ago
chajchaj 73a6fa3ed0
add deprecated for softmax_with_cross_entropy (#31722)
5 years ago
Shang Zhizhou 8084b7594b
fix batchnorm when input dims < 3 (#31933)
5 years ago
zlsh80826 64ee255ffd
[Paddle-TRT] yolobox (#31755)
5 years ago
Aurelius84 c4b60efabd
Fix segment Fault from set_value (#31891)
5 years ago
wuhuanzhou 17030ff28b
fix op benchmark ci error caused by missing test_pr branch, test=document_fix (#31920)
5 years ago
niuliling123 a71d72d921
relu forward and backward with vectortype (#31869)
5 years ago
tianshuo78520a 8829a309fe
Delete cudnn6 code (#31835)
5 years ago
wanghuancoder b48841ba2e
modify API nn.Bilinear's doc (#31889)
5 years ago
liym27 525c32e33c
Fix bug of set_value op: Decrease axes to do right broadcast (#31875)
5 years ago
ronnywang 123949eb48
[ROCM] added a cudnn switch of conv2d for rocm platform (#31836)
5 years ago
Shang Zhizhou 61805d8f0a
fix cmake model path (#31866)
5 years ago
Jiabin Yang 51eb29de18
[CustomOP] Add shape related constructor for Tensor (#31681)
5 years ago
zlsh80826 e3a38d790a
[Paddle-TRT] roi_align_plugin (#31732)
5 years ago
zlsh80826 bfb5cf5567
[Paddle-TRT] trt affine channel converter (#31628)
5 years ago
cc b47478efc2
[dygraph qat] Use layer to calculate output scale (#31861)
5 years ago
lilong12 c3974d0e2a
[3D-parallel] Reformat pipeline parallel (#31786)
5 years ago
zlsh80826 01aa252624
[Paddle-TRT] multiclass nms (#31742)
5 years ago
Wilber 70b67f1029
fix go api bug. (#31857)
5 years ago
tianshuo78520a e804f08559
delete include framework.pb.h (#31859)
5 years ago
Chengmo f58cb01864
【Paddle.Fleet】fix dataset zip py3 bug (#31441)
5 years ago
Kaipeng Deng bf09dcb346
add GPU tensor notice & update default_collate_fn/default_convert_fn. test=develop (#31763)
5 years ago
Chen Weihang 27f2d8df8e
Polish two error messages (#31852)
5 years ago
Zhou Wei 511e204e62
LRScheduler.get_lr should not update lr in LinearWarmup (#31843)
5 years ago
niuliling123 6472d62093
Revert "add relu forward kernel and backward kernel (#31613)" (#31853)
5 years ago
winter-wang e7f28d6c0d
fix runtime crash in rnn model inference, test=develop (#31833)
5 years ago
parap1uie-s 5d89ec36dc
Update pooling.py (#31829)
5 years ago
Huihuang Zheng 649868ffb2
[Dy2stat] Fix the bug that loop_body_func may return single element (#31806)
5 years ago
Wojciech Uss e5f7a834d4
fix cache key in concat oneDNN kernel (#31820)
5 years ago
Aurelius84 f2cfc0f46d
[CustomOp]Avoid raising warning while import paddle (#31804)
5 years ago
cc 84a551380e
[dygraph qat] Refine saving output scale to infer program (#31784)
5 years ago
Chen Weihang 68497e7b39
change trainable to stop_gradient in optimizer (#31823)
5 years ago
ronnywang 270699e647
[ROCM] fix test_matmul_v2_op (#31802)
5 years ago
Zhou Wei 1eb927f935
Restore the third-party library cache for windows (#31811)
5 years ago

@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License
cmake_minimum_required(VERSION 3.10)
cmake_minimum_required(VERSION 3.15)
cmake_policy(VERSION 3.10)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@ -38,11 +39,6 @@ endif()
if (WITH_GPU AND WITH_ASCEND)
message(FATAL_ERROR "Error when compiling with GPU and ASCEND at the same time")
endif()
# cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them.
if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15))
message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. "
"You can use cmake 3.16 (recommended), 3.10, 3.11, 3.15 or 3.17. Please refer to the install document: https://cmake.org/install/")
endif()
if(WITH_GPU AND NOT APPLE)
enable_language(CUDA)
@ -61,7 +57,6 @@ if(WITH_MUSL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy")
endif()
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zm1000 /fp:fast")
if(WIN32)
option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)

@ -74,7 +74,7 @@ endfunction()
# select_nvcc_arch_flags(out_variable)
function(select_nvcc_arch_flags out_variable)
# List of arch names
set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual")
set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "All" "Manual")
set(archs_name_default "Auto")
list(APPEND archs_names "Auto")
@ -108,6 +108,8 @@ function(select_nvcc_arch_flags out_variable)
set(cuda_arch_bin "70")
elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
set(cuda_arch_bin "75")
elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere")
set(cuda_arch_bin "80")
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(cuda_arch_bin ${paddle_known_gpu_archs})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
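With this addition, configuring with -DCUDA_ARCH_NAME=Ampere selects compute capability 8.0 (sm_80), i.e. A100-class GPUs, instead of requiring a Manual arch list.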
@ -206,14 +208,11 @@ select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")
# Set C++11 support
# Set C++14 support
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
if (NOT WIN32) # windows msvc2015 support c++11 natively.
# -std=c++11 -fPIC not recoginize by msvc, -Xcompiler will be added by cmake.
set(CMAKE_CUDA_STANDARD 11)
endif(NOT WIN32)
set(CMAKE_CUDA_STANDARD 14)
# (Note) For Windows, if /W[1-4] is deleted, /W1 will be added by default and conflict with -w,
# so replace /W[1-4] with /W0
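For context, here is a minimal standalone snippet (not part of this patch) showing C++14 features that a C++11 toolchain rejects, which is what the CMAKE_CUDA_STANDARD bump enables:

```cpp
#include <memory>
#include <vector>

int main() {
  // std::make_unique is C++14-only; C++11 provides only std::make_shared.
  auto v = std::make_unique<std::vector<int>>(3, 7);  // {7, 7, 7}
  // Generic lambdas (auto parameters) are also a C++14 feature.
  auto add = [](auto a, auto b) { return a + b; };
  return add((*v)[0], 0) == 7 ? 0 : 1;  // exits 0
}
```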

@ -94,7 +94,7 @@ macro(find_cudnn_version cudnn_header_file)
"${CUDNN_MAJOR_VERSION} * 1000 +
${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")
message(STATUS "Current cuDNN header is ${cudnn_header_file} "
"Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}. ")
"Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCHLEVEL_VERSION}. ")
endif()
endif()
endmacro()
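For example, cuDNN 7.6.5 yields 7 * 1000 + 6 * 100 + 5 = 7605, and the status message now reports the full v7.6.5 rather than truncating the patch level to v7.6.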

@ -14,11 +14,15 @@
INCLUDE(ExternalProject)
IF(WITH_ROCM)
add_definitions(-DWARPCTC_WITH_HIP)
ENDIF()
SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git)
set(WARPCTC_TAG cd828e5b6c3b953b82af73f7f44cddc393a20efa)
set(WARPCTC_TAG c690fc5755abbdbdc98ef78d51ec10a6748a8cd1)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
CACHE PATH "Warp-ctc Directory" FORCE)
@ -57,6 +61,7 @@ ExternalProject_Add(
-DCMAKE_CXX_FLAGS_DEBUG=$<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline>
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
-DWITH_GPU=${WITH_GPU}
-DWITH_ROCM=${WITH_ROCM}
-DWITH_OMP=${USE_OMP}
-DWITH_TORCH=OFF
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON

@ -13,7 +13,7 @@ if(NOT XPU_SDK_ROOT)
elseif(WITH_SUNWAY)
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
else()
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_02_27.tar.gz" CACHE STRING "" FORCE)
SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_03_30.tar.gz" CACHE STRING "" FORCE)
endif()
SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")

@ -4,10 +4,10 @@ include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
include(CheckTypeSize)
function(CheckCompilerCXX11Flag)
function(CheckCompilerCXX14Flag)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.4)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 5.4 required.")
elseif(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.2)
message(WARNING "Found GCC ${CMAKE_CXX_COMPILER_VERSION} which is too high, recommended to use GCC 8.2")
endif()
@ -20,23 +20,15 @@ function(CheckCompilerCXX11Flag)
message(FATAL_ERROR "Unsupported AppleClang version. AppleClang >= 5.1 required.")
endif()
else()
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3)
message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.")
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.4)
message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.4 required.")
endif()
endif()
endif()
endfunction()
CheckCompilerCXX11Flag()
if (WITH_GPU)
if (${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
CheckCompilerCXX14Flag()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
# safe_set_flag
#
# Set a compile flag only if the compiler supports it

@ -50,6 +50,7 @@ output_data := value.Interface().([][]float32)
Run
```bash
go mod init github.com/paddlepaddle
export LD_LIBRARY_PATH=`pwd`/paddle_c/paddle/lib:$LD_LIBRARY_PATH
go run ./demo/mobilenet.go
```
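The added export is needed because the demo links against the Paddle C shared library under paddle_c/paddle/lib, which is not on the default loader path; without it, go run fails to load the library at startup.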

@ -13,7 +13,7 @@
// limitations under the License.
package main
import "../paddle"
import "github.com/paddlepaddle/paddle"
import "strings"
import "io/ioutil"
import "strconv"

@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <paddle_c_api.h>
import "C"

@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <paddle_c_api.h>

@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include "paddle_c_api.h"
import "C"
@ -88,7 +88,7 @@ func (predictor *Predictor) GetInputNames() []string {
}
func (predictor *Predictor) GetOutputNames() []string {
names := make([]string, predictor.GetInputNum())
names := make([]string, predictor.GetOutputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetOutputName(i)
}
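The slice was previously sized by GetInputNum(), so any model whose input and output counts differ would get a wrongly sized name list; sizing by GetOutputNum() matches the loop over output names.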

@ -15,7 +15,7 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_fluid_c
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <string.h>
@ -209,7 +209,7 @@ func DecodeTensor(r *bytes.Reader, shape []int32, t reflect.Type, ptr reflect.Va
value := reflect.Indirect(ptr)
value.Set(reflect.MakeSlice(t, int(shape[0]), int(shape[0])))
if len(shape) == 1 && value.Len() > 0 {
switch value.Index(1).Kind() {
switch value.Index(0).Kind() {
case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
binary.Read(r, Endian(), value.Interface())
return
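Indexing element 1 panics for length-1 slices; element 0 is always valid here since value.Len() > 0 has already been checked.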

@ -52,6 +52,9 @@ class PD_DLL_DECL Tensor {
/// \brief Construct a Tensor on target Place for CustomOp.
/// Generally it's only used for users to create a Tensor.
explicit Tensor(const PlaceType& place);
/// \brief Construct a Tensor on target Place with shape for CustomOp.
/// Generally it's only used for users to create a Tensor.
Tensor(const PlaceType& place, const std::vector<int64_t>& shape);
/// \brief Reset the shape of the tensor.
/// Generally it's only used for the input tensor.
/// Reshape must be called before calling

@ -102,13 +102,32 @@ void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
void Tensor::reshape(const std::vector<int64_t> &shape) {
GET_CASTED_TENSOR
tensor->Resize(framework::make_ddim(shape));
auto new_dim = framework::make_ddim(shape);
if (tensor->numel() != framework::product(new_dim)) {
LOG(WARNING) << "Custom Op: Calling reshape to a shape that is bigger or "
"smaller than the original shape will not change your tensor's memory. "
<< "Please call paddle::Tensor::mutable_data<T>() afterwards to "
"reallocate your tensor's size."
<< std::endl;
}
tensor->Resize(new_dim);
}
Tensor::Tensor(const PlaceType &place)
: tensor_(std::make_shared<framework::LoDTensor>()),
place_(place),
stream_(StreamWrapper()) {}
Tensor::Tensor(const PlaceType &place, const std::vector<int64_t> &shape)
: tensor_(std::make_shared<framework::LoDTensor>()),
place_(place),
stream_(StreamWrapper()) {
GET_CASTED_TENSOR
tensor->Resize(framework::make_ddim(shape));
}
template <typename T>
T *Tensor::mutable_data(const PlaceType &place) {
place_ = place;

@ -360,46 +360,11 @@ set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_prot
cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})
# Old custom op extension mechanism related, will be removed in 2.1.0
cc_library(paddle_framework_shared
SHARED SRCS executor.cc operator.cc
${CMAKE_CURRENT_SOURCE_DIR}/c/c_api.cc
${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc
DEPS ${FLUID_FRAMEWORK_MODULES})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
set_target_properties(paddle_framework_shared PROPERTIES OUTPUT_NAME paddle_framework)
target_link_libraries(paddle_framework_shared ${os_dependency_modules})
if (LINUX)
set(FLUID_FRAMEWORK_SHARED_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.so
CACHE INTERNAL "Fluid framework lib")
endif()
if (WIN32)
if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(paddle_framework_lib_path ${CMAKE_CURRENT_BINARY_DIR})
else()
set(paddle_framework_lib_path ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE})
endif()
set(FLUID_FRAMEWORK_IMPORT_LIB
${paddle_framework_lib_path}/paddle_framework.lib
CACHE INTERNAL "Fluid framework lib")
set(FLUID_FRAMEWORK_SHARED_LIB
${paddle_framework_lib_path}/paddle_framework.dll
CACHE INTERNAL "Fluid framework dll")
endif()
if(APPLE)
set(FLUID_FRAMEWORK_SHARED_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dylib
CACHE INTERNAL "Fluid framework lib")
endif()
if(WITH_TESTING AND TEST selected_rows_test)
set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
endif()
# New custom op extension mechanism related
##### 2.0 New custom op extension mechanism related #####
# if `layer` is not listed in deps, linking fails with: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)

@ -1,53 +0,0 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/c/c_api.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
extern "C" {
paddle::framework::OpInfoMap &PD_GetOpInfoMap() {
return paddle::framework::OpInfoMap::Instance();
}
void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool) {
paddle::platform::DeviceContextPool::SetPool(pool);
}
std::vector<std::string> PD_GetGradOpDescStrs(
const paddle::framework::OpDesc &op_desc,
const std::unordered_set<std::string> &no_grad_set,
std::unordered_map<std::string, std::string> *grad_to_var,
const std::vector<paddle::framework::BlockDesc *> &grad_block) {
auto &op_info = PD_GetOpInfoMap().Get(op_desc.Type());
std::vector<std::string> ret;
if (op_info.grad_op_maker_) {
auto grad_op_descs =
op_info.grad_op_maker_(op_desc, no_grad_set, grad_to_var, grad_block);
size_t op_num = grad_op_descs.size();
ret.resize(op_num);
for (size_t i = 0; i < op_num; ++i) {
PADDLE_ENFORCE_EQ(
grad_op_descs[i]->Proto()->SerializePartialToString(&ret[i]), true,
paddle::platform::errors::Unavailable(
"Cannot serialize operator desc message."));
}
}
return ret;
}
} // end extern "C"

@ -1,55 +0,0 @@
/* copyright (c) 2019 paddlepaddle authors. all rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace framework {
class OpInfoMap;
} // namespace framework
namespace platform {
class DeviceContextPool;
} // namespace platform
} // namespace paddle
#ifdef __cplusplus
extern "C" {
#endif
// C-API to get global OpInfo map.
paddle::framework::OpInfoMap &PD_GetOpInfoMap();
// C-API to init global DeviceContextPool from outside.
void PD_InitDevicesPool(paddle::platform::DeviceContextPool *pool);
// C-API to serialize the grad op protocol message to a binary string.
std::vector<std::string> PD_GetGradOpDescStrs(
const paddle::framework::OpDesc &op_desc,
const std::unordered_set<std::string> &no_grad_set,
std::unordered_map<std::string, std::string> *grad_to_var,
const std::vector<paddle::framework::BlockDesc *> &grad_block);
#ifdef __cplusplus
}
#endif

@ -28,7 +28,6 @@ limitations under the License. */
#include "paddle/fluid/extension/include/ext_tensor.h"
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/custom_tensor_utils.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"

@ -37,7 +37,7 @@ class CustomTensorUtils {
/// \brief Share data FROM another tensor.
/// Use this to pass tensor from op to op
/// \return void.
static void ShareDataFrom(const void* src, const Tensor& dst);
static void ShareDataFrom(const void* src, const paddle::Tensor& dst);
static framework::proto::VarType::Type ConvertEnumDTypeToInnerDType(
const paddle::DataType& dtype) {

@ -18,7 +18,6 @@
#include "glog/logging.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/operator.h"

@ -34,15 +34,19 @@ namespace patterns {
static PDNode* create_emb_vars(PDPattern* pattern, const std::string& name,
const std::string& arg,
bool is_persist = false) {
std::unordered_set<std::string> embedding_ops{"lookup_table",
"lookup_table_v2"};
PDNode* node =
pattern->NewNode(name)->assert_is_op_input("lookup_table", arg);
pattern->NewNode(name)->assert_is_ops_input(embedding_ops, arg);
if (is_persist) return node->assert_is_persistable_var();
return node;
}
static PDNode* create_emb_out_vars(PDPattern* pattern, const std::string& name,
const std::string& arg) {
std::unordered_set<std::string> embedding_ops{"lookup_table",
"lookup_table_v2"};
PDNode* node = pattern->NewNode(name)
->assert_is_only_output_of_op("lookup_table")
->assert_is_only_output_of_ops(embedding_ops)
->assert_is_op_input("elementwise_add", arg)
->AsIntermediate();
return node;
@ -56,10 +60,12 @@ void Embedding2Eltwise1Pattern::operator()() {
create_emb_vars(pattern, lookup_table1_w_repr(), "W", true);
auto* lookup_table2_w =
create_emb_vars(pattern, lookup_table2_w_repr(), "W", true);
std::unordered_set<std::string> embedding_ops{"lookup_table",
"lookup_table_v2"};
auto* lookup_table1 =
pattern->NewNode(lookup_table1_repr())->assert_is_op("lookup_table");
pattern->NewNode(lookup_table1_repr())->assert_is_ops(embedding_ops);
auto* lookup_table2 =
pattern->NewNode(lookup_table2_repr())->assert_is_op("lookup_table");
pattern->NewNode(lookup_table2_repr())->assert_is_ops(embedding_ops);
auto* lookup_table1_out =
create_emb_out_vars(pattern, lookup_table1_out_repr(), "X");
auto* lookup_table2_out =
@ -80,8 +86,10 @@ void Embedding1Eltwise1Pattern::operator()() {
create_emb_vars(pattern, lookup_table1_x_repr(), "Ids");
auto* lookup_table1_w =
create_emb_vars(pattern, lookup_table1_w_repr(), "W", true);
std::unordered_set<std::string> embedding_ops{"lookup_table",
"lookup_table_v2"};
auto* lookup_table1 =
pattern->NewNode(lookup_table1_repr())->assert_is_op("lookup_table");
pattern->NewNode(lookup_table1_repr())->assert_is_ops(embedding_ops);
auto* lookup_table1_out =
create_emb_out_vars(pattern, lookup_table1_out_repr(), "Y");
auto* eltwise_add =
@ -347,4 +355,5 @@ REGISTER_PASS_CAPABILITY(embedding_eltwise_layernorm_fuse_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.EQ("lookup_table", 0)
.LE("lookup_table_v2", 1)
.EQ("elementweise_add", 0));

@ -652,6 +652,36 @@ PDNode *PDNode::assert_is_ops_input(
return this;
}
PDNode *PDNode::assert_is_only_input_of_ops(
const std::unordered_set<std::string> &op_types) {
assert_is_var();
asserts_.emplace_back([=](Node *x) {
for (auto *op : x->outputs) {
if (op && op->IsOp() && op->Op() && op_types.count(op->Op()->Type()) &&
op->inputs.size() == 1) {
return true;
}
}
return false;
});
return this;
}
PDNode *PDNode::assert_is_only_output_of_ops(
const std::unordered_set<std::string> &op_types) {
assert_is_var();
asserts_.emplace_back([=](Node *x) {
for (auto *op : x->inputs) {
if (op && op->IsOp() && op->Op() && op_types.count(op->Op()->Type()) &&
op->outputs.size() == 1) {
return true;
}
}
return false;
});
return this;
}
bool VarLinksToOp(Node *node, const std::string &op_type) {
for (auto *out : node->outputs) {
if (out->IsOp() && out->Op()->Type() == op_type) {
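A hedged sketch of how a fuse pass can use the new ops-set assert (mirroring the embedding_eltwise_layernorm pattern above; `pattern` is assumed to be a PDPattern*):

```cpp
// Match a variable that is the sole output of either embedding op and
// feeds elementwise_add as its input "X".
std::unordered_set<std::string> embedding_ops{"lookup_table",
                                              "lookup_table_v2"};
PDNode* emb_out = pattern->NewNode("emb_out")
                      ->assert_is_only_output_of_ops(embedding_ops)
                      ->assert_is_op_input("elementwise_add", "X")
                      ->AsIntermediate();
```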

@ -28,7 +28,6 @@
#include <utility>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/inference/analysis/dot.h"
@ -146,6 +145,11 @@ struct PDNode {
const std::unordered_set<std::string>& op_types,
const std::string& argument, int nth);
PDNode* assert_is_only_input_of_ops(
const std::unordered_set<std::string>& op_types);
PDNode* assert_is_only_output_of_ops(
const std::unordered_set<std::string>& op_types);
PDNode* assert_has_n_inputs(size_t n);
PDNode* assert_has_n_outputs(size_t n);

@ -14,7 +14,6 @@
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"

@ -17,7 +17,6 @@
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h"
#include "paddle/fluid/framework/ir/pass_test_util.h"
#include "paddle/fluid/framework/naive_executor.h"

Some files were not shown because too many files have changed in this diff.
