commit 36cd18b549
@@ -0,0 +1,31 @@
include(ExternalProject)

set(DLPACK_SOURCE_DIR ${THIRD_PARTY_PATH}/dlpack)
set(DLPACK_INCLUDE_DIR ${DLPACK_SOURCE_DIR}/src/extern_dlpack/include)

include_directories(${DLPACK_INCLUDE_DIR})

ExternalProject_Add(
  extern_dlpack
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY "https://github.com/dmlc/dlpack.git"
  GIT_TAG        "v0.2"
  PREFIX         ${DLPACK_SOURCE_DIR}
  UPDATE_COMMAND    ""
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)

if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/dlpack_dummy.c)
  file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
  add_library(dlpack STATIC ${dummyfile})
else()
  add_library(dlpack INTERFACE)
endif()

add_dependencies(dlpack extern_dlpack)

LIST(APPEND externl_project_dependencies dlpack)
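For orientation (not part of the commit): dlpack v0.2 is a header-only dependency, which is why the ExternalProject above disables every configure, build, install, and test step. A minimal sketch of what the fetched <dlpack/dlpack.h> header provides, using an illustrative caller-owned buffer; the concrete values are assumptions for the example only:

// Illustrative sketch only: describe an existing 2x3 float buffer with the
// plain C structs from <dlpack/dlpack.h> (dlpack v0.2). Nothing is copied;
// DLTensor just points at memory owned elsewhere.
#include <dlpack/dlpack.h>
#include <cstdint>

int main() {
  static float data[6] = {0, 1, 2, 3, 4, 5};  // caller-owned buffer
  static int64_t shape[2] = {2, 3};

  DLTensor t;
  t.data = data;
  t.ctx.device_type = kDLCPU;  // host memory
  t.ctx.device_id = 0;
  t.ndim = 2;
  t.dtype.code = kDLFloat;     // float32: code/bits/lanes
  t.dtype.bits = 32;
  t.dtype.lanes = 1;
  t.shape = shape;
  t.strides = nullptr;         // nullptr means compact, row-major layout
  t.byte_offset = 0;
  return 0;
}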
@@ -0,0 +1,44 @@
if (NOT WITH_AMD_GPU)
  return()
endif()

# rocprim is "ROCm Parallel Primitives" for short.
# It is a header-only library providing HIP and HC parallel primitives
# for developing performant GPU-accelerated code on AMD ROCm platform.

if("x${HCC_HOME}" STREQUAL "x")
  set(HCC_HOME "/opt/rocm/hcc")
endif()

INCLUDE(ExternalProject)

SET(ROCPRIM_SOURCE_DIR ${THIRD_PARTY_PATH}/rocprim)
SET(ROCPRIM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rocprim)
SET(ROCPRIM_INCLUDE_DIR ${ROCPRIM_INSTALL_DIR}/include)

ExternalProject_Add(
  extern_rocprim
  GIT_REPOSITORY "https://github.com/ROCmSoftwarePlatform/rocPRIM.git"
  GIT_TAG        5bd41b96ab8d8343330fb2c3e1b96775bde3b3fc
  PREFIX         ${ROCPRIM_SOURCE_DIR}
  UPDATE_COMMAND ""
  CMAKE_ARGS     -DCMAKE_CXX_COMPILER=${HCC_HOME}/bin/hcc
  CMAKE_ARGS     -DONLY_INSTALL=ON
  CMAKE_ARGS     -DBUILD_TEST=OFF
  CMAKE_ARGS     -DCMAKE_INSTALL_PREFIX=${ROCPRIM_INSTALL_DIR}

  INSTALL_DIR    ${ROCPRIM_INSTALL_DIR}
  ${EXTERNAL_PROJECT_LOG_ARGS}
)

INCLUDE_DIRECTORIES(${ROCPRIM_INCLUDE_DIR})

if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/rocprim_dummy.c)
  file(WRITE ${dummyfile} "const char *dummy_rocprim = \"${dummyfile}\";")
  add_library(rocprim STATIC ${dummyfile})
else()
  add_library(rocprim INTERFACE)
endif()

add_dependencies(rocprim extern_rocprim)
@@ -0,0 +1,127 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/dlpack_tensor.h"

namespace paddle {
namespace framework {

namespace internal {
template <typename T>
static ::DLDataType GetDLDataTypeCode() {
  ::DLDataType dtype;
  if (std::is_same<T, platform::float16>::value ||
      std::is_floating_point<T>::value) {
    dtype.code = kDLFloat;
  } else if (std::is_unsigned<T>::value) {
    dtype.code = kDLUInt;
  } else if (std::is_integral<T>::value) {
    dtype.code = kDLInt;
  } else {
    PADDLE_THROW("Unsupported data type %s", typeid(T).name());
  }
  dtype.bits = 8 * sizeof(T);
  dtype.lanes = 1;
  return dtype;
}

static DLDataType GetDLDataTypeFromTypeIndex(const std::type_index &type) {
#define REG_DL_DATA_TYPE(type) \
  { std::type_index(typeid(type)), GetDLDataTypeCode<type>() }
  static const std::unordered_map<std::type_index, ::DLDataType>
      type_to_dtype_map({
          REG_DL_DATA_TYPE(platform::float16),  // NOLINT
          REG_DL_DATA_TYPE(float),              // NOLINT
          REG_DL_DATA_TYPE(double),             // NOLINT
          REG_DL_DATA_TYPE(int),                // NOLINT
          REG_DL_DATA_TYPE(int64_t),            // NOLINT
          REG_DL_DATA_TYPE(bool),               // NOLINT
          REG_DL_DATA_TYPE(size_t),             // NOLINT
          REG_DL_DATA_TYPE(int16_t),            // NOLINT
          REG_DL_DATA_TYPE(uint8_t),            // NOLINT
          REG_DL_DATA_TYPE(int8_t)              // NOLINT
      });
  static auto type_to_dtype_map_end_it = type_to_dtype_map.end();
  auto it = type_to_dtype_map.find(type);
  PADDLE_ENFORCE(it != type_to_dtype_map_end_it, "Unsupported data type %s",
                 type.name());
  return it->second;
#undef REG_DL_DATA_TYPE
}

struct DLContextVisitor : public boost::static_visitor<::DLContext> {
  inline ::DLContext operator()(const platform::CPUPlace &place) const {
    DLContext ctx;
    ctx.device_type = kDLCPU;
    ctx.device_id = 0;
    return ctx;
  }

  inline ::DLContext operator()(const platform::CUDAPlace &place) const {
#ifdef PADDLE_WITH_CUDA
    DLContext ctx;
    ctx.device_type = kDLGPU;
    ctx.device_id = place.device;
    return ctx;
#else
    PADDLE_THROW("platform::CUDAPlace is not supported in CPU only version");
#endif
  }

  inline ::DLContext operator()(const platform::CUDAPinnedPlace &place) const {
#ifdef PADDLE_WITH_CUDA
    DLContext ctx;
    ctx.device_type = kDLCPUPinned;
    ctx.device_id = 0;
    return ctx;
#else
    PADDLE_THROW(
        "platform::CUDAPinnedPlace is not supported in CPU only version");
#endif
  }
};
}  // namespace internal

DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) {
  // init data, data buffer
  t_.data = const_cast<void *>(tensor.data<void>());

  // init ctx, DLContext type with device_type and device_id
  auto place = tensor.place();
  t_.ctx = boost::apply_visitor(internal::DLContextVisitor(), place);

  // init dtype
  t_.dtype = internal::GetDLDataTypeFromTypeIndex(tensor.type());
  t_.dtype.lanes = lanes;

  // init ndim, tensor rank
  auto &dims = tensor.dims();
  using DimType = decltype(t_.ndim);  // int
  t_.ndim = static_cast<DimType>(dims.size());

  // init shape, tensor dims
  t_.shape = shape_;
  for (DimType i = 0; i < t_.ndim; ++i) {
    t_.shape[i] = dims[i];
  }

  // init strides, nullptr means the tensor is compact
  t_.strides = nullptr;

  // init byte_offset
  t_.byte_offset = 0;
}

}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,45 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <dlpack/dlpack.h>
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace framework {

class DLPackTensor {
 public:
  using LaneType = decltype(::DLTensor::dtype.lanes);  // uint16_t
  using ShapeType =
      std::remove_reference<decltype(::DLTensor::shape[0])>::type;  // int64_t

  // lanes is only used in CPU to enable vectorization
  explicit DLPackTensor(const Tensor& tensor, LaneType lanes = 1);

  inline operator const ::DLTensor&() const { return t_; }

  inline operator ::DLTensor&() { return t_; }

 private:
  ::DLTensor t_;

  // The shape in DLTensor is defined as int64_t*
  // Add this member to make TVMTensor init without heap allocation
  ShapeType shape_[9];
};

}  // namespace framework
}  // namespace paddle
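A minimal usage sketch of the class above (not part of the commit; ConsumeDLPack and Demo are hypothetical names introduced for illustration): DLPackTensor wraps a framework::Tensor without copying, and its conversion operators let the wrapper be handed to any DLPack-aware API that expects a ::DLTensor.

#include "paddle/fluid/framework/dlpack_tensor.h"

namespace paddle {
namespace framework {

// Hypothetical consumer: any DLPack-aware API taking a ::DLTensor works here.
void ConsumeDLPack(const ::DLTensor& dl) { /* read dl.data, dl.shape, ... */ }

void Demo() {
  DDim dims{4, 5, 6, 7};
  Tensor tensor;
  tensor.Resize(dims);
  tensor.mutable_data<float>(platform::CPUPlace());  // allocate CPU buffer

  DLPackTensor dlpack_tensor(tensor);  // lanes defaults to 1
  ConsumeDLPack(dlpack_tensor);        // implicit operator const ::DLTensor&
}

}  // namespace framework
}  // namespace paddle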
@@ -0,0 +1,113 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/dlpack_tensor.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <vector>

namespace paddle {
namespace framework {

namespace {  // NOLINT
template <typename T>
constexpr uint8_t GetDLDataTypeCode() {
  return std::is_same<platform::float16, T>::value ||
                 std::is_floating_point<T>::value
             ? static_cast<uint8_t>(kDLFloat)
             : (std::is_unsigned<T>::value
                    ? static_cast<uint8_t>(kDLUInt)
                    : (std::is_integral<T>::value ? static_cast<uint8_t>(kDLInt)
                                                  : static_cast<uint8_t>(-1)));
}
}  // NOLINT

template <typename T>
void TestMain(const platform::Place &place, uint16_t lanes) {
  DDim dims{4, 5, 6, 7};
  Tensor tensor;
  tensor.Resize(dims);
  void *p = tensor.mutable_data<T>(place);

  DLPackTensor dlpack_tensor(tensor, lanes);
  ::DLTensor &dl_tensor = dlpack_tensor;

  CHECK_EQ(p, dl_tensor.data);
  if (platform::is_cpu_place(place)) {
    CHECK_EQ(kDLCPU, dl_tensor.ctx.device_type);
    CHECK_EQ(0, dl_tensor.ctx.device_id);
  } else if (platform::is_gpu_place(place)) {
    CHECK_EQ(kDLGPU, dl_tensor.ctx.device_type);
    CHECK_EQ(boost::get<platform::CUDAPlace>(place).device,
             dl_tensor.ctx.device_id);
  } else if (platform::is_cuda_pinned_place(place)) {
    CHECK_EQ(kDLCPUPinned, dl_tensor.ctx.device_type);
    CHECK_EQ(0, dl_tensor.ctx.device_id);
  } else {
    CHECK_EQ(false, true);
  }

  CHECK_EQ(dims.size(), dl_tensor.ndim);
  for (auto i = 0; i < dims.size(); ++i) {
    CHECK_EQ(dims[i], dl_tensor.shape[i]);
  }

  CHECK_EQ(dl_tensor.strides == nullptr, true);
  CHECK_EQ(static_cast<uint64_t>(0), dl_tensor.byte_offset);

  CHECK_EQ(lanes, dl_tensor.dtype.lanes);
  CHECK_EQ(sizeof(T) * 8, dl_tensor.dtype.bits);

  CHECK_EQ(GetDLDataTypeCode<T>(), dl_tensor.dtype.code);
}

template <typename T>
void TestMainLoop() {
#ifdef PADDLE_WITH_CUDA
  std::vector<platform::Place> places{platform::CPUPlace(),
                                      platform::CUDAPlace(0),
                                      platform::CUDAPinnedPlace()};
  if (platform::GetCUDADeviceCount() > 1) {
    places.emplace_back(platform::CUDAPlace(1));
  }
#else
  std::vector<platform::Place> places{platform::CPUPlace()};
#endif
  std::vector<uint16_t> lanes{1, 2};
  for (auto &p : places) {
    for (auto &l : lanes) {
      TestMain<T>(p, l);
    }
  }
}

#define PADDLE_DLPACK_TEST(type) \
  TEST(dlpack, test_##type) { TestMainLoop<type>(); }

using float16 = platform::float16;
PADDLE_DLPACK_TEST(float16);
PADDLE_DLPACK_TEST(float);
PADDLE_DLPACK_TEST(double);
PADDLE_DLPACK_TEST(int);
PADDLE_DLPACK_TEST(int64_t);
PADDLE_DLPACK_TEST(bool);
PADDLE_DLPACK_TEST(size_t);
PADDLE_DLPACK_TEST(int16_t);
PADDLE_DLPACK_TEST(uint8_t);
PADDLE_DLPACK_TEST(int8_t);

#undef PADDLE_DLPACK_TEST

}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,72 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/transfer_scope_cache.h"

namespace paddle {
namespace framework {

std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
  thread_local auto* x = new std::unordered_map<size_t, Scope*>;
  return *x;
}

std::unordered_set<Scope*>& global_transfer_scope_cache() {
  thread_local auto* x = new std::unordered_set<Scope*>;
  return *x;
}

Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
                              const Scope* scope) {
  Scope* new_scope{nullptr};
  size_t infer_cache_key =
      CombineHash(OpKernelType::Hash()(type0), OpKernelType::Hash()(type1));
  infer_cache_key =
      CombineHash(infer_cache_key, std::hash<const Scope*>()(scope));

  auto it = global_transfer_data_cache().find(infer_cache_key);
  if (it != global_transfer_data_cache().end()) {
    new_scope = global_transfer_data_cache()[infer_cache_key];
  } else {
    new_scope = &scope->NewScope();
    global_transfer_data_cache()[infer_cache_key] = new_scope;
  }
  global_transfer_scope_cache().insert(new_scope);
  return new_scope;
}

void RemoveKidsFromTransferScopeCache(Scope* scope) {
  auto it = global_transfer_scope_cache().find(scope);
  if (it != global_transfer_scope_cache().end()) {
    global_transfer_scope_cache().erase(it);
  }
  for (auto* s : scope->kids()) {
    auto it = global_transfer_scope_cache().find(s);
    if (it != global_transfer_scope_cache().end()) {
      global_transfer_scope_cache().erase(it);
    }
  }

  // remove global transfer data cache
  auto& cache = global_transfer_data_cache();
  for (auto it = cache.begin(); it != cache.end();) {
    if (it->second == scope)
      it = cache.erase(it);
    else
      it++;
  }
}

}  // namespace framework
}  // namespace paddle
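A minimal caller-side sketch for the cache above (not part of the commit; TransferSketch and its parameters are hypothetical, and the real call sites presumably live in the operator data-transform path): TryCreateTransferScope reuses one child scope per (kernel-type pair, parent scope) key on each thread, and RemoveKidsFromTransferScopeCache should run before the parent scope is destroyed so the thread-local caches do not keep dangling pointers.

#include "paddle/fluid/framework/transfer_scope_cache.h"

namespace paddle {
namespace framework {

// Illustrative caller, not part of the commit.
void TransferSketch(const OpKernelType& expected, const OpKernelType& actual,
                    Scope* parent) {
  // Reuses a cached child of `parent` for this kernel-type pair, or creates
  // one and records it in both thread-local caches.
  Scope* transfer_scope = TryCreateTransferScope(expected, actual, parent);
  // ... transform variables inside *transfer_scope ...

  // Later, before `parent` is torn down, forget its kids in the caches.
  RemoveKidsFromTransferScopeCache(parent);
}

}  // namespace framework
}  // namespace paddle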