commit 36cd18b549
@@ -0,0 +1,31 @@
include(ExternalProject)

set(DLPACK_SOURCE_DIR ${THIRD_PARTY_PATH}/dlpack)
set(DLPACK_INCLUDE_DIR ${DLPACK_SOURCE_DIR}/src/extern_dlpack/include)

include_directories(${DLPACK_INCLUDE_DIR})

ExternalProject_Add(
  extern_dlpack
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/dmlc/dlpack.git"
  GIT_TAG           "v0.2"
  PREFIX            ${DLPACK_SOURCE_DIR}
  UPDATE_COMMAND    ""
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)

if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/dlpack_dummy.c)
  file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
  add_library(dlpack STATIC ${dummyfile})
else()
  add_library(dlpack INTERFACE)
endif()

add_dependencies(dlpack extern_dlpack)

LIST(APPEND externl_project_dependencies dlpack)
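For context, the "v0.2" tag fetched here is the header-only dlpack/dlpack.h that the new DLPackTensor below populates. A minimal standalone sketch of filling that struct by hand for a 2x3 float buffer on CPU (field names as in dlpack v0.2; later dlpack releases renamed ctx and kDLGPU):

#include <dlpack/dlpack.h>
#include <cstdint>

int main() {
  float data[6] = {0};          // 2 x 3 row-major buffer
  int64_t shape[2] = {2, 3};

  DLTensor t;
  t.data = data;
  t.ctx = {kDLCPU, 0};          // device_type, device_id
  t.ndim = 2;
  t.dtype = {kDLFloat, 32, 1};  // code, bits, lanes
  t.shape = shape;
  t.strides = nullptr;          // nullptr => compact row-major layout
  t.byte_offset = 0;
  return 0;
}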
@@ -0,0 +1,44 @@
if (NOT WITH_AMD_GPU)
  return()
endif()

# rocprim is "ROCm Parallel Primitives" for short.
# It is a header-only library providing HIP and HC parallel primitives
# for developing performant GPU-accelerated code on AMD ROCm platform.

if("x${HCC_HOME}" STREQUAL "x")
  set(HCC_HOME "/opt/rocm/hcc")
endif()

INCLUDE(ExternalProject)

SET(ROCPRIM_SOURCE_DIR ${THIRD_PARTY_PATH}/rocprim)
SET(ROCPRIM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rocprim)
SET(ROCPRIM_INCLUDE_DIR ${ROCPRIM_INSTALL_DIR}/include)

ExternalProject_Add(
  extern_rocprim
  GIT_REPOSITORY "https://github.com/ROCmSoftwarePlatform/rocPRIM.git"
  GIT_TAG        5bd41b96ab8d8343330fb2c3e1b96775bde3b3fc
  PREFIX         ${ROCPRIM_SOURCE_DIR}
  UPDATE_COMMAND ""
  CMAKE_ARGS     -DCMAKE_CXX_COMPILER=${HCC_HOME}/bin/hcc
  CMAKE_ARGS     -DONLY_INSTALL=ON
  CMAKE_ARGS     -DBUILD_TEST=OFF
  CMAKE_ARGS     -DCMAKE_INSTALL_PREFIX=${ROCPRIM_INSTALL_DIR}

  INSTALL_DIR    ${ROCPRIM_INSTALL_DIR}
  ${EXTERNAL_PROJECT_LOG_ARGS}
)

INCLUDE_DIRECTORIES(${ROCPRIM_INCLUDE_DIR})

if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/rocprim_dummy.c)
  file(WRITE ${dummyfile} "const char *dummy_rocprim = \"${dummyfile}\";")
  add_library(rocprim STATIC ${dummyfile})
else()
  add_library(rocprim INTERFACE)
endif()

add_dependencies(rocprim extern_rocprim)
@@ -0,0 +1,127 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/dlpack_tensor.h"

namespace paddle {
namespace framework {

namespace internal {
template <typename T>
static ::DLDataType GetDLDataTypeCode() {
  ::DLDataType dtype;
  if (std::is_same<T, platform::float16>::value ||
      std::is_floating_point<T>::value) {
    dtype.code = kDLFloat;
  } else if (std::is_unsigned<T>::value) {
    dtype.code = kDLUInt;
  } else if (std::is_integral<T>::value) {
    dtype.code = kDLInt;
  } else {
    PADDLE_THROW("Unsupported data type %s", typeid(T).name());
  }
  dtype.bits = 8 * sizeof(T);
  dtype.lanes = 1;
  return dtype;
}

static DLDataType GetDLDataTypeFromTypeIndex(const std::type_index &type) {
#define REG_DL_DATA_TYPE(type) \
  { std::type_index(typeid(type)), GetDLDataTypeCode<type>() }
  static const std::unordered_map<std::type_index, ::DLDataType>
      type_to_dtype_map({
          REG_DL_DATA_TYPE(platform::float16),  // NOLINT
          REG_DL_DATA_TYPE(float),              // NOLINT
          REG_DL_DATA_TYPE(double),             // NOLINT
          REG_DL_DATA_TYPE(int),                // NOLINT
          REG_DL_DATA_TYPE(int64_t),            // NOLINT
          REG_DL_DATA_TYPE(bool),               // NOLINT
          REG_DL_DATA_TYPE(size_t),             // NOLINT
          REG_DL_DATA_TYPE(int16_t),            // NOLINT
          REG_DL_DATA_TYPE(uint8_t),            // NOLINT
          REG_DL_DATA_TYPE(int8_t)              // NOLINT
      });
  static auto type_to_dtype_map_end_it = type_to_dtype_map.end();
  auto it = type_to_dtype_map.find(type);
  PADDLE_ENFORCE(it != type_to_dtype_map_end_it, "Unsupported data type %s",
                 type.name());
  return it->second;
#undef REG_DL_DATA_TYPE
}

struct DLContextVisitor : public boost::static_visitor<::DLContext> {
  inline ::DLContext operator()(const platform::CPUPlace &place) const {
    DLContext ctx;
    ctx.device_type = kDLCPU;
    ctx.device_id = 0;
    return ctx;
  }

  inline ::DLContext operator()(const platform::CUDAPlace &place) const {
#ifdef PADDLE_WITH_CUDA
    DLContext ctx;
    ctx.device_type = kDLGPU;
    ctx.device_id = place.device;
    return ctx;
#else
    PADDLE_THROW("platform::CUDAPlace is not supported in CPU only version");
#endif
  }

  inline ::DLContext operator()(const platform::CUDAPinnedPlace &place) const {
#ifdef PADDLE_WITH_CUDA
    DLContext ctx;
    ctx.device_type = kDLCPUPinned;
    ctx.device_id = 0;
    return ctx;
#else
    PADDLE_THROW(
        "platform::CUDAPinnedPlace is not supported in CPU only version");
#endif
  }
};
}  // namespace internal

DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) {
  // init data, data buffer
  t_.data = const_cast<void *>(tensor.data<void>());

  // init ctx, DLContext type with device_type and device_id
  auto place = tensor.place();
  t_.ctx = boost::apply_visitor(internal::DLContextVisitor(), place);

  // init dtype
  t_.dtype = internal::GetDLDataTypeFromTypeIndex(tensor.type());
  t_.dtype.lanes = lanes;

  // init ndim, tensor rank
  auto &dims = tensor.dims();
  using DimType = decltype(t_.ndim);  // int
  t_.ndim = static_cast<DimType>(dims.size());

  // init shape, tensor dims
  t_.shape = shape_;
  for (DimType i = 0; i < t_.ndim; ++i) {
    t_.shape[i] = dims[i];
  }

  // init strides, nullptr means the tensor is compact
  t_.strides = nullptr;

  // init byte_offset
  t_.byte_offset = 0;
}

}  // namespace framework
}  // namespace paddle
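The {code, bits, lanes} triple assembled above fully describes a DLPack element type: float maps to {kDLFloat, 32, 1}, uint8_t to {kDLUInt, 8, 1}, int64_t to {kDLInt, 64, 1}. A standalone sketch of the same encoding, assuming only the dlpack v0.2 header and none of the Paddle machinery:

#include <dlpack/dlpack.h>
#include <cassert>
#include <cstdint>

template <typename T>
DLDataType MakeDType(uint8_t code) {
  DLDataType dtype;
  dtype.code = code;
  dtype.bits = 8 * sizeof(T);  // element width in bits
  dtype.lanes = 1;             // scalar; >1 marks a vectorized element
  return dtype;
}

int main() {
  assert(MakeDType<float>(kDLFloat).bits == 32);
  assert(MakeDType<std::uint8_t>(kDLUInt).bits == 8);
  assert(MakeDType<std::int64_t>(kDLInt).bits == 64);
  return 0;
}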
@@ -0,0 +1,45 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <dlpack/dlpack.h>
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace framework {

class DLPackTensor {
 public:
  using LaneType = decltype(::DLTensor::dtype.lanes);  // uint16_t
  using ShapeType =
      std::remove_reference<decltype(::DLTensor::shape[0])>::type;  // int64_t

  // lanes is only used in CPU to enable vectorization
  explicit DLPackTensor(const Tensor& tensor, LaneType lanes = 1);

  inline operator const ::DLTensor&() const { return t_; }

  inline operator ::DLTensor&() { return t_; }

 private:
  ::DLTensor t_;

  // The shape in DLTensor is defined as int64_t*
  // Add this member to make TVMTensor init without heap allocation
  ShapeType shape_[9];
};

}  // namespace framework
}  // namespace paddle
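To illustrate the intended call pattern, here is a hedged usage sketch: a fluid Tensor is wrapped and handed to any DLPack consumer through the implicit ::DLTensor& conversion declared above. ConsumeDLTensor is a hypothetical third-party entry point, not part of this patch, and the sketch assumes the fluid headers added in this diff:

#include "paddle/fluid/framework/dlpack_tensor.h"
#include "paddle/fluid/platform/place.h"

void ConsumeDLTensor(const ::DLTensor& t);  // hypothetical DLPack consumer

void Demo() {
  paddle::framework::Tensor tensor;
  paddle::framework::DDim dims{2, 3};
  tensor.Resize(dims);
  tensor.mutable_data<float>(paddle::platform::CPUPlace());

  // lanes defaults to 1; pass >1 only for vectorized CPU element types
  paddle::framework::DLPackTensor dl(tensor);
  ConsumeDLTensor(dl);  // goes through operator const ::DLTensor&
}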
@@ -0,0 +1,113 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/dlpack_tensor.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <vector>

namespace paddle {
namespace framework {

namespace {  // NOLINT
template <typename T>
constexpr uint8_t GetDLDataTypeCode() {
  return std::is_same<platform::float16, T>::value ||
                 std::is_floating_point<T>::value
             ? static_cast<uint8_t>(kDLFloat)
             : (std::is_unsigned<T>::value
                    ? static_cast<uint8_t>(kDLUInt)
                    : (std::is_integral<T>::value ? static_cast<uint8_t>(kDLInt)
                                                  : static_cast<uint8_t>(-1)));
}
}  // NOLINT

template <typename T>
void TestMain(const platform::Place &place, uint16_t lanes) {
  DDim dims{4, 5, 6, 7};
  Tensor tensor;
  tensor.Resize(dims);
  void *p = tensor.mutable_data<T>(place);

  DLPackTensor dlpack_tensor(tensor, lanes);
  ::DLTensor &dl_tensor = dlpack_tensor;

  CHECK_EQ(p, dl_tensor.data);
  if (platform::is_cpu_place(place)) {
    CHECK_EQ(kDLCPU, dl_tensor.ctx.device_type);
    CHECK_EQ(0, dl_tensor.ctx.device_id);
  } else if (platform::is_gpu_place(place)) {
    CHECK_EQ(kDLGPU, dl_tensor.ctx.device_type);
    CHECK_EQ(boost::get<platform::CUDAPlace>(place).device,
             dl_tensor.ctx.device_id);
  } else if (platform::is_cuda_pinned_place(place)) {
    CHECK_EQ(kDLCPUPinned, dl_tensor.ctx.device_type);
    CHECK_EQ(0, dl_tensor.ctx.device_id);
  } else {
    CHECK_EQ(false, true);
  }

  CHECK_EQ(dims.size(), dl_tensor.ndim);
  for (auto i = 0; i < dims.size(); ++i) {
    CHECK_EQ(dims[i], dl_tensor.shape[i]);
  }

  CHECK_EQ(dl_tensor.strides == nullptr, true);
  CHECK_EQ(static_cast<uint64_t>(0), dl_tensor.byte_offset);

  CHECK_EQ(lanes, dl_tensor.dtype.lanes);
  CHECK_EQ(sizeof(T) * 8, dl_tensor.dtype.bits);

  CHECK_EQ(GetDLDataTypeCode<T>(), dl_tensor.dtype.code);
}

template <typename T>
void TestMainLoop() {
#ifdef PADDLE_WITH_CUDA
  std::vector<platform::Place> places{platform::CPUPlace(),
                                      platform::CUDAPlace(0),
                                      platform::CUDAPinnedPlace()};
  if (platform::GetCUDADeviceCount() > 1) {
    places.emplace_back(platform::CUDAPlace(1));
  }
#else
  std::vector<platform::Place> places{platform::CPUPlace()};
#endif
  std::vector<uint16_t> lanes{1, 2};
  for (auto &p : places) {
    for (auto &l : lanes) {
      TestMain<T>(p, l);
    }
  }
}

#define PADDLE_DLPACK_TEST(type) \
  TEST(dlpack, test_##type) { TestMainLoop<type>(); }

using float16 = platform::float16;
PADDLE_DLPACK_TEST(float16);
PADDLE_DLPACK_TEST(float);
PADDLE_DLPACK_TEST(double);
PADDLE_DLPACK_TEST(int);
PADDLE_DLPACK_TEST(int64_t);
PADDLE_DLPACK_TEST(bool);
PADDLE_DLPACK_TEST(size_t);
PADDLE_DLPACK_TEST(int16_t);
PADDLE_DLPACK_TEST(uint8_t);
PADDLE_DLPACK_TEST(int8_t);

#undef PADDLE_DLPACK_TEST

}  // namespace framework
}  // namespace paddle
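Each PADDLE_DLPACK_TEST(type) line above stamps out an ordinary gtest case; PADDLE_DLPACK_TEST(float), for example, expands to:

TEST(dlpack, test_float) { TestMainLoop<float>(); }

This is also why "using float16 = platform::float16;" precedes the list: the test_##type paste requires the element type to be a single identifier.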
@@ -0,0 +1,72 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/transfer_scope_cache.h"

namespace paddle {
namespace framework {

std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
  thread_local auto* x = new std::unordered_map<size_t, Scope*>;
  return *x;
}

std::unordered_set<Scope*>& global_transfer_scope_cache() {
  thread_local auto* x = new std::unordered_set<Scope*>;
  return *x;
}

Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,
                              const Scope* scope) {
  Scope* new_scope{nullptr};
  size_t infer_cache_key =
      CombineHash(OpKernelType::Hash()(type0), OpKernelType::Hash()(type1));
  infer_cache_key =
      CombineHash(infer_cache_key, std::hash<const Scope*>()(scope));

  auto it = global_transfer_data_cache().find(infer_cache_key);
  if (it != global_transfer_data_cache().end()) {
    new_scope = global_transfer_data_cache()[infer_cache_key];
  } else {
    new_scope = &scope->NewScope();
    global_transfer_data_cache()[infer_cache_key] = new_scope;
  }
  global_transfer_scope_cache().insert(new_scope);
  return new_scope;
}

void RemoveKidsFromTransferScopeCache(Scope* scope) {
  auto it = global_transfer_scope_cache().find(scope);
  if (it != global_transfer_scope_cache().end()) {
    global_transfer_scope_cache().erase(it);
  }
  for (auto* s : scope->kids()) {
    auto it = global_transfer_scope_cache().find(s);
    if (it != global_transfer_scope_cache().end()) {
      global_transfer_scope_cache().erase(it);
    }
  }

  // remove global transfer data cache
  auto& cache = global_transfer_data_cache();
  for (auto it = cache.begin(); it != cache.end();) {
    if (it->second == scope)
      it = cache.erase(it);
    else
      it++;
  }
}

}  // namespace framework
}  // namespace paddle
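CombineHash is declared in transfer_scope_cache.h, which this diff does not touch. As a point of reference only, a boost-style hash_combine mixer is the usual shape of such a helper; the version below is an assumption, not the Paddle implementation:

#include <cstddef>

// Assumed sketch: mixes two size_t hashes. 0x9e3779b9 is the 32-bit
// golden-ratio constant popularized by boost::hash_combine.
inline std::size_t CombineHashSketch(std::size_t seed, std::size_t h) {
  return seed ^ (h + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}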