commit
c110f56574
@ -0,0 +1,72 @@
|
|||||||
|
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Skip the whole MKL-DNN setup unless the build opted in.
# The bare variable name is used (instead of ${WITH_MKLDNN}) so that an unset
# or empty WITH_MKLDNN evaluates to false rather than expanding to the
# malformed condition `IF(NOT )`, which aborts configuration.
IF(NOT WITH_MKLDNN)
  return()
ENDIF()
|
||||||
|
|
||||||
|
INCLUDE(ExternalProject)
|
||||||
|
|
||||||
|
# Name of the ExternalProject target and the directory it is fetched into.
SET(MKLDNN_PROJECT     "extern_mkldnn")
SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)

# Install below CMAKE_INSTALL_PREFIX only when $HOME is exactly "/root";
# for every other value of $HOME the library is installed below $HOME.
IF("$ENV{HOME}" STREQUAL "/root")
  SET(MKLDNN_INSTALL_ROOT ${CMAKE_INSTALL_PREFIX})
ELSE()
  SET(MKLDNN_INSTALL_ROOT "$ENV{HOME}")
ENDIF()

SET(MKLDNN_INSTALL_DIR "${MKLDNN_INSTALL_ROOT}/opt/paddle/third_party/mkldnn")

# Published as a (forced) cache entry so the value is visible project-wide.
SET(MKLDNN_INCLUDE_DIR "${MKLDNN_INSTALL_DIR}/include"
    CACHE PATH "mkldnn include directory." FORCE)
|
||||||
|
|
||||||
|
# Platform handling: MKL-DNN is disabled on Windows; elsewhere the shared
# library location is published and its directory is added to the install
# RPATH.
IF(WIN32)
  # MESSAGE() concatenates its arguments verbatim, so the first fragment must
  # end with a space to keep the two sentences apart.
  MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet. "
                  "Force WITH_MKLDNN=OFF")
  SET(WITH_MKLDNN OFF)
  return()
ELSE()
  SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so"
      CACHE FILEPATH "mkldnn library." FORCE)
  MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path")
  SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
  #SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS
  # LIST(APPEND) avoids the leading empty list element that
  # SET(VAR "${VAR}" "x") produces when CMAKE_INSTALL_RPATH is still empty.
  LIST(APPEND CMAKE_INSTALL_RPATH "${MKLDNN_INSTALL_DIR}/lib")
ENDIF()
|
||||||
|
|
||||||
|
INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR})

# When MKLML is the BLAS provider, build MKL-DNN after it and against it.
# The expansion is quoted so that an empty or unset CBLAS_PROVIDER compares as
# an empty string instead of producing the malformed `IF( STREQUAL "MKLML")`.
IF("${CBLAS_PROVIDER}" STREQUAL "MKLML")
  SET(MKLDNN_DEPENDS  ${MKLML_PROJECT})
  SET(MKLDNN_MKLROOT  ${MKLML_ROOT})
  SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB})
  SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR})
ENDIF()
|
||||||
|
|
||||||
|
# Fetch MKL-DNN v0.9 from git and build/install it out-of-source in
# <SOURCE_DIR>/build. The nested configure is run with ${CMAKE_COMMAND} so it
# uses the same CMake executable as the outer build (not whatever `cmake` is
# first on PATH), and the build directory is created with the portable
# `cmake -E make_directory` instead of `mkdir -p`.
# NOTE: $(MAKE) is only meaningful under Makefile generators.
ExternalProject_Add(
    ${MKLDNN_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    DEPENDS             ${MKLDNN_DEPENDS}
    GIT_REPOSITORY      "https://github.com/01org/mkl-dnn.git"
    GIT_TAG             "v0.9"
    PREFIX              ${MKLDNN_SOURCES_DIR}
    CONFIGURE_COMMAND   ${CMAKE_COMMAND} -E make_directory <SOURCE_DIR>/build
    BUILD_COMMAND       cd <SOURCE_DIR>/build
                        && ${CMAKE_COMMAND} .. -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKLDNN_MKLROOT}
                        && $(MAKE)
    INSTALL_COMMAND     cd <SOURCE_DIR>/build && $(MAKE) install
    UPDATE_COMMAND      ""
)
|
||||||
|
|
||||||
|
# Expose the externally built shared library as the imported target `mkldnn`.
# The target depends on the ExternalProject step so the library is built
# before anything that uses the target.
ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
SET_TARGET_PROPERTIES(mkldnn PROPERTIES IMPORTED_LOCATION "${MKLDNN_LIBRARY}")
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})

# Register the target in the list of external project dependencies.
LIST(APPEND external_project_dependencies mkldnn)
MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIBRARY}")
|
||||||
@ -0,0 +1,64 @@
|
|||||||
|
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Skip the whole MKLML setup unless the build opted in.
# The bare variable name is used (instead of ${WITH_MKLML}) so that an unset
# or empty WITH_MKLML evaluates to false rather than expanding to the
# malformed condition `IF(NOT )`, which aborts configuration.
IF(NOT WITH_MKLML)
  return()
ENDIF()
|
||||||
|
|
||||||
|
INCLUDE(ExternalProject)
|
||||||
|
|
||||||
|
# ExternalProject target name, release archive name and download location.
SET(MKLML_PROJECT      "extern_mklml")
SET(MKLML_VER          "mklml_lnx_2018.0.20170425")
SET(MKLML_URL          "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz")
SET(MKLML_SOURCE_DIR   "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")

# Install destination, relative to the install root chosen below.
SET(MKLML_DST_DIR      "opt/paddle/third_party/mklml")

# Install below CMAKE_INSTALL_PREFIX only when $HOME is exactly "/root";
# for every other value of $HOME the package is installed below $HOME.
SET(MKLML_INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}")
IF(NOT "$ENV{HOME}" STREQUAL "/root")
  SET(MKLML_INSTALL_ROOT "$ENV{HOME}")
ENDIF()

# Locations inside the installed package.
SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR})
SET(MKLML_ROOT        ${MKLML_INSTALL_DIR}/${MKLML_VER})
SET(MKLML_INC_DIR     ${MKLML_ROOT}/include)
SET(MKLML_LIB_DIR     ${MKLML_ROOT}/lib)
SET(MKLML_LIB         ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB    ${MKLML_LIB_DIR}/libiomp5.so)

# LIST(APPEND) avoids the leading empty list element that
# SET(VAR "${VAR}" "x") produces when CMAKE_INSTALL_RPATH is still empty.
LIST(APPEND CMAKE_INSTALL_RPATH "${MKLML_ROOT}/lib")
|
||||||
|
|
||||||
|
INCLUDE_DIRECTORIES(${MKLML_INC_DIR})

# Generate a minimal CMakeLists.txt next to the downloaded archive; its only
# job is to install the unpacked ${MKLML_VER} directory.
# cmake_minimum_required() is written first: it must precede project() so
# policy defaults are established before the project is declared.
SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt)
FILE(WRITE ${mklml_cmakefile}
     "cmake_minimum_required(VERSION 3.0)\n"
     "PROJECT(MKLML)\n"
     "install(DIRECTORY ${MKLML_VER}\n"
     "        DESTINATION ${MKLML_DST_DIR})\n")
|
||||||
|
|
||||||
|
# Download the prebuilt MKLML archive with wget, unpack it, and install it via
# the generated CMakeLists.txt using MKLML_INSTALL_ROOT as the install prefix
# (passed both on the command line and through the initial cache).
# NOTE(review): `--no-check-certificate` disables TLS verification for a
# downloaded binary package; consider removing it once the build hosts have a
# usable CA bundle.
ExternalProject_Add(
    ${MKLML_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX                ${MKLML_SOURCE_DIR}
    UPDATE_COMMAND        ""
    DOWNLOAD_DIR          ${MKLML_DOWNLOAD_DIR}
    DOWNLOAD_NO_PROGRESS  1
    DOWNLOAD_COMMAND      wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL}
                          && tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz
    CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
    CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
)
|
||||||
|
|
||||||
|
# Expose the installed shared library as the imported target `mklml`, make it
# depend on the ExternalProject step, and register it in the list of external
# project dependencies.
ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
SET_TARGET_PROPERTIES(mklml PROPERTIES IMPORTED_LOCATION "${MKLML_LIB}")
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
LIST(APPEND external_project_dependencies mklml)
|
||||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,159 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "TensorType.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*\brief Depthwise convolution forward. The outputData
|
||||||
|
* of depthwise convolution is same with ExpandConvLayer
|
||||||
|
* when groups equals inputChannels in ExpandConvLayer.
|
||||||
|
*
|
||||||
|
* \param[in] inputData input data.
|
||||||
|
* \param[in] filterData the Paramters of the depthwise conv layer..
|
||||||
|
* \param[in] batchSize batch size of input data.
|
||||||
|
* \param[in] outputChannels channels of outputData.
|
||||||
|
* \param[in] outputHeight height of outputData.
|
||||||
|
* \param[in] outputWidth width of outputData.
|
||||||
|
* \param[in] inputChannels channels of inputData.
|
||||||
|
* \param[in] inputHeight height of inputData.
|
||||||
|
* \param[in] inputWidth width of inputData..
|
||||||
|
* \param[in] filterMultiplier equals to outputChannels/groups_.
|
||||||
|
* \param[in] filterHeight height of filter.
|
||||||
|
* \param[in] filterWidth widht of filter.
|
||||||
|
* \param[in] strideH stride size in height direction.
|
||||||
|
* \param[in] strideW stride size in width direction.
|
||||||
|
* \param[in] paddingH padding size in height direction.
|
||||||
|
* \param[in] paddingW padding size in width direction.
|
||||||
|
* \param[out] outputData outputData.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <DeviceType Device, class T>
|
||||||
|
class DepthwiseConvFunctor {
|
||||||
|
public:
|
||||||
|
void operator()(const T* inputData,
|
||||||
|
const T* filterData,
|
||||||
|
int batchSize,
|
||||||
|
int outputChannels,
|
||||||
|
int outputHeight,
|
||||||
|
int outputWidth,
|
||||||
|
int inputChannels,
|
||||||
|
int inputHeight,
|
||||||
|
int inputWidth,
|
||||||
|
int filterMultiplier,
|
||||||
|
int filterHeight,
|
||||||
|
int filterWidth,
|
||||||
|
int strideH,
|
||||||
|
int strideW,
|
||||||
|
int paddingH,
|
||||||
|
int paddingW,
|
||||||
|
T* outputData);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
*\brief Functor tot compute the depthwise convolution backprop w.r.t input.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* \param[in] outputGradData the grad data of output.
|
||||||
|
* \param[in] filterData the Paramters of the depthwise conv layer..
|
||||||
|
* \param[in] batchSize batch size of input data.
|
||||||
|
* \param[in] outputChannels channels of outputData.
|
||||||
|
* \param[in] outputHeight height of outputData.
|
||||||
|
* \param[in] outputWidth width of outputData.
|
||||||
|
* \param[in] inputChannels channels of input data.
|
||||||
|
* \param[in] inputHeight height of inputData.
|
||||||
|
* \param[in] inputWidth width of inputData.
|
||||||
|
* \param[in] filterMultiplier equals to outputChannels/groups_.
|
||||||
|
* \param[in] filterHeight height of filter.
|
||||||
|
* \param[in] filterWidth widht of filter.
|
||||||
|
* \param[in] strideH stride size in height direction.
|
||||||
|
* \param[in] strideW stride size in width direction.
|
||||||
|
* \param[in] paddingH padding size in height direction.
|
||||||
|
* \param[in] paddingW padding size in width direction.
|
||||||
|
* \param[out] inputGrad the grad data of input.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <DeviceType Device, class T>
|
||||||
|
class DepthwiseConvGradInputFunctor {
|
||||||
|
public:
|
||||||
|
void operator()(const T* outputGrad,
|
||||||
|
const T* filterData,
|
||||||
|
int batchSize,
|
||||||
|
int outputChannels,
|
||||||
|
int outputHeight,
|
||||||
|
int outputWidth,
|
||||||
|
int inputChannels,
|
||||||
|
int inputHeight,
|
||||||
|
int inputWidth,
|
||||||
|
int filterMultiplier,
|
||||||
|
int filterHeight,
|
||||||
|
int filterWidth,
|
||||||
|
int strideH,
|
||||||
|
int strideW,
|
||||||
|
int paddingH,
|
||||||
|
int paddingW,
|
||||||
|
T* inputGrad);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
*\brief Functor tot compute the depthwise convolution backprop w.r.t filter.
|
||||||
|
*
|
||||||
|
* \param[in] outputGradData the grad data of output.
|
||||||
|
* \param[in] inputData inputData.
|
||||||
|
* \param[in] batchSize batch size of input data.
|
||||||
|
* \param[in] outputChannels channels of outputData.
|
||||||
|
* \param[in] outputHeight height of outputData.
|
||||||
|
* \param[in] outputWidth width of outputData.
|
||||||
|
* \param[in] inputChannels channels of input data.
|
||||||
|
* \param[in] inputHeight height of inputData.
|
||||||
|
* \param[in] inputWidth width of inputData.
|
||||||
|
* \param[in] filterMultiplier equals to outputChannels/groups_.
|
||||||
|
* \param[in] filterHeight height of filter.
|
||||||
|
* \param[in] filterWidth widht of filter.
|
||||||
|
* \param[in] strideH stride size in height direction.
|
||||||
|
* \param[in] strideW stride size in width direction.
|
||||||
|
* \param[in] paddingH padding size in height direction.
|
||||||
|
* \param[in] paddingW padding size in width direction.
|
||||||
|
* \param[in] colData Auxiliary data when calculating filterGrad.
|
||||||
|
* \param[in] multiplierData Auxiliary data when calculating filterGrad.
|
||||||
|
* \param[out] filterGrad the grad data of filter.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <DeviceType Device, class T>
|
||||||
|
class DepthwiseConvGradFilterFunctor {
|
||||||
|
public:
|
||||||
|
void operator()(const T* outputGrad,
|
||||||
|
const T* inputData,
|
||||||
|
int batchSize,
|
||||||
|
int outputChannels,
|
||||||
|
int outputHeight,
|
||||||
|
int outputWidth,
|
||||||
|
int inputChannels,
|
||||||
|
int inputHeight,
|
||||||
|
int inputWidth,
|
||||||
|
int filterMultiplier,
|
||||||
|
int filterHeight,
|
||||||
|
int filterWidth,
|
||||||
|
int strideH,
|
||||||
|
int strideW,
|
||||||
|
int paddingH,
|
||||||
|
int paddingW,
|
||||||
|
T* colData,
|
||||||
|
T* filterGrad);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace paddle
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,11 +1,16 @@
|
|||||||
add_subdirectory(detail)
|
add_subdirectory(detail)
|
||||||
|
|
||||||
cc_library(memory SRCS memory.cc)
|
cc_library(memory SRCS memory.cc)
|
||||||
|
cc_library(memcpy SRCS memcpy.cc DEPS device_context)
|
||||||
|
|
||||||
cc_library(paddle_memory
|
cc_library(paddle_memory
|
||||||
DEPS
|
DEPS
|
||||||
memory meta_data
|
memory
|
||||||
meta_cache memory_block
|
memcpy
|
||||||
buddy_allocator system_allocator)
|
meta_data
|
||||||
|
meta_cache
|
||||||
|
memory_block
|
||||||
|
buddy_allocator
|
||||||
|
system_allocator)
|
||||||
|
|
||||||
cc_test(memory_test SRCS memory_test.cc DEPS place paddle_memory)
|
cc_test(memory_test SRCS memory_test.cc DEPS place paddle_memory)
|
||||||
|
|||||||
@ -1,140 +1,4 @@
|
|||||||
## Design
|
# Region-based Heterogeneous Memory Management
|
||||||
|
|
||||||
### Usage
|
Please check out the [design documentation](http://gangliao.me) to find out more details about
|
||||||
|
the buddy memory allocator for both CPU and GPU.
|
||||||
To allocate 4KB CPU memory:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
p = memory::Alloc(platform::CPUPlace(), 4*1024);
|
|
||||||
```
|
|
||||||
|
|
||||||
To allocate 4KB memory on the 3rd GPU:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
p = memory::Alloc(platform::GPUPlace(2), 4*1024);
|
|
||||||
```
|
|
||||||
|
|
||||||
To free memory and check the so-far used amount of memory on a place:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
auto pl = platform::GPUPlace(0);
|
|
||||||
p = memory::Alloc(pl, 4*1024);
|
|
||||||
cout << memory::Used(pl);
|
|
||||||
memory::Free(pl, p);
|
|
||||||
```
|
|
||||||
|
|
||||||
### API
|
|
||||||
|
|
||||||
In `paddle/memory/memory.h` we have:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
namespace memory {
|
|
||||||
template <typename Place> void* Alloc(Place, size_t);
|
|
||||||
template <typename Place> void Free(Place, void*);
|
|
||||||
template <typename Place> size_t Used(Place);
|
|
||||||
} // namespace memory
|
|
||||||
```
|
|
||||||
|
|
||||||
These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
template<>
|
|
||||||
void* Alloc<CPUPlace>(CPUPlace p, size_t size) {
|
|
||||||
return GetCPUBuddyAllocator()->Alloc(size);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
and
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
template<>
|
|
||||||
void* Alloc<GPUPlace>(GPUPlace p, size_t size) {
|
|
||||||
return GetGPUBuddyAllocator(p.id)->Alloc(size);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Similar specializations exist for `Free` and `Used`.
|
|
||||||
|
|
||||||
### Implementation
|
|
||||||
|
|
||||||
`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletons.
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
BuddyAllocator* GetCPUBuddyAllocator() {
|
|
||||||
static BuddyAllocator* a = NULL;
|
|
||||||
if (a == NULL) {
|
|
||||||
a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
|
|
||||||
static BuddyAllocator** as = NULL;
|
|
||||||
if (as == NULL) {
|
|
||||||
as = new BuddyAllocator*[platform::NumGPUs()];
|
|
||||||
for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) {
|
|
||||||
as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return as[gpu_id];
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### `BuddyAllocator`
|
|
||||||
|
|
||||||
`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes only parameters related to the algorithm:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) {
|
|
||||||
...
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Please be aware that **`BuddyAllocator` always allocates aligned memory**, aligned on 32 bytes, which can hold a `BuddyAllocator::Block` object:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
class BuddyAllocator {
|
|
||||||
private:
|
|
||||||
struct Block {
|
|
||||||
size_t size;
|
|
||||||
Block *left, *right;
|
|
||||||
size_t index; // allocator id
|
|
||||||
};
|
|
||||||
...
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
Because `BuddyAllocator` has the meta-data of each block, it can trace the used memory -- it records the amount returned by `Alloc` and the amount freed in `Free`. In contrast, `CPUAllocator` and `GPUAllocator` don't know the size of a freed memory block and cannot do the trace.
|
|
||||||
|
|
||||||
#### System Allocators
|
|
||||||
|
|
||||||
The `GPUAllocator` and `CPUAllocator` are called *system allocators*. They work as the fallback allocators of `BuddyAllocator`.
|
|
||||||
|
|
||||||
## Justification
|
|
||||||
|
|
||||||
I got inspiration from Majel and Caffe2, though the above design looks different from both.
|
|
||||||
|
|
||||||
### Caffe2
|
|
||||||
|
|
||||||
In Caffe2, `Tensor<Context>::mutable_data()` allocates the memory. In particular, [`Tensor<Context>::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor<Context>::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479).
|
|
||||||
|
|
||||||
There are two implementations of `Context`:
|
|
||||||
|
|
||||||
1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory.
|
|
||||||
|
|
||||||
1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory.
|
|
||||||
|
|
||||||
### Majel
|
|
||||||
|
|
||||||
In Majel, there are basically two allocator types:
|
|
||||||
|
|
||||||
1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`.
|
|
||||||
1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`.
|
|
||||||
|
|
||||||
However, memory allocation is not via these two allocators. Instead, these two allocators are defined in hidden namespaces.
|
|
||||||
|
|
||||||
In Majel there are hidden global variables like:
|
|
||||||
|
|
||||||
1. `cpu::SystemAllocator g_cpu_allocator`, and
|
|
||||||
1. `vector<gpu::SystemAllocator*> g_gpu_allocators(NUM_GPUS)`.
|
|
||||||
|
|
||||||
Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`.
|
|
||||||
|
|||||||
@ -0,0 +1,70 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include "paddle/memory/memcpy.h"
|
||||||
|
|
||||||
|
#include <cstring> // for memcpy
|
||||||
|
|
||||||
|
#include "paddle/platform/device_context.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace memory {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
|
||||||
|
platform::CPUPlace,
|
||||||
|
const void* src, size_t num) {
|
||||||
|
std::memcpy(dst, src, num);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef PADDLE_ONLY_CPU
|
||||||
|
template <>
|
||||||
|
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
|
||||||
|
void* dst,
|
||||||
|
platform::GPUPlace src_place,
|
||||||
|
const void* src, size_t num,
|
||||||
|
cudaStream_t stream) {
|
||||||
|
platform::GPUPlaceGuard g(src_place.device);
|
||||||
|
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace dst_place,
|
||||||
|
void* dst,
|
||||||
|
platform::CPUPlace src_place,
|
||||||
|
const void* src, size_t num,
|
||||||
|
cudaStream_t stream) {
|
||||||
|
platform::GPUPlaceGuard g(dst_place.device);
|
||||||
|
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
|
||||||
|
void* dst,
|
||||||
|
platform::GPUPlace src_place,
|
||||||
|
const void* src, size_t num,
|
||||||
|
cudaStream_t stream) {
|
||||||
|
if (dst_place == src_place) {
|
||||||
|
platform::GPUPlaceGuard g(src_place.device);
|
||||||
|
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
|
||||||
|
} else {
|
||||||
|
platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
|
||||||
|
stream);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // PADDLE_ONLY_CPU
|
||||||
|
|
||||||
|
} // namespace memory
|
||||||
|
} // namespace paddle
|
||||||
@ -0,0 +1,33 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "paddle/platform/gpu_info.h"
|
||||||
|
#include "paddle/platform/place.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace memory {
|
||||||
|
|
||||||
|
// Copies `num` bytes from `src`, located on a SrcPlace, to `dst`, located on
// a DstPlace. Only a declaration: the behaviour comes from explicit
// specializations defined elsewhere.
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace,
          void* dst,
          SrcPlace,
          const void* src,
          size_t num);
|
||||||
|
|
||||||
|
#ifndef PADDLE_ONLY_CPU
|
||||||
|
template <typename DstPlace, typename SrcPlace>
|
||||||
|
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
|
||||||
|
cudaStream_t stream);
|
||||||
|
#endif // PADDLE_ONLY_CPU
|
||||||
|
|
||||||
|
} // namespace memory
|
||||||
|
} // namespace paddle
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue