Merge branch 'develop' of github.com:baidu/Paddle into feature/c_api

feature/design_of_v2_layer_converter
Yu Yang 9 years ago
commit 9c1c19b6d0

@ -1 +0,0 @@
.gitignore

@ -0,0 +1,15 @@
*.DS_Store
build/
*.user
.vscode
.idea
.project
.cproject
.pydevproject
Makefile
.test_env/
third_party/
*~
bazel-*
!build/*.deb

1
.gitignore vendored

@ -7,6 +7,7 @@ build/
.project
.cproject
.pydevproject
.settings/
Makefile
.test_env/
third_party/

@ -40,7 +40,7 @@ option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(ON_COVERALLS "Compile PaddlePaddle with code coverage" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
@ -90,14 +90,21 @@ include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
set(EXTERNAL_LIBS
# have not include gtest here.
${GFLAGS_LIBRARIES}
${GLOG_LIBRARIES}
${CBLAS_LIBRARIES}
${PROTOBUF_LIBRARY}
${ZLIB_LIBRARIES}
${PYTHON_LIBRARIES}
)
if(WITH_GPU)
list(APPEND EXTERNAL_LIB ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIB ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(NOT WITH_DSO)
endif(WITH_GPU)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(python)

@ -3,20 +3,17 @@
FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ARG DEBIAN_FRONTEND=noninteractive
ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
# ENV variables
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_GPU
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=${WITH_AVX:-OFF}
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
@ -31,7 +28,7 @@ RUN apt-get update && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake locales clang-format-3.8 && \
apt-get install -y automake locales clang-format-3.8 swig && \
apt-get clean -y
# git credential to skip password typing
@ -51,8 +48,6 @@ RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1
RUN apt-get install -y swig
VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"]
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service

@ -2,8 +2,8 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@ -59,36 +59,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
the capability of PaddlePaddle to make a huge impact for your product.
## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
It is recommended to check out the
[Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html)
before looking into the
[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html)
## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
You can follow the quick start tutorial to learn how use PaddlePaddle
step-by-step.
We provide [English](http://www.paddlepaddle.org/develop/doc/) and
[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation.
- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
You might want to start from the this online interactive book that can run in Jupyter Notebook.
- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
You can run distributed training jobs on MPI clusters.
- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html)
- [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
We provide five demos, including: image classification, sentiment analysis,
sequence to sequence model, recommendation, semantic role labeling.
You can also run distributed training jobs on Kubernetes clusters.
- [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
This system supports training deep learning models on multiple machines
with data parallelism.
- [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html)
- [Python API](http://paddlepaddle.org/doc/ui/) <br>
PaddlePaddle supports using either Python interface or C++ to build your
system. We also use SWIG to wrap C++ source code to create a user friendly
interface for Python. You can also use SWIG to create interface for your
favorite programming language.
Our new API enables much shorter programs.
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
We sincerely appreciate your interest and contributions. If you would like to
contribute, please read the contribution guide.
- [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html)
- [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>
We appreciate your contributions!
## Ask Questions

@ -61,7 +61,7 @@ function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
endif()
endfunction()
if(ON_COVERALLS)
if(WITH_COVERAGE)
set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")

@ -134,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
# If -p is not specified then the file is named only "the_file.c.gcov"
#
execute_process(
COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null"
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null
WORKING_DIRECTORY ${GCDA_DIR}
)
endforeach()

@ -45,7 +45,7 @@ IF(NOT ${CBLAS_FOUND})
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""

@ -14,7 +14,8 @@
INCLUDE(ExternalProject)
FIND_PACKAGE(Protobuf 3.1)
set(PROTOBUF_VERSION 3.1)
FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION})
IF(PROTOBUF_FOUND)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)

@ -90,26 +90,6 @@ function(link_paddle_exe TARGET_NAME)
${RDMA_LD_FLAGS}
${RDMA_LIBS})
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES} util)
endif()
if(WITH_GPU)
target_link_libraries(${TARGET_NAME} ${CUDA_CUDART_LIBRARY})
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
${CUDA_curand_LIBRARY})
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
endif()
check_library_exists(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME )
if(HAVE_CLOCK_GETTIME)
target_link_libraries(${TARGET_NAME} rt)
endif()
endif()
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
endfunction()

@ -109,6 +109,12 @@ sum_to_one_norm
:members: sum_to_one_norm
:noindex:
cross_channel_norm
------------------
.. automodule:: paddle.v2.layer
:members: cross_channel_norm
:noindex:
Recurrent Layers
================

@ -51,7 +51,7 @@ PaddlePaddle supports some build options.
<tr><td class="left">WITH_TIMER</td><td class="left">Compile PaddlePaddle with stats timer</td></tr>
<tr><td class="left">WITH_PROFILER</td><td class="left">Compile PaddlePaddle with GPU profiler</td></tr>
<tr><td class="left">WITH_DOC</td><td class="left">Compile PaddlePaddle with documentation</td></tr>
<tr><td class="left">ON_COVERALLS</td><td class="left">Compile PaddlePaddle with code coverage</td></tr>
<tr><td class="left">WITH_COVERAGE</td><td class="left">Compile PaddlePaddle with code coverage</td></tr>
<tr><td class="left">COVERALLS_UPLOAD</td><td class="left">Package code coverage data to coveralls</td></tr>
<tr><td class="left">ON_TRAVIS</td><td class="left">Exclude special unit test on Travis CI</td></tr>
</tbody>

@ -1,21 +1,3 @@
FUNCTION(generate_python_api target_name)
ADD_CUSTOM_COMMAND(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
COMMAND ${SWIG_EXECUTABLE} -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
${PROJ_ROOT}/paddle/api/PaddleAPI.h
${external_project_dependencies}
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig")
ADD_CUSTOM_TARGET(${target_name} ALL DEPENDS
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${external_project_dependencies})
ENDFUNCTION(generate_python_api)
set(API_SOURCES
Arguments.cpp
ConfigParser.cpp
@ -33,65 +15,86 @@ set(API_HEADER
PaddleAPI.h
Internal.h)
add_library(paddle_api STATIC
${API_SOURCES})
add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api gen_proto_cpp)
list(LENGTH "${GFLAGS_LIBRARIES}" GFLAGS_LIBRARIES_LENGTH)
INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle)
if(${GFLAGS_LIBRARIES_LENGTH} EQUAL 0 AND TARGET "${GFLAGS_LIBRARIES}")
# Because gflags compiled by cmake, so it is imported by cmake target,
# not a real library path. Get the real library path here.
message(STATUS "GFLAGS Libraries is ${GFLAGS_LIBRARIES}")
get_target_property(GFLAGS_LOCATION ${GFLAGS_LIBRARIES} LOCATION)
message(STATUS "GFLAGS Target location is ${GFLAGS_LOCATION}")
else()
set(GFLAGS_LOCATION ${GFLAGS_LIBRARIES})
endif()
FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
SET(CMAKE_CXX_FLAGS "-std=c++11 -fPIC -Wall")
IF(WITH_COVERAGE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
ENDIF(WITH_COVERAGE)
configure_file(
paddle_api_config.py.in
${PROJ_ROOT}/paddle/api/paddle_api_config.py
SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_parameter
paddle_function
paddle_math
paddle_utils
paddle_gserver
paddle_pserver
paddle_api
paddle_cuda
paddle_trainer_lib
paddle_network
paddle_proto
${external_project_dependencies}
)
generate_python_api(python_swig_sources)
IF(APPLE)
SET(MACOS_LD_FLAGS "-undefined dynamic_lookup -Wl,-all_load")
ELSE(APPLE)
SET(START_GROUP "-Xlinker -start-group")
SET(END_GROUP "-Xlinker -end-group")
SET(ARCHIVE_START "-Wl,--whole-archive")
SET(ARCHIVE_END "-Wl,--no-whole-archive")
ENDIF(APPLE)
file(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py)
SWIG_ADD_MODULE(swig_paddle python Paddle.i)
SWIG_LINK_LIBRARIES(swig_paddle
${MACOS_LD_FLAGS}
${START_GROUP}
${ARCHIVE_START}
paddle_gserver
paddle_function
${METRIC_LIBS}
${ARCHIVE_END}
paddle_pserver
paddle_trainer_lib
paddle_network
paddle_parameter
paddle_math
paddle_utils
paddle_proto
paddle_cuda
paddle_api
${CMAKE_DL_LIBS}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${RDMA_LD_FLAGS}
${RDMA_LIBS}
${START_END}
)
# TODO(yuyang18) : make wheel name calculated by cmake
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/dist/.timestamp
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp
COMMAND rm -rf py_paddle.egg-info build
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS python_swig_sources
paddle_parameter
paddle_function
paddle_math
paddle_utils
paddle_gserver
paddle_pserver
paddle_trainer
paddle_api
paddle_cuda
${PY_PADDLE_PYTHON_FILES}
DEPENDS _swig_paddle
)
install(DIRECTORY ${PROJ_ROOT}/paddle/dist/
DESTINATION opt/paddle/share/wheels
)
# TODO(yuyang18) : make wheel name calculated by cmake
add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so)
add_custom_target(python_api_wheel ALL DEPENDS
${PROJ_ROOT}/paddle/dist/.timestamp)
add_dependencies(python_api_wheel python_swig_sources
paddle_parameter
paddle_math
paddle_utils
paddle_gserver
paddle_pserver
paddle_trainer
paddle_api
paddle_cuda)
install(DIRECTORY ${PROJ_ROOT}/paddle/dist/ DESTINATION opt/paddle/share/wheels)
if(WITH_TESTING)
IF(NOT PY_PIP_FOUND)

@ -1,17 +0,0 @@
PADDLE_BUILD_DIR="@CMAKE_CURRENT_BINARY_DIR@/../"
WITH_GPU="@WITH_GPU@"
PROTOBUF_LIBRARY="@PROTOBUF_LIBRARY@"
ZLIB_LIBRARIES="@ZLIB_LIBRARIES@"
CMAKE_THREAD_LIB="@CMAKE_THREAD_LIBS_INIT@"
CMAKE_DL_LIBS="@CMAKE_DL_LIBS@"
WITH_PYTHON="@WITH_PYTHON@"
PYTHON_LIBRARIES="@PYTHON_LIBRARIES@"
GLOG_LIBRARIES="@GLOG_LIBRARIES@"
GFLAGS_LIBRARIES="@GFLAGS_LIBRARIES@"
GFLAGS_LOCATION="@GFLAGS_LOCATION@"
CBLAS_LIBRARIES="@CBLAS_LIBRARIES@"
CUDA_LIBRARIES="@CUDA_CUDART_LIBRARY@"
WITH_COVERALLS="@ON_COVERALLS@"

@ -1,157 +0,0 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_api_config import *
import os.path
import platform
system = platform.system().lower()
is_osx = (system == 'darwin')
is_win = (system == 'windows')
is_lin = (system == 'linux')
if is_lin:
whole_start = "-Wl,--whole-archive"
whole_end = "-Wl,--no-whole-archive"
elif is_osx:
whole_start = ""
whole_end = ""
LIB_DIRS = [
"math", 'function', 'utils', 'parameter', "gserver", "api", "cuda",
"pserver", "trainer"
]
PARENT_LIB_DIRS = ['proto']
class PaddleLDFlag(object):
def __init__(self):
self.paddle_build_dir = PADDLE_BUILD_DIR
self.paddle_build_dir = os.path.abspath(self.paddle_build_dir)
self.with_gpu = PaddleLDFlag.cmake_bool(WITH_GPU)
self.protolib = PROTOBUF_LIBRARY
self.zlib = ZLIB_LIBRARIES
self.thread = CMAKE_THREAD_LIB
self.dl_libs = CMAKE_DL_LIBS
self.with_python = PaddleLDFlag.cmake_bool(WITH_PYTHON)
self.python_libs = PYTHON_LIBRARIES
self.glog_libs = GLOG_LIBRARIES
self.with_coverage = PaddleLDFlag.cmake_bool(WITH_COVERALLS)
self.gflags_libs = GFLAGS_LIBRARIES
self.gflags_location = GFLAGS_LOCATION
self.cblas_libs = CBLAS_LIBRARIES
self.curt = CUDA_LIBRARIES
def ldflag_str(self):
return " ".join(
[self.libs_dir_str(), self.parent_dir_str(), self.libs_str()])
def libs_dir_str(self):
libdirs = LIB_DIRS
return " ".join(
map(lambda x: "-L" + os.path.join(self.paddle_build_dir, x),
libdirs))
def parent_dir_str(self):
libdirs = PARENT_LIB_DIRS
return " ".join(
map(lambda x: "-L" + os.path.join(self.paddle_build_dir, '..', x),
libdirs))
def libs_str(self):
libs = [
whole_start,
"-lpaddle_gserver",
"-lpaddle_function",
whole_end,
"-lpaddle_pserver",
"-lpaddle_trainer_lib",
"-lpaddle_network",
'-lpaddle_parameter',
"-lpaddle_math",
'-lpaddle_utils',
"-lpaddle_proto",
"-lpaddle_cuda",
"-lpaddle_api",
self.normalize_flag(self.protolib),
self.normalize_flag(self.glog_libs),
self.normalize_flag(self.gflags_libs),
self.normalize_flag(self.zlib),
self.normalize_flag(self.thread),
self.normalize_flag(self.dl_libs),
self.normalize_flag(self.cblas_libs),
]
if self.with_python:
libs.append(self.normalize_flag(self.python_libs))
if self.with_gpu:
libs.append(self.normalize_flag(self.curt))
if self.with_coverage:
libs.append("-fprofile-arcs")
return " ".join(filter(lambda l: len(l) != 0, libs))
def normalize_flag(self, cmake_flag):
"""
CMake flag string to ld flag
:type cmake_flag: str
"""
if ";" in cmake_flag:
return " ".join(map(self.normalize_flag, cmake_flag.split(";")))
if cmake_flag.startswith("/"): # is a path
return cmake_flag
elif cmake_flag.startswith("-l"): # normal link command
return cmake_flag
elif cmake_flag in [
"gflags-shared", "gflags-static", "gflags_nothreads-shared",
"gflags_nothreads-static"
]: # special for gflags
assert PaddleLDFlag.cmake_bool(self.gflags_location)
return self.gflags_location
elif len(cmake_flag) != 0:
return "".join(["-l", cmake_flag])
else:
return ""
@staticmethod
def cmake_bool(cmake_str):
"""
CMake bool string to bool
:param cmake_str: cmake boolean string
:type cmake_str: str
:rtype: bool
"""
if cmake_str in ["FALSE", "OFF", "NO"] or cmake_str.endswith(
"-NOTFOUND"):
return False
else:
return True
def c_flag(self):
if self.with_coverage:
return [
"-fprofile-arcs", "-ftest-coverage", "-O0", "-g",
"-std=c++11"
]
else:
return ["-std=c++11"]
except ImportError:
class PaddleLDFlag(object):
def ldflag_str(self):
pass
def c_flag(self):
pass

@ -25,12 +25,16 @@ filter_test(GSERVER_HEADER)
filter_test(GSERVER_SOURCES)
if(NOT WITH_GPU)
list(REMOVE_ITEM GSERVER_HEADER
layers/CudnnConvBaseLayer.h
layers/CudnnConvLayer.h
layers/CudnnConvTransLayer.h
layers/CudnnPoolLayer.h
layers/CudnnBatchNormLayer.h)
list(REMOVE_ITEM GSERVER_SOURCES
layers/CudnnConvBaseLayer.cpp
layers/CudnnConvLayer.cpp
layers/CudnnConvTransLayer.cpp
layers/CudnnPoolLayer.cpp
layers/CudnnBatchNormLayer.cpp)
compile_cu_as_cpp(layers/LstmCompute.cu)

@ -164,15 +164,6 @@ public:
argu.value = value;
data_.push_back(argu);
}
/**
* @brief Append user defined data
* @param[in] ptr user defined data
*/
void appendUserDefinedPtr(UserDefinedVectorPtr ptr) {
Argument argu;
argu.udp = ptr;
data_.push_back(argu);
}
/*
* @brief Append argument

@ -0,0 +1,150 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief ConvBaseOperator takes two inputs to perform the convolution.
* The first input is the image, and the second input is the convolution kernel.
* The height of data for two inputs are the same. Each data of the first input
* is convolved with each data of the second input indepedently.
*
* The config file api is conv_operator.
*/
ConvBaseOperator::ConvBaseOperator(const OperatorConfig &config, bool useGpu)
: Operator(config, useGpu) {
CHECK(useGpu);
CHECK_EQ(config_.input_indices_size(), 2L);
caffeMode_ = true;
getConvParams();
computeConvSizes();
// initialize all to default algorithms
fwdAlgo_ = 0;
bwdFilterAlgo_ = 0;
bwdDataAlgo_ = 0;
fwdLimitBytes_ = 0;
bwdDataLimitBytes_ = 0;
bwdFilterLimitBytes_ = 0;
workSpaceInBytes_ = 0;
workSpace_ = nullptr;
isSelectAlgo_ = false;
}
void ConvBaseOperator::allocConvWorkSpace() {
hl_conv_workspace(imageDesc_,
outputDesc_,
filterDesc_,
convDesc_,
&fwdAlgo_,
&fwdLimitBytes_,
&bwdDataAlgo_,
&bwdDataLimitBytes_,
&bwdFilterAlgo_,
&bwdFilterLimitBytes_);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
if (maxWorkSpace > workSpaceInBytes_) {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
}
// total amount of storage needed
workSpace_ = hl_malloc_device(maxWorkSpace);
workSpaceInBytes_ = maxWorkSpace;
}
}
void ConvBaseOperator::computeConvSizes() {
hl_create_filter_descriptor(
&filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_);
hl_create_tensor_descriptor(&imageDesc_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_,
imageDesc_,
filterDesc_,
paddingY_,
padding_,
strideY_,
stride_);
}
void ConvBaseOperator::reshapeImageDescriptors() {
hl_tensor_reshape(imageDesc_,
1,
channels_,
imageH_,
imageW_,
channels_ * imageH_ * imageW_,
imageH_ * imageW_,
imageW_,
1);
hl_tensor_reshape(outputDesc_,
1,
numFilters_,
outputH_,
outputW_,
numFilters_ * outputH_ * outputW_,
outputH_ * outputW_,
outputW_,
1);
hl_reset_convolution_descriptor(convDesc_,
imageDesc_,
filterDesc_,
paddingY_,
padding_,
strideY_,
stride_);
}
void ConvBaseOperator::getConvParams() {
configNumFilters_ = config_.num_filters();
const ConvConfig &conf = config_.conv_conf();
padding_ = conf.padding();
stride_ = conf.stride();
filterSize_ = conf.filter_size();
paddingY_ = conf.padding_y();
strideY_ = conf.stride_y();
filterSizeY_ = conf.filter_size_y();
filterPixels_ = filterSize_ * filterSizeY_;
configChannels_ = conf.channels();
imgSize_ = conf.img_size();
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
imgPixels_ = imgSize_ * imgSizeY_;
CHECK_EQ(conf.groups(), 1U);
filterChannels_ = conf.filter_channels();
outputX_ = conf.output_x();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
outputs_ = outputX_ * outputX_;
isDeconv_ = (config_.type() == "conv") ? false : true;
if (isDeconv_) {
channels_ = configNumFilters_;
numFilters_ = configChannels_;
} else {
channels_ = configChannels_;
numFilters_ = configNumFilters_;
}
}
} // namespace paddle

@ -0,0 +1,112 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Operator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief ConvOperator takes two inputs to perform the convolution.
* The first input is the image, and the second input is the convolution kernel.
* The height of data for two inputs are the same. Each data of the first input
* is convolved with each data of the second input indepedently.
*
* The config file api is conv_operator.
*/
class ConvBaseOperator : public Operator {
public:
ConvBaseOperator(const OperatorConfig &config, bool useGpu);
/**
* Free workspace in device and destroy cudnn tensor descriptor.
*/
virtual ~ConvBaseOperator() {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
workSpaceInBytes_ = 0;
}
hl_destroy_tensor_descriptor(imageDesc_);
hl_destroy_tensor_descriptor(outputDesc_);
hl_destroy_filter_descriptor(filterDesc_);
hl_destroy_convolution_descriptor(convDesc_);
}
protected:
/**
* Get convolution parameters from layer config and
* initialize member variables.
*/
void getConvParams();
/**
* Allocate Gpu Memory for cudnn convolution algorithms.
*/
void allocConvWorkSpace();
/**
* Create cudnn tensor descriptor for convolution operation.
*/
void computeConvSizes();
/**
* Reshape cudnn tensor descriptor.
*/
void reshapeImageDescriptors();
/**
* Reshape cudnn tensor descriptor.
*/
virtual void reshape(int batchSize) = 0;
/**
* Check filter size is equal to the size calculated by parameters from
* layer config.
*/
void checkFilterSize(const MatrixPtr &filter) {
CHECK_EQ(static_cast<int>(filter->getWidth()),
filterSize_ * filterSizeY_ * channels_ * numFilters_);
}
/// Most of member variables are same with CudnnConvLayer.
/// There is no explanation here.
bool isDeconv_;
int imageH_, imageW_, outputH_, outputW_;
hl_tensor_descriptor imageDesc_;
hl_tensor_descriptor outputDesc_;
hl_filter_descriptor filterDesc_;
hl_convolution_descriptor convDesc_;
bool caffeMode_;
int inputOffset_, outputOffset_, weightOffset_;
int numFilters_, channels_;
/// from parsing config
int configNumFilters_, configChannels_;
int padding_, stride_, filterSize_, imgSize_, imgSizeY_;
int paddingY_, strideY_, filterSizeY_;
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;
/// Following member variables are same with CudnnConvLayer.
/// There is no explanation here.
int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
size_t workSpaceInBytes_;
void *workSpace_;
bool isSelectAlgo_;
};
} // namespace paddle

@ -0,0 +1,195 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseProjection.h"
#include "paddle/utils/Stat.h"
namespace paddle {
ThreadLocalD<std::vector<MemoryHandle *>> ConvBaseProjection::convMem_;
ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config,
ParameterPtr parameter,
bool useGpu)
: Projection(config, parameter, useGpu) {
CHECK(useGpu); // only support GPU
getConvParams();
initCudnn();
size_t height = filterH_ * filterW_ * channels_ / groups_;
size_t width = numFilters_;
weight_.reset(new Weight(height, width, parameter));
weightOffset_ = height * width / groups_;
}
void ConvBaseProjection::getConvParams() {
const ConvConfig &conf = config_.conv_conf();
paddingH_ = conf.padding_y();
paddingW_ = conf.padding();
strideH_ = conf.stride_y();
strideW_ = conf.stride();
filterH_ = conf.filter_size_y();
filterW_ = conf.filter_size();
configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
configImgW_ = conf.img_size();
configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
configOutW_ = conf.output_x();
configChannels_ = conf.channels();
configNumFilters_ = config_.num_filters();
isDeconv_ = (config_.type() == "conv") ? false : true;
channels_ = (isDeconv_) ? configNumFilters_ : configChannels_;
numFilters_ = (isDeconv_) ? configChannels_ : configNumFilters_;
groups_ = conf.groups();
CHECK_EQ(channels_ % groups_, 0);
CHECK_EQ(numFilters_ % groups_, 0);
}
void ConvBaseProjection::initCudnn() {
hl_create_filter_descriptor(&filterDesc_,
channels_ / groups_,
numFilters_ / groups_,
filterH_,
filterW_);
hl_create_tensor_descriptor(&imageDesc_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_,
imageDesc_,
filterDesc_,
paddingH_,
paddingW_,
strideH_,
strideW_);
// initialize all to default algorithms
fwdAlgo_ = 0;
bwdFilterAlgo_ = 0;
bwdDataAlgo_ = 0;
fwdLimitBytes_ = 0;
bwdDataLimitBytes_ = 0;
bwdFilterLimitBytes_ = 0;
workSpaceInBytes_ = 0;
batchNum_ = 0;
isSelectAlgo_ = false;
}
void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
// The stride between two consecutive samples in the output of ConvProjection
// may not be numFilters_ * outputH_ * outputW_ (conv) or
// channels_ * imageH_ * imageW_ (deconv)
// for example, in the case of layer ConcatenateLayer2 with two
// ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
// So the calculation of nStride is different from CudnnConvLayer.
size_t nStrideImage, nStrideOutput;
if (isDeconv_) {
nStrideImage = out_->value->getStride();
nStrideOutput = numFilters_ * outputH_ * outputW_;
} else {
nStrideImage = channels_ * imageH_ * imageW_;
nStrideOutput = out_->value->getStride();
}
hl_tensor_reshape(imageDesc_,
batchSize,
channels_ / groups_,
imageH_,
imageW_,
nStrideImage,
imageH_ * imageW_,
imageW_,
1);
hl_tensor_reshape(outputDesc_,
batchSize,
numFilters_ / groups_,
outputH_,
outputW_,
nStrideOutput,
outputH_ * outputW_,
outputW_,
1);
hl_reset_convolution_descriptor(convDesc_,
imageDesc_,
filterDesc_,
paddingH_,
paddingW_,
strideH_,
strideW_);
}
void ConvBaseProjection::reshape(int batchSize) {
size_t width = calOutputSize();
CHECK_EQ(width, out_->value->getWidth());
CHECK_EQ(calInputSize(), in_->value->getWidth());
isSelectAlgo_ = (batchSize == batchNum_);
batchNum_ = batchSize;
if (!isSelectAlgo_) {
reshapeTensorDesc(batchSize);
hl_conv_workspace(imageDesc_,
outputDesc_,
filterDesc_,
convDesc_,
&fwdAlgo_,
&fwdLimitBytes_,
&bwdDataAlgo_,
&bwdDataLimitBytes_,
&bwdFilterAlgo_,
&bwdFilterLimitBytes_);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
workSpaceInBytes_ = maxWorkSpace;
VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_
<< " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_;
}
isSelectAlgo_ = true;
}
void *ConvBaseProjection::getSpaceBytes(size_t size) {
std::vector<MemoryHandle *> &convMem = *convMem_;
if (convMem.empty()) {
int numDevices = hl_get_device_count();
convMem.resize(numDevices);
}
int devId = hl_get_device();
MemoryHandle **localMem = &(convMem[devId]);
if (NULL == *localMem || size > (*localMem)->getAllocSize()) {
*localMem = new GpuMemoryHandle(size);
}
return (*localMem)->getBuf();
}
ConvBaseProjection::~ConvBaseProjection() {
hl_destroy_tensor_descriptor(imageDesc_);
hl_destroy_tensor_descriptor(outputDesc_);
hl_destroy_filter_descriptor(filterDesc_);
hl_destroy_convolution_descriptor(convDesc_);
}
} // namespace paddle

@ -0,0 +1,116 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Projection.h"
#include "paddle/math/MathUtils.h"
namespace paddle {
/**
* @brief Base class for ConvProjection and ConvTransProjection.
*/
class ConvBaseProjection : public Projection {
public:
/**
* Constructor.
*/
ConvBaseProjection(const ProjectionConfig& config,
ParameterPtr parameter,
bool useGpu);
~ConvBaseProjection();
protected:
void getConvParams();
void initCudnn();
void reshapeTensorDesc(int batchSize);
void reshape(int batchSize);
virtual size_t calOutputSize() = 0;
virtual size_t calInputSize() = 0;
static void* getSpaceBytes(size_t size);
/// True if it's deconv projection layer, false if it's ConvProjection layer
bool isDeconv_;
/// imageH_ and imageW_ / outputH_ and outputW_
/// is calculated from the input layer.
int imageH_, imageW_;
int outputH_, outputW_;
/// configImgH_ and configImgW_ / configOutH_ and configOutW_
/// is obtained from config.
int configImgH_, configImgW_;
int configOutH_, configOutW_;
/// channels_ and numFilters_ are defined in terms of convolution semantics
int channels_, numFilters_;
/// configChannels and configNumFilters_ are obtained from config
/// For Conv they are the same as channels_ and numFilters
/// For ConvTrans they are opposite to channels_ and numFilters
int configChannels_, configNumFilters_;
int paddingH_, paddingW_;
int strideH_, strideW_;
int filterH_, filterW_;
/// One group offset of input data.
int inputOffset_;
/// One group offset of output data.
int outputOffset_;
/// One group offset of weight.
int weightOffset_;
int groups_;
/// Cudnn tensor descriptor for input.
hl_tensor_descriptor imageDesc_;
/// Cudnn tensor descriptor for output.
hl_tensor_descriptor outputDesc_;
/// Cudnn tensor descriptor for filter.
hl_filter_descriptor filterDesc_;
/// Cudnn tensor descriptor for a convolution operation.
hl_convolution_descriptor convDesc_;
/// Record the algorithm for forward convolution, which is obtained by cudnn
/// api to search the best suited algorithm.
int fwdAlgo_;
/// Record the algorithm for computing convolution gradient with respect to
/// filter coefficients.
int bwdFilterAlgo_;
/// Record the algorithm for computing convolution gradient with respect to
/// the output.
int bwdDataAlgo_;
/// Amount of GPU memory needed as workspace to be able to execute a
/// forward convolution with the specified algo.
size_t fwdLimitBytes_;
/// Amount of GPU memory needed as workspace to be able to execute a
/// backwardFilter with the specified algo.
size_t bwdDataLimitBytes_;
/// Amount of GPU memory needed as workspace to be able to execute a
/// backwardData with the specified algo.
size_t bwdFilterLimitBytes_;
/// Size of total work space.
size_t workSpaceInBytes_;
/// Whether to call cuDNN api to choose conv algorithm.
bool isSelectAlgo_;
/// batchNum is used to record batch size. If the batch size is changed,
/// the selection algorithm will be called.
int batchNum_;
bool bias_;
std::unique_ptr<Weight> weight_;
static ThreadLocalD<std::vector<MemoryHandle*>> convMem_;
};
} // namespace paddle

File diff suppressed because it is too large Load Diff

@ -0,0 +1,44 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief ConvOperator takes two inputs to perform the convolution.
* The first input is the image, and the second input is the convolution kernel.
* The height of data for two inputs are the same. Each data of the first input
* is convolved with each data of the second input indepedently.
*
* The config file api is conv_operator.
*/
class ConvOperator : public ConvBaseOperator {
public:
ConvOperator(const OperatorConfig &config, bool useGpu)
: ConvBaseOperator(config, useGpu) {}
/**
* Free workspace in device and destroy cudnn tensor descriptor.
*/
virtual ~ConvOperator() {}
void forward() override;
void backward() override;
void reshape(int batchSize) override;
};
} // namespace paddle

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save