diff --git a/.gitignore b/.gitignore index c84b2fc8c7..9622ab78e0 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ cmake-build-* python/paddle/v2/framework/core.so CMakeFiles cmake_install.cmake - +paddle/.timestamp +python/paddlepaddle.egg-info/ diff --git a/.travis.yml b/.travis.yml index 376c693602..b4b83fcdbc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,8 +37,8 @@ before_install: - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - - pip install rarfile + - pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt + - pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - go get -u github.com/alecthomas/gometalinter diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109..c75b83e50c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,8 @@ cmake_minimum_required(VERSION 3.0) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) -set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) +set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) include(system) @@ -36,8 +36,8 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) -option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -121,8 +121,8 @@ include(version) # set PADDLE_VERSION include(coveralls) # set code coverage -include_directories("${PROJ_ROOT}") -include_directories("${PROJ_ROOT}/paddle/cuda/include") +include_directories("${PADDLE_SOURCE_DIR}") +include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") include_directories(${Boost_INCLUDE_DIRS}) @@ -144,7 +144,7 @@ if(WITH_GPU) endif(WITH_GPU) if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) endif() if(USE_NNPACK) @@ -164,10 +164,12 @@ if(WITH_GOLANG) add_subdirectory(go) endif(WITH_GOLANG) +set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") add_subdirectory(paddle) if(WITH_PYTHON) add_subdirectory(python) endif() + if(WITH_DOC) add_subdirectory(doc) endif() diff --git a/Dockerfile b/Dockerfile index 06a3d89307..41b6729124 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,7 @@ RUN apt-get update && \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-matplotlib gcc-4.8 g++-4.8 \ - automake locales clang-format-3.8 swig doxygen cmake \ + automake locales clang-format swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ net-tools && \ @@ -64,13 +64,28 @@ RUN pip install --upgrade pip && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install rarfile + pip install opencv-python rarfile 'scipy>=0.19.0' 'nltk>=3.2.2' # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 RUN apt-get install -y libssl-dev libffi-dev RUN pip install certifi urllib3[secure] +# TODO(qijun) The template library Eigen doesn't work well with GCC 5 +# coming with the default Docker image, so we switch to use GCC 4.8 +# by default. And I will check Eigen library later. + +RUN ln -sf gcc-4.8 /usr/bin/gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ + ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ + ln -sf g++-4.8 /usr/bin/g++ && \ + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ + # Install woboq_codebrowser to /woboq RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \ (cd /woboq \ diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 2ac0989546..209f9078a6 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -129,7 +129,7 @@ if(WITH_GOLANG) add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide COMMAND env GOPATH=${GOPATH} ${GLIDE} install COMMAND touch ${CMAKE_BINARY_DIR}/glide - DEPENDS ${PROJ_ROOT}/go/glide.lock + DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" ) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 5184f0815f..8d5d533126 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -52,7 +52,7 @@ macro(add_style_check_target TARGET_NAME) if(SOURCES_LIST) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" + COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py" "--filter=${STYLE_FILTER}" ${SOURCES_LIST} COMMENT "cpplint: Checking source code style" diff --git a/cmake/flags.cmake b/cmake/flags.cmake index e26d8d9df3..b27eb71550 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -9,10 +9,12 @@ function(CheckCompilerCXX11Flag) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") endif() - # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. - # Use Debug mode instead for now. - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + if(NOT ANDROID) + # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. + # Use Debug mode instead for now. + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + endif() endif() elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 957c20bcf6..d2aab938d4 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/cmake/package.cmake b/cmake/package.cmake index ff49a2d08e..79e02147f3 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -12,7 +12,7 @@ set(CPACK_PACKAGE_DESCRIPTION "") set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl") set(CPACK_DEBIAN_PACKAGE_SECTION Devel) set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION}) -set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst") +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PADDLE_SOURCE_DIR}/paddle/scripts/deb/postinst") #set(CPACK_GENERATOR "DEB") # Start cpack include (CMakePackageConfigHelpers) diff --git a/cmake/util.cmake b/cmake/util.cmake index 4a27623b7f..0da4969d31 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -141,8 +141,8 @@ endmacro() function(create_resources res_file output_file) add_custom_command( OUTPUT ${output_file} - COMMAND python ARGS ${PROJ_ROOT}/cmake/make_resource.py ${res_file} ${output_file} - DEPENDS ${res_file} ${PROJ_ROOT}/cmake/make_resource.py) + COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} + DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) endfunction() diff --git a/cmake/version.cmake b/cmake/version.cmake index ac1583a24c..cde650128a 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -4,7 +4,7 @@ set(tmp_version "HEAD") while ("${PADDLE_VERSION}" STREQUAL "") execute_process( COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} - WORKING_DIRECTORY ${PROJ_ROOT} + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index dc7c6d2e59..cb330ea5e1 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -257,6 +257,11 @@ seq_concat .. autoclass:: paddle.v2.layer.seq_concat :noindex: +kmax_sequence_score +------------------- +.. autoclass:: paddle.v2.layer.kmax_sequence_score + :noindex: + sub_nested_seq -------------- .. autoclass:: paddle.v2.layer.sub_nested_seq diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md new file mode 100644 index 0000000000..1f4d4ec16f --- /dev/null +++ b/doc/design/auto_gradient_check.md @@ -0,0 +1,146 @@ +## Auto Gradient Checker Design + +## Backgraound: +- Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right: + - 1. you should get the right backpropagation formula according to the forward computation. + - 2. you should implement it right in CPP. + - 3. it's difficult to prepare test data. + +- Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages: + - 1. numeric gradient checker only need forward operator. + - 2. user only need to prepare the input data for forward Operator. + +## Mathematical Theory +The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful. + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) + + +## Numeric Gradient Implementation +### Python Interface +```python +def get_numeric_gradient(op, + input_values, + output_name, + input_to_check, + delta=0.005, + local_scope=None): + """ + Get Numeric Gradient for an operator's input. + + :param op: C++ operator instance, could be an network + :param input_values: The input variables. Should be an dictionary, key is + variable name. Value is numpy array. + :param output_name: The final output variable name. + :param input_to_check: The input variable need to get gradient. + :param delta: The perturbation value for numeric gradient method. The + smaller delta is, the more accurate result will get. But if that delta is + too small, it could occur numerical stability problem. + :param local_scope: The local scope used for get_numeric_gradient. + :return: The gradient array in numpy format. + """ +``` + +### Explaination: + +- Why need `output_name` + - One Operator may have multiple Output, you can get independent gradient from each Output. So user should set one output to calculate. + +- Why need `input_to_check` + - One operator may have multiple inputs. Gradient Op can calculate the gradient of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times. + + +### Core Algorithm Implementation + + +```python + # we only compute gradient of one element each time. + # we use a for loop to compute the gradient of every element. + for i in xrange(tensor_size): + # get one input element throw it's index i. + origin = tensor_to_check.get_float_element(i) + + # add delta to it, run op and then get the sum of the result tensor. + x_pos = origin + delta + tensor_to_check.set_float_element(i, x_pos) + y_pos = get_output() + + # plus delta to this element, run op and get the sum of the result tensor. + x_neg = origin - delta + tensor_to_check.set_float_element(i, x_neg) + y_neg = get_output() + + # restore old value + tensor_to_check.set_float_element(i, origin) + + # compute the gradient of this element and store it into a numpy array. + gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + # reshape the gradient result to the shape of the source tensor. + return gradient_flat.reshape(tensor_to_check.get_dims()) +``` + +## Auto Graident Checker Framework + +Each Operator Kernel has three kinds of Gradient: + +- 1. Numeric Gradient +- 2. CPU Operator Gradient +- 3. GPU Operator Gradient(if supported) + +Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value. + +- 1. calculate the numeric gradient. +- 2. calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient. +- 3. calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU) + +#### Python Interface + +```python + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: used to create backward_op + :param input_vars: numpy value of input variable. The following + computation will use these variables. + :param inputs_to_check: inputs var names that should check gradient. + :param output_name: output name that used to + :param max_relative_error: The relative tolerance parameter. + :param no_grad_set: used when create backward ops + :param only_cpu: only compute and check gradient on cpu kernel. + :return: + """ +``` + +### How to check if two numpy array is close enough? +if `abs_numeric_grad` is nearly zero, then use abs error for numeric_grad, not relative + +```python +numeric_grad = ... +operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + +abs_numeric_grad = numpy.abs(numeric_grad) +# if abs_numeric_grad is nearly zero, then use abs error for numeric_grad, not relative +# error. +abs_numeric_grad[abs_numeric_grad < 1e-3] = 1 + +diff_mat = numpy.abs(abs_numeric_grad - operator_grad) / abs_numeric_grad +max_diff = numpy.max(diff_mat) +``` + + +#### Notes: +1,The Input data for auto gradient checker should be reasonable to avoid numeric problem. + + +#### Refs: + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 95cad835b1..41b35b5b23 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -13,22 +13,18 @@ # serve to show the default. import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -124,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index b477f0120c..5822c2481d 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -13,15 +13,11 @@ # serve to show the default. import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser @@ -29,7 +25,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -124,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/go/glide.lock b/go/glide.lock index be1fb24d77..1ecdd21752 100644 --- a/go/glide.lock +++ b/go/glide.lock @@ -1,5 +1,5 @@ hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582 -updated: 2017-08-03T21:46:51.744995189Z +updated: 2017-08-07T23:37:48.867469328Z imports: - name: github.com/beorn7/perks version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 @@ -10,7 +10,7 @@ imports: - name: github.com/cockroachdb/cmux version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92 - name: github.com/coreos/etcd - version: c31bec0f29facff13f7c3e3d948e55dd6689ed42 + version: d0d1a87aa96ae14914751d42264262cb69eda170 subpackages: - alarm - auth @@ -24,6 +24,7 @@ imports: - error - etcdserver - etcdserver/api + - etcdserver/api/etcdhttp - etcdserver/api/v2http - etcdserver/api/v2http/httptypes - etcdserver/api/v3client @@ -210,11 +211,6 @@ testImports: version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9 subpackages: - spew -- name: github.com/docker/docker - version: b6d164e6c46d8115b146e4c3ac93784e9ef8b49e - subpackages: - - pkg/ioutils - - pkg/longpath - name: github.com/pmezard/go-difflib version: d8ed2627bdf02c080bf22230dbb337003b7aba2d subpackages: diff --git a/go/master/service_test.go b/go/master/service_test.go index 5f91910ecc..2d00c22d6f 100644 --- a/go/master/service_test.go +++ b/go/master/service_test.go @@ -1,24 +1,30 @@ package master_test import ( + "io/ioutil" + "net/url" "os" + "strings" "testing" "time" "github.com/PaddlePaddle/Paddle/go/master" "github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/embed" - "github.com/docker/docker/pkg/ioutils" "github.com/stretchr/testify/assert" ) func TestNewServiceWithEtcd(t *testing.T) { // setup an embed etcd server - etcdDir, err := ioutils.TempDir("", "") + etcdDir, err := ioutil.TempDir("", "") if err != nil { t.Fatal(err) } cfg := embed.NewConfig() + lpurl, _ := url.Parse("http://localhost:0") + lcurl, _ := url.Parse("http://localhost:0") + cfg.LPUrls = []url.URL{*lpurl} + cfg.LCUrls = []url.URL{*lcurl} cfg.Dir = etcdDir e, err := embed.StartEtcd(cfg) if err != nil { @@ -30,15 +36,13 @@ func TestNewServiceWithEtcd(t *testing.T) { t.Fatal(err) } }() - select { - case <-e.Server.ReadyNotify(): - t.Log("Server is ready!") - case <-time.After(60 * time.Second): - e.Server.Stop() // trigger a shutdown - t.Fatal("Server took too long to start!") - } - ep := []string{"127.0.0.1:2379"} + <-e.Server.ReadyNotify() + + port := strings.Split(e.Clients[0].Addr().String(), ":")[1] + endpoint := "127.0.0.1:" + port + + ep := []string{endpoint} masterAddr := "127.0.0.1:3306" store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30) if err != nil { diff --git a/go/pserver/client/c/cclient.go b/go/pserver/client/c/cclient.go index 14ad077455..a49cd01522 100644 --- a/go/pserver/client/c/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -90,8 +90,12 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { type selector bool -func (s selector) Select() bool { - return bool(s) +func (s selector) Select() (bool, error) { + return bool(s), nil +} + +func (s selector) Done() error { + return nil } type lister []client.Server @@ -114,11 +118,10 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client { - // TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters) +func paddle_new_etcd_pserver_client(etcdEndpoints *C.char) C.paddle_pserver_client { addr := C.GoString(etcdEndpoints) etcdClient := client.NewEtcd(addr) - c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0)) + c := client.NewClient(etcdClient, etcdClient.Desired(), etcdClient) return add(c) } @@ -136,7 +139,12 @@ func paddle_pserver_client_release(client C.paddle_pserver_client) { //export paddle_begin_init_params func paddle_begin_init_params(client C.paddle_pserver_client) C.int { c := get(client) - if selected := c.BeginInitParams(); selected { + selected, err := c.BeginInitParams() + if err != nil { + panic(err) + } + + if selected { return 1 } return 0 diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py index 572a61e4cc..8d9c6b9b20 100644 --- a/go/pserver/client/c/test/test_train.py +++ b/go/pserver/client/c/test/test_train.py @@ -17,12 +17,10 @@ def main(): # network config x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) y_predict = paddle.layer.fc(input=x, - param_attr=paddle.attr.Param( - name='w', learning_rate=1e-3), + param_attr=paddle.attr.Param(name='w'), size=1, act=paddle.activation.Linear(), - bias_attr=paddle.attr.Param( - name='b', learning_rate=1e-3)) + bias_attr=paddle.attr.Param(name='b')) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) cost = paddle.layer.mse_cost(input=y_predict, label=y) diff --git a/go/pserver/client/client.go b/go/pserver/client/client.go index 15adda4735..20d91e7703 100644 --- a/go/pserver/client/client.go +++ b/go/pserver/client/client.go @@ -27,9 +27,13 @@ import ( // TODO(helin): add RPC call retry logic -// Selector selects if the client should initialize parameter servers. +// Selector selects if the client should initialize parameters and +// reports the initialization process done. type Selector interface { - Select() bool + // Select selects if the client should initialize parameter servers. + Select() (bool, error) + // Done indicates the initialization process is done. + Done() error } // Server is the identification of a parameter Server. @@ -115,7 +119,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { // servers. Other trainers will be blocked until the initialization is // done, and they need to get the initialized parameters from // parameter servers using GetParams. -func (c *Client) BeginInitParams() bool { +func (c *Client) BeginInitParams() (bool, error) { return c.sel.Select() } diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go index 1243ebd683..c3d88e926d 100644 --- a/go/pserver/client/client_test.go +++ b/go/pserver/client/client_test.go @@ -124,8 +124,12 @@ func initEtcdClient() { type selector bool -func (s selector) Select() bool { - return bool(s) +func (s selector) Select() (bool, error) { + return bool(s), nil +} + +func (s selector) Done() error { + return nil } type lister []client.Server @@ -135,7 +139,11 @@ func (l lister) List() []client.Server { } func testClient(t *testing.T, c *client.Client) { - selected := c.BeginInitParams() + selected, err := c.BeginInitParams() + if err != nil { + t.Fatal(err) + } + if !selected { t.Fatal("should be selected.") } diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go index 977ae5af37..f9071caaa8 100644 --- a/go/pserver/client/etcd_client.go +++ b/go/pserver/client/etcd_client.go @@ -16,53 +16,60 @@ package client import ( "context" + "errors" + "fmt" "strconv" "strings" "time" "github.com/PaddlePaddle/Paddle/go/pserver" "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/clientv3/concurrency" log "github.com/sirupsen/logrus" ) const ( defaultEtcdTimeout time.Duration = 5 * time.Second + + initLockPath = "/init_ps/lock" + initDonePath = "/init_ps/done" + initDoneVal = "1" ) -// EtcdClient is used by pserver client that is a part of trainer process. +// Etcd is used by pserver client that is a part of trainer process. // TODO: -// 1. add watcher to watch the change state of pservers) -// 1. add etcd lock) -type EtcdClient struct { +// 1. add watcher to watch the change state of pservers. +type Etcd struct { client *clientv3.Client timeout time.Duration endpoints []string + lock *concurrency.Mutex } // Desired read ps desired number from etcd. -func (p *EtcdClient) Desired() int { +func (e *Etcd) Desired() int { var psDesired int for { - ctx, cancel := context.WithTimeout(context.Background(), p.timeout) - resp, err := p.client.Get(ctx, pserver.PsDesired) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + resp, err := e.client.Get(ctx, pserver.PsDesired) cancel() if err != nil { log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } kvs := resp.Kvs if len(kvs) == 0 { log.Infoln("Waiting for ps desired registered ...") - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) if err != nil { log.Errorf("psDesired %d invalid %v", psDesired, err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } @@ -73,26 +80,26 @@ func (p *EtcdClient) Desired() int { } // List return the pserver list read from etcd. -func (p *EtcdClient) List() []Server { - psDesired := p.Desired() +func (e *Etcd) List() []Server { + psDesired := e.Desired() servers := make([]Server, psDesired) for { for i := 0; i < psDesired; i++ { - ctx, cancel := context.WithTimeout(context.Background(), p.timeout) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) psKey := pserver.PsPath + strconv.Itoa(i) log.Debugf("checking %s", psKey) - resp, err := p.client.Get(ctx, psKey) + resp, err := e.client.Get(ctx, psKey) cancel() if err != nil { log.Infof("Get psKey= %s error, %v", psKey, err) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } kvs := resp.Kvs if len(kvs) == 0 { log.Infof("Waiting for ps addr registered ...") - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } @@ -100,7 +107,7 @@ func (p *EtcdClient) List() []Server { // TODO(Longfei) check the ps address if psAddr == "" { log.Infof("Get psKey = %s, psAddr is empty", psKey) - time.Sleep(p.timeout) + time.Sleep(e.timeout) continue } log.Debugf("got value (%s) for key: %s", psAddr, psKey) @@ -113,7 +120,7 @@ func (p *EtcdClient) List() []Server { } // NewEtcd create a etcd client to return the state of pserver on etcd. -func NewEtcd(endpoints string) *EtcdClient { +func NewEtcd(endpoints string) *Etcd { ep := strings.Split(endpoints, ",") var cli *clientv3.Client var err error @@ -130,10 +137,118 @@ func NewEtcd(endpoints string) *EtcdClient { break } log.Infof("Connected to etcd: %s\n", endpoints) - client := &EtcdClient{ + client := &Etcd{ client: cli, timeout: defaultEtcdTimeout, endpoints: ep, } return client } + +// Select indicates if the current trainer is selected to initialize +// the pserver parameters. +func (e *Etcd) Select() (bool, error) { + sess, err := concurrency.NewSession(e.client, concurrency.WithTTL(5)) + if err != nil { + return false, err + } + + lock := concurrency.NewMutex(sess, initLockPath) + log.Infof("Trying to acquire lock at %s.", initLockPath) + // Do not use timeout context here, since we don't know how + // long does it take for other trainers to initialize the + // parameters. + err = lock.Lock(context.Background()) + if err != nil { + return false, err + } + log.Infof("Successfully acquired lock at %s.", initLockPath) + + get := clientv3.OpGet(initDonePath) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + tresp, err := e.client.Txn(ctx).If(lock.IsOwner()).Then(get).Commit() + cancel() + if err != nil { + return false, err + } + + if !tresp.Succeeded { + return false, errors.New("no longer the owner of the lock") + } + + resp := tresp.Responses[0].GetResponseRange() + + if len(resp.Kvs) == 0 { + // Key value not set, select current trainer. + e.lock = lock + log.Infoln("Trainer selected.") + return true, nil + } + + if string(resp.Kvs[0].Value) == initDoneVal { + log.Infoln("Initialization is already done.") + ctx, cancel = context.WithTimeout(context.Background(), e.timeout) + err = lock.Unlock(ctx) + cancel() + if err != nil { + log.Errorln(err) + } + return false, nil + } + + return false, fmt.Errorf("key %s have unexpected value: %v", initDonePath, resp.Kvs[0].Value) +} + +// Done indicates the parameter initialization process is done. +func (e *Etcd) Done() error { + if e.lock == nil { + return errors.New("lock is nil, Done called unexpectedly") + } + + put := clientv3.OpPut(initDonePath, initDoneVal) + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + tresp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(put).Commit() + cancel() + if err != nil { + return err + } + + if !tresp.Succeeded { + return errors.New("no longer the owner of the lock") + } + + ctx, cancel = context.WithTimeout(context.Background(), e.timeout) + err = e.lock.Unlock(ctx) + cancel() + if err != nil { + log.Errorln(err) + } else { + e.lock = nil + } + + return nil +} + +// Close closes the etcd client. +func (e *Etcd) Close() error { + var err error + if e.lock != nil { + ctx, cancel := context.WithTimeout(context.Background(), e.timeout) + err = e.lock.Unlock(ctx) + cancel() + if err == nil { + e.lock = nil + } + } + + cErr := e.client.Close() + if cErr != nil { + if err != nil { + log.Errorln(cErr) + return err + } + return cErr + } + + return err +} diff --git a/go/pserver/client/etcd_client_test.go b/go/pserver/client/etcd_client_test.go new file mode 100644 index 0000000000..08742433e7 --- /dev/null +++ b/go/pserver/client/etcd_client_test.go @@ -0,0 +1,106 @@ +package client_test + +import ( + "io/ioutil" + "net/url" + "os" + "strings" + "sync" + "testing" + + "github.com/PaddlePaddle/Paddle/go/pserver/client" + "github.com/coreos/etcd/embed" +) + +func TestSelector(t *testing.T) { + etcdDir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatal(err) + } + cfg := embed.NewConfig() + lpurl, _ := url.Parse("http://localhost:0") + lcurl, _ := url.Parse("http://localhost:0") + cfg.LPUrls = []url.URL{*lpurl} + cfg.LCUrls = []url.URL{*lcurl} + cfg.Dir = etcdDir + e, err := embed.StartEtcd(cfg) + if err != nil { + t.Fatal(err) + } + + defer func() { + e.Close() + if err := os.RemoveAll(etcdDir); err != nil { + t.Fatal(err) + } + }() + + <-e.Server.ReadyNotify() + + port := strings.Split(e.Clients[0].Addr().String(), ":")[1] + endpoint := "127.0.0.1:" + port + + var mu sync.Mutex + selectedCount := 0 + var wg sync.WaitGroup + selectAndDone := func(c *client.Etcd) { + defer wg.Done() + + selected, err := c.Select() + if err != nil { + panic(err) + } + + if selected { + mu.Lock() + selectedCount++ + mu.Unlock() + err = c.Done() + if err != nil { + t.Fatal(err) + } + } + } + + c0 := client.NewEtcd(endpoint) + c1 := client.NewEtcd(endpoint) + c2 := client.NewEtcd(endpoint) + c3 := client.NewEtcd(endpoint) + wg.Add(3) + go selectAndDone(c0) + go selectAndDone(c1) + go selectAndDone(c2) + wg.Wait() + + // simulate trainer crashed and restarted after the + // initialization process. + wg.Add(1) + go selectAndDone(c3) + wg.Wait() + + mu.Lock() + if selectedCount != 1 { + t.Fatal("selected count wrong:", selectedCount) + } + mu.Unlock() + + err = c0.Close() + if err != nil { + t.Fatal(err) + } + + err = c1.Close() + if err != nil { + t.Fatal(err) + } + + err = c2.Close() + if err != nil { + t.Fatal(err) + } + + err = c3.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 7a1e8b8b26..d7b3d2bdec 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -19,9 +19,9 @@ add_library(paddle_api STATIC ${API_SOURCES}) add_dependencies(paddle_api paddle_proto paddle_trainer_lib) INCLUDE(${SWIG_USE_FILE}) -INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) +INCLUDE_DIRECTORIES(${PADDLE_SOURCE_DIR}/paddle) -FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) +FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) @@ -79,16 +79,16 @@ SWIG_LINK_LIBRARIES(swig_paddle ${START_END} ) -add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PADDLE_SOURCE_DIR}/paddle/py_paddle + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PADDLE_SOURCE_DIR}/paddle/py_paddle COMMAND ${CMAKE_COMMAND} -E touch .timestamp - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle DEPENDS _swig_paddle ) # TODO(yuyang18) : make wheel name calculated by cmake -add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) +add_custom_target(python_api_wheel ALL DEPENDS ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so) if(WITH_TESTING) IF(NOT PY_PIP_FOUND) diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp index 5934cb898b..8cd73b348c 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/api/ParameterUpdater.cpp @@ -41,7 +41,7 @@ ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( config->m->getConfig(), pserverSpec, useEtcd)); return updater; #else - throw UnsupportError(); + throw UnsupportError("not compiled with WITH_GOLANG"); #endif } diff --git a/paddle/capi/Arguments.cpp b/paddle/capi/Arguments.cpp index 8b81ec69e6..1ec403077e 100644 --- a/paddle/capi/Arguments.cpp +++ b/paddle/capi/Arguments.cpp @@ -90,6 +90,18 @@ paddle_error paddle_arguments_set_ids(paddle_arguments args, return kPD_NO_ERROR; } +paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth) { + if (args == nullptr) return kPD_NULLPTR; + auto a = castArg(args); + if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; + a->args[ID].setFrameHeight(frameHeight); + a->args[ID].setFrameWidth(frameWidth); + return kPD_NO_ERROR; +} + paddle_error paddle_arguments_set_sequence_start_pos(paddle_arguments args, uint64_t ID, uint32_t nestedLevel, diff --git a/paddle/capi/arguments.h b/paddle/capi/arguments.h index d71ea26a5d..7c32524a00 100644 --- a/paddle/capi/arguments.h +++ b/paddle/capi/arguments.h @@ -111,6 +111,20 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, uint64_t ID, paddle_ivector ids); +/** + * @brief paddle_arguments_set_frame_shape Set the fram size of one argument + * in array, which index is `ID`. + * @param [in] args arguments array + * @param [in] ID array index + * @param [in] frameHeight maximum height of input images + * @param [in] frameWidth maximum width of input images + * @return paddle_error + */ +PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth); + /** * @brief PDArgsSetSequenceStartPos Set sequence start position vector of one * argument in array, which index is `ID`. diff --git a/paddle/capi/examples/model_inference/common/common.h b/paddle/capi/examples/model_inference/common/common.h index a78522e4a7..e32f2f9836 100644 --- a/paddle/capi/examples/model_inference/common/common.h +++ b/paddle/capi/examples/model_inference/common/common.h @@ -3,18 +3,21 @@ #include #include -#define CHECK(stmt) \ - do { \ - paddle_error __err__ = stmt; \ - if (__err__ != kPD_NO_ERROR) { \ - fprintf(stderr, "Invoke paddle error %d \n" #stmt, __err__); \ - exit(__err__); \ - } \ +#define CHECK(stmt) \ + do { \ + paddle_error __err__ = stmt; \ + if (__err__ != kPD_NO_ERROR) { \ + fprintf(stderr, "Invoke paddle error %d in " #stmt "\n", __err__); \ + exit(__err__); \ + } \ } while (0) void* read_config(const char* filename, long* size) { FILE* file = fopen(filename, "r"); - if (file == NULL) return NULL; + if (file == NULL) { + fprintf(stderr, "Open %s error\n", filename); + return NULL; + } fseek(file, 0L, SEEK_END); *size = ftell(file); fseek(file, 0L, SEEK_SET); diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index 00f76e0152..b3287552db 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -54,6 +54,31 @@ paddle_error paddle_gradient_machine_create_for_inference( return kPD_NO_ERROR; } +paddle_error paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size) { + if (mergedModel == nullptr) return kPD_NULLPTR; + std::istringstream is(std::string(static_cast(mergedModel), size)); + int64_t modelConfigSize = 0; + is.read((char*)(&modelConfigSize), sizeof(modelConfigSize)); + std::string modelConfigProtobuf; + modelConfigProtobuf.resize(modelConfigSize); + is.read(&modelConfigProtobuf[0], modelConfigSize); + paddle::TrainerConfig config; + if (!config.ParseFromString(modelConfigProtobuf) || !config.IsInitialized()) { + return kPD_PROTOBUF_ERROR; + } + auto ptr = new paddle::capi::CGradientMachine(); + ptr->machine.reset(paddle::GradientMachine::create( + config.model_config(), CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE})); + std::vector& parameters = ptr->machine->getParameters(); + for (auto& para : parameters) { + para->load(is); + } + + *machine = ptr; + return kPD_NO_ERROR; +} + paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) { delete cast(machine); return kPD_NO_ERROR; diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index d7e2dd9bf8..c613ade5b2 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -36,6 +36,18 @@ typedef void* paddle_gradient_machine; PD_API paddle_error paddle_gradient_machine_create_for_inference( paddle_gradient_machine* machine, void* modelConfigProtobuf, int size); +/** + * @brief Create a gradient machine used for model inference, using config with + * parameters which is generated by `paddle merge_model`. + * @param [out] machine that used for model inference. + * @param [in] mergedModel + * @param [in] size + * @return paddle_error + */ +PD_API paddle_error +paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size); + /** * @brief Load parameter from disk. * @param machine Gradient Machine. diff --git a/paddle/capi/tests/CMakeLists.txt b/paddle/capi/tests/CMakeLists.txt index d73f6b7733..8208808b94 100644 --- a/paddle/capi/tests/CMakeLists.txt +++ b/paddle/capi/tests/CMakeLists.txt @@ -10,5 +10,5 @@ target_include_directories(capi_test_gradientMachine PUBLIC ${PADDLE_CAPI_INC_PATH}) target_link_libraries(capi_test_gradientMachine paddle_capi) add_test(NAME capi_test_gradientMachine - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/capi/tests) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f6ad5b2e42..9e98afb311 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) +cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor) +cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) + cc_test(variable_test SRCS variable_test.cc) cc_library(scope SRCS scope.cc) @@ -32,6 +35,11 @@ py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc. # Generate an empty __init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) +add_custom_command(TARGET framework_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto + COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto/ + COMMENT "Copy generated python proto into directory paddle/v2/framework/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) @@ -40,11 +48,16 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - fill_zeros_like_op - recurrent_op) + sgd_op + add_op + mul_op + rowwise_add_op + sigmoid_op + softmax_op + mean_op + cross_entropy_op + recurrent_op + uniform_random_op + gaussian_random_op + fill_zeros_like_op) endif(WITH_PYTHON) diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index 3a5820e9c6..49a62bedb6 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -14,7 +14,6 @@ limitations under the License. */ #pragma once -#include #include #include #include @@ -24,6 +23,7 @@ limitations under the License. */ #include "paddle/framework/attribute.pb.h" #include "paddle/framework/op_desc.pb.h" #include "paddle/platform/enforce.h" +#include "paddle/platform/variant.h" namespace paddle { namespace framework { diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 13706f8b56..437a44a8aa 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/framework/backward.h" + #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" @@ -132,8 +133,9 @@ std::shared_ptr BackwardRecursive( std::shared_ptr grad_op = OpRegistry::CreateGradOp(forwardOp); for (std::string& grad_input : grad_op->inputs_) { if (no_grad_names.count(grad_input)) { - std::string prefix = - grad_input.substr(0, grad_input.size() - kGradVarSuffix.size()); + // +1 for \0 + std::string prefix = grad_input.substr( + 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); grad_input = prefix + kZeroVarSuffix; // If part of input gradient of that operator is not calculated, fill @@ -166,7 +168,7 @@ std::shared_ptr Backward( std::unordered_set no_grad_names; no_grad_names.reserve(no_grad_vars.size()); - no_grad_names.insert(kEmptyVarName + kGradVarSuffix); + no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); for (auto& name : no_grad_vars) { no_grad_names.insert(name + kGradVarSuffix); diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 6c6e12ca25..da3b9c8bed 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -17,16 +17,23 @@ #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" -#include "paddle/operators/type_alias.h" namespace paddle { namespace framework { +using OperatorBase = framework::OperatorBase; +using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; +using OpProto = framework::OpProto; +using OpAttrChecker = framework::OpAttrChecker; +using Scope = framework::Scope; +using DeviceContext = platform::DeviceContext; + class EmptyOp : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(EmptyOp, OperatorBase) + void InferShape(const Scope &scope) const override {} - void Run(const Scope &scope, - const platform::DeviceContext &dev_ctx) const override {} + void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {} }; class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { @@ -71,7 +78,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { } }; -class FcOp : public ops::NetOp { +class FcOp : public operators::NetOp { public: void Init() override { AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, @@ -143,6 +150,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker { } // namespace paddle namespace f = paddle::framework; +namespace ops = paddle::operators; using EnforceNotMet = paddle::platform::EnforceNotMet; REGISTER_OP(rowwise_add, f::EmptyOp, f::RowWiseAddOpMaker); REGISTER_GRADIENT_OP(rowwise_add, rowwise_add_grad, f::EmptyOp); @@ -165,10 +173,10 @@ TEST(Backward, simple_op_grad) { ASSERT_EQ(4UL, gop->inputs_.size()); ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]); ASSERT_EQ("rowwise_add_grad", gop->type_); - ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]); - ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]); + ASSERT_EQ(f::GradVarName("X"), gop->outputs_[0]); + ASSERT_EQ(f::GradVarName("b"), gop->outputs_[1]); - ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix)); + ASSERT_EQ(f::GradVarName("X"), gop->Output(f::GradVarName("X"))); } TEST(Backward, simple_op_not_need_grad) { @@ -176,7 +184,7 @@ TEST(Backward, simple_op_not_need_grad) { ASSERT_NE(fwd, nullptr); auto gop = f::Backward(*fwd, {"X"}); ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(), - "X" + f::kGradVarSuffix), + f::GradVarName("X")), gop->outputs_.end()); auto no_input_gop = f::Backward(*fwd, {"X", "b"}); @@ -244,18 +252,18 @@ TEST(Backward, net_input_of_network_not_need_grad) { all_output.erase(f::kEmptyVarName); for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { - ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end()); + ASSERT_NE(all_output.find(f::GradVarName(out)), all_output.end()); } // Not Generated X - ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end()); + ASSERT_EQ(all_output.find(f::GradVarName("X")), all_output.end()); ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ(f::kEmptyVarName, - first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix)); + first_fc_grad->ops_[2]->Output(f::GradVarName("A"))); } TEST(Backward, net_shared_weight) { @@ -307,15 +315,15 @@ TEST(Backward, op_part_of_output_are_not_need) { ASSERT_EQ(1UL, fill_zero.inputs_.size()); ASSERT_EQ("Z", fill_zero.inputs_[0]); ASSERT_EQ(1UL, fill_zero.outputs_.size()); - ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.outputs_[0]); auto &d_many_out = *net->ops_[1]; ASSERT_EQ("many_output_op_grad", d_many_out.type_); ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size()); // I/O/OG - ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix)); - ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix)); - ASSERT_EQ("X" + f::kGradVarSuffix, - d_many_out.Output("x" + f::kGradVarSuffix)); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, + d_many_out.Input(f::GradVarName("z"))); + ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y"))); + ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x"))); } TEST(Backward, op_part_of_input_are_not_need) { @@ -325,10 +333,9 @@ TEST(Backward, op_part_of_input_are_not_need) { ASSERT_EQ(grad_mul.type_, "mul_grad"); ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL); ASSERT_EQ(grad_mul.outputs_.size(), 2UL); - ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName); - ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix); - ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix), - "out" + f::kGradVarSuffix); + ASSERT_EQ(grad_mul.Output(f::GradVarName("A")), f::kEmptyVarName); + ASSERT_EQ(grad_mul.Output(f::GradVarName("B")), f::GradVarName("b")); + ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out")); ASSERT_EQ(grad_mul.Input("A"), "a"); ASSERT_EQ(grad_mul.Input("B"), "b"); ASSERT_EQ(grad_mul.Input("Out"), "out"); diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 5aa5af0c19..95f294b627 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -14,13 +14,12 @@ limitations under the License. */ #pragma once -#include #include #include #include #include "paddle/framework/dim.h" #include "paddle/platform/enforce.h" -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/platform/variant.h" namespace paddle { namespace framework { diff --git a/paddle/framework/details/lod_tensor.cc b/paddle/framework/details/lod_tensor.cc new file mode 100644 index 0000000000..9ad3979e5b --- /dev/null +++ b/paddle/framework/details/lod_tensor.cc @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/lod_tensor.h" + +#include + +namespace paddle { +namespace framework { +namespace details { + +using LOD = LODTensor::LOD; + +std::shared_ptr SliceLOD(const LOD &lod, size_t level_begin, + size_t level_end) { + auto new_lod = std::make_shared(); + new_lod->reserve(level_end - level_begin); + for (size_t i = level_begin; i < level_end; i++) { + new_lod->emplace_back(lod[i]); + } + return new_lod; +} + +std::shared_ptr SliceLOD(const LOD &lod, size_t level, size_t elem_begin, + size_t elem_end, bool tensor_shared) { + // slice the lod. + auto new_lod = std::make_shared(); + new_lod->reserve(lod.size() - level); + auto start = lod.at(level)[elem_begin]; + auto end = lod.at(level)[elem_end]; + + for (auto it = lod.begin() + level; it != lod.end(); it++) { + auto it_begin = std::find(it->begin(), it->end(), start); + auto it_end = std::find(it_begin, it->end(), end); + PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info"); + PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info"); + new_lod->emplace_back(it_begin, it_end + 1); + if (!tensor_shared) { + // reset offset if tensor is copyed and sliced. + std::transform(new_lod->back().begin(), new_lod->back().end(), + new_lod->back().begin(), + [start](int v) { return v - start; }); + PADDLE_ENFORCE(new_lod->back().front() == 0, "error in slice LOD"); + } + } + return new_lod; +} + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/details/lod_tensor.h b/paddle/framework/details/lod_tensor.h new file mode 100644 index 0000000000..9a6a6cd2ea --- /dev/null +++ b/paddle/framework/details/lod_tensor.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace framework { +namespace details { + +/* + * Slice levels from LOD. + * + * @lod: LOD to slice. + * @level_begin: level to begin slice. + * @level_end: level to end slice. + */ +std::shared_ptr SliceLOD(const LODTensor::LOD &lod, + size_t level_begin, size_t level_end); + +/* + * Slice elements from a level of LOD. + * + * @lod: LOD to slice. + * @level: which level to slice. + * @elem_begin: element's index to begin slice. + * @elem_end: element's index to end slice. + */ +std::shared_ptr SliceLOD(const LODTensor::LOD &lod, + size_t level, size_t elem_begin, + size_t elem_end, bool tensor_shared); +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 6d032fb78f..8bd2bc5902 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -19,45 +19,44 @@ permissions and limitations under the License. */ namespace paddle { namespace framework { -class OpRegistry; - -using VarIndexMap = std::unordered_map; +typedef std::vector Ints; enum class OpArgType { IN, OUT }; -static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; +const Ints* AttrFormat(const AttributeMap& attrs, const std::string& key) { + return (attrs.count(key) > 0) ? &boost::get(attrs.at(key)) : nullptr; } -static const std::vector* GetOpFormat(const OperatorBase* op, - const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; +Ints* AttrFormat(AttributeMap& attrs, const std::string& key) { + return (attrs.count(key) > 0) ? &boost::get(attrs.at(key)) : nullptr; } -static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, - const OpArgType& src_type, const OpArgType& dst_type, +static void TransOpArg(const OperatorBase* src_op, + std::vector& grad_inputs, + std::vector& grad_outputs, + AttributeMap& grad_attrs, + std::unordered_map& grad_idxs, + const std::string& src_type, const std::string& dst_type, int& idx, bool is_grad) { const std::vector& src_inout = - src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_; - const std::vector* src_format = GetOpFormat(src_op, src_type); + (src_type == "input_format") ? src_op->inputs_ : src_op->outputs_; + + const std::vector* src_format = AttrFormat(src_op->Attrs(), src_type); std::vector& dst_inout = - dst_type == OpArgType::IN ? dst_op->inputs_ : dst_op->outputs_; - std::vector* dst_format = GetOpFormat(dst_op, dst_type); + (dst_type == "input_format") ? grad_inputs : grad_outputs; + + std::vector* dst_format = AttrFormat(grad_attrs, dst_type); + const OpProto& proto = OpRegistry::protos().at(src_op->type_); + const auto& src_arg_list = - src_type == OpArgType::IN ? proto.inputs() : proto.outputs(); + (src_type == "input_format") ? proto.inputs() : proto.outputs(); for (const auto& arg : src_arg_list) { std::string src_name = arg.name(); std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name; - (*dst_op->in_out_idxs_)[dst_name] = idx++; + grad_idxs[dst_name] = idx++; int src_arg_idx = src_op->in_out_idxs_->at(src_name); int src_begin = src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx); @@ -76,26 +75,42 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, } OperatorBase* BuildGradOp(const OperatorBase* op) { - std::string grad_op_type = OpRegistry::grad_ops().at(op->type_); - OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - grad_op->type_ = grad_op_type; - grad_op->attrs_ = op->attrs_; - grad_op->attrs_.erase("input_format"); - grad_op->attrs_.erase("output_format"); - if (GetOpFormat(op, OpArgType::IN) != nullptr) { - grad_op->attrs_["output_format"] = std::vector({0}); + const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type()); + + AttributeMap grad_attrs(op->Attrs()); + grad_attrs.erase("input_format"); + grad_attrs.erase("output_format"); + if (op->Attrs().count("input_format") > 0) { + grad_attrs["output_format"] = std::vector({0}); } - if (GetOpFormat(op, OpArgType::IN) != nullptr || - GetOpFormat(op, OpArgType::OUT) != nullptr) { - grad_op->attrs_["input_format"] = std::vector({0}); + if (op->Attrs().count("input_format") > 0 || + op->Attrs().count("output_format") > 0) { + grad_attrs["input_format"] = std::vector({0}); } - grad_op->in_out_idxs_.reset(new VarIndexMap()); + + std::vector grad_inputs, grad_outputs; + + using VarIndexMap = std::unordered_map; + VarIndexMap* grad_idxs = new VarIndexMap; int in_idx = 0; int out_idx = 0; - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::IN, in_idx, false); // I - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, false); // G - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, true); // OG - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::OUT, out_idx, true); // IG + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, + "input_format", "input_format", in_idx, false); // I + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, + "output_format", "input_format", in_idx, false); // G + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, + "output_format", "input_format", in_idx, true); // OG + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, + "input_format", "output_format", out_idx, true); // IG + + OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); + + grad_op->type_ = grad_op_type; + grad_op->inputs_ = grad_inputs; + grad_op->outputs_ = grad_outputs; + grad_op->attrs_ = grad_attrs; + grad_op->in_out_idxs_.reset(grad_idxs); + return grad_op; } diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index cf7143eba4..19e552b745 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -10,6 +10,8 @@ namespace framework { class NOP : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(NOP, OperatorBase) + void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const platform::DeviceContext &dev_ctx) const override {} @@ -83,21 +85,19 @@ TEST(GradOpBuilder, MutiInOut) { EXPECT_EQ(grad_test_op->Input("Out1"), "out1"); EXPECT_EQ(grad_test_op->Inputs("Out2_mult"), std::vector({"out2_1", "out2_2"})); - EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix), - "out1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out1")), + f::GradVarName("out1")); + EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out2_mult")), std::vector( - {"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix})); + {f::GradVarName("out2_1"), f::GradVarName("out2_2")})); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), - std::vector({"in2_1" + f::kGradVarSuffix, - "in2_2" + f::kGradVarSuffix, - "in2_3" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix), - "in3" + f::kGradVarSuffix); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), + std::vector({f::GradVarName("in2_1"), + f::GradVarName("in2_2"), + f::GradVarName("in2_3")})); + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In3")), f::GradVarName("in3")); } TEST(GradOpBuilder, IOIgnoredInGradient) { @@ -119,19 +119,18 @@ TEST(GradOpBuilder, IOIgnoredInGradient) { EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), std::vector({"out1_1", "out1_2"})); EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName); - EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")), std::vector( - {"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix), - "out2" + f::kGradVarSuffix); + {f::GradVarName("out1_1"), f::GradVarName("out1_2")})); + EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")), + f::GradVarName("out2")); ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); - EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix), - "in1" + f::kGradVarSuffix); - EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix), + EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1")); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")), std::vector( - {"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix})); - EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix), + {f::GradVarName("in2_1"), f::GradVarName("in2_2")})); + EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In3_mult")), std::vector( - {"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix})); + {f::GradVarName("in3_1"), f::GradVarName("in3_2")})); } diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc new file mode 100644 index 0000000000..70045dbf7a --- /dev/null +++ b/paddle/framework/lod_tensor.cc @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/lod_tensor.h" + +#include + +namespace paddle { +namespace framework { + +LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); + // slice levels just need to update LOD info, each level will contains the + // whole tensor_, so no need to modify tensor_. + return LODTensor(tensor_, new_lod); +} + +LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin, + size_t elem_end) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem_begin < NumElements(level), + "element begin [%d] out of range [%d]", elem_begin, + NumElements(level)); + PADDLE_ENFORCE(elem_end < NumElements(level) + 1, + "element end [%d] out of range [%d]", elem_end, + NumElements(level)); + + auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, + true /*tensor_shared*/); + + // slice elements just need to update LOD info, because offsets are not + // changed, so the original tensor_ can be reused. + return LODTensor(tensor_, new_lod); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h new file mode 100644 index 0000000000..4933479b10 --- /dev/null +++ b/paddle/framework/lod_tensor.h @@ -0,0 +1,145 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#if (!PADDLE_ONLY_CPU) +#include +#include +#endif + +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace framework { + +/* + * LODTensor (Level of details Tensor) + * see https://en.wikipedia.org/wiki/Level_of_details for reference. + */ +class LODTensor { + public: +// Level save offsets of each unit. +#ifdef PADDLE_ONLY_CPU + using Level = std::vector; +#else + using Level = thrust::device_vector; +#endif + // LOD stores offsets of each level of units, the largest units level first, + // then the smaller units level. Each Level stores the offsets of units in + // Tesor. + typedef std::vector LOD; + + LODTensor() {} + LODTensor(const std::shared_ptr &tensor, + const std::shared_ptr &lod) { + Reset(tensor, lod); + } + + void Reset(const std::shared_ptr &tensor, + const std::shared_ptr &lod) { + tensor_ = tensor; + lod_start_pos_ = lod; + } + + /* + * Get a element from LOD. + */ + size_t lod_element(size_t level, size_t elem) const { + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem < NumElements(level), + "element begin [%d] out of range [%d]", elem, + NumElements(level)); + return (*lod_start_pos_)[level][elem]; + } + + /* + * Number of LODTensor's levels, each level has units of data, for example, + * in the sentence's view, article, paragraph, sentence are 3 levels. + */ + size_t NumLevels() const { + return lod_start_pos_ ? lod_start_pos_->size() : 0UL; + } + /* + * Number of elements in a level. + */ + size_t NumElements(size_t level = 0) const { + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + // the last offset is the end of last element + return lod_start_pos_->at(level).size() - 1; + } + + /* + * Slice of levels[level_begin:level_end], with tensor copied. + */ + template + LODTensor SliceCopied(size_t level_begin, size_t level_end, + const platform::Place &dst_place) const; + + /* + * Slice of levels[level_begin:level_end], with tensor shared. + */ + LODTensor SliceShared(size_t level_begin, size_t level_end) const; + + /* + * Slice of elements of a level, [elem_begin: elem_end], with tensor copied. + * @note: low performance in slice lod_start_pos_. + */ + template + LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end, + const platform::Place &dst_place) const; + + /* + * Slice of elements of a level, [elem_begin: elem_end], with tensor shared. + * @note: low performance in slice lod_start_pos_. + */ + LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const; + + /* + * Copy other's lod_start_pos_, to share LOD info. + * @note: the LOD info should not be changed. + */ + void ShareLOD(const LODTensor &other) { + lod_start_pos_ = other.lod_start_pos_; + } + + /* + * Copy other's lod_start_pos_'s content, free to mutate. + */ + void CopyLOD(const LODTensor &other) { + lod_start_pos_ = std::make_shared(*other.lod_start_pos_); + } + /* + * Determine whether LODTensor has a valid LOD info. + */ + bool HasLOD() const { return bool(lod_start_pos_); } + LOD *lod() const { return lod_start_pos_.get(); } + + std::shared_ptr &tensor() { return tensor_; } + Tensor *raw_tensor() { return tensor_.get(); } + + private: + std::shared_ptr lod_start_pos_; + std::shared_ptr tensor_; +}; + +} // namespace framework +} // namespace paddle + +#include "paddle/framework/lod_tensor_impl.h" diff --git a/paddle/framework/lod_tensor_impl.h b/paddle/framework/lod_tensor_impl.h new file mode 100644 index 0000000000..0eb6469aea --- /dev/null +++ b/paddle/framework/lod_tensor_impl.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/details/lod_tensor.h" + +namespace paddle { +namespace framework { + +template +LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end, + const platform::Place &dst_place) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + auto new_lod = details::SliceLOD(*lod_start_pos_, level_begin, level_end); + auto new_tensor = std::make_shared(); + new_tensor->CopyFrom(*tensor_, dst_place); + + return LODTensor(new_tensor, new_lod); +} + +template +LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin, + size_t elem_end, + const platform::Place &dst_place) const { + PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced."); + PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, + NumLevels()); + PADDLE_ENFORCE(elem_begin < NumElements(level), + "element begin [%d] out of range [%d]", elem_begin, + NumElements(level)); + PADDLE_ENFORCE(elem_end < NumElements(level) + 1, + "element end [%d] out of range [%d]", elem_end, + NumElements(level)); + + auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end, + false /*tensor_shared*/); + + auto start_idx = new_lod->front().front(); + auto end_idx = new_lod->front().back() - 1 /*the next element's start*/; + auto sliced_tensor = tensor_->Slice(start_idx, end_idx); + auto new_tensor = std::make_shared(); + new_tensor->CopyFrom(sliced_tensor, dst_place); + + return LODTensor(new_tensor, new_lod); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc new file mode 100644 index 0000000000..511716375e --- /dev/null +++ b/paddle/framework/lod_tensor_test.cc @@ -0,0 +1,165 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "paddle/framework/lod_tensor.h" + +#include +#include +#include + +namespace paddle { +namespace framework { + +class LODTensorTester : public ::testing::Test { + public: + virtual void SetUp() override { + lod_tensor.reset(new LODTensor); + // tensor's batch_size: 30 + // 3 levels + // 0 10 20 + // 0 5 10 15 20 + // 0 2 5 7 10 12 15 20 + auto lod = std::make_shared(); + lod->push_back(std::vector{0, 10, 20}); + lod->push_back(std::vector{0, 5, 10, 15, 20}); + lod->push_back(std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20}); + + auto tensor = std::make_shared(); + tensor->Resize({20 /*batch size*/, 128 /*dim*/}); + // malloc memory + tensor->mutable_data(place); + + lod_tensor->Reset(tensor, lod); + } + + protected: + std::unique_ptr lod_tensor; + platform::CPUPlace place; +}; + +TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor->NumLevels(), 3UL); } + +TEST_F(LODTensorTester, NumElements) { + ASSERT_EQ(lod_tensor->NumElements(0), 2UL); + ASSERT_EQ(lod_tensor->NumElements(1), 4UL); + ASSERT_EQ(lod_tensor->NumElements(2), 8UL); +} + +TEST_F(LODTensorTester, SliceShared_Level) { + // slice 1 level + for (size_t level = 0; level < 3UL; ++level) { + auto new_lod_tensor = lod_tensor->SliceShared(level, level + 1); + ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); + ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + } + // slice 2 level + for (size_t level = 0; level < 2UL; ++level) { + auto new_lod_tensor = lod_tensor->SliceShared(level, level + 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor->NumElements(level + 1)); + ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + } +} + +TEST_F(LODTensorTester, SliceCopied_Level) { + // slice 1 level + for (size_t level = 0; level < 3UL; ++level) { + auto new_lod_tensor = + lod_tensor->SliceCopied(level, level + 1, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); + ASSERT_EQ(new_lod_tensor.NumElements(0UL), lod_tensor->NumElements(level)); + // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + // TODO(superjom) add tensor comparation here. + } + // slice 2 level + for (size_t level = 0; level < 2UL; ++level) { + auto new_lod_tensor = + lod_tensor->SliceCopied(level, level + 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor->NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor->NumElements(level + 1)); + // ASSERT_EQ(new_lod_tensor.tensor(), lod_tensor->tensor()); + // TODO(superjom) add tensor comparation here. + } +} + +TEST_F(LODTensorTester, SliceShared_Element) { + size_t level = 0; + auto new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + new_lod_tensor = lod_tensor->SliceShared(level, 0, 2); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); +} + +TEST_F(LODTensorTester, SliceCopied_Element) { + size_t level = 0; + auto new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_EQ(new_lod_tensor.NumElements(2), 8UL); + ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + new_lod_tensor = lod_tensor->SliceCopied(level, 0, 2, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + ASSERT_NE(new_lod_tensor.raw_tensor(), lod_tensor->raw_tensor()); + + level = 1; + // LOD is + // 0 5 10 + // 0 2 5 7 10 + new_lod_tensor = lod_tensor->SliceCopied(level, 1, 3, place); + ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); + + ASSERT_EQ(new_lod_tensor.lod_element(0, 0), 0UL); + ASSERT_EQ(new_lod_tensor.lod_element(0, 1), 5UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 0), 0UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 1), 2UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 2), 5UL); + ASSERT_EQ(new_lod_tensor.lod_element(1, 3), 7UL); + + // TODO(superjom) compare the content of these tensors +} + +TEST_F(LODTensorTester, ShareLOD) { + LODTensor new_lod_tensor; + new_lod_tensor.ShareLOD(*lod_tensor); + ASSERT_EQ(new_lod_tensor.lod(), lod_tensor->lod()); +} + +TEST_F(LODTensorTester, CopyLOD) { + LODTensor new_lod_tensor; + new_lod_tensor.CopyLOD(*lod_tensor); + ASSERT_NE(new_lod_tensor.lod(), lod_tensor->lod()); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index b2813da83d..cb9164eec1 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -69,18 +69,18 @@ class OpProtoAndCheckerMaker { VariableBuilder AddInput(const std::string& name, const std::string& comment) { - auto input = proto_->mutable_inputs()->Add(); - *input->mutable_name() = name; - *input->mutable_comment() = comment; + VarProto* input = proto_->add_inputs(); + input->set_name(name); + input->set_comment(comment); return VariableBuilder{input, [=] { this->SetHasMultipleInput(); }, nullptr}; } VariableBuilder AddOutput(const std::string& name, const std::string& comment) { - auto output = proto_->mutable_outputs()->Add(); - *output->mutable_name() = name; - *output->mutable_comment() = comment; + VarProto* output = proto_->add_outputs(); + output->set_name(name); + output->set_comment(comment); return VariableBuilder{output, [=] { this->SetHasMultipleOutput(); }, [=] { this->SetHasTemporaryOutput(); }}; } @@ -89,17 +89,15 @@ class OpProtoAndCheckerMaker { TypedAttrChecker& AddAttr(const std::string& name, const std::string& comment, bool generated = false) { - auto attr = proto_->mutable_attrs()->Add(); - *attr->mutable_name() = name; - *attr->mutable_comment() = comment; + AttrProto* attr = proto_->add_attrs(); + attr->set_name(name); + attr->set_comment(comment); attr->set_generated(generated); attr->set_type(AttrTypeID()); return op_checker_->AddAttrChecker(name); } - void AddComment(const std::string& comment) { - *(proto_->mutable_comment()) = comment; - } + void AddComment(const std::string& comment) { proto_->set_comment(comment); } private: void SetHasMultiple(const std::string& in_out, bool* flag) { @@ -187,7 +185,7 @@ class OpRegistry { OpProto& op_proto = protos()[op_type]; auto maker = ProtoMakerType(&op_proto, &op_checker); maker.Validate(); - *op_proto.mutable_type() = op_type; + op_proto.set_type(op_type); PADDLE_ENFORCE( op_proto.IsInitialized(), "Fail to initialize %s's OpProto, because %s is not initialized", @@ -260,12 +258,6 @@ class OpRegistry { return CreateOp(op_desc.type(), inputs, outputs, attrs); } - static bool SupportGPU(const std::string& op_type) { - OperatorWithKernel::OpKernelKey key; - key.place_ = platform::GPUPlace(); - return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0; - } - static std::shared_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); @@ -313,22 +305,45 @@ class OpRegistry { } }; +class Registrar { + public: + // In our design, various kinds of classes, e.g., operators and kernels, have + // their corresponding registry and registrar. The action of registration is + // in the constructor of a global registrar variable, which, however, are not + // used in the code that calls package framework, and would be removed from + // the generated binary file by the linker. To avoid such removal, we add + // Touch to all registrar classes and make USE_OP macros to call this + // method. So, as long as the callee code calls USE_OP, the global + // registrar variable won't be removed by the linker. + void Touch() {} +}; + template -class OpRegisterHelper { +class OpRegistrar : public Registrar { public: - explicit OpRegisterHelper(const char* op_type) { + explicit OpRegistrar(const char* op_type) { OpRegistry::RegisterOp(op_type); } }; template -class GradOpRegisterHelper { +class GradOpRegistrar : public Registrar { public: - GradOpRegisterHelper(const char* op_type, const char* grad_op_type) { + GradOpRegistrar(const char* op_type, const char* grad_op_type) { OpRegistry::RegisterGradOp(op_type, grad_op_type); } }; +template +class OpKernelRegistrar : public Registrar { + public: + explicit OpKernelRegistrar(const char* op_type) { + OperatorWithKernel::OpKernelKey key; + key.place_ = PlaceType(); + OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KernelType); + } +}; + /** * check if MACRO is used in GLOBAL NAMESPACE. */ @@ -339,97 +354,121 @@ class GradOpRegisterHelper { msg) /** - * Macro to Register Operator. + * Macro to register Operator. */ -#define REGISTER_OP(__op_type, __op_class, __op_maker_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE(__reg_op__##__op_type, \ - "REGISTER_OP must be in global namespace"); \ - static ::paddle::framework::OpRegisterHelper<__op_class, __op_maker_class> \ - __op_register_##__op_type##__(#__op_type); \ - int __op_register_##__op_type##_handle__() { return 0; } +#define REGISTER_OP(op_type, op_class, op_maker_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \ + static ::paddle::framework::OpRegistrar \ + __op_registrar_##op_type##__(#op_type); \ + int TouchOpRegistrar_##op_type() { \ + __op_registrar_##op_type##__.Touch(); \ + return 0; \ + } /** - * Macro to Register Gradient Operator. + * Macro to register Gradient Operator. */ -#define REGISTER_GRADIENT_OP(__op_type, __grad_op_type, __grad_op_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##__op_type##__grad_op_type, \ - "REGISTER_GRADIENT_OP must be in global namespace"); \ - static ::paddle::framework::GradOpRegisterHelper<__grad_op_class> \ - __op_gradient_register_##__op_type##__grad_op_type##__(#__op_type, \ - #__grad_op_type); \ - int __op_gradient_register_##__op_type##__grad_op_type##_handle__() { \ - return 0; \ +#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##grad_op_type, \ + "REGISTER_GRADIENT_OP must be called in global namespace"); \ + static ::paddle::framework::GradOpRegistrar \ + __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \ + #grad_op_type); \ + int TouchOpGradientRegistrar_##op_type() { \ + __op_gradient_registrar_##op_type##_##grad_op_type##__.Touch(); \ + return 0; \ } /** - * Macro to Forbid user register Gradient Operator. + * Macro to register OperatorKernel. */ -#define NO_GRADIENT(__op_type) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##__op_type##__op_type##_grad, \ - "NO_GRADIENT must be in global namespace") +#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "REGISTER_OP_KERNEL must be called in global namespace"); \ + static ::paddle::framework::OpKernelRegistrar \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \ + int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch(); \ + return 0; \ + } /** - * Macro to Register OperatorKernel. + * Macro to Forbid user register Gradient Operator. */ -#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_op_kernel_##type##_##DEVICE_TYPE##__, \ - "REGISTER_OP_KERNEL must be in global namespace"); \ - struct __op_kernel_register__##type##__##DEVICE_TYPE##__ { \ - __op_kernel_register__##type##__##DEVICE_TYPE##__() { \ - ::paddle::framework::OperatorWithKernel::OpKernelKey key; \ - key.place_ = PlaceType(); \ - ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ - .reset(new __VA_ARGS__()); \ - } \ - }; \ - static __op_kernel_register__##type##__##DEVICE_TYPE##__ \ - __reg_kernel_##type##__##DEVICE_TYPE##__; \ - int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; } - -// (type, KernelType) -#define REGISTER_OP_GPU_KERNEL(type, ...) \ - REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__) - -// (type, KernelType) -#define REGISTER_OP_CPU_KERNEL(type, ...) \ - REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) +#define NO_GRADIENT(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##op_type##_grad, \ + "NO_GRADIENT must be called in global namespace") + +#define REGISTER_OP_GPU_KERNEL(op_type, ...) \ + REGISTER_OP_KERNEL(op_type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__) + +#define REGISTER_OP_CPU_KERNEL(op_type, ...) \ + REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) /** * Macro to mark what Operator and Kernel we will use and tell the compiler to * link them into target. */ -#define USE_OP_WITHOUT_KERNEL(op_type) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __use_op_without_kernel_##op_type, \ - "USE_OP_WITHOUT_KERNEL must be in global namespace"); \ - extern int __op_register_##op_type##_handle__(); \ - static int __use_op_ptr_##op_type##_without_kernel__ \ - __attribute__((unused)) = __op_register_##op_type##_handle__() - -#define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ - "USE_OP_KERNEL must be in global namespace"); \ - extern int __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__(); \ - static int __use_op_ptr_##op_type##_##DEVICE_TYPE##_kernel__ \ - __attribute__((unused)) = \ - __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__() - -// use Operator with only cpu kernel. -#define USE_OP_CPU(op_type) \ - USE_OP_WITHOUT_KERNEL(op_type); \ - USE_OP_KERNEL(op_type, CPU) +#define USE_OP_ITSELF(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_itself_##op_type, \ + "USE_OP_ITSELF must be called in global namespace"); \ + extern int TouchOpRegistrar_##op_type(); \ + static int use_op_itself_##op_type##_ __attribute__((unused)) = \ + TouchOpRegistrar_##op_type() + +// TODO(fengjiayi): Most ops' gradient op have not been compeleted. So we use +// `NO_GRAD` to disable micro USE_OP_GRADIENT(op_type). Otherwise the code can't +// be compiled. `NO_GRAD` should be removed after all gradient ops are +// compeleted. +#define NO_GRAD +#ifndef NO_GRAD +#define USE_OP_GRADIENT(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_gradient_##op_type, \ + "USE_OP_GRADIENT must be called in global namespace"); \ + extern int TouchOpGradientRegistrar_##op_type(); \ + static int use_op_gradient_##op_type##_ __attribute__((unused)) = \ + TouchOpGradientRegistrar_##op_type() +#else +#define USE_OP_GRADIENT(op_type) +#endif + +#define USE_OP_DEVICE_KERNEL(op_type, DEVICE_TYPE) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "USE_OP_DEVICE_KERNEL must be in global namespace"); \ + extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \ + static int use_op_kernel_##op_type##_##DEVICE_TYPE##_ \ + __attribute__((unused)) = \ + TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() + +// TODO(fengjiayi): The following macros seems ugly, do we have better method? #ifdef PADDLE_ONLY_CPU -#define USE_OP(op_type) USE_OP_CPU(op_type) +#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU) #else -#define USE_OP(op_type) \ - USE_OP_CPU(op_type); \ - USE_OP_KERNEL(op_type, GPU) +#define USE_OP_KERNEL(op_type) \ + USE_OP_DEVICE_KERNEL(op_type, CPU); \ + USE_OP_DEVICE_KERNEL(op_type, GPU) #endif +#define USE_NO_GRAD_OP(op_type) \ + USE_OP_ITSELF(op_type); \ + USE_OP_KERNEL(op_type) + +#define USE_CPU_OP(op_type) \ + USE_OP_ITSELF(op_type); \ + USE_OP_DEVICE_KERNEL(op_type, CPU); \ + USE_OP_GRADIENT(op_type) + +#define USE_OP(op_type) \ + USE_NO_GRAD_OP(op_type); \ + USE_OP_GRADIENT(op_type) + } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 9894928a7a..e64126c709 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -7,6 +7,8 @@ namespace paddle { namespace framework { class CosineOp : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(CosineOp, OperatorBase) + void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} void InferShape(const Scope& scope) const override {} @@ -27,6 +29,8 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(MyTestOp, OperatorBase) + void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 03fabff79b..68e7fedcd6 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -15,7 +15,6 @@ limitations under the License. */ #pragma once #include -#include #include #include #include @@ -27,25 +26,26 @@ limitations under the License. */ #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" #include "paddle/platform/place.h" +#include "paddle/platform/variant.h" #include "paddle/utils/Error.h" namespace paddle { namespace framework { /// If a variable is a empty variable, that name will be used. -const std::string kEmptyVarName = "@EMPTY@"; +constexpr char kEmptyVarName[] = "@EMPTY@"; /// If a variable is a temporary variable, that name will be set in Python, /// but it will be convert to a unique name in scope after OpCreator. -const std::string kTempVarName = "@TEMP@"; +constexpr char kTempVarName[] = "@TEMP@"; /// If a variable's name has a certain suffix, it means that the /// variable is the gradient of another varibale. /// e.g. Variable "x@GRAD" is the gradient of varibale "x". -const std::string kGradVarSuffix = "@GRAD"; +constexpr char kGradVarSuffix[] = "@GRAD"; /// Variables with this suffix are supposed to be filled up with zeros. -const std::string kZeroVarSuffix = "@ZERO"; +constexpr char kZeroVarSuffix[] = "@ZERO"; inline std::string GradVarName(const std::string& var_name) { return var_name + kGradVarSuffix; @@ -63,6 +63,17 @@ class ExecutionContext; */ class OperatorBase { public: + OperatorBase() {} // TODO(yi): This constructor is to be removed. + OperatorBase(const std::string& type, const std::vector& inputs, + const std::vector& outputs, + const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : type_(type), + inputs_(inputs), + outputs_(outputs), + attrs_(attrs), + in_out_idxs_(in_out_idxs) {} + virtual ~OperatorBase() {} template @@ -88,21 +99,31 @@ class OperatorBase { virtual bool IsNetOp() const { return false; } + virtual bool SupportGPU() const { return false; } + /// rename inputs outputs name void Rename(const std::string& old_name, const std::string& new_name); //! Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; - //! Get a input which has multiple variables. //! TODO add a vector_view to prevent memory copy. std::vector Inputs(const std::string& name) const; + //! Get a output with argument's name described in `op_proto` const std::string& Output(const std::string& name) const; //! Get an output which has multiple variables. //! TODO add a vector_view to prevent memory copy. std::vector Outputs(const std::string& name) const; + const std::string Type() const { return type_; } + const std::vector Inputs() const { return inputs_; } + const std::vector Outputs() const { return outputs_; } + const AttributeMap& Attrs() const { return attrs_; } + const std::unordered_map* InOutIdx() const { + return in_out_idxs_.get(); + } + public: std::string type_; // NOTE: in case of OpGrad, inputs_ contains: @@ -118,10 +139,10 @@ class OperatorBase { std::shared_ptr> in_out_idxs_; }; -class OperatorContext { +class InferShapeContext { public: - OperatorContext(const OperatorBase* op, const Scope& scope) - : op_(*op), scope_(scope) {} + InferShapeContext(const OperatorBase& op, const Scope& scope) + : op_(op), scope_(scope) {} size_t InputSize() const { return op_.inputs_.size(); } @@ -232,12 +253,6 @@ class OperatorContext { const Scope& scope_; }; -class InferShapeContext : public OperatorContext { - public: - InferShapeContext(const OperatorBase* op, const Scope& scope) - : OperatorContext(op, scope) {} -}; - template struct EigenDeviceConverter; @@ -253,11 +268,11 @@ struct EigenDeviceConverter { }; #endif -class ExecutionContext : public OperatorContext { +class ExecutionContext : public InferShapeContext { public: - ExecutionContext(const OperatorBase* op, const Scope& scope, + ExecutionContext(const OperatorBase& op, const Scope& scope, const platform::DeviceContext* device_context) - : OperatorContext(op, scope), device_context_(device_context) {} + : InferShapeContext(op, scope), device_context_(device_context) {} template & inputs, + const std::vector& outputs, + const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + struct OpKernelKey { platform::Place place_; @@ -308,14 +331,14 @@ class OperatorWithKernel : public OperatorBase { using OpKernelMap = std::unordered_map, OpKernelHash>; - void InferShape(const Scope& scope) const { - InferShape(InferShapeContext(this, scope)); + void InferShape(const Scope& scope) const override { + InferShape(InferShapeContext(*this, scope)); } void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const final { auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); - opKernel->Compute(ExecutionContext(this, scope, &dev_ctx)); + opKernel->Compute(ExecutionContext(*this, scope, &dev_ctx)); } static std::unordered_map& @@ -324,9 +347,25 @@ class OperatorWithKernel : public OperatorBase { return g_all_op_kernels; } + bool SupportGPU() const override { + OperatorWithKernel::OpKernelKey key; + key.place_ = platform::GPUPlace(); + return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0; + } + protected: virtual void InferShape(const InferShapeContext& ctx) const = 0; }; +#define DEFINE_OPERATOR_CTOR(Class, ParentClass) \ + public: \ + Class() { /* TODO(yi): This constructor is to be removed. */ \ + } \ + Class(const std::string& type, const std::vector& inputs, \ + const std::vector& outputs, \ + const ::paddle::framework::AttributeMap& attrs, \ + std::unordered_map* in_out_idxs) \ + : ParentClass(type, inputs, outputs, attrs, in_out_idxs) {} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 387aada749..7dbd5b14ab 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -23,6 +23,8 @@ static int op_run_num = 0; class OpWithoutKernelTest : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(OpWithoutKernelTest, OperatorBase) + void Init() override { x = 1; } void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, @@ -97,6 +99,8 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { static int cpu_kernel_run_num = 0; class OpWithKernelTest : public OperatorWithKernel { + public: + DEFINE_OPERATOR_CTOR(OpWithKernelTest, OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext& ctx) const override {} }; @@ -116,6 +120,8 @@ class CPUKernelTest : public OpKernel { // multiple inputs test class OperatorMultiInputsTest : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(OperatorMultiInputsTest, OperatorBase) + void Init() override { x = 1; } void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 9ee2c6af86..75cd5bcb38 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -18,13 +18,11 @@ limitations under the License. */ #include "paddle/framework/backward.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/scope.h" #include "paddle/framework/tensor_py.h" #include "paddle/operators/net_op.h" -#include "paddle/operators/type_alias.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/string/to_string.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -32,18 +30,23 @@ limitations under the License. */ namespace py = pybind11; USE_OP(add_two); -USE_OP_CPU(onehot_cross_entropy); -USE_OP_WITHOUT_KERNEL(fc); -USE_OP(sgd); +USE_CPU_OP(onehot_cross_entropy); +USE_NO_GRAD_OP(sgd); USE_OP(mul); USE_OP(mean); USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); USE_OP(fill_zeros_like); -USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP_ITSELF(recurrent_op); +USE_OP(gaussian_random); +USE_OP(uniform_random); + namespace paddle { namespace framework { + +using Tensor = framework::Tensor; + template void ExposeOperator(ClassType &m) { m.def("infer_shape", &ClassType::type::InferShape) @@ -56,6 +59,26 @@ void ExposeOperator(ClassType &m) { [](const typename ClassType::type &op) -> std::vector { return op.outputs_; }) + .def("inputs", + [](const typename ClassType::type &op) -> std::vector { + return op.inputs_; + }) + .def("support_gpu", &ClassType::type::SupportGPU) + .def("temp_outputs", + [](const typename ClassType::type &op) -> std::vector { + auto iter = op.attrs_.find("temporary_index"); + std::vector ret; + if (iter == op.attrs_.end()) { + return ret; + } else { + auto tmp_idx = boost::get>(iter->second); + ret.reserve(tmp_idx.size()); + for (auto &index : tmp_idx) { + ret.push_back(op.outputs_.at(index)); + } + return ret; + } + }) .def("__str__", &ClassType::type::DebugString); } @@ -129,8 +152,8 @@ All parameter, weight, gradient are variables in Paddle. [](Variable &self) -> Tensor * { return self.GetMutable(); }, py::return_value_policy::reference) .def("get_net", - [](Variable &self) -> ops::NetOp * { - return self.GetMutable(); + [](Variable &self) -> operators::NetOp * { + return self.GetMutable(); }, py::return_value_policy::reference); @@ -184,9 +207,13 @@ All parameter, weight, gradient are variables in Paddle. }); // clang-format on - py::class_(m, "GPUPlace").def(py::init()); + py::class_(m, "GPUPlace") + .def(py::init()) + .def("__str__", string::to_string); - py::class_(m, "CPUPlace").def(py::init<>()); + py::class_(m, "CPUPlace") + .def(py::init<>()) + .def("__str__", string::to_string); py::class_> operator_base( m, "Operator"); @@ -201,8 +228,6 @@ All parameter, weight, gradient are variables in Paddle. return OpRegistry::CreateOp(desc); }); - operator_base.def_static("support_gpu", &OpRegistry::SupportGPU); - operator_base.def("backward", [](const OperatorBase &forwardOp, const std::unordered_set &no_grad_vars) { @@ -211,23 +236,24 @@ All parameter, weight, gradient are variables in Paddle. ExposeOperator(operator_base); - py::class_> net(m, "Net"); + py::class_> net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> std::shared_ptr { + auto retv = std::make_shared(); retv->type_ = "plain_net"; return retv; }) - .def("add_op", &ops::NetOp::AddOp) - .def( - "add_op", - [](ops::NetOp &self, const std::shared_ptr &net) -> void { - self.AddOp(std::static_pointer_cast(net)); - }) - .def("complete_add_op", &ops::NetOp::CompleteAddOp) - .def("complete_add_op", - [](std::shared_ptr &self) { self->CompleteAddOp(); }); + .def("add_op", &operators::NetOp::AddOp) + .def("add_op", + [](operators::NetOp &self, + const std::shared_ptr &net) -> void { + self.AddOp(std::static_pointer_cast(net)); + }) + .def("complete_add_op", &operators::NetOp::CompleteAddOp) + .def("complete_add_op", [](std::shared_ptr &self) { + self->CompleteAddOp(); + }); ExposeOperator(net); diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index c44df05e4b..cd1b4de426 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -18,6 +18,8 @@ limitations under the License. */ #include #include #include +#include + #include "paddle/framework/ddim.h" #include "paddle/memory/memory.h" #include "paddle/platform/device_context.h" @@ -77,11 +79,11 @@ class Tensor { inline const DDim& dims() const; /*! Resize the dimensions of the memory block. */ - inline void Resize(const DDim& dims); + inline Tensor& Resize(const DDim& dims); /*! The internal of two tensors share the same memory block. */ template - inline void ShareDataWith(const Tensor& src); + inline Tensor& ShareDataWith(const Tensor& src); /** * @brief Copy the content of external tensor to a new place. diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 8d9bec6dc9..7d7263b899 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -23,9 +23,11 @@ template inline void Tensor::check_memory_size() const { PADDLE_ENFORCE_NOT_NULL( holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_, - "Tensor's dims_ is out of bound. Call Tensor::mutable_data " - "first to re-allocate memory."); + PADDLE_ENFORCE_GE( + holder_->size(), product(dims_) * sizeof(T) + offset_, + "Tensor's dims_ is out of bound. Call Tensor::mutable_data " + "first to re-allocate memory.\n" + "or maybe the required data-type mismatches the data already stored."); } template @@ -78,9 +80,10 @@ inline T* Tensor::mutable_data(platform::Place place) { } template -inline void Tensor::ShareDataWith(const Tensor& src) { +inline Tensor& Tensor::ShareDataWith(const Tensor& src) { src.check_memory_size(); *this = src; + return *this; } template @@ -136,7 +139,10 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { return dst; } -inline void Tensor::Resize(const DDim& dims) { dims_ = dims; } +inline Tensor& Tensor::Resize(const DDim& dims) { + dims_ = dims; + return *this; +} inline const DDim& Tensor::dims() const { return dims_; } diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 20276181b9..7db38d5cae 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -19,7 +19,7 @@ TEST(Tensor, Dims) { using namespace paddle::framework; using namespace paddle::platform; Tensor tt; - tt.Resize(make_ddim({2, 3, 4})); + tt.Resize({2, 3, 4}); DDim dims = tt.dims(); ASSERT_EQ(arity(dims), 3); for (int i = 0; i < 3; ++i) { diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 93304f7303..7dfb6f61c5 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -38,10 +38,11 @@ if(WITH_GPU) add_simple_unittest(RowConvOpTest) add_simple_unittest(BlockExpandOpTest) add_simple_unittest(CropOpTest) + add_simple_unittest(DepthwiseConvOpTest) endif() -add_simple_unittest(ConvOpTest) add_simple_unittest(Im2ColTest) +add_simple_unittest(GemmConvOpTest) endif() add_style_check_target(paddle_function ${h_files}) diff --git a/paddle/function/ConvOpTest.cpp b/paddle/function/ConvOpTest.cpp deleted file mode 100644 index 7f32c73479..0000000000 --- a/paddle/function/ConvOpTest.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include "Function.h" -#include "FunctionTest.h" - -namespace paddle { - -enum TestType { - kForwardTest = 0, - kBackwardInputTest = 1, - kBackwardFilterTest = 2, -}; - -template -class ConvolutionTest { -public: - ConvolutionTest(const std::string& conv1, - const std::string& conv2, - TestType type, - bool useGroups = true, - std::string algo = "auto") { - for (size_t batchSize : {1, 32}) { - for (size_t inputSize : {7, 14, 54}) { - for (size_t filterSize : {1, 3, 5}) { - for (size_t inputChannels : {3, 64}) { - for (size_t outputChannels : {3, 64}) { - if (inputChannels > outputChannels) break; - size_t groups; - if (!useGroups) { - groups = 1; - } else { - if (outputChannels % inputChannels != 0) continue; - groups = inputChannels; - } - - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", groups) - .set("algo", algo)); - - TensorShape input{ - batchSize, inputChannels, inputSize, inputSize}; - - TensorShape filter; - if (groups > 1) - filter = TensorShape({groups, - outputChannels / groups, - inputChannels / groups, - filterSize, - filterSize}); - else - filter = TensorShape({outputChannels, - inputChannels, - filterSize, - filterSize}); - TensorShape output{ - batchSize, outputChannels, outputSize, outputSize}; - - if (type == kForwardTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.run(); - } else if (type == kBackwardInputTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); - test.run(); - } else if (type == kBackwardFilterTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), - ADD_TO); - test.run(); - } - } - } - } - } - } - } - } - } -}; - -// Mainly used to test cases where the height and width (input, filter) -// are not equal. -template -class ConvolutionTest2 { -public: - ConvolutionTest2(const std::string& conv1, - const std::string& conv2, - TestType type, - bool useGroups = true, - std::string algo = "auto") { - for (size_t batchSize : {16}) { - for (size_t inputHeight : {7, 31}) { - for (size_t inputWidth : {10, 54}) { - for (size_t filterHeight : {1, 5}) { - for (size_t filterWidth : {3, 7}) { - for (size_t inputChannels : {7}) { - for (size_t outputChannels : {7}) { - size_t groups; - if (!useGroups) { - groups = 1; - } else { - if (outputChannels % inputChannels != 0) continue; - groups = inputChannels; - } - - size_t stride = 1; - size_t padding = 0; - size_t outputHeight = - (inputHeight - filterHeight + 2 * padding + stride) / - stride; - size_t outputWidth = - (inputWidth - filterWidth + 2 * padding + stride) / - stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputHeight - << " inputWidth=" << inputWidth - << " outputChannels=" << outputChannels - << " filterHeight=" << filterHeight - << " filterWidth=" << filterWidth - << " outputHeight=" << outputHeight - << " outputWidth=" << outputWidth - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", groups) - .set("algo", algo)); - - TensorShape input{ - batchSize, inputChannels, inputHeight, inputWidth}; - - TensorShape filter; - if (groups > 1) - filter = TensorShape({groups, - outputChannels / groups, - inputChannels / groups, - filterHeight, - filterWidth}); - else - filter = TensorShape({outputChannels, - inputChannels, - filterHeight, - filterWidth}); - TensorShape output{ - batchSize, outputChannels, outputHeight, outputWidth}; - - if (type == kForwardTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.run(); - } else if (type == kBackwardInputTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); - test.run(); - } else if (type == kBackwardFilterTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), - ADD_TO); - test.run(); - } - } - } - } - } - } - } - } - } -}; - -// ======Start Convolution TEST====== - -TEST(Forward, GEMM) { - ConvolutionTest test( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); - ConvolutionTest2 test2( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); -} - -#ifndef PADDLE_ONLY_CPU -TEST(Forward, GEMM2) { - ConvolutionTest test( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); - ConvolutionTest2 test2( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); -} - -TEST(BackwardInput, GEMM) { - ConvolutionTest test( - "GemmConvGradInput-CPU", - "GemmConvGradInput-GPU", - kBackwardInputTest, - false); - ConvolutionTest2 test2( - "GemmConvGradInput-CPU", - "GemmConvGradInput-GPU", - kBackwardInputTest, - false); -} - -TEST(BackwardFilter, GEMM) { - ConvolutionTest test( - "GemmConvGradFilter-CPU", - "GemmConvGradFilter-GPU", - kBackwardFilterTest, - false); - ConvolutionTest2 test2( - "GemmConvGradFilter-CPU", - "GemmConvGradFilter-GPU", - kBackwardFilterTest, - false); -} -#endif -// ======End Convolution TEST====== - -// ======Start DepthwiseConvolution TEST====== - -// TODO(zhaolong) The depthwise convolution cpu test will be added when the cpu -// version of depthwiseConv is implemented. - -#ifndef PADDLE_ONLY_CPU - -TEST(DepthwiseConvForward, GEMM2) { - ConvolutionTest test( - "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); - ConvolutionTest2 test2( - "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); -} - -TEST(DepthwiseConvBackwardInput, GEMM) { - ConvolutionTest test( - "GemmConvGradInput-CPU", - "DepthwiseConvGradInput-GPU", - kBackwardInputTest); - ConvolutionTest2 test2( - "GemmConvGradInput-CPU", - "DepthwiseConvGradInput-GPU", - kBackwardInputTest); -} - -TEST(DepthwiseConvBackwardFilter, GEMM) { - ConvolutionTest test( - "GemmConvGradFilter-CPU", - "DepthwiseConvGradFilter-GPU", - kBackwardFilterTest); - ConvolutionTest2 test2( - "GemmConvGradFilter-CPU", - "DepthwiseConvGradFilter-GPU", - kBackwardFilterTest); -} - -#endif -// ======End DepthwiseConvolution TEST====== - -} // namespace paddle diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h new file mode 100644 index 0000000000..cb02a96d0d --- /dev/null +++ b/paddle/function/ConvOpTest.h @@ -0,0 +1,256 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "FunctionTest.h" + +namespace paddle { + +template +void forward(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.run(); +} + +template +void backward_input(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); + test.run(); +} + +template +void backward_filter(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), ADD_TO); + test.run(); +} + +template +using Function = void (*)(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output); + +/** + * \brief A basic convolution function test interface. + * + * \param conv1 type name of convolution function 1. + * \param conv2 type name of convolution function 2. + * \param function test function, can be one of the forward, backward_input + * backward_filter function. + * Example: + * 1. Compare GemmConv's CPU and GPU implementation: + * Convolution( + * "GemmConv-CPU", "GemmConv-GPU", forward); + */ +template +void Convolution(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {1, 5}) { + for (size_t inputSize : {7, 14, 31}) { + for (size_t filterSize : {1, 3, 5}) { + for (size_t inputChannels : {3, 16}) { + for (size_t outputChannels : {3, 16}) { + if (outputChannels < inputChannels) continue; + for (size_t stride : {1, 2}) { + for (size_t padding : {0, 1}) { + if (padding >= filterSize) break; + + // NNPACK only supports stride = 1 if batchSize > 1 + if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && + batchSize > 1 && stride > 1) + break; + + size_t outputSize = + (inputSize - filterSize + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize << " stride=" << stride + << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", (size_t)1) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{ + outputChannels, inputChannels, filterSize, filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +/** + * \brief A convolution function test interface for + * image height is not equal image width. + */ +template +void Convolution2(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {4}) { + for (size_t inputHeight : {7, 31}) { + for (size_t inputWidth : {10, 54}) { + for (size_t filterHeight : {1, 5}) { + for (size_t filterWidth : {3, 7}) { + for (size_t inputChannels : {7}) { + for (size_t outputChannels : {7}) { + size_t stride = 1; + size_t padding = 0; + size_t outputHeight = + (inputHeight - filterHeight + 2 * padding + stride) / + stride; + size_t outputWidth = + (inputWidth - filterWidth + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputHeight + << " inputWidth=" << inputWidth + << " outputChannels=" << outputChannels + << " filterHeight=" << filterHeight + << " filterWidth=" << filterWidth + << " outputHeight=" << outputHeight + << " outputWidth=" << outputWidth + << " stride=" << stride << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", (size_t)1) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputHeight, inputWidth}; + TensorShape filter{ + outputChannels, inputChannels, filterHeight, filterWidth}; + TensorShape output{ + batchSize, outputChannels, outputHeight, outputWidth}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +/** + * \brief A convolution function test interface for depthwise convolution. + */ +template +void DepthwiseConvolution(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {1, 32}) { + for (size_t inputSize : {7, 14, 54}) { + for (size_t filterSize : {3, 4}) { + for (size_t inputChannels : {32}) { + for (size_t outputChannels : {32, 64}) { + for (size_t stride : {1, 2}) { + for (size_t padding : {0, 1}) { + // NNPACK only supports stride = 1 if batchSize > 1, + // and there has some bug when batchSize > 1 and groups != 1 + if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && + batchSize > 1) + break; + + size_t outputSize = + (inputSize - filterSize + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize << " stride=" << stride + << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + size_t groups = inputChannels; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", groups) + .set("algo", (std::string) "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{groups, + outputChannels / groups, + inputChannels / groups, + filterSize, + filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/operators/mean_op_test.cc b/paddle/function/DepthwiseConvOpTest.cpp similarity index 51% rename from paddle/operators/mean_op_test.cc rename to paddle/function/DepthwiseConvOpTest.cpp index 375dcd50e1..f44ae0c342 100644 --- a/paddle/operators/mean_op_test.cc +++ b/paddle/function/DepthwiseConvOpTest.cpp @@ -13,13 +13,25 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include "ConvOpTest.h" -#include +namespace paddle { -USE_OP(mean); +#ifndef PADDLE_ONLY_CPU +TEST(DepthwiseConv, Forward) { + DepthwiseConvolution( + "GemmConv-CPU", "DepthwiseConv-GPU", forward); +} + +TEST(DepthwiseConv, BackwardInput) { + DepthwiseConvolution( + "GemmConvGradInput-CPU", "DepthwiseConvGradInput-GPU", backward_input); +} -TEST(MeanOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("mean"); - ASSERT_NE(it, protos.end()); +TEST(DepthwiseConv, BackwardFilter) { + DepthwiseConvolution( + "GemmConvGradFilter-CPU", "DepthwiseConvGradFilter-GPU", backward_filter); } +#endif + +} // namespace paddle diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp index 6360a6e023..7b0b1c6adb 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/function/FunctionTest.cpp @@ -93,8 +93,8 @@ TEST(Arguments, Matrix) { MatrixPtr matrix = Matrix::create(100, 200); CheckBufferArg check = [=](const BufferArg& arg) { EXPECT_EQ(arg.shape().ndims(), 2U); - EXPECT_EQ(arg.shape()[0], 100); - EXPECT_EQ(arg.shape()[1], 200); + EXPECT_EQ(arg.shape()[0], 100U); + EXPECT_EQ(arg.shape()[1], 200U); EXPECT_EQ(arg.data(), matrix->getData()); EXPECT_EQ(arg.matrix().getHeight(), matrix->getHeight()); @@ -112,8 +112,8 @@ TEST(Arguments, Matrix) { TEST(Arguments, Vector) { VectorPtr vector = Vector::create(100, false); CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 1); - EXPECT_EQ(arg.shape()[0], 100); + EXPECT_EQ(arg.shape().ndims(), 1U); + EXPECT_EQ(arg.shape()[0], 100U); EXPECT_EQ(arg.data(), vector->getData()); CpuVector inVector = arg.vector(); @@ -131,9 +131,9 @@ TEST(Arguments, Vector) { TEST(Arguments, CpuSparseMatrix) { CpuSparseMatrix sparse(200, 300, 50); CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 2); - EXPECT_EQ(arg.shape()[0], 200); - EXPECT_EQ(arg.shape()[1], 300); + EXPECT_EQ(arg.shape().ndims(), 2U); + EXPECT_EQ(arg.shape()[0], 200U); + EXPECT_EQ(arg.shape()[1], 300U); EXPECT_EQ(arg.data(), sparse.getData()); // CHECK_EQ(arg.sparse().nnz(), 50); // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT); @@ -152,10 +152,10 @@ TEST(Arguments, CpuSparseMatrix) { TEST(Arguments, BufferArg) { BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3}); CheckBufferArg check = [=](const BufferArg& arg) { - EXPECT_EQ(arg.shape().ndims(), 3); - EXPECT_EQ(arg.shape()[0], 1); - EXPECT_EQ(arg.shape()[1], 2); - EXPECT_EQ(arg.shape()[2], 3); + EXPECT_EQ(arg.shape().ndims(), 3U); + EXPECT_EQ(arg.shape()[0], 1U); + EXPECT_EQ(arg.shape()[1], 2U); + EXPECT_EQ(arg.shape()[2], 3U); }; BufferArgs argments; diff --git a/paddle/function/GemmConvOpTest.cpp b/paddle/function/GemmConvOpTest.cpp new file mode 100644 index 0000000000..5283d79a5a --- /dev/null +++ b/paddle/function/GemmConvOpTest.cpp @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "ConvOpTest.h" + +namespace paddle { + +TEST(GemmConv, NaiveConv) { + Convolution( + "NaiveConv-CPU", "GemmConv-CPU", forward); + Convolution2( + "NaiveConv-CPU", "GemmConv-CPU", forward); +} + +#ifndef PADDLE_ONLY_CPU +TEST(GemmConv, Forward) { + Convolution( + "GemmConv-CPU", "GemmConv-GPU", forward); + Convolution2( + "GemmConv-CPU", "GemmConv-GPU", forward); +} + +TEST(GemmConv, BackwardInput) { + Convolution( + "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); + Convolution2( + "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); +} + +TEST(GemmConv, BackwardFilter) { + Convolution( + "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); + Convolution2( + "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); +} +#endif + +} // namespace paddle diff --git a/paddle/function/TensorShapeTest.cpp b/paddle/function/TensorShapeTest.cpp index e5c6982377..e55d516d4a 100644 --- a/paddle/function/TensorShapeTest.cpp +++ b/paddle/function/TensorShapeTest.cpp @@ -44,7 +44,7 @@ TEST(TensorShape, GetAndSet) { EXPECT_EQ(t.ndims(), 3U); EXPECT_EQ(t.getElements(), 6U); - EXPECT_EQ(t[1], 2); + EXPECT_EQ(t[1], 2U); t.setDim(1, 100); EXPECT_EQ(t.getElements(), 300U); EXPECT_EQ(t[1], 100U); diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index 00d048eb21..6ccc487cf1 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -196,30 +196,30 @@ public: CHECK_EQ(status, nnp_status_success); } } else { - for (size_t g = 0; g < groups_; g++) { - // only supports stride = 1 - CHECK_EQ(strideH(), 1); - CHECK_EQ(strideW(), 1); - nnp_status status = - nnp_convolution_output(algorithm_, - batchSize, - inputChannels / groups_, - outputChannels / groups_, - inputSize, - padding, - kernelSize, - inputData + inputOffset * g, - filterData + filterOffset * g, - nullptr, /* bias */ - outputData + outputOffset * g, - bufferPtr, - sizePtr, - nnp_activation_identity, - nullptr, - threadpool_, /* threadpool */ - nullptr); - CHECK_EQ(status, nnp_status_success); - } + // only supports stride = 1 + CHECK_EQ(strideH(), 1); + CHECK_EQ(strideW(), 1); + + // TODO(hedaoyuan): There has some bug when batchSize > 1 and groups_ > 1. + CHECK_EQ(groups_, static_cast(1)); + nnp_status status = nnp_convolution_output(algorithm_, + batchSize, + inputChannels, + outputChannels, + inputSize, + padding, + kernelSize, + inputData, + filterData, + nullptr, /* bias */ + outputData, + bufferPtr, + sizePtr, + nnp_activation_identity, + nullptr, + threadpool_, /* threadpool */ + nullptr); + CHECK_EQ(status, nnp_status_success); } } diff --git a/paddle/function/nnpack/NNPACKConvOpTest.cpp b/paddle/function/nnpack/NNPACKConvOpTest.cpp index 4818011211..4dd3982487 100644 --- a/paddle/function/nnpack/NNPACKConvOpTest.cpp +++ b/paddle/function/nnpack/NNPACKConvOpTest.cpp @@ -13,87 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/function/Function.h" -#include "paddle/function/FunctionTest.h" - -DEFINE_string(algo, - "auto", - "The algorithm (auto, ft8x8, ft16x16, wt8x8, " - "implicit-gemm, or direct) for computing convolution of NNPACK."); +#include "paddle/function/ConvOpTest.h" namespace paddle { -#define IS_NNPACK_SUPPORT(algo, filterSize, stride) \ - if (algo == "direct" && filterSize != 1) continue; \ - if (algo == "direct" && batchSize != 1) continue; \ - if (algo == "wt8x8" && filterSize != 3) continue; \ - if (algo == "implicit-gemm" && batchSize != 1) continue; \ - if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue; - -class ConvolutionTest { -public: - ConvolutionTest(const std::string& conv1, - const std::string& conv2, - std::string algo = "auto") { - for (size_t batchSize : {1, 32}) { - for (size_t inputSize : {7, 14, 54}) { - for (size_t filterSize : {1, 3, 5}) { - for (size_t inputChannels : {3, 64}) { - for (size_t outputChannels : {3, 64, 128}) { - if (inputChannels < outputChannels) break; - for (size_t stride : {1, 2}) { - // if batchSize > 1 NNPACKConv only supports stride = 1 - if (batchSize > 1 && stride > 1) break; - for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - IS_NNPACK_SUPPORT(algo, filterSize, stride); - LOG(INFO) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", (size_t)1) - .set("algo", algo)); - - TensorShape shape0{ - batchSize, inputChannels, inputSize, inputSize}; - TensorShape shape1{ - outputChannels, inputChannels, filterSize, filterSize}; - TensorShape shape2{ - batchSize, outputChannels, outputSize, outputSize}; - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2)); - test.run(); - } - } - } - } - } - } - } - } -}; +TEST(NNPACK, Forward) { + Convolution( + "GemmConv-CPU", "NNPACKConv-CPU", forward); +} -TEST(Convolution, NNPACK) { - // NNPACK only supports stride = 1 - ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo); +TEST(NNPACK, Depthwise) { + DepthwiseConvolution( + "GemmConv-CPU", "NNPACKConv-CPU", forward); } } // namespace paddle diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 0012636b8f..62cff9361c 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -23,6 +23,17 @@ endmacro() filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) + +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") + list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) + list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations") +else() + message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations") +endif() + if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER layers/CudnnConvBaseLayer.h diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index 5de2170877..78e958e06f 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -112,7 +112,6 @@ BEGIN_DEFINE_ACTIVATION(softmax) private: MatrixPtr sftMaxSum_; MatrixPtr sftMaxDot_; -MatrixPtr one_; public: Error __must_check forward(Argument& act) { @@ -138,14 +137,6 @@ Error __must_check backward(Argument& act) { 1, /* trans */ false, useGpu(act.deviceId)); - if (!one_ || one_->getWidth() != outputG->getWidth()) { - Matrix::resizeOrCreate(one_, - 1, - outputG->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - one_->one(); - } sftMaxDot_->dotMul(*outputG, *outputV); sftMaxSum_->colMerge(*sftMaxDot_); diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp new file mode 100644 index 0000000000..8ce591d476 --- /dev/null +++ b/paddle/gserver/layers/KmaxSeqScoreLayer.cpp @@ -0,0 +1,117 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" + +namespace paddle { + +class KmaxSeqScoreLayer : public Layer { +private: + MatrixPtr scores_; + size_t beamSize_; + void kmaxScorePerSeq(const real* score, + real* sortedRes, + const ICpuGpuVectorPtr seqStartPos); + +public: + explicit KmaxSeqScoreLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; + +REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer); + +bool KmaxSeqScoreLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + bool ret = Layer::init(layerMap, parameterMap); + CHECK_EQ(1U, inputLayers_.size()); + + beamSize_ = config_.beam_size(); + CHECK_GE(beamSize_, 1U); + + setNeedSequenceInfo(false); + setNeedGradient(false); + return ret; +} + +void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores, + real* sortedIds, + const ICpuGpuVectorPtr seqStartPos) { + int* starts = seqStartPos->getMutableData(false); + std::vector indices; + for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) { + int seqLen = starts[i + 1] - starts[i]; + int k = std::min(static_cast(beamSize_), seqLen); + + indices.resize(seqLen, 0); + std::iota(begin(indices), end(indices), 0.); + std::vector tmpScore(scores + starts[i], scores + starts[i + 1]); + std::partial_sort( + begin(indices), + begin(indices) + k, + end(indices), + [&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; }); + memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real)); + } +} + +void KmaxSeqScoreLayer::forward(PassType passType) { + Layer::forward(passType); + + const Argument& input = getInput(0); + const MatrixPtr inputScore = getInputValue(0); + + CHECK(input.hasSeq() || input.hasSubseq()) + << "input of " << getName() + << " must be a sequence or a nested sequence."; + CHECK_EQ(input.value->getWidth(), 1UL) + << "input of " << getName() + << " is score over a sequence or a nested sequence, so its width " + << " must be 1."; + + if (useGpu_) { + // this Layer runs only in CPU, if the model is runing on GPU, + // then copy the input to this layer from GPU to CPU. + Matrix::resizeOrCreate(scores_, + inputScore->getHeight(), + 1, + false /* trans */, + false /* useGpu */); + scores_->copyFrom(*inputScore); + } else { + scores_ = inputScore; + } + + Matrix::resizeOrCreate( + output_.value, + input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(), + beamSize_, + false, + false); + output_.value->one(); + output_.value->mulScalar(-1.); + + kmaxScorePerSeq(scores_->getData(), + output_.value->getData(), + input.hasSubseq() ? input.subSequenceStartPositions + : input.sequenceStartPositions); +} + +void KmaxSeqScoreLayer::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNBase.h b/paddle/gserver/layers/MKLDNNBase.h new file mode 100644 index 0000000000..4c0234e7b3 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNBase.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "mkldnn.hpp" + +namespace paddle { + +typedef enum { + MKLDNN_BASE = 1, // basical info of MKLDNN + MKLDNN_TESTS = 1, // gtest info of MKLDNN + MKLDNN_SIZES = 2, // size info of MKLDNN + MKLDNN_FMTS = 3, // format info of MKLDNN + MKLDNN_ALL = 4, // show all info of MKLDNN +} MKLDNN_LOG_LEVEL; + +/** + * @brief MKLDNN CPU engine. + * + */ +class CPUEngine { +public: + static CPUEngine& Instance() { + // Thread-safe in C++11. + static CPUEngine myInstance; + return myInstance; + } + + // Disallow copy or move + CPUEngine(const CPUEngine&) = delete; // Copy constructor + CPUEngine(CPUEngine&&) = delete; // Move constructor + CPUEngine& operator=(const CPUEngine&) = delete; // Copy assignment + CPUEngine& operator=(CPUEngine&&) = delete; // Move assignment + + mkldnn::engine& getEngine() { return cpuEngine_; } + +protected: + CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} + // CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} + ~CPUEngine() {} + +private: + mkldnn::engine cpuEngine_; +}; + +/** + * @brief MKLDNN Stream. + * + */ +class MKLDNNStream { +public: + MKLDNNStream() : ready_(false) { resetState(); } + + virtual ~MKLDNNStream() {} + + /** + * @brief Submit stream + * @param prims The primitives vector + * @param block Waiting for the stream to complete + */ + void submit(std::vector& prims, bool block = true) { + resetState(); + stream_->submit(prims).wait(block); + ready_ = false; + } + + /** + * @brief Reset the mkldnn stream + */ + void resetState() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have method to reset this state + // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + ready_ = true; + } + +private: + bool ready_; + std::shared_ptr stream_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp new file mode 100644 index 0000000000..30f567eaf8 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -0,0 +1,282 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNFcLayer.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; +typedef inner_product_forward fc_fwd; +typedef inner_product_backward_weights fc_bwdWgt; +typedef inner_product_backward_data fc_bwdData; + +namespace paddle { + +REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer); + +bool MKLDNNFcLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; + + // output size, cat not be changed + oc_ = getSize(); + oh_ = 1; + ow_ = 1; + + // input size can not change in FC + iLayerSize_ = inputLayers_[0]->getSize(); + CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_); + + // create weight + weight_ = + std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MKLDNNFcLayer::convertWeightsFromPaddle() { + if (FLAGS_use_mkldnn_wgt) { + return; + } + + if (hasInitedWgt_) { + return; + } + + // The weight_ is transposed from initial paddle weight + MatrixPtr paddleWgt = Matrix::create( + weight_->getW()->getData(), iLayerSize_, oc_, false, false); + + // TODO(TJ): remove this print when do not need differ weights + std::ostringstream ostr; + paddleWgt->print(ostr); + VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); + + // The mkldnn weight is transposed from initial paddle matrix + MatrixPtr paddleWgtT; + paddleWgt->transpose(paddleWgtT, true); + weight_->getW()->copyFrom(*paddleWgtT); + hasInitedWgt_ = true; +} + +void MKLDNNFcLayer::convertWeightsToPaddle() { + MatrixPtr dnnWgt = weight_->getW(); + MatrixPtr paddleWgt; + dnnWgt->transpose(paddleWgt, true); + + // copy paddle weight and override on weight_ + MatrixPtr dnnWgtT = Matrix::create( + dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); + dnnWgtT->copyFrom(*paddleWgt); +} + +void MKLDNNFcLayer::reshape() { + const Argument& input = getInput(0); + int batchSize = input.getBatchSize(); + if (bs_ == batchSize) { + return; + } + bs_ = batchSize; + ih_ = input.getFrameHeight(); + iw_ = input.getFrameWidth(); + if (ih_ == 0) { + ih_ = 1; + } + if (iw_ == 0) { + iw_ = 1; + } + hasSpatial_ = true; + if (ih_ == 1 && iw_ == 1) { + hasSpatial_ = false; + } + CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); + ic_ = iLayerSize_ / (ih_ * iw_); + CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; + CHECK_EQ(size_t(oc_), getSize()); + printSizeInfo(); + + // reset output + output_.setFrameHeight(oh_); + output_.setFrameWidth(ow_); + resetOutput(bs_, oc_); + + // reset mkldnn forward + resetFwd(); + needResetBwd_ = true; + + convertWeightsFromPaddle(); +} + +void MKLDNNFcLayer::resetFwd() { + bool hasBias = biases_ && biases_->getW(); + real* iData = getInputValue(0)->getData(); + real* oData = getOutputValue()->getData(); + real* wData = weight_->getW()->getData(); + real* bData = hasBias ? biases_->getW()->getData() : NULL; + + // TODO(TJ): below create should be covered in MkldnnMatrix + // create memory desc + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + + // create memory primitive desc and memory self + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); + outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + + prop_kind pk = prop_kind::forward; + fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) + : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + + if (bData != NULL) { + biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); + } else { + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); + } + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); +} + +void MKLDNNFcLayer::resetBwd() { + if (!needResetBwd_) { + return; + } + needResetBwd_ = false; + + bool hasBias = biases_ && biases_->getWGrad(); + real* iData = getInputValue(0)->getData(); + real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; + real* oDiff = getOutputGrad()->getData(); + real* wDiff = weight_->getWGrad()->getData(); + real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; + + /// backward weight + // create memory desc for backward memory + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + + if (inVal_) { + // update data + inVal_->set_data_handle(iData); + } else { + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + } + + // create memory primitive desc and memory self + wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); + outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL + ? fc_bwdWgt::desc(iMD, wMD, bMD, oMD) + : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::primitive_desc bwdWgtPD = + fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); + + if (bDiff != NULL) { + biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + bwdWgt_.reset( + new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); + } else { + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); + } + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwdWgt_); + + /// backward data + if (iDiff == NULL) { + return; + } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::primitive_desc bwdDataPD = + fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + pipelineBwd_.push_back(*bwdData_); +} + +void MKLDNNFcLayer::forward(PassType passType) { + Layer::forward(passType); + reshape(); + + { + REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); + + // update input data + // since it might be changed if this is after data layer + real* iData = getInputValue(0)->getData(); + inVal_->set_data_handle(iData); + + // just submit forward pipeline + stream_->submit(pipelineFwd_); + } + + /* activation */ { + REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); + forwardActivation(); + } +} + +void MKLDNNFcLayer::backward(const UpdateCallback& callback) { + /* Do derivation */ { + REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); + backwardActivation(); + } + + { + REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); + resetBwd(); + + // update diff + real* oDiff = getOutputGrad()->getData(); + outGrad_->set_data_handle(oDiff); + + // just sumbmit backward pipeline + stream_->submit(pipelineBwd_); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } + } +} +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h new file mode 100644 index 0000000000..7954852a23 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { + +/** + * @brief A subclass of MKLDNNLayer fc layer. + * + * The config file api is mkldnn_fc + */ +class MKLDNNFcLayer : public MKLDNNLayer { +protected: + // input layer size, can not be change after init + size_t iLayerSize_; // == ic * ih * iw + + // if has already init the weight + bool hasInitedWgt_; + + // if input layer has image size info (ih>1 && iw>1) + bool hasSpatial_; + + // fc weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + +public: + explicit MKLDNNFcLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + + ~MKLDNNFcLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; + + void forward(PassType passType) override; + + void backward(const UpdateCallback& callback) override; + +protected: + /** + * reshape the input image sizes + * and reset output buffer size + * and reset mkldnn forward + */ + void reshape(); + + /** + * reset the forward primitve and memory + * only would be called when input size changes + */ + void resetFwd(); + + /** + * reset the backward primitve and memory for mkldnn fc + * only would be called when needed + */ + void resetBwd(); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h new file mode 100644 index 0000000000..63e29f447e --- /dev/null +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "Layer.h" +#include "MKLDNNBase.h" +#include "mkldnn.hpp" + +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); + +namespace paddle { + +class MKLDNNLayer; +typedef std::shared_ptr MKLDNNLayerPtr; + +/** + * @brief Base class of MKLDNNlayer. + * + */ +class MKLDNNLayer : public Layer { +protected: + // batch size + int bs_; + // input image channel, height and width + int ic_, ih_, iw_; + // output image channel, height and width + int oc_, oh_, ow_; + + // backward also need reset after reset forward handle + bool needResetBwd_; + + // mkldnn engine, stream and primivtives + mkldnn::engine engine_; + std::shared_ptr stream_; + std::shared_ptr fwd_; + std::shared_ptr bwdWgt_; + std::shared_ptr bwdData_; + std::vector pipelineFwd_; + std::vector pipelineBwd_; + + // TODO(TJ): change below memory as MKLDNNMatrixPtr type + std::shared_ptr inVal_; + std::shared_ptr inGrad_; + std::shared_ptr outVal_; + std::shared_ptr outGrad_; + std::shared_ptr wgtVal_; + std::shared_ptr wgtGrad_; + std::shared_ptr biasVal_; + std::shared_ptr biasGrad_; + +public: + explicit MKLDNNLayer(const LayerConfig& config) + : Layer(config), + bs_(0), + ic_(0), + ih_(0), + iw_(0), + oc_(0), + oh_(0), + ow_(0), + needResetBwd_(true), + engine_(mkldnn::engine::cpu, 0), + stream_(nullptr), + fwd_(nullptr), + bwdWgt_(nullptr), + bwdData_(nullptr) {} + + ~MKLDNNLayer() {} + + virtual bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!Layer::init(layerMap, parameterMap)) { + return false; + } + + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." + << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + stream_.reset(new MKLDNNStream()); + engine_ = CPUEngine::Instance().getEngine(); + + // TODO(TJ): deivecId + return true; + } + + /** + * convert weight from paddle format to mkldnn format + * weight_ will be override + */ + virtual void convertWeightsFromPaddle() {} + + /** + * convert mkldnn weight to paddle format + * weight_ will be override + */ + virtual void convertWeightsToPaddle() {} + + /** + * print info about sizes + */ + virtual void printSizeInfo() { + VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ + << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ + << ", oh: " << oh_ << ", ow: " << ow_; + } + + // TODO(TJ): move to MkldnnMatrix + // create memory desc + inline mkldnn::memory::desc createMD( + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { + // TODO(TJ): isFmtSuppoted(fmt) + return mkldnn::memory::desc(dims, type, fmt); + } +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 76f587fff7..648d3908f3 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -96,7 +96,7 @@ void SubNestedSequenceLayer::calSelectedCols( for (size_t i = 0; i < seqNum; ++i) { for (size_t j = 0; j < beamSize; ++j) { if (selectedIndices->getElement(i, j) == -1.) break; - int selSubSeqIdx = selectedIndices->getElement(i, j); + size_t selSubSeqIdx = selectedIndices->getElement(i, j); CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] - @@ -135,7 +135,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " << "must be a nested sequence."; const MatrixPtr selectedIndices = getInputValue(1); - CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight()); + CHECK_EQ(size_t(inputSeq.getNumSequences()), selectedIndices->getHeight()); if (dynamic_cast(selectedIndices.get())) { /* diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 5511ab6b8b..c2a2993620 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -9,7 +9,7 @@ add_unittest_without_exec(test_ProtoDataProvider # mkdir will get error. add_test(NAME test_ProtoDataProvider COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) ################# test_LayerGrad ####################### add_unittest_without_exec(test_LayerGrad @@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad add_test(NAME test_LayerGrad COMMAND test_LayerGrad) +########## test_Mkldnn layers and activations ########## +if(WITH_MKLDNN) + add_unittest_without_exec(test_MKLDNN + test_MKLDNN.cpp + MKLDNNTester.cpp + LayerGradUtil.cpp) + add_test(NAME test_MKLDNN COMMAND test_MKLDNN) +endif() + ################ test_CRFLayerGrad #################### add_unittest_without_exec(test_CRFLayerGrad test_CRFLayerGrad.cpp @@ -66,6 +75,16 @@ add_unittest_without_exec(test_BatchNorm add_test(NAME test_BatchNorm COMMAND test_BatchNorm) + + +################# test_KmaxSeqScore ####################### +add_unittest_without_exec(test_KmaxSeqScore + test_KmaxSeqScore.cpp + LayerGradUtil.cpp) + +add_test(NAME test_KmaxSeqScore + COMMAND test_KmaxSeqScore) + ################## test_Evaluator ####################### add_unittest(test_Evaluator test_Evaluator.cpp) @@ -82,8 +101,8 @@ if(WITH_PYTHON) test_PyDataProvider.cpp) add_test(NAME test_PyDataProvider - COMMAND .set_python_path.sh -d ./gserver/tests:${PROJ_ROOT}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ./gserver/tests:${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentLayer ####################### @@ -96,7 +115,7 @@ if(NOT WITH_DOUBLE) add_test(NAME test_WarpCTCLayer COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_WarpCTCLayer --warpctc_dir=${WARPCTC_LIB_DIR} - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentGradientMachine ############### @@ -106,20 +125,20 @@ add_unittest_without_exec(test_RecurrentGradientMachine test_RecurrentGradientMachine.cpp) add_test(NAME test_RecurrentGradientMachine COMMAND .set_python_path.sh -d - ${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests + ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) add_unittest_without_exec(test_NetworkCompare test_NetworkCompare.cpp) if(WITH_GPU) add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) else() add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() @@ -127,6 +146,6 @@ add_unittest_without_exec(test_PyDataProvider2 test_PyDataProvider2.cpp) add_test(NAME test_PyDataProvider2 - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/paddle/gserver/tests:${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle ) diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp new file mode 100644 index 0000000000..99c8c4948c --- /dev/null +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -0,0 +1,369 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNTester.h" +#include "paddle/gserver/layers/MKLDNNBase.h" +#include "paddle/gserver/layers/MKLDNNLayer.h" + +namespace paddle { + +// init data layer and test layer of both dnn and reference +void MKLDNNTester::reset(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize) { + const bool trans = false; + const bool useGpu = false; + + // clear + configs_.clear(); + layerNames_.clear(); + dataLayers_.clear(); + datas_.clear(); + layerMaps_.clear(); + parameters_.clear(); + testLayers_.clear(); + + // resize + configs_.resize(NUM); + layerNames_.resize(NUM); + dataLayers_.resize(NUM); + datas_.resize(NUM); + layerMaps_.resize(NUM); + parameters_.resize(NUM); + testLayers_.resize(NUM); + + // reset configs and layer names + configs_[DNN] = dnn; + configs_[REF] = ref; + layerNames_[DNN] = "mkldnn"; // the first is mkldnn layer + layerNames_[REF] = "reference"; // second is reference layer + + // reset others + for (size_t i = 0; i < NUM; ++i) { + configs_[i].layerConfig.set_name(layerNames_[i]); + initDataLayer(configs_[i], + &(dataLayers_[i]), + &(datas_[i]), + &(layerMaps_[i]), + layerNames_[i], + batchSize, + trans, + useGpu); + initTestLayer( + configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); + } + dnnLayer_ = testLayers_[DNN]; + refLayer_ = testLayers_[REF]; + EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + + setInputImgSize(); +} + +void MKLDNNTester::setInputImgSize() { + for (size_t n = 0; n < dataLayers_.size(); ++n) { + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + // TODO(TJ): fix me when concat and elewise ready + dataLayers_[n][i]->getOutput().setFrameHeight(ih_); + dataLayers_[n][i]->getOutput().setFrameWidth(iw_); + } + } +} + +// init randome parameters of ref, and copy to mkldnn +void MKLDNNTester::randomWgtDatas() { + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + for (size_t i = 0; i < parameters_[REF].size(); ++i) { + const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + parameters_[REF][i]->randomize(); + dnnValue->copyFrom(*refValue); + + VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName(); + printVector(dnnValue); + } +} + +// random botdata of ref layer and copy same to mkldnn +void MKLDNNTester::randomBotDatas() { + CHECK_EQ(dataLayers_.size(), NUM); + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); + dataLayers_[DNN][i]->getOutputValue()->copyFrom( + *(dataLayers_[REF][i]->getOutputValue())); + VLOG(lvl_) << "Input " << i << " data:"; + printMatrix(dataLayers_[REF][i]->getOutputValue()); + } +} + +void MKLDNNTester::randomTopDiffs() { + refLayer_->getOutputGrad()->randomizeUniform(); + dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); + VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; + printMatrix(refLayer_->getOutputGrad()); +} + +void MKLDNNTester::checkForward() { + printTopDatas(); + double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), + testLayers_[REF]->getOutputValue()); + VLOG(MKLDNN_ALL) << "Check Forward"; + EXPECT_LE(fabs(delta), eps_); +} + +void MKLDNNTester::checkBackwardData() { + // TODO(TJ): uncomment me when batch norm ready + // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); + const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); + VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i; + printMatrix(dnnDiff); + VLOG(lvl_) << "Reference Backward Output BotDiff " << i; + printMatrix(refDiff); + + double delta = compareMatrix(dnnDiff, refDiff); + EXPECT_LE(fabs(delta), eps_); + // TODO(TJ): uncomment me when batch norm ready + // if (isBN) { + // // the other two inputs in batch norm are for moving mean and var + // break; + // } + } +} + +void MKLDNNTester::checkBackwardWgts() { + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + vector dnnWgts; // used to temply save mkldnn weights + saveWgt(parameters_[DNN], dnnWgts); + + const MKLDNNLayerPtr dnnlayer = + std::dynamic_pointer_cast(dnnLayer_); + CHECK(dnnlayer); + dnnlayer->convertWeightsToPaddle(); + for (size_t i = 0; i < parameters_[DNN].size(); ++i) { + const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName(); + printVector(dnn); + VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName(); + printVector(ref); + + double delta = compareVector(dnn, ref); + EXPECT_LE(fabs(delta), eps_); + } + + VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre"; + restoreWgt(dnnWgts, parameters_[DNN]); +} + +void MKLDNNTester::saveWgt(const vector& from, + vector& to) { + const bool useGpu = false; + to.resize(from.size()); + for (size_t i = 0; i < to.size(); ++i) { + const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE); + to[i] = Vector::create(wgt->getSize(), useGpu); + to[i]->copyFrom(*wgt); + } +} + +void MKLDNNTester::restoreWgt(const vector& from, + vector& to) { + CHECK_EQ(from.size(), to.size()); + for (size_t i = 0; i < from.size(); ++i) { + const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE); + wgt->copyFrom(*from[i]); + } +} + +// clear parameters grad +void MKLDNNTester::clearWgtDiffs() { + for (size_t n = 0; n < parameters_.size(); ++n) { + for (size_t i = 0; i < parameters_[n].size(); ++i) { + const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); + if (grad) { + grad->zeroMem(); + } + } + } +} + +void MKLDNNTester::clearBotDiffs() { + // dnn and ref + for (size_t n = 0; n < dataLayers_.size(); ++n) { + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } + } +} + +void MKLDNNTester::clearBotDiffs(int n) { + CHECK_LT(n, NUM); + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } +} + +void MKLDNNTester::clearTopDatas() { + for (size_t i = 0; i < testLayers_.size(); ++i) { + testLayers_[i]->getOutputValue()->zeroMem(); + } +} + +void MKLDNNTester::printTopDatas() { + if (!log_) { + return; + } + + for (int n = 0; n < NUM; ++n) { + VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: "; + printMatrix(testLayers_[n]->getOutputValue()); + } +} + +void MKLDNNTester::printMatrix(const MatrixPtr& m) { + if (!log_) { + return; + } + + std::ostringstream ostr; + m->print(ostr); + VLOG(lvl_) << std::endl << ostr.str(); +} + +void MKLDNNTester::printVector(const VectorPtr& v) { + if (!log_) { + return; + } + + std::ostringstream ostr; + v->print(ostr, v->getSize()); + VLOG(lvl_) << std::endl << ostr.str(); +} + +double MKLDNNTester::getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate, + const float thres) { + double delta = 0, sum = 0; + int failCnt = 0; + const double eps = 1e-5; + double maxOut = 0; + for (size_t i = 0; i < len; ++i) { + double ref = fabs(d2[i]); + double diff = fabs(d1[i] - d2[i]); + delta += diff; + sum += ref; + if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) { + maxOut = std::max(maxOut, diff / ref); + failCnt++; + } + } + EXPECT_TRUE(std::isnormal(sum)); + EXPECT_FALSE(std::isinf(sum)); + EXPECT_FALSE(std::isnan(delta)); + VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; + return (failCnt / (float)len) > failRate ? maxOut : delta / sum; +} + +double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { + CHECK_EQ(m1->getElementCnt(), m2->getElementCnt()); + return getDelta(m1->getData(), m2->getData(), m1->getElementCnt()); +} + +double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { + CHECK_EQ(v1->getSize(), v2->getSize()); + return getDelta(v1->getData(), v2->getData(), v1->getSize()); +} + +void MKLDNNTester::runOnce() { + // test forward + randomBotDatas(); + dnnLayer_->forward(PASS_TRAIN); + refLayer_->forward(PASS_TRAIN); + checkForward(); + + // test backward + randomTopDiffs(); + dnnLayer_->backward(nullptr); + refLayer_->backward(nullptr); + checkBackwardData(); + checkBackwardWgts(); + + // clear buffers + // ref code will addto the diff, dnn code will writeto it + // and clearTopDatas() and clearWgtDiffs() should be coverd by test layers + clearBotDiffs(REF); +} + +void MKLDNNTester::run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH, + size_t inputImgW, + size_t iter, + float epsilon, + bool log, + int level) { + VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() + << " vs " << ref.layerConfig.type(); + ih_ = inputImgH; + iw_ = inputImgW; + iter_ = iter; + eps_ = epsilon; + log_ = log; + lvl_ = level; + + // Firstly test FLAGS_use_mkldnn_wgt = false + FLAGS_use_mkldnn_wgt = false; + // reset and run once + reset(dnn, ref, batchSize); + randomWgtDatas(); + clearWgtDiffs(); + clearBotDiffs(); + for (size_t i = 0; i < iter_; ++i) { + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; + runOnce(); + } + + // Then test FLAGS_use_mkldnn_wgt = true + FLAGS_use_mkldnn_wgt = true; + // after run once the mkldnn weight has been stored in dnnlayer + // then save the weights and restart again + vector dnnWgts, refWgts; + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + saveWgt(parameters_[DNN], dnnWgts); + saveWgt(parameters_[REF], refWgts); + + // restart again with flag true + reset(dnn, ref, batchSize); + + // restore wgt + restoreWgt(dnnWgts, parameters_[DNN]); + restoreWgt(refWgts, parameters_[REF]); + clearWgtDiffs(); + clearBotDiffs(); + + for (size_t i = 0; i < iter_; ++i) { + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; + runOnce(); + } +} + +} // namespace paddle diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h new file mode 100644 index 0000000000..522eeaf24b --- /dev/null +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -0,0 +1,120 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "LayerGradUtil.h" +#include "paddle/gserver/layers/MKLDNNBase.h" + +namespace paddle { + +/** + * @brief test the functionality of Mkldnnlayers + * refer to paddle original function + */ +class MKLDNNTester { + enum { + DNN = 0, // MKLDNN layer + REF = 1, // Reference layer + NUM = 2, // Number of total + }; + +protected: + std::vector configs_; + vector layerNames_; + vector> dataLayers_; + vector> datas_; + vector layerMaps_; + vector> parameters_; + vector testLayers_; + LayerPtr dnnLayer_, refLayer_; + + /// run some iterations, all the result should pass + size_t iter_; + /// whether to print out the details + bool log_; + /// vlog level to print the matrix details datas + int lvl_; + /// epsilon + float eps_; + /// input image size, default 1 + size_t ih_, iw_; + +public: + explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) { + iter_ = iter; + eps_ = epsilon; + log_ = false; + lvl_ = MKLDNN_ALL; + } + + ~MKLDNNTester() {} + +public: + void run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH = 1, + size_t inputImgW = 1, + size_t iter = 3, + float epsilon = 1e-4, + bool log = false, + int level = MKLDNN_ALL); + void setLogLevel(int lvl) { lvl_ = lvl; } + +private: + void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize); + void setInputImgSize(); + void runOnce(); + + void randomWgtDatas(); + void randomBotDatas(); + void randomTopDiffs(); + + void checkForward(); + void checkBackwardData(); + void checkBackwardWgts(); + + void clearWgtDiffs(); + void clearBotDiffs(); + void clearBotDiffs(int n); // clear specific layer + void clearTopDatas(); + + void printTopDatas(); + void printMatrix(const MatrixPtr& m); + void printVector(const VectorPtr& v); + + void saveWgt(const vector& from, vector& to); + void restoreWgt(const vector& from, vector& to); + + double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2); + double compareVector(const VectorPtr& v1, const VectorPtr& v2); + + /** + * Get delta percent + * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the + * max(diff/ref) + * else return sum(abs(a-b)) / sum(abs(b)) + * The return value should smaller than eps when passing. + */ + double getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate = 1e-3, + const float thres = 0.1); +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp new file mode 100644 index 0000000000..308abe6816 --- /dev/null +++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp @@ -0,0 +1,165 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" +#include "paddle/utils/GlobalConstants.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +DECLARE_bool(use_gpu); +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +vector randSampling(int range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + return num; +} + +void genRandomSeqInfo(vector& seqStartPosition, + vector& subSeqStartPosition) { + const int maxSeqNum = 100; + // generate random start position information + int seqNum = 1 + (rand() % maxSeqNum); + seqStartPosition.resize(seqNum + 1, 0); + subSeqStartPosition.resize(1, 0); + + for (int i = 0; i < seqNum; ++i) { + int subSeqLen = 1 + (rand() % maxSeqNum); + for (int j = 0; j < subSeqLen; ++j) + subSeqStartPosition.push_back(subSeqStartPosition.back() + subSeqLen); + seqStartPosition[i + 1] = subSeqStartPosition.back(); + } +} + +void genRandomGroundTruth(real* values, + vector>& groundTruth, + vector& startPos, + size_t beamSize) { + groundTruth.resize(startPos.size() - 1, vector(beamSize, -1)); + for (size_t i = 0; i < startPos.size() - 1; ++i) { + int seqLen = startPos[i + 1] - startPos[i]; + vector pos = + randSampling(seqLen, min(static_cast(beamSize), seqLen)); + for (size_t j = 0; j < pos.size(); ++j) { + groundTruth[i][j] = pos[j]; + values[startPos[i] + pos[j]] = 1.; + } + } +} + +void checkLayerOut(vector> groundTruth, + real* layerOut, + size_t beamSize) { + for (size_t i = 0; i < groundTruth.size(); ++i) { + int begPos = i * beamSize; + vector tmp(layerOut + begPos, layerOut + begPos + beamSize); + sort(begin(tmp), end(tmp)); + sort(begin(groundTruth[i]), end(groundTruth[i])); + for (size_t j = 0; j < beamSize; ++j) CHECK_EQ(tmp[j], groundTruth[i][j]); + } +} + +TEST(Layer, kmaxSeqScoreLayer) { + const size_t maxBeamSize = 100; + size_t beamSize = 1 + (rand() % maxBeamSize); + + vector seqStartPosition; + vector subSeqStartPosition; + genRandomSeqInfo(seqStartPosition, subSeqStartPosition); + MatrixPtr inValue = + Matrix::create(subSeqStartPosition.back(), 1, false, false); + + std::vector mode = {false}; +#ifndef PADDLE_ONLY_CPU + mode.push_back(true); +#endif + + for (auto hasSubseq : {false, true}) { + vector> groundTruth; + inValue->randomizeUniform(); + genRandomGroundTruth(inValue->getData(), + groundTruth, + hasSubseq ? subSeqStartPosition : seqStartPosition, + beamSize); + + for (auto useGpu : mode) { + TestConfig config; + config.layerConfig.set_type("kmax_seq_score"); + config.layerConfig.set_beam_size(beamSize); + + if (hasSubseq) { + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "scores", + inValue, + seqStartPosition, + subSeqStartPosition}); + } else { + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "scores", inValue, seqStartPosition}); + } + config.layerConfig.add_inputs(); + + // data layer initialize + std::vector dataLayers; + LayerMap layerMap; + vector datas; + initDataLayer( + config, + &dataLayers, + &datas, + &layerMap, + "kmax_seq_score", + 100 /* actually this parameter is unused in self-defined input*/, + false, + useGpu); + // test layer initialize + std::vector parameters; + LayerPtr kmaxSeqScoreLayer; + FLAGS_use_gpu = useGpu; + initTestLayer(config, &layerMap, ¶meters, &kmaxSeqScoreLayer); + kmaxSeqScoreLayer->forward(PASS_TRAIN); + + const MatrixPtr outValue = kmaxSeqScoreLayer->getOutputValue(); + CHECK_EQ(outValue->getHeight(), + hasSubseq ? subSeqStartPosition.size() - 1 + : seqStartPosition.size() - 1); + CHECK_EQ(outValue->getWidth(), beamSize); + checkLayerOut(groundTruth, outValue->getData(), beamSize); + } + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand((size_t)(time(NULL))); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp new file mode 100644 index 0000000000..e1d2270df2 --- /dev/null +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "MKLDNNTester.h" +#include "ModelConfig.pb.h" + +using namespace paddle; // NOLINT + +DECLARE_bool(thread_local_rand_use_global_seed); +DECLARE_bool(use_gpu); +DECLARE_bool(use_mkldnn); + +struct testFCDesc { + int bs; + int ic; + int oc; + int ih, iw; // oh == ow == 1 +}; + +void testFcLayer(const testFCDesc& pm) { + const std::string compareTypes[] = {"mkldnn_fc", "fc"}; + TestConfig cfg; + cfg.layerConfig.set_type(compareTypes[0]); + cfg.layerConfig.set_size(pm.oc); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)}); + cfg.layerConfig.add_inputs(); + + MKLDNNTester tester; + for (auto biasSize : {pm.oc, 0}) { + cfg.biasSize = biasSize; + TestConfig ref = cfg; + ref.layerConfig.set_type(compareTypes[1]); + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } + } +} + +TEST(MKLDNNLayer, FcLayer) { + testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13}); + testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11}); + testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, /*ih*/ 16, /*iw*/ 16}); + testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); +} + +// TODO(TJ): add branch test + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + FLAGS_use_gpu = false; + FLAGS_use_mkldnn = true; + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + return RUN_ALL_TESTS(); +} diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 9981de6160..bf28092e82 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,13 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . *.cpp) set(MATH_SOURCES - "${PROJ_ROOT}/paddle/math/BaseMatrix.cu" - "${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" ${MATH_SOURCES}) if(NOT WITH_GPU) # then compile BaseMatrix.cu as c++ file - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/BaseMatrix.cu") - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu") add_library(paddle_math STATIC ${MATH_SOURCES}) else() diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h index 860cad1047..36d57bbb65 100644 --- a/paddle/math/CpuSparseMatrix.h +++ b/paddle/math/CpuSparseMatrix.h @@ -302,6 +302,10 @@ public: bool isSparse() const { return true; } private: + using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h index f6cd5df338..16300db081 100644 --- a/paddle/math/SparseMatrix.h +++ b/paddle/math/SparseMatrix.h @@ -231,6 +231,9 @@ public: private: using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 531c3c8aff..c181bd7b88 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,28 +41,28 @@ function(op_library TARGET) endif() endfunction() +cc_test(gather_test SRCS gather_test.cc DEPS tensor) + cc_library(net_op SRCS net_op.cc DEPS op_registry) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) op_library(add_op SRCS add_op.cc add_op.cu) -cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) op_library(mean_op SRCS mean_op.cc mean_op.cu) -cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op) op_library(mul_op SRCS mul_op.cc mul_op.cu) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) +op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu) op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) -op_library(fc_op - SRCS fc_op.cc - DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS op_desc tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) +op_library(uniform_random_op + SRCS uniform_random_op.cc uniform_random_op.cu) diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index fb85093bb2..b886ded9bb 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -17,9 +17,10 @@ limitations under the License. */ namespace paddle { namespace operators { -class AddOp : public OperatorWithKernel { +class AddOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(AddOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set"); @@ -31,9 +32,9 @@ class AddOp : public OperatorWithKernel { } }; -class AddOpMaker : public OpProtoAndCheckerMaker { +class AddOpMaker : public framework::OpProtoAndCheckerMaker { public: - AddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of add op"); AddInput("Y", "The second input of add op"); @@ -46,14 +47,18 @@ The equation is: Out = X + Y } }; -class AddOpGrad : public OperatorWithKernel { +class AddOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(AddOpGrad, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override {} + void InferShape(const framework::InferShapeContext &ctx) const override {} }; } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker); REGISTER_GRADIENT_OP(add_two, add_two_grad, ops::AddOpGrad); -REGISTER_OP_CPU_KERNEL(add_two, ops::AddKernel); + +REGISTER_OP_CPU_KERNEL(add_two, + ops::AddKernel); diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index 9bd08634da..cec5f558cb 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -16,4 +16,6 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/add_op.h" -REGISTER_OP_GPU_KERNEL(add_two, ops::AddKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(add_two, + ops::AddKernel); diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index 9db19a6138..d76c10957e 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -13,15 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class AddKernel : public OpKernel { +class AddKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input0 = context.Input(0); auto input1 = context.Input(1); auto output = context.Output(0); diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc deleted file mode 100644 index 3d52f54983..0000000000 --- a/paddle/operators/add_op_test.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#define private public -#include -USE_OP(add_two); -// USE_OP(add_two_grad); - -TEST(AddOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("add_two"); - ASSERT_NE(it, protos.end()); - auto& op_creators = paddle::framework::OpRegistry::op_creators(); - auto it1 = op_creators.find("add_two_grad"); - ASSERT_NE(it1, op_creators.end()); -} diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 942b919079..09aa589d3c 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -17,9 +17,10 @@ limitations under the License. */ namespace paddle { namespace operators { -class OnehotCrossEntropyOp : public OperatorWithKernel { +class OnehotCrossEntropyOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(OnehotCrossEntropyOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of OnehotCrossEntropyOp must be two"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, @@ -37,9 +38,11 @@ class OnehotCrossEntropyOp : public OperatorWithKernel { } }; -class OnehotCrossEntropyGradientOp : public OperatorWithKernel { +class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(OnehotCrossEntropyGradientOp, + framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { auto X_grad = ctx.Output(framework::GradVarName("X")); auto X = ctx.Input("X"); @@ -48,9 +51,10 @@ class OnehotCrossEntropyGradientOp : public OperatorWithKernel { } }; -class OnehotCrossEntropyOpMaker : public OpProtoAndCheckerMaker { +class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { public: - OnehotCrossEntropyOpMaker(OpProto *proto, OpAttrChecker *op_checker) + OnehotCrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of OnehotCrossEntropyOp"); AddInput("label", "The second input of OnehotCrossEntropyOp"); @@ -66,11 +70,14 @@ OnehotCrossEntropy Operator. } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp, ops::OnehotCrossEntropyOpMaker); -REGISTER_OP_CPU_KERNEL(onehot_cross_entropy, - ops::OnehotCrossEntropyOpKernel); - +REGISTER_OP_CPU_KERNEL( + onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); +REGISTER_GRADIENT_OP(onehot_cross_entropy, onehot_cross_entropy_grad, + ops::OnehotCrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL( onehot_cross_entropy_grad, - ops::OnehotCrossEntropyGradientOpKernel); + ops::OnehotCrossEntropyGradientOpKernel); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index ec73721a81..4bbc8f093a 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -14,3 +14,8 @@ #define EIGEN_USE_GPU #include "paddle/operators/cross_entropy_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + onehot_cross_entropy, + ops::OnehotCrossEntropyOpKernel); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index e02e3e2945..d1bbc2cb66 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -13,11 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; + template T tolerable_value(T x) { static_assert(std::is_floating_point::value, @@ -38,9 +40,9 @@ T tolerable_value(T x) { } template -class OnehotCrossEntropyOpKernel : public OpKernel { +class OnehotCrossEntropyOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); const T* Xdata = X->data(); const int* label_data = ctx.Input(1)->data(); @@ -61,9 +63,9 @@ class OnehotCrossEntropyOpKernel : public OpKernel { }; template -class OnehotCrossEntropyGradientOpKernel : public OpKernel { +class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto X = ctx.Input("X"); auto dX = ctx.Output(framework::GradVarName("X")); auto dY = ctx.Input(framework::GradVarName("Y")); diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc deleted file mode 100644 index b5cf236bac..0000000000 --- a/paddle/operators/fc_op.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "type_alias.h" - -namespace paddle { -namespace operators { - -class FullyConnectedOp : public NetOp { - public: - void Init() override { - AddOp(OpRegistry::CreateOp("mul", - { - Input("X"), Input("W"), - }, - {Output("before_act")}, {})); - auto b = Input("b"); - if (b != framework::kEmptyVarName) { - AddOp(OpRegistry::CreateOp("rowwise_add", - {Output("before_act"), Input("b")}, - {Output("before_act")}, {})); - } - - auto activation = GetAttr("activation"); - AddOp(OpRegistry::CreateOp(activation, {Output("before_act")}, - {Output("Y")}, {})); - CompleteAddOp(false); - } -}; - -class FullyConnectedOpMaker : public OpProtoAndCheckerMaker { - public: - FullyConnectedOpMaker(OpProto *proto, OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "the input of fc operator"); - AddInput("W", "the weight of fc operator"); - AddInput("b", "the bias of fc operator"); - - AddOutput("Y", "the output of fc operator"); - AddOutput("before_act", "the before activation output of fc operator") - .SetTemporary(); - AddAttr("activation", "The activation key for fc layer") - .SetDefault("sigmoid") - .InEnum({"sigmoid", "softmax"}); - - //! TODO(yuyang18): Complete comment; - AddComment("FullyConnected Operator"); - } -}; -} // namespace operators -} // namespace paddle - -USE_OP(mul); -USE_OP(rowwise_add); -USE_OP(sigmoid); -USE_OP(softmax); - -REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 6dcc9372b2..eda23a0ccf 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class FillZerosLikeOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(FillZerosLikeOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, @@ -50,8 +51,8 @@ The output will have the same size with input. } // namespace operators } // namespace paddle -REGISTER_OP(fill_zeros_like, paddle::operators::FillZerosLikeOp, - paddle::operators::FillZerosLikeOpMaker); +namespace ops = paddle::operators; +REGISTER_OP(fill_zeros_like, ops::FillZerosLikeOp, ops::FillZerosLikeOpMaker); REGISTER_OP_CPU_KERNEL( fill_zeros_like, - paddle::operators::FillZerosLikeKernel); + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.cu b/paddle/operators/fill_zeros_like_op.cu index 4f1054cf47..fdbcf520a0 100644 --- a/paddle/operators/fill_zeros_like_op.cu +++ b/paddle/operators/fill_zeros_like_op.cu @@ -16,6 +16,7 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/fill_zeros_like_op.h" +namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( fill_zeros_like, - paddle::operators::FillZerosLikeKernel); + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index dfaed2c9aa..f846c7a8ab 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h new file mode 100644 index 0000000000..0c73717d38 --- /dev/null +++ b/paddle/operators/gather.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include + +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace operators { + +// Implementation of CPU copy +template +void CPUGather(const T* params, const int* indices, const int slice_size, + const int index_size, T* output) { + const size_t slice_bytes = slice_size * sizeof(T); + + for (size_t i = 0; i < index_size; ++i) { + int index_ = indices[i]; + memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); + } +} + +// Implementation of GPU copy: +template +void GPUGather(const T* src, const int* index, const int slice_size, + const int index_size, T* output); + +/** + * Return a new tensor from source tensor, gathered according to index + * input[src]: type-T source Tensor + * input[index]: type-int index Tensor (1-D) + * return: output tensor + */ +template +void Gather(const platform::Place& place, const paddle::framework::Tensor* src, + const paddle::framework::Tensor* index, + paddle::framework::Tensor* output) { + // check index of shape 1-D + PADDLE_ENFORCE(index->dims().size() == 1); + int index_size = index->dims()[0]; + + auto src_dims = src->dims(); + paddle::framework::DDim output_dims(src_dims); + output_dims[0] = index_size; + + // slice size + int slice_size = 1; + for (size_t i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + + // Gathering + if (platform::is_cpu_place(place)) { + CPUGather(src->data(), index->data(), slice_size, index_size, + output->data()); + } +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc new file mode 100644 index 0000000000..5de748ec46 --- /dev/null +++ b/paddle/operators/gather_test.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/gather.h" +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" + +#include +#include +#include + +TEST(Gather, GatherData) { + using namespace paddle::framework; + using namespace paddle::platform; + using namespace paddle::operators; + + Tensor* src = new Tensor(); + Tensor* index = new Tensor(); + Tensor* output = new Tensor(); + + int* p_src = nullptr; + int* p_index = nullptr; + p_src = src->mutable_data(make_ddim({3, 4}), CPUPlace()); + p_index = index->mutable_data(make_ddim({2}), CPUPlace()); + + for (size_t i = 0; i < 12; ++i) p_src[i] = i; + p_index[0] = 1; + p_index[1] = 0; + + int* p_output = output->mutable_data(make_ddim({2, 4}), CPUPlace()); + + Gather(CPUPlace(), src, index, output); + + for (size_t i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); + for (size_t i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); +} diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc new file mode 100644 index 0000000000..893cf56e5c --- /dev/null +++ b/paddle/operators/gaussian_random_op.cc @@ -0,0 +1,83 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class GaussianRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + // TODO(dzh): attribute does not support unsigned int. + // And we need a global random seed configuration. + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + seed = std::random_device()(); + } + std::mt19937 g(seed); + std::normal_distribution distribution(mean, std); + ssize_t size = framework::product(tensor->dims()); + for (int i = 0; i < size; ++i) { + data[i] = distribution(g); + } + } +}; + +class GaussianRandomOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(GaussianRandomOp, framework::OperatorWithKernel) + protected: + void InferShape(const framework::InferShapeContext& context) const override { + auto* tensor = context.Output(0); + auto dims = GetAttr>("dims"); + PADDLE_ENFORCE(dims.size() > 0UL, + "dims can be one int or array. dims must be set."); + tensor->Resize(framework::make_ddim(dims)); + } +}; + +class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GaussianRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "output matrix of random op"); + AddComment(R"DOC( +GaussianRandom operator. +Use to initialize tensor with gaussian random generator. +)DOC"); + + AddAttr>("dims", "The dimension of random tensor."); + AddAttr("mean", "mean value of random.").SetDefault(.0f); + AddAttr("std", "minimum value of random value.").SetDefault(1.0f); + AddAttr("seed", + "Random seed of generator." + "0 means use system wide seed") + .SetDefault(0); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker); +REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu new file mode 100644 index 0000000000..1340b1e1e9 --- /dev/null +++ b/paddle/operators/gaussian_random_op.cu @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include "paddle/platform/dynload/curand.h" +#include "paddle/platform/gpu_info.h" + +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class GaussianRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.op_.GetAttr("mean"); + float std = context.op_.GetAttr("std"); + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + + int seed = context.op_.GetAttr("seed"); + if (seed == 0) { + std::random_device rd; + seed = rd(); + } + curandGenerator_t g; + PADDLE_ENFORCE(platform::dynload::curandCreateGenerator( + &g, CURAND_RNG_PSEUDO_DEFAULT)); + PADDLE_ENFORCE( + platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); + platform::dynload::curandGenerateNormal( + g, data, framework::product(tensor->dims()), mean, std); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 8ab4e82ac4..f6abba7ab4 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -17,9 +17,10 @@ limitations under the License. */ namespace paddle { namespace operators { -class MeanOp : public OperatorWithKernel { +class MeanOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MeanOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set"); @@ -28,9 +29,9 @@ class MeanOp : public OperatorWithKernel { } }; -class MeanOpMaker : public OpProtoAndCheckerMaker { +class MeanOpMaker : public framework::OpProtoAndCheckerMaker { public: - MeanOpMaker(OpProto *proto, OpAttrChecker *op_checker) + MeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input of mean op"); AddOutput("Out", "The output of mean op").IgnoreGradient(); @@ -38,10 +39,11 @@ class MeanOpMaker : public OpProtoAndCheckerMaker { } }; -class MeanGradOp : public OperatorWithKernel { +class MeanGradOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MeanGradOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { - ctx.Output("X" + framework::kGradVarSuffix) + void InferShape(const framework::InferShapeContext &ctx) const override { + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; @@ -49,7 +51,10 @@ class MeanGradOp : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker); -REGISTER_OP_CPU_KERNEL(mean, ops::MeanKernel); +REGISTER_OP_CPU_KERNEL(mean, + ops::MeanKernel); REGISTER_GRADIENT_OP(mean, mean_grad, ops::MeanGradOp); -REGISTER_OP_CPU_KERNEL(mean_grad, ops::MeanGradKernel); +REGISTER_OP_CPU_KERNEL(mean_grad, + ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.cu b/paddle/operators/mean_op.cu index 8b97b0154c..7af624d81d 100644 --- a/paddle/operators/mean_op.cu +++ b/paddle/operators/mean_op.cu @@ -16,5 +16,8 @@ #include "paddle/operators/mean_op.h" -REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel); -REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(mean, + ops::MeanKernel); +REGISTER_OP_GPU_KERNEL(mean_grad, + ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index 40a1e2d099..e8595a14fa 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -13,15 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenScalar = framework::EigenScalar; +template +using EigenVector = framework::EigenVector; + template -class MeanKernel : public OpKernel { +class MeanKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input(0); auto output = context.Output(0); @@ -36,13 +45,13 @@ class MeanKernel : public OpKernel { }; template -class MeanGradKernel : public OpKernel { +class MeanGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { - auto OG = context.Input("Out" + framework::kGradVarSuffix); + void Compute(const framework::ExecutionContext& context) const override { + auto OG = context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE(framework::product(OG->dims()) == 1, "Mean Gradient should be scalar"); - auto IG = context.Output("X" + framework::kGradVarSuffix); + auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); T ig_size = (T)framework::product(IG->dims()); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index ccab9a994c..6115a3f333 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -17,9 +17,10 @@ namespace paddle { namespace operators { -class MulOp : public OperatorWithKernel { +class MulOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MulOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs"); auto dim0 = ctx.Input(0)->dims(); auto dim1 = ctx.Input(1)->dims(); @@ -37,9 +38,9 @@ class MulOp : public OperatorWithKernel { } }; -class MulOpMaker : public OpProtoAndCheckerMaker { +class MulOpMaker : public framework::OpProtoAndCheckerMaker { public: - MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The first input of mul op"); AddInput("Y", "The second input of mul op"); @@ -52,9 +53,10 @@ The equation is: Out = X * Y } }; -class MulOpGrad : public OperatorWithKernel { +class MulOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MulOpGrad, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override {} + void InferShape(const framework::InferShapeContext &ctx) const override {} std::string DebugString() const override { LOG(INFO) << "MulGrad"; return ""; @@ -64,7 +66,8 @@ class MulOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker); REGISTER_GRADIENT_OP(mul, mul_grad, ops::MulOpGrad); -REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); +REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu index 1dc04c4297..43debbc21a 100644 --- a/paddle/operators/mul_op.cu +++ b/paddle/operators/mul_op.cu @@ -15,4 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/mul_op.h" -REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index 7ecd6e8ac0..ab12631c03 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -13,16 +13,21 @@ limitations under the License. */ #pragma once - -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + template -class MulKernel : public OpKernel { +class MulKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { Eigen::array, 1> dim_pair = { {Eigen::IndexPair(1, 0)}}; @@ -40,5 +45,6 @@ class MulKernel : public OpKernel { Z.device(place) = X.contract(Y, dim_pair); } }; + } // namespace operators } // namespace paddle diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc index fbc98e0992..a466c4f30f 100644 --- a/paddle/operators/net_op.cc +++ b/paddle/operators/net_op.cc @@ -15,7 +15,6 @@ */ #include "paddle/operators/net_op.h" -#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index bb2d02b56f..24c9e61c66 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -14,13 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/framework/op_desc.pb.h" -#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/scope.h" -#include "paddle/operators/type_alias.h" -#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -41,6 +35,8 @@ namespace operators { */ class NetOp : public framework::OperatorBase { public: + DEFINE_OPERATOR_CTOR(NetOp, framework::OperatorBase) + /** * Infer all the operators' input and output variables' shapes, will be called * before every mini-batch @@ -65,6 +61,15 @@ class NetOp : public framework::OperatorBase { } } + bool SupportGPU() const override { + for (auto& op : ops_) { + if (!op->SupportGPU()) { + return false; + } + } + return true; + } + /** * @brief Add an operator by ptr */ diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index c0a345464a..0d5c3de798 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -2,31 +2,31 @@ #include -#include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" - namespace paddle { namespace operators { +using Scope = framework::Scope; +using DeviceContext = platform::DeviceContext; static int infer_shape_cnt = 0; static int run_cnt = 0; -class TestOp : public OperatorBase { +class TestOp : public framework::OperatorBase { public: - void InferShape(const framework::Scope& scope) const override { - ++infer_shape_cnt; - } - void Run(const framework::Scope& scope, - const paddle::platform::DeviceContext& dev_ctx) const override { + DEFINE_OPERATOR_CTOR(TestOp, framework::OperatorBase) + + void InferShape(const Scope& scope) const override { ++infer_shape_cnt; } + void Run(const Scope& scope, + const platform::DeviceContext& dev_ctx) const override { ++run_cnt; } }; -class EmptyOp : public OperatorBase { +class EmptyOp : public framework::OperatorBase { public: + DEFINE_OPERATOR_CTOR(EmptyOp, framework::OperatorBase) + void InferShape(const Scope& scope) const override {} - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} + void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {} }; template @@ -72,7 +72,7 @@ TEST(OpKernel, all) { net->Run(scope, dev_ctx); ASSERT_EQ(2, infer_shape_cnt); ASSERT_EQ(2, run_cnt); - ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); + ASSERT_THROW(net->AddOp(op2), platform::EnforceNotMet); } TEST(NetOp, insert_op) { diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 5e9c15ca0e..2438374205 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -14,17 +14,19 @@ #include "paddle/operators/recurrent_op.h" -#include #include #include #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" -#include "paddle/platform/enforce.h" namespace paddle { namespace operators { +using Scope = framework::Scope; +using Variable = framework::Variable; +using Tensor = framework::Tensor; + void RecurrentAlgorithm::InferShape(const Scope& scope) const { seq_len_ = scope.FindVar((arg_->inlinks[0]).external) ->GetMutable() @@ -135,10 +137,11 @@ void RecurrentOp::Init() { alg_.Init(std::move(arg)); } -class RecurrentAlgorithmProtoAndCheckerMaker : public OpProtoAndCheckerMaker { +class RecurrentAlgorithmProtoAndCheckerMaker + : public framework::OpProtoAndCheckerMaker { public: - RecurrentAlgorithmProtoAndCheckerMaker(OpProto* proto, - OpAttrChecker* op_checker) + RecurrentAlgorithmProtoAndCheckerMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { const auto& name = RecurrentOp::kArgName; // inputs and outputs stored in proto diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index d1e60fed9c..fdd9d00537 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -100,6 +100,7 @@ class RecurrentGradientAlgorithm { }; class RecurrentOp final : public framework::OperatorBase { + DEFINE_OPERATOR_CTOR(RecurrentOp, framework::OperatorBase) public: void Init() override; diff --git a/paddle/operators/recurrent_op_test.cc b/paddle/operators/recurrent_op_test.cc index 3607d14bf8..0c9a343415 100644 --- a/paddle/operators/recurrent_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -27,6 +27,10 @@ namespace operators { using framework::make_ddim; using framework::DDim; +using framework::Tensor; +using framework::Variable; +using framework::Scope; +using framework::OpRegistry; class RecurrentOpTest : public ::testing::Test { protected: @@ -164,7 +168,7 @@ class RecurrentOpTest : public ::testing::Test { // father scope Scope scope_; - std::shared_ptr rnn_op_; + std::shared_ptr rnn_op_; }; TEST_F(RecurrentOpTest, Run) { @@ -391,4 +395,4 @@ TEST(RecurrentOp, LinkMemories) { USE_OP(add_two); USE_OP(mul); -USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP_ITSELF(recurrent_op); diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index 32c6c2dd4e..7e4770630e 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -18,7 +18,9 @@ namespace paddle { namespace operators { namespace rnn { -namespace fmw = paddle::framework; +namespace f = paddle::framework; + +using Tensor = framework::Tensor; void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, const size_t seq_len, @@ -30,10 +32,10 @@ void SegmentInputs(const std::vector& step_scopes, inlinks[i].external); Tensor* input = input_var->GetMutable(); - fmw::DDim dims = input->dims(); + f::DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); - fmw::DDim step_dims = slice_ddim(dims, 1, dims.size()); + f::DDim step_dims = slice_ddim(dims, 1, dims.size()); for (size_t j = 0; j < seq_len; j++) { Tensor* step_input = step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable(); @@ -58,11 +60,10 @@ void ConcatOutputs(const std::vector& step_scopes, auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal); PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", outlinks[i].internal); - fmw::DDim step_dims = - step_scope_var->template GetMutable()->dims(); + f::DDim step_dims = step_scope_var->template GetMutable()->dims(); std::vector dims_vec = vectorize(step_dims); dims_vec.insert(dims_vec.begin(), seq_len); - output->Resize(fmw::make_ddim(dims_vec)); + output->Resize(f::make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); for (size_t j = 0; j < seq_len; j++) { @@ -104,7 +105,7 @@ void LinkMemories(const std::vector& scopes, } void InitArgument(const ArgumentName& name, Argument* arg, - const OperatorBase& op) { + const framework::OperatorBase& op) { arg->step_net = op.Input(name.step_net); arg->step_scopes = op.Output(name.step_scopes); diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h index 379754b98f..17941c503c 100644 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ b/paddle/operators/rnn/recurrent_op_utils.h @@ -17,12 +17,13 @@ #include #include "paddle/framework/operator.h" -#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { namespace rnn { +using Scope = framework::Scope; + /** * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). * @@ -86,7 +87,7 @@ void LinkMemories(const std::vector& step_scopes, const int offset, bool infer_shape_mode); void InitArgument(const ArgumentName& name, Argument* arg, - const OperatorBase& op); + const framework::OperatorBase& op); } // namespace rnn } // namespace operators diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index 8d1a36f2b3..402f6340a0 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -13,12 +13,14 @@ limitations under the License. */ #include "paddle/operators/rowwise_add_op.h" + namespace paddle { namespace operators { -class RowWiseAddOp : public OperatorWithKernel { +class RowWiseAddOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(RowWiseAddOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2UL, "Two inputs is needed by rowwise add"); auto dim0 = ctx.Input(0)->dims(); @@ -32,9 +34,10 @@ class RowWiseAddOp : public OperatorWithKernel { } }; -class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { +class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker { public: - RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) + RowWiseAddOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The left input of row-wise add op, must be matrix"); AddInput("b", "The right input of row-wise add op, must be vector"); @@ -50,6 +53,7 @@ for i in xrange(X.shape[0]): } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(rowwise_add, ops::RowWiseAddOp, ops::RowWiseAddOpMaker); -REGISTER_OP_CPU_KERNEL(rowwise_add, - ops::RowWiseAddKernel); +REGISTER_OP_CPU_KERNEL( + rowwise_add, ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index f76faa0a3a..86f80b8122 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -15,5 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/rowwise_add_op.h" -REGISTER_OP_GPU_KERNEL(rowwise_add, - ops::RowWiseAddKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + rowwise_add, ops::RowWiseAddKernel); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index b52524c47c..82e9d70e95 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -13,15 +13,24 @@ limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenMatrix = framework::EigenMatrix; + template -class RowWiseAddKernel : public OpKernel { +class RowWiseAddKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto out = context.Output(0); out->mutable_data(context.GetPlace()); diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index e0532f2f09..5b8093f0f7 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -17,9 +17,10 @@ limitations under the License. */ namespace paddle { namespace operators { -class SGDOp : public OperatorWithKernel { +class SGDOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SGDOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set"); @@ -31,9 +32,9 @@ class SGDOp : public OperatorWithKernel { } }; -class SGDOpMaker : public OpProtoAndCheckerMaker { +class SGDOpMaker : public framework::OpProtoAndCheckerMaker { public: - SGDOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("param", "input parameter"); AddInput("grad", "input gradient"); @@ -51,5 +52,7 @@ param_out = param - learning_rate * grad; } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(sgd, ops::SGDOp, ops::SGDOpMaker); -REGISTER_OP_CPU_KERNEL(sgd, ops::SGDOpKernel); +REGISTER_OP_CPU_KERNEL(sgd, + ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index 72629ccfbb..f5ba6d3c29 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -15,4 +15,6 @@ #define EIGEN_USE_GPU #include "paddle/operators/sgd_op.h" -REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel); +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(sgd, + ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index bf5b195933..bfb449d0b0 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -13,15 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class SGDOpKernel : public OpKernel { +class SGDOpKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto param = ctx.Input("param"); auto grad = ctx.Input("grad"); auto param_out = ctx.Output(0); diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc deleted file mode 100644 index 75137259f5..0000000000 --- a/paddle/operators/sgd_op_test.cc +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -USE_OP(sgd); -TEST(SGDOp, GetOpProto) { - auto& protos = paddle::framework::OpRegistry::protos(); - auto it = protos.find("sgd"); - ASSERT_NE(it, protos.end()); -} diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index 1eb795faa8..a02e2dc39e 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -13,21 +13,24 @@ limitations under the License. */ #include "paddle/operators/sigmoid_op.h" + namespace paddle { namespace operators { -class SigmoidOp : public OperatorWithKernel { +class SigmoidOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SigmoidOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 1, "Sigmoid Op only have one input"); PADDLE_ENFORCE(ctx.OutputSize() == 1, "Sigmoid Op only have one output"); ctx.Output(0)->Resize(ctx.Input(0)->dims()); } }; -class SigmoidOpMaker : public OpProtoAndCheckerMaker { +class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { public: - SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SigmoidOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "sigmoid input"); AddOutput("Y", "sigmoid output"); @@ -35,9 +38,10 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker { } }; -class SigmoidOpGrad : public OperatorWithKernel { +class SigmoidOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SigmoidOpGrad, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(0)->Resize(ctx.Input(0)->dims()); } }; @@ -45,9 +49,11 @@ class SigmoidOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker); REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad); -REGISTER_OP_CPU_KERNEL(sigmoid, ops::SigmoidKernel); -REGISTER_OP_CPU_KERNEL(sigmoid_grad, - ops::SigmoidGradKernel); +REGISTER_OP_CPU_KERNEL(sigmoid, + ops::SigmoidKernel); +REGISTER_OP_CPU_KERNEL( + sigmoid_grad, ops::SigmoidGradKernel); diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu index e80ba081f2..1a50dfe14a 100644 --- a/paddle/operators/sigmoid_op.cu +++ b/paddle/operators/sigmoid_op.cu @@ -15,6 +15,9 @@ #define EIGEN_USE_GPU #include "paddle/operators/sigmoid_op.h" -REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel); -REGISTER_OP_GPU_KERNEL(sigmoid_grad, - ops::SigmoidGradKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(sigmoid, + ops::SigmoidKernel); +REGISTER_OP_GPU_KERNEL( + sigmoid_grad, ops::SigmoidGradKernel); diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index d513261e74..7af879b209 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -13,16 +13,21 @@ limitations under the License. */ #pragma once - -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + template -class SigmoidKernel : public OpKernel { +class SigmoidKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input(0); auto output = context.Output(0); output->mutable_data(context.GetPlace()); @@ -37,9 +42,9 @@ class SigmoidKernel : public OpKernel { }; template -class SigmoidGradKernel : public OpKernel { +class SigmoidGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto Y_t = context.Input("Y"); auto dY_t = context.Input(framework::GradVarName("Y")); auto dX_t = context.Output(framework::GradVarName("X")); diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index c08e1b153c..9b6a679642 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -17,9 +17,10 @@ limitations under the License. */ namespace paddle { namespace operators { -class SoftmaxOp : public OperatorWithKernel { +class SoftmaxOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SoftmaxOp, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, "Only one input is need for softmax"); PADDLE_ENFORCE_EQ(ctx.Input("X")->dims().size(), 2UL, @@ -30,9 +31,10 @@ class SoftmaxOp : public OperatorWithKernel { } }; -class SoftmaxOpMaker : public OpProtoAndCheckerMaker { +class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { public: - SoftmaxOpMaker(OpProto *proto, OpAttrChecker *op_checker) + SoftmaxOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "input of softmax"); AddOutput("Y", "output of softmax"); @@ -40,9 +42,10 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker { } }; -class SoftmaxOpGrad : public OperatorWithKernel { +class SoftmaxOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SoftmaxOpGrad, framework::OperatorWithKernel) protected: - void InferShape(const InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL, "Input of SoftmaxOpGrad should be 3, X, Y, YG"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, @@ -61,8 +64,11 @@ class SoftmaxOpGrad : public OperatorWithKernel { } // namespace operators } // namespace paddle +namespace ops = paddle::operators; + REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); -REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel); +REGISTER_OP_CPU_KERNEL(softmax, + ops::SoftmaxKernel); REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); -REGISTER_OP_CPU_KERNEL(softmax_grad, - ops::SoftmaxGradKernel); +REGISTER_OP_CPU_KERNEL( + softmax_grad, ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index b79228580a..2e99a89699 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,9 +13,11 @@ limitations under the License. */ #define EIGEN_USE_GPU -#include "paddle/framework/op_registry.h" #include "paddle/operators/softmax_op.h" -REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel); -REGISTER_OP_GPU_KERNEL(softmax_grad, - ops::SoftmaxGradKernel); +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL(softmax, + ops::SoftmaxKernel); +REGISTER_OP_GPU_KERNEL( + softmax_grad, ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index b2dbcf57ed..4fa6b59540 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -13,19 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - -#include "paddle/framework/ddim.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/tensor.h" -#include "paddle/operators/type_alias.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + template -class SoftmaxKernel : public OpKernel { +class SoftmaxKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { auto input = context.Input("X"); auto output = context.Output("Y"); output->mutable_data(context.GetPlace()); @@ -62,9 +64,9 @@ class SoftmaxKernel : public OpKernel { }; template -class SoftmaxGradKernel : public OpKernel { +class SoftmaxGradKernel : public framework::OpKernel { public: - void Compute(const ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext& context) const override { std::shared_ptr scale_ = std::make_shared(); auto Y = context.Input("Y"); diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h deleted file mode 100644 index eac12d35dd..0000000000 --- a/paddle/operators/type_alias.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" -#include "paddle/operators/net_op.h" - -namespace paddle { -namespace operators { - -using OpKernel = framework::OpKernel; -using OperatorBase = framework::OperatorBase; -using InferShapeContext = framework::InferShapeContext; -using ExecutionContext = framework::ExecutionContext; -using Variable = framework::Variable; -template -using EigenScalar = framework::EigenScalar; -template -using EigenVector = framework::EigenVector; -template -using EigenMatrix = framework::EigenMatrix; -template -using EigenTensor = framework::EigenTensor; -using Tensor = framework::Tensor; -using Scope = framework::Scope; -using OperatorWithKernel = framework::OperatorWithKernel; -using OperatorBase = framework::OperatorBase; -using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; -using OpProto = framework::OpProto; -using OpAttrChecker = framework::OpAttrChecker; -using CPUPlace = platform::CPUPlace; -using GPUPlace = platform::GPUPlace; -using OpRegistry = framework::OpRegistry; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc new file mode 100644 index 0000000000..ea81ec053f --- /dev/null +++ b/paddle/operators/uniform_random_op.cc @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. +template +class CPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + std::uniform_real_distribution dist( + static_cast(context.op_.GetAttr("min")), + static_cast(context.op_.GetAttr("max"))); + for (ssize_t i = 0; i < framework::product(tensor->dims()); ++i) { + data[i] = dist(engine); + } + } +}; + +class UniformRandomOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(UniformRandomOp, framework::OperatorWithKernel) + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), + "uniform_random's min must less then max"); + auto* tensor = ctx.Output(0); + auto dims = GetAttr>("dims"); + tensor->Resize(framework::make_ddim(dims)); + } +}; + +class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { + public: + UniformRandomOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "The output tensor of uniform random op"); + AddComment(R"DOC(Uniform random operator. + +Used to initialize tensor with uniform random generator. +)DOC"); + AddAttr>("dims", "the dimension of random tensor"); + AddAttr("min", "Minimum value of uniform random").SetDefault(-1.0f); + AddAttr("max", "Maximun value of uniform random").SetDefault(1.0f); + AddAttr("seed", + "Random seed of uniform random. " + "0 means generate a seed by system") + .SetDefault(0); + } +}; +} // namespace operators +} // namespace paddle + +REGISTER_OP(uniform_random, paddle::operators::UniformRandomOp, + paddle::operators::UniformRandomOpMaker); +REGISTER_OP_CPU_KERNEL(uniform_random, + paddle::operators::CPUUniformRandomKernel); diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu new file mode 100644 index 0000000000..b35ebe7b63 --- /dev/null +++ b/paddle/operators/uniform_random_op.cu @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +template +struct UniformGenerator { + T min_, max_; + unsigned int seed_; + + __host__ __device__ UniformGenerator(T min, T max, int seed) + : min_(min), max_(max), seed_(seed) {} + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed_); + thrust::uniform_real_distribution dist(min_, max_); + rng.discard(n); + return dist(rng); + } +}; + +// It seems that Eigen::Tensor::random in GPU will SEGFAULT. +// Use std::random and thrust::random(thrust is a std library in CUDA) to +// implement uniform random. +template +class GPUUniformRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output(0); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + if (seed == 0) { + std::random_device rd; + seed = rd(); + } + T min = static_cast(context.op_.GetAttr("min")); + T max = static_cast(context.op_.GetAttr("max")); + thrust::counting_iterator index_sequence_begin(0); + ssize_t N = framework::product(tensor->dims()); + thrust::transform(index_sequence_begin, index_sequence_begin + N, + thrust::device_ptr(data), + UniformGenerator(min, max, seed)); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_GPU_KERNEL(uniform_random, + paddle::operators::GPUUniformRandomKernel); diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index bd77bb7daa..4154aad15c 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -8,7 +8,7 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) -cc_test(enforce_test SRCS enforce_test.cc) +cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece) IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index c44b7240a8..aad8097dbb 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -48,13 +48,13 @@ extern void *cublas_dso_handle; }; \ extern DynLoad__##__name __name #else -#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - inline template \ - cublasStatus_t operator()(Args... args) { \ - return __name(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + inline cublasStatus_t operator()(Args... args) { \ + return __name(args...); \ + } \ + }; \ extern DynLoad__##__name __name #endif diff --git a/paddle/platform/dynload/curand.h b/paddle/platform/dynload/curand.h index d8c46bc41e..7bfe0778c7 100644 --- a/paddle/platform/dynload/curand.h +++ b/paddle/platform/dynload/curand.h @@ -55,6 +55,7 @@ extern void *curand_dso_handle; __macro(curandSetPseudoRandomGeneratorSeed); \ __macro(curandGenerateUniform); \ __macro(curandGenerateUniformDouble); \ + __macro(curandGenerateNormal); \ __macro(curandDestroyGenerator); CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP); diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index d2adb997de..337a059fb1 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -15,11 +15,12 @@ limitations under the License. */ #pragma once #include -#include #include #include #include #include +#include "paddle/string/printf.h" +#include "paddle/string/to_string.h" #ifndef PADDLE_ONLY_CPU @@ -194,8 +195,8 @@ inline void throw_on_error(T e) { #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ - #__VAL0, #__VAL1, std::to_string(__VAL0), \ - std::to_string(__VAL1), \ + #__VAL0, #__VAL1, paddle::string::to_string(__VAL0), \ + paddle::string::to_string(__VAL1), \ paddle::string::Sprintf("" __VA_ARGS__)); } // namespace platform diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 4dfb697546..80bdee3d9d 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -9,10 +9,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include +#include #include #include "gtest/gtest.h" #include "paddle/platform/enforce.h" +#include "paddle/string/piece.h" + +using StringPiece = paddle::string::Piece; +using paddle::string::HasPrefix; TEST(ENFORCE, OK) { PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); @@ -22,19 +28,15 @@ TEST(ENFORCE, OK) { } TEST(ENFORCE, FAILED) { - bool in_catch = false; + bool caught_exception = false; try { PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); } catch (paddle::platform::EnforceNotMet error) { - // your error handling code here - in_catch = true; - std::string msg = "Enforce is not ok 123 at all"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "Enforce is not ok 123 at all")); } - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE, NO_ARG_OK) { @@ -47,41 +49,27 @@ TEST(ENFORCE, NO_ARG_OK) { TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { int a = 2; - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_EQ(a, 1 + 3); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce a == 1 + 3 failed, 2 != 4"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + HasPrefix(StringPiece(error.what()), "enforce a == 1 + 3 failed, 2 != 4"); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { int a = 2; - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their"); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = - "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + HasPrefix(StringPiece(error.what()), + "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_NE, OK) { @@ -89,42 +77,32 @@ TEST(ENFORCE_NE, OK) { PADDLE_ENFORCE_NE(1.0, 2UL); } TEST(ENFORCE_NE, FAIL) { - bool in_catch = false; + bool caught_exception = false; try { // 2UL here to check data type compatible PADDLE_ENFORCE_NE(1.0, 1UL); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1.0 != 1UL failed, 1.000000 == 1"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), + "enforce 1.0 != 1UL failed, 1 == 1")) + << error.what() << " does not have expected prefix"; } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); } TEST(ENFORCE_GT, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { - // 2UL here to check data type compatible PADDLE_ENFORCE_GT(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_GE, OK) { @@ -134,21 +112,16 @@ TEST(ENFORCE_GE, OK) { PADDLE_ENFORCE_GE(3.21, 2UL); } TEST(ENFORCE_GE, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_GE(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 >= 2UL failed, 1 < 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 >= 2UL failed, 1 < 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LE, OK) { @@ -159,21 +132,16 @@ TEST(ENFORCE_LE, OK) { PADDLE_ENFORCE_LE(2UL, 3.2); } TEST(ENFORCE_LE, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_GT(1, 2UL); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LT, OK) { @@ -182,21 +150,15 @@ TEST(ENFORCE_LT, OK) { PADDLE_ENFORCE_LT(2UL, 3); } TEST(ENFORCE_LT, FAIL) { - bool in_catch = false; - + bool caught_exception = false; try { PADDLE_ENFORCE_LT(1UL, 0.12); - } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "enforce 1UL < 0.12 failed, 1 >= 0.12"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); - } + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), + "enforce 1UL < 0.12 failed, 1 >= 0.12")); } - - ASSERT_TRUE(in_catch); + EXPECT_TRUE(caught_exception); } TEST(ENFORCE_NOT_NULL, OK) { @@ -205,20 +167,50 @@ TEST(ENFORCE_NOT_NULL, OK) { delete a; } TEST(ENFORCE_NOT_NULL, FAIL) { - bool in_catch = false; - int* a{nullptr}; - + bool caught_exception = false; try { + int* a = nullptr; PADDLE_ENFORCE_NOT_NULL(a); } catch (paddle::platform::EnforceNotMet error) { - in_catch = true; - const std::string msg = "a should not be null"; - const char* what = error.what(); - for (size_t i = 0; i < msg.length(); ++i) { - ASSERT_EQ(what[i], msg[i]); + caught_exception = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), "a should not be null")); + } + EXPECT_TRUE(caught_exception); +} + +struct Dims { + size_t dims_[4]; + + bool operator==(const Dims& o) const { + for (size_t i = 0; i < 4; ++i) { + if (dims_[i] != o.dims_[i]) return false; } + return true; } +}; - ASSERT_TRUE(in_catch); +std::ostream& operator<<(std::ostream& os, const Dims& d) { + for (size_t i = 0; i < 4; ++i) { + if (i == 0) { + os << "["; + } + os << d.dims_[i]; + if (i == 4 - 1) { + os << "]"; + } else { + os << ", "; + } + } + return os; } + +TEST(ENFORCE_USER_DEFINED_CLASS, EQ) { + Dims a{{1, 2, 3, 4}}, b{{1, 2, 3, 4}}; + PADDLE_ENFORCE_EQ(a, b); +} + +TEST(ENFORCE_USER_DEFINED_CLASS, NE) { + Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; + ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet); +} \ No newline at end of file diff --git a/paddle/platform/place.h b/paddle/platform/place.h index a82e8c942f..1117476bb3 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include #include +#include "paddle/platform/variant.h" namespace paddle { namespace platform { diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h new file mode 100644 index 0000000000..c2257af1b5 --- /dev/null +++ b/paddle/platform/variant.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include + +#ifndef PADDLE_ONLY_CPU + +// Because boost's variadic templates has bug on nvcc, boost will disable +// variadic template support when GPU enabled on nvcc. +// Define BOOST_NO_CXX11_VARIADIC_TEMPLATES on gcc/clang to generate same +// function symbols. +// +// https://github.com/PaddlePaddle/Paddle/issues/3386 +#ifndef BOOST_NO_CXX11_VARIADIC_TEMPLATES +#define BOOST_NO_CXX11_VARIADIC_TEMPLATES +#endif +#endif + +#include diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/pserver/test/CMakeLists.txt index 6e8f9c37f6..b66a00ba06 100644 --- a/paddle/pserver/test/CMakeLists.txt +++ b/paddle/pserver/test/CMakeLists.txt @@ -3,7 +3,7 @@ add_unittest_without_exec(socket_test SocketTest.cpp) add_test(NAME socket_test - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/socket_test --loop_time=10) ####################### test_ProtoServer #################### @@ -12,7 +12,7 @@ add_unittest_without_exec(test_ProtoServer IF(NOT ON_TRAVIS) add_test(NAME test_ProtoServer - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) ENDIF(NOT ON_TRAVIS) @@ -24,5 +24,5 @@ ENDIF(NOT ON_TRAVIS) add_unittest_without_exec(test_ParameterServer2 test_ParameterServer2.cpp) add_test(NAME test_ParameterServer2 - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port -n 4 + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port -n 4 ${CMAKE_CURRENT_BINARY_DIR}/test_ParameterServer2) diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt deleted file mode 100644 index 8e6b258e00..0000000000 --- a/paddle/pybind/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - recurrent_op - fill_zeros_like_op) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 44442be472..2f0205b770 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -31,7 +31,7 @@ Configuring cmake in /paddle/build ... -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} - -DWITH_GOLANG=${WITH_GOLANG:-OFF} + -DWITH_GOLANG=${WITH_GOLANG:-ON} -DWITH_SWIG_PY=ON -DWITH_C_API=${WITH_C_API:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} @@ -51,7 +51,7 @@ cmake .. \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ - -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ + -DWITH_GOLANG=${WITH_GOLANG:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ -DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ @@ -74,11 +74,11 @@ cat < +#include + +namespace paddle { +namespace string { +template +inline std::string to_string(T v) { + std::ostringstream sout; + sout << v; + return sout.str(); +} + +// Faster std::string/const char* type +template <> +inline std::string to_string(std::string v) { + return v; +} + +template <> +inline std::string to_string(const char* v) { + return std::string(v); +} + +} // namespace string +} // namespace paddle diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc new file mode 100644 index 0000000000..5ff1b007f1 --- /dev/null +++ b/paddle/string/to_string_test.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/string/to_string.h" +#include + +constexpr char kOutputString[] = "User Defined Output"; +class UserDefinedClass { +public: +}; + +std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { + s << kOutputString; + return s; +} + +TEST(to_string, normal) { + using namespace paddle::string; + ASSERT_EQ("10", to_string(10)); + ASSERT_EQ("abc", to_string("abc")); + ASSERT_EQ("1.2", to_string(1.2)); +} + +TEST(to_string, user_defined) { + using namespace paddle::string; + UserDefinedClass instance; + ASSERT_EQ(kOutputString, to_string(instance)); +} \ No newline at end of file diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index e1558e3fdf..35dcb235e7 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -50,8 +50,8 @@ void NewRemoteParameterUpdater::init( // create parameter server client. if (useEtcd_) { - parameterClient_ = paddle_new_etcd_pserver_client( - (char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); + parameterClient_ = + paddle_new_etcd_pserver_client((char *)pserverSpec_.c_str()); } else { parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0); @@ -66,28 +66,92 @@ void NewRemoteParameterUpdater::init( // from parameter server if (paddle_begin_init_params(parameterClient_)) { LOG(INFO) << "paddle_begin_init_params start"; + // NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig. + // This makes golang pserver compatible with handy V1 demos. + // TODO(wuyi): Refine or remove these ugly converting lines + OptimizerConfig optimizerConfigV2; + if (trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } else if (trainerConfig_.learning_method() == "adagrad") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); + optimizerConfigV2.mutable_adagrad()->set_epsilon( + trainerConfig_.ada_epsilon()); + } else if (trainerConfig_.learning_method() == "adadelta") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); + optimizerConfigV2.mutable_adadelta()->set_epsilon( + trainerConfig_.ada_epsilon()); + optimizerConfigV2.mutable_adadelta()->set_rho(trainerConfig_.ada_rou()); + } else if (trainerConfig_.learning_method() == "adam") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adam); + optimizerConfigV2.mutable_adam()->set_beta_1(trainerConfig_.adam_beta1()); + optimizerConfigV2.mutable_adam()->set_beta_2(trainerConfig_.adam_beta2()); + optimizerConfigV2.mutable_adam()->set_epsilon( + trainerConfig_.adam_epsilon()); + } else { + LOG(ERROR) << "got unsupported v1 optimizer config: " + << trainerConfig_.learning_method(); + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } + + if (trainerConfig_.learning_rate_schedule() == "constant") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + } else if (trainerConfig_.learning_rate_schedule() == "linear") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Linear); + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_a( + trainerConfig_.learning_rate_decay_a()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_b( + trainerConfig_.learning_rate_decay_b()); + } else { + LOG(ERROR) << "got unsupported v1 learning_rate_schedule config: " + << trainerConfig_.learning_rate_schedule() << ", set to const"; + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + } + + // overwrite optimizerConfigV2 for per-parameter(layer) configs for (int i = 0; i < parameterSize(); ++i) { auto paramConfig = parameters_[i]->getConfig(); - LOG(INFO) << "old param config: " << paramConfig.DebugString(); - // FIXME(typhoonzero): convert old paramConfig to optimizerConfig - OptimizerConfig optimizeConfigV2; - auto sgdConfigV2 = optimizeConfigV2.mutable_sgd(); - sgdConfigV2->set_momentum(paramConfig.momentum()); - sgdConfigV2->set_decay(paramConfig.decay_rate()); - optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); - auto constlr = optimizeConfigV2.mutable_const_lr(); + if (paramConfig.has_momentum() && + trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.mutable_sgd()->set_momentum(paramConfig.momentum()); + } if (paramConfig.has_learning_rate()) { - constlr->set_learning_rate(paramConfig.learning_rate()); - } else { - constlr->set_learning_rate(trainerConfig_.learning_rate()); + switch (optimizerConfigV2.lr_policy()) { + case 0: + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + case 1: + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + } } - if (trainerConfig_.algorithm() == "sgd") { - optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); - // FIXME: config all algorithms - } else { - optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + if (paramConfig.has_decay_rate()) { + switch (optimizerConfigV2.optimizer()) { + case 1: // SGD + optimizerConfigV2.mutable_sgd()->set_decay( + paramConfig.decay_rate()); + break; + case 2: // Adadelta + optimizerConfigV2.mutable_adadelta()->set_decay( + paramConfig.decay_rate()); + break; + case 3: // Adagrad + optimizerConfigV2.mutable_adagrad()->set_decay( + paramConfig.decay_rate()); + break; + case 4: // Adam + optimizerConfigV2.mutable_adam()->set_decay( + paramConfig.decay_rate()); + break; + } } - std::string bytes = optimizeConfigV2.SerializeAsString(); + // send param and config to pserver + std::string bytes = optimizerConfigV2.SerializeAsString(); const char *array = bytes.data(); int size = (int)bytes.size(); paddle_init_param( diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index 133e2be104..eba40862b9 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -28,6 +28,8 @@ DECLARE_bool(with_cost); DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -44,6 +46,8 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn + << ",use_mkldnn=" << FLAGS_use_mkldnn + << ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index 08b2d8a38e..f01ad4142d 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -2,19 +2,19 @@ add_unittest_without_exec(test_Compare test_Compare.cpp) add_test(NAME test_Compare - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_Compare - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_Trainer ########################### add_unittest_without_exec(test_Trainer test_Trainer.cpp) add_test(NAME test_Trainer - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/gen_proto_data.py && - ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py && + ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_TrainerOnePass ########################## if(WITH_PYTHON) @@ -23,60 +23,60 @@ if(WITH_PYTHON) add_unittest_without_exec(test_TrainerOnePass test_TrainerOnePass.cpp) add_test(NAME test_TrainerOnePass - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests - ${PROJ_ROOT}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests + ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################ test_CompareTwoNets ###################### add_unittest_without_exec(test_CompareTwoNets test_CompareTwoNets.cpp) add_test(NAME test_CompareTwoNets - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_CompareTwoOpts ################### add_unittest_without_exec(test_CompareTwoOpts test_CompareTwoOpts.cpp) add_test(NAME test_CompareTwoOpts - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf --num_passes=1 --need_high_accuracy=0 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_CompareSparse ################## add_unittest_without_exec(test_CompareSparse test_CompareSparse.cpp) if(NOT ON_TRAVIS) add_test(NAME test_CompareSparse - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ./.set_port.sh -p port -n 6 ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################# test_recurrent_machine_generation ############### add_unittest_without_exec(test_recurrent_machine_generation test_recurrent_machine_generation.cpp) add_test(NAME test_recurrent_machine_generation - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_recurrent_machine_generation - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_PyDataProviderWrapper ######################### add_unittest_without_exec(test_PyDataProviderWrapper test_PyDataProviderWrapper.cpp) add_test(NAME test_PyDataProviderWrapper - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProviderWrapper - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_config_parser ######################### add_test(NAME test_config_parser - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/config_parser_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/config_parser_test.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index 320f671ed9..600c83a848 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -20,6 +20,14 @@ DEFINE_bool(use_gpu, false, "Only support CPU training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); #endif +#ifdef PADDLE_USE_MKLDNN +// TODO(TJ): change to true when MKLDNN layers support multi-inputs +DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); +#else +DEFINE_bool(use_mkldnn, false, "Only support CPU training"); +#endif + +DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight"); DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index dc4faef833..0aca4c0ee0 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -40,3 +40,5 @@ DECLARE_bool(show_layer_stat); DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt index aa923b3553..c770ce1698 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/utils/tests/CMakeLists.txt @@ -13,6 +13,6 @@ add_executable( link_paddle_exe(test_CustomStackTracePrint) if(NOT APPLE) add_test(NAME test_CustomStackTracePrint - COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh + COMMAND ${PADDLE_SOURCE_DIR}/paddle/utils/tests/test_CustomStackTracePrint.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index 18584cafe7..6212c2e60a 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -9,15 +9,15 @@ foreach(filename ${proto_filenames}) get_filename_component(ABS_FIL ${filename} ABSOLUTE) get_filename_component(FIL_WE ${filename} NAME_WE) set(CUR_PROTO_GEN_PY - ${PROJ_ROOT}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) + ${PADDLE_SOURCE_DIR}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) set(PROTO_GEN_PY ${CUR_PROTO_GEN_PY} ${PROTO_GEN_PY}) add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" + ARGS "--python_out=${PADDLE_SOURCE_DIR}/python/paddle/proto" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} - DEPENDS ${ABS_FIL} ${external_project_dependencies}) + DEPENDS ${ABS_FIL} protoc) endforeach() add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b5030da8e7..16c519d45a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,3 @@ -set(OUTPUT_DIR - "${CMAKE_CURRENT_BINARY_DIR}/build") file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) @@ -18,7 +16,7 @@ SET(COPY_PADDLE_MASTER "") if(WITH_GOLANG) SET(COPY_PADDLE_MASTER "copy_paddle_master") add_custom_command(TARGET ${COPY_PADDLE_MASTER} - COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ + COMMAND cp ${paddle_master_LIB_PATH} ${PADDLE_SOURCE_DIR}/python/paddle/v2/master/ ) add_dependencies(copy_paddle_master paddle_master) endif(WITH_GOLANG) @@ -27,19 +25,21 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) -add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so - COMMAND cmake -E copy $ ${PROJ_ROOT}/python/paddle/v2/framework/core.so +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so + COMMAND cmake -E copy $ ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so DEPENDS paddle_pybind) -add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so) +add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so) -add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp +add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python + COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS - ${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) + ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index c8fc49e20d..da99e5bd53 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): + layer_type = 'fc' + def __init__(self, name, size, @@ -1611,14 +1613,27 @@ class FCLayer(LayerBase): bias=True, error_clipping_threshold=None, **xargs): - super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) + use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) + use_mkldnn_wgt = bool( + int(g_command_config_args.get("use_mkldnn_wgt", 0))) + if use_mkldnn: + self.layer_type = 'mkldnn_fc' + config_assert( + len(inputs) == 1, + "MkldnnFCLayer support one and only one input!") + super(FCLayer, self).__init__( + name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) psize = self.config.size * input_layer.size dims = [input_layer.size, self.config.size] format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" - + if use_mkldnn: + config_assert(not sparse, + "MkldnnFCLayer do not support sparse format yet") + if use_mkldnn_wgt: + dims = [self.config.size, input_layer.size] if sparse: psize = self.inputs[input_index].nnz else: @@ -1631,6 +1646,11 @@ class FCLayer(LayerBase): self.config.error_clipping_threshold = error_clipping_threshold +@config_layer('mkldnn_fc') +class MkldnnFcLayer(FCLayer): + layer_type = 'mkldnn_fc' + + @config_layer('selective_fc') class SelectiveFCLayer(LayerBase): def __init__(self, @@ -3248,6 +3268,16 @@ class CTCLayer(LayerBase): config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs') +@config_layer('kmax_seq_score') +class KmaxSeqScoreLayer(LayerBase): + def __init__(self, name, inputs, beam_size, **xargs): + super(KmaxSeqScoreLayer, self).__init__( + name, 'kmax_seq_score', 0, inputs=inputs, **xargs) + config_assert( + len(self.inputs) == 1, 'KmaxSeqScoreLayer has only one input.') + self.config.beam_size = beam_size + + @config_layer('warp_ctc') class WarpCTCLayer(LayerBase): def __init__(self, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 2c7cebc359..1bc55c8696 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -132,6 +132,7 @@ __all__ = [ 'sub_nested_seq_layer', 'clip_layer', 'slice_projection', + 'kmax_sequence_score_layer', ] @@ -228,6 +229,8 @@ class LayerType(object): SUB_NESTED_SEQ = 'sub_nested_seq' CLIP_LAYER = 'clip' + KMAX_SEQ_SCORE = 'kmax_seq_score' + @staticmethod def is_layer_type(type_name): """ @@ -6158,7 +6161,8 @@ def clip_layer(input, min, max, name=None): :type min: double :param max: The upper threshold for clipping. :type max: double - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ Layer( name=name, @@ -6168,3 +6172,41 @@ def clip_layer(input, min, max, name=None): max=max) return LayerOutput( name, LayerType.CLIP_LAYER, parents=[input], size=input.size) + + +@wrap_name_default() +@layer_support() +def kmax_sequence_score_layer(input, name=None, beam_size=1): + """ + This layer accepts one input which are scores over a sequence or a nested + sequence, and returns indices of beam_size sequences with highest scores. + + .. code-block:: python + + kmax_indices = kmax_sequence_score_layer(input=input_layer, beam_size) + + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. It stores scores over a sequence or a nested + sequence and its size must be 1. + :type input: LayerOutput. + :param beam_size: squence indices with top beam_size scores are returned. + :type beam_size: double + :return: LayerOutput object. + :rtype: LayerOutput + """ + assert isinstance(input, LayerOutput), ("kmax_sequence_score_layer " + "accepts only one input.") + assert input.size == 1, ( + "input of kmax_sequence_score_layer is a score" + "over a sequence or a nested sequence, so its width must be 1.") + + Layer( + name=name, + type=LayerType.KMAX_SEQ_SCORE, + inputs=[input.name], + beam_size=beam_size) + + return LayerOutput( + name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size) diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt index 6c860fd497..580aef935b 100644 --- a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt +++ b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt @@ -1,17 +1,17 @@ #################### test_config_parser ######################### add_test(NAME layers_test - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) add_test(NAME test_reset_hook - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py - WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp) add_test(NAME test_layerHelpers COMMAND - ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} + ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal ) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a627369275..a61beb871a 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr new file mode 100644 index 0000000000..81bd71f68e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_kmax_seq_socre_layer.protostr @@ -0,0 +1,66 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "data" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "exponential" + inputs { + input_layer_name: "data" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + beam_size: 5 +} +parameters { + name: "___fc_layer_0__.w0" + size: 128 + initial_mean: 0.0 + initial_std: 0.0883883476483 + dims: 128 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__kmax_sequence_score_layer_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "data" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_0__" + input_layer_names: "data" + output_layer_names: "__kmax_sequence_score_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py new file mode 100644 index 0000000000..d245c5a41c --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +data = data_layer(name='input', size=300) + +data = data_layer(name="data", size=128) +scores = fc_layer(input=data, size=1, act=ExpActivation()) +kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5) + +outputs(kmax_seq_id) diff --git a/python/paddle/v2/framework/.gitignore b/python/paddle/v2/framework/.gitignore new file mode 100644 index 0000000000..2ff540d576 --- /dev/null +++ b/python/paddle/v2/framework/.gitignore @@ -0,0 +1 @@ +proto diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 541639ac21..b76c05dc81 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -1,6 +1,5 @@ py_test(test_net SRCS test_net.py) -py_test(test_fc_op SRCS test_fc_op.py) py_test(test_scope SRCS test_scope.py) py_test(test_tensor SRCS test_tensor.py) @@ -13,6 +12,7 @@ py_test(test_protobuf SRCS test_protobuf.py) py_test(test_add_two_op SRCS test_add_two_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_softmax_op SRCS test_softmax_op.py) +py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(gradient_checker SRCS gradient_checker.py) @@ -20,4 +20,7 @@ py_test(gradient_checker SRCS gradient_checker.py) py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) + py_test(test_operator SRCS test_operator.py) +# py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) +py_test(test_uniform_random_op SRCS test_uniform_random_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index cfd29932f5..015e832e82 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -1,16 +1,31 @@ +import unittest + +import numpy import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator -import numpy -import unittest __all__ = ['get_numeric_gradient'] +def create_op(op_type): + kwargs = dict() + for in_name in Operator.get_op_input_names(op_type): + kwargs[in_name] = in_name + for out_name in Operator.get_op_output_names(op_type): + kwargs[out_name] = out_name + + return Operator(op_type, **kwargs) + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + def get_numeric_gradient(op, input_values, output_name, input_to_check, - delta=1e-2, + delta=0.005, local_scope=None): """ Get Numeric Gradient for an operator's input. @@ -58,24 +73,151 @@ def get_numeric_gradient(op, def product(dim): return reduce(lambda a, b: a * b, dim, 1) + # get the input tensor that we want to get it's numeric gradient. tensor_to_check = local_scope.find_var(input_to_check).get_tensor() tensor_size = product(tensor_to_check.get_dims()) + # prepare a numpy array to store the gradient. gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32') + + # we only compute gradient of one element each time. + # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): + # get one input element throw it's index i. origin = tensor_to_check.get_float_element(i) + + # add delta to it, run op and then get the sum of the result tensor. x_pos = origin + delta tensor_to_check.set_float_element(i, x_pos) y_pos = get_output() + # plus delta to this element, run op and get the sum of the result tensor. x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() - tensor_to_check.set_float_element(i, origin) # restore old value + # restore old value + tensor_to_check.set_float_element(i, origin) + + # compute the gradient of this element and store it into a numpy array. gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + # reshape the gradient result to the shape of the source tensor. return gradient_flat.reshape(tensor_to_check.get_dims()) +class GradientChecker(unittest.TestCase): + def assert_is_close(self, numeric_grads, scope, max_relative_error, + msg_prefix): + for name in numeric_grads: + b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + a = numeric_grads[name] + + abs_a = numpy.abs(a) + # if abs_a is nearly zero, then use abs error for a, not relative + # error. + abs_a[abs_a < 1e-3] = 1 + + diff_mat = numpy.abs(a - b) / abs_a + max_diff = numpy.max(diff_mat) + + def err_msg(): + offset = numpy.argmax(diff_mat > max_relative_error) + return "%s Variable %s max gradient diff %f over limit %f, the first " \ + "error element is %d" % ( + msg_prefix, name, max_diff, max_relative_error, offset) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: used to create backward_op + :param input_vars: numpy value of input variable. The following + computation will use these variables. + :param inputs_to_check: inputs var names that should check gradient. + :param output_name: output name that used to + :param max_relative_error: The relative tolerance parameter. + :param no_grad_set: used when create backward ops + :param only_cpu: only compute and check gradient on cpu kernel. + :return: + """ + if no_grad_set is None: + no_grad_set = set() + + tmp_outs = forward_op.temp_outputs() + no_tmp_out = filter(lambda name: name not in tmp_outs, + forward_op.outputs()) + if len(no_tmp_out) != 1: + raise ValueError("non temp out_names should be 1") + + in_names = forward_op.inputs() + for no_grad in no_grad_set: + if no_grad not in in_names: + raise ValueError("no_grad should be in in_names") + + backward_op = core.Operator.backward(forward_op, no_grad_set) + + places = [core.CPUPlace()] + if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): + places.append(core.GPUPlace(0)) + + numeric_grad = dict() + # get numeric gradient + for check_name in inputs_to_check: + numeric_grad[check_name] = \ + get_numeric_gradient(forward_op, input_vars, output_name, + check_name) + + # get operator gradient according to different device + for place in places: + scope = core.Scope() + ctx = core.DeviceContext.create(place) + + # create input var and set value + for name, value in input_vars.iteritems(): + if name not in in_names: + raise ValueError(name + " not in op.inputs_") + var = scope.new_var(name).get_tensor() + var.set_dims(value.shape) + var.set(value, place) + + # create output var + for out_name in forward_op.outputs(): + scope.new_var(out_name).get_tensor() + + # infer the shape of output var and compute/set value of output var + forward_op.infer_shape(scope) + forward_op.run(scope, ctx) + + # create output grad var + # set shape as the output var + # set value of this grad to ones + for name in forward_op.outputs(): + out_tensor = scope.find_var(name).get_tensor() + grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() + grad_tensor.set_dims(out_tensor.shape()) + data = 1.0 * numpy.ones(out_tensor.shape()) + grad_tensor.set(data, place) + + # create input grad var + for name in backward_op.outputs(): + scope.new_var(name).get_tensor() + + # infer the shape of input gradient var and compute/set it's value + # with backward op + backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + + self.assert_is_close(numeric_grad, scope, max_relative_error, + "Gradient Check On %s" % str(place)) + + if __name__ == '__main__': class GetNumericGradientTest(unittest.TestCase): @@ -87,4 +229,28 @@ if __name__ == '__main__': arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X') self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2) + def test_softmax_op(self): + def stable_softmax(x): + """Compute the softmax of vector x in a numerically stable way.""" + shiftx = x - numpy.max(x) + exps = numpy.exp(shiftx) + return exps / numpy.sum(exps) + + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(Y.shape[0]): + d = numpy.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + softmax_op = Operator("softmax", X="X", Y="Y") + + X = numpy.random.random((2, 2)).astype("float32") + Y = numpy.apply_along_axis(stable_softmax, 1, X) + dY = numpy.ones(Y.shape) + dX = label_softmax_grad(Y, dY) + + arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X') + numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2) + unittest.main() diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index da6bed0fcd..dd65e0f2dc 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -1,6 +1,5 @@ -import paddle.v2.framework.core as core -import unittest import numpy +import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator @@ -24,7 +23,7 @@ class OpTestMeta(type): scope = core.Scope() kwargs = dict() places = [core.CPUPlace()] - if core.is_compile_gpu() and core.Operator.support_gpu(self.type): + if core.is_compile_gpu(): places.append(core.GPUPlace(0)) for place in places: @@ -53,6 +52,8 @@ class OpTestMeta(type): kwargs[attr_name] = self.attrs[attr_name] op = Operator(self.type, **kwargs) + if isinstance(place, core.GPUPlace) and not op.support_gpu(): + return op.infer_shape(scope) diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index b26e25d58b..4815192e25 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,9 +1,10 @@ import unittest import numpy from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op -class TestSGD(unittest.TestCase): +class TestCrossEntropy(unittest.TestCase): __metaclass__ = OpTestMeta def setUp(self): @@ -20,7 +21,18 @@ class TestSGD(unittest.TestCase): self.outputs = {'Y': numpy.array(Y).astype("float32")} -# TODO(superjom) add gradient check +class CrossEntropyGradOpTest(GradientChecker): + def test_softmax_grad(self): + op = create_op("onehot_cross_entropy") + batch_size = 100 + class_num = 10 + inputs = { + "X": numpy.random.uniform( + 0.1, 1.0, [batch_size, class_num]).astype("float32"), + "label": (class_num / 2) * numpy.ones(batch_size).astype("int32") + } + self.check_grad(op, inputs, set("X"), "Y") + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py index 4d5af08e15..d504bc8b43 100644 --- a/python/paddle/v2/framework/tests/test_fc_op.py +++ b/python/paddle/v2/framework/tests/test_fc_op.py @@ -40,7 +40,5 @@ class TestFc(unittest.TestCase): self.assertTrue(np.allclose(py_data, op_data)) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py new file mode 100644 index 0000000000..f95ed70b58 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -0,0 +1,36 @@ +import unittest +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator +import numpy + + +class GaussianRandomTest(unittest.TestCase): + def test_cpu(self): + self.gaussian_random_test(place=core.CPUPlace()) + + def test_gpu(self): + if core.is_compile_gpu(): + self.gaussian_random_test(place=core.GPUPlace(0)) + + def gaussian_random_test(self, place): + scope = core.Scope() + scope.new_var("Out").get_tensor() + + op = Operator( + "gaussian_random", + Out="Out", + dims=[1000, 784], + mean=.0, + std=1., + seed=10) + + op.infer_shape(scope) + context = core.DeviceContext.create(place) + op.run(scope, context) + tensor = numpy.array(scope.find_var("Out").get_tensor()) + self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) + self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index b30896553d..cc7f09e715 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -3,6 +3,15 @@ from paddle.v2.framework.op import Operator import unittest +def fc(X, W, Y): + ret_v = core.Net.create() + + ret_v.add_op(Operator("mul", X="X", Y="W", Out="pre_activation")) + ret_v.add_op(Operator("sigmoid", X="pre_activation", Y=Y)) + ret_v.complete_add_op(True) + return ret_v + + class TestNet(unittest.TestCase): def test_net_all(self): net = core.Net.create() @@ -10,18 +19,18 @@ class TestNet(unittest.TestCase): net.add_op(op1) net2 = core.Net.create() - net2.add_op(Operator("fc", X="X", W="w", Y="fc.out")) + net2.add_op(fc(X="X", W="w", Y="fc.out")) net2.complete_add_op(True) net.add_op(net2) net.complete_add_op(True) expected = ''' -Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). +Op(plain_net), inputs:(W, X, Y), outputs:(Out, fc.out, pre_activation). Op(add_two), inputs:(X, Y), outputs:(Out). - Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). - Op(fc), inputs:(X, w, @EMPTY@), outputs:(fc.out, @TEMP@fc@0). - Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). - Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). + Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). + Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). + Op(mul), inputs:(X, W), outputs:(pre_activation). + Op(sigmoid), inputs:(pre_activation), outputs:(fc.out). ''' self.assertEqual(expected, "\n" + str(net)) diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index d20e085b8e..e670d93653 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -1,9 +1,8 @@ import unittest import numpy as np -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +from gradient_checker import GradientChecker, create_op from op_test_util import OpTestMeta @@ -25,62 +24,11 @@ class TestSoftmaxOp(unittest.TestCase): } -class TestSoftmaxGradOp(unittest.TestCase): - def test_softmax_grad(self): - op = Operator('softmax', X="X", Y="Y") - backward_op = core.Operator.backward(op, set()) - self.assertEqual(backward_op.type(), "softmax_grad") - expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' - self.assertEqual(expected, str(backward_op)) - - batch_size = 3 - class_num = 5 - # Initialize X and add 1e-2 for numerical stability - Y = np.random.rand(batch_size, class_num).astype(np.float32) - Y = Y + 1e-2 - dY = np.random.rand(batch_size, class_num).astype(np.float32) - - # Reference implementation of cross entropy with soft labels - def label_softmax_grad(Y, dY): - dX = Y * 0.0 - for i in range(batch_size): - d = np.dot(Y[i, :], dY[i, :]) - dX[i, :] = Y[i, :] * (dY[i, :] - d) - return dX - - expected = label_softmax_grad(Y, dY) - - scope = core.Scope() - places = [] - places.append(core.CPUPlace()) - if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) - - for place in places: - y = scope.new_var("Y") - y_tensor = y.get_tensor() - y_tensor.set_dims([batch_size, class_num]) - y_tensor.alloc_float(place) - y_tensor.set(Y, place) - - dy = scope.new_var("Y@GRAD") - dy_tensor = dy.get_tensor() - dy_tensor.set_dims([batch_size, class_num]) - dy_tensor.alloc_float(place) - dy_tensor.set(dY, place) - - x = scope.new_var("X") - dx = scope.new_var("X@GRAD") - - tensor = scope.find_var("X@GRAD").get_tensor() - backward_op.infer_shape(scope) - self.assertEqual([batch_size, class_num], tensor.shape()) - - ctx = core.DeviceContext.create(place) - backward_op.run(scope, ctx) - actual = np.array(tensor) - - np.testing.assert_almost_equal(actual, expected, decimal=3) +class SoftmaxGradOpTest(GradientChecker): + def test_softmax(self): + op = create_op("softmax") + inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")} + self.check_grad(op, inputs, set("X"), "Y") if __name__ == '__main__': diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/framework/tests/test_uniform_random_op.py new file mode 100644 index 0000000000..c3d2bb44da --- /dev/null +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -0,0 +1,35 @@ +import unittest +from paddle.v2.framework.op import Operator +import paddle.v2.framework.core as core +import numpy + + +class UniformRandomTest(unittest.TestCase): + def test_uniform_random_cpu(self): + self.uniform_random_test(place=core.CPUPlace()) + + def test_uniform_random_gpu(self): + if core.is_compile_gpu(): + self.uniform_random_test(place=core.GPUPlace(0)) + + def uniform_random_test(self, place): + scope = core.Scope() + scope.new_var("X").get_tensor() + + op = Operator( + "uniform_random", + Out="X", + dims=[1000, 784], + min=-5.0, + max=10.0, + seed=10) + + op.infer_shape(scope) + ctx = core.DeviceContext.create(place) + op.run(scope, ctx) + tensor = numpy.array(scope.find_var("X").get_tensor()) + self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index ba58198033..29f0945eb4 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,13 +1,26 @@ -import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.trainer_config_helpers.optimizers as v1_optimizers +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Optimizers(update equation) for SGD method. -TODO(zhihong) : create new optimizer with proto config, add new optimizer here - TODO(yuyang18): Complete comments. """ +import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils +import paddle.trainer_config_helpers.optimizers as v1_optimizers +from paddle.proto.OptimizerConfig_pb2 import OptimizerConfig + __all__ = [ 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'RMSProp', 'ModelAverage', 'L2Regularization' @@ -70,7 +83,8 @@ class Optimizer(object): gradient_machine.prefetch(in_args) parameter_updater.getParametersRemote() - :param pserver_spec: pserver location, eg: localhost:3000 + :param pserver_spec: pserver location, eg: localhost:3000, if use etcd, + pserver_spec should be the etcd endpoints, eg: http://localhost:2379 :return: parameter_updater """ if is_local: diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index a9cba8ca0b..b8af5abaea 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy as np from paddle.proto.ParameterConfig_pb2 import ParameterConfig import paddle.trainer.config_parser as cp @@ -113,16 +127,7 @@ class Parameters(object): """ return iter(self.__param_conf__) - def __getitem__(self, key): - """ - Get parameter by parameter name. It uses Python dict syntax. - - :note: It will always copy the parameter from C++ side. - :param key: Parameter name - :type key: basestring - :return: parameter value - :rtype: np.ndarray - """ + def __getter_inner(self, key, param_type): import py_paddle.swig_paddle as api shape = self.get_shape(key) @@ -138,7 +143,7 @@ class Parameters(object): each_gradient_machine, key) # for simplify implementation now, we always copy from C++ assert isinstance(param, api.Parameter) - val = param.getBuf(api.PARAMETER_VALUE) + val = param.getBuf(param_type) assert isinstance(val, api.Vector) val = val.copyToNumpyArray() return val @@ -146,6 +151,19 @@ class Parameters(object): raise RuntimeError("Unexpected branch") + def __getitem__(self, key): + """ + Get parameter by parameter name. It uses Python dict syntax. + + :note: It will always copy the parameter from C++ side. + :param key: Parameter name + :type key: basestring + :return: parameter value + :rtype: np.ndarray + """ + import py_paddle.swig_paddle as api + return self.__getter_inner(key, api.PARAMETER_VALUE) + def get_shape(self, key): """ get shape of the parameter. @@ -202,6 +220,19 @@ class Parameters(object): """ return self.__getitem__(key=parameter_name) + def get_grad(self, key): + """ + Get grandient by parameter name. + + :note: It will always copy the parameter from C++ side. + :param key: parameter name + :type key: basestring + :return: The grandient matrix. + :rtype: np.ndarray + """ + import py_paddle.swig_paddle as api + return self.__getter_inner(key, api.PARAMETER_GRADIENT) + def set(self, parameter_name, value): """ Set parameter by parameter name & matrix. @@ -250,7 +281,13 @@ class Parameters(object): size = reduce(lambda a, b: a * b, param.shape) f.write(struct.pack("IIQ", 0, 4, size)) param = param.astype(np.float32) - f.write(param.tostring()) + s = param.tostring() + wrote_size = 0 + buf = buffer(s, wrote_size, 65535) + while buf: # f.write crashes with big data blog. + f.write(buf) + wrote_size += 65535 + buf = buffer(s, wrote_size, 65535) def deserialize(self, name, f): """ diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 76bae0bb12..9c4dd5f250 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -161,14 +161,14 @@ class SGD(object): self.__parameter_updater__.update(each_param) cost_sum = out_args.sum() cost = cost_sum / len(data_batch) - self.__parameter_updater__.finishBatch(cost) - batch_evaluator.finish() event_handler( v2_event.EndIteration( pass_id=pass_id, batch_id=batch_id, cost=cost, evaluator=batch_evaluator)) + self.__parameter_updater__.finishBatch(cost) + batch_evaluator.finish() self.__parameter_updater__.finishPass() pass_evaluator.finish() diff --git a/python/requirements.txt b/python/requirements.txt new file mode 100644 index 0000000000..3df822bd76 --- /dev/null +++ b/python/requirements.txt @@ -0,0 +1,9 @@ +requests==2.9.2 +numpy>=1.12 +protobuf==3.1 +recordio +matplotlib +rarfile +scipy>=0.19.0 +Pillow +nltk>=3.2.2 diff --git a/python/setup.py.in b/python/setup.py.in index 38f0a503be..38728aa2fd 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,5 +1,4 @@ from setuptools import setup, Distribution - class BinaryDistribution(Distribution): def has_ext_modules(foo): return True @@ -18,15 +17,8 @@ packages=['paddle', 'paddle.v2.framework.proto', 'py_paddle'] -setup_requires=["requests", - "numpy>=1.12", - "protobuf==3.1", - "recordio", - "matplotlib", - "rarfile", - "scipy>=0.19.0", - "Pillow", - "nltk>=3.2.2"] +with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: + setup_requires = f.read().splitlines() if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: setup_requires+=["opencv-python"] @@ -45,14 +37,14 @@ setup(name='paddlepaddle', '': '${CMAKE_CURRENT_SOURCE_DIR}', # The paddle.v2.framework.proto will be generated while compiling. # So that package points to other directory. - 'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework', - 'py_paddle': '${PROJ_ROOT}/paddle/py_paddle' + 'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework', + 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' }, - scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'], + scripts=['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'], distclass=BinaryDistribution, data_files=[('/usr/local/opt/paddle/bin', - ['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage', - '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer', - '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model', - '${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])] + ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage', + '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer', + '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model', + '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main'])] )